diff --git a/src/deps/src/zlib/doc/algorithm.txt b/src/deps/src/zlib/doc/algorithm.txt new file mode 100644 index 000000000..029e5a313 --- /dev/null +++ b/src/deps/src/zlib/doc/algorithm.txt @@ -0,0 +1,209 @@ +1. Compression algorithm (deflate) + +The deflation algorithm used by gzip (also zip and zlib) is a variation of +LZ77 (Lempel-Ziv 1977, see reference below). It finds duplicated strings in +the input data. The second occurrence of a string is replaced by a +pointer to the previous string, in the form of a pair (distance, +length). Distances are limited to 32K bytes, and lengths are limited +to 258 bytes. When a string does not occur anywhere in the previous +32K bytes, it is emitted as a sequence of literal bytes. (In this +description, `string' must be taken as an arbitrary sequence of bytes, +and is not restricted to printable characters.) + +Literals or match lengths are compressed with one Huffman tree, and +match distances are compressed with another tree. The trees are stored +in a compact form at the start of each block. The blocks can have any +size (except that the compressed data for one block must fit in +available memory). A block is terminated when deflate() determines that +it would be useful to start another block with fresh trees. (This is +somewhat similar to the behavior of LZW-based _compress_.) + +Duplicated strings are found using a hash table. All input strings of +length 3 are inserted in the hash table. A hash index is computed for +the next 3 bytes. If the hash chain for this index is not empty, all +strings in the chain are compared with the current input string, and +the longest match is selected. + +The hash chains are searched starting with the most recent strings, to +favor small distances and thus take advantage of the Huffman encoding. +The hash chains are singly linked. There are no deletions from the +hash chains, the algorithm simply discards matches that are too old. + +To avoid a worst-case situation, very long hash chains are arbitrarily +truncated at a certain length, determined by a runtime option (level +parameter of deflateInit). So deflate() does not always find the longest +possible match but generally finds a match which is long enough. + +deflate() also defers the selection of matches with a lazy evaluation +mechanism. After a match of length N has been found, deflate() searches for +a longer match at the next input byte. If a longer match is found, the +previous match is truncated to a length of one (thus producing a single +literal byte) and the process of lazy evaluation begins again. Otherwise, +the original match is kept, and the next match search is attempted only N +steps later. + +The lazy match evaluation is also subject to a runtime parameter. If +the current match is long enough, deflate() reduces the search for a longer +match, thus speeding up the whole process. If compression ratio is more +important than speed, deflate() attempts a complete second search even if +the first match is already long enough. + +The lazy match evaluation is not performed for the fastest compression +modes (level parameter 1 to 3). For these fast modes, new strings +are inserted in the hash table only when no match was found, or +when the match is not too long. This degrades the compression ratio +but saves time since there are both fewer insertions and fewer searches. + + +2. Decompression algorithm (inflate) + +2.1 Introduction + +The key question is how to represent a Huffman code (or any prefix code) so +that you can decode fast. The most important characteristic is that shorter +codes are much more common than longer codes, so pay attention to decoding the +short codes fast, and let the long codes take longer to decode. + +inflate() sets up a first level table that covers some number of bits of +input less than the length of longest code. It gets that many bits from the +stream, and looks it up in the table. The table will tell if the next +code is that many bits or less and how many, and if it is, it will tell +the value, else it will point to the next level table for which inflate() +grabs more bits and tries to decode a longer code. + +How many bits to make the first lookup is a tradeoff between the time it +takes to decode and the time it takes to build the table. If building the +table took no time (and if you had infinite memory), then there would only +be a first level table to cover all the way to the longest code. However, +building the table ends up taking a lot longer for more bits since short +codes are replicated many times in such a table. What inflate() does is +simply to make the number of bits in the first table a variable, and then +to set that variable for the maximum speed. + +For inflate, which has 286 possible codes for the literal/length tree, the size +of the first table is nine bits. Also the distance trees have 30 possible +values, and the size of the first table is six bits. Note that for each of +those cases, the table ended up one bit longer than the ``average'' code +length, i.e. the code length of an approximately flat code which would be a +little more than eight bits for 286 symbols and a little less than five bits +for 30 symbols. + + +2.2 More details on the inflate table lookup + +Ok, you want to know what this cleverly obfuscated inflate tree actually +looks like. You are correct that it's not a Huffman tree. It is simply a +lookup table for the first, let's say, nine bits of a Huffman symbol. The +symbol could be as short as one bit or as long as 15 bits. If a particular +symbol is shorter than nine bits, then that symbol's translation is duplicated +in all those entries that start with that symbol's bits. For example, if the +symbol is four bits, then it's duplicated 32 times in a nine-bit table. If a +symbol is nine bits long, it appears in the table once. + +If the symbol is longer than nine bits, then that entry in the table points +to another similar table for the remaining bits. Again, there are duplicated +entries as needed. The idea is that most of the time the symbol will be short +and there will only be one table look up. (That's whole idea behind data +compression in the first place.) For the less frequent long symbols, there +will be two lookups. If you had a compression method with really long +symbols, you could have as many levels of lookups as is efficient. For +inflate, two is enough. + +So a table entry either points to another table (in which case nine bits in +the above example are gobbled), or it contains the translation for the symbol +and the number of bits to gobble. Then you start again with the next +ungobbled bit. + +You may wonder: why not just have one lookup table for how ever many bits the +longest symbol is? The reason is that if you do that, you end up spending +more time filling in duplicate symbol entries than you do actually decoding. +At least for deflate's output that generates new trees every several 10's of +kbytes. You can imagine that filling in a 2^15 entry table for a 15-bit code +would take too long if you're only decoding several thousand symbols. At the +other extreme, you could make a new table for every bit in the code. In fact, +that's essentially a Huffman tree. But then you spend too much time +traversing the tree while decoding, even for short symbols. + +So the number of bits for the first lookup table is a trade of the time to +fill out the table vs. the time spent looking at the second level and above of +the table. + +Here is an example, scaled down: + +The code being decoded, with 10 symbols, from 1 to 6 bits long: + +A: 0 +B: 10 +C: 1100 +D: 11010 +E: 11011 +F: 11100 +G: 11101 +H: 11110 +I: 111110 +J: 111111 + +Let's make the first table three bits long (eight entries): + +000: A,1 +001: A,1 +010: A,1 +011: A,1 +100: B,2 +101: B,2 +110: -> table X (gobble 3 bits) +111: -> table Y (gobble 3 bits) + +Each entry is what the bits decode as and how many bits that is, i.e. how +many bits to gobble. Or the entry points to another table, with the number of +bits to gobble implicit in the size of the table. + +Table X is two bits long since the longest code starting with 110 is five bits +long: + +00: C,1 +01: C,1 +10: D,2 +11: E,2 + +Table Y is three bits long since the longest code starting with 111 is six +bits long: + +000: F,2 +001: F,2 +010: G,2 +011: G,2 +100: H,2 +101: H,2 +110: I,3 +111: J,3 + +So what we have here are three tables with a total of 20 entries that had to +be constructed. That's compared to 64 entries for a single table. Or +compared to 16 entries for a Huffman tree (six two entry tables and one four +entry table). Assuming that the code ideally represents the probability of +the symbols, it takes on the average 1.25 lookups per symbol. That's compared +to one lookup for the single table, or 1.66 lookups per symbol for the +Huffman tree. + +There, I think that gives you a picture of what's going on. For inflate, the +meaning of a particular symbol is often more than just a letter. It can be a +byte (a "literal"), or it can be either a length or a distance which +indicates a base value and a number of bits to fetch after the code that is +added to the base value. Or it might be the special end-of-block code. The +data structures created in inftrees.c try to encode all that information +compactly in the tables. + + +Jean-loup Gailly Mark Adler +jloup@gzip.org madler@alumni.caltech.edu + + +References: + +[LZ77] Ziv J., Lempel A., ``A Universal Algorithm for Sequential Data +Compression,'' IEEE Transactions on Information Theory, Vol. 23, No. 3, +pp. 337-343. + +``DEFLATE Compressed Data Format Specification'' available in +http://tools.ietf.org/html/rfc1951 diff --git a/src/deps/src/zlib/doc/crc-doc.1.0.pdf b/src/deps/src/zlib/doc/crc-doc.1.0.pdf new file mode 100644 index 000000000..d6942ecc0 Binary files /dev/null and b/src/deps/src/zlib/doc/crc-doc.1.0.pdf differ diff --git a/src/deps/src/zlib/doc/rfc1950.txt b/src/deps/src/zlib/doc/rfc1950.txt new file mode 100644 index 000000000..ce6428a0f --- /dev/null +++ b/src/deps/src/zlib/doc/rfc1950.txt @@ -0,0 +1,619 @@ + + + + + + +Network Working Group P. Deutsch +Request for Comments: 1950 Aladdin Enterprises +Category: Informational J-L. Gailly + Info-ZIP + May 1996 + + + ZLIB Compressed Data Format Specification version 3.3 + +Status of This Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +IESG Note: + + The IESG takes no position on the validity of any Intellectual + Property Rights statements contained in this document. + +Notices + + Copyright (c) 1996 L. Peter Deutsch and Jean-Loup Gailly + + Permission is granted to copy and distribute this document for any + purpose and without charge, including translations into other + languages and incorporation into compilations, provided that the + copyright notice and this notice are preserved, and that any + substantive changes or deletions from the original are clearly + marked. + + A pointer to the latest version of this and related documentation in + HTML format can be found at the URL + . + +Abstract + + This specification defines a lossless compressed data format. The + data can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a priori + bounded amount of intermediate storage. The format presently uses + the DEFLATE compression method but can be easily extended to use + other compression methods. It can be implemented readily in a manner + not covered by patents. This specification also defines the ADLER-32 + checksum (an extension and improvement of the Fletcher checksum), + used for detection of data corruption, and provides an algorithm for + computing it. + + + + +Deutsch & Gailly Informational [Page 1] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + +Table of Contents + + 1. Introduction ................................................... 2 + 1.1. Purpose ................................................... 2 + 1.2. Intended audience ......................................... 3 + 1.3. Scope ..................................................... 3 + 1.4. Compliance ................................................ 3 + 1.5. Definitions of terms and conventions used ................ 3 + 1.6. Changes from previous versions ............................ 3 + 2. Detailed specification ......................................... 3 + 2.1. Overall conventions ....................................... 3 + 2.2. Data format ............................................... 4 + 2.3. Compliance ................................................ 7 + 3. References ..................................................... 7 + 4. Source code .................................................... 8 + 5. Security Considerations ........................................ 8 + 6. Acknowledgements ............................................... 8 + 7. Authors' Addresses ............................................. 8 + 8. Appendix: Rationale ............................................ 9 + 9. Appendix: Sample code ..........................................10 + +1. Introduction + + 1.1. Purpose + + The purpose of this specification is to define a lossless + compressed data format that: + + * Is independent of CPU type, operating system, file system, + and character set, and hence can be used for interchange; + + * Can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a + priori bounded amount of intermediate storage, and hence can + be used in data communications or similar structures such as + Unix filters; + + * Can use a number of different compression methods; + + * Can be implemented readily in a manner not covered by + patents, and hence can be practiced freely. + + The data format defined by this specification does not attempt to + allow random access to compressed data. + + + + + + + +Deutsch & Gailly Informational [Page 2] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + + 1.2. Intended audience + + This specification is intended for use by implementors of software + to compress data into zlib format and/or decompress data from zlib + format. + + The text of the specification assumes a basic background in + programming at the level of bits and other primitive data + representations. + + 1.3. Scope + + The specification specifies a compressed data format that can be + used for in-memory compression of a sequence of arbitrary bytes. + + 1.4. Compliance + + Unless otherwise indicated below, a compliant decompressor must be + able to accept and decompress any data set that conforms to all + the specifications presented here; a compliant compressor must + produce data sets that conform to all the specifications presented + here. + + 1.5. Definitions of terms and conventions used + + byte: 8 bits stored or transmitted as a unit (same as an octet). + (For this specification, a byte is exactly 8 bits, even on + machines which store a character on a number of bits different + from 8.) See below, for the numbering of bits within a byte. + + 1.6. Changes from previous versions + + Version 3.1 was the first public release of this specification. + In version 3.2, some terminology was changed and the Adler-32 + sample code was rewritten for clarity. In version 3.3, the + support for a preset dictionary was introduced, and the + specification was converted to RFC style. + +2. Detailed specification + + 2.1. Overall conventions + + In the diagrams below, a box like this: + + +---+ + | | <-- the vertical bars might be missing + +---+ + + + + +Deutsch & Gailly Informational [Page 3] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + + represents one byte; a box like this: + + +==============+ + | | + +==============+ + + represents a variable number of bytes. + + Bytes stored within a computer do not have a "bit order", since + they are always treated as a unit. However, a byte considered as + an integer between 0 and 255 does have a most- and least- + significant bit, and since we write numbers with the most- + significant digit on the left, we also write bytes with the most- + significant bit on the left. In the diagrams below, we number the + bits of a byte so that bit 0 is the least-significant bit, i.e., + the bits are numbered: + + +--------+ + |76543210| + +--------+ + + Within a computer, a number may occupy multiple bytes. All + multi-byte numbers in the format described here are stored with + the MOST-significant byte first (at the lower memory address). + For example, the decimal number 520 is stored as: + + 0 1 + +--------+--------+ + |00000010|00001000| + +--------+--------+ + ^ ^ + | | + | + less significant byte = 8 + + more significant byte = 2 x 256 + + 2.2. Data format + + A zlib stream has the following structure: + + 0 1 + +---+---+ + |CMF|FLG| (more-->) + +---+---+ + + + + + + + + +Deutsch & Gailly Informational [Page 4] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + + (if FLG.FDICT set) + + 0 1 2 3 + +---+---+---+---+ + | DICTID | (more-->) + +---+---+---+---+ + + +=====================+---+---+---+---+ + |...compressed data...| ADLER32 | + +=====================+---+---+---+---+ + + Any data which may appear after ADLER32 are not part of the zlib + stream. + + CMF (Compression Method and flags) + This byte is divided into a 4-bit compression method and a 4- + bit information field depending on the compression method. + + bits 0 to 3 CM Compression method + bits 4 to 7 CINFO Compression info + + CM (Compression method) + This identifies the compression method used in the file. CM = 8 + denotes the "deflate" compression method with a window size up + to 32K. This is the method used by gzip and PNG (see + references [1] and [2] in Chapter 3, below, for the reference + documents). CM = 15 is reserved. It might be used in a future + version of this specification to indicate the presence of an + extra field before the compressed data. + + CINFO (Compression info) + For CM = 8, CINFO is the base-2 logarithm of the LZ77 window + size, minus eight (CINFO=7 indicates a 32K window size). Values + of CINFO above 7 are not allowed in this version of the + specification. CINFO is not defined in this specification for + CM not equal to 8. + + FLG (FLaGs) + This flag byte is divided as follows: + + bits 0 to 4 FCHECK (check bits for CMF and FLG) + bit 5 FDICT (preset dictionary) + bits 6 to 7 FLEVEL (compression level) + + The FCHECK value must be such that CMF and FLG, when viewed as + a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG), + is a multiple of 31. + + + + +Deutsch & Gailly Informational [Page 5] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + + FDICT (Preset dictionary) + If FDICT is set, a DICT dictionary identifier is present + immediately after the FLG byte. The dictionary is a sequence of + bytes which are initially fed to the compressor without + producing any compressed output. DICT is the Adler-32 checksum + of this sequence of bytes (see the definition of ADLER32 + below). The decompressor can use this identifier to determine + which dictionary has been used by the compressor. + + FLEVEL (Compression level) + These flags are available for use by specific compression + methods. The "deflate" method (CM = 8) sets these flags as + follows: + + 0 - compressor used fastest algorithm + 1 - compressor used fast algorithm + 2 - compressor used default algorithm + 3 - compressor used maximum compression, slowest algorithm + + The information in FLEVEL is not needed for decompression; it + is there to indicate if recompression might be worthwhile. + + compressed data + For compression method 8, the compressed data is stored in the + deflate compressed data format as described in the document + "DEFLATE Compressed Data Format Specification" by L. Peter + Deutsch. (See reference [3] in Chapter 3, below) + + Other compressed data formats are not specified in this version + of the zlib specification. + + ADLER32 (Adler-32 checksum) + This contains a checksum value of the uncompressed data + (excluding any dictionary data) computed according to Adler-32 + algorithm. This algorithm is a 32-bit extension and improvement + of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 + standard. See references [4] and [5] in Chapter 3, below) + + Adler-32 is composed of two sums accumulated per byte: s1 is + the sum of all bytes, s2 is the sum of all s1 values. Both sums + are done modulo 65521. s1 is initialized to 1, s2 to zero. The + Adler-32 checksum is stored as s2*65536 + s1 in most- + significant-byte first (network) order. + + + + + + + + +Deutsch & Gailly Informational [Page 6] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + + 2.3. Compliance + + A compliant compressor must produce streams with correct CMF, FLG + and ADLER32, but need not support preset dictionaries. When the + zlib data format is used as part of another standard data format, + the compressor may use only preset dictionaries that are specified + by this other data format. If this other format does not use the + preset dictionary feature, the compressor must not set the FDICT + flag. + + A compliant decompressor must check CMF, FLG, and ADLER32, and + provide an error indication if any of these have incorrect values. + A compliant decompressor must give an error indication if CM is + not one of the values defined in this specification (only the + value 8 is permitted in this version), since another value could + indicate the presence of new features that would cause subsequent + data to be interpreted incorrectly. A compliant decompressor must + give an error indication if FDICT is set and DICTID is not the + identifier of a known preset dictionary. A decompressor may + ignore FLEVEL and still be compliant. When the zlib data format + is being used as a part of another standard format, a compliant + decompressor must support all the preset dictionaries specified by + the other format. When the other format does not use the preset + dictionary feature, a compliant decompressor must reject any + stream in which the FDICT flag is set. + +3. References + + [1] Deutsch, L.P.,"GZIP Compressed Data Format Specification", + available in ftp://ftp.uu.net/pub/archiving/zip/doc/ + + [2] Thomas Boutell, "PNG (Portable Network Graphics) specification", + available in ftp://ftp.uu.net/graphics/png/documents/ + + [3] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification", + available in ftp://ftp.uu.net/pub/archiving/zip/doc/ + + [4] Fletcher, J. G., "An Arithmetic Checksum for Serial + Transmissions," IEEE Transactions on Communications, Vol. COM-30, + No. 1, January 1982, pp. 247-252. + + [5] ITU-T Recommendation X.224, Annex D, "Checksum Algorithms," + November, 1993, pp. 144, 145. (Available from + gopher://info.itu.ch). ITU-T X.244 is also the same as ISO 8073. + + + + + + + +Deutsch & Gailly Informational [Page 7] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + +4. Source code + + Source code for a C language implementation of a "zlib" compliant + library is available at ftp://ftp.uu.net/pub/archiving/zip/zlib/. + +5. Security Considerations + + A decoder that fails to check the ADLER32 checksum value may be + subject to undetected data corruption. + +6. Acknowledgements + + Trademarks cited in this document are the property of their + respective owners. + + Jean-Loup Gailly and Mark Adler designed the zlib format and wrote + the related software described in this specification. Glenn + Randers-Pehrson converted this document to RFC and HTML format. + +7. Authors' Addresses + + L. Peter Deutsch + Aladdin Enterprises + 203 Santa Margarita Ave. + Menlo Park, CA 94025 + + Phone: (415) 322-0103 (AM only) + FAX: (415) 322-1734 + EMail: + + + Jean-Loup Gailly + + EMail: + + Questions about the technical content of this specification can be + sent by email to + + Jean-Loup Gailly and + Mark Adler + + Editorial comments on this specification can be sent by email to + + L. Peter Deutsch and + Glenn Randers-Pehrson + + + + + + +Deutsch & Gailly Informational [Page 8] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + +8. Appendix: Rationale + + 8.1. Preset dictionaries + + A preset dictionary is specially useful to compress short input + sequences. The compressor can take advantage of the dictionary + context to encode the input in a more compact manner. The + decompressor can be initialized with the appropriate context by + virtually decompressing a compressed version of the dictionary + without producing any output. However for certain compression + algorithms such as the deflate algorithm this operation can be + achieved without actually performing any decompression. + + The compressor and the decompressor must use exactly the same + dictionary. The dictionary may be fixed or may be chosen among a + certain number of predefined dictionaries, according to the kind + of input data. The decompressor can determine which dictionary has + been chosen by the compressor by checking the dictionary + identifier. This document does not specify the contents of + predefined dictionaries, since the optimal dictionaries are + application specific. Standard data formats using this feature of + the zlib specification must precisely define the allowed + dictionaries. + + 8.2. The Adler-32 algorithm + + The Adler-32 algorithm is much faster than the CRC32 algorithm yet + still provides an extremely low probability of undetected errors. + + The modulo on unsigned long accumulators can be delayed for 5552 + bytes, so the modulo operation time is negligible. If the bytes + are a, b, c, the second sum is 3a + 2b + c + 3, and so is position + and order sensitive, unlike the first sum, which is just a + checksum. That 65521 is prime is important to avoid a possible + large class of two-byte errors that leave the check unchanged. + (The Fletcher checksum uses 255, which is not prime and which also + makes the Fletcher check insensitive to single byte changes 0 <-> + 255.) + + The sum s1 is initialized to 1 instead of zero to make the length + of the sequence part of s2, so that the length does not have to be + checked separately. (Any sequence of zeroes has a Fletcher + checksum of zero.) + + + + + + + + +Deutsch & Gailly Informational [Page 9] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + +9. Appendix: Sample code + + The following C code computes the Adler-32 checksum of a data buffer. + It is written for clarity, not for speed. The sample code is in the + ANSI C programming language. Non C users may find it easier to read + with these hints: + + & Bitwise AND operator. + >> Bitwise right shift operator. When applied to an + unsigned quantity, as here, right shift inserts zero bit(s) + at the left. + << Bitwise left shift operator. Left shift inserts zero + bit(s) at the right. + ++ "n++" increments the variable n. + % modulo operator: a % b is the remainder of a divided by b. + + #define BASE 65521 /* largest prime smaller than 65536 */ + + /* + Update a running Adler-32 checksum with the bytes buf[0..len-1] + and return the updated checksum. The Adler-32 checksum should be + initialized to 1. + + Usage example: + + unsigned long adler = 1L; + + while (read_buffer(buffer, length) != EOF) { + adler = update_adler32(adler, buffer, length); + } + if (adler != original_adler) error(); + */ + unsigned long update_adler32(unsigned long adler, + unsigned char *buf, int len) + { + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int n; + + for (n = 0; n < len; n++) { + s1 = (s1 + buf[n]) % BASE; + s2 = (s2 + s1) % BASE; + } + return (s2 << 16) + s1; + } + + /* Return the adler32 of the bytes buf[0..len-1] */ + + + + +Deutsch & Gailly Informational [Page 10] + +RFC 1950 ZLIB Compressed Data Format Specification May 1996 + + + unsigned long adler32(unsigned char *buf, int len) + { + return update_adler32(1L, buf, len); + } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Deutsch & Gailly Informational [Page 11] + diff --git a/src/deps/src/zlib/doc/rfc1951.txt b/src/deps/src/zlib/doc/rfc1951.txt new file mode 100644 index 000000000..403c8c722 --- /dev/null +++ b/src/deps/src/zlib/doc/rfc1951.txt @@ -0,0 +1,955 @@ + + + + + + +Network Working Group P. Deutsch +Request for Comments: 1951 Aladdin Enterprises +Category: Informational May 1996 + + + DEFLATE Compressed Data Format Specification version 1.3 + +Status of This Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +IESG Note: + + The IESG takes no position on the validity of any Intellectual + Property Rights statements contained in this document. + +Notices + + Copyright (c) 1996 L. Peter Deutsch + + Permission is granted to copy and distribute this document for any + purpose and without charge, including translations into other + languages and incorporation into compilations, provided that the + copyright notice and this notice are preserved, and that any + substantive changes or deletions from the original are clearly + marked. + + A pointer to the latest version of this and related documentation in + HTML format can be found at the URL + . + +Abstract + + This specification defines a lossless compressed data format that + compresses data using a combination of the LZ77 algorithm and Huffman + coding, with efficiency comparable to the best currently available + general-purpose compression methods. The data can be produced or + consumed, even for an arbitrarily long sequentially presented input + data stream, using only an a priori bounded amount of intermediate + storage. The format can be implemented readily in a manner not + covered by patents. + + + + + + + + +Deutsch Informational [Page 1] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +Table of Contents + + 1. Introduction ................................................... 2 + 1.1. Purpose ................................................... 2 + 1.2. Intended audience ......................................... 3 + 1.3. Scope ..................................................... 3 + 1.4. Compliance ................................................ 3 + 1.5. Definitions of terms and conventions used ................ 3 + 1.6. Changes from previous versions ............................ 4 + 2. Compressed representation overview ............................. 4 + 3. Detailed specification ......................................... 5 + 3.1. Overall conventions ....................................... 5 + 3.1.1. Packing into bytes .................................. 5 + 3.2. Compressed block format ................................... 6 + 3.2.1. Synopsis of prefix and Huffman coding ............... 6 + 3.2.2. Use of Huffman coding in the "deflate" format ....... 7 + 3.2.3. Details of block format ............................. 9 + 3.2.4. Non-compressed blocks (BTYPE=00) ................... 11 + 3.2.5. Compressed blocks (length and distance codes) ...... 11 + 3.2.6. Compression with fixed Huffman codes (BTYPE=01) .... 12 + 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) .. 13 + 3.3. Compliance ............................................... 14 + 4. Compression algorithm details ................................. 14 + 5. References .................................................... 16 + 6. Security Considerations ....................................... 16 + 7. Source code ................................................... 16 + 8. Acknowledgements .............................................. 16 + 9. Author's Address .............................................. 17 + +1. Introduction + + 1.1. Purpose + + The purpose of this specification is to define a lossless + compressed data format that: + * Is independent of CPU type, operating system, file system, + and character set, and hence can be used for interchange; + * Can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a + priori bounded amount of intermediate storage, and hence + can be used in data communications or similar structures + such as Unix filters; + * Compresses data with efficiency comparable to the best + currently available general-purpose compression methods, + and in particular considerably better than the "compress" + program; + * Can be implemented readily in a manner not covered by + patents, and hence can be practiced freely; + + + +Deutsch Informational [Page 2] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + * Is compatible with the file format produced by the current + widely used gzip utility, in that conforming decompressors + will be able to read data produced by the existing gzip + compressor. + + The data format defined by this specification does not attempt to: + + * Allow random access to compressed data; + * Compress specialized data (e.g., raster graphics) as well + as the best currently available specialized algorithms. + + A simple counting argument shows that no lossless compression + algorithm can compress every possible input data set. For the + format defined here, the worst case expansion is 5 bytes per 32K- + byte block, i.e., a size increase of 0.015% for large data sets. + English text usually compresses by a factor of 2.5 to 3; + executable files usually compress somewhat less; graphical data + such as raster images may compress much more. + + 1.2. Intended audience + + This specification is intended for use by implementors of software + to compress data into "deflate" format and/or decompress data from + "deflate" format. + + The text of the specification assumes a basic background in + programming at the level of bits and other primitive data + representations. Familiarity with the technique of Huffman coding + is helpful but not required. + + 1.3. Scope + + The specification specifies a method for representing a sequence + of bytes as a (usually shorter) sequence of bits, and a method for + packing the latter bit sequence into bytes. + + 1.4. Compliance + + Unless otherwise indicated below, a compliant decompressor must be + able to accept and decompress any data set that conforms to all + the specifications presented here; a compliant compressor must + produce data sets that conform to all the specifications presented + here. + + 1.5. Definitions of terms and conventions used + + Byte: 8 bits stored or transmitted as a unit (same as an octet). + For this specification, a byte is exactly 8 bits, even on machines + + + +Deutsch Informational [Page 3] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + which store a character on a number of bits different from eight. + See below, for the numbering of bits within a byte. + + String: a sequence of arbitrary bytes. + + 1.6. Changes from previous versions + + There have been no technical changes to the deflate format since + version 1.1 of this specification. In version 1.2, some + terminology was changed. Version 1.3 is a conversion of the + specification to RFC style. + +2. Compressed representation overview + + A compressed data set consists of a series of blocks, corresponding + to successive blocks of input data. The block sizes are arbitrary, + except that non-compressible blocks are limited to 65,535 bytes. + + Each block is compressed using a combination of the LZ77 algorithm + and Huffman coding. The Huffman trees for each block are independent + of those for previous or subsequent blocks; the LZ77 algorithm may + use a reference to a duplicated string occurring in a previous block, + up to 32K input bytes before. + + Each block consists of two parts: a pair of Huffman code trees that + describe the representation of the compressed data part, and a + compressed data part. (The Huffman trees themselves are compressed + using Huffman encoding.) The compressed data consists of a series of + elements of two types: literal bytes (of strings that have not been + detected as duplicated within the previous 32K input bytes), and + pointers to duplicated strings, where a pointer is represented as a + pair . The representation used in the + "deflate" format limits distances to 32K bytes and lengths to 258 + bytes, but does not limit the size of a block, except for + uncompressible blocks, which are limited as noted above. + + Each type of value (literals, distances, and lengths) in the + compressed data is represented using a Huffman code, using one code + tree for literals and lengths and a separate code tree for distances. + The code trees for each block appear in a compact form just before + the compressed data for that block. + + + + + + + + + + +Deutsch Informational [Page 4] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +3. Detailed specification + + 3.1. Overall conventions In the diagrams below, a box like this: + + +---+ + | | <-- the vertical bars might be missing + +---+ + + represents one byte; a box like this: + + +==============+ + | | + +==============+ + + represents a variable number of bytes. + + Bytes stored within a computer do not have a "bit order", since + they are always treated as a unit. However, a byte considered as + an integer between 0 and 255 does have a most- and least- + significant bit, and since we write numbers with the most- + significant digit on the left, we also write bytes with the most- + significant bit on the left. In the diagrams below, we number the + bits of a byte so that bit 0 is the least-significant bit, i.e., + the bits are numbered: + + +--------+ + |76543210| + +--------+ + + Within a computer, a number may occupy multiple bytes. All + multi-byte numbers in the format described here are stored with + the least-significant byte first (at the lower memory address). + For example, the decimal number 520 is stored as: + + 0 1 + +--------+--------+ + |00001000|00000010| + +--------+--------+ + ^ ^ + | | + | + more significant byte = 2 x 256 + + less significant byte = 8 + + 3.1.1. Packing into bytes + + This document does not address the issue of the order in which + bits of a byte are transmitted on a bit-sequential medium, + since the final data format described here is byte- rather than + + + +Deutsch Informational [Page 5] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + bit-oriented. However, we describe the compressed block format + in below, as a sequence of data elements of various bit + lengths, not a sequence of bytes. We must therefore specify + how to pack these data elements into bytes to form the final + compressed byte sequence: + + * Data elements are packed into bytes in order of + increasing bit number within the byte, i.e., starting + with the least-significant bit of the byte. + * Data elements other than Huffman codes are packed + starting with the least-significant bit of the data + element. + * Huffman codes are packed starting with the most- + significant bit of the code. + + In other words, if one were to print out the compressed data as + a sequence of bytes, starting with the first byte at the + *right* margin and proceeding to the *left*, with the most- + significant bit of each byte on the left as usual, one would be + able to parse the result from right to left, with fixed-width + elements in the correct MSB-to-LSB order and Huffman codes in + bit-reversed order (i.e., with the first bit of the code in the + relative LSB position). + + 3.2. Compressed block format + + 3.2.1. Synopsis of prefix and Huffman coding + + Prefix coding represents symbols from an a priori known + alphabet by bit sequences (codes), one code for each symbol, in + a manner such that different symbols may be represented by bit + sequences of different lengths, but a parser can always parse + an encoded string unambiguously symbol-by-symbol. + + We define a prefix code in terms of a binary tree in which the + two edges descending from each non-leaf node are labeled 0 and + 1 and in which the leaf nodes correspond one-for-one with (are + labeled with) the symbols of the alphabet; then the code for a + symbol is the sequence of 0's and 1's on the edges leading from + the root to the leaf labeled with that symbol. For example: + + + + + + + + + + + +Deutsch Informational [Page 6] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + /\ Symbol Code + 0 1 ------ ---- + / \ A 00 + /\ B B 1 + 0 1 C 011 + / \ D 010 + A /\ + 0 1 + / \ + D C + + A parser can decode the next symbol from an encoded input + stream by walking down the tree from the root, at each step + choosing the edge corresponding to the next input bit. + + Given an alphabet with known symbol frequencies, the Huffman + algorithm allows the construction of an optimal prefix code + (one which represents strings with those symbol frequencies + using the fewest bits of any possible prefix codes for that + alphabet). Such a code is called a Huffman code. (See + reference [1] in Chapter 5, references for additional + information on Huffman codes.) + + Note that in the "deflate" format, the Huffman codes for the + various alphabets must not exceed certain maximum code lengths. + This constraint complicates the algorithm for computing code + lengths from symbol frequencies. Again, see Chapter 5, + references for details. + + 3.2.2. Use of Huffman coding in the "deflate" format + + The Huffman codes used for each alphabet in the "deflate" + format have two additional rules: + + * All codes of a given bit length have lexicographically + consecutive values, in the same order as the symbols + they represent; + + * Shorter codes lexicographically precede longer codes. + + + + + + + + + + + + +Deutsch Informational [Page 7] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + We could recode the example above to follow this rule as + follows, assuming that the order of the alphabet is ABCD: + + Symbol Code + ------ ---- + A 10 + B 0 + C 110 + D 111 + + I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are + lexicographically consecutive. + + Given this rule, we can define the Huffman code for an alphabet + just by giving the bit lengths of the codes for each symbol of + the alphabet in order; this is sufficient to determine the + actual codes. In our example, the code is completely defined + by the sequence of bit lengths (2, 1, 3, 3). The following + algorithm generates the codes as integers, intended to be read + from most- to least-significant bit. The code lengths are + initially in tree[I].Len; the codes are produced in + tree[I].Code. + + 1) Count the number of codes for each code length. Let + bl_count[N] be the number of codes of length N, N >= 1. + + 2) Find the numerical value of the smallest code for each + code length: + + code = 0; + bl_count[0] = 0; + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = code; + } + + 3) Assign numerical values to all codes, using consecutive + values for all codes of the same length with the base + values determined at step 2. Codes that are never used + (which have a bit length of zero) must not be assigned a + value. + + for (n = 0; n <= max_code; n++) { + len = tree[n].Len; + if (len != 0) { + tree[n].Code = next_code[len]; + next_code[len]++; + } + + + +Deutsch Informational [Page 8] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + } + + Example: + + Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3, + 3, 2, 4, 4). After step 1, we have: + + N bl_count[N] + - ----------- + 2 1 + 3 5 + 4 2 + + Step 2 computes the following next_code values: + + N next_code[N] + - ------------ + 1 0 + 2 0 + 3 2 + 4 14 + + Step 3 produces the following code values: + + Symbol Length Code + ------ ------ ---- + A 3 010 + B 3 011 + C 3 100 + D 3 101 + E 3 110 + F 2 00 + G 4 1110 + H 4 1111 + + 3.2.3. Details of block format + + Each block of compressed data begins with 3 header bits + containing the following data: + + first bit BFINAL + next 2 bits BTYPE + + Note that the header bits do not necessarily begin on a byte + boundary, since a block does not necessarily occupy an integral + number of bytes. + + + + + +Deutsch Informational [Page 9] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + BFINAL is set if and only if this is the last block of the data + set. + + BTYPE specifies how the data are compressed, as follows: + + 00 - no compression + 01 - compressed with fixed Huffman codes + 10 - compressed with dynamic Huffman codes + 11 - reserved (error) + + The only difference between the two compressed cases is how the + Huffman codes for the literal/length and distance alphabets are + defined. + + In all cases, the decoding algorithm for the actual data is as + follows: + + do + read block header from input stream. + if stored with no compression + skip any remaining bits in current partially + processed byte + read LEN and NLEN (see next section) + copy LEN bytes of data to output + otherwise + if compressed with dynamic Huffman codes + read representation of code trees (see + subsection below) + loop (until end of block code recognized) + decode literal/length value from input stream + if value < 256 + copy value (literal byte) to output stream + otherwise + if value = end of block (256) + break from loop + otherwise (value = 257..285) + decode distance from input stream + + move backwards distance bytes in the output + stream, and copy length bytes from this + position to the output stream. + end loop + while not last block + + Note that a duplicated string reference may refer to a string + in a previous block; i.e., the backward distance may cross one + or more block boundaries. However a distance cannot refer past + the beginning of the output stream. (An application using a + + + +Deutsch Informational [Page 10] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + preset dictionary might discard part of the output stream; a + distance can refer to that part of the output stream anyway) + Note also that the referenced string may overlap the current + position; for example, if the last 2 bytes decoded have values + X and Y, a string reference with + adds X,Y,X,Y,X to the output stream. + + We now specify each compression method in turn. + + 3.2.4. Non-compressed blocks (BTYPE=00) + + Any bits of input up to the next byte boundary are ignored. + The rest of the block consists of the following information: + + 0 1 2 3 4... + +---+---+---+---+================================+ + | LEN | NLEN |... LEN bytes of literal data...| + +---+---+---+---+================================+ + + LEN is the number of data bytes in the block. NLEN is the + one's complement of LEN. + + 3.2.5. Compressed blocks (length and distance codes) + + As noted above, encoded data blocks in the "deflate" format + consist of sequences of symbols drawn from three conceptually + distinct alphabets: either literal bytes, from the alphabet of + byte values (0..255), or pairs, + where the length is drawn from (3..258) and the distance is + drawn from (1..32,768). In fact, the literal and length + alphabets are merged into a single alphabet (0..285), where + values 0..255 represent literal bytes, the value 256 indicates + end-of-block, and values 257..285 represent length codes + (possibly in conjunction with extra bits following the symbol + code) as follows: + + + + + + + + + + + + + + + + +Deutsch Informational [Page 11] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + Extra Extra Extra + Code Bits Length(s) Code Bits Lengths Code Bits Length(s) + ---- ---- ------ ---- ---- ------- ---- ---- ------- + 257 0 3 267 1 15,16 277 4 67-82 + 258 0 4 268 1 17,18 278 4 83-98 + 259 0 5 269 2 19-22 279 4 99-114 + 260 0 6 270 2 23-26 280 4 115-130 + 261 0 7 271 2 27-30 281 5 131-162 + 262 0 8 272 2 31-34 282 5 163-194 + 263 0 9 273 3 35-42 283 5 195-226 + 264 0 10 274 3 43-50 284 5 227-257 + 265 1 11,12 275 3 51-58 285 0 258 + 266 1 13,14 276 3 59-66 + + The extra bits should be interpreted as a machine integer + stored with the most-significant bit first, e.g., bits 1110 + represent the value 14. + + Extra Extra Extra + Code Bits Dist Code Bits Dist Code Bits Distance + ---- ---- ---- ---- ---- ------ ---- ---- -------- + 0 0 1 10 4 33-48 20 9 1025-1536 + 1 0 2 11 4 49-64 21 9 1537-2048 + 2 0 3 12 5 65-96 22 10 2049-3072 + 3 0 4 13 5 97-128 23 10 3073-4096 + 4 1 5,6 14 6 129-192 24 11 4097-6144 + 5 1 7,8 15 6 193-256 25 11 6145-8192 + 6 2 9-12 16 7 257-384 26 12 8193-12288 + 7 2 13-16 17 7 385-512 27 12 12289-16384 + 8 3 17-24 18 8 513-768 28 13 16385-24576 + 9 3 25-32 19 8 769-1024 29 13 24577-32768 + + 3.2.6. Compression with fixed Huffman codes (BTYPE=01) + + The Huffman codes for the two alphabets are fixed, and are not + represented explicitly in the data. The Huffman code lengths + for the literal/length alphabet are: + + Lit Value Bits Codes + --------- ---- ----- + 0 - 143 8 00110000 through + 10111111 + 144 - 255 9 110010000 through + 111111111 + 256 - 279 7 0000000 through + 0010111 + 280 - 287 8 11000000 through + 11000111 + + + +Deutsch Informational [Page 12] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + The code lengths are sufficient to generate the actual codes, + as described above; we show the codes in the table for added + clarity. Literal/length values 286-287 will never actually + occur in the compressed data, but participate in the code + construction. + + Distance codes 0-31 are represented by (fixed-length) 5-bit + codes, with possible additional bits as shown in the table + shown in Paragraph 3.2.5, above. Note that distance codes 30- + 31 will never actually occur in the compressed data. + + 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) + + The Huffman codes for the two alphabets appear in the block + immediately after the header bits and before the actual + compressed data, first the literal/length code and then the + distance code. Each code is defined by a sequence of code + lengths, as discussed in Paragraph 3.2.2, above. For even + greater compactness, the code length sequences themselves are + compressed using a Huffman code. The alphabet for code lengths + is as follows: + + 0 - 15: Represent code lengths of 0 - 15 + 16: Copy the previous code length 3 - 6 times. + The next 2 bits indicate repeat length + (0 = 3, ... , 3 = 6) + Example: Codes 8, 16 (+2 bits 11), + 16 (+2 bits 10) will expand to + 12 code lengths of 8 (1 + 6 + 5) + 17: Repeat a code length of 0 for 3 - 10 times. + (3 bits of length) + 18: Repeat a code length of 0 for 11 - 138 times + (7 bits of length) + + A code length of 0 indicates that the corresponding symbol in + the literal/length or distance alphabet will not occur in the + block, and should not participate in the Huffman code + construction algorithm given earlier. If only one distance + code is used, it is encoded using one bit, not zero bits; in + this case there is a single code length of one, with one unused + code. One distance code of zero bits means that there are no + distance codes used at all (the data is all literals). + + We can now define the format of the block: + + 5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286) + 5 Bits: HDIST, # of Distance codes - 1 (1 - 32) + 4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19) + + + +Deutsch Informational [Page 13] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + (HCLEN + 4) x 3 bits: code lengths for the code length + alphabet given just above, in the order: 16, 17, 18, + 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + + These code lengths are interpreted as 3-bit integers + (0-7); as above, a code length of 0 means the + corresponding symbol (literal/length or distance code + length) is not used. + + HLIT + 257 code lengths for the literal/length alphabet, + encoded using the code length Huffman code + + HDIST + 1 code lengths for the distance alphabet, + encoded using the code length Huffman code + + The actual compressed data of the block, + encoded using the literal/length and distance Huffman + codes + + The literal/length symbol 256 (end of data), + encoded using the literal/length Huffman code + + The code length repeat codes can cross from HLIT + 257 to the + HDIST + 1 code lengths. In other words, all code lengths form + a single sequence of HLIT + HDIST + 258 values. + + 3.3. Compliance + + A compressor may limit further the ranges of values specified in + the previous section and still be compliant; for example, it may + limit the range of backward pointers to some value smaller than + 32K. Similarly, a compressor may limit the size of blocks so that + a compressible block fits in memory. + + A compliant decompressor must accept the full range of possible + values defined in the previous section, and must accept blocks of + arbitrary size. + +4. Compression algorithm details + + While it is the intent of this document to define the "deflate" + compressed data format without reference to any particular + compression algorithm, the format is related to the compressed + formats produced by LZ77 (Lempel-Ziv 1977, see reference [2] below); + since many variations of LZ77 are patented, it is strongly + recommended that the implementor of a compressor follow the general + algorithm presented here, which is known not to be patented per se. + The material in this section is not part of the definition of the + + + +Deutsch Informational [Page 14] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + + specification per se, and a compressor need not follow it in order to + be compliant. + + The compressor terminates a block when it determines that starting a + new block with fresh trees would be useful, or when the block size + fills up the compressor's block buffer. + + The compressor uses a chained hash table to find duplicated strings, + using a hash function that operates on 3-byte sequences. At any + given point during compression, let XYZ be the next 3 input bytes to + be examined (not necessarily all different, of course). First, the + compressor examines the hash chain for XYZ. If the chain is empty, + the compressor simply writes out X as a literal byte and advances one + byte in the input. If the hash chain is not empty, indicating that + the sequence XYZ (or, if we are unlucky, some other 3 bytes with the + same hash function value) has occurred recently, the compressor + compares all strings on the XYZ hash chain with the actual input data + sequence starting at the current point, and selects the longest + match. + + The compressor searches the hash chains starting with the most recent + strings, to favor small distances and thus take advantage of the + Huffman encoding. The hash chains are singly linked. There are no + deletions from the hash chains; the algorithm simply discards matches + that are too old. To avoid a worst-case situation, very long hash + chains are arbitrarily truncated at a certain length, determined by a + run-time parameter. + + To improve overall compression, the compressor optionally defers the + selection of matches ("lazy matching"): after a match of length N has + been found, the compressor searches for a longer match starting at + the next input byte. If it finds a longer match, it truncates the + previous match to a length of one (thus producing a single literal + byte) and then emits the longer match. Otherwise, it emits the + original match, and, as described above, advances N bytes before + continuing. + + Run-time parameters also control this "lazy match" procedure. If + compression ratio is most important, the compressor attempts a + complete second search regardless of the length of the first match. + In the normal case, if the current match is "long enough", the + compressor reduces the search for a longer match, thus speeding up + the process. If speed is most important, the compressor inserts new + strings in the hash table only when no match was found, or when the + match is not "too long". This degrades the compression ratio but + saves time since there are both fewer insertions and fewer searches. + + + + + +Deutsch Informational [Page 15] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +5. References + + [1] Huffman, D. A., "A Method for the Construction of Minimum + Redundancy Codes", Proceedings of the Institute of Radio + Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101. + + [2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data + Compression", IEEE Transactions on Information Theory, Vol. 23, + No. 3, pp. 337-343. + + [3] Gailly, J.-L., and Adler, M., ZLIB documentation and sources, + available in ftp://ftp.uu.net/pub/archiving/zip/doc/ + + [4] Gailly, J.-L., and Adler, M., GZIP documentation and sources, + available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/ + + [5] Schwartz, E. S., and Kallick, B. "Generating a canonical prefix + encoding." Comm. ACM, 7,3 (Mar. 1964), pp. 166-169. + + [6] Hirschberg and Lelewer, "Efficient decoding of prefix codes," + Comm. ACM, 33,4, April 1990, pp. 449-459. + +6. Security Considerations + + Any data compression method involves the reduction of redundancy in + the data. Consequently, any corruption of the data is likely to have + severe effects and be difficult to correct. Uncompressed text, on + the other hand, will probably still be readable despite the presence + of some corrupted bytes. + + It is recommended that systems using this data format provide some + means of validating the integrity of the compressed data. See + reference [3], for example. + +7. Source code + + Source code for a C language implementation of a "deflate" compliant + compressor and decompressor is available within the zlib package at + ftp://ftp.uu.net/pub/archiving/zip/zlib/. + +8. Acknowledgements + + Trademarks cited in this document are the property of their + respective owners. + + Phil Katz designed the deflate format. Jean-Loup Gailly and Mark + Adler wrote the related software described in this specification. + Glenn Randers-Pehrson converted this document to RFC and HTML format. + + + +Deutsch Informational [Page 16] + +RFC 1951 DEFLATE Compressed Data Format Specification May 1996 + + +9. Author's Address + + L. Peter Deutsch + Aladdin Enterprises + 203 Santa Margarita Ave. + Menlo Park, CA 94025 + + Phone: (415) 322-0103 (AM only) + FAX: (415) 322-1734 + EMail: + + Questions about the technical content of this specification can be + sent by email to: + + Jean-Loup Gailly and + Mark Adler + + Editorial comments on this specification can be sent by email to: + + L. Peter Deutsch and + Glenn Randers-Pehrson + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Deutsch Informational [Page 17] + diff --git a/src/deps/src/zlib/doc/rfc1952.txt b/src/deps/src/zlib/doc/rfc1952.txt new file mode 100644 index 000000000..a8e51b456 --- /dev/null +++ b/src/deps/src/zlib/doc/rfc1952.txt @@ -0,0 +1,675 @@ + + + + + + +Network Working Group P. Deutsch +Request for Comments: 1952 Aladdin Enterprises +Category: Informational May 1996 + + + GZIP file format specification version 4.3 + +Status of This Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +IESG Note: + + The IESG takes no position on the validity of any Intellectual + Property Rights statements contained in this document. + +Notices + + Copyright (c) 1996 L. Peter Deutsch + + Permission is granted to copy and distribute this document for any + purpose and without charge, including translations into other + languages and incorporation into compilations, provided that the + copyright notice and this notice are preserved, and that any + substantive changes or deletions from the original are clearly + marked. + + A pointer to the latest version of this and related documentation in + HTML format can be found at the URL + . + +Abstract + + This specification defines a lossless compressed data format that is + compatible with the widely used GZIP utility. The format includes a + cyclic redundancy check value for detecting data corruption. The + format presently uses the DEFLATE method of compression but can be + easily extended to use other compression methods. The format can be + implemented readily in a manner not covered by patents. + + + + + + + + + + +Deutsch Informational [Page 1] + +RFC 1952 GZIP File Format Specification May 1996 + + +Table of Contents + + 1. Introduction ................................................... 2 + 1.1. Purpose ................................................... 2 + 1.2. Intended audience ......................................... 3 + 1.3. Scope ..................................................... 3 + 1.4. Compliance ................................................ 3 + 1.5. Definitions of terms and conventions used ................. 3 + 1.6. Changes from previous versions ............................ 3 + 2. Detailed specification ......................................... 4 + 2.1. Overall conventions ....................................... 4 + 2.2. File format ............................................... 5 + 2.3. Member format ............................................. 5 + 2.3.1. Member header and trailer ........................... 6 + 2.3.1.1. Extra field ................................... 8 + 2.3.1.2. Compliance .................................... 9 + 3. References .................................................. 9 + 4. Security Considerations .................................... 10 + 5. Acknowledgements ........................................... 10 + 6. Author's Address ........................................... 10 + 7. Appendix: Jean-Loup Gailly's gzip utility .................. 11 + 8. Appendix: Sample CRC Code .................................. 11 + +1. Introduction + + 1.1. Purpose + + The purpose of this specification is to define a lossless + compressed data format that: + + * Is independent of CPU type, operating system, file system, + and character set, and hence can be used for interchange; + * Can compress or decompress a data stream (as opposed to a + randomly accessible file) to produce another data stream, + using only an a priori bounded amount of intermediate + storage, and hence can be used in data communications or + similar structures such as Unix filters; + * Compresses data with efficiency comparable to the best + currently available general-purpose compression methods, + and in particular considerably better than the "compress" + program; + * Can be implemented readily in a manner not covered by + patents, and hence can be practiced freely; + * Is compatible with the file format produced by the current + widely used gzip utility, in that conforming decompressors + will be able to read data produced by the existing gzip + compressor. + + + + +Deutsch Informational [Page 2] + +RFC 1952 GZIP File Format Specification May 1996 + + + The data format defined by this specification does not attempt to: + + * Provide random access to compressed data; + * Compress specialized data (e.g., raster graphics) as well as + the best currently available specialized algorithms. + + 1.2. Intended audience + + This specification is intended for use by implementors of software + to compress data into gzip format and/or decompress data from gzip + format. + + The text of the specification assumes a basic background in + programming at the level of bits and other primitive data + representations. + + 1.3. Scope + + The specification specifies a compression method and a file format + (the latter assuming only that a file can store a sequence of + arbitrary bytes). It does not specify any particular interface to + a file system or anything about character sets or encodings + (except for file names and comments, which are optional). + + 1.4. Compliance + + Unless otherwise indicated below, a compliant decompressor must be + able to accept and decompress any file that conforms to all the + specifications presented here; a compliant compressor must produce + files that conform to all the specifications presented here. The + material in the appendices is not part of the specification per se + and is not relevant to compliance. + + 1.5. Definitions of terms and conventions used + + byte: 8 bits stored or transmitted as a unit (same as an octet). + (For this specification, a byte is exactly 8 bits, even on + machines which store a character on a number of bits different + from 8.) See below for the numbering of bits within a byte. + + 1.6. Changes from previous versions + + There have been no technical changes to the gzip format since + version 4.1 of this specification. In version 4.2, some + terminology was changed, and the sample CRC code was rewritten for + clarity and to eliminate the requirement for the caller to do pre- + and post-conditioning. Version 4.3 is a conversion of the + specification to RFC style. + + + +Deutsch Informational [Page 3] + +RFC 1952 GZIP File Format Specification May 1996 + + +2. Detailed specification + + 2.1. Overall conventions + + In the diagrams below, a box like this: + + +---+ + | | <-- the vertical bars might be missing + +---+ + + represents one byte; a box like this: + + +==============+ + | | + +==============+ + + represents a variable number of bytes. + + Bytes stored within a computer do not have a "bit order", since + they are always treated as a unit. However, a byte considered as + an integer between 0 and 255 does have a most- and least- + significant bit, and since we write numbers with the most- + significant digit on the left, we also write bytes with the most- + significant bit on the left. In the diagrams below, we number the + bits of a byte so that bit 0 is the least-significant bit, i.e., + the bits are numbered: + + +--------+ + |76543210| + +--------+ + + This document does not address the issue of the order in which + bits of a byte are transmitted on a bit-sequential medium, since + the data format described here is byte- rather than bit-oriented. + + Within a computer, a number may occupy multiple bytes. All + multi-byte numbers in the format described here are stored with + the least-significant byte first (at the lower memory address). + For example, the decimal number 520 is stored as: + + 0 1 + +--------+--------+ + |00001000|00000010| + +--------+--------+ + ^ ^ + | | + | + more significant byte = 2 x 256 + + less significant byte = 8 + + + +Deutsch Informational [Page 4] + +RFC 1952 GZIP File Format Specification May 1996 + + + 2.2. File format + + A gzip file consists of a series of "members" (compressed data + sets). The format of each member is specified in the following + section. The members simply appear one after another in the file, + with no additional information before, between, or after them. + + 2.3. Member format + + Each member has the following structure: + + +---+---+---+---+---+---+---+---+---+---+ + |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) + +---+---+---+---+---+---+---+---+---+---+ + + (if FLG.FEXTRA set) + + +---+---+=================================+ + | XLEN |...XLEN bytes of "extra field"...| (more-->) + +---+---+=================================+ + + (if FLG.FNAME set) + + +=========================================+ + |...original file name, zero-terminated...| (more-->) + +=========================================+ + + (if FLG.FCOMMENT set) + + +===================================+ + |...file comment, zero-terminated...| (more-->) + +===================================+ + + (if FLG.FHCRC set) + + +---+---+ + | CRC16 | + +---+---+ + + +=======================+ + |...compressed blocks...| (more-->) + +=======================+ + + 0 1 2 3 4 5 6 7 + +---+---+---+---+---+---+---+---+ + | CRC32 | ISIZE | + +---+---+---+---+---+---+---+---+ + + + + +Deutsch Informational [Page 5] + +RFC 1952 GZIP File Format Specification May 1996 + + + 2.3.1. Member header and trailer + + ID1 (IDentification 1) + ID2 (IDentification 2) + These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139 + (0x8b, \213), to identify the file as being in gzip format. + + CM (Compression Method) + This identifies the compression method used in the file. CM + = 0-7 are reserved. CM = 8 denotes the "deflate" + compression method, which is the one customarily used by + gzip and which is documented elsewhere. + + FLG (FLaGs) + This flag byte is divided into individual bits as follows: + + bit 0 FTEXT + bit 1 FHCRC + bit 2 FEXTRA + bit 3 FNAME + bit 4 FCOMMENT + bit 5 reserved + bit 6 reserved + bit 7 reserved + + If FTEXT is set, the file is probably ASCII text. This is + an optional indication, which the compressor may set by + checking a small amount of the input data to see whether any + non-ASCII characters are present. In case of doubt, FTEXT + is cleared, indicating binary data. For systems which have + different file formats for ascii text and binary data, the + decompressor can use FTEXT to choose the appropriate format. + We deliberately do not specify the algorithm used to set + this bit, since a compressor always has the option of + leaving it cleared and a decompressor always has the option + of ignoring it and letting some other program handle issues + of data conversion. + + If FHCRC is set, a CRC16 for the gzip header is present, + immediately before the compressed data. The CRC16 consists + of the two least significant bytes of the CRC32 for all + bytes of the gzip header up to and not including the CRC16. + [The FHCRC bit was never set by versions of gzip up to + 1.2.4, even though it was documented with a different + meaning in gzip 1.2.4.] + + If FEXTRA is set, optional extra fields are present, as + described in a following section. + + + +Deutsch Informational [Page 6] + +RFC 1952 GZIP File Format Specification May 1996 + + + If FNAME is set, an original file name is present, + terminated by a zero byte. The name must consist of ISO + 8859-1 (LATIN-1) characters; on operating systems using + EBCDIC or any other character set for file names, the name + must be translated to the ISO LATIN-1 character set. This + is the original name of the file being compressed, with any + directory components removed, and, if the file being + compressed is on a file system with case insensitive names, + forced to lower case. There is no original file name if the + data was compressed from a source other than a named file; + for example, if the source was stdin on a Unix system, there + is no file name. + + If FCOMMENT is set, a zero-terminated file comment is + present. This comment is not interpreted; it is only + intended for human consumption. The comment must consist of + ISO 8859-1 (LATIN-1) characters. Line breaks should be + denoted by a single line feed character (10 decimal). + + Reserved FLG bits must be zero. + + MTIME (Modification TIME) + This gives the most recent modification time of the original + file being compressed. The time is in Unix format, i.e., + seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this + may cause problems for MS-DOS and other systems that use + local rather than Universal time.) If the compressed data + did not come from a file, MTIME is set to the time at which + compression started. MTIME = 0 means no time stamp is + available. + + XFL (eXtra FLags) + These flags are available for use by specific compression + methods. The "deflate" method (CM = 8) sets these flags as + follows: + + XFL = 2 - compressor used maximum compression, + slowest algorithm + XFL = 4 - compressor used fastest algorithm + + OS (Operating System) + This identifies the type of file system on which compression + took place. This may be useful in determining end-of-line + convention for text files. The currently defined values are + as follows: + + + + + + +Deutsch Informational [Page 7] + +RFC 1952 GZIP File Format Specification May 1996 + + + 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32) + 1 - Amiga + 2 - VMS (or OpenVMS) + 3 - Unix + 4 - VM/CMS + 5 - Atari TOS + 6 - HPFS filesystem (OS/2, NT) + 7 - Macintosh + 8 - Z-System + 9 - CP/M + 10 - TOPS-20 + 11 - NTFS filesystem (NT) + 12 - QDOS + 13 - Acorn RISCOS + 255 - unknown + + XLEN (eXtra LENgth) + If FLG.FEXTRA is set, this gives the length of the optional + extra field. See below for details. + + CRC32 (CRC-32) + This contains a Cyclic Redundancy Check value of the + uncompressed data computed according to CRC-32 algorithm + used in the ISO 3309 standard and in section 8.1.1.6.2 of + ITU-T recommendation V.42. (See http://www.iso.ch for + ordering ISO documents. See gopher://info.itu.ch for an + online version of ITU-T V.42.) + + ISIZE (Input SIZE) + This contains the size of the original (uncompressed) input + data modulo 2^32. + + 2.3.1.1. Extra field + + If the FLG.FEXTRA bit is set, an "extra field" is present in + the header, with total length XLEN bytes. It consists of a + series of subfields, each of the form: + + +---+---+---+---+==================================+ + |SI1|SI2| LEN |... LEN bytes of subfield data ...| + +---+---+---+---+==================================+ + + SI1 and SI2 provide a subfield ID, typically two ASCII letters + with some mnemonic value. Jean-Loup Gailly + is maintaining a registry of subfield + IDs; please send him any subfield ID you wish to use. Subfield + IDs with SI2 = 0 are reserved for future use. The following + IDs are currently defined: + + + +Deutsch Informational [Page 8] + +RFC 1952 GZIP File Format Specification May 1996 + + + SI1 SI2 Data + ---------- ---------- ---- + 0x41 ('A') 0x70 ('P') Apollo file type information + + LEN gives the length of the subfield data, excluding the 4 + initial bytes. + + 2.3.1.2. Compliance + + A compliant compressor must produce files with correct ID1, + ID2, CM, CRC32, and ISIZE, but may set all the other fields in + the fixed-length part of the header to default values (255 for + OS, 0 for all others). The compressor must set all reserved + bits to zero. + + A compliant decompressor must check ID1, ID2, and CM, and + provide an error indication if any of these have incorrect + values. It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC + at least so it can skip over the optional fields if they are + present. It need not examine any other part of the header or + trailer; in particular, a decompressor may ignore FTEXT and OS + and always produce binary output, and still be compliant. A + compliant decompressor must give an error indication if any + reserved bit is non-zero, since such a bit could indicate the + presence of a new field that would cause subsequent data to be + interpreted incorrectly. + +3. References + + [1] "Information Processing - 8-bit single-byte coded graphic + character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987). + The ISO 8859-1 (Latin-1) character set is a superset of 7-bit + ASCII. Files defining this character set are available as + iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/ + + [2] ISO 3309 + + [3] ITU-T recommendation V.42 + + [4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification", + available in ftp://ftp.uu.net/pub/archiving/zip/doc/ + + [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in + ftp://prep.ai.mit.edu/pub/gnu/ + + [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table + Look-Up", Communications of the ACM, 31(8), pp.1008-1013. + + + + +Deutsch Informational [Page 9] + +RFC 1952 GZIP File Format Specification May 1996 + + + [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal, + pp.118-133. + + [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt, + describing the CRC concept. + +4. Security Considerations + + Any data compression method involves the reduction of redundancy in + the data. Consequently, any corruption of the data is likely to have + severe effects and be difficult to correct. Uncompressed text, on + the other hand, will probably still be readable despite the presence + of some corrupted bytes. + + It is recommended that systems using this data format provide some + means of validating the integrity of the compressed data, such as by + setting and checking the CRC-32 check value. + +5. Acknowledgements + + Trademarks cited in this document are the property of their + respective owners. + + Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler, + the related software described in this specification. Glenn + Randers-Pehrson converted this document to RFC and HTML format. + +6. Author's Address + + L. Peter Deutsch + Aladdin Enterprises + 203 Santa Margarita Ave. + Menlo Park, CA 94025 + + Phone: (415) 322-0103 (AM only) + FAX: (415) 322-1734 + EMail: + + Questions about the technical content of this specification can be + sent by email to: + + Jean-Loup Gailly and + Mark Adler + + Editorial comments on this specification can be sent by email to: + + L. Peter Deutsch and + Glenn Randers-Pehrson + + + +Deutsch Informational [Page 10] + +RFC 1952 GZIP File Format Specification May 1996 + + +7. Appendix: Jean-Loup Gailly's gzip utility + + The most widely used implementation of gzip compression, and the + original documentation on which this specification is based, were + created by Jean-Loup Gailly . Since this + implementation is a de facto standard, we mention some more of its + features here. Again, the material in this section is not part of + the specification per se, and implementations need not follow it to + be compliant. + + When compressing or decompressing a file, gzip preserves the + protection, ownership, and modification time attributes on the local + file system, since there is no provision for representing protection + attributes in the gzip file format itself. Since the file format + includes a modification time, the gzip decompressor provides a + command line switch that assigns the modification time from the file, + rather than the local modification time of the compressed input, to + the decompressed output. + +8. Appendix: Sample CRC Code + + The following sample code represents a practical implementation of + the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42 + for a formal specification.) + + The sample code is in the ANSI C programming language. Non C users + may find it easier to read with these hints: + + & Bitwise AND operator. + ^ Bitwise exclusive-OR operator. + >> Bitwise right shift operator. When applied to an + unsigned quantity, as here, right shift inserts zero + bit(s) at the left. + ! Logical NOT operator. + ++ "n++" increments the variable n. + 0xNNN 0x introduces a hexadecimal (base 16) constant. + Suffix L indicates a long value (at least 32 bits). + + /* Table of CRCs of all 8-bit messages. */ + unsigned long crc_table[256]; + + /* Flag: has the table been computed? Initially false. */ + int crc_table_computed = 0; + + /* Make the table for a fast CRC. */ + void make_crc_table(void) + { + unsigned long c; + + + +Deutsch Informational [Page 11] + +RFC 1952 GZIP File Format Specification May 1996 + + + int n, k; + for (n = 0; n < 256; n++) { + c = (unsigned long) n; + for (k = 0; k < 8; k++) { + if (c & 1) { + c = 0xedb88320L ^ (c >> 1); + } else { + c = c >> 1; + } + } + crc_table[n] = c; + } + crc_table_computed = 1; + } + + /* + Update a running crc with the bytes buf[0..len-1] and return + the updated crc. The crc should be initialized to zero. Pre- and + post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the caller. Usage example: + + unsigned long crc = 0L; + + while (read_buffer(buffer, length) != EOF) { + crc = update_crc(crc, buffer, length); + } + if (crc != original_crc) error(); + */ + unsigned long update_crc(unsigned long crc, + unsigned char *buf, int len) + { + unsigned long c = crc ^ 0xffffffffL; + int n; + + if (!crc_table_computed) + make_crc_table(); + for (n = 0; n < len; n++) { + c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8); + } + return c ^ 0xffffffffL; + } + + /* Return the CRC of the bytes buf[0..len-1]. */ + unsigned long crc(unsigned char *buf, int len) + { + return update_crc(0L, buf, len); + } + + + + +Deutsch Informational [Page 12] + diff --git a/src/deps/src/zlib/doc/txtvsbin.txt b/src/deps/src/zlib/doc/txtvsbin.txt new file mode 100644 index 000000000..2a901eaa6 --- /dev/null +++ b/src/deps/src/zlib/doc/txtvsbin.txt @@ -0,0 +1,107 @@ +A Fast Method for Identifying Plain Text Files +============================================== + + +Introduction +------------ + +Given a file coming from an unknown source, it is sometimes desirable +to find out whether the format of that file is plain text. Although +this may appear like a simple task, a fully accurate detection of the +file type requires heavy-duty semantic analysis on the file contents. +It is, however, possible to obtain satisfactory results by employing +various heuristics. + +Previous versions of PKZip and other zip-compatible compression tools +were using a crude detection scheme: if more than 80% (4/5) of the bytes +found in a certain buffer are within the range [7..127], the file is +labeled as plain text, otherwise it is labeled as binary. A prominent +limitation of this scheme is the restriction to Latin-based alphabets. +Other alphabets, like Greek, Cyrillic or Asian, make extensive use of +the bytes within the range [128..255], and texts using these alphabets +are most often misidentified by this scheme; in other words, the rate +of false negatives is sometimes too high, which means that the recall +is low. Another weakness of this scheme is a reduced precision, due to +the false positives that may occur when binary files containing large +amounts of textual characters are misidentified as plain text. + +In this article we propose a new, simple detection scheme that features +a much increased precision and a near-100% recall. This scheme is +designed to work on ASCII, Unicode and other ASCII-derived alphabets, +and it handles single-byte encodings (ISO-8859, MacRoman, KOI8, etc.) +and variable-sized encodings (ISO-2022, UTF-8, etc.). Wider encodings +(UCS-2/UTF-16 and UCS-4/UTF-32) are not handled, however. + + +The Algorithm +------------- + +The algorithm works by dividing the set of bytecodes [0..255] into three +categories: +- The allow list of textual bytecodes: + 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255. +- The gray list of tolerated bytecodes: + 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC). +- The block list of undesired, non-textual bytecodes: + 0 (NUL) to 6, 14 to 31. + +If a file contains at least one byte that belongs to the allow list and +no byte that belongs to the block list, then the file is categorized as +plain text; otherwise, it is categorized as binary. (The boundary case, +when the file is empty, automatically falls into the latter category.) + + +Rationale +--------- + +The idea behind this algorithm relies on two observations. + +The first observation is that, although the full range of 7-bit codes +[0..127] is properly specified by the ASCII standard, most control +characters in the range [0..31] are not used in practice. The only +widely-used, almost universally-portable control codes are 9 (TAB), +10 (LF) and 13 (CR). There are a few more control codes that are +recognized on a reduced range of platforms and text viewers/editors: +7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB) and 27 (ESC); but these +codes are rarely (if ever) used alone, without being accompanied by +some printable text. Even the newer, portable text formats such as +XML avoid using control characters outside the list mentioned here. + +The second observation is that most of the binary files tend to contain +control characters, especially 0 (NUL). Even though the older text +detection schemes observe the presence of non-ASCII codes from the range +[128..255], the precision rarely has to suffer if this upper range is +labeled as textual, because the files that are genuinely binary tend to +contain both control characters and codes from the upper range. On the +other hand, the upper range needs to be labeled as textual, because it +is used by virtually all ASCII extensions. In particular, this range is +used for encoding non-Latin scripts. + +Since there is no counting involved, other than simply observing the +presence or the absence of some byte values, the algorithm produces +consistent results, regardless what alphabet encoding is being used. +(If counting were involved, it could be possible to obtain different +results on a text encoded, say, using ISO-8859-16 versus UTF-8.) + +There is an extra category of plain text files that are "polluted" with +one or more block-listed codes, either by mistake or by peculiar design +considerations. In such cases, a scheme that tolerates a small fraction +of block-listed codes would provide an increased recall (i.e. more true +positives). This, however, incurs a reduced precision overall, since +false positives are more likely to appear in binary files that contain +large chunks of textual data. Furthermore, "polluted" plain text should +be regarded as binary by general-purpose text detection schemes, because +general-purpose text processing algorithms might not be applicable. +Under this premise, it is safe to say that our detection method provides +a near-100% recall. + +Experiments have been run on many files coming from various platforms +and applications. We tried plain text files, system logs, source code, +formatted office documents, compiled object code, etc. The results +confirm the optimistic assumptions about the capabilities of this +algorithm. + + +-- +Cosmin Truta +Last updated: 2006-May-28 diff --git a/src/deps/src/zlib/examples/README.examples b/src/deps/src/zlib/examples/README.examples new file mode 100644 index 000000000..e3a4b88bb --- /dev/null +++ b/src/deps/src/zlib/examples/README.examples @@ -0,0 +1,54 @@ +This directory contains examples of the use of zlib and other relevant +programs and documentation. + +enough.c + calculation and justification of ENOUGH parameter in inftrees.h + - calculates the maximum table space used in inflate tree + construction over all possible Huffman codes + +fitblk.c + compress just enough input to nearly fill a requested output size + - zlib isn't designed to do this, but fitblk does it anyway + +gun.c + uncompress a gzip file + - illustrates the use of inflateBack() for high speed file-to-file + decompression using call-back functions + - is approximately twice as fast as gzip -d + - also provides Unix uncompress functionality, again twice as fast + +gzappend.c + append to a gzip file + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of deflatePrime() to start at any bit + +gzjoin.c + join gzip files without recalculating the crc or recompressing + - illustrates the use of the Z_BLOCK flush parameter for inflate() + - illustrates the use of crc32_combine() + +gzlog.c +gzlog.h + efficiently and robustly maintain a message log file in gzip format + - illustrates use of raw deflate, Z_PARTIAL_FLUSH, deflatePrime(), + and deflateSetDictionary() + - illustrates use of a gzip header extra field + +gznorm.c + normalize a gzip file by combining members into a single member + - demonstrates how to concatenate deflate streams using Z_BLOCK + +zlib_how.html + painfully comprehensive description of zpipe.c (see below) + - describes in excruciating detail the use of deflate() and inflate() + +zpipe.c + reads and writes zlib streams from stdin to stdout + - illustrates the proper use of deflate() and inflate() + - deeply commented in zlib_how.html (see above) + +zran.c +zran.h + index a zlib or gzip stream and randomly access it + - illustrates the use of Z_BLOCK, inflatePrime(), and + inflateSetDictionary() to provide random access diff --git a/src/deps/src/zlib/examples/enough.c b/src/deps/src/zlib/examples/enough.c new file mode 100644 index 000000000..8a3cade49 --- /dev/null +++ b/src/deps/src/zlib/examples/enough.c @@ -0,0 +1,597 @@ +/* enough.c -- determine the maximum size of inflate's Huffman code tables over + * all possible valid and complete prefix codes, subject to a length limit. + * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler + * Version 1.5 5 August 2018 Mark Adler + */ + +/* Version history: + 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4) + 1.1 4 Jan 2007 Use faster incremental table usage computation + Prune examine() search on previously visited states + 1.2 5 Jan 2007 Comments clean up + As inflate does, decrease root for short codes + Refuse cases where inflate would increase root + 1.3 17 Feb 2008 Add argument for initial root table size + Fix bug for initial root table size == max - 1 + Use a macro to compute the history index + 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) + Clean up comparisons of different types + Clean up code indentation + 1.5 5 Aug 2018 Clean up code style, formatting, and comments + Show all the codes for the maximum, and only the maximum + */ + +/* + Examine all possible prefix codes for a given number of symbols and a + maximum code length in bits to determine the maximum table size for zlib's + inflate. Only complete prefix codes are counted. + + Two codes are considered distinct if the vectors of the number of codes per + length are not identical. So permutations of the symbol assignments result + in the same code for the counting, as do permutations of the assignments of + the bit values to the codes (i.e. only canonical codes are counted). + + We build a code from shorter to longer lengths, determining how many symbols + are coded at each length. At each step, we have how many symbols remain to + be coded, what the last code length used was, and how many bit patterns of + that length remain unused. Then we add one to the code length and double the + number of unused patterns to graduate to the next code length. We then + assign all portions of the remaining symbols to that code length that + preserve the properties of a correct and eventually complete code. Those + properties are: we cannot use more bit patterns than are available; and when + all the symbols are used, there are exactly zero possible bit patterns left + unused. + + The inflate Huffman decoding algorithm uses two-level lookup tables for + speed. There is a single first-level table to decode codes up to root bits + in length (root == 9 for literal/length codes and root == 6 for distance + codes, in the current inflate implementation). The base table has 1 << root + entries and is indexed by the next root bits of input. Codes shorter than + root bits have replicated table entries, so that the correct entry is + pointed to regardless of the bits that follow the short code. If the code is + longer than root bits, then the table entry points to a second-level table. + The size of that table is determined by the longest code with that root-bit + prefix. If that longest code has length len, then the table has size 1 << + (len - root), to index the remaining bits in that set of codes. Each + subsequent root-bit prefix then has its own sub-table. The total number of + table entries required by the code is calculated incrementally as the number + of codes at each bit length is populated. When all of the codes are shorter + than root bits, then root is reduced to the longest code length, resulting + in a single, smaller, one-level table. + + The inflate algorithm also provides for small values of root (relative to + the log2 of the number of symbols), where the shortest code has more bits + than root. In that case, root is increased to the length of the shortest + code. This program, by design, does not handle that case, so it is verified + that the number of symbols is less than 1 << (root + 1). + + In order to speed up the examination (by about ten orders of magnitude for + the default arguments), the intermediate states in the build-up of a code + are remembered and previously visited branches are pruned. The memory + required for this will increase rapidly with the total number of symbols and + the maximum code length in bits. However this is a very small price to pay + for the vast speedup. + + First, all of the possible prefix codes are counted, and reachable + intermediate states are noted by a non-zero count in a saved-results array. + Second, the intermediate states that lead to (root + 1) bit or longer codes + are used to look at all sub-codes from those junctures for their inflate + memory usage. (The amount of memory used is not affected by the number of + codes of root bits or less in length.) Third, the visited states in the + construction of those sub-codes and the associated calculation of the table + size is recalled in order to avoid recalculating from the same juncture. + Beginning the code examination at (root + 1) bit codes, which is enabled by + identifying the reachable nodes, accounts for about six of the orders of + magnitude of improvement for the default arguments. About another four + orders of magnitude come from not revisiting previous states. Out of + approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes + need to be examined to cover all of the possible table memory usage cases + for the default arguments of 286 symbols limited to 15-bit codes. + + Note that the uintmax_t type is used for counting. It is quite easy to + exceed the capacity of an eight-byte integer with a large number of symbols + and a large maximum code length, so multiple-precision arithmetic would need + to replace the integer arithmetic in that case. This program will abort if + an overflow occurs. The big_t type identifies where the counting takes + place. + + The uintmax_t type is also used for calculating the number of possible codes + remaining at the maximum length. This limits the maximum code length to the + number of bits in a long long minus the number of bits needed to represent + the symbols in a flat code. The code_t type identifies where the bit-pattern + counting takes place. + */ + +#include +#include +#include +#include +#include +#include + +#define local static + +// Special data types. +typedef uintmax_t big_t; // type for code counting +#define PRIbig "ju" // printf format for big_t +typedef uintmax_t code_t; // type for bit pattern counting +struct tab { // type for been-here check + size_t len; // allocated length of bit vector in octets + char *vec; // allocated bit vector +}; + +/* The array for saving results, num[], is indexed with this triplet: + + syms: number of symbols remaining to code + left: number of available bit patterns at length len + len: number of bits in the codes currently being assigned + + Those indices are constrained thusly when saving results: + + syms: 3..totsym (totsym == total symbols to code) + left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) + len: 1..max - 1 (max == maximum code length in bits) + + syms == 2 is not saved since that immediately leads to a single code. left + must be even, since it represents the number of available bit patterns at + the current length, which is double the number at the previous length. left + ends at syms-1 since left == syms immediately results in a single code. + (left > sym is not allowed since that would result in an incomplete code.) + len is less than max, since the code completes immediately when len == max. + + The offset into the array is calculated for the three indices with the first + one (syms) being outermost, and the last one (len) being innermost. We build + the array with length max-1 lists for the len index, with syms-3 of those + for each symbol. There are totsym-2 of those, with each one varying in + length as a function of sym. See the calculation of index in map() for the + index, and the calculation of size in main() for the size of the array. + + For the deflate example of 286 symbols limited to 15-bit codes, the array + has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half + of the space allocated for saved results is actually used -- not all + possible triplets are reached in the generation of valid prefix codes. + */ + +/* The array for tracking visited states, done[], is itself indexed identically + to the num[] array as described above for the (syms, left, len) triplet. + Each element in the array is further indexed by the (mem, rem) doublet, + where mem is the amount of inflate table space used so far, and rem is the + remaining unused entries in the current inflate sub-table. Each indexed + element is simply one bit indicating whether the state has been visited or + not. Since the ranges for mem and rem are not known a priori, each bit + vector is of a variable size, and grows as needed to accommodate the visited + states. mem and rem are used to calculate a single index in a triangular + array. Since the range of mem is expected in the default case to be about + ten times larger than the range of rem, the array is skewed to reduce the + memory usage, with eight times the range for mem than for rem. See the + calculations for offset and bit in been_here() for the details. + + For the deflate example of 286 symbols limited to 15-bit codes, the bit + vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself. + */ + +// Type for a variable-length, allocated string. +typedef struct { + char *str; // pointer to allocated string + size_t size; // size of allocation + size_t len; // length of string, not including terminating zero +} string_t; + +// Clear a string_t. +local void string_clear(string_t *s) { + s->str[0] = 0; + s->len = 0; +} + +// Initialize a string_t. +local void string_init(string_t *s) { + s->size = 16; + s->str = malloc(s->size); + assert(s->str != NULL && "out of memory"); + string_clear(s); +} + +// Release the allocation of a string_t. +local void string_free(string_t *s) { + free(s->str); + s->str = NULL; + s->size = 0; + s->len = 0; +} + +// Save the results of printf with fmt and the subsequent argument list to s. +// Each call appends to s. The allocated space for s is increased as needed. +local void string_printf(string_t *s, char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + size_t len = s->len; + int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); + assert(ret >= 0 && "out of memory"); + s->len += ret; + if (s->size < s->len + 1) { + do { + s->size <<= 1; + assert(s->size != 0 && "overflow"); + } while (s->size < s->len + 1); + s->str = realloc(s->str, s->size); + assert(s->str != NULL && "out of memory"); + vsnprintf(s->str + len, s->size - len, fmt, ap); + } + va_end(ap); +} + +// Globals to avoid propagating constants or constant pointers recursively. +struct { + int max; // maximum allowed bit length for the codes + int root; // size of base code table in bits + int large; // largest code table so far + size_t size; // number of elements in num and done + big_t tot; // total number of codes with maximum tables size + string_t out; // display of subcodes for maximum tables size + int *code; // number of symbols assigned to each bit length + big_t *num; // saved results array for code counting + struct tab *done; // states already evaluated array +} g; + +// Index function for num[] and done[]. +local inline size_t map(int syms, int left, int len) { + return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + + (left >> 1) - 1) * (g.max - 1) + + len - 1; +} + +// Free allocated space in globals. +local void cleanup(void) { + if (g.done != NULL) { + for (size_t n = 0; n < g.size; n++) + if (g.done[n].len) + free(g.done[n].vec); + g.size = 0; + free(g.done); g.done = NULL; + } + free(g.num); g.num = NULL; + free(g.code); g.code = NULL; + string_free(&g.out); +} + +// Return the number of possible prefix codes using bit patterns of lengths len +// through max inclusive, coding syms symbols, with left bit patterns of length +// len unused -- return -1 if there is an overflow in the counting. Keep a +// record of previous results in num to prevent repeating the same calculation. +local big_t count(int syms, int left, int len) { + // see if only one possible code + if (syms == left) + return 1; + + // note and verify the expected state + assert(syms > left && left > 0 && len < g.max); + + // see if we've done this one already + size_t index = map(syms, left, len); + big_t got = g.num[index]; + if (got) + return got; // we have -- return the saved result + + // we need to use at least this many bit patterns so that the code won't be + // incomplete at the next length (more bit patterns than symbols) + int least = (left << 1) - syms; + if (least < 0) + least = 0; + + // we can use at most this many bit patterns, lest there not be enough + // available for the remaining symbols at the maximum length (if there were + // no limit to the code length, this would become: most = left - 1) + int most = (((code_t)left << (g.max - len)) - syms) / + (((code_t)1 << (g.max - len)) - 1); + + // count all possible codes from this juncture and add them up + big_t sum = 0; + for (int use = least; use <= most; use++) { + got = count(syms - use, (left - use) << 1, len + 1); + sum += got; + if (got == (big_t)-1 || sum < got) // overflow + return (big_t)-1; + } + + // verify that all recursive calls are productive + assert(sum != 0); + + // save the result and return it + g.num[index] = sum; + return sum; +} + +// Return true if we've been here before, set to true if not. Set a bit in a +// bit vector to indicate visiting this state. Each (syms,len,left) state has a +// variable size bit vector indexed by (mem,rem). The bit vector is lengthened +// as needed to allow setting the (mem,rem) bit. +local int been_here(int syms, int left, int len, int mem, int rem) { + // point to vector for (syms,left,len), bit in vector for (mem,rem) + size_t index = map(syms, left, len); + mem -= 1 << g.root; // mem always includes the root table + mem >>= 1; // mem and rem are always even + rem >>= 1; + size_t offset = (mem >> 3) + rem; + offset = ((offset * (offset + 1)) >> 1) + rem; + int bit = 1 << (mem & 7); + + // see if we've been here + size_t length = g.done[index].len; + if (offset < length && (g.done[index].vec[offset] & bit) != 0) + return 1; // done this! + + // we haven't been here before -- set the bit to show we have now + + // see if we need to lengthen the vector in order to set the bit + if (length <= offset) { + // if we have one already, enlarge it, zero out the appended space + char *vector; + if (length) { + do { + length <<= 1; + } while (length <= offset); + vector = realloc(g.done[index].vec, length); + assert(vector != NULL && "out of memory"); + memset(vector + g.done[index].len, 0, length - g.done[index].len); + } + + // otherwise we need to make a new vector and zero it out + else { + length = 16; + while (length <= offset) + length <<= 1; + vector = calloc(length, 1); + assert(vector != NULL && "out of memory"); + } + + // install the new vector + g.done[index].len = length; + g.done[index].vec = vector; + } + + // set the bit + g.done[index].vec[offset] |= bit; + return 0; +} + +// Examine all possible codes from the given node (syms, len, left). Compute +// the amount of memory required to build inflate's decoding tables, where the +// number of code structures used so far is mem, and the number remaining in +// the current sub-table is rem. +local void examine(int syms, int left, int len, int mem, int rem) { + // see if we have a complete code + if (syms == left) { + // set the last code entry + g.code[len] = left; + + // complete computation of memory used by this code + while (rem < left) { + left -= rem; + rem = 1 << (len - g.root); + mem += rem; + } + assert(rem == left); + + // if this is at the maximum, show the sub-code + if (mem >= g.large) { + // if this is a new maximum, update the maximum and clear out the + // printed sub-codes from the previous maximum + if (mem > g.large) { + g.large = mem; + string_clear(&g.out); + } + + // compute the starting state for this sub-code + syms = 0; + left = 1 << g.max; + for (int bits = g.max; bits > g.root; bits--) { + syms += g.code[bits]; + left -= g.code[bits]; + assert((left & 1) == 0); + left >>= 1; + } + + // print the starting state and the resulting sub-code to g.out + string_printf(&g.out, "<%u, %u, %u>:", + syms, g.root + 1, ((1 << g.root) - left) << 1); + for (int bits = g.root + 1; bits <= g.max; bits++) + if (g.code[bits]) + string_printf(&g.out, " %d[%d]", g.code[bits], bits); + string_printf(&g.out, "\n"); + } + + // remove entries as we drop back down in the recursion + g.code[len] = 0; + return; + } + + // prune the tree if we can + if (been_here(syms, left, len, mem, rem)) + return; + + // we need to use at least this many bit patterns so that the code won't be + // incomplete at the next length (more bit patterns than symbols) + int least = (left << 1) - syms; + if (least < 0) + least = 0; + + // we can use at most this many bit patterns, lest there not be enough + // available for the remaining symbols at the maximum length (if there were + // no limit to the code length, this would become: most = left - 1) + int most = (((code_t)left << (g.max - len)) - syms) / + (((code_t)1 << (g.max - len)) - 1); + + // occupy least table spaces, creating new sub-tables as needed + int use = least; + while (rem < use) { + use -= rem; + rem = 1 << (len - g.root); + mem += rem; + } + rem -= use; + + // examine codes from here, updating table space as we go + for (use = least; use <= most; use++) { + g.code[len] = use; + examine(syms - use, (left - use) << 1, len + 1, + mem + (rem ? 1 << (len - g.root) : 0), rem << 1); + if (rem == 0) { + rem = 1 << (len - g.root); + mem += rem; + } + rem--; + } + + // remove entries as we drop back down in the recursion + g.code[len] = 0; +} + +// Look at all sub-codes starting with root + 1 bits. Look at only the valid +// intermediate code states (syms, left, len). For each completed code, +// calculate the amount of memory required by inflate to build the decoding +// tables. Find the maximum amount of memory required and show the codes that +// require that maximum. +local void enough(int syms) { + // clear code + for (int n = 0; n <= g.max; n++) + g.code[n] = 0; + + // look at all (root + 1) bit and longer codes + string_clear(&g.out); // empty saved results + g.large = 1 << g.root; // base table + if (g.root < g.max) // otherwise, there's only a base table + for (int n = 3; n <= syms; n++) + for (int left = 2; left < n; left += 2) { + // look at all reachable (root + 1) bit nodes, and the + // resulting codes (complete at root + 2 or more) + size_t index = map(n, left, g.root + 1); + if (g.root + 1 < g.max && g.num[index]) // reachable node + examine(n, left, g.root + 1, 1 << g.root, 0); + + // also look at root bit codes with completions at root + 1 + // bits (not saved in num, since complete), just in case + if (g.num[index - 1] && n <= left << 1) + examine((n - left) << 1, (n - left) << 1, g.root + 1, + 1 << g.root, 0); + } + + // done + printf("maximum of %d table entries for root = %d\n", g.large, g.root); + fputs(g.out.str, stdout); +} + +// Examine and show the total number of possible prefix codes for a given +// maximum number of symbols, initial root table size, and maximum code length +// in bits -- those are the command arguments in that order. The default values +// are 286, 9, and 15 respectively, for the deflate literal/length code. The +// possible codes are counted for each number of coded symbols from two to the +// maximum. The counts for each of those and the total number of codes are +// shown. The maximum number of inflate table entries is then calculated across +// all possible codes. Each new maximum number of table entries and the +// associated sub-code (starting at root + 1 == 10 bits) is shown. +// +// To count and examine prefix codes that are not length-limited, provide a +// maximum length equal to the number of symbols minus one. +// +// For the deflate literal/length code, use "enough". For the deflate distance +// code, use "enough 30 6". +int main(int argc, char **argv) { + // set up globals for cleanup() + g.code = NULL; + g.num = NULL; + g.done = NULL; + string_init(&g.out); + + // get arguments -- default to the deflate literal/length code + int syms = 286; + g.root = 9; + g.max = 15; + if (argc > 1) { + syms = atoi(argv[1]); + if (argc > 2) { + g.root = atoi(argv[2]); + if (argc > 3) + g.max = atoi(argv[3]); + } + } + if (argc > 4 || syms < 2 || g.root < 1 || g.max < 1) { + fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", + stderr); + return 1; + } + + // if not restricting the code length, the longest is syms - 1 + if (g.max > syms - 1) + g.max = syms - 1; + + // determine the number of bits in a code_t + int bits = 0; + for (code_t word = 1; word; word <<= 1) + bits++; + + // make sure that the calculation of most will not overflow + if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { + fputs("abort: code length too long for internal types\n", stderr); + return 1; + } + + // reject impossible code requests + if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { + fprintf(stderr, "%d symbols cannot be coded in %d bits\n", + syms, g.max); + return 1; + } + + // allocate code vector + g.code = calloc(g.max + 1, sizeof(int)); + assert(g.code != NULL && "out of memory"); + + // determine size of saved results array, checking for overflows, + // allocate and clear the array (set all to zero with calloc()) + if (syms == 2) // iff max == 1 + g.num = NULL; // won't be saving any results + else { + g.size = syms >> 1; + int n = (syms - 1) >> 1; + assert(g.size <= (size_t)-1 / n && "overflow"); + g.size *= n; + n = g.max - 1; + assert(g.size <= (size_t)-1 / n && "overflow"); + g.size *= n; + g.num = calloc(g.size, sizeof(big_t)); + assert(g.num != NULL && "out of memory"); + } + + // count possible codes for all numbers of symbols, add up counts + big_t sum = 0; + for (int n = 2; n <= syms; n++) { + big_t got = count(n, 2, 1); + sum += got; + assert(got != (big_t)-1 && sum >= got && "overflow"); + } + printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); + if (g.max < syms - 1) + printf(" (%d-bit length limit)\n", g.max); + else + puts(" (no length limit)"); + + // allocate and clear done array for been_here() + if (syms == 2) + g.done = NULL; + else { + g.done = calloc(g.size, sizeof(struct tab)); + assert(g.done != NULL && "out of memory"); + } + + // find and show maximum inflate table usage + if (g.root > g.max) // reduce root to max length + g.root = g.max; + if ((code_t)syms < ((code_t)1 << (g.root + 1))) + enough(syms); + else + fputs("cannot handle minimum code lengths > root", stderr); + + // done + cleanup(); + return 0; +} diff --git a/src/deps/src/zlib/examples/fitblk.c b/src/deps/src/zlib/examples/fitblk.c new file mode 100644 index 000000000..723dc0028 --- /dev/null +++ b/src/deps/src/zlib/examples/fitblk.c @@ -0,0 +1,233 @@ +/* fitblk.c: example of fitting compressed output to a specified size + Not copyrighted -- provided to the public domain + Version 1.1 25 November 2004 Mark Adler */ + +/* Version history: + 1.0 24 Nov 2004 First version + 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() + Use fixed-size, stack-allocated raw buffers + Simplify code moving compression to subroutines + Use assert() for internal errors + Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + + fitblk performs three compression passes on a portion of the input + data in order to determine how much of that input will compress to + nearly the requested output block size. The first pass generates + enough deflate blocks to produce output to fill the requested + output size plus a specified excess amount (see the EXCESS define + below). The last deflate block may go quite a bit past that, but + is discarded. The second pass decompresses and recompresses just + the compressed data that fit in the requested plus excess sized + buffer. The deflate process is terminated after that amount of + input, which is less than the amount consumed on the first pass. + The last deflate block of the result will be of a comparable size + to the final product, so that the header for that deflate block and + the compression ratio for that block will be about the same as in + the final product. The third compression pass decompresses the + result of the second step, but only the compressed data up to the + requested size minus an amount to allow the compressed stream to + complete (see the MARGIN define below). That will result in a + final compressed stream whose length is less than or equal to the + requested size. Assuming sufficient input and a requested size + greater than a few hundred bytes, the shortfall will typically be + less than ten bytes. + + If the input is short enough that the first compression completes + before filling the requested output size, then that compressed + stream is return with no recompression. + + EXCESS is chosen to be just greater than the shortfall seen in a + two pass approach similar to the above. That shortfall is due to + the last deflate block compressing more efficiently with a smaller + header on the second pass. EXCESS is set to be large enough so + that there is enough uncompressed data for the second pass to fill + out the requested size, and small enough so that the final deflate + block of the second pass will be close in size to the final deflate + block of the third and final pass. MARGIN is chosen to be just + large enough to assure that the final compression has enough room + to complete in all cases. + */ + +#include +#include +#include +#include "zlib.h" + +#define local static + +/* print nastygram and leave */ +local void quit(char *why) +{ + fprintf(stderr, "fitblk abort: %s\n", why); + exit(1); +} + +#define RAWLEN 4096 /* intermediate uncompressed buffer size */ + +/* compress from file to def until provided buffer is full or end of + input reached; return last deflate() return value, or Z_ERRNO if + there was read error on the file */ +local int partcompress(FILE *in, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + def->avail_in = fread(raw, 1, RAWLEN, in); + if (ferror(in)) + return Z_ERRNO; + def->next_in = raw; + if (feof(in)) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (def->avail_out != 0 && flush == Z_NO_FLUSH); + return ret; +} + +/* recompress from inf's input to def's output; the input for inf and + the output for def are set in those structures before calling; + return last deflate() return value, or Z_MEM_ERROR if inflate() + was not able to allocate enough memory when it needed to */ +local int recompress(z_streamp inf, z_streamp def) +{ + int ret, flush; + unsigned char raw[RAWLEN]; + + flush = Z_NO_FLUSH; + do { + /* decompress */ + inf->avail_out = RAWLEN; + inf->next_out = raw; + ret = inflate(inf, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && + ret != Z_NEED_DICT); + if (ret == Z_MEM_ERROR) + return ret; + + /* compress what was decompressed until done or no room */ + def->avail_in = RAWLEN - inf->avail_out; + def->next_in = raw; + if (inf->avail_out != 0) + flush = Z_FINISH; + ret = deflate(def, flush); + assert(ret != Z_STREAM_ERROR); + } while (ret != Z_STREAM_END && def->avail_out != 0); + return ret; +} + +#define EXCESS 256 /* empirically determined stream overage */ +#define MARGIN 8 /* amount to back off for completion */ + +/* compress from stdin to fixed-size block on stdout */ +int main(int argc, char **argv) +{ + int ret; /* return code */ + unsigned size; /* requested fixed output block size */ + unsigned have; /* bytes written by deflate() call */ + unsigned char *blk; /* intermediate and final stream */ + unsigned char *tmp; /* close to desired size stream */ + z_stream def, inf; /* zlib deflate and inflate states */ + + /* get requested output size */ + if (argc != 2) + quit("need one argument: size of output block"); + ret = strtol(argv[1], argv + 1, 10); + if (argv[1][0] != 0) + quit("argument must be a number"); + if (ret < 8) /* 8 is minimum zlib stream size */ + quit("need positive size of 8 or greater"); + size = (unsigned)ret; + + /* allocate memory for buffers and compression engine */ + blk = malloc(size + EXCESS); + def.zalloc = Z_NULL; + def.zfree = Z_NULL; + def.opaque = Z_NULL; + ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK || blk == NULL) + quit("out of memory"); + + /* compress from stdin until output full, or no more input */ + def.avail_out = size + EXCESS; + def.next_out = blk; + ret = partcompress(stdin, &def); + if (ret == Z_ERRNO) + quit("error reading input"); + + /* if it all fit, then size was undersubscribed -- done! */ + if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { + /* write block to stdout */ + have = size + EXCESS - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (all input)\n", + size - have, size); + return 0; + } + + /* it didn't all fit -- set up for recompression */ + inf.zalloc = Z_NULL; + inf.zfree = Z_NULL; + inf.opaque = Z_NULL; + inf.avail_in = 0; + inf.next_in = Z_NULL; + ret = inflateInit(&inf); + tmp = malloc(size + EXCESS); + if (ret != Z_OK || tmp == NULL) + quit("out of memory"); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do first recompression close to the right amount */ + inf.avail_in = size + EXCESS; + inf.next_in = blk; + def.avail_out = size + EXCESS; + def.next_out = tmp; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + + /* set up for next recompression */ + ret = inflateReset(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do second and final recompression (third compression) */ + inf.avail_in = size - MARGIN; /* assure stream will complete */ + inf.next_in = tmp; + def.avail_out = size; + def.next_out = blk; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ + + /* done -- write block to stdout */ + have = size - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + free(tmp); + ret = inflateEnd(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (%lu input)\n", + size - have, size, def.total_in); + return 0; +} diff --git a/src/deps/src/zlib/examples/gun.c b/src/deps/src/zlib/examples/gun.c new file mode 100644 index 000000000..bea5497e5 --- /dev/null +++ b/src/deps/src/zlib/examples/gun.c @@ -0,0 +1,702 @@ +/* gun.c -- simple gunzip to give an example of the use of inflateBack() + * Copyright (C) 2003, 2005, 2008, 2010, 2012 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + Version 1.7 12 August 2012 Mark Adler */ + +/* Version history: + 1.0 16 Feb 2003 First version for testing of inflateBack() + 1.1 21 Feb 2005 Decompress concatenated gzip streams + Remove use of "this" variable (C++ keyword) + Fix return value for in() + Improve allocation failure checking + Add typecasting for void * structures + Add -h option for command version and usage + Add a bunch of comments + 1.2 20 Mar 2005 Add Unix compress (LZW) decompression + Copy file attributes from input file to output file + 1.3 12 Jun 2005 Add casts for error messages [Oberhumer] + 1.4 8 Dec 2006 LZW decompression speed improvements + 1.5 9 Feb 2008 Avoid warning in latest version of gcc + 1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings + 1.7 12 Aug 2012 Update for z_const usage in zlib 1.2.8 + */ + +/* + gun [ -t ] [ name ... ] + + decompresses the data in the named gzip files. If no arguments are given, + gun will decompress from stdin to stdout. The names must end in .gz, -gz, + .z, -z, _z, or .Z. The uncompressed data will be written to a file name + with the suffix stripped. On success, the original file is deleted. On + failure, the output file is deleted. For most failures, the command will + continue to process the remaining names on the command line. A memory + allocation failure will abort the command. If -t is specified, then the + listed files or stdin will be tested as gzip files for integrity (without + checking for a proper suffix), no output will be written, and no files + will be deleted. + + Like gzip, gun allows concatenated gzip streams and will decompress them, + writing all of the uncompressed data to the output. Unlike gzip, gun allows + an empty file on input, and will produce no error writing an empty output + file. + + gun will also decompress files made by Unix compress, which uses LZW + compression. These files are automatically detected by virtue of their + magic header bytes. Since the end of Unix compress stream is marked by the + end-of-file, they cannot be concatenated. If a Unix compress stream is + encountered in an input file, it is the last stream in that file. + + Like gunzip and uncompress, the file attributes of the original compressed + file are maintained in the final uncompressed file, to the extent that the + user permissions allow it. + + On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version + 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the + LZW decompression provided by gun is about twice as fast as the standard + Unix uncompress command. + */ + +/* external functions and related types and constants */ +#include /* fprintf() */ +#include /* malloc(), free() */ +#include /* strerror(), strcmp(), strlen(), memcpy() */ +#include /* errno */ +#include /* open() */ +#include /* read(), write(), close(), chown(), unlink() */ +#include +#include /* stat(), chmod() */ +#include /* utime() */ +#include "zlib.h" /* inflateBackInit(), inflateBack(), */ + /* inflateBackEnd(), crc32() */ + +/* function declaration */ +#define local static + +/* buffer constants */ +#define SIZE 32768U /* input and output buffer sizes */ +#define PIECE 16384 /* limits i/o chunks for 16-bit int case */ + +/* structure for infback() to pass to input function in() -- it maintains the + input file and a buffer of size SIZE */ +struct ind { + int infile; + unsigned char *inbuf; +}; + +/* Load input buffer, assumed to be empty, and return bytes loaded and a + pointer to them. read() is called until the buffer is full, or until it + returns end-of-file or error. Return 0 on error. */ +local unsigned in(void *in_desc, z_const unsigned char **buf) +{ + int ret; + unsigned len; + unsigned char *next; + struct ind *me = (struct ind *)in_desc; + + next = me->inbuf; + *buf = next; + len = 0; + do { + ret = PIECE; + if ((unsigned)ret > SIZE - len) + ret = (int)(SIZE - len); + ret = (int)read(me->infile, next, ret); + if (ret == -1) { + len = 0; + break; + } + next += ret; + len += ret; + } while (ret != 0 && len < SIZE); + return len; +} + +/* structure for infback() to pass to output function out() -- it maintains the + output file, a running CRC-32 check on the output and the total number of + bytes output, both for checking against the gzip trailer. (The length in + the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and + the output is greater than 4 GB.) */ +struct outd { + int outfile; + int check; /* true if checking crc and total */ + unsigned long crc; + unsigned long total; +}; + +/* Write output buffer and update the CRC-32 and total bytes written. write() + is called until all of the output is written or an error is encountered. + On success out() returns 0. For a write failure, out() returns 1. If the + output file descriptor is -1, then nothing is written. + */ +local int out(void *out_desc, unsigned char *buf, unsigned len) +{ + int ret; + struct outd *me = (struct outd *)out_desc; + + if (me->check) { + me->crc = crc32(me->crc, buf, len); + me->total += len; + } + if (me->outfile != -1) + do { + ret = PIECE; + if ((unsigned)ret > len) + ret = (int)len; + ret = (int)write(me->outfile, buf, ret); + if (ret == -1) + return 1; + buf += ret; + len -= ret; + } while (len != 0); + return 0; +} + +/* next input byte macro for use inside lunpipe() and gunpipe() */ +#define NEXT() (have ? 0 : (have = in(indp, &next)), \ + last = have ? (have--, (int)(*next++)) : -1) + +/* memory for gunpipe() and lunpipe() -- + the first 256 entries of prefix[] and suffix[] are never used, could + have offset the index, but it's faster to waste the memory */ +unsigned char inbuf[SIZE]; /* input buffer */ +unsigned char outbuf[SIZE]; /* output buffer */ +unsigned short prefix[65536]; /* index to LZW prefix string */ +unsigned char suffix[65536]; /* one-character LZW suffix */ +unsigned char match[65280 + 2]; /* buffer for reversed match or gzip + 32K sliding window */ + +/* throw out what's left in the current bits byte buffer (this is a vestigial + aspect of the compressed data format derived from an implementation that + made use of a special VAX machine instruction!) */ +#define FLUSHCODE() \ + do { \ + left = 0; \ + rem = 0; \ + if (chunk > have) { \ + chunk -= have; \ + have = 0; \ + if (NEXT() == -1) \ + break; \ + chunk--; \ + if (chunk > have) { \ + chunk = have = 0; \ + break; \ + } \ + } \ + have -= chunk; \ + next += chunk; \ + chunk = 0; \ + } while (0) + +/* Decompress a compress (LZW) file from indp to outfile. The compress magic + header (two bytes) has already been read and verified. There are have bytes + of buffered input at next. strm is used for passing error information back + to gunpipe(). + + lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of + file, read error, or write error (a write error indicated by strm->next_in + not equal to Z_NULL), or Z_DATA_ERROR for invalid input. + */ +local int lunpipe(unsigned have, z_const unsigned char *next, struct ind *indp, + int outfile, z_stream *strm) +{ + int last; /* last byte read by NEXT(), or -1 if EOF */ + unsigned chunk; /* bytes left in current chunk */ + int left; /* bits left in rem */ + unsigned rem; /* unused bits from input */ + int bits; /* current bits per code */ + unsigned code; /* code, table traversal index */ + unsigned mask; /* mask for current bits codes */ + int max; /* maximum bits per code for this stream */ + unsigned flags; /* compress flags, then block compress flag */ + unsigned end; /* last valid entry in prefix/suffix tables */ + unsigned temp; /* current code */ + unsigned prev; /* previous code */ + unsigned final; /* last character written for previous code */ + unsigned stack; /* next position for reversed string */ + unsigned outcnt; /* bytes in output buffer */ + struct outd outd; /* output structure */ + unsigned char *p; + + /* set up output */ + outd.outfile = outfile; + outd.check = 0; + + /* process remainder of compress header -- a flags byte */ + flags = NEXT(); + if (last == -1) + return Z_BUF_ERROR; + if (flags & 0x60) { + strm->msg = (char *)"unknown lzw flags set"; + return Z_DATA_ERROR; + } + max = flags & 0x1f; + if (max < 9 || max > 16) { + strm->msg = (char *)"lzw bits out of range"; + return Z_DATA_ERROR; + } + if (max == 9) /* 9 doesn't really mean 9 */ + max = 10; + flags &= 0x80; /* true if block compress */ + + /* clear table */ + bits = 9; + mask = 0x1ff; + end = flags ? 256 : 255; + + /* set up: get first 9-bit code, which is the first decompressed byte, but + don't create a table entry until the next code */ + if (NEXT() == -1) /* no compressed data is ok */ + return Z_OK; + final = prev = (unsigned)last; /* low 8 bits of code */ + if (NEXT() == -1) /* missing a bit */ + return Z_BUF_ERROR; + if (last & 1) { /* code must be < 256 */ + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + rem = (unsigned)last >> 1; /* remaining 7 bits */ + left = 7; + chunk = bits - 2; /* 7 bytes left in this chunk */ + outbuf[0] = (unsigned char)final; /* write first decompressed byte */ + outcnt = 1; + + /* decode codes */ + stack = 0; + for (;;) { + /* if the table will be full after this, increment the code size */ + if (end >= mask && bits < max) { + FLUSHCODE(); + bits++; + mask <<= 1; + mask++; + } + + /* get a code of length bits */ + if (chunk == 0) /* decrement chunk modulo bits */ + chunk = bits; + code = rem; /* low bits of code */ + if (NEXT() == -1) { /* EOF is end of compressed data */ + /* write remaining buffered output */ + if (outcnt && out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + return Z_OK; + } + code += (unsigned)last << left; /* middle (or high) bits of code */ + left += 8; + chunk--; + if (bits > left) { /* need more bits */ + if (NEXT() == -1) /* can't end in middle of code */ + return Z_BUF_ERROR; + code += (unsigned)last << left; /* high bits of code */ + left += 8; + chunk--; + } + code &= mask; /* mask to current code length */ + left -= bits; /* number of unused bits */ + rem = (unsigned)last >> (8 - left); /* unused bits from last byte */ + + /* process clear code (256) */ + if (code == 256 && flags) { + FLUSHCODE(); + bits = 9; /* initialize bits and mask */ + mask = 0x1ff; + end = 255; /* empty table */ + continue; /* get next code */ + } + + /* special code to reuse last match */ + temp = code; /* save the current code */ + if (code > end) { + /* Be picky on the allowed code here, and make sure that the code + we drop through (prev) will be a valid index so that random + input does not cause an exception. The code != end + 1 check is + empirically derived, and not checked in the original uncompress + code. If this ever causes a problem, that check could be safely + removed. Leaving this check in greatly improves gun's ability + to detect random or corrupted input after a compress header. + In any case, the prev > end check must be retained. */ + if (code != end + 1 || prev > end) { + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + match[stack++] = (unsigned char)final; + code = prev; + } + + /* walk through linked list to generate output in reverse order */ + p = match + stack; + while (code >= 256) { + *p++ = suffix[code]; + code = prefix[code]; + } + stack = p - match; + match[stack++] = (unsigned char)code; + final = code; + + /* link new table entry */ + if (end < mask) { + end++; + prefix[end] = (unsigned short)prev; + suffix[end] = (unsigned char)final; + } + + /* set previous code for next iteration */ + prev = temp; + + /* write output in forward order */ + while (stack > SIZE - outcnt) { + while (outcnt < SIZE) + outbuf[outcnt++] = match[--stack]; + if (out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + outcnt = 0; + } + p = match + stack; + do { + outbuf[outcnt++] = *--p; + } while (p > match); + stack = 0; + + /* loop for next code with final and prev as the last match, rem and + left provide the first 0..7 bits of the next code, end is the last + valid table entry */ + } +} + +/* Decompress a gzip file from infile to outfile. strm is assumed to have been + successfully initialized with inflateBackInit(). The input file may consist + of a series of gzip streams, in which case all of them will be decompressed + to the output file. If outfile is -1, then the gzip stream(s) integrity is + checked and nothing is written. + + The return value is a zlib error code: Z_MEM_ERROR if out of memory, + Z_DATA_ERROR if the header or the compressed data is invalid, or if the + trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends + prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip + stream) follows a valid gzip stream. + */ +local int gunpipe(z_stream *strm, int infile, int outfile) +{ + int ret, first, last; + unsigned have, flags, len; + z_const unsigned char *next = NULL; + struct ind ind, *indp; + struct outd outd; + + /* setup input buffer */ + ind.infile = infile; + ind.inbuf = inbuf; + indp = &ind; + + /* decompress concatenated gzip streams */ + have = 0; /* no input data read in yet */ + first = 1; /* looking for first gzip header */ + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + for (;;) { + /* look for the two magic header bytes for a gzip stream */ + if (NEXT() == -1) { + ret = Z_OK; + break; /* empty gzip stream is ok */ + } + if (last != 31 || (NEXT() != 139 && last != 157)) { + strm->msg = (char *)"incorrect header check"; + ret = first ? Z_DATA_ERROR : Z_ERRNO; + break; /* not a gzip or compress header */ + } + first = 0; /* next non-header is junk */ + + /* process a compress (LZW) file -- can't be concatenated after this */ + if (last == 157) { + ret = lunpipe(have, next, indp, outfile, strm); + break; + } + + /* process remainder of gzip header */ + ret = Z_BUF_ERROR; + if (NEXT() != 8) { /* only deflate method allowed */ + if (last == -1) break; + strm->msg = (char *)"unknown compression method"; + ret = Z_DATA_ERROR; + break; + } + flags = NEXT(); /* header flags */ + NEXT(); /* discard mod time, xflgs, os */ + NEXT(); + NEXT(); + NEXT(); + NEXT(); + NEXT(); + if (last == -1) break; + if (flags & 0xe0) { + strm->msg = (char *)"unknown header flags set"; + ret = Z_DATA_ERROR; + break; + } + if (flags & 4) { /* extra field */ + len = NEXT(); + len += (unsigned)(NEXT()) << 8; + if (last == -1) break; + while (len > have) { + len -= have; + have = 0; + if (NEXT() == -1) break; + len--; + } + if (last == -1) break; + have -= len; + next += len; + } + if (flags & 8) /* file name */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 16) /* comment */ + while (NEXT() != 0 && last != -1) + ; + if (flags & 2) { /* header crc */ + NEXT(); + NEXT(); + } + if (last == -1) break; + + /* set up output */ + outd.outfile = outfile; + outd.check = 1; + outd.crc = crc32(0L, Z_NULL, 0); + outd.total = 0; + + /* decompress data to output */ + strm->next_in = next; + strm->avail_in = have; + ret = inflateBack(strm, in, indp, out, &outd); + if (ret != Z_STREAM_END) break; + next = strm->next_in; + have = strm->avail_in; + strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ + + /* check trailer */ + ret = Z_BUF_ERROR; + if (NEXT() != (int)(outd.crc & 0xff) || + NEXT() != (int)((outd.crc >> 8) & 0xff) || + NEXT() != (int)((outd.crc >> 16) & 0xff) || + NEXT() != (int)((outd.crc >> 24) & 0xff)) { + /* crc error */ + if (last != -1) { + strm->msg = (char *)"incorrect data check"; + ret = Z_DATA_ERROR; + } + break; + } + if (NEXT() != (int)(outd.total & 0xff) || + NEXT() != (int)((outd.total >> 8) & 0xff) || + NEXT() != (int)((outd.total >> 16) & 0xff) || + NEXT() != (int)((outd.total >> 24) & 0xff)) { + /* length error */ + if (last != -1) { + strm->msg = (char *)"incorrect length check"; + ret = Z_DATA_ERROR; + } + break; + } + + /* go back and look for another gzip stream */ + } + + /* clean up and return */ + return ret; +} + +/* Copy file attributes, from -> to, as best we can. This is best effort, so + no errors are reported. The mode bits, including suid, sgid, and the sticky + bit are copied (if allowed), the owner's user id and group id are copied + (again if allowed), and the access and modify times are copied. */ +local void copymeta(char *from, char *to) +{ + struct stat was; + struct utimbuf when; + + /* get all of from's Unix meta data, return if not a regular file */ + if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG) + return; + + /* set to's mode bits, ignore errors */ + (void)chmod(to, was.st_mode & 07777); + + /* copy owner's user and group, ignore errors */ + (void)chown(to, was.st_uid, was.st_gid); + + /* copy access and modify times, ignore errors */ + when.actime = was.st_atime; + when.modtime = was.st_mtime; + (void)utime(to, &when); +} + +/* Decompress the file inname to the file outnname, of if test is true, just + decompress without writing and check the gzip trailer for integrity. If + inname is NULL or an empty string, read from stdin. If outname is NULL or + an empty string, write to stdout. strm is a pre-initialized inflateBack + structure. When appropriate, copy the file attributes from inname to + outname. + + gunzip() returns 1 if there is an out-of-memory error or an unexpected + return code from gunpipe(). Otherwise it returns 0. + */ +local int gunzip(z_stream *strm, char *inname, char *outname, int test) +{ + int ret; + int infile, outfile; + + /* open files */ + if (inname == NULL || *inname == 0) { + inname = "-"; + infile = 0; /* stdin */ + } + else { + infile = open(inname, O_RDONLY, 0); + if (infile == -1) { + fprintf(stderr, "gun cannot open %s\n", inname); + return 0; + } + } + if (test) + outfile = -1; + else if (outname == NULL || *outname == 0) { + outname = "-"; + outfile = 1; /* stdout */ + } + else { + outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); + if (outfile == -1) { + close(infile); + fprintf(stderr, "gun cannot create %s\n", outname); + return 0; + } + } + errno = 0; + + /* decompress */ + ret = gunpipe(strm, infile, outfile); + if (outfile > 2) close(outfile); + if (infile > 2) close(infile); + + /* interpret result */ + switch (ret) { + case Z_OK: + case Z_ERRNO: + if (infile > 2 && outfile > 2) { + copymeta(inname, outname); /* copy attributes */ + unlink(inname); + } + if (ret == Z_ERRNO) + fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", + inname); + break; + case Z_DATA_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); + break; + case Z_MEM_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + case Z_BUF_ERROR: + if (outfile > 2) unlink(outname); + if (strm->next_in != Z_NULL) { + fprintf(stderr, "gun write error on %s: %s\n", + outname, strerror(errno)); + } + else if (errno) { + fprintf(stderr, "gun read error on %s: %s\n", + inname, strerror(errno)); + } + else { + fprintf(stderr, "gun unexpected end of file on %s\n", + inname); + } + break; + default: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun internal error--aborting\n"); + return 1; + } + return 0; +} + +/* Process the gun command line arguments. See the command syntax near the + beginning of this source file. */ +int main(int argc, char **argv) +{ + int ret, len, test; + char *outname; + unsigned char *window; + z_stream strm; + + /* initialize inflateBack state for repeated use */ + window = match; /* reuse LZW match buffer */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = inflateBackInit(&strm, 15, window); + if (ret != Z_OK) { + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + } + + /* decompress each file to the same name with the suffix removed */ + argc--; + argv++; + test = 0; + if (argc && strcmp(*argv, "-h") == 0) { + fprintf(stderr, "gun 1.6 (17 Jan 2010)\n"); + fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n"); + fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); + return 0; + } + if (argc && strcmp(*argv, "-t") == 0) { + test = 1; + argc--; + argv++; + } + if (argc) + do { + if (test) + outname = NULL; + else { + len = (int)strlen(*argv); + if (strcmp(*argv + len - 3, ".gz") == 0 || + strcmp(*argv + len - 3, "-gz") == 0) + len -= 3; + else if (strcmp(*argv + len - 2, ".z") == 0 || + strcmp(*argv + len - 2, "-z") == 0 || + strcmp(*argv + len - 2, "_z") == 0 || + strcmp(*argv + len - 2, ".Z") == 0) + len -= 2; + else { + fprintf(stderr, "gun error: no gz type on %s--skipping\n", + *argv); + continue; + } + outname = malloc(len + 1); + if (outname == NULL) { + fprintf(stderr, "gun out of memory error--aborting\n"); + ret = 1; + break; + } + memcpy(outname, *argv, len); + outname[len] = 0; + } + ret = gunzip(&strm, *argv, outname, test); + if (outname != NULL) free(outname); + if (ret) break; + } while (argv++, --argc); + else + ret = gunzip(&strm, NULL, NULL, test); + + /* clean up */ + inflateBackEnd(&strm); + return ret; +} diff --git a/src/deps/src/zlib/examples/gzappend.c b/src/deps/src/zlib/examples/gzappend.c new file mode 100644 index 000000000..23e93cf68 --- /dev/null +++ b/src/deps/src/zlib/examples/gzappend.c @@ -0,0 +1,504 @@ +/* gzappend -- command to append to a gzip file + + Copyright (C) 2003, 2012 Mark Adler, all rights reserved + version 1.2, 11 Oct 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 19 Oct 2003 - First version + * 1.1 4 Nov 2003 - Expand and clarify some comments and notes + * - Add version and copyright to help + * - Send help to stdout instead of stderr + * - Add some preemptive typecasts + * - Add L to constants in lseek() calls + * - Remove some debugging information in error messages + * - Use new data_type definition for zlib 1.2.1 + * - Simplify and unify file operations + * - Finish off gzip file in gztack() + * - Use deflatePrime() instead of adding empty blocks + * - Keep gzip file clean on appended file read errors + * - Use in-place rotate instead of auxiliary buffer + * (Why you ask? Because it was fun to write!) + * 1.2 11 Oct 2012 - Fix for proper z_const usage + * - Check for input buffer malloc failure + */ + +/* + gzappend takes a gzip file and appends to it, compressing files from the + command line or data from stdin. The gzip file is written to directly, to + avoid copying that file, in case it's large. Note that this results in the + unfriendly behavior that if gzappend fails, the gzip file is corrupted. + + This program was written to illustrate the use of the new Z_BLOCK option of + zlib 1.2.x's inflate() function. This option returns from inflate() at each + block boundary to facilitate locating and modifying the last block bit at + the start of the final deflate block. Also whether using Z_BLOCK or not, + another required feature of zlib 1.2.x is that inflate() now provides the + number of unused bits in the last input byte used. gzappend will not work + with versions of zlib earlier than 1.2.1. + + gzappend first decompresses the gzip file internally, discarding all but + the last 32K of uncompressed data, and noting the location of the last block + bit and the number of unused bits in the last byte of the compressed data. + The gzip trailer containing the CRC-32 and length of the uncompressed data + is verified. This trailer will be later overwritten. + + Then the last block bit is cleared by seeking back in the file and rewriting + the byte that contains it. Seeking forward, the last byte of the compressed + data is saved along with the number of unused bits to initialize deflate. + + A deflate process is initialized, using the last 32K of the uncompressed + data from the gzip file to initialize the dictionary. If the total + uncompressed data was less than 32K, then all of it is used to initialize + the dictionary. The deflate output bit buffer is also initialized with the + last bits from the original deflate stream. From here on, the data to + append is simply compressed using deflate, and written to the gzip file. + When that is complete, the new CRC-32 and uncompressed length are written + as the trailer of the gzip file. + */ + +#include +#include +#include +#include +#include +#include "zlib.h" + +#define local static +#define LGCHUNK 14 +#define CHUNK (1U << LGCHUNK) +#define DSIZE 32768U + +/* print an error message and terminate with extreme prejudice */ +local void bye(char *msg1, char *msg2) +{ + fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2); + exit(1); +} + +/* return the greatest common divisor of a and b using Euclid's algorithm, + modified to be fast when one argument much greater than the other, and + coded to avoid unnecessary swapping */ +local unsigned gcd(unsigned a, unsigned b) +{ + unsigned c; + + while (a && b) + if (a > b) { + c = b; + while (a - c >= c) + c <<= 1; + a -= c; + } + else { + c = a; + while (b - c >= c) + c <<= 1; + b -= c; + } + return a + b; +} + +/* rotate list[0..len-1] left by rot positions, in place */ +local void rotate(unsigned char *list, unsigned len, unsigned rot) +{ + unsigned char tmp; + unsigned cycles; + unsigned char *start, *last, *to, *from; + + /* normalize rot and handle degenerate cases */ + if (len < 2) return; + if (rot >= len) rot %= len; + if (rot == 0) return; + + /* pointer to last entry in list */ + last = list + (len - 1); + + /* do simple left shift by one */ + if (rot == 1) { + tmp = *list; + memmove(list, list + 1, len - 1); + *last = tmp; + return; + } + + /* do simple right shift by one */ + if (rot == len - 1) { + tmp = *last; + memmove(list + 1, list, len - 1); + *list = tmp; + return; + } + + /* otherwise do rotate as a set of cycles in place */ + cycles = gcd(len, rot); /* number of cycles */ + do { + start = from = list + cycles; /* start index is arbitrary */ + tmp = *from; /* save entry to be overwritten */ + for (;;) { + to = from; /* next step in cycle */ + from += rot; /* go right rot positions */ + if (from > last) from -= len; /* (pointer better not wrap) */ + if (from == start) break; /* all but one shifted */ + *to = *from; /* shift left */ + } + *to = tmp; /* complete the circle */ + } while (--cycles); +} + +/* structure for gzip file read operations */ +typedef struct { + int fd; /* file descriptor */ + int size; /* 1 << size is bytes in buf */ + unsigned left; /* bytes available at next */ + unsigned char *buf; /* buffer */ + z_const unsigned char *next; /* next byte in buffer */ + char *name; /* file name for error messages */ +} file; + +/* reload buffer */ +local int readin(file *in) +{ + int len; + + len = read(in->fd, in->buf, 1 << in->size); + if (len == -1) bye("error reading ", in->name); + in->left = (unsigned)len; + in->next = in->buf; + return len; +} + +/* read from file in, exit if end-of-file */ +local int readmore(file *in) +{ + if (readin(in) == 0) bye("unexpected end of ", in->name); + return 0; +} + +#define read1(in) (in->left == 0 ? readmore(in) : 0, \ + in->left--, *(in->next)++) + +/* skip over n bytes of in */ +local void skip(file *in, unsigned n) +{ + unsigned bypass; + + if (n > in->left) { + n -= in->left; + bypass = n & ~((1U << in->size) - 1); + if (bypass) { + if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) + bye("seeking ", in->name); + n -= bypass; + } + readmore(in); + if (n > in->left) + bye("unexpected end of ", in->name); + } + in->left -= n; + in->next += n; +} + +/* read a four-byte unsigned integer, little-endian, from in */ +unsigned long read4(file *in) +{ + unsigned long val; + + val = read1(in); + val += (unsigned)read1(in) << 8; + val += (unsigned long)read1(in) << 16; + val += (unsigned long)read1(in) << 24; + return val; +} + +/* skip over gzip header */ +local void gzheader(file *in) +{ + int flags; + unsigned n; + + if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); + if (read1(in) != 8) bye("unknown compression method in", in->name); + flags = read1(in); + if (flags & 0xe0) bye("unknown header flags set in", in->name); + skip(in, 6); + if (flags & 4) { + n = read1(in); + n += (unsigned)(read1(in)) << 8; + skip(in, n); + } + if (flags & 8) while (read1(in) != 0) ; + if (flags & 16) while (read1(in) != 0) ; + if (flags & 2) skip(in, 2); +} + +/* decompress gzip file "name", return strm with a deflate stream ready to + continue compression of the data in the gzip file, and return a file + descriptor pointing to where to write the compressed data -- the deflate + stream is initialized to compress using level "level" */ +local int gzscan(char *name, z_stream *strm, int level) +{ + int ret, lastbit, left, full; + unsigned have; + unsigned long crc, tot; + unsigned char *window; + off_t lastoff, end; + file gz; + + /* open gzip file */ + gz.name = name; + gz.fd = open(name, O_RDWR, 0); + if (gz.fd == -1) bye("cannot open ", name); + gz.buf = malloc(CHUNK); + if (gz.buf == NULL) bye("out of memory", ""); + gz.size = LGCHUNK; + gz.left = 0; + + /* skip gzip header */ + gzheader(&gz); + + /* prepare to decompress */ + window = malloc(DSIZE); + if (window == NULL) bye("out of memory", ""); + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = inflateInit2(strm, -15); + if (ret != Z_OK) bye("out of memory", " or library mismatch"); + + /* decompress the deflate stream, saving append information */ + lastbit = 0; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + left = 0; + strm->avail_in = gz.left; + strm->next_in = gz.next; + crc = crc32(0L, Z_NULL, 0); + have = full = 0; + do { + /* if needed, get more input */ + if (strm->avail_in == 0) { + readmore(&gz); + strm->avail_in = gz.left; + strm->next_in = gz.next; + } + + /* set up output to next available section of sliding window */ + strm->avail_out = DSIZE - have; + strm->next_out = window + have; + + /* inflate and check for errors */ + ret = inflate(strm, Z_BLOCK); + if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); + if (ret == Z_MEM_ERROR) bye("out of memory", ""); + if (ret == Z_DATA_ERROR) + bye("invalid compressed data--format violated in", name); + + /* update crc and sliding window pointer */ + crc = crc32(crc, window + have, DSIZE - have - strm->avail_out); + if (strm->avail_out) + have = DSIZE - strm->avail_out; + else { + have = 0; + full = 1; + } + + /* process end of block */ + if (strm->data_type & 128) { + if (strm->data_type & 64) + left = strm->data_type & 0x1f; + else { + lastbit = strm->data_type & 0x1f; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; + } + } + } while (ret != Z_STREAM_END); + inflateEnd(strm); + gz.left = strm->avail_in; + gz.next = strm->next_in; + + /* save the location of the end of the compressed data */ + end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + + /* check gzip trailer and save total for deflate */ + if (crc != read4(&gz)) + bye("invalid compressed data--crc mismatch in ", name); + tot = strm->total_out; + if ((tot & 0xffffffffUL) != read4(&gz)) + bye("invalid compressed data--length mismatch in", name); + + /* if not at end of file, warn */ + if (gz.left || readin(&gz)) + fprintf(stderr, + "gzappend warning: junk at end of gzip file overwritten\n"); + + /* clear last block bit */ + lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); + lseek(gz.fd, -1L, SEEK_CUR); + if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); + + /* if window wrapped, build dictionary from window by rotating */ + if (full) { + rotate(window, DSIZE, have); + have = DSIZE; + } + + /* set up deflate stream with window, crc, total_in, and leftover bits */ + ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) bye("out of memory", ""); + deflateSetDictionary(strm, window, have); + strm->adler = crc; + strm->total_in = tot; + if (left) { + lseek(gz.fd, --end, SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + deflatePrime(strm, 8 - left, *gz.buf); + } + lseek(gz.fd, end, SEEK_SET); + + /* clean up and return */ + free(window); + free(gz.buf); + return gz.fd; +} + +/* append file "name" to gzip file gd using deflate stream strm -- if last + is true, then finish off the deflate stream at the end */ +local void gztack(char *name, int gd, z_stream *strm, int last) +{ + int fd, len, ret; + unsigned left; + unsigned char *in, *out; + + /* open file to compress and append */ + fd = 0; + if (name != NULL) { + fd = open(name, O_RDONLY, 0); + if (fd == -1) + fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", + name); + } + + /* allocate buffers */ + in = malloc(CHUNK); + out = malloc(CHUNK); + if (in == NULL || out == NULL) bye("out of memory", ""); + + /* compress input file and append to gzip file */ + do { + /* get more input */ + len = read(fd, in, CHUNK); + if (len == -1) { + fprintf(stderr, + "gzappend warning: error reading %s, skipping rest ...\n", + name); + len = 0; + } + strm->avail_in = (unsigned)len; + strm->next_in = in; + if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); + + /* compress and write all available output */ + do { + strm->avail_out = CHUNK; + strm->next_out = out; + ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH); + left = CHUNK - strm->avail_out; + while (left) { + len = write(gd, out + CHUNK - strm->avail_out - left, left); + if (len == -1) bye("writing gzip file", ""); + left -= (unsigned)len; + } + } while (strm->avail_out == 0 && ret != Z_STREAM_END); + } while (len != 0); + + /* write trailer after last entry */ + if (last) { + deflateEnd(strm); + out[0] = (unsigned char)(strm->adler); + out[1] = (unsigned char)(strm->adler >> 8); + out[2] = (unsigned char)(strm->adler >> 16); + out[3] = (unsigned char)(strm->adler >> 24); + out[4] = (unsigned char)(strm->total_in); + out[5] = (unsigned char)(strm->total_in >> 8); + out[6] = (unsigned char)(strm->total_in >> 16); + out[7] = (unsigned char)(strm->total_in >> 24); + len = 8; + do { + ret = write(gd, out + 8 - len, len); + if (ret == -1) bye("writing gzip file", ""); + len -= ret; + } while (len); + close(gd); + } + + /* clean up and return */ + free(out); + free(in); + if (fd > 0) close(fd); +} + +/* process the compression level option if present, scan the gzip file, and + append the specified files, or append the data from stdin if no other file + names are provided on the command line -- the gzip file must be writable + and seekable */ +int main(int argc, char **argv) +{ + int gd, level; + z_stream strm; + + /* ignore command name */ + argc--; argv++; + + /* provide usage if no arguments */ + if (*argv == NULL) { + printf( + "gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n" + ); + printf( + "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n"); + return 0; + } + + /* set compression level */ + level = Z_DEFAULT_COMPRESSION; + if (argv[0][0] == '-') { + if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) + bye("invalid compression level", ""); + level = argv[0][1] - '0'; + if (*++argv == NULL) bye("no gzip file name after options", ""); + } + + /* prepare to append to gzip file */ + gd = gzscan(*argv++, &strm, level); + + /* append files on command line, or from stdin if none */ + if (*argv == NULL) + gztack(NULL, gd, &strm, 1); + else + do { + gztack(*argv, gd, &strm, argv[1] == NULL); + } while (*++argv != NULL); + return 0; +} diff --git a/src/deps/src/zlib/examples/gzjoin.c b/src/deps/src/zlib/examples/gzjoin.c new file mode 100644 index 000000000..89e809844 --- /dev/null +++ b/src/deps/src/zlib/examples/gzjoin.c @@ -0,0 +1,449 @@ +/* gzjoin -- command to join gzip files into one gzip file + + Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved + version 1.2, 14 Aug 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 11 Dec 2004 - First version + * 1.1 12 Jun 2005 - Changed ssize_t to long for portability + * 1.2 14 Aug 2012 - Clean up for z_const usage + */ + +/* + gzjoin takes one or more gzip files on the command line and writes out a + single gzip file that will uncompress to the concatenation of the + uncompressed data from the individual gzip files. gzjoin does this without + having to recompress any of the data and without having to calculate a new + crc32 for the concatenated uncompressed data. gzjoin does however have to + decompress all of the input data in order to find the bits in the compressed + data that need to be modified to concatenate the streams. + + gzjoin does not do an integrity check on the input gzip files other than + checking the gzip header and decompressing the compressed data. They are + otherwise assumed to be complete and correct. + + Each joint between gzip files removes at least 18 bytes of previous trailer + and subsequent header, and inserts an average of about three bytes to the + compressed data in order to connect the streams. The output gzip file + has a minimal ten-byte gzip header with no file name or modification time. + + This program was written to illustrate the use of the Z_BLOCK option of + inflate() and the crc32_combine() function. gzjoin will not compile with + versions of zlib earlier than 1.2.3. + */ + +#include /* fputs(), fprintf(), fwrite(), putc() */ +#include /* exit(), malloc(), free() */ +#include /* open() */ +#include /* close(), read(), lseek() */ +#include "zlib.h" + /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ + +#define local static + +/* exit with an error (return a value to allow use in an expression) */ +local int bail(char *why1, char *why2) +{ + fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); + exit(1); + return 0; +} + +/* -- simple buffered file input with access to the buffer -- */ + +#define CHUNK 32768 /* must be a power of two and fit in unsigned */ + +/* bin buffered input file type */ +typedef struct { + char *name; /* name of file for error messages */ + int fd; /* file descriptor */ + unsigned left; /* bytes remaining at next */ + unsigned char *next; /* next byte to read */ + unsigned char *buf; /* allocated buffer of length CHUNK */ +} bin; + +/* close a buffered file and free allocated memory */ +local void bclose(bin *in) +{ + if (in != NULL) { + if (in->fd != -1) + close(in->fd); + if (in->buf != NULL) + free(in->buf); + free(in); + } +} + +/* open a buffered file for input, return a pointer to type bin, or NULL on + failure */ +local bin *bopen(char *name) +{ + bin *in; + + in = malloc(sizeof(bin)); + if (in == NULL) + return NULL; + in->buf = malloc(CHUNK); + in->fd = open(name, O_RDONLY, 0); + if (in->buf == NULL || in->fd == -1) { + bclose(in); + return NULL; + } + in->left = 0; + in->next = in->buf; + in->name = name; + return in; +} + +/* load buffer from file, return -1 on read error, 0 or 1 on success, with + 1 indicating that end-of-file was reached */ +local int bload(bin *in) +{ + long len; + + if (in == NULL) + return -1; + if (in->left != 0) + return 0; + in->next = in->buf; + do { + len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); + if (len < 0) + return -1; + in->left += (unsigned)len; + } while (len != 0 && in->left < CHUNK); + return len == 0 ? 1 : 0; +} + +/* get a byte from the file, bail if end of file */ +#define bget(in) (in->left ? 0 : bload(in), \ + in->left ? (in->left--, *(in->next)++) : \ + bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ + unsigned long val; + + val = bget(in); + val += (unsigned long)(bget(in)) << 8; + val += (unsigned long)(bget(in)) << 16; + val += (unsigned long)(bget(in)) << 24; + return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ + /* check pointer */ + if (in == NULL) + return; + + /* easy case -- skip bytes in buffer */ + if (skip <= in->left) { + in->left -= skip; + in->next += skip; + return; + } + + /* skip what's in buffer, discard buffer contents */ + skip -= in->left; + in->left = 0; + + /* seek past multiples of CHUNK bytes */ + if (skip > CHUNK) { + unsigned left; + + left = skip & (CHUNK - 1); + if (left == 0) { + /* exact number of chunks: seek all the way minus one byte to check + for end-of-file with a read */ + lseek(in->fd, skip - 1, SEEK_CUR); + if (read(in->fd, in->buf, 1) != 1) + bail("unexpected end of file on ", in->name); + return; + } + + /* skip the integral chunks, update skip with remainder */ + lseek(in->fd, skip - left, SEEK_CUR); + skip = left; + } + + /* read more input and skip remainder */ + bload(in); + if (skip > in->left) + bail("unexpected end of file on ", in->name); + in->left -= skip; + in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ + int flags; + + /* verify gzip magic header and compression method */ + if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) + bail(in->name, " is not a valid gzip file"); + + /* get and verify flags */ + flags = bget(in); + if ((flags & 0xe0) != 0) + bail("unknown reserved bits set in ", in->name); + + /* skip modification time, extra flags, and os */ + bskip(in, 6); + + /* skip extra field if present */ + if (flags & 4) { + unsigned len; + + len = bget(in); + len += (unsigned)(bget(in)) << 8; + bskip(in, len); + } + + /* skip file name if present */ + if (flags & 8) + while (bget(in) != 0) + ; + + /* skip comment if present */ + if (flags & 16) + while (bget(in) != 0) + ; + + /* skip header crc if present */ + if (flags & 2) + bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ + putc(val & 0xff, out); + putc((val >> 8) & 0xff, out); + putc((val >> 16) & 0xff, out); + putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ + if (in->left == 0) + bload(in); + if (in->left == 0) + bail("unexpected end of file on ", in->name); + strm->avail_in = in->left; + strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. */ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + *crc = crc32(0L, Z_NULL, 0); + *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last + block if clr is true, and adding empty blocks as needed to get to a byte + boundary. If clr is false, then the last block becomes the last block of + the output, and the gzip trailer is written. crc and tot maintains the + crc and length (modulo 2^32) of the output for the trailer. The resulting + gzip file is written to out. gzinit() must be called before the first call + of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, + FILE *out) +{ + int ret; /* return value from zlib functions */ + int pos; /* where the "last block" bit is in byte */ + int last; /* true if processing the last block */ + bin *in; /* buffered input file */ + unsigned char *start; /* start of compressed data in buffer */ + unsigned char *junk; /* buffer for uncompressed data -- discarded */ + z_off_t len; /* length of uncompressed data (support > 4 GB) */ + z_stream strm; /* zlib inflate stream */ + + /* open gzip file and skip header */ + in = bopen(name); + if (in == NULL) + bail("could not open ", name); + gzhead(in); + + /* allocate buffer for uncompressed data and initialize raw inflate + stream */ + junk = malloc(CHUNK); + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (junk == NULL || ret != Z_OK) + bail("out of memory", ""); + + /* inflate and copy compressed data, clear last-block bit if requested */ + len = 0; + zpull(&strm, in); + start = in->next; + last = start[0] & 1; + if (last && clr) + start[0] &= ~1; + strm.avail_out = 0; + for (;;) { + /* if input used and output done, write used input and get more */ + if (strm.avail_in == 0 && strm.avail_out != 0) { + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + + /* decompress -- return early when end-of-block reached */ + strm.avail_out = CHUNK; + strm.next_out = junk; + ret = inflate(&strm, Z_BLOCK); + switch (ret) { + case Z_MEM_ERROR: + bail("out of memory", ""); + case Z_DATA_ERROR: + bail("invalid compressed data in ", in->name); + } + + /* update length of uncompressed data */ + len += CHUNK - strm.avail_out; + + /* check for block boundary (only get this when block copied out) */ + if (strm.data_type & 128) { + /* if that was the last block, then done */ + if (last) + break; + + /* number of unused bits in last byte */ + pos = strm.data_type & 7; + + /* find the next last-block bit */ + if (pos != 0) { + /* next last-block bit is in last used byte */ + pos = 0x100 >> pos; + last = strm.next_in[-1] & pos; + if (last && clr) + in->buf[strm.next_in - in->buf - 1] &= ~pos; + } + else { + /* next last-block bit is in next unused byte */ + if (strm.avail_in == 0) { + /* don't have that byte yet -- get it */ + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + last = strm.next_in[0] & 1; + if (last && clr) + in->buf[strm.next_in - in->buf] &= ~1; + } + } + } + + /* update buffer with unused input */ + in->left = strm.avail_in; + in->next = in->buf + (strm.next_in - in->buf); + + /* copy used input, write empty blocks to get to byte boundary */ + pos = strm.data_type & 7; + fwrite(start, 1, in->next - start - 1, out); + last = in->next[-1]; + if (pos == 0 || !clr) + /* already at byte boundary, or last file: write last byte */ + putc(last, out); + else { + /* append empty blocks to last byte */ + last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ + if (pos & 1) { + /* odd -- append an empty stored block */ + putc(last, out); + if (pos == 1) + putc(0, out); /* two more bits in block header */ + fwrite("\0\0\xff\xff", 1, 4, out); + } + else { + /* even -- append 1, 2, or 3 empty fixed blocks */ + switch (pos) { + case 6: + putc(last | 8, out); + last = 0; + case 4: + putc(last | 0x20, out); + last = 0; + case 2: + putc(last | 0x80, out); + putc(0, out); + } + } + } + + /* update crc and tot */ + *crc = crc32_combine(*crc, bget4(in), len); + *tot += (unsigned long)len; + + /* clean up */ + inflateEnd(&strm); + free(junk); + bclose(in); + + /* write trailer if this is the last gzip file */ + if (!clr) { + put4(*crc, out); + put4(*tot, out); + } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ + unsigned long crc, tot; /* running crc and total uncompressed length */ + + /* skip command name */ + argc--; + argv++; + + /* show usage if no arguments */ + if (argc == 0) { + fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", + stderr); + return 0; + } + + /* join gzip files on command line and write to stdout */ + gzinit(&crc, &tot, stdout); + while (argc--) + gzcopy(*argv++, argc, &crc, &tot, stdout); + + /* done */ + return 0; +} diff --git a/src/deps/src/zlib/examples/gzlog.c b/src/deps/src/zlib/examples/gzlog.c new file mode 100644 index 000000000..da1b02e73 --- /dev/null +++ b/src/deps/src/zlib/examples/gzlog.c @@ -0,0 +1,1061 @@ +/* + * gzlog.c + * Copyright (C) 2004, 2008, 2012, 2016, 2019 Mark Adler, all rights reserved + * For conditions of distribution and use, see copyright notice in gzlog.h + * version 2.3, 25 May 2019 + */ + +/* + gzlog provides a mechanism for frequently appending short strings to a gzip + file that is efficient both in execution time and compression ratio. The + strategy is to write the short strings in an uncompressed form to the end of + the gzip file, only compressing when the amount of uncompressed data has + reached a given threshold. + + gzlog also provides protection against interruptions in the process due to + system crashes. The status of the operation is recorded in an extra field + in the gzip file, and is only updated once the gzip file is brought to a + valid state. The last data to be appended or compressed is saved in an + auxiliary file, so that if the operation is interrupted, it can be completed + the next time an append operation is attempted. + + gzlog maintains another auxiliary file with the last 32K of data from the + compressed portion, which is preloaded for the compression of the subsequent + data. This minimizes the impact to the compression ratio of appending. + */ + +/* + Operations Concept: + + Files (log name "foo"): + foo.gz -- gzip file with the complete log + foo.add -- last message to append or last data to compress + foo.dict -- dictionary of the last 32K of data for next compression + foo.temp -- temporary dictionary file for compression after this one + foo.lock -- lock file for reading and writing the other files + foo.repairs -- log file for log file recovery operations (not compressed) + + gzip file structure: + - fixed-length (no file name) header with extra field (see below) + - compressed data ending initially with empty stored block + - uncompressed data filling out originally empty stored block and + subsequent stored blocks as needed (16K max each) + - gzip trailer + - no junk at end (no other gzip streams) + + When appending data, the information in the first three items above plus the + foo.add file are sufficient to recover an interrupted append operation. The + extra field has the necessary information to restore the start of the last + stored block and determine where to append the data in the foo.add file, as + well as the crc and length of the gzip data before the append operation. + + The foo.add file is created before the gzip file is marked for append, and + deleted after the gzip file is marked as complete. So if the append + operation is interrupted, the data to add will still be there. If due to + some external force, the foo.add file gets deleted between when the append + operation was interrupted and when recovery is attempted, the gzip file will + still be restored, but without the appended data. + + When compressing data, the information in the first two items above plus the + foo.add file are sufficient to recover an interrupted compress operation. + The extra field has the necessary information to find the end of the + compressed data, and contains both the crc and length of just the compressed + data and of the complete set of data including the contents of the foo.add + file. + + Again, the foo.add file is maintained during the compress operation in case + of an interruption. If in the unlikely event the foo.add file with the data + to be compressed is missing due to some external force, a gzip file with + just the previous compressed data will be reconstructed. In this case, all + of the data that was to be compressed is lost (approximately one megabyte). + This will not occur if all that happened was an interruption of the compress + operation. + + The third state that is marked is the replacement of the old dictionary with + the new dictionary after a compress operation. Once compression is + complete, the gzip file is marked as being in the replace state. This + completes the gzip file, so an interrupt after being so marked does not + result in recompression. Then the dictionary file is replaced, and the gzip + file is marked as completed. This state prevents the possibility of + restarting compression with the wrong dictionary file. + + All three operations are wrapped by a lock/unlock procedure. In order to + gain exclusive access to the log files, first a foo.lock file must be + exclusively created. When all operations are complete, the lock is + released by deleting the foo.lock file. If when attempting to create the + lock file, it already exists and the modify time of the lock file is more + than five minutes old (set by the PATIENCE define below), then the old + lock file is considered stale and deleted, and the exclusive creation of + the lock file is retried. To assure that there are no false assessments + of the staleness of the lock file, the operations periodically touch the + lock file to update the modified date. + + Following is the definition of the extra field with all of the information + required to enable the above append and compress operations and their + recovery if interrupted. Multi-byte values are stored little endian + (consistent with the gzip format). File pointers are eight bytes long. + The crc's and lengths for the gzip trailer are four bytes long. (Note that + the length at the end of a gzip file is used for error checking only, and + for large files is actually the length modulo 2^32.) The stored block + length is two bytes long. The gzip extra field two-byte identification is + "ap" for append. It is assumed that writing the extra field to the file is + an "atomic" operation. That is, either all of the extra field is written + to the file, or none of it is, if the operation is interrupted right at the + point of updating the extra field. This is a reasonable assumption, since + the extra field is within the first 52 bytes of the file, which is smaller + than any expected block size for a mass storage device (usually 512 bytes or + larger). + + Extra field (35 bytes): + - Pointer to first stored block length -- this points to the two-byte length + of the first stored block, which is followed by the two-byte, one's + complement of that length. The stored block length is preceded by the + three-bit header of the stored block, which is the actual start of the + stored block in the deflate format. See the bit offset field below. + - Pointer to the last stored block length. This is the same as above, but + for the last stored block of the uncompressed data in the gzip file. + Initially this is the same as the first stored block length pointer. + When the stored block gets to 16K (see the MAX_STORE define), then a new + stored block as added, at which point the last stored block length pointer + is different from the first stored block length pointer. When they are + different, the first bit of the last stored block header is eight bits, or + one byte back from the block length. + - Compressed data crc and length. This is the crc and length of the data + that is in the compressed portion of the deflate stream. These are used + only in the event that the foo.add file containing the data to compress is + lost after a compress operation is interrupted. + - Total data crc and length. This is the crc and length of all of the data + stored in the gzip file, compressed and uncompressed. It is used to + reconstruct the gzip trailer when compressing, as well as when recovering + interrupted operations. + - Final stored block length. This is used to quickly find where to append, + and allows the restoration of the original final stored block state when + an append operation is interrupted. + - First stored block start as the number of bits back from the final stored + block first length byte. This value is in the range of 3..10, and is + stored as the low three bits of the final byte of the extra field after + subtracting three (0..7). This allows the last-block bit of the stored + block header to be updated when a new stored block is added, for the case + when the first stored block and the last stored block are the same. (When + they are different, the numbers of bits back is known to be eight.) This + also allows for new compressed data to be appended to the old compressed + data in the compress operation, overwriting the previous first stored + block, or for the compressed data to be terminated and a valid gzip file + reconstructed on the off chance that a compression operation was + interrupted and the data to compress in the foo.add file was deleted. + - The operation in process. This is the next two bits in the last byte (the + bits under the mask 0x18). The are interpreted as 0: nothing in process, + 1: append in process, 2: compress in process, 3: replace in process. + - The top three bits of the last byte in the extra field are reserved and + are currently set to zero. + + Main procedure: + - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of + the system open() call. If the modify time of an existing lock file is + more than PATIENCE seconds old, then the lock file is deleted and the + exclusive create is retried. + - Load the extra field from the foo.gz file, and see if an operation was in + progress but not completed. If so, apply the recovery procedure below. + - Perform the append procedure with the provided data. + - If the uncompressed data in the foo.gz file is 1MB or more, apply the + compress procedure. + - Delete the foo.lock file. + + Append procedure: + - Put what to append in the foo.add file so that the operation can be + restarted if this procedure is interrupted. + - Mark the foo.gz extra field with the append operation in progress. + + Restore the original last-block bit and stored block length of the last + stored block from the information in the extra field, in case a previous + append operation was interrupted. + - Append the provided data to the last stored block, creating new stored + blocks as needed and updating the stored blocks last-block bits and + lengths. + - Update the crc and length with the new data, and write the gzip trailer. + - Write over the extra field (with a single write operation) with the new + pointers, lengths, and crc's, and mark the gzip file as not in process. + Though there is still a foo.add file, it will be ignored since nothing + is in process. If a foo.add file is leftover from a previously + completed operation, it is truncated when writing new data to it. + - Delete the foo.add file. + + Compress and replace procedures: + - Read all of the uncompressed data in the stored blocks in foo.gz and write + it to foo.add. Also write foo.temp with the last 32K of that data to + provide a dictionary for the next invocation of this procedure. + - Rewrite the extra field marking foo.gz with a compression in process. + * If there is no data provided to compress (due to a missing foo.add file + when recovering), reconstruct and truncate the foo.gz file to contain + only the previous compressed data and proceed to the step after the next + one. Otherwise ... + - Compress the data with the dictionary in foo.dict, and write to the + foo.gz file starting at the bit immediately following the last previously + compressed block. If there is no foo.dict, proceed anyway with the + compression at slightly reduced efficiency. (For the foo.dict file to be + missing requires some external failure beyond simply the interruption of + a compress operation.) During this process, the foo.lock file is + periodically touched to assure that that file is not considered stale by + another process before we're done. The deflation is terminated with a + non-last empty static block (10 bits long), that is then located and + written over by a last-bit-set empty stored block. + - Append the crc and length of the data in the gzip file (previously + calculated during the append operations). + - Write over the extra field with the updated stored block offsets, bits + back, crc's, and lengths, and mark foo.gz as in process for a replacement + of the dictionary. + @ Delete the foo.add file. + - Replace foo.dict with foo.temp. + - Write over the extra field, marking foo.gz as complete. + + Recovery procedure: + - If not a replace recovery, read in the foo.add file, and provide that data + to the appropriate recovery below. If there is no foo.add file, provide + a zero data length to the recovery. In that case, the append recovery + restores the foo.gz to the previous compressed + uncompressed data state. + For the compress recovery, a missing foo.add file results in foo.gz being + restored to the previous compressed-only data state. + - Append recovery: + - Pick up append at + step above + - Compress recovery: + - Pick up compress at * step above + - Replace recovery: + - Pick up compress at @ step above + - Log the repair with a date stamp in foo.repairs + */ + +#include +#include /* rename, fopen, fprintf, fclose */ +#include /* malloc, free */ +#include /* strlen, strrchr, strcpy, strncpy, strcmp */ +#include /* open */ +#include /* lseek, read, write, close, unlink, sleep, */ + /* ftruncate, fsync */ +#include /* errno */ +#include /* time, ctime */ +#include /* stat */ +#include /* utimes */ +#include "zlib.h" /* crc32 */ + +#include "gzlog.h" /* header for external access */ + +#define local static +typedef unsigned int uint; +typedef unsigned long ulong; + +/* Macro for debugging to deterministically force recovery operations */ +#ifdef GZLOG_DEBUG + #include /* longjmp */ + jmp_buf gzlog_jump; /* where to go back to */ + int gzlog_bail = 0; /* which point to bail at (1..8) */ + int gzlog_count = -1; /* number of times through to wait */ +# define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \ + longjmp(gzlog_jump, gzlog_bail); } while (0) +#else +# define BAIL(n) +#endif + +/* how old the lock file can be in seconds before considering it stale */ +#define PATIENCE 300 + +/* maximum stored block size in Kbytes -- must be in 1..63 */ +#define MAX_STORE 16 + +/* number of stored Kbytes to trigger compression (must be >= 32 to allow + dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to + discard the stored block headers contribution of five bytes each) */ +#define TRIGGER 1024 + +/* size of a deflate dictionary (this cannot be changed) */ +#define DICT 32768U + +/* values for the operation (2 bits) */ +#define NO_OP 0 +#define APPEND_OP 1 +#define COMPRESS_OP 2 +#define REPLACE_OP 3 + +/* macros to extract little-endian integers from an unsigned byte buffer */ +#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8)) +#define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16)) +#define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32)) + +/* macros to store integers into a byte buffer in little-endian order */ +#define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0) +#define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0) +#define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0) + +/* internal structure for log information */ +#define LOGID "\106\035\172" /* should be three non-zero characters */ +struct log { + char id[4]; /* contains LOGID to detect inadvertent overwrites */ + int fd; /* file descriptor for .gz file, opened read/write */ + char *path; /* allocated path, e.g. "/var/log/foo" or "foo" */ + char *end; /* end of path, for appending suffices such as ".gz" */ + off_t first; /* offset of first stored block first length byte */ + int back; /* location of first block id in bits back from first */ + uint stored; /* bytes currently in last stored block */ + off_t last; /* offset of last stored block first length byte */ + ulong ccrc; /* crc of compressed data */ + ulong clen; /* length (modulo 2^32) of compressed data */ + ulong tcrc; /* crc of total data */ + ulong tlen; /* length (modulo 2^32) of total data */ + time_t lock; /* last modify time of our lock file */ +}; + +/* gzip header for gzlog */ +local unsigned char log_gzhead[] = { + 0x1f, 0x8b, /* magic gzip id */ + 8, /* compression method is deflate */ + 4, /* there is an extra field (no file name) */ + 0, 0, 0, 0, /* no modification time provided */ + 0, 0xff, /* no extra flags, no OS specified */ + 39, 0, 'a', 'p', 35, 0 /* extra field with "ap" subfield */ + /* 35 is EXTRA, 39 is EXTRA + 4 */ +}; + +#define HEAD sizeof(log_gzhead) /* should be 16 */ + +/* initial gzip extra field content (52 == HEAD + EXTRA + 1) */ +local unsigned char log_gzext[] = { + 52, 0, 0, 0, 0, 0, 0, 0, /* offset of first stored block length */ + 52, 0, 0, 0, 0, 0, 0, 0, /* offset of last stored block length */ + 0, 0, 0, 0, 0, 0, 0, 0, /* compressed data crc and length */ + 0, 0, 0, 0, 0, 0, 0, 0, /* total data crc and length */ + 0, 0, /* final stored block data length */ + 5 /* op is NO_OP, last bit 8 bits back */ +}; + +#define EXTRA sizeof(log_gzext) /* should be 35 */ + +/* initial gzip data and trailer */ +local unsigned char log_gzbody[] = { + 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ + 0, 0, 0, 0, /* crc */ + 0, 0, 0, 0 /* uncompressed length */ +}; + +#define BODY sizeof(log_gzbody) + +/* Exclusively create foo.lock in order to negotiate exclusive access to the + foo.* files. If the modify time of an existing lock file is greater than + PATIENCE seconds in the past, then consider the lock file to have been + abandoned, delete it, and try the exclusive create again. Save the lock + file modify time for verification of ownership. Return 0 on success, or -1 + on failure, usually due to an access restriction or invalid path. Note that + if stat() or unlink() fails, it may be due to another process noticing the + abandoned lock file a smidge sooner and deleting it, so those are not + flagged as an error. */ +local int log_lock(struct log *log) +{ + int fd; + struct stat st; + + strcpy(log->end, ".lock"); + while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) { + if (errno != EEXIST) + return -1; + if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) { + unlink(log->path); + continue; + } + sleep(2); /* relinquish the CPU for two seconds while waiting */ + } + close(fd); + if (stat(log->path, &st) == 0) + log->lock = st.st_mtime; + return 0; +} + +/* Update the modify time of the lock file to now, in order to prevent another + task from thinking that the lock is stale. Save the lock file modify time + for verification of ownership. */ +local void log_touch(struct log *log) +{ + struct stat st; + + strcpy(log->end, ".lock"); + utimes(log->path, NULL); + if (stat(log->path, &st) == 0) + log->lock = st.st_mtime; +} + +/* Check the log file modify time against what is expected. Return true if + this is not our lock. If it is our lock, touch it to keep it. */ +local int log_check(struct log *log) +{ + struct stat st; + + strcpy(log->end, ".lock"); + if (stat(log->path, &st) || st.st_mtime != log->lock) + return 1; + log_touch(log); + return 0; +} + +/* Unlock a previously acquired lock, but only if it's ours. */ +local void log_unlock(struct log *log) +{ + if (log_check(log)) + return; + strcpy(log->end, ".lock"); + unlink(log->path); + log->lock = 0; +} + +/* Check the gzip header and read in the extra field, filling in the values in + the log structure. Return op on success or -1 if the gzip header was not as + expected. op is the current operation in progress last written to the extra + field. This assumes that the gzip file has already been opened, with the + file descriptor log->fd. */ +local int log_head(struct log *log) +{ + int op; + unsigned char buf[HEAD + EXTRA]; + + if (lseek(log->fd, 0, SEEK_SET) < 0 || + read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA || + memcmp(buf, log_gzhead, HEAD)) { + return -1; + } + log->first = PULL8(buf + HEAD); + log->last = PULL8(buf + HEAD + 8); + log->ccrc = PULL4(buf + HEAD + 16); + log->clen = PULL4(buf + HEAD + 20); + log->tcrc = PULL4(buf + HEAD + 24); + log->tlen = PULL4(buf + HEAD + 28); + log->stored = PULL2(buf + HEAD + 32); + log->back = 3 + (buf[HEAD + 34] & 7); + op = (buf[HEAD + 34] >> 3) & 3; + return op; +} + +/* Write over the extra field contents, marking the operation as op. Use fsync + to assure that the device is written to, and in the requested order. This + operation, and only this operation, is assumed to be atomic in order to + assure that the log is recoverable in the event of an interruption at any + point in the process. Return -1 if the write to foo.gz failed. */ +local int log_mark(struct log *log, int op) +{ + int ret; + unsigned char ext[EXTRA]; + + PUT8(ext, log->first); + PUT8(ext + 8, log->last); + PUT4(ext + 16, log->ccrc); + PUT4(ext + 20, log->clen); + PUT4(ext + 24, log->tcrc); + PUT4(ext + 28, log->tlen); + PUT2(ext + 32, log->stored); + ext[34] = log->back - 3 + (op << 3); + fsync(log->fd); + ret = lseek(log->fd, HEAD, SEEK_SET) < 0 || + write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0; + fsync(log->fd); + return ret; +} + +/* Rewrite the last block header bits and subsequent zero bits to get to a byte + boundary, setting the last block bit if last is true, and then write the + remainder of the stored block header (length and one's complement). Leave + the file pointer after the end of the last stored block data. Return -1 if + there is a read or write failure on the foo.gz file */ +local int log_last(struct log *log, int last) +{ + int back, len, mask; + unsigned char buf[6]; + + /* determine the locations of the bytes and bits to modify */ + back = log->last == log->first ? log->back : 8; + len = back > 8 ? 2 : 1; /* bytes back from log->last */ + mask = 0x80 >> ((back - 1) & 7); /* mask for block last-bit */ + + /* get the byte to modify (one or two back) into buf[0] -- don't need to + read the byte if the last-bit is eight bits back, since in that case + the entire byte will be modified */ + buf[0] = 0; + if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 || + read(log->fd, buf, 1) != 1)) + return -1; + + /* change the last-bit of the last stored block as requested -- note + that all bits above the last-bit are set to zero, per the type bits + of a stored block being 00 and per the convention that the bits to + bring the stream to a byte boundary are also zeros */ + buf[1] = 0; + buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0); + + /* write the modified stored block header and lengths, move the file + pointer to after the last stored block data */ + PUT2(buf + 2, log->stored); + PUT2(buf + 4, log->stored ^ 0xffff); + return lseek(log->fd, log->last - len, SEEK_SET) < 0 || + write(log->fd, buf + 2 - len, len + 4) != len + 4 || + lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0; +} + +/* Append len bytes from data to the locked and open log file. len may be zero + if recovering and no .add file was found. In that case, the previous state + of the foo.gz file is restored. The data is appended uncompressed in + deflate stored blocks. Return -1 if there was an error reading or writing + the foo.gz file. */ +local int log_append(struct log *log, unsigned char *data, size_t len) +{ + uint put; + off_t end; + unsigned char buf[8]; + + /* set the last block last-bit and length, in case recovering an + interrupted append, then position the file pointer to append to the + block */ + if (log_last(log, 1)) + return -1; + + /* append, adding stored blocks and updating the offset of the last stored + block as needed, and update the total crc and length */ + while (len) { + /* append as much as we can to the last block */ + put = (MAX_STORE << 10) - log->stored; + if (put > len) + put = (uint)len; + if (put) { + if (write(log->fd, data, put) != put) + return -1; + BAIL(1); + log->tcrc = crc32(log->tcrc, data, put); + log->tlen += put; + log->stored += put; + data += put; + len -= put; + } + + /* if we need to, add a new empty stored block */ + if (len) { + /* mark current block as not last */ + if (log_last(log, 0)) + return -1; + + /* point to new, empty stored block */ + log->last += 4 + log->stored + 1; + log->stored = 0; + } + + /* mark last block as last, update its length */ + if (log_last(log, 1)) + return -1; + BAIL(2); + } + + /* write the new crc and length trailer, and truncate just in case (could + be recovering from partial append with a missing foo.add file) */ + PUT4(buf, log->tcrc); + PUT4(buf + 4, log->tlen); + if (write(log->fd, buf, 8) != 8 || + (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) + return -1; + + /* write the extra field, marking the log file as done, delete .add file */ + if (log_mark(log, NO_OP)) + return -1; + strcpy(log->end, ".add"); + unlink(log->path); /* ignore error, since may not exist */ + return 0; +} + +/* Replace the foo.dict file with the foo.temp file. Also delete the foo.add + file, since the compress operation may have been interrupted before that was + done. Returns 1 if memory could not be allocated, or -1 if reading or + writing foo.gz fails, or if the rename fails for some reason other than + foo.temp not existing. foo.temp not existing is a permitted error, since + the replace operation may have been interrupted after the rename is done, + but before foo.gz is marked as complete. */ +local int log_replace(struct log *log) +{ + int ret; + char *dest; + + /* delete foo.add file */ + strcpy(log->end, ".add"); + unlink(log->path); /* ignore error, since may not exist */ + BAIL(3); + + /* rename foo.name to foo.dict, replacing foo.dict if it exists */ + strcpy(log->end, ".dict"); + dest = malloc(strlen(log->path) + 1); + if (dest == NULL) + return -2; + strcpy(dest, log->path); + strcpy(log->end, ".temp"); + ret = rename(log->path, dest); + free(dest); + if (ret && errno != ENOENT) + return -1; + BAIL(4); + + /* mark the foo.gz file as done */ + return log_mark(log, NO_OP); +} + +/* Compress the len bytes at data and append the compressed data to the + foo.gz deflate data immediately after the previous compressed data. This + overwrites the previous uncompressed data, which was stored in foo.add + and is the data provided in data[0..len-1]. If this operation is + interrupted, it picks up at the start of this routine, with the foo.add + file read in again. If there is no data to compress (len == 0), then we + simply terminate the foo.gz file after the previously compressed data, + appending a final empty stored block and the gzip trailer. Return -1 if + reading or writing the log.gz file failed, or -2 if there was a memory + allocation failure. */ +local int log_compress(struct log *log, unsigned char *data, size_t len) +{ + int fd; + uint got, max; + ssize_t dict; + off_t end; + z_stream strm; + unsigned char buf[DICT]; + + /* compress and append compressed data */ + if (len) { + /* set up for deflate, allocating memory */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8, + Z_DEFAULT_STRATEGY) != Z_OK) + return -2; + + /* read in dictionary (last 32K of data that was compressed) */ + strcpy(log->end, ".dict"); + fd = open(log->path, O_RDONLY, 0); + if (fd >= 0) { + dict = read(fd, buf, DICT); + close(fd); + if (dict < 0) { + deflateEnd(&strm); + return -1; + } + if (dict) + deflateSetDictionary(&strm, buf, (uint)dict); + } + log_touch(log); + + /* prime deflate with last bits of previous block, position write + pointer to write those bits and overwrite what follows */ + if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1), + SEEK_SET) < 0 || + read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) { + deflateEnd(&strm); + return -1; + } + deflatePrime(&strm, (8 - log->back) & 7, *buf); + + /* compress, finishing with a partial non-last empty static block */ + strm.next_in = data; + max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */ + do { + strm.avail_in = len > max ? max : (uint)len; + len -= strm.avail_in; + do { + strm.avail_out = DICT; + strm.next_out = buf; + deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH); + got = DICT - strm.avail_out; + if (got && write(log->fd, buf, got) != got) { + deflateEnd(&strm); + return -1; + } + log_touch(log); + } while (strm.avail_out == 0); + } while (len); + deflateEnd(&strm); + BAIL(5); + + /* find start of empty static block -- scanning backwards the first one + bit is the second bit of the block, if the last byte is zero, then + we know the byte before that has a one in the top bit, since an + empty static block is ten bits long */ + if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 || + read(log->fd, buf, 1) != 1) + return -1; + log->first++; + if (*buf) { + log->back = 1; + while ((*buf & ((uint)1 << (8 - log->back++))) == 0) + ; /* guaranteed to terminate, since *buf != 0 */ + } + else + log->back = 10; + + /* update compressed crc and length */ + log->ccrc = log->tcrc; + log->clen = log->tlen; + } + else { + /* no data to compress -- fix up existing gzip stream */ + log->tcrc = log->ccrc; + log->tlen = log->clen; + } + + /* complete and truncate gzip stream */ + log->last = log->first; + log->stored = 0; + PUT4(buf, log->tcrc); + PUT4(buf + 4, log->tlen); + if (log_last(log, 1) || write(log->fd, buf, 8) != 8 || + (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) + return -1; + BAIL(6); + + /* mark as being in the replace operation */ + if (log_mark(log, REPLACE_OP)) + return -1; + + /* execute the replace operation and mark the file as done */ + return log_replace(log); +} + +/* log a repair record to the .repairs file */ +local void log_log(struct log *log, int op, char *record) +{ + time_t now; + FILE *rec; + + now = time(NULL); + strcpy(log->end, ".repairs"); + rec = fopen(log->path, "a"); + if (rec == NULL) + return; + fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ? + "append" : (op == COMPRESS_OP ? "compress" : "replace"), record); + fclose(rec); + return; +} + +/* Recover the interrupted operation op. First read foo.add for recovering an + append or compress operation. Return -1 if there was an error reading or + writing foo.gz or reading an existing foo.add, or -2 if there was a memory + allocation failure. */ +local int log_recover(struct log *log, int op) +{ + int fd, ret = 0; + unsigned char *data = NULL; + size_t len = 0; + struct stat st; + + /* log recovery */ + log_log(log, op, "start"); + + /* load foo.add file if expected and present */ + if (op == APPEND_OP || op == COMPRESS_OP) { + strcpy(log->end, ".add"); + if (stat(log->path, &st) == 0 && st.st_size) { + len = (size_t)(st.st_size); + if ((off_t)len != st.st_size || + (data = malloc(st.st_size)) == NULL) { + log_log(log, op, "allocation failure"); + return -2; + } + if ((fd = open(log->path, O_RDONLY, 0)) < 0) { + free(data); + log_log(log, op, ".add file read failure"); + return -1; + } + ret = (size_t)read(fd, data, len) != len; + close(fd); + if (ret) { + free(data); + log_log(log, op, ".add file read failure"); + return -1; + } + log_log(log, op, "loaded .add file"); + } + else + log_log(log, op, "missing .add file!"); + } + + /* recover the interrupted operation */ + switch (op) { + case APPEND_OP: + ret = log_append(log, data, len); + break; + case COMPRESS_OP: + ret = log_compress(log, data, len); + break; + case REPLACE_OP: + ret = log_replace(log); + } + + /* log status */ + log_log(log, op, ret ? "failure" : "complete"); + + /* clean up */ + if (data != NULL) + free(data); + return ret; +} + +/* Close the foo.gz file (if open) and release the lock. */ +local void log_close(struct log *log) +{ + if (log->fd >= 0) + close(log->fd); + log->fd = -1; + log_unlock(log); +} + +/* Open foo.gz, verify the header, and load the extra field contents, after + first creating the foo.lock file to gain exclusive access to the foo.* + files. If foo.gz does not exist or is empty, then write the initial header, + extra, and body content of an empty foo.gz log file. If there is an error + creating the lock file due to access restrictions, or an error reading or + writing the foo.gz file, or if the foo.gz file is not a proper log file for + this object (e.g. not a gzip file or does not contain the expected extra + field), then return true. If there is an error, the lock is released. + Otherwise, the lock is left in place. */ +local int log_open(struct log *log) +{ + int op; + + /* release open file resource if left over -- can occur if lock lost + between gzlog_open() and gzlog_write() */ + if (log->fd >= 0) + close(log->fd); + log->fd = -1; + + /* negotiate exclusive access */ + if (log_lock(log) < 0) + return -1; + + /* open the log file, foo.gz */ + strcpy(log->end, ".gz"); + log->fd = open(log->path, O_RDWR | O_CREAT, 0644); + if (log->fd < 0) { + log_close(log); + return -1; + } + + /* if new, initialize foo.gz with an empty log, delete old dictionary */ + if (lseek(log->fd, 0, SEEK_END) == 0) { + if (write(log->fd, log_gzhead, HEAD) != HEAD || + write(log->fd, log_gzext, EXTRA) != EXTRA || + write(log->fd, log_gzbody, BODY) != BODY) { + log_close(log); + return -1; + } + strcpy(log->end, ".dict"); + unlink(log->path); + } + + /* verify log file and load extra field information */ + if ((op = log_head(log)) < 0) { + log_close(log); + return -1; + } + + /* check for interrupted process and if so, recover */ + if (op != NO_OP && log_recover(log, op)) { + log_close(log); + return -1; + } + + /* touch the lock file to prevent another process from grabbing it */ + log_touch(log); + return 0; +} + +/* See gzlog.h for the description of the external methods below */ +gzlog *gzlog_open(char *path) +{ + size_t n; + struct log *log; + + /* check arguments */ + if (path == NULL || *path == 0) + return NULL; + + /* allocate and initialize log structure */ + log = malloc(sizeof(struct log)); + if (log == NULL) + return NULL; + strcpy(log->id, LOGID); + log->fd = -1; + + /* save path and end of path for name construction */ + n = strlen(path); + log->path = malloc(n + 9); /* allow for ".repairs" */ + if (log->path == NULL) { + free(log); + return NULL; + } + strcpy(log->path, path); + log->end = log->path + n; + + /* gain exclusive access and verify log file -- may perform a + recovery operation if needed */ + if (log_open(log)) { + free(log->path); + free(log); + return NULL; + } + + /* return pointer to log structure */ + return log; +} + +/* gzlog_compress() return values: + 0: all good + -1: file i/o error (usually access issue) + -2: memory allocation failure + -3: invalid log pointer argument */ +int gzlog_compress(gzlog *logd) +{ + int fd, ret; + uint block; + size_t len, next; + unsigned char *data, buf[5]; + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + + /* see if we lost the lock -- if so get it again and reload the extra + field information (it probably changed), recover last operation if + necessary */ + if (log_check(log) && log_open(log)) + return -1; + + /* create space for uncompressed data */ + len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) + + log->stored; + if ((data = malloc(len)) == NULL) + return -2; + + /* do statement here is just a cheap trick for error handling */ + do { + /* read in the uncompressed data */ + if (lseek(log->fd, log->first - 1, SEEK_SET) < 0) + break; + next = 0; + while (next < len) { + if (read(log->fd, buf, 5) != 5) + break; + block = PULL2(buf + 1); + if (next + block > len || + read(log->fd, (char *)data + next, block) != block) + break; + next += block; + } + if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored) + break; + log_touch(log); + + /* write the uncompressed data to the .add file */ + strcpy(log->end, ".add"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + break; + ret = (size_t)write(fd, data, len) != len; + if (ret | close(fd)) + break; + log_touch(log); + + /* write the dictionary for the next compress to the .temp file */ + strcpy(log->end, ".temp"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + break; + next = DICT > len ? len : DICT; + ret = (size_t)write(fd, (char *)data + len - next, next) != next; + if (ret | close(fd)) + break; + log_touch(log); + + /* roll back to compressed data, mark the compress in progress */ + log->last = log->first; + log->stored = 0; + if (log_mark(log, COMPRESS_OP)) + break; + BAIL(7); + + /* compress and append the data (clears mark) */ + ret = log_compress(log, data, len); + free(data); + return ret; + } while (0); + + /* broke out of do above on i/o error */ + free(data); + return -1; +} + +/* gzlog_write() return values: + 0: all good + -1: file i/o error (usually access issue) + -2: memory allocation failure + -3: invalid log pointer argument */ +int gzlog_write(gzlog *logd, void *data, size_t len) +{ + int fd, ret; + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + if (data == NULL || len <= 0) + return 0; + + /* see if we lost the lock -- if so get it again and reload the extra + field information (it probably changed), recover last operation if + necessary */ + if (log_check(log) && log_open(log)) + return -1; + + /* create and write .add file */ + strcpy(log->end, ".add"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + return -1; + ret = (size_t)write(fd, data, len) != len; + if (ret | close(fd)) + return -1; + log_touch(log); + + /* mark log file with append in progress */ + if (log_mark(log, APPEND_OP)) + return -1; + BAIL(8); + + /* append data (clears mark) */ + if (log_append(log, data, len)) + return -1; + + /* check to see if it's time to compress -- if not, then done */ + if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER) + return 0; + + /* time to compress */ + return gzlog_compress(log); +} + +/* gzlog_close() return values: + 0: ok + -3: invalid log pointer argument */ +int gzlog_close(gzlog *logd) +{ + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + + /* close the log file and release the lock */ + log_close(log); + + /* free structure and return */ + if (log->path != NULL) + free(log->path); + strcpy(log->id, "bad"); + free(log); + return 0; +} diff --git a/src/deps/src/zlib/examples/gzlog.h b/src/deps/src/zlib/examples/gzlog.h new file mode 100644 index 000000000..4f0510955 --- /dev/null +++ b/src/deps/src/zlib/examples/gzlog.h @@ -0,0 +1,91 @@ +/* gzlog.h + Copyright (C) 2004, 2008, 2012 Mark Adler, all rights reserved + version 2.2, 14 Aug 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* Version History: + 1.0 26 Nov 2004 First version + 2.0 25 Apr 2008 Complete redesign for recovery of interrupted operations + Interface changed slightly in that now path is a prefix + Compression now occurs as needed during gzlog_write() + gzlog_write() now always leaves the log file as valid gzip + 2.1 8 Jul 2012 Fix argument checks in gzlog_compress() and gzlog_write() + 2.2 14 Aug 2012 Clean up signed comparisons + */ + +/* + The gzlog object allows writing short messages to a gzipped log file, + opening the log file locked for small bursts, and then closing it. The log + object works by appending stored (uncompressed) data to the gzip file until + 1 MB has been accumulated. At that time, the stored data is compressed, and + replaces the uncompressed data in the file. The log file is truncated to + its new size at that time. After each write operation, the log file is a + valid gzip file that can decompressed to recover what was written. + + The gzlog operations can be interrupted at any point due to an application or + system crash, and the log file will be recovered the next time the log is + opened with gzlog_open(). + */ + +#ifndef GZLOG_H +#define GZLOG_H + +/* gzlog object type */ +typedef void gzlog; + +/* Open a gzlog object, creating the log file if it does not exist. Return + NULL on error. Note that gzlog_open() could take a while to complete if it + has to wait to verify that a lock is stale (possibly for five minutes), or + if there is significant contention with other instantiations of this object + when locking the resource. path is the prefix of the file names created by + this object. If path is "foo", then the log file will be "foo.gz", and + other auxiliary files will be created and destroyed during the process: + "foo.dict" for a compression dictionary, "foo.temp" for a temporary (next) + dictionary, "foo.add" for data being added or compressed, "foo.lock" for the + lock file, and "foo.repairs" to log recovery operations performed due to + interrupted gzlog operations. A gzlog_open() followed by a gzlog_close() + will recover a previously interrupted operation, if any. */ +gzlog *gzlog_open(char *path); + +/* Write to a gzlog object. Return zero on success, -1 if there is a file i/o + error on any of the gzlog files (this should not happen if gzlog_open() + succeeded, unless the device has run out of space or leftover auxiliary + files have permissions or ownership that prevent their use), -2 if there is + a memory allocation failure, or -3 if the log argument is invalid (e.g. if + it was not created by gzlog_open()). This function will write data to the + file uncompressed, until 1 MB has been accumulated, at which time that data + will be compressed. The log file will be a valid gzip file upon successful + return. */ +int gzlog_write(gzlog *log, void *data, size_t len); + +/* Force compression of any uncompressed data in the log. This should be used + sparingly, if at all. The main application would be when a log file will + not be appended to again. If this is used to compress frequently while + appending, it will both significantly increase the execution time and + reduce the compression ratio. The return codes are the same as for + gzlog_write(). */ +int gzlog_compress(gzlog *log); + +/* Close a gzlog object. Return zero on success, -3 if the log argument is + invalid. The log object is freed, and so cannot be referenced again. */ +int gzlog_close(gzlog *log); + +#endif diff --git a/src/deps/src/zlib/examples/gznorm.c b/src/deps/src/zlib/examples/gznorm.c new file mode 100644 index 000000000..68e0a0f29 --- /dev/null +++ b/src/deps/src/zlib/examples/gznorm.c @@ -0,0 +1,470 @@ +/* gznorm.c -- normalize a gzip stream + * Copyright (C) 2018 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.0 7 Oct 2018 Mark Adler */ + +// gznorm takes a gzip stream, potentially containing multiple members, and +// converts it to a gzip stream with a single member. In addition the gzip +// header is normalized, removing the file name and time stamp, and setting the +// other header contents (XFL, OS) to fixed values. gznorm does not recompress +// the data, so it is fast, but no advantage is gained from the history that +// could be available across member boundaries. + +#include // fread, fwrite, putc, fflush, ferror, fprintf, + // vsnprintf, stdout, stderr, NULL, FILE +#include // malloc, free +#include // strerror +#include // errno +#include // va_list, va_start, va_end +#include "zlib.h" // inflateInit2, inflate, inflateReset, inflateEnd, + // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK, + // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR, + // Z_MEM_ERROR + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#define local static + +// printf to an allocated string. Return the string, or NULL if the printf or +// allocation fails. +local char *aprintf(char *fmt, ...) { + // Get the length of the result of the printf. + va_list args; + va_start(args, fmt); + int len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + if (len < 0) + return NULL; + + // Allocate the required space and printf to it. + char *str = malloc(len + 1); + if (str == NULL) + return NULL; + va_start(args, fmt); + vsnprintf(str, len + 1, fmt, args); + va_end(args); + return str; +} + +// Return with an error, putting an allocated error message in *err. Doing an +// inflateEnd() on an already ended state, or one with state set to Z_NULL, is +// permitted. +#define BYE(...) \ + do { \ + inflateEnd(&strm); \ + *err = aprintf(__VA_ARGS__); \ + return 1; \ + } while (0) + +// Chunk size for buffered reads and for decompression. Twice this many bytes +// will be allocated on the stack by gzip_normalize(). Must fit in an unsigned. +#define CHUNK 16384 + +// Read a gzip stream from in and write an equivalent normalized gzip stream to +// out. If given no input, an empty gzip stream will be written. If successful, +// 0 is returned, and *err is set to NULL. On error, 1 is returned, where the +// details of the error are returned in *err, a pointer to an allocated string. +// +// The input may be a stream with multiple gzip members, which is converted to +// a single gzip member on the output. Each gzip member is decompressed at the +// level of deflate blocks. This enables clearing the last-block bit, shifting +// the compressed data to concatenate to the previous member's compressed data, +// which can end at an arbitrary bit boundary, and identifying stored blocks in +// order to resynchronize those to byte boundaries. The deflate compressed data +// is terminated with a 10-bit empty fixed block. If any members on the input +// end with a 10-bit empty fixed block, then that block is excised from the +// stream. This avoids appending empty fixed blocks for every normalization, +// and assures that gzip_normalize applied a second time will not change the +// input. The pad bits after stored block headers and after the final deflate +// block are all forced to zeros. +local int gzip_normalize(FILE *in, FILE *out, char **err) { + // initialize the inflate engine to process a gzip member + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + if (inflateInit2(&strm, 15 + 16) != Z_OK) + BYE("out of memory"); + + // State while processing the input gzip stream. + enum { // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ... + BETWEEN, // between gzip members (must end in this state) + HEAD, // reading a gzip header + BLOCK, // reading deflate blocks + TAIL // reading a gzip trailer + } state = BETWEEN; // current component being processed + unsigned long crc = 0; // accumulated CRC of uncompressed data + unsigned long len = 0; // accumulated length of uncompressed data + unsigned long buf = 0; // deflate stream bit buffer of num bits + int num = 0; // number of bits in buf (at bottom) + + // Write a canonical gzip header (no mod time, file name, comment, extra + // block, or extra flags, and OS is marked as unknown). + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + + // Process the gzip stream from in until reaching the end of the input, + // encountering invalid input, or experiencing an i/o error. + int more; // true if not at the end of the input + do { + // State inside this loop. + unsigned char *put; // next input buffer location to process + int prev; // number of bits from previous block in + // the bit buffer, or -1 if not at the + // start of a block + unsigned long long memb; // uncompressed length of member + size_t tail; // number of trailer bytes read (0..8) + unsigned long part; // accumulated trailer component + + // Get the next chunk of input from in. + unsigned char dat[CHUNK]; + strm.avail_in = fread(dat, 1, CHUNK, in); + if (strm.avail_in == 0) + break; + more = strm.avail_in == CHUNK; + strm.next_in = put = dat; + + // Run that chunk of input through the inflate engine to exhaustion. + do { + // At this point it is assured that strm.avail_in > 0. + + // Inflate until the end of a gzip component (header, deflate + // block, trailer) is reached, or until all of the chunk is + // consumed. The resulting decompressed data is discarded, though + // the total size of the decompressed data in each member is + // tracked, for the calculation of the total CRC. + do { + // inflate and handle any errors + unsigned char scrap[CHUNK]; + strm.avail_out = CHUNK; + strm.next_out = scrap; + int ret = inflate(&strm, Z_BLOCK); + if (ret == Z_MEM_ERROR) + BYE("out of memory"); + if (ret == Z_DATA_ERROR) + BYE("input invalid: %s", strm.msg); + if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END) + BYE("internal error"); + + // Update the number of uncompressed bytes generated in this + // member. The actual count (not modulo 2^32) is required to + // correctly compute the total CRC. + unsigned got = CHUNK - strm.avail_out; + memb += got; + if (memb < got) + BYE("overflow error"); + + // Continue to process this chunk until it is consumed, or + // until the end of a component (header, deflate block, or + // trailer) is reached. + } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0); + + // Since strm.avail_in was > 0 for the inflate call, some input was + // just consumed. It is therefore assured that put < strm.next_in. + + // Disposition the consumed component or part of a component. + switch (state) { + case BETWEEN: + state = HEAD; + // Fall through to HEAD when some or all of the header is + // processed. + + case HEAD: + // Discard the header. + if (strm.data_type & 0x80) { + // End of header reached -- deflate blocks follow. + put = strm.next_in; + prev = num; + memb = 0; + state = BLOCK; + } + break; + + case BLOCK: + // Copy the deflate stream to the output, but with the + // last-block-bit cleared. Re-synchronize stored block + // headers to the output byte boundaries. The bytes at + // put..strm.next_in-1 is the compressed data that has been + // processed and is ready to be copied to the output. + + // At this point, it is assured that new compressed data is + // available, i.e., put < strm.next_in. If prev is -1, then + // that compressed data starts in the middle of a deflate + // block. If prev is not -1, then the bits in the bit + // buffer, possibly combined with the bits in *put, contain + // the three-bit header of the new deflate block. In that + // case, prev is the number of bits from the previous block + // that remain in the bit buffer. Since num is the number + // of bits in the bit buffer, we have that num - prev is + // the number of bits from the new block currently in the + // bit buffer. + + // If strm.data_type & 0xc0 is 0x80, then the last byte of + // the available compressed data includes the last bits of + // the end of a deflate block. In that case, that last byte + // also has strm.data_type & 0x1f bits of the next deflate + // block, in the range 0..7. If strm.data_type & 0xc0 is + // 0xc0, then the last byte of the compressed data is the + // end of the deflate stream, followed by strm.data_type & + // 0x1f pad bits, also in the range 0..7. + + // Set bits to the number of bits not yet consumed from the + // last byte. If we are at the end of the block, bits is + // either the number of bits in the last byte belonging to + // the next block, or the number of pad bits after the + // final block. In either of those cases, bits is in the + // range 0..7. + ; // (required due to C syntax oddity) + int bits = strm.data_type & 0x1f; + + if (prev != -1) { + // We are at the start of a new block. Clear the last + // block bit, and check for special cases. If it is a + // stored block, then emit the header and pad to the + // next byte boundary. If it is a final, empty fixed + // block, then excise it. + + // Some or all of the three header bits for this block + // may already be in the bit buffer. Load any remaining + // header bits into the bit buffer. + if (num - prev < 3) { + buf += (unsigned long)*put++ << num; + num += 8; + } + + // Set last to have a 1 in the position of the last + // block bit in the bit buffer. + unsigned long last = (unsigned long)1 << prev; + + if (((buf >> prev) & 7) == 3) { + // This is a final fixed block. Load at least ten + // bits from this block, including the header, into + // the bit buffer. We already have at least three, + // so at most one more byte needs to be loaded. + if (num - prev < 10) { + if (put == strm.next_in) + // Need to go get and process more input. + // We'll end up back here to finish this. + break; + buf += (unsigned long)*put++ << num; + num += 8; + } + if (((buf >> prev) & 0x3ff) == 3) { + // That final fixed block is empty. Delete it + // to avoid adding an empty block every time a + // gzip stream is normalized. + num = prev; + buf &= last - 1; // zero the pad bits + } + } + else if (((buf >> prev) & 6) == 0) { + // This is a stored block. Flush to the next + // byte boundary after the three-bit header. + num = (prev + 10) & ~7; + buf &= last - 1; // zero the pad bits + } + + // Clear the last block bit. + buf &= ~last; + + // Write out complete bytes in the bit buffer. + while (num >= 8) { + putc(buf, out); + buf >>= 8; + num -= 8; + } + + // If no more bytes left to process, then we have + // consumed the byte that had bits from the next block. + if (put == strm.next_in) + bits = 0; + } + + // We are done handling the deflate block header. Now copy + // all or almost all of the remaining compressed data that + // has been processed so far. Don't copy one byte at the + // end if it contains bits from the next deflate block or + // pad bits at the end of a deflate block. + + // mix is 1 if we are at the end of a deflate block, and if + // some of the bits in the last byte follow this block. mix + // is 0 if we are in the middle of a deflate block, if the + // deflate block ended on a byte boundary, or if all of the + // compressed data processed so far has been consumed. + int mix = (strm.data_type & 0x80) && bits; + + // Copy all of the processed compressed data to the output, + // except for the last byte if it contains bits from the + // next deflate block or pad bits at the end of the deflate + // stream. Copy the data after shifting in num bits from + // buf in front of it, leaving num bits from the end of the + // compressed data in buf when done. + unsigned char *end = strm.next_in - mix; + if (put < end) { + if (num) + // Insert num bits from buf before the data being + // copied. + do { + buf += (unsigned)(*put++) << num; + putc(buf, out); + buf >>= 8; + } while (put < end); + else { + // No shifting needed -- write directly. + fwrite(put, 1, end - put, out); + put = end; + } + } + + // Process the last processed byte if it wasn't written. + if (mix) { + // Load the last byte into the bit buffer. + buf += (unsigned)(*put++) << num; + num += 8; + + if (strm.data_type & 0x40) { + // We are at the end of the deflate stream and + // there are bits pad bits. Discard the pad bits + // and write a byte to the output, if available. + // Leave the num bits left over in buf to prepend + // to the next deflate stream. + num -= bits; + if (num >= 8) { + putc(buf, out); + num -= 8; + buf >>= 8; + } + + // Force the pad bits in the bit buffer to zeros. + buf &= ((unsigned long)1 << num) - 1; + + // Don't need to set prev here since going to TAIL. + } + else + // At the end of an internal deflate block. Leave + // the last byte in the bit buffer to examine on + // the next entry to BLOCK, when more bits from the + // next block will be available. + prev = num - bits; // number of bits in buffer + // from current block + } + + // Don't have a byte left over, so we are in the middle of + // a deflate block, or the deflate block ended on a byte + // boundary. Set prev appropriately for the next entry into + // BLOCK. + else if (strm.data_type & 0x80) + // The block ended on a byte boundary, so no header + // bits are in the bit buffer. + prev = num; + else + // In the middle of a deflate block, so no header here. + prev = -1; + + // Check for the end of the deflate stream. + if ((strm.data_type & 0xc0) == 0xc0) { + // That ends the deflate stream on the input side, the + // pad bits were discarded, and any remaining bits from + // the last block in the stream are saved in the bit + // buffer to prepend to the next stream. Process the + // gzip trailer next. + tail = 0; + part = 0; + state = TAIL; + } + break; + + case TAIL: + // Accumulate available trailer bytes to update the total + // CRC and the total uncompressed length. + do { + part = (part >> 8) + ((unsigned long)(*put++) << 24); + tail++; + if (tail == 4) { + // Update the total CRC. + z_off_t len2 = memb; + if (len2 < 0 || (unsigned long long)len2 != memb) + BYE("overflow error"); + crc = crc ? crc32_combine(crc, part, len2) : part; + part = 0; + } + else if (tail == 8) { + // Update the total uncompressed length. (It's ok + // if this sum is done modulo 2^32.) + len += part; + + // At the end of a member. Set up to inflate an + // immediately following gzip member. (If we made + // it this far, then the trailer was valid.) + if (inflateReset(&strm) != Z_OK) + BYE("internal error"); + state = BETWEEN; + break; + } + } while (put < strm.next_in); + break; + } + + // Process the input buffer until completely consumed. + } while (strm.avail_in > 0); + + // Process input until end of file, invalid input, or i/o error. + } while (more); + + // Done with the inflate engine. + inflateEnd(&strm); + + // Verify the validity of the input. + if (state != BETWEEN) + BYE("input invalid: incomplete gzip stream"); + + // Write the remaining deflate stream bits, followed by a terminating + // deflate fixed block. + buf += (unsigned long)3 << num; + putc(buf, out); + putc(buf >> 8, out); + if (num > 6) + putc(0, out); + + // Write the gzip trailer, which is the CRC and the uncompressed length + // modulo 2^32, both in little-endian order. + putc(crc, out); + putc(crc >> 8, out); + putc(crc >> 16, out); + putc(crc >> 24, out); + putc(len, out); + putc(len >> 8, out); + putc(len >> 16, out); + putc(len >> 24, out); + fflush(out); + + // Check for any i/o errors. + if (ferror(in) || ferror(out)) + BYE("i/o error: %s", strerror(errno)); + + // All good! + *err = NULL; + return 0; +} + +// Normalize the gzip stream on stdin, writing the result to stdout. +int main(void) { + // Avoid end-of-line conversions on evil operating systems. + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + // Normalize from stdin to stdout, returning 1 on error, 0 if ok. + char *err; + int ret = gzip_normalize(stdin, stdout, &err); + if (ret) + fprintf(stderr, "gznorm error: %s\n", err); + free(err); + return ret; +} diff --git a/src/deps/src/zlib/examples/zlib_how.html b/src/deps/src/zlib/examples/zlib_how.html new file mode 100644 index 000000000..43271b988 --- /dev/null +++ b/src/deps/src/zlib/examples/zlib_how.html @@ -0,0 +1,549 @@ + + + + +zlib Usage Example + + + +

zlib Usage Example

+We often get questions about how the deflate() and inflate() functions should be used. +Users wonder when they should provide more input, when they should use more output, +what to do with a Z_BUF_ERROR, how to make sure the process terminates properly, and +so on. So for those who have read zlib.h (a few times), and +would like further edification, below is an annotated example in C of simple routines to compress and decompress +from an input file to an output file using deflate() and inflate() respectively. The +annotations are interspersed between lines of the code. So please read between the lines. +We hope this helps explain some of the intricacies of zlib. +

+Without further ado, here is the program zpipe.c: +


+/* zpipe.c: example of proper use of zlib's inflate() and deflate()
+   Not copyrighted -- provided to the public domain
+   Version 1.4  11 December 2005  Mark Adler */
+
+/* Version history:
+   1.0  30 Oct 2004  First version
+   1.1   8 Nov 2004  Add void casting for unused return values
+                     Use switch statement for inflate() return values
+   1.2   9 Nov 2004  Add assertions to document zlib guarantees
+   1.3   6 Apr 2005  Remove incorrect assertion in inf()
+   1.4  11 Dec 2005  Add hack to avoid MSDOS end-of-line conversions
+                     Avoid some compiler warnings for input and output buffers
+ */
+
+We now include the header files for the required definitions. From +stdio.h we use fopen(), fread(), fwrite(), +feof(), ferror(), and fclose() for file i/o, and +fputs() for error messages. From string.h we use +strcmp() for command line argument processing. +From assert.h we use the assert() macro. +From zlib.h +we use the basic compression functions deflateInit(), +deflate(), and deflateEnd(), and the basic decompression +functions inflateInit(), inflate(), and +inflateEnd(). +

+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "zlib.h"
+
+This is an ugly hack required to avoid corruption of the input and output data on +Windows/MS-DOS systems. Without this, those systems would assume that the input and output +files are text, and try to convert the end-of-line characters from one standard to +another. That would corrupt binary data, and in particular would render the compressed data unusable. +This sets the input and output to binary which suppresses the end-of-line conversions. +SET_BINARY_MODE() will be used later on stdin and stdout, at the beginning of main(). +

+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>
+#  include <io.h>
+#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#else
+#  define SET_BINARY_MODE(file)
+#endif
+
+CHUNK is simply the buffer size for feeding data to and pulling data +from the zlib routines. Larger buffer sizes would be more efficient, +especially for inflate(). If the memory is available, buffers sizes +on the order of 128K or 256K bytes should be used. +

+#define CHUNK 16384
+
+The def() routine compresses data from an input file to an output file. The output data +will be in the zlib format, which is different from the gzip or zip +formats. The zlib format has a very small header of only two bytes to identify it as +a zlib stream and to provide decoding information, and a four-byte trailer with a fast +check value to verify the integrity of the uncompressed data after decoding. +

+/* Compress from file source to file dest until EOF on source.
+   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+   allocated for processing, Z_STREAM_ERROR if an invalid compression
+   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
+   version of the library linked do not match, or Z_ERRNO if there is
+   an error reading or writing the files. */
+int def(FILE *source, FILE *dest, int level)
+{
+
+Here are the local variables for def(). ret will be used for zlib +return codes. flush will keep track of the current flushing state for deflate(), +which is either no flushing, or flush to completion after the end of the input file is reached. +have is the amount of data returned from deflate(). The strm structure +is used to pass information to and from the zlib routines, and to maintain the +deflate() state. in and out are the input and output buffers for +deflate(). +

+    int ret, flush;
+    unsigned have;
+    z_stream strm;
+    unsigned char in[CHUNK];
+    unsigned char out[CHUNK];
+
+The first thing we do is to initialize the zlib state for compression using +deflateInit(). This must be done before the first use of deflate(). +The zalloc, zfree, and opaque fields in the strm +structure must be initialized before calling deflateInit(). Here they are +set to the zlib constant Z_NULL to request that zlib use +the default memory allocation routines. An application may also choose to provide +custom memory allocation routines here. deflateInit() will allocate on the +order of 256K bytes for the internal state. +(See zlib Technical Details.) +

+deflateInit() is called with a pointer to the structure to be initialized and +the compression level, which is an integer in the range of -1 to 9. Lower compression +levels result in faster execution, but less compression. Higher levels result in +greater compression, but slower execution. The zlib constant Z_DEFAULT_COMPRESSION, +equal to -1, +provides a good compromise between compression and speed and is equivalent to level 6. +Level 0 actually does no compression at all, and in fact expands the data slightly to produce +the zlib format (it is not a byte-for-byte copy of the input). +More advanced applications of zlib +may use deflateInit2() here instead. Such an application may want to reduce how +much memory will be used, at some price in compression. Or it may need to request a +gzip header and trailer instead of a zlib header and trailer, or raw +encoding with no header or trailer at all. +

+We must check the return value of deflateInit() against the zlib constant +Z_OK to make sure that it was able to +allocate memory for the internal state, and that the provided arguments were valid. +deflateInit() will also check that the version of zlib that the zlib.h +file came from matches the version of zlib actually linked with the program. This +is especially important for environments in which zlib is a shared library. +

+Note that an application can initialize multiple, independent zlib streams, which can +operate in parallel. The state information maintained in the structure allows the zlib +routines to be reentrant. +


+    /* allocate deflate state */
+    strm.zalloc = Z_NULL;
+    strm.zfree = Z_NULL;
+    strm.opaque = Z_NULL;
+    ret = deflateInit(&strm, level);
+    if (ret != Z_OK)
+        return ret;
+
+With the pleasantries out of the way, now we can get down to business. The outer do-loop +reads all of the input file and exits at the bottom of the loop once end-of-file is reached. +This loop contains the only call of deflate(). So we must make sure that all of the +input data has been processed and that all of the output data has been generated and consumed +before we fall out of the loop at the bottom. +

+    /* compress until end of file */
+    do {
+
+We start off by reading data from the input file. The number of bytes read is put directly +into avail_in, and a pointer to those bytes is put into next_in. We also +check to see if end-of-file on the input has been reached using feof(). +If we are at the end of file, then flush is set to the +zlib constant Z_FINISH, which is later passed to deflate() to +indicate that this is the last chunk of input data to compress. +If we are not yet at the end of the input, then the zlib +constant Z_NO_FLUSH will be passed to deflate to indicate that we are still +in the middle of the uncompressed data. +

+If there is an error in reading from the input file, the process is aborted with +deflateEnd() being called to free the allocated zlib state before returning +the error. We wouldn't want a memory leak, now would we? deflateEnd() can be called +at any time after the state has been initialized. Once that's done, deflateInit() (or +deflateInit2()) would have to be called to start a new compression process. There is +no point here in checking the deflateEnd() return code. The deallocation can't fail. +


+        strm.avail_in = fread(in, 1, CHUNK, source);
+        if (ferror(source)) {
+            (void)deflateEnd(&strm);
+            return Z_ERRNO;
+        }
+        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
+        strm.next_in = in;
+
+The inner do-loop passes our chunk of input data to deflate(), and then +keeps calling deflate() until it is done producing output. Once there is no more +new output, deflate() is guaranteed to have consumed all of the input, i.e., +avail_in will be zero. +

+        /* run deflate() on input until output buffer not full, finish
+           compression if all of source has been read in */
+        do {
+
+Output space is provided to deflate() by setting avail_out to the number +of available output bytes and next_out to a pointer to that space. +

+            strm.avail_out = CHUNK;
+            strm.next_out = out;
+
+Now we call the compression engine itself, deflate(). It takes as many of the +avail_in bytes at next_in as it can process, and writes as many as +avail_out bytes to next_out. Those counters and pointers are then +updated past the input data consumed and the output data written. It is the amount of +output space available that may limit how much input is consumed. +Hence the inner loop to make sure that +all of the input is consumed by providing more output space each time. Since avail_in +and next_in are updated by deflate(), we don't have to mess with those +between deflate() calls until it's all used up. +

+The parameters to deflate() are a pointer to the strm structure containing +the input and output information and the internal compression engine state, and a parameter +indicating whether and how to flush data to the output. Normally deflate will consume +several K bytes of input data before producing any output (except for the header), in order +to accumulate statistics on the data for optimum compression. It will then put out a burst of +compressed data, and proceed to consume more input before the next burst. Eventually, +deflate() +must be told to terminate the stream, complete the compression with provided input data, and +write out the trailer check value. deflate() will continue to compress normally as long +as the flush parameter is Z_NO_FLUSH. Once the Z_FINISH parameter is provided, +deflate() will begin to complete the compressed output stream. However depending on how +much output space is provided, deflate() may have to be called several times until it +has provided the complete compressed stream, even after it has consumed all of the input. The flush +parameter must continue to be Z_FINISH for those subsequent calls. +

+There are other values of the flush parameter that are used in more advanced applications. You can +force deflate() to produce a burst of output that encodes all of the input data provided +so far, even if it wouldn't have otherwise, for example to control data latency on a link with +compressed data. You can also ask that deflate() do that as well as erase any history up to +that point so that what follows can be decompressed independently, for example for random access +applications. Both requests will degrade compression by an amount depending on how often such +requests are made. +

+deflate() has a return value that can indicate errors, yet we do not check it here. Why +not? Well, it turns out that deflate() can do no wrong here. Let's go through +deflate()'s return values and dispense with them one by one. The possible values are +Z_OK, Z_STREAM_END, Z_STREAM_ERROR, or Z_BUF_ERROR. Z_OK +is, well, ok. Z_STREAM_END is also ok and will be returned for the last call of +deflate(). This is already guaranteed by calling deflate() with Z_FINISH +until it has no more output. Z_STREAM_ERROR is only possible if the stream is not +initialized properly, but we did initialize it properly. There is no harm in checking for +Z_STREAM_ERROR here, for example to check for the possibility that some +other part of the application inadvertently clobbered the memory containing the zlib state. +Z_BUF_ERROR will be explained further below, but +suffice it to say that this is simply an indication that deflate() could not consume +more input or produce more output. deflate() can be called again with more output space +or more available input, which it will be in this code. +


+            ret = deflate(&strm, flush);    /* no bad return value */
+            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+
+Now we compute how much output deflate() provided on the last call, which is the +difference between how much space was provided before the call, and how much output space +is still available after the call. Then that data, if any, is written to the output file. +We can then reuse the output buffer for the next call of deflate(). Again if there +is a file i/o error, we call deflateEnd() before returning to avoid a memory leak. +

+            have = CHUNK - strm.avail_out;
+            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
+                (void)deflateEnd(&strm);
+                return Z_ERRNO;
+            }
+
+The inner do-loop is repeated until the last deflate() call fails to fill the +provided output buffer. Then we know that deflate() has done as much as it can with +the provided input, and that all of that input has been consumed. We can then fall out of this +loop and reuse the input buffer. +

+The way we tell that deflate() has no more output is by seeing that it did not fill +the output buffer, leaving avail_out greater than zero. However suppose that +deflate() has no more output, but just so happened to exactly fill the output buffer! +avail_out is zero, and we can't tell that deflate() has done all it can. +As far as we know, deflate() +has more output for us. So we call it again. But now deflate() produces no output +at all, and avail_out remains unchanged as CHUNK. That deflate() call +wasn't able to do anything, either consume input or produce output, and so it returns +Z_BUF_ERROR. (See, I told you I'd cover this later.) However this is not a problem at +all. Now we finally have the desired indication that deflate() is really done, +and so we drop out of the inner loop to provide more input to deflate(). +

+With flush set to Z_FINISH, this final set of deflate() calls will +complete the output stream. Once that is done, subsequent calls of deflate() would return +Z_STREAM_ERROR if the flush parameter is not Z_FINISH, and do no more processing +until the state is reinitialized. +

+Some applications of zlib have two loops that call deflate() +instead of the single inner loop we have here. The first loop would call +without flushing and feed all of the data to deflate(). The second loop would call +deflate() with no more +data and the Z_FINISH parameter to complete the process. As you can see from this +example, that can be avoided by simply keeping track of the current flush state. +


+        } while (strm.avail_out == 0);
+        assert(strm.avail_in == 0);     /* all input will be used */
+
+Now we check to see if we have already processed all of the input file. That information was +saved in the flush variable, so we see if that was set to Z_FINISH. If so, +then we're done and we fall out of the outer loop. We're guaranteed to get Z_STREAM_END +from the last deflate() call, since we ran it until the last chunk of input was +consumed and all of the output was generated. +

+        /* done when last data in file processed */
+    } while (flush != Z_FINISH);
+    assert(ret == Z_STREAM_END);        /* stream will be complete */
+
+The process is complete, but we still need to deallocate the state to avoid a memory leak +(or rather more like a memory hemorrhage if you didn't do this). Then +finally we can return with a happy return value. +

+    /* clean up and return */
+    (void)deflateEnd(&strm);
+    return Z_OK;
+}
+
+Now we do the same thing for decompression in the inf() routine. inf() +decompresses what is hopefully a valid zlib stream from the input file and writes the +uncompressed data to the output file. Much of the discussion above for def() +applies to inf() as well, so the discussion here will focus on the differences between +the two. +

+/* Decompress from file source to file dest until stream ends or EOF.
+   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+   allocated for processing, Z_DATA_ERROR if the deflate data is
+   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
+   the version of the library linked do not match, or Z_ERRNO if there
+   is an error reading or writing the files. */
+int inf(FILE *source, FILE *dest)
+{
+
+The local variables have the same functionality as they do for def(). The +only difference is that there is no flush variable, since inflate() +can tell from the zlib stream itself when the stream is complete. +

+    int ret;
+    unsigned have;
+    z_stream strm;
+    unsigned char in[CHUNK];
+    unsigned char out[CHUNK];
+
+The initialization of the state is the same, except that there is no compression level, +of course, and two more elements of the structure are initialized. avail_in +and next_in must be initialized before calling inflateInit(). This +is because the application has the option to provide the start of the zlib stream in +order for inflateInit() to have access to information about the compression +method to aid in memory allocation. In the current implementation of zlib +(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of +inflate() anyway. However those fields must be initialized since later versions +of zlib that provide more compression methods may take advantage of this interface. +In any case, no decompression is performed by inflateInit(), so the +avail_out and next_out fields do not need to be initialized before calling. +

+Here avail_in is set to zero and next_in is set to Z_NULL to +indicate that no input data is being provided. +


+    /* allocate inflate state */
+    strm.zalloc = Z_NULL;
+    strm.zfree = Z_NULL;
+    strm.opaque = Z_NULL;
+    strm.avail_in = 0;
+    strm.next_in = Z_NULL;
+    ret = inflateInit(&strm);
+    if (ret != Z_OK)
+        return ret;
+
+The outer do-loop decompresses input until inflate() indicates +that it has reached the end of the compressed data and has produced all of the uncompressed +output. This is in contrast to def() which processes all of the input file. +If end-of-file is reached before the compressed data self-terminates, then the compressed +data is incomplete and an error is returned. +

+    /* decompress until deflate stream ends or end of file */
+    do {
+
+We read input data and set the strm structure accordingly. If we've reached the +end of the input file, then we leave the outer loop and report an error, since the +compressed data is incomplete. Note that we may read more data than is eventually consumed +by inflate(), if the input file continues past the zlib stream. +For applications where zlib streams are embedded in other data, this routine would +need to be modified to return the unused data, or at least indicate how much of the input +data was not used, so the application would know where to pick up after the zlib stream. +

+        strm.avail_in = fread(in, 1, CHUNK, source);
+        if (ferror(source)) {
+            (void)inflateEnd(&strm);
+            return Z_ERRNO;
+        }
+        if (strm.avail_in == 0)
+            break;
+        strm.next_in = in;
+
+The inner do-loop has the same function it did in def(), which is to +keep calling inflate() until has generated all of the output it can with the +provided input. +

+        /* run inflate() on input until output buffer not full */
+        do {
+
+Just like in def(), the same output space is provided for each call of inflate(). +

+            strm.avail_out = CHUNK;
+            strm.next_out = out;
+
+Now we run the decompression engine itself. There is no need to adjust the flush parameter, since +the zlib format is self-terminating. The main difference here is that there are +return values that we need to pay attention to. Z_DATA_ERROR +indicates that inflate() detected an error in the zlib compressed data format, +which means that either the data is not a zlib stream to begin with, or that the data was +corrupted somewhere along the way since it was compressed. The other error to be processed is +Z_MEM_ERROR, which can occur since memory allocation is deferred until inflate() +needs it, unlike deflate(), whose memory is allocated at the start by deflateInit(). +

+Advanced applications may use +deflateSetDictionary() to prime deflate() with a set of likely data to improve the +first 32K or so of compression. This is noted in the zlib header, so inflate() +requests that that dictionary be provided before it can start to decompress. Without the dictionary, +correct decompression is not possible. For this routine, we have no idea what the dictionary is, +so the Z_NEED_DICT indication is converted to a Z_DATA_ERROR. +

+inflate() can also return Z_STREAM_ERROR, which should not be possible here, +but could be checked for as noted above for def(). Z_BUF_ERROR does not need to be +checked for here, for the same reasons noted for def(). Z_STREAM_END will be +checked for later. +


+            ret = inflate(&strm, Z_NO_FLUSH);
+            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+            switch (ret) {
+            case Z_NEED_DICT:
+                ret = Z_DATA_ERROR;     /* and fall through */
+            case Z_DATA_ERROR:
+            case Z_MEM_ERROR:
+                (void)inflateEnd(&strm);
+                return ret;
+            }
+
+The output of inflate() is handled identically to that of deflate(). +

+            have = CHUNK - strm.avail_out;
+            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
+                (void)inflateEnd(&strm);
+                return Z_ERRNO;
+            }
+
+The inner do-loop ends when inflate() has no more output as indicated +by not filling the output buffer, just as for deflate(). In this case, we cannot +assert that strm.avail_in will be zero, since the deflate stream may end before the file +does. +

+        } while (strm.avail_out == 0);
+
+The outer do-loop ends when inflate() reports that it has reached the +end of the input zlib stream, has completed the decompression and integrity +check, and has provided all of the output. This is indicated by the inflate() +return value Z_STREAM_END. The inner loop is guaranteed to leave ret +equal to Z_STREAM_END if the last chunk of the input file read contained the end +of the zlib stream. So if the return value is not Z_STREAM_END, the +loop continues to read more input. +

+        /* done when inflate() says it's done */
+    } while (ret != Z_STREAM_END);
+
+At this point, decompression successfully completed, or we broke out of the loop due to no +more data being available from the input file. If the last inflate() return value +is not Z_STREAM_END, then the zlib stream was incomplete and a data error +is returned. Otherwise, we return with a happy return value. Of course, inflateEnd() +is called first to avoid a memory leak. +

+    /* clean up and return */
+    (void)inflateEnd(&strm);
+    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
+}
+
+That ends the routines that directly use zlib. The following routines make this +a command-line program by running data through the above routines from stdin to +stdout, and handling any errors reported by def() or inf(). +

+zerr() is used to interpret the possible error codes from def() +and inf(), as detailed in their comments above, and print out an error message. +Note that these are only a subset of the possible return values from deflate() +and inflate(). +


+/* report a zlib or i/o error */
+void zerr(int ret)
+{
+    fputs("zpipe: ", stderr);
+    switch (ret) {
+    case Z_ERRNO:
+        if (ferror(stdin))
+            fputs("error reading stdin\n", stderr);
+        if (ferror(stdout))
+            fputs("error writing stdout\n", stderr);
+        break;
+    case Z_STREAM_ERROR:
+        fputs("invalid compression level\n", stderr);
+        break;
+    case Z_DATA_ERROR:
+        fputs("invalid or incomplete deflate data\n", stderr);
+        break;
+    case Z_MEM_ERROR:
+        fputs("out of memory\n", stderr);
+        break;
+    case Z_VERSION_ERROR:
+        fputs("zlib version mismatch!\n", stderr);
+    }
+}
+
+Here is the main() routine used to test def() and inf(). The +zpipe command is simply a compression pipe from stdin to stdout, if +no arguments are given, or it is a decompression pipe if zpipe -d is used. If any other +arguments are provided, no compression or decompression is performed. Instead a usage +message is displayed. Examples are zpipe < foo.txt > foo.txt.z to compress, and +zpipe -d < foo.txt.z > foo.txt to decompress. +

+/* compress or decompress from stdin to stdout */
+int main(int argc, char **argv)
+{
+    int ret;
+
+    /* avoid end-of-line conversions */
+    SET_BINARY_MODE(stdin);
+    SET_BINARY_MODE(stdout);
+
+    /* do compression if no arguments */
+    if (argc == 1) {
+        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
+        if (ret != Z_OK)
+            zerr(ret);
+        return ret;
+    }
+
+    /* do decompression if -d specified */
+    else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
+        ret = inf(stdin, stdout);
+        if (ret != Z_OK)
+            zerr(ret);
+        return ret;
+    }
+
+    /* otherwise, report usage */
+    else {
+        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
+        return 1;
+    }
+}
+
+
+Last modified 24 January 2023
+Copyright © 2004-2023 Mark Adler

+ +Creative Commons License + +Creative Commons Attribution-NoDerivatives 4.0 International License. + + diff --git a/src/deps/src/zlib/examples/zpipe.c b/src/deps/src/zlib/examples/zpipe.c new file mode 100644 index 000000000..83535d169 --- /dev/null +++ b/src/deps/src/zlib/examples/zpipe.c @@ -0,0 +1,205 @@ +/* zpipe.c: example of proper use of zlib's inflate() and deflate() + Not copyrighted -- provided to the public domain + Version 1.4 11 December 2005 Mark Adler */ + +/* Version history: + 1.0 30 Oct 2004 First version + 1.1 8 Nov 2004 Add void casting for unused return values + Use switch statement for inflate() return values + 1.2 9 Nov 2004 Add assertions to document zlib guarantees + 1.3 6 Apr 2005 Remove incorrect assertion in inf() + 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions + Avoid some compiler warnings for input and output buffers + */ + +#include +#include +#include +#include "zlib.h" + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include +# include +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#define CHUNK 16384 + +/* Compress from file source to file dest until EOF on source. + def() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_STREAM_ERROR if an invalid compression + level is supplied, Z_VERSION_ERROR if the version of zlib.h and the + version of the library linked do not match, or Z_ERRNO if there is + an error reading or writing the files. */ +int def(FILE *source, FILE *dest, int level) +{ + int ret, flush; + unsigned have; + z_stream strm; + unsigned char in[CHUNK]; + unsigned char out[CHUNK]; + + /* allocate deflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = deflateInit(&strm, level); + if (ret != Z_OK) + return ret; + + /* compress until end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; + strm.next_in = in; + + /* run deflate() on input until output buffer not full, finish + compression if all of source has been read in */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = deflate(&strm, flush); /* no bad return value */ + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)deflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + assert(strm.avail_in == 0); /* all input will be used */ + + /* done when last data in file processed */ + } while (flush != Z_FINISH); + assert(ret == Z_STREAM_END); /* stream will be complete */ + + /* clean up and return */ + (void)deflateEnd(&strm); + return Z_OK; +} + +/* Decompress from file source to file dest until stream ends or EOF. + inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be + allocated for processing, Z_DATA_ERROR if the deflate data is + invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and + the version of the library linked do not match, or Z_ERRNO if there + is an error reading or writing the files. */ +int inf(FILE *source, FILE *dest) +{ + int ret; + unsigned have; + z_stream strm; + unsigned char in[CHUNK]; + unsigned char out[CHUNK]; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + return ret; + + /* decompress until deflate stream ends or end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + if (strm.avail_in == 0) + break; + strm.next_in = in; + + /* run inflate() on input until output buffer not full */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); + + /* clean up and return */ + (void)inflateEnd(&strm); + return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} + +/* report a zlib or i/o error */ +void zerr(int ret) +{ + fputs("zpipe: ", stderr); + switch (ret) { + case Z_ERRNO: + if (ferror(stdin)) + fputs("error reading stdin\n", stderr); + if (ferror(stdout)) + fputs("error writing stdout\n", stderr); + break; + case Z_STREAM_ERROR: + fputs("invalid compression level\n", stderr); + break; + case Z_DATA_ERROR: + fputs("invalid or incomplete deflate data\n", stderr); + break; + case Z_MEM_ERROR: + fputs("out of memory\n", stderr); + break; + case Z_VERSION_ERROR: + fputs("zlib version mismatch!\n", stderr); + } +} + +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ + int ret; + + /* avoid end-of-line conversions */ + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + /* do compression if no arguments */ + if (argc == 1) { + ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* do decompression if -d specified */ + else if (argc == 2 && strcmp(argv[1], "-d") == 0) { + ret = inf(stdin, stdout); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* otherwise, report usage */ + else { + fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); + return 1; + } +} diff --git a/src/deps/src/zlib/examples/zran.c b/src/deps/src/zlib/examples/zran.c new file mode 100644 index 000000000..d3135955b --- /dev/null +++ b/src/deps/src/zlib/examples/zran.c @@ -0,0 +1,533 @@ +/* zran.c -- example of deflate stream indexing and random access + * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.4 13 Apr 2023 Mark Adler */ + +/* Version History: + 1.0 29 May 2005 First version + 1.1 29 Sep 2012 Fix memory reallocation error + 1.2 14 Oct 2018 Handle gzip streams with multiple members + Add a header file to facilitate usage in applications + 1.3 18 Feb 2023 Permit raw deflate streams as well as zlib and gzip + Permit crossing gzip member boundaries when extracting + Support a size_t size when extracting (was an int) + Do a binary search over the index for an access point + Expose the access point type to enable save and load + 1.4 13 Apr 2023 Add a NOPRIME define to not use inflatePrime() + */ + +// Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() +// for random access of a compressed file. A file containing a raw deflate +// stream is provided on the command line. The compressed stream is decoded in +// its entirety, and an index built with access points about every SPAN bytes +// in the uncompressed output. The compressed file is left open, and can then +// be read randomly, having to decompress on the average SPAN/2 uncompressed +// bytes before getting to the desired block of data. +// +// An access point can be created at the start of any deflate block, by saving +// the starting file offset and bit of that block, and the 32K bytes of +// uncompressed data that precede that block. Also the uncompressed offset of +// that block is saved to provide a reference for locating a desired starting +// point in the uncompressed stream. deflate_index_build() decompresses the +// input raw deflate stream a block at a time, and at the end of each block +// decides if enough uncompressed data has gone by to justify the creation of a +// new access point. If so, that point is saved in a data structure that grows +// as needed to accommodate the points. +// +// To use the index, an offset in the uncompressed data is provided, for which +// the latest access point at or preceding that offset is located in the index. +// The input file is positioned to the specified location in the index, and if +// necessary the first few bits of the compressed data is read from the file. +// inflate is initialized with those bits and the 32K of uncompressed data, and +// decompression then proceeds until the desired offset in the file is reached. +// Then decompression continues to read the requested uncompressed data from +// the file. +// +// There is some fair bit of overhead to starting inflation for the random +// access, mainly copying the 32K byte dictionary. If small pieces of the file +// are being accessed, it would make sense to implement a cache to hold some +// lookahead to avoid many calls to deflate_index_extract() for small lengths. +// +// Another way to build an index would be to use inflateCopy(). That would not +// be constrained to have access points at block boundaries, but would require +// more memory per access point, and could not be saved to a file due to the +// use of pointers in the state. The approach here allows for storage of the +// index in a file. + +#include +#include +#include +#include +#include "zlib.h" +#include "zran.h" + +#define WINSIZE 32768U // sliding window size +#define CHUNK 16384 // file input buffer size + +// See comments in zran.h. +void deflate_index_free(struct deflate_index *index) { + if (index != NULL) { + free(index->list); + free(index); + } +} + +// Add an access point to the list. If out of memory, deallocate the existing +// list and return NULL. index->mode is temporarily the allocated number of +// access points, until it is time for deflate_index_build() to return. Then +// index->mode is set to the mode of inflation. +static struct deflate_index *add_point(struct deflate_index *index, int bits, + off_t in, off_t out, unsigned left, + unsigned char *window) { + if (index == NULL) { + // The list is empty. Create it, starting with eight access points. + index = malloc(sizeof(struct deflate_index)); + if (index == NULL) + return NULL; + index->have = 0; + index->mode = 8; + index->list = malloc(sizeof(point_t) * index->mode); + if (index->list == NULL) { + free(index); + return NULL; + } + } + + else if (index->have == index->mode) { + // The list is full. Make it bigger. + index->mode <<= 1; + point_t *next = realloc(index->list, sizeof(point_t) * index->mode); + if (next == NULL) { + deflate_index_free(index); + return NULL; + } + index->list = next; + } + + // Fill in the access point and increment how many we have. + point_t *next = (point_t *)(index->list) + index->have++; + if (index->have < 0) { + // Overflowed the int! + deflate_index_free(index); + return NULL; + } + next->out = out; + next->in = in; + next->bits = bits; + if (left) + memcpy(next->window, window + WINSIZE - left, left); + if (left < WINSIZE) + memcpy(next->window + left, window, WINSIZE - left); + + // Return the index, which may have been newly allocated or destroyed. + return index; +} + +// Decompression modes. These are the inflateInit2() windowBits parameter. +#define RAW -15 +#define ZLIB 15 +#define GZIP 31 + +// See comments in zran.h. +int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { + // Set up inflation state. + z_stream strm = {0}; // inflate engine (gets fired up later) + unsigned char buf[CHUNK]; // input buffer + unsigned char win[WINSIZE] = {0}; // output sliding window + off_t totin = 0; // total bytes read from input + off_t totout = 0; // total bytes uncompressed + int mode = 0; // mode: RAW, ZLIB, or GZIP (0 => not set yet) + + // Decompress from in, generating access points along the way. + int ret; // the return value from zlib, or Z_ERRNO + off_t last; // last access point uncompressed offset + struct deflate_index *index = NULL; // list of access points + do { + // Assure available input, at least until reaching EOF. + if (strm.avail_in == 0) { + strm.avail_in = fread(buf, 1, sizeof(buf), in); + totin += strm.avail_in; + strm.next_in = buf; + if (strm.avail_in < sizeof(buf) && ferror(in)) { + ret = Z_ERRNO; + break; + } + + if (mode == 0) { + // At the start of the input -- determine the type. Assume raw + // if it is neither zlib nor gzip. This could in theory result + // in a false positive for zlib, but in practice the fill bits + // after a stored block are always zeros, so a raw stream won't + // start with an 8 in the low nybble. + mode = strm.avail_in == 0 ? RAW : // empty -- will fail + (strm.next_in[0] & 0xf) == 8 ? ZLIB : + strm.next_in[0] == 0x1f ? GZIP : + /* else */ RAW; + ret = inflateInit2(&strm, mode); + if (ret != Z_OK) + break; + } + } + + // Assure available output. This rotates the output through, for use as + // a sliding window on the uncompressed data. + if (strm.avail_out == 0) { + strm.avail_out = sizeof(win); + strm.next_out = win; + } + + if (mode == RAW && index == NULL) + // We skip the inflate() call at the start of raw deflate data in + // order generate an access point there. Set data_type to imitate + // the end of a header. + strm.data_type = 0x80; + else { + // Inflate and update the number of uncompressed bytes. + unsigned before = strm.avail_out; + ret = inflate(&strm, Z_BLOCK); + totout += before - strm.avail_out; + } + + if ((strm.data_type & 0xc0) == 0x80 && + (index == NULL || totout - last >= span)) { + // We are at the end of a header or a non-last deflate block, so we + // can add an access point here. Furthermore, we are either at the + // very start for the first access point, or there has been span or + // more uncompressed bytes since the last access point, so we want + // to add an access point here. + index = add_point(index, strm.data_type & 7, totin - strm.avail_in, + totout, strm.avail_out, win); + if (index == NULL) { + ret = Z_MEM_ERROR; + break; + } + last = totout; + } + + if (ret == Z_STREAM_END && mode == GZIP && + (strm.avail_in || ungetc(getc(in), in) != EOF)) + // There is more input after the end of a gzip member. Reset the + // inflate state to read another gzip member. On success, this will + // set ret to Z_OK to continue decompressing. + ret = inflateReset2(&strm, GZIP); + + // Keep going until Z_STREAM_END or error. If the compressed data ends + // prematurely without a file read error, Z_BUF_ERROR is returned. + } while (ret == Z_OK); + inflateEnd(&strm); + + if (ret != Z_STREAM_END) { + // An error was encountered. Discard the index and return a negative + // error code. + deflate_index_free(index); + return ret == Z_NEED_DICT ? Z_DATA_ERROR : ret; + } + + // Shrink the index to only the occupied access points and return it. + index->mode = mode; + index->length = totout; + point_t *list = realloc(index->list, sizeof(point_t) * index->have); + if (list == NULL) { + // Seems like a realloc() to make something smaller should always work, + // but just in case. + deflate_index_free(index); + return Z_MEM_ERROR; + } + index->list = list; + *built = index; + return index->have; +} + +#ifdef NOPRIME +// Support zlib versions before 1.2.3 (July 2005), or incomplete zlib clones +// that do not have inflatePrime(). + +# define INFLATEPRIME inflatePreface + +// Append the low bits bits of value to in[] at bit position *have, updating +// *have. value must be zero above its low bits bits. bits must be positive. +// This assumes that any bits above the *have bits in the last byte are zeros. +// That assumption is preserved on return, as any bits above *have + bits in +// the last byte written will be set to zeros. +static inline void append_bits(unsigned value, int bits, + unsigned char *in, int *have) { + in += *have >> 3; // where the first bits from value will go + int k = *have & 7; // the number of bits already there + *have += bits; + if (k) + *in |= value << k; // write value above the low k bits + else + *in = value; + k = 8 - k; // the number of bits just appended + while (bits > k) { + value >>= k; // drop the bits appended + bits -= k; + k = 8; // now at a byte boundary + *++in = value; + } +} + +// Insert enough bits in the form of empty deflate blocks in front of the +// low bits bits of value, in order to bring the sequence to a byte boundary. +// Then feed that to inflate(). This does what inflatePrime() does, except that +// a negative value of bits is not supported. bits must be in 0..16. If the +// arguments are invalid, Z_STREAM_ERROR is returned. Otherwise the return +// value from inflate() is returned. +static int inflatePreface(z_stream *strm, int bits, int value) { + // Check input. + if (strm == Z_NULL || bits < 0 || bits > 16) + return Z_STREAM_ERROR; + if (bits == 0) + return Z_OK; + value &= (2 << (bits - 1)) - 1; + + // An empty dynamic block with an odd number of bits (95). The high bit of + // the last byte is unused. + static const unsigned char dyn[] = { + 4, 0xe0, 0x81, 8, 0, 0, 0, 0, 0x20, 0xa8, 0xab, 0x1f + }; + const int dynlen = 95; // number of bits in the block + + // Build an input buffer for inflate that is a multiple of eight bits in + // length, and that ends with the low bits bits of value. + unsigned char in[(dynlen + 3 * 10 + 16 + 7) / 8]; + int have = 0; + if (bits & 1) { + // Insert an empty dynamic block to get to an odd number of bits, so + // when bits bits from value are appended, we are at an even number of + // bits. + memcpy(in, dyn, sizeof(dyn)); + have = dynlen; + } + while ((have + bits) & 7) + // Insert empty fixed blocks until appending bits bits would put us on + // a byte boundary. This will insert at most three fixed blocks. + append_bits(2, 10, in, &have); + + // Append the bits bits from value, which takes us to a byte boundary. + append_bits(value, bits, in, &have); + + // Deliver the input to inflate(). There is no output space provided, but + // inflate() can't get stuck waiting on output not ingesting all of the + // provided input. The reason is that there will be at most 16 bits of + // input from value after the empty deflate blocks (which themselves + // generate no output). At least ten bits are needed to generate the first + // output byte from a fixed block. The last two bytes of the buffer have to + // be ingested in order to get ten bits, which is the most that value can + // occupy. + strm->avail_in = have >> 3; + strm->next_in = in; + strm->avail_out = 0; + strm->next_out = in; // not used, but can't be NULL + return inflate(strm, Z_NO_FLUSH); +} + +#else +# define INFLATEPRIME inflatePrime +#endif + +// See comments in zran.h. +ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, + off_t offset, unsigned char *buf, size_t len) { + // Do a quick sanity check on the index. + if (index == NULL || index->have < 1 || index->list[0].out != 0) + return Z_STREAM_ERROR; + + // If nothing to extract, return zero bytes extracted. + if (len == 0 || offset < 0 || offset >= index->length) + return 0; + + // Find the access point closest to but not after offset. + int lo = -1, hi = index->have; + point_t *point = index->list; + while (hi - lo > 1) { + int mid = (lo + hi) >> 1; + if (offset < point[mid].out) + hi = mid; + else + lo = mid; + } + point += lo; + + // Initialize the input file and prime the inflate engine to start there. + int ret = fseeko(in, point->in - (point->bits ? 1 : 0), SEEK_SET); + if (ret == -1) + return Z_ERRNO; + int ch = 0; + if (point->bits && (ch = getc(in)) == EOF) + return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; + z_stream strm = {0}; + ret = inflateInit2(&strm, RAW); + if (ret != Z_OK) + return ret; + if (point->bits) + INFLATEPRIME(&strm, point->bits, ch >> (8 - point->bits)); + inflateSetDictionary(&strm, point->window, WINSIZE); + + // Skip uncompressed bytes until offset reached, then satisfy request. + unsigned char input[CHUNK]; + unsigned char discard[WINSIZE]; + offset -= point->out; // number of bytes to skip to get to offset + size_t left = len; // number of bytes left to read after offset + do { + if (offset) { + // Discard up to offset uncompressed bytes. + strm.avail_out = offset < WINSIZE ? (unsigned)offset : WINSIZE; + strm.next_out = discard; + } + else { + // Uncompress up to left bytes into buf. + strm.avail_out = left < UINT_MAX ? (unsigned)left : UINT_MAX; + strm.next_out = buf + len - left; + } + + // Uncompress, setting got to the number of bytes uncompressed. + if (strm.avail_in == 0) { + // Assure available input. + strm.avail_in = fread(input, 1, CHUNK, in); + if (strm.avail_in < CHUNK && ferror(in)) { + ret = Z_ERRNO; + break; + } + strm.next_in = input; + } + unsigned got = strm.avail_out; + ret = inflate(&strm, Z_NO_FLUSH); + got -= strm.avail_out; + + // Update the appropriate count. + if (offset) + offset -= got; + else + left -= got; + + // If we're at the end of a gzip member and there's more to read, + // continue to the next gzip member. + if (ret == Z_STREAM_END && index->mode == GZIP) { + // Discard the gzip trailer. + unsigned drop = 8; // length of gzip trailer + if (strm.avail_in >= drop) { + strm.avail_in -= drop; + strm.next_in += drop; + } + else { + // Read and discard the remainder of the gzip trailer. + drop -= strm.avail_in; + strm.avail_in = 0; + do { + if (getc(in) == EOF) + // The input does not have a complete trailer. + return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; + } while (--drop); + } + + if (strm.avail_in || ungetc(getc(in), in) != EOF) { + // There's more after the gzip trailer. Use inflate to skip the + // gzip header and resume the raw inflate there. + inflateReset2(&strm, GZIP); + do { + if (strm.avail_in == 0) { + strm.avail_in = fread(input, 1, CHUNK, in); + if (strm.avail_in < CHUNK && ferror(in)) { + ret = Z_ERRNO; + break; + } + strm.next_in = input; + } + strm.avail_out = WINSIZE; + strm.next_out = discard; + ret = inflate(&strm, Z_BLOCK); // stop at end of header + } while (ret == Z_OK && (strm.data_type & 0x80) == 0); + if (ret != Z_OK) + break; + inflateReset2(&strm, RAW); + } + } + + // Continue until we have the requested data, the deflate data has + // ended, or an error is encountered. + } while (ret == Z_OK && left); + inflateEnd(&strm); + + // Return the number of uncompressed bytes read into buf, or the error. + return ret == Z_OK || ret == Z_STREAM_END ? len - left : ret; +} + +#ifdef TEST + +#define SPAN 1048576L // desired distance between access points +#define LEN 16384 // number of bytes to extract + +// Demonstrate the use of deflate_index_build() and deflate_index_extract() by +// processing the file provided on the command line, and extracting LEN bytes +// from 2/3rds of the way through the uncompressed output, writing that to +// stdout. An offset can be provided as the second argument, in which case the +// data is extracted from there instead. +int main(int argc, char **argv) { + // Open the input file. + if (argc < 2 || argc > 3) { + fprintf(stderr, "usage: zran file.raw [offset]\n"); + return 1; + } + FILE *in = fopen(argv[1], "rb"); + if (in == NULL) { + fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); + return 1; + } + + // Get optional offset. + off_t offset = -1; + if (argc == 3) { + char *end; + offset = strtoll(argv[2], &end, 10); + if (*end || offset < 0) { + fprintf(stderr, "zran: %s is not a valid offset\n", argv[2]); + return 1; + } + } + + // Build index. + struct deflate_index *index = NULL; + int len = deflate_index_build(in, SPAN, &index); + if (len < 0) { + fclose(in); + switch (len) { + case Z_MEM_ERROR: + fprintf(stderr, "zran: out of memory\n"); + break; + case Z_BUF_ERROR: + fprintf(stderr, "zran: %s ended prematurely\n", argv[1]); + break; + case Z_DATA_ERROR: + fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); + break; + case Z_ERRNO: + fprintf(stderr, "zran: read error on %s\n", argv[1]); + break; + default: + fprintf(stderr, "zran: error %d while building index\n", len); + } + return 1; + } + fprintf(stderr, "zran: built index with %d access points\n", len); + + // Use index by reading some bytes from an arbitrary offset. + unsigned char buf[LEN]; + if (offset == -1) + offset = ((index->length + 1) << 1) / 3; + ptrdiff_t got = deflate_index_extract(in, index, offset, buf, LEN); + if (got < 0) + fprintf(stderr, "zran: extraction failed: %s error\n", + got == Z_MEM_ERROR ? "out of memory" : "input corrupted"); + else { + fwrite(buf, 1, got, stdout); + fprintf(stderr, "zran: extracted %ld bytes at %lld\n", got, offset); + } + + // Clean up and exit. + deflate_index_free(index); + fclose(in); + return 0; +} + +#endif diff --git a/src/deps/src/zlib/examples/zran.h b/src/deps/src/zlib/examples/zran.h new file mode 100644 index 000000000..ebf780d0c --- /dev/null +++ b/src/deps/src/zlib/examples/zran.h @@ -0,0 +1,51 @@ +/* zran.h -- example of deflated stream indexing and random access + * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.3 18 Feb 2023 Mark Adler */ + +#include +#include "zlib.h" + +// Access point. +typedef struct point { + off_t out; // offset in uncompressed data + off_t in; // offset in compressed file of first full byte + int bits; // 0, or number of bits (1-7) from byte at in-1 + unsigned char window[32768]; // preceding 32K of uncompressed data +} point_t; + +// Access point list. +struct deflate_index { + int have; // number of access points in list + int mode; // -15 for raw, 15 for zlib, or 31 for gzip + off_t length; // total length of uncompressed data + point_t *list; // allocated list of access points +}; + +// Make one pass through a zlib, gzip, or raw deflate compressed stream and +// build an index, with access points about every span bytes of uncompressed +// output. gzip files with multiple members are fully indexed. span should be +// chosen to balance the speed of random access against the memory requirements +// of the list, which is about 32K bytes per access point. The return value is +// the number of access points on success (>= 1), Z_MEM_ERROR for out of +// memory, Z_BUF_ERROR for a premature end of input, Z_DATA_ERROR for a format +// or verification error in the input file, or Z_ERRNO for a file read error. +// On success, *built points to the resulting index. +int deflate_index_build(FILE *in, off_t span, struct deflate_index **built); + +// Use the index to read len bytes from offset into buf. Return the number of +// bytes read or a negative error code. If data is requested past the end of +// the uncompressed data, then deflate_index_extract() will return a value less +// than len, indicating how much was actually read into buf. If given a valid +// index, this function should not return an error unless the file was modified +// somehow since the index was generated, given that deflate_index_build() had +// validated all of the input. If nevertheless there is a failure, Z_BUF_ERROR +// is returned if the compressed data ends prematurely, Z_DATA_ERROR if the +// deflate compressed data is not valid, Z_MEM_ERROR if out of memory, +// Z_STREAM_ERROR if the index is not valid, or Z_ERRNO if there is an error +// reading or seeking on the input file. +ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, + off_t offset, unsigned char *buf, size_t len); + +// Deallocate an index built by deflate_index_build(). +void deflate_index_free(struct deflate_index *index); diff --git a/src/deps/src/zlib/test/example.c b/src/deps/src/zlib/test/example.c new file mode 100644 index 000000000..c3521dd59 --- /dev/null +++ b/src/deps/src/zlib/test/example.c @@ -0,0 +1,546 @@ +/* example.c -- usage example of the zlib compression library + * Copyright (C) 1995-2006, 2011, 2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zlib.h" +#include + +#ifdef STDC +# include +# include +#endif + +#if defined(VMS) || defined(RISCOS) +# define TESTFILE "foo-gz" +#else +# define TESTFILE "foo.gz" +#endif + +#define CHECK_ERR(err, msg) { \ + if (err != Z_OK) { \ + fprintf(stderr, "%s error: %d\n", msg, err); \ + exit(1); \ + } \ +} + +static z_const char hello[] = "hello, hello!"; +/* "hello world" would be more standard, but the repeated "hello" + * stresses the compression code better, sorry... + */ + +static const char dictionary[] = "hello"; +static uLong dictId; /* Adler32 value of the dictionary */ + +#ifdef Z_SOLO + +static void *myalloc(void *q, unsigned n, unsigned m) { + (void)q; + return calloc(n, m); +} + +static void myfree(void *q, void *p) { + (void)q; + free(p); +} + +static alloc_func zalloc = myalloc; +static free_func zfree = myfree; + +#else /* !Z_SOLO */ + +static alloc_func zalloc = (alloc_func)0; +static free_func zfree = (free_func)0; + +/* =========================================================================== + * Test compress() and uncompress() + */ +static void test_compress(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + uLong len = (uLong)strlen(hello)+1; + + err = compress(compr, &comprLen, (const Bytef*)hello, len); + CHECK_ERR(err, "compress"); + + strcpy((char*)uncompr, "garbage"); + + err = uncompress(uncompr, &uncomprLen, compr, comprLen); + CHECK_ERR(err, "uncompress"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad uncompress\n"); + exit(1); + } else { + printf("uncompress(): %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Test read/write of .gz files + */ +static void test_gzio(const char *fname, Byte *uncompr, uLong uncomprLen) { +#ifdef NO_GZCOMPRESS + fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n"); +#else + int err; + int len = (int)strlen(hello)+1; + gzFile file; + z_off_t pos; + + file = gzopen(fname, "wb"); + if (file == NULL) { + fprintf(stderr, "gzopen error\n"); + exit(1); + } + gzputc(file, 'h'); + if (gzputs(file, "ello") != 4) { + fprintf(stderr, "gzputs err: %s\n", gzerror(file, &err)); + exit(1); + } + if (gzprintf(file, ", %s!", "hello") != 8) { + fprintf(stderr, "gzprintf err: %s\n", gzerror(file, &err)); + exit(1); + } + gzseek(file, 1L, SEEK_CUR); /* add one zero byte */ + gzclose(file); + + file = gzopen(fname, "rb"); + if (file == NULL) { + fprintf(stderr, "gzopen error\n"); + exit(1); + } + strcpy((char*)uncompr, "garbage"); + + if (gzread(file, uncompr, (unsigned)uncomprLen) != len) { + fprintf(stderr, "gzread err: %s\n", gzerror(file, &err)); + exit(1); + } + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad gzread: %s\n", (char*)uncompr); + exit(1); + } else { + printf("gzread(): %s\n", (char*)uncompr); + } + + pos = gzseek(file, -8L, SEEK_CUR); + if (pos != 6 || gztell(file) != pos) { + fprintf(stderr, "gzseek error, pos=%ld, gztell=%ld\n", + (long)pos, (long)gztell(file)); + exit(1); + } + + if (gzgetc(file) != ' ') { + fprintf(stderr, "gzgetc error\n"); + exit(1); + } + + if (gzungetc(' ', file) != ' ') { + fprintf(stderr, "gzungetc error\n"); + exit(1); + } + + gzgets(file, (char*)uncompr, (int)uncomprLen); + if (strlen((char*)uncompr) != 7) { /* " hello!" */ + fprintf(stderr, "gzgets err after gzseek: %s\n", gzerror(file, &err)); + exit(1); + } + if (strcmp((char*)uncompr, hello + 6)) { + fprintf(stderr, "bad gzgets after gzseek\n"); + exit(1); + } else { + printf("gzgets() after gzseek: %s\n", (char*)uncompr); + } + + gzclose(file); +#endif +} + +#endif /* Z_SOLO */ + +/* =========================================================================== + * Test deflate() with small buffers + */ +static void test_deflate(Byte *compr, uLong comprLen) { + z_stream c_stream; /* compression stream */ + int err; + uLong len = (uLong)strlen(hello)+1; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_in = (z_const unsigned char *)hello; + c_stream.next_out = compr; + + while (c_stream.total_in != len && c_stream.total_out < comprLen) { + c_stream.avail_in = c_stream.avail_out = 1; /* force small buffers */ + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + } + /* Finish the stream, still forcing small buffers: */ + for (;;) { + c_stream.avail_out = 1; + err = deflate(&c_stream, Z_FINISH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "deflate"); + } + + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with small buffers + */ +static void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = 0; + d_stream.next_out = uncompr; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) { + d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "inflate"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad inflate\n"); + exit(1); + } else { + printf("inflate(): %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Test deflate() with large buffers and dynamic change of compression level + */ +static void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + z_stream c_stream; /* compression stream */ + int err; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_BEST_SPEED); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_out = compr; + c_stream.avail_out = (uInt)comprLen; + + /* At this point, uncompr is still mostly zeroes, so it should compress + * very well: + */ + c_stream.next_in = uncompr; + c_stream.avail_in = (uInt)uncomprLen; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + if (c_stream.avail_in != 0) { + fprintf(stderr, "deflate not greedy\n"); + exit(1); + } + + /* Feed in already compressed data and switch to no compression: */ + deflateParams(&c_stream, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY); + c_stream.next_in = compr; + c_stream.avail_in = (uInt)uncomprLen/2; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + + /* Switch back to compressing mode: */ + deflateParams(&c_stream, Z_BEST_COMPRESSION, Z_FILTERED); + c_stream.next_in = uncompr; + c_stream.avail_in = (uInt)uncomprLen; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "deflate should report Z_STREAM_END\n"); + exit(1); + } + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with large buffers + */ +static void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = (uInt)comprLen; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + for (;;) { + d_stream.next_out = uncompr; /* discard the output */ + d_stream.avail_out = (uInt)uncomprLen; + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "large inflate"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (d_stream.total_out != 2*uncomprLen + uncomprLen/2) { + fprintf(stderr, "bad large inflate: %ld\n", d_stream.total_out); + exit(1); + } else { + printf("large_inflate(): OK\n"); + } +} + +/* =========================================================================== + * Test deflate() with full flush + */ +static void test_flush(Byte *compr, uLong *comprLen) { + z_stream c_stream; /* compression stream */ + int err; + uInt len = (uInt)strlen(hello)+1; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_in = (z_const unsigned char *)hello; + c_stream.next_out = compr; + c_stream.avail_in = 3; + c_stream.avail_out = (uInt)*comprLen; + err = deflate(&c_stream, Z_FULL_FLUSH); + CHECK_ERR(err, "deflate"); + + compr[3]++; /* force an error in first compressed block */ + c_stream.avail_in = len - 3; + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + CHECK_ERR(err, "deflate"); + } + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); + + *comprLen = c_stream.total_out; +} + +/* =========================================================================== + * Test inflateSync() + */ +static void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = 2; /* just read the zlib header */ + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + d_stream.next_out = uncompr; + d_stream.avail_out = (uInt)uncomprLen; + + err = inflate(&d_stream, Z_NO_FLUSH); + CHECK_ERR(err, "inflate"); + + d_stream.avail_in = (uInt)comprLen-2; /* read all compressed data */ + err = inflateSync(&d_stream); /* but skip the damaged part */ + CHECK_ERR(err, "inflateSync"); + + err = inflate(&d_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "inflate should report Z_STREAM_END\n"); + exit(1); + } + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + printf("after inflateSync(): hel%s\n", (char *)uncompr); +} + +/* =========================================================================== + * Test deflate() with preset dictionary + */ +static void test_dict_deflate(Byte *compr, uLong comprLen) { + z_stream c_stream; /* compression stream */ + int err; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_BEST_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + err = deflateSetDictionary(&c_stream, + (const Bytef*)dictionary, (int)sizeof(dictionary)); + CHECK_ERR(err, "deflateSetDictionary"); + + dictId = c_stream.adler; + c_stream.next_out = compr; + c_stream.avail_out = (uInt)comprLen; + + c_stream.next_in = (z_const unsigned char *)hello; + c_stream.avail_in = (uInt)strlen(hello)+1; + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "deflate should report Z_STREAM_END\n"); + exit(1); + } + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with a preset dictionary + */ +static void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = (uInt)comprLen; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + d_stream.next_out = uncompr; + d_stream.avail_out = (uInt)uncomprLen; + + for (;;) { + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + if (err == Z_NEED_DICT) { + if (d_stream.adler != dictId) { + fprintf(stderr, "unexpected dictionary"); + exit(1); + } + err = inflateSetDictionary(&d_stream, (const Bytef*)dictionary, + (int)sizeof(dictionary)); + } + CHECK_ERR(err, "inflate with dict"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad inflate with dict\n"); + exit(1); + } else { + printf("inflate with dictionary: %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Usage: example [output.gz [input.gz]] + */ + +int main(int argc, char *argv[]) { + Byte *compr, *uncompr; + uLong uncomprLen = 20000; + uLong comprLen = 3 * uncomprLen; + static const char* myVersion = ZLIB_VERSION; + + if (zlibVersion()[0] != myVersion[0]) { + fprintf(stderr, "incompatible zlib version\n"); + exit(1); + + } else if (strcmp(zlibVersion(), ZLIB_VERSION) != 0) { + fprintf(stderr, "warning: different zlib version linked: %s\n", + zlibVersion()); + } + + printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", + ZLIB_VERSION, ZLIB_VERNUM, zlibCompileFlags()); + + compr = (Byte*)calloc((uInt)comprLen, 1); + uncompr = (Byte*)calloc((uInt)uncomprLen, 1); + /* compr and uncompr are cleared to avoid reading uninitialized + * data and to ensure that uncompr compresses well. + */ + if (compr == Z_NULL || uncompr == Z_NULL) { + printf("out of memory\n"); + exit(1); + } + +#ifdef Z_SOLO + (void)argc; + (void)argv; +#else + test_compress(compr, comprLen, uncompr, uncomprLen); + + test_gzio((argc > 1 ? argv[1] : TESTFILE), + uncompr, uncomprLen); +#endif + + test_deflate(compr, comprLen); + test_inflate(compr, comprLen, uncompr, uncomprLen); + + test_large_deflate(compr, comprLen, uncompr, uncomprLen); + test_large_inflate(compr, comprLen, uncompr, uncomprLen); + + test_flush(compr, &comprLen); + test_sync(compr, comprLen, uncompr, uncomprLen); + comprLen = 3 * uncomprLen; + + test_dict_deflate(compr, comprLen); + test_dict_inflate(compr, comprLen, uncompr, uncomprLen); + + free(compr); + free(uncompr); + + return 0; +} diff --git a/src/deps/src/zlib/test/infcover.c b/src/deps/src/zlib/test/infcover.c new file mode 100644 index 000000000..8912c403d --- /dev/null +++ b/src/deps/src/zlib/test/infcover.c @@ -0,0 +1,672 @@ +/* infcover.c -- test zlib's inflate routines with full code coverage + * Copyright (C) 2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* to use, do: ./configure --cover && make cover */ + +#include +#include +#include +#include +#include "zlib.h" + +/* get definition of internal structure so we can mess with it (see pull()), + and so we can call inflate_trees() (see cover5()) */ +#define ZLIB_INTERNAL +#include "inftrees.h" +#include "inflate.h" + +#define local static + +/* -- memory tracking routines -- */ + +/* + These memory tracking routines are provided to zlib and track all of zlib's + allocations and deallocations, check for LIFO operations, keep a current + and high water mark of total bytes requested, optionally set a limit on the + total memory that can be allocated, and when done check for memory leaks. + + They are used as follows: + + z_stream strm; + mem_setup(&strm) initializes the memory tracking and sets the + zalloc, zfree, and opaque members of strm to use + memory tracking for all zlib operations on strm + mem_limit(&strm, limit) sets a limit on the total bytes requested -- a + request that exceeds this limit will result in an + allocation failure (returns NULL) -- setting the + limit to zero means no limit, which is the default + after mem_setup() + mem_used(&strm, "msg") prints to stderr "msg" and the total bytes used + mem_high(&strm, "msg") prints to stderr "msg" and the high water mark + mem_done(&strm, "msg") ends memory tracking, releases all allocations + for the tracking as well as leaked zlib blocks, if + any. If there was anything unusual, such as leaked + blocks, non-FIFO frees, or frees of addresses not + allocated, then "msg" and information about the + problem is printed to stderr. If everything is + normal, nothing is printed. mem_done resets the + strm members to Z_NULL to use the default memory + allocation routines on the next zlib initialization + using strm. + */ + +/* these items are strung together in a linked list, one for each allocation */ +struct mem_item { + void *ptr; /* pointer to allocated memory */ + size_t size; /* requested size of allocation */ + struct mem_item *next; /* pointer to next item in list, or NULL */ +}; + +/* this structure is at the root of the linked list, and tracks statistics */ +struct mem_zone { + struct mem_item *first; /* pointer to first item in list, or NULL */ + size_t total, highwater; /* total allocations, and largest total */ + size_t limit; /* memory allocation limit, or 0 if no limit */ + int notlifo, rogue; /* counts of non-LIFO frees and rogue frees */ +}; + +/* memory allocation routine to pass to zlib */ +local void *mem_alloc(void *mem, unsigned count, unsigned size) +{ + void *ptr; + struct mem_item *item; + struct mem_zone *zone = mem; + size_t len = count * (size_t)size; + + /* induced allocation failure */ + if (zone == NULL || (zone->limit && zone->total + len > zone->limit)) + return NULL; + + /* perform allocation using the standard library, fill memory with a + non-zero value to make sure that the code isn't depending on zeros */ + ptr = malloc(len); + if (ptr == NULL) + return NULL; + memset(ptr, 0xa5, len); + + /* create a new item for the list */ + item = malloc(sizeof(struct mem_item)); + if (item == NULL) { + free(ptr); + return NULL; + } + item->ptr = ptr; + item->size = len; + + /* insert item at the beginning of the list */ + item->next = zone->first; + zone->first = item; + + /* update the statistics */ + zone->total += item->size; + if (zone->total > zone->highwater) + zone->highwater = zone->total; + + /* return the allocated memory */ + return ptr; +} + +/* memory free routine to pass to zlib */ +local void mem_free(void *mem, void *ptr) +{ + struct mem_item *item, *next; + struct mem_zone *zone = mem; + + /* if no zone, just do a free */ + if (zone == NULL) { + free(ptr); + return; + } + + /* point next to the item that matches ptr, or NULL if not found -- remove + the item from the linked list if found */ + next = zone->first; + if (next) { + if (next->ptr == ptr) + zone->first = next->next; /* first one is it, remove from list */ + else { + do { /* search the linked list */ + item = next; + next = item->next; + } while (next != NULL && next->ptr != ptr); + if (next) { /* if found, remove from linked list */ + item->next = next->next; + zone->notlifo++; /* not a LIFO free */ + } + + } + } + + /* if found, update the statistics and free the item */ + if (next) { + zone->total -= next->size; + free(next); + } + + /* if not found, update the rogue count */ + else + zone->rogue++; + + /* in any case, do the requested free with the standard library function */ + free(ptr); +} + +/* set up a controlled memory allocation space for monitoring, set the stream + parameters to the controlled routines, with opaque pointing to the space */ +local void mem_setup(z_stream *strm) +{ + struct mem_zone *zone; + + zone = malloc(sizeof(struct mem_zone)); + assert(zone != NULL); + zone->first = NULL; + zone->total = 0; + zone->highwater = 0; + zone->limit = 0; + zone->notlifo = 0; + zone->rogue = 0; + strm->opaque = zone; + strm->zalloc = mem_alloc; + strm->zfree = mem_free; +} + +/* set a limit on the total memory allocation, or 0 to remove the limit */ +local void mem_limit(z_stream *strm, size_t limit) +{ + struct mem_zone *zone = strm->opaque; + + zone->limit = limit; +} + +/* show the current total requested allocations in bytes */ +local void mem_used(z_stream *strm, char *prefix) +{ + struct mem_zone *zone = strm->opaque; + + fprintf(stderr, "%s: %lu allocated\n", prefix, zone->total); +} + +/* show the high water allocation in bytes */ +local void mem_high(z_stream *strm, char *prefix) +{ + struct mem_zone *zone = strm->opaque; + + fprintf(stderr, "%s: %lu high water mark\n", prefix, zone->highwater); +} + +/* release the memory allocation zone -- if there are any surprises, notify */ +local void mem_done(z_stream *strm, char *prefix) +{ + int count = 0; + struct mem_item *item, *next; + struct mem_zone *zone = strm->opaque; + + /* show high water mark */ + mem_high(strm, prefix); + + /* free leftover allocations and item structures, if any */ + item = zone->first; + while (item != NULL) { + free(item->ptr); + next = item->next; + free(item); + item = next; + count++; + } + + /* issue alerts about anything unexpected */ + if (count || zone->total) + fprintf(stderr, "** %s: %lu bytes in %d blocks not freed\n", + prefix, zone->total, count); + if (zone->notlifo) + fprintf(stderr, "** %s: %d frees not LIFO\n", prefix, zone->notlifo); + if (zone->rogue) + fprintf(stderr, "** %s: %d frees not recognized\n", + prefix, zone->rogue); + + /* free the zone and delete from the stream */ + free(zone); + strm->opaque = Z_NULL; + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; +} + +/* -- inflate test routines -- */ + +/* Decode a hexadecimal string, set *len to length, in[] to the bytes. This + decodes liberally, in that hex digits can be adjacent, in which case two in + a row writes a byte. Or they can be delimited by any non-hex character, + where the delimiters are ignored except when a single hex digit is followed + by a delimiter, where that single digit writes a byte. The returned data is + allocated and must eventually be freed. NULL is returned if out of memory. + If the length is not needed, then len can be NULL. */ +local unsigned char *h2b(const char *hex, unsigned *len) +{ + unsigned char *in, *re; + unsigned next, val; + + in = malloc((strlen(hex) + 1) >> 1); + if (in == NULL) + return NULL; + next = 0; + val = 1; + do { + if (*hex >= '0' && *hex <= '9') + val = (val << 4) + *hex - '0'; + else if (*hex >= 'A' && *hex <= 'F') + val = (val << 4) + *hex - 'A' + 10; + else if (*hex >= 'a' && *hex <= 'f') + val = (val << 4) + *hex - 'a' + 10; + else if (val != 1 && val < 32) /* one digit followed by delimiter */ + val += 240; /* make it look like two digits */ + if (val > 255) { /* have two digits */ + in[next++] = val & 0xff; /* save the decoded byte */ + val = 1; /* start over */ + } + } while (*hex++); /* go through the loop with the terminating null */ + if (len != NULL) + *len = next; + re = realloc(in, next); + return re == NULL ? in : re; +} + +/* generic inflate() run, where hex is the hexadecimal input data, what is the + text to include in an error message, step is how much input data to feed + inflate() on each call, or zero to feed it all, win is the window bits + parameter to inflateInit2(), len is the size of the output buffer, and err + is the error code expected from the first inflate() call (the second + inflate() call is expected to return Z_STREAM_END). If win is 47, then + header information is collected with inflateGetHeader(). If a zlib stream + is looking for a dictionary, then an empty dictionary is provided. + inflate() is run until all of the input data is consumed. */ +local void inf(char *hex, char *what, unsigned step, int win, unsigned len, + int err) +{ + int ret; + unsigned have; + unsigned char *in, *out; + z_stream strm, copy; + gz_header head; + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, win); + if (ret != Z_OK) { + mem_done(&strm, what); + return; + } + out = malloc(len); assert(out != NULL); + if (win == 47) { + head.extra = out; + head.extra_max = len; + head.name = out; + head.name_max = len; + head.comment = out; + head.comm_max = len; + ret = inflateGetHeader(&strm, &head); assert(ret == Z_OK); + } + in = h2b(hex, &have); assert(in != NULL); + if (step == 0 || step > have) + step = have; + strm.avail_in = step; + have -= step; + strm.next_in = in; + do { + strm.avail_out = len; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); assert(err == 9 || ret == err); + if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_NEED_DICT) + break; + if (ret == Z_NEED_DICT) { + ret = inflateSetDictionary(&strm, in, 1); + assert(ret == Z_DATA_ERROR); + mem_limit(&strm, 1); + ret = inflateSetDictionary(&strm, out, 0); + assert(ret == Z_MEM_ERROR); + mem_limit(&strm, 0); + ((struct inflate_state *)strm.state)->mode = DICT; + ret = inflateSetDictionary(&strm, out, 0); + assert(ret == Z_OK); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_BUF_ERROR); + } + ret = inflateCopy(©, &strm); assert(ret == Z_OK); + ret = inflateEnd(©); assert(ret == Z_OK); + err = 9; /* don't care next time around */ + have += strm.avail_in; + strm.avail_in = step > have ? have : step; + have -= strm.avail_in; + } while (strm.avail_in); + free(in); + free(out); + ret = inflateReset2(&strm, -8); assert(ret == Z_OK); + ret = inflateEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, what); +} + +/* cover all of the lines in inflate.c up to inflate() */ +local void cover_support(void) +{ + int ret; + z_stream strm; + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); assert(ret == Z_OK); + mem_used(&strm, "inflate init"); + ret = inflatePrime(&strm, 5, 31); assert(ret == Z_OK); + ret = inflatePrime(&strm, -1, 0); assert(ret == Z_OK); + ret = inflateSetDictionary(&strm, Z_NULL, 0); + assert(ret == Z_STREAM_ERROR); + ret = inflateEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, "prime"); + + inf("63 0", "force window allocation", 0, -15, 1, Z_OK); + inf("63 18 5", "force window replacement", 0, -8, 259, Z_OK); + inf("63 18 68 30 d0 0 0", "force split window update", 4, -8, 259, Z_OK); + inf("3 0", "use fixed blocks", 0, -15, 1, Z_STREAM_END); + inf("", "bad window size", 0, 1, 0, Z_STREAM_ERROR); + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit_(&strm, "!", (int)sizeof(z_stream)); + assert(ret == Z_VERSION_ERROR); + mem_done(&strm, "wrong version"); + + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); assert(ret == Z_OK); + ret = inflateEnd(&strm); assert(ret == Z_OK); + fputs("inflate built-in memory routines\n", stderr); +} + +/* cover all inflate() header and trailer cases and code after inflate() */ +local void cover_wrap(void) +{ + int ret; + z_stream strm, copy; + unsigned char dict[257]; + + ret = inflate(Z_NULL, 0); assert(ret == Z_STREAM_ERROR); + ret = inflateEnd(Z_NULL); assert(ret == Z_STREAM_ERROR); + ret = inflateCopy(Z_NULL, Z_NULL); assert(ret == Z_STREAM_ERROR); + fputs("inflate bad parameters\n", stderr); + + inf("1f 8b 0 0", "bad gzip method", 0, 31, 0, Z_DATA_ERROR); + inf("1f 8b 8 80", "bad gzip flags", 0, 31, 0, Z_DATA_ERROR); + inf("77 85", "bad zlib method", 0, 15, 0, Z_DATA_ERROR); + inf("8 99", "set window size from header", 0, 0, 0, Z_OK); + inf("78 9c", "bad zlib window size", 0, 8, 0, Z_DATA_ERROR); + inf("78 9c 63 0 0 0 1 0 1", "check adler32", 0, 15, 1, Z_STREAM_END); + inf("1f 8b 8 1e 0 0 0 0 0 0 1 0 0 0 0 0 0", "bad header crc", 0, 47, 1, + Z_DATA_ERROR); + inf("1f 8b 8 2 0 0 0 0 0 0 1d 26 3 0 0 0 0 0 0 0 0 0", "check gzip length", + 0, 47, 0, Z_STREAM_END); + inf("78 90", "bad zlib header check", 0, 47, 0, Z_DATA_ERROR); + inf("8 b8 0 0 0 1", "need dictionary", 0, 8, 0, Z_NEED_DICT); + inf("78 9c 63 0", "compute adler32", 0, 15, 1, Z_OK); + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -8); + strm.avail_in = 2; + strm.next_in = (void *)"\x63"; + strm.avail_out = 1; + strm.next_out = (void *)&ret; + mem_limit(&strm, 1); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_MEM_ERROR); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_MEM_ERROR); + mem_limit(&strm, 0); + memset(dict, 0, 257); + ret = inflateSetDictionary(&strm, dict, 257); + assert(ret == Z_OK); + mem_limit(&strm, (sizeof(struct inflate_state) << 1) + 256); + ret = inflatePrime(&strm, 16, 0); assert(ret == Z_OK); + strm.avail_in = 2; + strm.next_in = (void *)"\x80"; + ret = inflateSync(&strm); assert(ret == Z_DATA_ERROR); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_STREAM_ERROR); + strm.avail_in = 4; + strm.next_in = (void *)"\0\0\xff\xff"; + ret = inflateSync(&strm); assert(ret == Z_OK); + (void)inflateSyncPoint(&strm); + ret = inflateCopy(©, &strm); assert(ret == Z_MEM_ERROR); + mem_limit(&strm, 0); + ret = inflateUndermine(&strm, 1); assert(ret == Z_DATA_ERROR); + (void)inflateMark(&strm); + ret = inflateEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, "miscellaneous, force memory errors"); +} + +/* input and output functions for inflateBack() */ +local unsigned pull(void *desc, unsigned char **buf) +{ + static unsigned int next = 0; + static unsigned char dat[] = {0x63, 0, 2, 0}; + struct inflate_state *state; + + if (desc == Z_NULL) { + next = 0; + return 0; /* no input (already provided at next_in) */ + } + state = (void *)((z_stream *)desc)->state; + if (state != Z_NULL) + state->mode = SYNC; /* force an otherwise impossible situation */ + return next < sizeof(dat) ? (*buf = dat + next++, 1) : 0; +} + +local int push(void *desc, unsigned char *buf, unsigned len) +{ + (void)buf; + (void)len; + return desc != Z_NULL; /* force error if desc not null */ +} + +/* cover inflateBack() up to common deflate data cases and after those */ +local void cover_back(void) +{ + int ret; + z_stream strm; + unsigned char win[32768]; + + ret = inflateBackInit_(Z_NULL, 0, win, 0, 0); + assert(ret == Z_VERSION_ERROR); + ret = inflateBackInit(Z_NULL, 0, win); assert(ret == Z_STREAM_ERROR); + ret = inflateBack(Z_NULL, Z_NULL, Z_NULL, Z_NULL, Z_NULL); + assert(ret == Z_STREAM_ERROR); + ret = inflateBackEnd(Z_NULL); assert(ret == Z_STREAM_ERROR); + fputs("inflateBack bad parameters\n", stderr); + + mem_setup(&strm); + ret = inflateBackInit(&strm, 15, win); assert(ret == Z_OK); + strm.avail_in = 2; + strm.next_in = (void *)"\x03"; + ret = inflateBack(&strm, pull, Z_NULL, push, Z_NULL); + assert(ret == Z_STREAM_END); + /* force output error */ + strm.avail_in = 3; + strm.next_in = (void *)"\x63\x00"; + ret = inflateBack(&strm, pull, Z_NULL, push, &strm); + assert(ret == Z_BUF_ERROR); + /* force mode error by mucking with state */ + ret = inflateBack(&strm, pull, &strm, push, Z_NULL); + assert(ret == Z_STREAM_ERROR); + ret = inflateBackEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, "inflateBack bad state"); + + ret = inflateBackInit(&strm, 15, win); assert(ret == Z_OK); + ret = inflateBackEnd(&strm); assert(ret == Z_OK); + fputs("inflateBack built-in memory routines\n", stderr); +} + +/* do a raw inflate of data in hexadecimal with both inflate and inflateBack */ +local int try(char *hex, char *id, int err) +{ + int ret; + unsigned len, size; + unsigned char *in, *out, *win; + char *prefix; + z_stream strm; + + /* convert to hex */ + in = h2b(hex, &len); + assert(in != NULL); + + /* allocate work areas */ + size = len << 3; + out = malloc(size); + assert(out != NULL); + win = malloc(32768); + assert(win != NULL); + prefix = malloc(strlen(id) + 6); + assert(prefix != NULL); + + /* first with inflate */ + strcpy(prefix, id); + strcat(prefix, "-late"); + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, err < 0 ? 47 : -15); + assert(ret == Z_OK); + strm.avail_in = len; + strm.next_in = in; + do { + strm.avail_out = size; + strm.next_out = out; + ret = inflate(&strm, Z_TREES); + assert(ret != Z_STREAM_ERROR && ret != Z_MEM_ERROR); + if (ret == Z_DATA_ERROR || ret == Z_NEED_DICT) + break; + } while (strm.avail_in || strm.avail_out == 0); + if (err) { + assert(ret == Z_DATA_ERROR); + assert(strcmp(id, strm.msg) == 0); + } + inflateEnd(&strm); + mem_done(&strm, prefix); + + /* then with inflateBack */ + if (err >= 0) { + strcpy(prefix, id); + strcat(prefix, "-back"); + mem_setup(&strm); + ret = inflateBackInit(&strm, 15, win); + assert(ret == Z_OK); + strm.avail_in = len; + strm.next_in = in; + ret = inflateBack(&strm, pull, Z_NULL, push, Z_NULL); + assert(ret != Z_STREAM_ERROR); + if (err) { + assert(ret == Z_DATA_ERROR); + assert(strcmp(id, strm.msg) == 0); + } + inflateBackEnd(&strm); + mem_done(&strm, prefix); + } + + /* clean up */ + free(prefix); + free(win); + free(out); + free(in); + return ret; +} + +/* cover deflate data cases in both inflate() and inflateBack() */ +local void cover_inflate(void) +{ + try("0 0 0 0 0", "invalid stored block lengths", 1); + try("3 0", "fixed", 0); + try("6", "invalid block type", 1); + try("1 1 0 fe ff 0", "stored", 0); + try("fc 0 0", "too many length or distance symbols", 1); + try("4 0 fe ff", "invalid code lengths set", 1); + try("4 0 24 49 0", "invalid bit length repeat", 1); + try("4 0 24 e9 ff ff", "invalid bit length repeat", 1); + try("4 0 24 e9 ff 6d", "invalid code -- missing end-of-block", 1); + try("4 80 49 92 24 49 92 24 71 ff ff 93 11 0", + "invalid literal/lengths set", 1); + try("4 80 49 92 24 49 92 24 f b4 ff ff c3 84", "invalid distances set", 1); + try("4 c0 81 8 0 0 0 0 20 7f eb b 0 0", "invalid literal/length code", 1); + try("2 7e ff ff", "invalid distance code", 1); + try("c c0 81 0 0 0 0 0 90 ff 6b 4 0", "invalid distance too far back", 1); + + /* also trailer mismatch just in inflate() */ + try("1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 1", "incorrect data check", -1); + try("1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 1", + "incorrect length check", -1); + try("5 c0 21 d 0 0 0 80 b0 fe 6d 2f 91 6c", "pull 17", 0); + try("5 e0 81 91 24 cb b2 2c 49 e2 f 2e 8b 9a 47 56 9f fb fe ec d2 ff 1f", + "long code", 0); + try("ed c0 1 1 0 0 0 40 20 ff 57 1b 42 2c 4f", "length extra", 0); + try("ed cf c1 b1 2c 47 10 c4 30 fa 6f 35 1d 1 82 59 3d fb be 2e 2a fc f c", + "long distance and extra", 0); + try("ed c0 81 0 0 0 0 80 a0 fd a9 17 a9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6", "window end", 0); + inf("2 8 20 80 0 3 0", "inflate_fast TYPE return", 0, -15, 258, + Z_STREAM_END); + inf("63 18 5 40 c 0", "window wrap", 3, -8, 300, Z_OK); +} + +/* cover remaining lines in inftrees.c */ +local void cover_trees(void) +{ + int ret; + unsigned bits; + unsigned short lens[16], work[16]; + code *next, table[ENOUGH_DISTS]; + + /* we need to call inflate_table() directly in order to manifest not- + enough errors, since zlib insures that enough is always enough */ + for (bits = 0; bits < 15; bits++) + lens[bits] = (unsigned short)(bits + 1); + lens[15] = 15; + next = table; + bits = 15; + ret = inflate_table(DISTS, lens, 16, &next, &bits, work); + assert(ret == 1); + next = table; + bits = 1; + ret = inflate_table(DISTS, lens, 16, &next, &bits, work); + assert(ret == 1); + fputs("inflate_table not enough errors\n", stderr); +} + +/* cover remaining inffast.c decoding and window copying */ +local void cover_fast(void) +{ + inf("e5 e0 81 ad 6d cb b2 2c c9 01 1e 59 63 ae 7d ee fb 4d fd b5 35 41 68" + " ff 7f 0f 0 0 0", "fast length extra bits", 0, -8, 258, Z_DATA_ERROR); + inf("25 fd 81 b5 6d 59 b6 6a 49 ea af 35 6 34 eb 8c b9 f6 b9 1e ef 67 49" + " 50 fe ff ff 3f 0 0", "fast distance extra bits", 0, -8, 258, + Z_DATA_ERROR); + inf("3 7e 0 0 0 0 0", "fast invalid distance code", 0, -8, 258, + Z_DATA_ERROR); + inf("1b 7 0 0 0 0 0", "fast invalid literal/length code", 0, -8, 258, + Z_DATA_ERROR); + inf("d c7 1 ae eb 38 c 4 41 a0 87 72 de df fb 1f b8 36 b1 38 5d ff ff 0", + "fast 2nd level codes and too far back", 0, -8, 258, Z_DATA_ERROR); + inf("63 18 5 8c 10 8 0 0 0 0", "very common case", 0, -8, 259, Z_OK); + inf("63 60 60 18 c9 0 8 18 18 18 26 c0 28 0 29 0 0 0", + "contiguous and wrap around window", 6, -8, 259, Z_OK); + inf("63 0 3 0 0 0 0 0", "copy direct from output", 0, -8, 259, + Z_STREAM_END); +} + +int main(void) +{ + fprintf(stderr, "%s\n", zlibVersion()); + cover_support(); + cover_wrap(); + cover_back(); + cover_inflate(); + cover_trees(); + cover_fast(); + return 0; +} diff --git a/src/deps/src/zlib/test/minigzip.c b/src/deps/src/zlib/test/minigzip.c new file mode 100644 index 000000000..134e10e6c --- /dev/null +++ b/src/deps/src/zlib/test/minigzip.c @@ -0,0 +1,579 @@ +/* minigzip.c -- simulate gzip using the zlib compression library + * Copyright (C) 1995-2006, 2010, 2011, 2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * minigzip is a minimal implementation of the gzip utility. This is + * only an example of using zlib and isn't meant to replace the + * full-featured gzip. No attempt is made to deal with file systems + * limiting names to 14 or 8+3 characters, etc... Error checking is + * very limited. So use minigzip only for testing; use gzip for the + * real thing. On MSDOS, use only on file names without extension + * or in pipe mode. + */ + +/* @(#) $Id$ */ + +#include "zlib.h" +#include + +#ifdef STDC +# include +# include +#endif + +#ifdef USE_MMAP +# include +# include +# include +#endif + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include +# include +# ifdef UNDER_CE +# include +# endif +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +#endif + +#ifdef VMS +# define unlink delete +# define GZ_SUFFIX "-gz" +#endif +#ifdef RISCOS +# define unlink remove +# define GZ_SUFFIX "-gz" +# define fileno(file) file->__file +#endif +#if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fileno */ +#endif + +#if !defined(Z_HAVE_UNISTD_H) && !defined(_LARGEFILE64_SOURCE) +#ifndef WIN32 /* unlink already in stdio.h for WIN32 */ + extern int unlink(const char *); +#endif +#endif + +#if defined(UNDER_CE) +# include +# define perror(s) pwinerror(s) + +/* Map the Windows error number in ERROR to a locale-dependent error + message string and return a pointer to it. Typically, the values + for ERROR come from GetLastError. + + The string pointed to shall not be modified by the application, + but may be overwritten by a subsequent call to strwinerror + + The strwinerror function does not change the current setting + of GetLastError. */ + +static char *strwinerror (error) + DWORD error; +{ + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +static void pwinerror (s) + const char *s; +{ + if (s && *s) + fprintf(stderr, "%s: %s\n", s, strwinerror(GetLastError ())); + else + fprintf(stderr, "%s\n", strwinerror(GetLastError ())); +} + +#endif /* UNDER_CE */ + +#ifndef GZ_SUFFIX +# define GZ_SUFFIX ".gz" +#endif +#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) + +#define BUFLEN 16384 +#define MAX_NAME_LEN 1024 + +#ifdef MAXSEG_64K +# define local static + /* Needed for systems with limitation on stack size. */ +#else +# define local +#endif + +#ifdef Z_SOLO +/* for Z_SOLO, create simplified gz* functions using deflate and inflate */ + +#if defined(Z_HAVE_UNISTD_H) || defined(Z_LARGE) +# include /* for unlink() */ +#endif + +static void *myalloc(void *q, unsigned n, unsigned m) { + (void)q; + return calloc(n, m); +} + +static void myfree(void *q, void *p) { + (void)q; + free(p); +} + +typedef struct gzFile_s { + FILE *file; + int write; + int err; + char *msg; + z_stream strm; +} *gzFile; + +static gzFile gz_open(const char *path, int fd, const char *mode) { + gzFile gz; + int ret; + + gz = malloc(sizeof(struct gzFile_s)); + if (gz == NULL) + return NULL; + gz->write = strchr(mode, 'w') != NULL; + gz->strm.zalloc = myalloc; + gz->strm.zfree = myfree; + gz->strm.opaque = Z_NULL; + if (gz->write) + ret = deflateInit2(&(gz->strm), -1, 8, 15 + 16, 8, 0); + else { + gz->strm.next_in = 0; + gz->strm.avail_in = Z_NULL; + ret = inflateInit2(&(gz->strm), 15 + 16); + } + if (ret != Z_OK) { + free(gz); + return NULL; + } + gz->file = path == NULL ? fdopen(fd, gz->write ? "wb" : "rb") : + fopen(path, gz->write ? "wb" : "rb"); + if (gz->file == NULL) { + gz->write ? deflateEnd(&(gz->strm)) : inflateEnd(&(gz->strm)); + free(gz); + return NULL; + } + gz->err = 0; + gz->msg = ""; + return gz; +} + +static gzFile gzopen(const char *path, const char *mode) { + return gz_open(path, -1, mode); +} + +static gzFile gzdopen(int fd, const char *mode) { + return gz_open(NULL, fd, mode); +} + +static int gzwrite(gzFile gz, const void *buf, unsigned len) { + z_stream *strm; + unsigned char out[BUFLEN]; + + if (gz == NULL || !gz->write) + return 0; + strm = &(gz->strm); + strm->next_in = (void *)buf; + strm->avail_in = len; + do { + strm->next_out = out; + strm->avail_out = BUFLEN; + (void)deflate(strm, Z_NO_FLUSH); + fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); + } while (strm->avail_out == 0); + return len; +} + +static int gzread(gzFile gz, void *buf, unsigned len) { + int ret; + unsigned got; + unsigned char in[1]; + z_stream *strm; + + if (gz == NULL || gz->write) + return 0; + if (gz->err) + return 0; + strm = &(gz->strm); + strm->next_out = (void *)buf; + strm->avail_out = len; + do { + got = fread(in, 1, 1, gz->file); + if (got == 0) + break; + strm->next_in = in; + strm->avail_in = 1; + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_DATA_ERROR) { + gz->err = Z_DATA_ERROR; + gz->msg = strm->msg; + return 0; + } + if (ret == Z_STREAM_END) + inflateReset(strm); + } while (strm->avail_out); + return len - strm->avail_out; +} + +static int gzclose(gzFile gz) { + z_stream *strm; + unsigned char out[BUFLEN]; + + if (gz == NULL) + return Z_STREAM_ERROR; + strm = &(gz->strm); + if (gz->write) { + strm->next_in = Z_NULL; + strm->avail_in = 0; + do { + strm->next_out = out; + strm->avail_out = BUFLEN; + (void)deflate(strm, Z_FINISH); + fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); + } while (strm->avail_out == 0); + deflateEnd(strm); + } + else + inflateEnd(strm); + fclose(gz->file); + free(gz); + return Z_OK; +} + +static const char *gzerror(gzFile gz, int *err) { + *err = gz->err; + return gz->msg; +} + +#endif + +static char *prog; + +/* =========================================================================== + * Display error message and exit + */ +static void error(const char *msg) { + fprintf(stderr, "%s: %s\n", prog, msg); + exit(1); +} + +#ifdef USE_MMAP /* MMAP version, Miguel Albrecht */ + +/* Try compressing the input file at once using mmap. Return Z_OK if + * success, Z_ERRNO otherwise. + */ +static int gz_compress_mmap(FILE *in, gzFile out) { + int len; + int err; + int ifd = fileno(in); + caddr_t buf; /* mmap'ed buffer for the entire input file */ + off_t buf_len; /* length of the input file */ + struct stat sb; + + /* Determine the size of the file, needed for mmap: */ + if (fstat(ifd, &sb) < 0) return Z_ERRNO; + buf_len = sb.st_size; + if (buf_len <= 0) return Z_ERRNO; + + /* Now do the actual mmap: */ + buf = mmap((caddr_t) 0, buf_len, PROT_READ, MAP_SHARED, ifd, (off_t)0); + if (buf == (caddr_t)(-1)) return Z_ERRNO; + + /* Compress the whole file at once: */ + len = gzwrite(out, (char *)buf, (unsigned)buf_len); + + if (len != (int)buf_len) error(gzerror(out, &err)); + + munmap(buf, buf_len); + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); + return Z_OK; +} +#endif /* USE_MMAP */ + +/* =========================================================================== + * Compress input to output then close both files. + */ + +static void gz_compress(FILE *in, gzFile out) { + local char buf[BUFLEN]; + int len; + int err; + +#ifdef USE_MMAP + /* Try first compressing with mmap. If mmap fails (minigzip used in a + * pipe), use the normal fread loop. + */ + if (gz_compress_mmap(in, out) == Z_OK) return; +#endif + for (;;) { + len = (int)fread(buf, 1, sizeof(buf), in); + if (ferror(in)) { + perror("fread"); + exit(1); + } + if (len == 0) break; + + if (gzwrite(out, buf, (unsigned)len) != len) error(gzerror(out, &err)); + } + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); +} + +/* =========================================================================== + * Uncompress input to output then close both files. + */ +static void gz_uncompress(gzFile in, FILE *out) { + local char buf[BUFLEN]; + int len; + int err; + + for (;;) { + len = gzread(in, buf, sizeof(buf)); + if (len < 0) error (gzerror(in, &err)); + if (len == 0) break; + + if ((int)fwrite(buf, 1, (unsigned)len, out) != len) { + error("failed fwrite"); + } + } + if (fclose(out)) error("failed fclose"); + + if (gzclose(in) != Z_OK) error("failed gzclose"); +} + + +/* =========================================================================== + * Compress the given file: create a corresponding .gz file and remove the + * original. + */ +static void file_compress(char *file, char *mode) { + local char outfile[MAX_NAME_LEN]; + FILE *in; + gzFile out; + + if (strlen(file) + strlen(GZ_SUFFIX) >= sizeof(outfile)) { + fprintf(stderr, "%s: filename too long\n", prog); + exit(1); + } + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(outfile, sizeof(outfile), "%s%s", file, GZ_SUFFIX); +#else + strcpy(outfile, file); + strcat(outfile, GZ_SUFFIX); +#endif + + in = fopen(file, "rb"); + if (in == NULL) { + perror(file); + exit(1); + } + out = gzopen(outfile, mode); + if (out == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, outfile); + exit(1); + } + gz_compress(in, out); + + unlink(file); +} + + +/* =========================================================================== + * Uncompress the given file and remove the original. + */ +static void file_uncompress(char *file) { + local char buf[MAX_NAME_LEN]; + char *infile, *outfile; + FILE *out; + gzFile in; + z_size_t len = strlen(file); + + if (len + strlen(GZ_SUFFIX) >= sizeof(buf)) { + fprintf(stderr, "%s: filename too long\n", prog); + exit(1); + } + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(buf, sizeof(buf), "%s", file); +#else + strcpy(buf, file); +#endif + + if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) { + infile = file; + outfile = buf; + outfile[len-3] = '\0'; + } else { + outfile = file; + infile = buf; +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(buf + len, sizeof(buf) - len, "%s", GZ_SUFFIX); +#else + strcat(infile, GZ_SUFFIX); +#endif + } + in = gzopen(infile, "rb"); + if (in == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, infile); + exit(1); + } + out = fopen(outfile, "wb"); + if (out == NULL) { + perror(file); + exit(1); + } + + gz_uncompress(in, out); + + unlink(infile); +} + + +/* =========================================================================== + * Usage: minigzip [-c] [-d] [-f] [-h] [-r] [-1 to -9] [files...] + * -c : write to standard output + * -d : decompress + * -f : compress with Z_FILTERED + * -h : compress with Z_HUFFMAN_ONLY + * -r : compress with Z_RLE + * -1 to -9 : compression level + */ + +int main(int argc, char *argv[]) { + int copyout = 0; + int uncompr = 0; + gzFile file; + char *bname, outmode[20]; + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(outmode, sizeof(outmode), "%s", "wb6 "); +#else + strcpy(outmode, "wb6 "); +#endif + + prog = argv[0]; + bname = strrchr(argv[0], '/'); + if (bname) + bname++; + else + bname = argv[0]; + argc--, argv++; + + if (!strcmp(bname, "gunzip")) + uncompr = 1; + else if (!strcmp(bname, "zcat")) + copyout = uncompr = 1; + + while (argc > 0) { + if (strcmp(*argv, "-c") == 0) + copyout = 1; + else if (strcmp(*argv, "-d") == 0) + uncompr = 1; + else if (strcmp(*argv, "-f") == 0) + outmode[3] = 'f'; + else if (strcmp(*argv, "-h") == 0) + outmode[3] = 'h'; + else if (strcmp(*argv, "-r") == 0) + outmode[3] = 'R'; + else if ((*argv)[0] == '-' && (*argv)[1] >= '1' && (*argv)[1] <= '9' && + (*argv)[2] == 0) + outmode[2] = (*argv)[1]; + else + break; + argc--, argv++; + } + if (outmode[3] == ' ') + outmode[3] = 0; + if (argc == 0) { + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + if (uncompr) { + file = gzdopen(fileno(stdin), "rb"); + if (file == NULL) error("can't gzdopen stdin"); + gz_uncompress(file, stdout); + } else { + file = gzdopen(fileno(stdout), outmode); + if (file == NULL) error("can't gzdopen stdout"); + gz_compress(stdin, file); + } + } else { + if (copyout) { + SET_BINARY_MODE(stdout); + } + do { + if (uncompr) { + if (copyout) { + file = gzopen(*argv, "rb"); + if (file == NULL) + fprintf(stderr, "%s: can't gzopen %s\n", prog, *argv); + else + gz_uncompress(file, stdout); + } else { + file_uncompress(*argv); + } + } else { + if (copyout) { + FILE * in = fopen(*argv, "rb"); + + if (in == NULL) { + perror(*argv); + } else { + file = gzdopen(fileno(stdout), outmode); + if (file == NULL) error("can't gzdopen stdout"); + + gz_compress(in, file); + } + + } else { + file_compress(*argv, outmode); + } + } + } while (argv++, --argc); + } + return 0; +}