NCBI C++ ToolKit
ct_nlmzip_inflate.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ct_nlmzip_inflate.cpp 94680 2021-08-30 13:02:50Z vakatov $ */
2 /*****************************************************************************
3 
4  Name: Nlmzip_inflate.c ur/compr/Nlmzip_inflate.c
5 
6  Description: Utility functions for uncompress data
7 
8  Author: Grisha Starchenko
9  InforMax, Inc.
10  Gaithersburg, USA.
11 
12  ***************************************************************************
13 
14  PUBLIC DOMAIN NOTICE
15  National Center for Biotechnology Information
16 
17  This software/database is a "United States Government Work" under the
18  terms of the United States Copyright Act. It was written as part of
19  the author's official duties as a United States Government employee
20  and thus cannot be copyrighted. This software/database is freely
21  available to the public for use. The National Library of Medicine and
22  the U.S. Government have not placed any restriction on its use or
23  reproduction.
24 
25  Although all reasonable efforts have been taken to ensure the accuracy
26  and reliability of the software and data, the NLM and the U.S.
27  Government do not and cannot warrant the performance or results that
28  may be obtained by using this software or data. The NLM and the U.S.
29  Government disclaim all warranties, express or implied, including
30  warranties of performance, merchantability or fitness for any
31  particular purpose.
32 
33  Please cite the author in any work or product based on this material.
34 
35  ***************************************************************************
36 
37  Entry Points:
38 
39  int
40  Nlmzip_inflate (void)
41 
42  Modification History:
43  11 Aug 1995 - grisha - original written
44 
45  Bugs and restriction on use:
46 
47  Notes:
48  1. Distance pointers never point before the beginning of the output
49  stream.
50 
51  2. Distance pointers can point back across blocks, up to 32k away.
52 
53  3. There is an implied maximum of 7 bits for the bit length table and
54  15 bits for the actual data.
55  4. If only one code exists, then it is encoded using one bit. (Zero
56  would be more efficient, but perhaps a little confusing.) If two
57  codes exist, they are coded using one bit each (0 and 1).
58 
59  5. There is no way of sending zero distance codes--a dummy must be
60  sent if there are none. (History: a pre 2.0 version of PKZIP would
61  store blocks with no distance codes, but this was discovered to be
62  too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
63  zero distance codes, which is sent as one code of zero bits in
64  length.
65 
66  6. There are up to 286 literal/length codes. Code 256 represents the
67  end-of-block. Note however that the static length tree defines
68  288 codes just to fill out the Huffman codes. Codes 286 and 287
69  cannot be used though, since there is no length base or extra bits
70  defined for them. Similarly, there are up to 30 distance codes.
71  However, static trees define 32 codes (all 5 bits) to fill out the
72  Huffman codes, but the last two had better not show up in the data.
73 
74  7. Unzip can check dynamic Huffman blocks for complete code sets.
75  The exception is that a single code would not be complete (see #4).
76 
77  8. The five bits following the block type is really the number of
78  literal codes sent minus 257.
79 
80  9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
81  (1+6+6). Therefore, to output three times the length, you output
82  three codes (1+1+1), whereas to output four times the same length,
83  you only need two codes (1+3). Hmm.
84 
85  10. In the tree reconstruction algorithm, Code = Code + Increment
86  only if BitLength(i) is not zero. (Pretty obvious.)
87 
88  11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
89 
90  12. Note: length code 284 can represent 227-258, but length code 285
91  really is 258. The last length deserves its own, short code
92  since it gets used a lot in very redundant files. The length
93  258 is special since 258 - 3 (the min match length) is 255.
94 
95  13. The literal/length and distance code bit lengths are read as a
96  single stream of lengths. It is possible (and advantageous) for
97  a repeat code (16, 17, or 18) to go across the boundary between
98  the two sets of lengths.
99 
100 *****************************************************************************/
101 
102 #include <ncbi_pch.hpp>
103 #include "ct_nlmzip_i.h"
104 
106 
107 
108 /****************************************************************************/
109 /* DEFINES */
110 /****************************************************************************/
111 /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
112 #define BMAX 16 /* maximum bit length of any code (16 for explode) */
113 #define N_MAX 288 /* maximum number of codes in any set */
114 
115 /* The Nlmzip_inflate algorithm uses a sliding 32K byte Nlmzip_window on the uncompressed
116  stream to find repeated byte strings. This is implemented here as a
117  circular buffer. The index is updated simply by incrementing and then
118  and'ing with 0x7fff (32K-1). It is left to other modules to supply
119  the 32K area. It is assumed to be usable as if it were declared
120  "uch Nlmzip_window[32768];" or as just "uch *Nlmzip_window;" and then malloc'ed
121  in the latter case. The definition must be in unzip.h, included above.
122 */
123 #define flush_output(w) (Nlmzip_outcnt=(w),Nlmzip_flush_window())
124 
125 /****************************************************************************/
126 /* STRUCT DEFINITIONS */
127 /****************************************************************************/
128 /* Huffman code lookup table entry--this entry is four bytes for machines
129  that have 16-bit pointers (e.g. PC's in the small or medium model).
130  Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16
131  means that v is a literal, 16 < e < 32 means that v is a pointer to
132  the next table, which codes e - 16 bits, and lastly e == 99 indicates
133  an unused code. If a code with e == 99 is looked up, this implies an
134  error in the data.
135 */
136 struct huft {
137  uch e; /* number of extra bits or operation */
138  uch b; /* number of bits in this code or subcode */
139  union {
140  ush n; /* literal, length base, or distance base */
141  struct huft *t; /* pointer to next Nlmzip_level of table */
142  } v;
143 };
144 
145 /****************************************************************************/
146 /* LOCAL FUNCTIONS PROTOTYPES */
147 /****************************************************************************/
148 
149 static int inflate_codes _((struct huft*,struct huft*,int,int));
150 static int huft_build _((
151  Uint4*,Uint4,Uint4,ush*,ush*,struct huft**,int*
152  ));
153 static int huft_free _((struct huft*));
154 static int inflate_stored _((void));
155 static int inflate_fixed _((void));
156 static int inflate_dynamic _((void));
157 static int inflate_block _((int*));
158 
159 /****************************************************************************/
160 /* LOCAL VARIABLES */
161 /****************************************************************************/
162 /* Tables for Nlmzip_deflate */
163 static Uint4 border[] = { /* Order of the bit length code lengths */
164  16, 17, 18, 0, 8, 7, 9, 6,
165  10, 5, 11, 4, 12, 3, 13, 2,
166  14, 1, 15
167  };
168 
169 static unsigned short cplens[] = { /* Copy lengths for literal 257..285 */
170  3, 4, 5, 6, 7, 8, 9, 10,
171  11, 13, 15, 17, 19, 23, 27, 31,
172  35, 43, 51, 59, 67, 83, 99, 115,
173  131, 163, 195, 227, 258, 0, 0
174  };
175 
176 /* note: see note #13 above about the 258 in this list. */
177 static ush cplext[] = { /* Extra bits for literal codes 257..285 */
178  0, 0, 0, 0, 0, 0, 0, 0,
179  1, 1, 1, 1, 2, 2, 2, 2,
180  3, 3, 3, 3, 4, 4, 4, 4,
181  5, 5, 5, 5, 0, 99, 99
182  }; /* 99==invalid */
183 
184 static ush cpdist[] = { /* Copy offsets for distance codes 0..29 */
185  1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
186  257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
187  8193, 12289, 16385, 24577
188  };
189 static ush cpdext[] = { /* Extra bits for distance codes */
190  0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
191  7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
192  12, 12, 13, 13
193  };
194 
195 /* Macros for Nlmzip_inflate() bit peeking and grabbing.
196  The usage is:
197 
198  NEEDBITS(j)
199  x = b & mask_bits[j];
200  DUMPBITS(j)
201 
202  where NEEDBITS makes sure that b has at least j bits in it, and
203  DUMPBITS removes the bits from b. The macros use the variable k
204  for the number of bits in b. Normally, b and k are register
205  variables for speed, and are initialized at the beginning of a
206  routine that uses these macros from a global bit buffer and count.
207 
208  If we assume that EOB will be the longest code, then we will never
209  ask for bits with NEEDBITS that are beyond the end of the stream.
210  So, NEEDBITS should not read any more bytes than are needed to
211  meet the request. Then no bytes need to be "returned" to the buffer
212  at the end of the last block.
213 
214  However, this assumption is not true for fixed blocks--the EOB code
215  is 7 bits, but the other literal/length codes can be 8 or 9 bits.
216  (The EOB code is shorter than other codes because fixed blocks are
217  generally short. So, while a block always has an EOB, many other
218  literal/length codes have a significantly lower probability of
219  showing up at all.) However, by making the first table have a
220  lookup of seven bits, the EOB code will be found in that first
221  lookup, and so will not require that too many bits be pulled from
222  the stream.
223 */
224 static ulg bb; /* bit buffer */
225 static Uint4 bk; /* bits in bit buffer */
226 static unsigned short mask_bits[] = {
227  0x0000,
228  0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
229  0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
230 };
231 
232 #define NEEDBITS(n) { \
233  while ( k < (n) ) { \
234  b |= ((ulg)Nlmzip_ReadByte()) << k; \
235  k+=8; \
236  } \
237  }
238 #define DUMPBITS(n) { \
239  b >>= (n); \
240  k -= (n); \
241  }
242 
243 /* Huffman code decoding is performed using a multi-Nlmzip_level table lookup.
244  The fastest way to decode is to simply build a lookup table whose
245  size is determined by the longest code. However, the time it takes
246  to build this table can also be a factor if the data being decoded
247  is not very long. The most common codes are necessarily the
248  shortest codes, so those codes dominate the decoding time, and hence
249  the speed. The idea is you can have a shorter table that decodes the
250  shorter, more probable codes, and then point to subsidiary tables for
251  the longer codes. The time it costs to decode the longer codes is
252  then traded against the time it takes to make longer tables.
253 
254  This results of this trade are in the variables lbits and dbits
255  below. lbits is the number of bits the first Nlmzip_level table for literal/
256  length codes can decode in one step, and dbits is the same thing for
257  the distance codes. Subsequent tables are also less than or equal to
258  those sizes. These values may be adjusted either when all of the
259  codes are shorter than that, in which case the longest code length in
260  bits is used, or when the shortest code is *longer* than the requested
261  table size, in which case the length of the shortest code in bits is
262  used.
263 
264  There are two different values for the two tables, since they code a
265  different number of possibilities each. The literal/length table
266  codes 286 possible values, or in a flat code, a little over eight
267  bits. The distance table codes 30 possible values, or a little less
268  than five bits, flat. The optimum values for speed end up being
269  about one bit more than those, so lbits is 8+1 and dbits is 5+1.
270  The optimum values may differ though from machine to machine, and
271  possibly even between compilers. Your mileage may vary.
272 */
273 static int lbits = 9; /* bits in base literal/length lookup table */
274 static int dbits = 6; /* bits in base distance lookup table */
275 
276 /* track memory usage */
277 static Uint4 hufts;
278 
279 /****************************************************************************/
280 /* LOCAL FUNCTIONS */
281 /****************************************************************************/
282 
283 /****************************************************************************/
284 /*.doc huft_build (internal) */
285 /*+
286  Given a list of code lengths and a maximum table size, make a set of
287  tables to decode that set of codes. Return zero on success, one if
288  the given code set is incomplete (the tables are still built in this
289  case), two if the input is invalid (all zero length codes or an
290  oversubscribed set of lengths), and three if not enough memory.
291 -*/
292 /****************************************************************************/
293 static int
294 huft_build ( /*FCN*/
295  Uint4* b, /* code lengths in bits (all assumed <= BMAX) */
296  Uint4 n, /* number of codes (assumed <= N_MAX) */
297  Uint4 s, /* number of simple-valued codes (0..s-1) */
298  unsigned short* d, /* list of base values for non-simple codes */
299  unsigned short* e, /* list of extra bits for non-simple codes */
300  struct huft** t, /* result: starting table */
301  int* m /* maximum lookup bits, returns actual */
302 ){
303  Uint4 a; /* counter for codes of length k */
304  Uint4 c[BMAX+1]; /* bit length count table */
305  Uint4 f; /* i repeats in table every f entries */
306  int g; /* maximum code length */
307  int h; /* table Nlmzip_level */
308  Uint4 i; /* counter, current code */
309  Uint4 j; /* counter */
310  int k; /* number of bits in current code */
311  int l; /* bits per table (returned in m) */
312  Uint4 *p; /* pointer into c[], b[], or v[] */
313  struct huft *q; /* points to current table */
314  struct huft r; /* table entry for structure assignment */
315  struct huft *u[BMAX]; /* table stack */
316  Uint4 v[N_MAX]; /* values in order of bit length */
317  int w; /* bits before this table == (l * h) */
318  Uint4 x[BMAX+1]; /* bit offsets, then code stack */
319  Uint4 *xp; /* pointer into x */
320  int y; /* number of dummy codes added */
321  Uint4 z; /* number of entries in current table */
322 
323 
324  /* Generate counts for each bit length */
325  memset(c, 0, sizeof(c));
326  p = b; i = n;
327  do {
328  c[*p]++; /* assume all entries <= BMAX */
329  p++; /* Can't combine with above line (Solaris bug) */
330  } while (--i);
331 
332  if ( c[0] == n ) { /* null input--all zero length codes */
333  *t = (struct huft *)NULL;
334  *m = 0;
335  return 0;
336  }
337 
338  /* Find minimum and maximum length, bound *m by those */
339  l = *m;
340  for ( j = 1; j <= BMAX; j++ ) {
341  if ( c[j] ) {
342  break;
343  }
344  }
345  if ( (Uint4)l < (Uint4)(k = j) ) { /* minimum code length */
346  l = j;
347  }
348  for ( i = BMAX; i; i-- ) {
349  if ( c[i] ) {
350  break;
351  }
352  }
353  if ( (Uint4)l > (Uint4)(g = i) ) { /* maximum code length */
354  l = i;
355  }
356  *m = l;
357 
358 
359  /* Adjust last length count to fill out codes, if needed */
360  for (y = 1 << j; j < i; j++, y <<= 1)
361  if ((y -= c[j]) < 0)
362  return 2; /* bad input: more codes than bits */
363  if ((y -= c[i]) < 0)
364  return 2;
365  c[i] += y;
366 
367 
368  /* Generate starting offsets into the value table for each length */
369  x[1] = j = 0;
370  p = c + 1; xp = x + 2;
371  while (--i) { /* note that i == g from above */
372  *xp++ = (j += *p++);
373  }
374 
375 
376  /* Make a table of values in order of bit lengths */
377  p = b; i = 0;
378  do {
379  if ((j = *p++) != 0)
380  v[x[j]++] = i;
381  } while (++i < n);
382 
383 
384  /* Generate the Huffman codes and for each, make the table entries */
385  x[0] = i = 0; /* first Huffman code is zero */
386  p = v; /* grab values in bit order */
387  h = -1; /* no tables yet--Nlmzip_level -1 */
388  w = -l; /* bits decoded == (l * h) */
389  u[0] = (struct huft *)NULL; /* just to keep compilers happy */
390  q = (struct huft *)NULL; /* ditto */
391  z = 0; /* ditto */
392 
393  /* go through the bit lengths (k already is bits in shortest code) */
394  for (; k <= g; k++)
395  {
396  a = c[k];
397  while (a--)
398  {
399  /* here i is the Huffman code of length k bits for value *p */
400  /* make tables up to required Nlmzip_level */
401  while (k > w + l)
402  {
403  h++;
404  w += l; /* previous table always l bits */
405 
406  /* compute minimum size table less than or equal to l bits */
407  z = (z = g - w) > (Uint4)l ? l : z; /* upper limit on table size */
408  if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
409  { /* too few codes for k-w bit table */
410  f -= a + 1; /* deduct codes from patterns left */
411  xp = c + k;
412  while (++j < z) /* try smaller tables up to z bits */
413  {
414  if ((f <<= 1) <= *++xp)
415  break; /* enough codes to use up j bits */
416  f -= *xp; /* else deduct codes from patterns */
417  }
418  }
419  z = 1 << j; /* table entries for j-bit table */
420 
421  /* allocate and link in new table */
422  if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) ==
423  (struct huft *)NULL)
424  {
425  if (h)
426  huft_free(u[0]);
427  return 3; /* not enough memory */
428  }
429  hufts += z + 1; /* track memory usage */
430  *t = q + 1; /* link to list for huft_free() */
431  *(t = &(q->v.t)) = (struct huft *)NULL;
432  u[h] = ++q; /* table starts after link */
433 
434  /* connect to last table, if there is one */
435  if (h)
436  {
437  x[h] = i; /* save pattern for backing up */
438  r.b = (uch)l; /* bits to dump before this table */
439  r.e = (uch)(16 + j); /* bits in this table */
440  r.v.t = q; /* pointer to this table */
441  j = i >> (w - l); /* (get around Turbo C bug) */
442  u[h-1][j] = r; /* connect to last table */
443  }
444  }
445 
446  /* set up table entry in r */
447  r.b = (uch)(k - w);
448  if (p >= v + n)
449  r.e = 99; /* out of values--invalid code */
450  else if (*p < s)
451  {
452  r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */
453  r.v.n = (ush)(*p); /* simple code is just the value */
454  p++; /* one compiler does not like *p++ */
455  }
456  else
457  {
458  r.e = (uch)e[*p - s]; /* non-simple--look up in lists */
459  r.v.n = d[*p++ - s];
460  }
461 
462  /* fill code-like entries with r */
463  f = 1 << (k - w);
464  for (j = i >> w; j < z; j += f)
465  q[j] = r;
466 
467  /* backwards increment the k-bit code i */
468  for (j = 1 << (k - 1); i & j; j >>= 1)
469  i ^= j;
470  i ^= j;
471 
472  /* backup over finished tables */
473  while ((i & ((1 << w) - 1)) != x[h])
474  {
475  h--; /* don't need to update q */
476  w -= l;
477  }
478  }
479  }
480 
481  /* Return true (1) if we were given an incomplete table */
482  return y != 0 && g != 1;
483 } /* huft_build() */
484 
485 /****************************************************************************/
486 /*.doc huft_free (internal) */
487 /*+
488  Free the malloc'ed tables built by huft_build(), which makes a linked
489  list of the tables it made, with the links in a dummy first entry of
490  each table.
491 -*/
492 /****************************************************************************/
493 static int
494 huft_free ( /*FCN*/
495  struct huft* t /* table to free */
496 ){
497  struct huft* p;
498  struct huft* q;
499 
500  /* Go through linked list, freeing from the malloced (t[-1]) address. */
501  p = t;
502  while ( p != NULL ) {
503  q = (--p)->v.t;
504  free((char*)p);
505  p = q;
506  }
507  return 0;
508 }
509 
510 /****************************************************************************/
511 /*.doc inflate_codes (internal) */
512 /*+
513  Inflate (decompress) the codes in a deflated (compressed) block.
514  Return an error code or zero if it all goes ok.
515 -*/
516 /****************************************************************************/
517 static int
518 inflate_codes ( /*FCN*/
519  struct huft *tl, /* literal/length and distance decoder tables */
520  struct huft *td,
521  int bl, /* number of bits decoded by tl[] and td[] */
522  int bd
523 ){
524  Uint4 e; /* table entry flag/number of extra bits */
525  Uint4 n, d; /* length and index for copy */
526  Uint4 w; /* current Nlmzip_window position */
527  struct huft *t; /* pointer to table entry */
528  Uint4 ml, md; /* masks for bl and bd bits */
529  ulg b; /* bit buffer */
530  Uint4 k; /* number of bits in bit buffer */
531 
532 
533  /* make local copies of globals */
534  b = bb; /* initialize bit buffer */
535  k = bk;
536  w = Nlmzip_outcnt; /* initialize Nlmzip_window position */
537 
538  /* Nlmzip_inflate the coded data */
539  ml = mask_bits[bl]; /* precompute masks for speed */
540  md = mask_bits[bd];
541  for (;;) { /* do until end of block */
542  NEEDBITS((Uint4)bl)
543  if ( (e = (t = tl + ((Uint4)b & ml))->e) > 16 ) {
544  do {
545  if ( e == 99 ) {
546  return 1;
547  }
548  DUMPBITS(t->b)
549  e -= 16;
550  NEEDBITS(e)
551  } while (
552  (e = (t = t->v.t + ((Uint4)b & mask_bits[e]))->e) > 16
553  );
554  }
555  DUMPBITS(t->b)
556  if ( e == 16 ) { /* then it's a literal */
557  Nlmzip_window[w++] = (uch)t->v.n;
558  if ( w == WSIZE ) {
559  flush_output(w);
560  w = 0;
561  }
562  } else { /* it's an EOB or a length */
563  /* exit if end of block */
564  if ( e == 15 ) {
565  break;
566  }
567 
568  /* get length of block to copy */
569  NEEDBITS(e)
570  n = t->v.n + ((Uint4)b & mask_bits[e]);
571  DUMPBITS(e);
572 
573  /* decode distance of block to copy */
574  NEEDBITS((Uint4)bd)
575  if ( (e = (t = td + ((Uint4)b & md))->e) > 16 ) {
576  do {
577  if ( e == 99 ) {
578  return 1;
579  }
580  DUMPBITS(t->b)
581  e -= 16;
582  NEEDBITS(e)
583  } while (
584  (e = (t = t->v.t + ((Uint4)b & mask_bits[e]))->e) > 16
585  );
586  }
587  DUMPBITS(t->b)
588  NEEDBITS(e)
589  d = w - t->v.n - ((Uint4)b & mask_bits[e]);
590  DUMPBITS(e)
591 
592  /* do the copy */
593  do {
594  n -= (e = (
595  e = WSIZE - ((d &= WSIZE-1) > w ? d : w)
596  ) > n ? n : e
597  );
598  /* (this test assumes unsigned comparison) */
599  if ( w - d >= e ) {
600  memcpy(Nlmzip_window + w, Nlmzip_window + d, e);
601  w += e;
602  d += e;
603  } else { /* do it slow to avoid memcpy() overlap */
604  do {
605  Nlmzip_window[w++] = Nlmzip_window[d++];
606  } while (--e);
607  }
608  if ( w == WSIZE ) {
609  flush_output(w);
610  w = 0;
611  }
612  } while (n);
613  }
614  }
615 
616 
617  /* restore the globals from the locals */
618  Nlmzip_outcnt = w; /* restore global Nlmzip_window pointer */
619  bb = b; /* restore global bit buffer */
620  bk = k;
621 
622  return 0; /* return to caller */
623 }
624 
625 /****************************************************************************/
626 /*.doc inflate_stored (internal) */
627 /*+
628  "decompress" an inflated type 0 (stored) block.
629 -*/
630 /****************************************************************************/
631 static int
632 inflate_stored (void) /*FCN*/
633 {
634  Uint4 n; /* number of bytes in block */
635  Uint4 w; /* current Nlmzip_window position */
636  ulg b; /* bit buffer */
637  Uint4 k; /* number of bits in bit buffer */
638 
639 
640  /* make local copies of globals */
641  b = bb; /* initialize bit buffer */
642  k = bk;
643  w = Nlmzip_outcnt; /* initialize Nlmzip_window position */
644 
645 
646  /* go to byte boundary */
647  n = k & 7;
648  DUMPBITS(n);
649 
650  /* get the length and its complement */
651  NEEDBITS(16)
652  n = ((Uint4)b & 0xffff);
653  DUMPBITS(16)
654 
655  NEEDBITS(16)
656  if ( n != (Uint4)((~b) & 0xffff) ) {
657  URCOMPRERR("error in compressed data");
658  /* never be here */
659  }
660  DUMPBITS(16)
661 
662  /* read and output the compressed data */
663  while ( n-- ) {
664  NEEDBITS(8)
665  Nlmzip_window[w++] = (uch)b;
666  if ( w == WSIZE ) {
667  flush_output(w);
668  w = 0;
669  }
670  DUMPBITS(8)
671  }
672 
673  /* restore the globals from the locals */
674  Nlmzip_outcnt = w; /* restore global Nlmzip_window pointer */
675  bb = b; /* restore global bit buffer */
676  bk = k;
677 
678  return 0;
679 }
680 
681 /****************************************************************************/
682 /*.doc inflate_fixed (internal) */
683 /*+
684  Decompress an inflated type 1 (fixed Huffman codes) block. We should
685  either replace this with a custom decoder, or at least precompute the
686  Huffman tables.
687 -*/
688 /****************************************************************************/
689 static int
690 inflate_fixed (void) /*FCN*/
691 {
692  int i; /* temporary variable */
693  struct huft *tl; /* literal/length code table */
694  struct huft *td; /* distance code table */
695  int bl; /* lookup bits for tl */
696  int bd; /* lookup bits for td */
697  Uint4 l[288]; /* length list for huft_build */
698 
699 
700  /* set up literal table */
701  for ( i = 0; i < 144; i++ ) {
702  l[i] = 8;
703  }
704  for (; i < 256; i++) {
705  l[i] = 9;
706  }
707  for (; i < 280; i++) {
708  l[i] = 7;
709  }
710  for (; i < 288; i++) { /* make a complete, but wrong code set */
711  l[i] = 8;
712  }
713 
714  bl = 7;
715  if ( (i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0 ) {
716  return i;
717  }
718 
719  /* set up distance table */
720  for ( i = 0; i < 30; i++ ) { /* make an incomplete code set */
721  l[i] = 5;
722  }
723  bd = 5;
724  if ( (i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1 ) {
725  huft_free(tl);
726  return i;
727  }
728 
729  /* decompress until an end-of-block code */
730  if ( inflate_codes(tl, td, bl, bd) ) {
731  return 1;
732  }
733 
734  /* free the decoding tables, return */
735  huft_free(tl);
736  huft_free(td);
737 
738  return 0; /* all okay. Return to caller */
739 }
740 
741 /****************************************************************************/
742 /*.doc inflate_dynamic (internal) */
743 /*+
744  Decompress an inflated type 2 (dynamic Huffman codes) block.
745 -*/
746 /****************************************************************************/
747 static int
748 inflate_dynamic (void) /*FCN*/
749 {
750  int i; /* temporary variables */
751  Uint4 j;
752  Uint4 l; /* last length */
753  Uint4 m; /* mask for bit lengths table */
754  Uint4 n; /* number of lengths to get */
755  struct huft *tl; /* literal/length code table */
756  struct huft *td; /* distance code table */
757  int bl; /* lookup bits for tl */
758  int bd; /* lookup bits for td */
759  Uint4 nb; /* number of bit length codes */
760  Uint4 nl; /* number of literal/length codes */
761  Uint4 nd; /* number of distance codes */
762  Uint4 ll[286+30]; /* literal/length and distance code lengths */
763  ulg b; /* bit buffer */
764  Uint4 k; /* number of bits in bit buffer */
765 
766  /* make local bit buffer */
767  b = bb;
768  k = bk;
769 
770  /* read in table lengths */
771  NEEDBITS(5)
772  nl = 257 + ((Uint4)b & 0x1f); /* number of literal/length codes */
773  DUMPBITS(5)
774 
775  NEEDBITS(5)
776  nd = 1 + ((Uint4)b & 0x1f); /* number of distance codes */
777  DUMPBITS(5)
778 
779  NEEDBITS(4)
780  nb = 4 + ((Uint4)b & 0xf); /* number of bit length codes */
781  DUMPBITS(4)
782 
783  if ( nl > 286 || nd > 30 ) {
784  URCOMPRERR("Bad lengths");
785  /* never be here */
786  }
787 
788  /* read in bit-length-code lengths */
789  for (j = 0; j < nb; j++) {
790  NEEDBITS(3)
791  ll[border[j]] = (Uint4)b & 7;
792  DUMPBITS(3)
793  }
794  for (; j < 19; j++ ) {
795  ll[border[j]] = 0;
796  }
797 
798  /* build decoding table for trees--single Nlmzip_level, 7 bit lookup */
799  bl = 7;
800  if ( (i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0 ) {
801  if ( i == 1 ) {
802  huft_free(tl);
803  }
804  return i; /* incomplete code set */
805  }
806 
807  /* read in literal and distance code lengths */
808  n = nl + nd;
809  m = mask_bits[bl];
810  i = l = 0;
811  while ( (Uint4)i < n ) {
812  NEEDBITS((Uint4)bl)
813  j = (td = tl + ((Uint4)b & m))->b;
814  DUMPBITS(j)
815  j = td->v.n;
816  if ( j < 16 ) { /* length of code in bits (0..15) */
817  ll[i++] = l = j; /* save last length in l */
818  } else if (j == 16) { /* repeat last length 3 to 6 times */
819  NEEDBITS(2)
820  j = 3 + ((Uint4)b & 3);
821  DUMPBITS(2)
822  if ( (Uint4)i + j > n ) {
823  return 1;
824  }
825  while ( j-- ) {
826  ll[i++] = l;
827  }
828  } else if (j == 17) { /* 3 to 10 zero length codes */
829  NEEDBITS(3)
830  j = 3 + ((Uint4)b & 7);
831  DUMPBITS(3)
832  if ( (Uint4)i + j > n ) {
833  return 1;
834  }
835  while ( j-- ) {
836  ll[i++] = 0;
837  }
838  l = 0;
839  } else { /* j == 18: 11 to 138 zero length codes */
840  NEEDBITS(7)
841  j = 11 + ((Uint4)b & 0x7f);
842  DUMPBITS(7)
843  if ( (Uint4)i + j > n ) {
844  return 1;
845  }
846  while ( j-- ) {
847  ll[i++] = 0;
848  }
849  l = 0;
850  }
851  }
852 
853  /* free decoding table for trees */
854  huft_free(tl);
855 
856  /* restore the global bit buffer */
857  bb = b;
858  bk = k;
859 
860  /* build the decoding tables for literal/length and distance codes */
861  bl = lbits;
862  if ( (i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0 ) {
863  if ( i == 1 ) {
864  huft_free(tl);
865  URCOMPRERR("Incomplete literal tree");
866  /* never be here */
867  }
868  return i; /* incomplete code set */
869  }
870 
871  bd = dbits;
872  if ( (i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0 ) {
873  if ( i == 1 ) {
874  huft_free(td);
875  huft_free(tl);
876  URCOMPRERR("Incomplete distance tree");
877  /* never be here */
878  }
879  huft_free(tl);
880  return i; /* incomplete code set */
881  }
882 
883  /* decompress until an end-of-block code */
884  if ( inflate_codes(tl, td, bl, bd) ) {
885  return 1;
886  }
887 
888  /* free the decoding tables, return */
889  huft_free(tl);
890  huft_free(td);
891  return 0;
892 }
893 
894 /****************************************************************************/
895 /*.doc inflate_block (internal) */
896 /*+
897  This function decompress an inflated block
898 -*/
899 /****************************************************************************/
900 static int
901 inflate_block ( /*FCN*/
902  int *e /* last block flag */
903 ){
904  Uint4 t; /* block type */
905  ulg b; /* bit buffer */
906  Uint4 k; /* number of bits in bit buffer */
907 
908  b = bb; /* make local bit buffer */
909  k = bk;
910 
911  NEEDBITS(1) /* read in last block bit */
912  *e = (int)b & 1;
913  DUMPBITS(1)
914 
915  NEEDBITS(2) /* read in block type */
916  t = (Uint4)b & 3;
917  DUMPBITS(2)
918 
919  bb = b; /* restore the global bit buffer */
920  bk = k;
921 
922  /* Nlmzip_inflate that block type */
923  if ( t == 2 ) {
924  return inflate_dynamic();
925  }
926  if ( t == 0 ) {
927  return inflate_stored();
928  }
929  if ( t == 1 ) {
930  return inflate_fixed();
931  }
932 
933  URCOMPRERR("Bad block type");
934  return 2; /* never be here */
935 }
936 
937 /****************************************************************************/
938 /* GLOBAL FUNCTIONS */
939 /****************************************************************************/
940 
941 /****************************************************************************/
942 /*.doc Nlmzip_inflate (internal) */
943 /*+
944  Decompress an inflated entry
945 -*/
946 /****************************************************************************/
947 int
948 Nlmzip_inflate (void) /*FCN*/
949 {
950  int e; /* last block flag */
951  int r; /* result code */
952  Uint4 h; /* maximum struct huft's malloc'ed */
953 
954  /* initialize Nlmzip_window, bit buffer */
955  Nlmzip_outcnt = 0;
956  bk = 0;
957  bb = 0;
958 
959  /* decompress until the last block */
960  h = 0;
961  do
962  {
963  hufts = 0;
964  if ( (r = inflate_block(&e)) != 0 )
965  return r;
966  if ( hufts > h )
967  h = hufts;
968  }
969  while ( !e );
970 
971  /*
972  * Undo too much lookahead. The next read will be byte aligned
973  * so we can discard unused bits in the last meaningful byte.
974  */
975  while ( bk >= 8 )
976  {
977  bk -= 8;
978  Nlmzip_ReadUndo ();
979  }
980 
981  flush_output(Nlmzip_outcnt); /* flush out Nlmzip_window */
982 
983  return 0; /* return success */
984 }
985 
986 
void Nlmzip_ReadUndo(void)
Uint4 Nlmzip_outcnt
unsigned char Nlmzip_window[2L *0x8000]
#define WSIZE
Definition: ct_nlmzip_i.h:132
#define URCOMPRERR(x)
Definition: ct_nlmzip_i.h:180
#define _(proto)
Definition: ct_nlmzip_i.h:78
#define BMAX
#define N_MAX
static int inflate_codes(struct huft *, struct huft *, int, int)
static Uint4 border[]
int Nlmzip_inflate(void)
static Uint4 bk
static int lbits
static unsigned short mask_bits[]
static Uint4 hufts
static ush cpdext[]
static int inflate_dynamic(void)
static ulg bb
#define DUMPBITS(n)
static int inflate_fixed(void)
static unsigned short cplens[]
static int huft_free(struct huft *)
static int huft_build(Uint4 *, Uint4, Uint4, ush *, ush *, struct huft **, int *)
static int inflate_block(int *)
static ush cpdist[]
#define NEEDBITS(n)
#define flush_output(w)
static ush cplext[]
static int dbits
static int inflate_stored(void)
static FILE * f
Definition: readconf.c:23
#define NULL
Definition: ncbistd.hpp:225
#define BEGIN_CTRANSITION_SCOPE
Definition: ncbilcl.hpp:49
#define END_CTRANSITION_SCOPE
Definition: ncbilcl.hpp:50
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n td
int i
if(yy_accept[yy_current_state])
yy_size_t n
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
union huft::@1014 v
struct huft * t
#define uch
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
void free(voidpf ptr)
voidp malloc(uInt size)
unsigned short ush
Definition: zutil.h:41
unsigned long ulg
Definition: zutil.h:43
unsigned char uch
Definition: zutil.h:39
Modified on Fri Sep 20 14:57:56 2024 by modify_doxy.py rev. 669887