NCBI C++ ToolKit
huffman.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 
2 /*-------------------------------------------------------------*/
3 /*--- Huffman coding low-level stuff ---*/
4 /*--- huffman.c ---*/
5 /*-------------------------------------------------------------*/
6 
7 /* ------------------------------------------------------------------
8  This file is part of bzip2/libbzip2, a program and library for
9  lossless, block-sorting data compression.
10 
11  bzip2/libbzip2 version 1.0.8 of 13 July 2019
12  Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13 
14  Please read the WARNING, DISCLAIMER and PATENTS sections in the
15  README file.
16 
17  This program is released under the terms of the license contained
18  in the file LICENSE.
19  ------------------------------------------------------------------ */
20 
21 
22 #include "bzlib_private.h"
23 
24 /*---------------------------------------------------*/
25 #define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
26 #define DEPTHOF(zz1) ((zz1) & 0x000000ff)
27 #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28 
29 #define ADDWEIGHTS(zw1,zw2) \
30  (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
31  (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32 
33 #define UPHEAP(z) \
34 { \
35  Int32 zz, tmp; \
36  zz = z; tmp = heap[zz]; \
37  while (weight[tmp] < weight[heap[zz >> 1]]) { \
38  heap[zz] = heap[zz >> 1]; \
39  zz >>= 1; \
40  } \
41  heap[zz] = tmp; \
42 }
43 
44 #define DOWNHEAP(z) \
45 { \
46  Int32 zz, yy, tmp; \
47  zz = z; tmp = heap[zz]; \
48  while (True) { \
49  yy = zz << 1; \
50  if (yy > nHeap) break; \
51  if (yy < nHeap && \
52  weight[heap[yy+1]] < weight[heap[yy]]) \
53  yy++; \
54  if (weight[tmp] < weight[heap[yy]]) break; \
55  heap[zz] = heap[yy]; \
56  zz = yy; \
57  } \
58  heap[zz] = tmp; \
59 }
60 
61 
62 /*---------------------------------------------------*/
64  Int32 *freq,
65  Int32 alphaSize,
66  Int32 maxLen )
67 {
68  /*--
69  Nodes and heap entries run from 1. Entry 0
70  for both the heap and nodes is a sentinel.
71  --*/
72  Int32 nNodes, nHeap, n1, n2, i, j, k;
73  Bool tooLong;
74 
77  Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78 
79  for (i = 0; i < alphaSize; i++)
80  weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81 
82  while (True) {
83 
84  nNodes = alphaSize;
85  nHeap = 0;
86 
87  heap[0] = 0;
88  weight[0] = 0;
89  parent[0] = -2;
90 
91  for (i = 1; i <= alphaSize; i++) {
92  parent[i] = -1;
93  nHeap++;
94  heap[nHeap] = i;
95  UPHEAP(nHeap);
96  }
97 
98  AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99 
100  while (nHeap > 1) {
101  n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102  n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103  nNodes++;
104  parent[n1] = parent[n2] = nNodes;
105  weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106  parent[nNodes] = -1;
107  nHeap++;
108  heap[nHeap] = nNodes;
109  UPHEAP(nHeap);
110  }
111 
112  AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113 
114  tooLong = False;
115  for (i = 1; i <= alphaSize; i++) {
116  j = 0;
117  k = i;
118  while (parent[k] >= 0) { k = parent[k]; j++; }
119  len[i-1] = j;
120  if (j > maxLen) tooLong = True;
121  }
122 
123  if (! tooLong) break;
124 
125  /* 17 Oct 04: keep-going condition for the following loop used
126  to be 'i < alphaSize', which missed the last element,
127  theoretically leading to the possibility of the compressor
128  looping. However, this count-scaling step is only needed if
129  one of the generated Huffman code words is longer than
130  maxLen, which up to and including version 1.0.2 was 20 bits,
131  which is extremely unlikely. In version 1.0.3 maxLen was
132  changed to 17 bits, which has minimal effect on compression
133  ratio, but does mean this scaling step is used from time to
134  time, enough to verify that it works.
135 
136  This means that bzip2-1.0.3 and later will only produce
137  Huffman codes with a maximum length of 17 bits. However, in
138  order to preserve backwards compatibility with bitstreams
139  produced by versions pre-1.0.3, the decompressor must still
140  handle lengths of up to 20. */
141 
142  for (i = 1; i <= alphaSize; i++) {
143  j = weight[i] >> 8;
144  j = 1 + (j / 2);
145  weight[i] = j << 8;
146  }
147  }
148 }
149 
150 
151 /*---------------------------------------------------*/
153  UChar *length,
154  Int32 minLen,
155  Int32 maxLen,
156  Int32 alphaSize )
157 {
158  Int32 n, vec, i;
159 
160  vec = 0;
161  for (n = minLen; n <= maxLen; n++) {
162  for (i = 0; i < alphaSize; i++)
163  if (length[i] == n) { code[i] = vec; vec++; };
164  vec <<= 1;
165  }
166 }
167 
168 
169 /*---------------------------------------------------*/
171  Int32 *base,
172  Int32 *perm,
173  UChar *length,
174  Int32 minLen,
175  Int32 maxLen,
176  Int32 alphaSize )
177 {
178  Int32 pp, i, j, vec;
179 
180  pp = 0;
181  for (i = minLen; i <= maxLen; i++)
182  for (j = 0; j < alphaSize; j++)
183  if (length[j] == i) { perm[pp] = j; pp++; };
184 
185  for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186  for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187 
188  for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189 
190  for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191  vec = 0;
192 
193  for (i = minLen; i <= maxLen; i++) {
194  vec += (base[i+1] - base[i]);
195  limit[i] = vec-1;
196  vec <<= 1;
197  }
198  for (i = minLen + 1; i <= maxLen; i++)
199  base[i] = ((limit[i-1] + 1) << 1) - base[i];
200 }
201 
202 
203 /*-------------------------------------------------------------*/
204 /*--- end huffman.c ---*/
205 /*-------------------------------------------------------------*/
unsigned char Bool
Definition: bzip2.c:162
#define False
Definition: bzip2.c:170
unsigned char UChar
Definition: bzip2.c:163
#define True
Definition: bzip2.c:169
int Int32
Definition: bzip2.c:164
#define BZ_MAX_CODE_LEN
#define AssertH(cond, errcode)
Definition: bzlib_private.h:59
#define BZ_MAX_ALPHA_SIZE
static int heap[2 *(256+1+29)+1]
void BZ2_hbCreateDecodeTables(Int32 *limit, Int32 *base, Int32 *perm, UChar *length, Int32 minLen, Int32 maxLen, Int32 alphaSize)
Definition: huffman.c:170
#define UPHEAP(z)
Definition: huffman.c:33
#define DOWNHEAP(z)
Definition: huffman.c:44
void BZ2_hbAssignCodes(Int32 *code, UChar *length, Int32 minLen, Int32 maxLen, Int32 alphaSize)
Definition: huffman.c:152
#define ADDWEIGHTS(zw1, zw2)
Definition: huffman.c:29
void BZ2_hbMakeCodeLengths(UChar *len, Int32 *freq, Int32 alphaSize, Int32 maxLen)
Definition: huffman.c:63
n font weight
int i
yy_size_t n
int len
Definition: inftrees.h:24
Modified on Fri Sep 20 14:57:14 2024 by modify_doxy.py rev. 669887