NCBI C++ ToolKit
ct_nlmzip_streamprocs.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ct_nlmzip_streamprocs.cpp 92643 2021-02-02 19:30:01Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information (NCBI)
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government do not place any restriction on its use or reproduction.
13  * We would, however, appreciate having the NCBI and the author cited in
14  * any work or product based on this material
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * ===========================================================================
25  *
26  * Author: Michael Kimelman
27  *
28  * (Asn) Stream processing utilities - compressor/cacher and AsnIoPtr to fci merger.
29  *
30  * Modifications:
31  * --------------------------------------------------------------------------
32  * $Log: streamprocs.c,v $
33  * Revision 1.8 2002/08/14 15:51:52 kimelman
34  * asserts added
35  *
36  * Revision 1.7 2001/05/09 23:40:26 kimelman
37  * reader effeciency improved
38  *
39  * Revision 1.6 2001/05/09 21:32:45 kimelman
40  * bugfix: check for null 'close' method before running
41  *
42  * Revision 1.5 2001/05/09 00:57:42 kimelman
43  * cosmetics
44  *
45  * Revision 1.4 2001/03/01 21:20:51 kimelman
46  * make it less noisy
47  *
48  * Revision 1.3 1998/06/25 19:24:29 kimelman
49  * changed coef. of cache grow
50  *
51  * Revision 1.2 1998/05/15 19:05:19 kimelman
52  * all old 'gzip' names changed to be suitable for library names.
53  * prefix Nlmzip_ is now used for all of this local stuff.
54  * interface headers changed their names, moving from artdb/ur to nlmzip
55  *
56  * Revision 1.1 1998/05/14 20:21:17 kimelman
57  * ct_init --> Nlmzip_ct_init
58  * added stream& AsnIo processing functions
59  * makefile changed
60  *
61  *
62  *
63  * ==========================================================================
64  */
65 
66 
67 #include <ncbi_pch.hpp>
70 #include <assert.h>
71 #include "ct_nlmzip_i.h"
72 
73 // Ignore warnings for ncbi included code
74 #ifdef __GNUC__ // if gcc or g++
75 # pragma GCC diagnostic push
76 # pragma GCC diagnostic ignored "-Wunused-function"
77 #endif //__GNUC__
78 
79 
81 
82 
83 
84 /*
85  * File common interface
86  */
87 
90  Int4 (*proc_buf)(Pointer ptr, CharPtr buf, Int4 count),
91  Int4 (*pclose)(Pointer ptr, int commit)
92  )
93 {
94  fci_t obj = (fci_t)Nlm_MemNew(sizeof(*obj));
95 
96  obj->data = data;
97  obj->proc_buf = proc_buf;
98  obj->close = pclose;
99  return obj;
100 }
101 
102 
103 /*
104  * AsnIoPtr asnio2fci.open(compressor.open(cacher.open(100,dbio.open(db))))
105 
106  * fci_t asnio2fci ;
107  * fci_t compressor;
108  * fci_t cacher;
109  * fci_t dbio;
110 
111  */
112 
113 /*
114  * CACHER
115  */
116 
117 typedef struct {
119  int read;
120  char *buf;
121  int len;
122  int start;
123  int size;
125  int eos;
126 } cacher_t;
127 
128 static Int4 LIBCALLBACK
130 {
131  cacher_t *db = (cacher_t*)ptr;
132  Int4 bytes = 0;
133 
134  while (count > bytes)
135  {
136  assert(bytes>=0);
137  assert(bytes<=count);
138  if ( db->len == db->start ) /* if cache is empty */
139  {
140  Int4 len = 0;
141 // int direct_read = 0;
142  if (db->eos)
143  break; /* end of stream EXIT */
144  if ( count-bytes > db->cache_size / 2)
145  { /* read directly to caller's buffer */
146  len = db->src->proc_buf(db->src->data, buf+bytes, count-bytes);
147  /* negative 'len<0' answer means request for larger buffer size */
148  if ( len == 0 )
149  db->eos = 1;
150  if ( len > 0 )
151  bytes += len ;
152  }
153  if (count>bytes)
154  { /* cache input stream */
155  if ( db->cache_size < count )
156  db->cache_size = count;
157  if ( db->cache_size < - len )
158  db->cache_size = - len;
159  if ( db->cache_size > db->size )
160  db->cache_size = db->size;
161  db->start = 0;
162  len = db->src->proc_buf(db->src->data, db->buf,db->cache_size);
163  if (len < 0 && db->cache_size < - len )
164  {
165  /* negative 'len<0' answer means request for larger buffer size */
166  db->cache_size = - len;
167  if ( db->cache_size > db->size )
168  { /* try to adjust cache size - that case seems to be request
169  for larger buffer from underlying decompressor */
170  CharPtr newb = (CharPtr)Nlm_MemNew( - len );
171  if ( ! newb )
172  {
173  ErrPostEx(SEV_ERROR,0,0,"memory is exhausted - can't allocate %d bytes",-len);
174  return len;
175  }
176  Nlm_MemFree(db->buf);
177  db->buf = newb;
178  db->size = - len;
179  }
180  len = db->src->proc_buf(db->src->data, db->buf,db->cache_size);
181  }
182  db->cache_size *= 2;
183  if (len < 0 )
184  return len;
185  if (len == 0 )
186  db->eos = 1 ;
187  db->len = len;
188  }
189  }
190  if ( db->len - db->start > 0 )
191  {
192  int sz = db->len - db->start;
193  if ( bytes + sz > count )
194  sz = count - bytes ;
195  memcpy(buf+bytes, db->buf + db->start, sz );
196  db->start +=sz;
197  bytes += sz;
198  }
199  }
200  assert(bytes>=0);
201  assert(bytes<=count);
202  return bytes;
203 }
204 
205 static Int4 LIBCALLBACK
207 {
208  cacher_t *db = (cacher_t*)ptr;
209  Int4 bytes = 0;
210 // int flush_it = 0;
211 
212  if(count<=0)
213  return 0;
214  /* cache size adjustments */
215  if ( db->cache_size < count )
216  {
217  db->cache_size = count;
218  if ( db->cache_size > db->size )
219  db->cache_size = db->size;
220  }
221 
222  while (count > bytes)
223  {
224  int len = 0;
225  if ( db->len == db->cache_size || /* if cache is full */
226  count-bytes > db->size / 2 ) /* or new data is too large for this cache */
227  { /* flush cache */
228  if (db->len > 0)
229  len = db->src->proc_buf(db->src->data, db->buf,db->len);
230  if (len != db->len)
231  {
232  ErrPostEx(SEV_ERROR,0,0,"Failure to write data from cache (%d of %d written)",len,db->len);
233  return -1;
234  }
235  db->cache_size *=2;
236  if ( db->cache_size < 2 * count )
237  db->cache_size = 2 * count;
238  if ( db->cache_size > db->size )
239  db->cache_size = db->size;
240  db->start = db->len = 0;
241  }
242  if ( count - bytes > db->cache_size && db->len) /* if there are a lot of data */
243  { /*remains and cache is empty -- do uncached write */
244  len = db->src->proc_buf(db->src->data, buf+bytes,count-bytes);
245  if (len != count-bytes)
246  return -1;
247  bytes += len;
248  assert (bytes == count);
249  }
250  else
251  { /* cached write */
252  len = db->cache_size - db->len;
253  assert( len > 0 );
254  if ( count-bytes < len)
255  len = count - bytes;
256  memcpy(db->buf + db->len, buf+bytes, len);
257  db->len += len ;
258  bytes += len;
259  }
260  }
261  return bytes;
262 }
263 
264 static Int4 LIBCALLBACK
265 cacher_close(Pointer ptr, int commit)
266 {
267  cacher_t *db = (cacher_t*)ptr;
268  Int4 rc;
269 
270  if (!db->read)
271  {
272  Int4 len, len1 = db->len-db->start;
273  if (commit>0)
274  {
275  len = db->src->proc_buf(db->src->data, db->buf+db->start,len1);
276  commit = (len == len1) ;
277  }
278  }
279  rc=commit;
280  if(db->src->close)
281  rc = db->src->close(db->src->data,commit);
282  if (commit>=0)
283  {
284  Nlm_MemFree(db->src);
285  Nlm_MemFree(db->buf);
286  Nlm_MemFree(db);
287  }
288  else
289  {
290  db->len = db->start = db->cache_size = db->eos = 0 ;
291  }
292  return rc ;
293 }
294 
296 cacher_open(fci_t stream, int max_cache_size,int read)
297 {
298  cacher_t *data = (cacher_t*)Nlm_MemNew(sizeof(*data));
299 
300  data->read = read ;
301  data->src = stream ;
302  data->size = max_cache_size;
303  data->cache_size = max_cache_size/10;
304  if (data->cache_size < 2048 && max_cache_size > 2048)
305  data->cache_size=2048;
306 
307  while ((data->buf = (char*)Nlm_MemNew(data->size)) == NULL)
308  {
309  data->size /= 2;
310  if (data->size <= 1024)
311  {
312  Nlm_MemFree(data);
313  ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted '%d' ",
314  __FILE__,__LINE__,data->size*2);
315  return NULL;
316  }
317  }
319 }
320 
321 /*
322  * COMPRESSOR
323  */
324 
325 typedef struct {
327  int mode; /* 0 - uninitialized. 1 - compressed ; -1 - uncompressed */
328  unsigned char *dbuf;
332 } compressor_t;
333 
334 static void
336 {
337  UcharPtr dbuf;
338  Uint4 val;
339  int bytes;
340 
341  dbuf = (UcharPtr) header;
342  if(read)
343  { /* header --> db */
344 #if 0
345  fprintf(stderr,"scanned buffer");
346  for(bytes=0; bytes<8; bytes++)
347  fprintf(stderr,"'%x',",header[bytes]);
348  fprintf(stderr,"\n");
349 #endif
350  for(val=0, bytes=0; bytes<4; bytes++,dbuf++)
351  val = (val<<8) + *dbuf ;
352  db->compr_size = val;
353  for(val=0 ; bytes<8; bytes++,dbuf++)
354  val = (val<<8) + *dbuf ;
355  db->decomp_size = val;
356 #if 0
357  fprintf (stderr,"decompr(%x-%d)-->%x-%d\n",db->compr_size,db->compr_size,db->decomp_size,db->decomp_size);
358  if (read == 1)
359  {/* QA */
360  Uchar buf[8];
361  compressor_header(db,buf,0);
362  assert(memcmp(buf,header,8)==0);
363  }
364 #endif
365  }
366  else
367  { /* write compressed block header */
368  /* db --> header */
369  val = db->compr_size;
370  for(bytes=0; bytes<4; bytes++,dbuf++)
371  *dbuf = (val >> (3-bytes)*8) & 0xff ;
372  val = db->decomp_size;
373  for( ; bytes<8; bytes++,dbuf++)
374  *dbuf = (val >> (7-bytes)*8) & 0xff ;
375 #if 0
376  fprintf (stderr,"compr(%x)-->%x ",db->decomp_size,db->compr_size);
377  fprintf(stderr,"written buffer");
378  for(bytes=0; bytes<8; bytes++)
379  fprintf(stderr,"'%x',",header[bytes]);
380  fprintf(stderr,"\n");
381 
382  {/* QA */
383  Int4 dc = db->decomp_size, cm = db->compr_size;
384  compressor_header(db,header,2);
385  assert(cm == db->compr_size);
386  assert(dc == db->decomp_size);
387  }
388 #endif
389  }
390 }
391 
392 static Int4 LIBCALLBACK
394 {
395  compressor_t *db = (compressor_t*)ptr ;
396  unsigned char lens[8];
397  Int4 bytes = 0 ;
398 
399  switch(db->mode)
400  {
401  case 0:
402  assert(count>=4);
403  bytes = db->src->proc_buf(db->src->data, (CharPtr)obuf,4);
404  if (bytes!=4)
405  return -1;
406  if (strcmp(obuf,"ZIP")==0)
407  {
408  db->mode=1; /* compresseed mode */
409  break;
410  }
411  db->mode=-1; /*uncompresseed mode */
412  obuf+=4;
413  count -=4;
414  case -1:
415  {
416  int rc;
417  rc = db->src->proc_buf(db->src->data, (CharPtr)obuf,count);
418  if (rc < 0)
419  return rc;
420  return bytes+ rc;
421  }
422  case 1:
423  default:
424  break;
425  }
426  assert(db->mode == 1);
427  if ( db->compr_size == 0 )
428  {
429  bytes = db->src->proc_buf(db->src->data, (CharPtr)lens,8);
430  if (bytes<=0)
431  return bytes;
432  assert ( bytes == 8 );
433  compressor_header(db,lens,1);
434  }
435  if ( db->decomp_size > count )
436  {
437 #if 0
438  ErrPostEx(SEV_INFO, 0,0,"\n%s:%d: small compressor output buffer('%d' - required %d) ",
439  __FILE__,__LINE__,count,db->decomp_size);
440 #endif
441  return - db->decomp_size ; /* unsufficient space problem */
442  }
443  if ( db->compr_size > db->bsize)
444  {
445  unsigned char *nb = (unsigned char*)Nlm_MemNew(db->compr_size);
446  if (!nb)
447  {
448  ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted (required %d) ",
449  __FILE__,__LINE__,db->compr_size);
450  return -db->compr_size;
451  }
452  Nlm_MemFree(db->dbuf);
453  db->dbuf = nb;
454  db->bsize = db->compr_size;
455  }
456  bytes = db->src->proc_buf(db->src->data, (CharPtr)db->dbuf,db->compr_size);
457  if ( bytes < db->compr_size )
458  {
459  ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: broken data in input stream compressed(%d) != returned(%d)",
460  __FILE__,__LINE__,db->compr_size,bytes);
461  return -1;
462  }
463  assert (bytes == db->compr_size);
464  if (Nlmzip_Uncompress (db->dbuf, db->compr_size,obuf,count,&bytes) != NLMZIP_OKAY )
465  {
466  ErrPostEx(SEV_ERROR, 0,0,"can't uncompress data");
467  return -1;
468  }
469  assert(bytes==db->decomp_size);
470  db->decomp_size=db->compr_size=0; /* clean buffer reading lock */
471  return bytes;
472 }
473 
474 static Int4 LIBCALLBACK
476 {
477  compressor_t *db = (compressor_t*)ptr ;
478  Int4 bytes = 0 ;
479 
480  if (count<=0)
481  return 0;
482 
483  switch (db->mode)
484  {
485  case 0 :
486  bytes = db->src->proc_buf(db->src->data,(char*)"ZIP",4);
487  if (bytes!=4)
488  return -1;
489  db->mode=1; /* compresseed mode */
490  break ;
491  case -1 : /* uncompresseed mode */
492  return db->src->proc_buf(db->src->data, (CharPtr)buf,count);
493  case 1 :
494  default :
495  break;
496  }
497 
498  while (Nlmzip_Compress (buf, count,db->dbuf+8,db->bsize-8,&bytes) !=NLMZIP_OKAY)
499  {
500  unsigned char *nb = (unsigned char*)Nlm_MemNew(2*db->bsize);
501  if (!nb)
502  {
503  ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted (required %d) ",
504  __FILE__,__LINE__,db->compr_size);
505  return -db->compr_size;
506  }
507  Nlm_MemFree(db->dbuf);
508  db->dbuf = nb;
509  db->bsize *=2;
510  }
511 
512  db->decomp_size = count;
513  db->compr_size = bytes;
514 
515  compressor_header(db,db->dbuf,0);
516  bytes = db->src->proc_buf(db->src->data, (CharPtr)db->dbuf,db->compr_size+8);
517  if ( bytes != db->compr_size+8)
518  {
519  ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: broken data in output stream",
520  __FILE__,__LINE__);
521  return -1;
522  }
523  return count;
524 }
525 
526 static Int4 LIBCALLBACK
527 compressor_close(Pointer ptr, int commit)
528 {
529  compressor_t *db = (compressor_t*)ptr;
530  Int4 rc = commit;
531 
532  if(db->src->close)
533  rc = db->src->close(db->src->data,commit);
534  if (commit>=0)
535  {
536  if (db->src)
537  Nlm_MemFree(db->src);
538  if (db->dbuf)
539  Nlm_MemFree(db->dbuf);
540  Nlm_MemFree(db);
541  }
542  else
543  {
544  db->mode = 0;
545  db->decomp_size=db->compr_size=0; /* clean buffer reading lock */
546  }
547  return rc ;
548 }
549 
551 compressor_open(fci_t stream, int max_buffer_size, int read)
552 {
554 
555  if (max_buffer_size<1024)
556  max_buffer_size = 1024;
557  data->src = stream ;
558  data->mode = 0;
559  data->dbuf = (unsigned char*)Nlm_MemNew(max_buffer_size);
560  if(data->dbuf)
561  data->bsize = max_buffer_size;
562  return cacher_open( /* add one more cache which will read data */
564  max_buffer_size,read);
565 }
566 
567 
568 #if 0
569 /*
570  * ASNIO2FCI
571  */
572 
573 static Int2 LIBCALLBACK
574 asnio2fci_proc(Pointer ptr, CharPtr buf, Uint2 count)
575 {
576  fci_t f = (fci_t)ptr;
577 
578  assert(count <= 0x7fff );
579  return f->proc_buf(f->data, buf,count);
580 }
581 
583 asnio2fci_close(AsnIoPtr aip,Int4 commit)
584 {
585  fci_t stream = aip ->iostruct;
586  Int4 rc = commit;
587 
588  if(commit>=0)
589  AsnIoClose (aip);
590  else
591  AsnIoReset (aip);
592  if(stream->close)
593  rc = stream->close(stream->data,commit);
594  if (commit>=0)
595  MemFree (stream);
596  return rc;
597 }
598 
599 AsnIoPtr LIBCALL
600 asnio2fci_open(int read, fci_t stream)
601 {
602  if (read)
603  return AsnIoNew(ASNIO_BIN_IN, NULL, stream, asnio2fci_proc, NULL);
604  else
605  return AsnIoNew(ASNIO_BIN_OUT, NULL, stream, NULL, asnio2fci_proc);
606 }
607 #endif
608 
609 
611 
612 
613 // Re-enable warnings
614 #ifdef __GNUC__ // if gcc or g++
615 # pragma GCC diagnostic pop
616 #endif //__GNUC__
617 
static Int4 LIBCALLBACK cacher_write(Pointer ptr, CharPtr buf, Int4 count)
static Int4 LIBCALLBACK cacher_close(Pointer ptr, int commit)
static void compressor_header(compressor_t *db, UcharPtr header, int read)
static Int4 LIBCALLBACK compressor_write(Pointer ptr, CharPtr buf, Int4 count)
static Int4 LIBCALLBACK cacher_read(Pointer ptr, CharPtr buf, Int4 count)
static Int4 LIBCALLBACK compressor_close(Pointer ptr, int commit)
static Int4 LIBCALLBACK compressor_read(Pointer ptr, CharPtr obuf, Int4 count)
void MemFree(char *p)
Definition: ftacpp.hpp:55
char data[12]
Definition: iconv.c:80
#define SEV_INFO
Definition: gicache.c:89
#define SEV_ERROR
Definition: gicache.c:91
#define NULL
Definition: ncbistd.hpp:225
#define BEGIN_CTRANSITION_SCOPE
Definition: ncbilcl.hpp:49
fci_t LIBCALL cacher_open(fci_t stream, int max_cache_size, int read)
Pointer data
Definition: nlmzip.hpp:141
#define CharPtr
Definition: ncbistd.hpp:125
#define UcharPtr
Definition: ncbistd.hpp:131
fci_t LIBCALL compressor_open(fci_t stream, int max_buffer_size, int read)
NLM_EXTERN void *LIBCALL Nlm_MemFree(void *ptr)
Definition: ct_ncbimem.cpp:298
BEGIN_CTRANSITION_SCOPE NLM_EXTERN void *LIBCALL Nlm_MemNew(size_t size)
Definition: ct_ncbimem.cpp:271
#define LIBCALLBACK
Definition: ncbistd.hpp:302
Int4(* close)(Pointer ptr, int commit)
Definition: nlmzip.hpp:143
Int4(* proc_buf)(Pointer ptr, CharPtr buf, Int4 count)
Definition: nlmzip.hpp:142
Nlmzip_rc_t Nlmzip_Uncompress(const void *, Int4, void *, Int4, Int4Ptr)
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
#define LIBCALL
Definition: ncbistd.hpp:297
Nlmzip_rc_t Nlmzip_Compress(const void *, Int4, void *, Int4, Int4Ptr)
#define END_CTRANSITION_SCOPE
Definition: ncbilcl.hpp:50
#define Pointer
Definition: ncbistd.hpp:114
BEGIN_CTRANSITION_SCOPE fci_t LIBCALL fci_open(Pointer data, Int4(*proc_buf)(Pointer ptr, CharPtr buf, Int4 count), Int4(*pclose)(Pointer ptr, int commit))
@ NLMZIP_OKAY
Definition: nlmzip.hpp:129
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
char * buf
int len
static MDB_val dbuf
Definition: mdb_load.c:39
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
#define assert(x)
Definition: srv_diag.hpp:58
unsigned char * dbuf
Modified on Mon May 27 04:40:52 2024 by modify_doxy.py rev. 669887