NCBI C++ ToolKit
block.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: block.cpp 99284 2023-03-06 16:28:57Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Name: block.cpp
27  *
28  * Author: Karl Sirotkin, Hsiu-Chuan Chen
29  *
30  * File Description:
31  * Parsing flatfile to blocks in memory.
32  *
33  */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include "ftacpp.hpp"
38 
39 #include "ftaerr.hpp"
40 #include "ftablock.h"
41 #include "indx_blk.h"
42 #include "indx_def.h"
43 #include "utilfun.h"
44 
45 #ifdef THIS_FILE
46 # undef THIS_FILE
47 #endif
48 #define THIS_FILE "block.cpp"
50 
/*
 * One record of the quality-score file, as collected by QSIndex().
 * Records are chained through `next` while the file is scanned.
 */
struct QSStruct {
    char*     accession = nullptr; /* accession text saved from a '>' header line */
    int16_t   version   = 0;       /* number after the '.' in the header, 0 when there is none
                                      (int16_t is what the toolkit's Int2 aliases) */
    size_t    offset    = 0;       /* file position at which the header line starts */
    size_t    length    = 0;       /* distance from this header to the next one, or to the
                                      final read position for the last record */
    QSStruct* next      = nullptr; /* next record in the list, nullptr at the tail */
};

/* Pointer alias used by the quality-score helpers in this file. */
using QSStructPtr = QSStruct*;
59 
60 /**********************************************************/
62 {
63  GapFeatsPtr tgfp;
64 
65  for (; gfp; gfp = tgfp) {
66  tgfp = gfp->next;
67  delete gfp;
68  }
69 }
70 
72 {
73  if (mSimpleDelete)
74  return;
75 
76  int MAX_HEAD_RECURSION(100);
77 
78  mpQscore.clear();
79  delete mpData;
80  if (mType == ParFlat_ENTRYNODE) {
82  }
83  auto p = mpNext;
84  for (int i = 0; p && i < MAX_HEAD_RECURSION; ++i) {
85  p = p->mpNext;
86  }
87  if (! p) {
88  delete mpNext;
89  } else {
90  auto pTail = p->mpNext;
91  p->mpNext = nullptr;
92  delete mpNext;
93  delete pTail;
94  }
95 }
96 
97 /**********************************************************
98  *
99  * void FreeEntry(entry):
100  *
101  * Only free entry itself and ebp->chain data because
102  * ebp->sep has to be used to write out ASN.1 output then
103  * free ebp->sep and ebp itself together.
104  *
105  * 5-12-93
106  *
107  **********************************************************/
108 
110 {
111  if (entry->mpData) {
112  delete entry->mpData;
113  entry->mpData = nullptr;
114  }
115 
116  delete entry;
117 }
118 
119 /**********************************************************/
120 
122 {
123  if (chain) {
124  delete chain;
125  chain = nullptr;
126  }
127 }
128 
129 /**********************************************************/
131 {
132  XmlIndexPtr xipnext;
133 
134  for (; xip; xip = xipnext) {
135  xipnext = xip->next;
136  if (xip->subtags)
137  XMLIndexFree(xip->subtags);
138  delete xip;
139  }
140 }
141 
142 /**********************************************************/
144 {
145  if (! ibp)
146  return;
147 
148  if (ibp->gaps)
149  GapFeatsFree(ibp->gaps);
150 
151  if (ibp->secaccs)
152  FreeTokenblk(ibp->secaccs);
153 
154  if (ibp->xip)
155  XMLIndexFree(ibp->xip);
156 
157  delete ibp;
158 }
159 
160 /**********************************************************/
161 static bool AccsCmp(const Indexblk* ibp1, const Indexblk* ibp2)
162 {
163  int i = StringCmp(ibp1->acnum, ibp2->acnum);
164  if (i != 0)
165  return i < 0;
166 
167  if (ibp1->vernum != ibp2->vernum)
168  return ibp1->vernum < ibp2->vernum;
169 
170  return ibp2->offset < ibp1->offset;
171 }
172 
173 /**********************************************************/
174 static bool QSCmp(const QSStruct* qs1, const QSStruct* qs2)
175 {
176  int i = StringCmp(qs1->accession, qs2->accession);
177  if (i != 0)
178  return i < 0;
179 
180  return qs1->version < qs2->version;
181 }
182 
183 /**********************************************************/
184 static void QSStructFree(QSStructPtr qssp)
185 {
186  QSStructPtr tqssp;
187 
188  for (; qssp; qssp = tqssp) {
189  tqssp = qssp->next;
190  if (qssp->accession)
191  MemFree(qssp->accession);
192  delete qssp;
193  }
194 }
195 
196 /**********************************************************/
197 static bool QSNoSequenceRecordErr(bool accver, QSStructPtr qssp)
198 {
199  if (accver)
200  ErrPostEx(SEV_FATAL, ERR_QSCORE_NoSequenceRecord, "Encountered Quality Score data for a record \"%s.%d\" that does not exist in the file of sequence records being parsed.", qssp->accession, qssp->version);
201  else
202  ErrPostEx(SEV_FATAL, ERR_QSCORE_NoSequenceRecord, "Encountered Quality Score data for a record \"%s\" that does not exist in the file of sequence records being parsed.", qssp->accession);
203  return false;
204 }
205 
206 /**********************************************************/
208 {
209  QSStructPtr qssp;
210  QSStructPtr tqssp;
211  QSStructPtr tqsspprev;
212 
213  char* p;
214  char* q;
215  bool ret;
216  size_t i;
217  Int4 count;
218  Int4 j;
219  Int4 k;
220  Int4 l;
221  Int2 m;
222  Char buf[1024];
223 
224  if (! pp->qsfd)
225  return true;
226 
227  qssp = new QSStruct;
228  tqssp = qssp;
229  tqsspprev = nullptr;
230  count = 0;
231  while (fgets(buf, 1023, pp->qsfd)) {
232  if (buf[0] != '>')
233  continue;
234 
235  p = StringChr(buf, ' ');
236  if (! p)
237  continue;
238 
239  i = (size_t)StringLen(buf);
240  *p = '\0';
241 
242  q = StringChr(buf, '.');
243  if (q)
244  *q++ = '\0';
245 
246  count++;
247  tqssp->next = new QSStruct;
248  tqssp = tqssp->next;
249  tqssp->accession = StringSave(buf + 1);
250  tqssp->version = q ? atoi(q) : 0;
251  tqssp->offset = (size_t)ftell(pp->qsfd) - i;
252  if (tqsspprev)
253  tqsspprev->length = tqssp->offset - tqsspprev->offset;
254  tqssp->next = nullptr;
255 
256  tqsspprev = tqssp;
257  }
258  tqssp->length = (size_t)ftell(pp->qsfd) - tqssp->offset;
259 
260  tqssp = qssp;
261  qssp = tqssp->next;
262  delete tqssp;
263 
264  if (! qssp) {
265  ErrPostEx(SEV_FATAL, ERR_QSCORE_NoScoreDataFound, "No correctly formatted records containing quality score data were found within file \"%s\".", pp->qsfile);
266  return false;
267  }
268 
269  vector<QSStructPtr> qsspp(count);
270  tqssp = qssp;
271  for (j = 0; j < count && tqssp; j++, tqssp = tqssp->next)
272  qsspp[j] = tqssp;
273 
274  if (count > 1) {
275  std::sort(qsspp.begin(), qsspp.end(), QSCmp);
276 
277  for (j = 0, count--; j < count; j++)
278  if (StringEqu(qsspp[j]->accession, qsspp[j + 1]->accession))
279  if (pp->accver == false ||
280  qsspp[j]->version == qsspp[j + 1]->version)
281  break;
282 
283  if (j < count) {
284  if (pp->accver)
285  ErrPostEx(SEV_FATAL, ERR_QSCORE_RedundantScores, "Found more than one set of Quality Score for accession \"%s.%d\".", qsspp[j]->accession, qsspp[j]->version);
286  else
287  ErrPostEx(SEV_FATAL, ERR_QSCORE_RedundantScores, "Found more than one set of Quality Score for accession \"%s\".", qsspp[j]->accession);
288 
289  QSStructFree(qssp);
290  return false;
291  }
292  count++;
293  }
294 
295  vector<IndexblkPtr> ibpp(pp->indx);
296  for (j = 0; j < pp->indx && ibnp; j++, ibnp = ibnp->next)
297  ibpp[j] = ibnp->ibp;
298 
299  if (pp->indx > 1)
300  std::sort(ibpp.begin(), ibpp.end(), AccsCmp);
301 
302  for (ret = true, j = 0, k = 0; j < count; j++) {
303  if (k == pp->indx) {
304  ret = QSNoSequenceRecordErr(pp->accver, qsspp[j]);
305  continue;
306  }
307  for (; k < pp->indx; k++) {
308  l = StringCmp(qsspp[j]->accession, ibpp[k]->acnum);
309  if (l < 0) {
310  ret = QSNoSequenceRecordErr(pp->accver, qsspp[j]);
311  break;
312  }
313  if (l > 0)
314  continue;
315  m = qsspp[j]->version - ibpp[k]->vernum;
316  if (m < 0) {
317  ret = QSNoSequenceRecordErr(pp->accver, qsspp[j]);
318  break;
319  }
320  if (m > 0)
321  continue;
322  ibpp[k]->qsoffset = qsspp[j]->offset;
323  ibpp[k]->qslength = qsspp[j]->length;
324  k++;
325  break;
326  }
327  }
328 
329  QSStructFree(qssp);
330 
331  return (ret);
332 }
333 
static bool QSNoSequenceRecordErr(bool accver, QSStructPtr qssp)
Definition: block.cpp:197
static bool QSCmp(const QSStruct *qs1, const QSStruct *qs2)
Definition: block.cpp:174
static void QSStructFree(QSStructPtr qssp)
Definition: block.cpp:184
void XMLIndexFree(XmlIndexPtr xip)
Definition: block.cpp:130
void GapFeatsFree(GapFeatsPtr gfp)
Definition: block.cpp:61
void xFreeEntry(DataBlkPtr entry)
Definition: block.cpp:109
static bool AccsCmp(const Indexblk *ibp1, const Indexblk *ibp2)
Definition: block.cpp:161
void FreeIndexblk(IndexblkPtr ibp)
Definition: block.cpp:143
bool QSIndex(ParserPtr pp, IndBlkNextPtr ibnp)
Definition: block.cpp:207
char * mOffset
Definition: ftablock.h:332
string mpQscore
Definition: ftablock.h:334
~DataBlk()
Definition: block.cpp:71
CFlatFileData * mpData
Definition: ftablock.h:331
DataBlk * mpNext
Definition: ftablock.h:336
bool mSimpleDelete
Definition: ftablock.h:322
int mType
Definition: ftablock.h:330
#define ParFlat_ENTRYNODE
Definition: ftablock.h:67
int StringCmp(const char *s1, const char *s2)
Definition: ftacpp.hpp:80
char * StringSave(const char *s)
Definition: ftacpp.hpp:61
bool StringEqu(const char *s1, const char *s2)
Definition: ftacpp.hpp:96
void MemFree(char *p)
Definition: ftacpp.hpp:55
size_t StringLen(const char *s)
Definition: ftacpp.hpp:60
#define SEV_FATAL
Definition: gicache.c:93
#define StringChr
Definition: ncbistr.hpp:317
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
char Char
Alias for char.
Definition: ncbitype.h:93
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define ERR_QSCORE_NoSequenceRecord
Definition: indx_err.h:124
#define ERR_QSCORE_NoScoreDataFound
Definition: indx_err.h:125
#define ERR_QSCORE_RedundantScores
Definition: indx_err.h:123
char * buf
int i
if(yy_accept[yy_current_state])
static int version
Definition: mdb_load.c:29
constexpr auto sort(_Init &&init)
DataBlkPtr chain
Definition: ftablock.h:344
~EntryBlk()
Definition: block.cpp:121
GapFeats * next
Definition: ftablock.h:129
Indexblk * ibp
Definition: indx_blk.h:56
IndBlkNode * next
Definition: indx_blk.h:57
Char acnum[200]
Definition: ftablock.h:169
Int2 vernum
Definition: ftablock.h:170
GapFeatsPtr gaps
Definition: ftablock.h:217
TokenBlkPtr secaccs
Definition: ftablock.h:219
size_t offset
Definition: ftablock.h:171
XmlIndexPtr xip
Definition: ftablock.h:220
const char * qsfile
QSStruct * next
Definition: block.cpp:56
Int2 version
Definition: block.cpp:53
char * accession
Definition: block.cpp:52
size_t length
Definition: block.cpp:55
size_t offset
Definition: block.cpp:54
XmlIndex * next
Definition: ftablock.h:161
XmlIndex * subtags
Definition: ftablock.h:160
void FreeTokenblk(TokenBlkPtr tbp)
Definition: utilfun.cpp:529
Modified on Thu Nov 30 04:56:32 2023 by modify_doxy.py rev. 669887