NCBI C++ ToolKit
block.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: block.cpp 102482 2024-05-12 14:36:03Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Name: block.cpp
27  *
28  * Author: Karl Sirotkin, Hsiu-Chuan Chen
29  *
30  * File Description:
31  * Parsing flatfile to blocks in memory.
32  *
33  */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include "ftacpp.hpp"
38 
39 #include "ftaerr.hpp"
40 #include "ftablock.h"
41 #include "indx_blk.h"
42 #include "indx_def.h"
43 #include "utilfun.h"
44 
45 #ifdef THIS_FILE
46 # undef THIS_FILE
47 #endif
48 #define THIS_FILE "block.cpp"
50 
/// One quality-score record found in the quality-score file, kept as a node
/// of a singly linked list while QSIndex() builds and matches the records.
struct QSStruct {
    string    accession;          ///< text between '>' and the first '.' or ' ' of the header line
    Int2      version = 0;        ///< number following the '.', or 0 when the header has no '.'
    size_t    offset  = 0;        ///< file position at which the header line starts
    size_t    length  = 0;        ///< bytes from this header to the next one (or to where reading stopped)
    QSStruct* next    = nullptr;  ///< next record in file order
};
using QSStructPtr = QSStruct*;
59 
60 /**********************************************************/
62 {
63  GapFeatsPtr tgfp;
64 
65  for (; gfp; gfp = tgfp) {
66  tgfp = gfp->next;
67  delete gfp;
68  }
69 }
70 
72 {
73  if (mSimpleDelete)
74  return;
75 
76  int MAX_HEAD_RECURSION(100);
77 
78  mpQscore.clear();
79  delete mpData;
80  if (mType == ParFlat_ENTRYNODE) {
82  }
83  auto p = mpNext;
84  for (int i = 0; p && i < MAX_HEAD_RECURSION; ++i) {
85  p = p->mpNext;
86  }
87  if (! p) {
88  delete mpNext;
89  } else {
90  auto pTail = p->mpNext;
91  p->mpNext = nullptr;
92  delete mpNext;
93  delete pTail;
94  }
95 }
96 
97 /**********************************************************
98  *
99  * void FreeEntry(entry):
100  *
101  * Only free entry itself and ebp->chain data because
102  * ebp->sep has to be used to write out ASN.1 output then
103  * free ebp->sep and ebp itself together.
104  *
105  * 5-12-93
106  *
107  **********************************************************/
108 
110 {
111  if (entry->mpData) {
112  delete entry->mpData;
113  entry->mpData = nullptr;
114  }
115 
116  delete entry;
117 }
118 
119 /**********************************************************/
120 
122 {
123  if (chain) {
124  delete chain;
125  chain = nullptr;
126  }
127 }
128 
129 /**********************************************************/
131 {
132  XmlIndexPtr xipnext;
133 
134  for (; xip; xip = xipnext) {
135  xipnext = xip->next;
136  if (xip->subtags)
137  XMLIndexFree(xip->subtags);
138  delete xip;
139  }
140 }
141 
142 /**********************************************************/
144 {
145  if (! ibp)
146  return;
147 
148  if (ibp->gaps)
149  GapFeatsFree(ibp->gaps);
150 
151  if (ibp->xip)
152  XMLIndexFree(ibp->xip);
153 
154  delete ibp;
155 }
156 
157 /**********************************************************/
158 static bool AccsCmp(const Indexblk* ibp1, const Indexblk* ibp2)
159 {
160  int i = StringCmp(ibp1->acnum, ibp2->acnum);
161  if (i != 0)
162  return i < 0;
163 
164  if (ibp1->vernum != ibp2->vernum)
165  return ibp1->vernum < ibp2->vernum;
166 
167  return ibp2->offset < ibp1->offset;
168 }
169 
170 /**********************************************************/
171 static bool QSCmp(const QSStruct* qs1, const QSStruct* qs2)
172 {
173  int i = StringCmp(qs1->accession.c_str(), qs2->accession.c_str());
174  if (i != 0)
175  return i < 0;
176 
177  return qs1->version < qs2->version;
178 }
179 
180 /**********************************************************/
181 static void QSStructFree(QSStructPtr qssp)
182 {
183  QSStructPtr tqssp;
184 
185  for (; qssp; qssp = tqssp) {
186  tqssp = qssp->next;
187  delete qssp;
188  }
189 }
190 
191 /**********************************************************/
192 static bool QSNoSequenceRecordErr(bool accver, QSStructPtr qssp)
193 {
194  if (accver)
195  ErrPostEx(SEV_FATAL, ERR_QSCORE_NoSequenceRecord, "Encountered Quality Score data for a record \"%s.%d\" that does not exist in the file of sequence records being parsed.", qssp->accession.c_str(), qssp->version);
196  else
197  ErrPostEx(SEV_FATAL, ERR_QSCORE_NoSequenceRecord, "Encountered Quality Score data for a record \"%s\" that does not exist in the file of sequence records being parsed.", qssp->accession.c_str());
198  return false;
199 }
200 
201 /**********************************************************/
203 {
204  QSStructPtr qssp;
205  QSStructPtr tqssp;
206  QSStructPtr tqsspprev;
207 
208  char* p;
209  char* q;
210  bool ret;
211  size_t i;
212  Int4 count;
213  Int4 j;
214  Int4 k;
215  Int4 l;
216  Int2 m;
217  Char buf[1024];
218 
219  if (! pp->qsfd)
220  return true;
221 
222  qssp = new QSStruct;
223  tqssp = qssp;
224  tqsspprev = nullptr;
225  count = 0;
226  while (fgets(buf, 1023, pp->qsfd)) {
227  if (buf[0] != '>')
228  continue;
229 
230  p = StringChr(buf, ' ');
231  if (! p)
232  continue;
233 
234  i = (size_t)StringLen(buf);
235  *p = '\0';
236 
237  q = StringChr(buf, '.');
238  if (q)
239  *q++ = '\0';
240 
241  count++;
242  tqssp->next = new QSStruct;
243  tqssp = tqssp->next;
244  tqssp->accession = string(buf + 1);
245  tqssp->version = q ? atoi(q) : 0;
246  tqssp->offset = (size_t)ftell(pp->qsfd) - i;
247  if (tqsspprev)
248  tqsspprev->length = tqssp->offset - tqsspprev->offset;
249  tqssp->next = nullptr;
250 
251  tqsspprev = tqssp;
252  }
253  tqssp->length = (size_t)ftell(pp->qsfd) - tqssp->offset;
254 
255  tqssp = qssp;
256  qssp = tqssp->next;
257  delete tqssp;
258 
259  if (! qssp) {
260  ErrPostEx(SEV_FATAL, ERR_QSCORE_NoScoreDataFound, "No correctly formatted records containing quality score data were found within file \"%s\".", pp->qsfile);
261  return false;
262  }
263 
264  vector<QSStructPtr> qsspp(count);
265  tqssp = qssp;
266  for (j = 0; j < count && tqssp; j++, tqssp = tqssp->next)
267  qsspp[j] = tqssp;
268 
269  if (count > 1) {
270  std::sort(qsspp.begin(), qsspp.end(), QSCmp);
271 
272  for (j = 0, count--; j < count; j++)
273  if (qsspp[j]->accession == qsspp[j + 1]->accession)
274  if (pp->accver == false ||
275  qsspp[j]->version == qsspp[j + 1]->version)
276  break;
277 
278  if (j < count) {
279  if (pp->accver)
280  ErrPostEx(SEV_FATAL, ERR_QSCORE_RedundantScores, "Found more than one set of Quality Score for accession \"%s.%d\".", qsspp[j]->accession.c_str(), qsspp[j]->version);
281  else
282  ErrPostEx(SEV_FATAL, ERR_QSCORE_RedundantScores, "Found more than one set of Quality Score for accession \"%s\".", qsspp[j]->accession.c_str());
283 
284  QSStructFree(qssp);
285  return false;
286  }
287  count++;
288  }
289 
290  vector<IndexblkPtr> ibpp(pp->indx);
291  for (j = 0; j < pp->indx && ibnp; j++, ibnp = ibnp->next)
292  ibpp[j] = ibnp->ibp;
293 
294  if (pp->indx > 1)
295  std::sort(ibpp.begin(), ibpp.end(), AccsCmp);
296 
297  for (ret = true, j = 0, k = 0; j < count; j++) {
298  if (k == pp->indx) {
299  ret = QSNoSequenceRecordErr(pp->accver, qsspp[j]);
300  continue;
301  }
302  for (; k < pp->indx; k++) {
303  l = StringCmp(qsspp[j]->accession.c_str(), ibpp[k]->acnum);
304  if (l < 0) {
305  ret = QSNoSequenceRecordErr(pp->accver, qsspp[j]);
306  break;
307  }
308  if (l > 0)
309  continue;
310  m = qsspp[j]->version - ibpp[k]->vernum;
311  if (m < 0) {
312  ret = QSNoSequenceRecordErr(pp->accver, qsspp[j]);
313  break;
314  }
315  if (m > 0)
316  continue;
317  ibpp[k]->qsoffset = qsspp[j]->offset;
318  ibpp[k]->qslength = qsspp[j]->length;
319  k++;
320  break;
321  }
322  }
323 
324  QSStructFree(qssp);
325 
326  return (ret);
327 }
328 
static bool QSNoSequenceRecordErr(bool accver, QSStructPtr qssp)
Definition: block.cpp:192
static bool QSCmp(const QSStruct *qs1, const QSStruct *qs2)
Definition: block.cpp:171
static void QSStructFree(QSStructPtr qssp)
Definition: block.cpp:181
void XMLIndexFree(XmlIndexPtr xip)
Definition: block.cpp:130
void GapFeatsFree(GapFeatsPtr gfp)
Definition: block.cpp:61
void xFreeEntry(DataBlkPtr entry)
Definition: block.cpp:109
static bool AccsCmp(const Indexblk *ibp1, const Indexblk *ibp2)
Definition: block.cpp:158
void FreeIndexblk(IndexblkPtr ibp)
Definition: block.cpp:143
bool QSIndex(ParserPtr pp, IndBlkNextPtr ibnp)
Definition: block.cpp:202
char * mOffset
Definition: ftablock.h:329
string mpQscore
Definition: ftablock.h:331
~DataBlk()
Definition: block.cpp:71
CFlatFileData * mpData
Definition: ftablock.h:328
DataBlk * mpNext
Definition: ftablock.h:333
bool mSimpleDelete
Definition: ftablock.h:319
int mType
Definition: ftablock.h:327
#define ParFlat_ENTRYNODE
Definition: ftablock.h:68
int StringCmp(const char *s1, const char *s2)
Definition: ftacpp.hpp:95
void MemFree(char *p)
Definition: ftacpp.hpp:55
size_t StringLen(const char *s)
Definition: ftacpp.hpp:60
#define SEV_FATAL
Definition: gicache.c:93
string
Definition: cgiapp.hpp:690
#define StringChr
Definition: ncbistr.hpp:317
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
char Char
Alias for char.
Definition: ncbitype.h:93
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define ERR_QSCORE_NoSequenceRecord
Definition: indx_err.h:124
#define ERR_QSCORE_NoScoreDataFound
Definition: indx_err.h:125
#define ERR_QSCORE_RedundantScores
Definition: indx_err.h:123
char * buf
int i
if(yy_accept[yy_current_state])
constexpr auto sort(_Init &&init)
#define count
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
DataBlkPtr chain
Definition: ftablock.h:341
~EntryBlk()
Definition: block.cpp:121
GapFeats * next
Definition: ftablock.h:130
Indexblk * ibp
Definition: indx_blk.h:56
IndBlkNode * next
Definition: indx_blk.h:57
Char acnum[200]
Definition: ftablock.h:166
Int2 vernum
Definition: ftablock.h:167
GapFeatsPtr gaps
Definition: ftablock.h:214
size_t offset
Definition: ftablock.h:168
XmlIndexPtr xip
Definition: ftablock.h:217
const char * qsfile
QSStruct * next
Definition: block.cpp:56
Int2 version
Definition: block.cpp:53
string accession
Definition: block.cpp:52
size_t length
Definition: block.cpp:55
size_t offset
Definition: block.cpp:54
XmlIndex * next
Definition: ftablock.h:158
XmlIndex * subtags
Definition: ftablock.h:157
Modified on Fri Sep 20 14:58:27 2024 by modify_doxy.py rev. 669887