NCBI C++ ToolKit
blastdb_dataextract.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blastdb_dataextract.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blastdb_dataextract.cpp
31  * Defines classes which extract data from a BLAST database
32  */
33 #include <ncbi_pch.hpp>
36 #include <objects/seq/Seqdesc.hpp>
40 #include <corelib/ncbiutil.hpp>
42 #include <util/checksum.hpp>
44 #include <objmgr/scope.hpp>
45 
48 
49 #define NOT_AVAILABLE "N/A"
50 #define SEPARATOR ";"
51 
52 extern string GetBareId(const CSeq_id& id);
53 
54 void CBlastDBExtractor::SetSeqId(const CBlastDBSeqId &id, bool get_data) {
55  m_Defline.Reset();
56  m_Gi = ZERO_GI;
57  m_Oid = -1;
58  CRef<CSeq_id> seq_id;
59 
60  TGi target_gi = ZERO_GI;
61  CSeq_id *target_seq_id = NULL;
62 
63  if (id.IsOID()) {
64  m_Oid = id.GetOID();
65  } else if (id.IsGi()) {
66  m_Gi = GI_FROM(TIntId, id.GetGi());
68  if (m_TargetOnly || ! get_data) target_gi = m_Gi;
69  } else if (id.IsPig()) {
70  m_BlastDb.PigToOid(id.GetPig(), m_Oid);
71  } else if (id.IsStringId()) {
72  string acc(id.GetStringId());
73  NStr::ToUpper(acc);
74  vector<TOID> oids;
75  m_BlastDb.AccessionToOids(acc, oids);
76  if (!oids.empty()) {
77  m_Oid = oids[0];
78  if (m_TargetOnly || ! get_data) {
79  // TODO check if id is complete
81  target_seq_id = &(*seq_id);
82  }
83  }
84  }
85 
86  if (m_Oid < 0) {
87  NCBI_THROW(CSeqDBException, eArgErr,
88  "Entry not found in BLAST database");
89  }
90 
92  if (length <= 0) {
93  NCBI_THROW(CSeqDBException, eArgErr,
94  "Entry found in BLAST database has invalid length");
95  }
96 
98  if((TSeqPos)length <= m_SeqRange.GetTo())
99  {
100  m_SeqRange.SetTo(length-1);
101  }
102 
104  {
105  if (m_SeqRange.GetTo() < m_SeqRange.GetFrom()) {
106  NCBI_THROW(CSeqDBException, eArgErr,
107  "start pos > length of sequence");
108  }
109  }
110 
111  try {
112  if (get_data) {
113  m_Bioseq.Reset(m_BlastDb.GetBioseq(m_Oid, target_gi, target_seq_id));
114  }
115  else if (m_Gi <= ZERO_GI)
116  { // If no GI, then all the Gi2XMaps will fail.
117  m_Bioseq.Reset(m_BlastDb.GetBioseqNoData(m_Oid, target_gi, target_seq_id));
118  }
119 
120  } catch (const CSeqDBException& e) {
121  // this happens when CSeqDB detects a GI that doesn't belong to a
122  // filtered database (e.g.: swissprot as a subset of nr)
123  if (e.GetMsg().find("oid headers do not contain target gi")) {
124  NCBI_THROW(CSeqDBException, eArgErr,
125  "Entry not found in BLAST database");
126  }
127  }
128 }
129 
131  return NStr::IntToString(m_Oid);
132 }
133 
135  if (m_Oid2Pig.first != m_Oid)
136  {
137  CSeqDB::TPIG pig;
138  m_BlastDb.OidToPig(m_Oid, pig);
139  m_Oid2Pig.first = m_Oid;
140  m_Oid2Pig.second = pig;
141  }
142  return NStr::IntToString(m_Oid2Pig.second);
143 }
144 
146  x_SetGi();
148 }
149 
151  if (m_Gi != ZERO_GI) return;
152  ITERATE(list<CRef<CSeq_id> >, itr, m_Bioseq->GetId()) {
153  if ((*itr)->IsGi()) {
154  m_Gi = (*itr)->GetGi();
155  return;
156  }
157  }
158  return;
159 }
160 
162 {
163  if (m_Defline.NotEmpty()) {
164  return;
165  }
166  if (m_Bioseq.NotEmpty()) {
168  }
169  if (m_Defline.Empty()) {
171  }
172 }
173 
175 {
176  x_InitDefline();
177  string retval;
178 
180  const CRef<CSeq_id> seqid = FindBestChoice((*itr)->GetSeqid(),
182  _ASSERT(seqid.NotEmpty());
183  if ((*itr)->IsSetLinks()) {
184  if (seqid->IsGi()) {
185  if (seqid->GetGi() == m_Gi) {
186  ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) {
187  retval += NStr::NumericToString(*links_int) + SEPARATOR;
188  }
189  break;
190  }
191  } else {
192  ITERATE(CBlast_def_line::TLinks, links_int, (*itr)->GetLinks()) {
193  retval += NStr::NumericToString(*links_int) + SEPARATOR;
194  }
195  }
196  }
197  }
198  if (retval.size()) {
199  retval.erase(retval.size()-1, 1); // remove the last separator
200  }
201 
202  return (retval.empty() ? NOT_AVAILABLE : retval);
203 }
204 
206 {
207  x_InitDefline();
208  int retval = 0;
209 
210  if (m_Gi == ZERO_GI) {
211  return NStr::IntToString(0);
212  }
213 
215  const CRef<CSeq_id> seqid = FindBestChoice((*itr)->GetSeqid(),
217  _ASSERT(seqid.NotEmpty());
218 
219  if (seqid->IsGi() && (seqid->GetGi() == m_Gi) &&
220  (*itr)->IsSetMemberships()) {
222  (*itr)->GetMemberships()) {
223  retval += *memb_int;
224  }
225  break;
226  }
227  }
228 
229  return NStr::IntToString(retval);
230 }
231 
233 {
234  x_InitDefline();
235  CNcbiOstrstream oss;
236  oss << MSerial_AsnText << *m_Defline << endl;
237  return CNcbiOstrstreamToString(oss);
238 }
239 
241 {
243  CNcbiOstrstream oss;
244  oss << MSerial_AsnText << *m_Bioseq << endl;
245  return CNcbiOstrstreamToString(oss);
246 }
247 
249 {
250  if (m_Gi2AccMap.first == m_Oid)
251  return;
252 
253  map<TGi, string> gi2acc;
254  x_InitDefline();
256  TGi gi(INVALID_GI);
257  ITERATE(CBlast_def_line::TSeqid, id, ((*bd)->GetSeqid())) {
258  if ((*id)->IsGi()) {
259  gi = (*id)->GetGi();
260  break;
261  }
262  }
263  CRef<CSeq_id> theId = FindBestChoice((*bd)->GetSeqid(), CSeq_id::WorstRank);
264  string acc;
265  theId->GetLabel(&acc, CSeq_id::eContent);
266  if (gi != INVALID_GI)
267  gi2acc[gi] = acc;
268  }
269  m_Gi2AccMap.first = m_Oid;
270  m_Gi2AccMap.second.swap(gi2acc);
271  return;
272 }
273 
275 {
276  if (m_Gi2SeqIdMap.first == m_Oid)
277  return;
278 
279  map<TGi, string> gi2id;
280  x_InitDefline();
282  TGi gi(INVALID_GI);
283  ITERATE(CBlast_def_line::TSeqid, id, ((*bd)->GetSeqid())) {
284  if ((*id)->IsGi()) {
285  gi = (*id)->GetGi();
286  break;
287  }
288  }
289  CRef<CSeq_id> theId = FindBestChoice((*bd)->GetSeqid(), CSeq_id::WorstRank);
290  if (gi != INVALID_GI) {
291  if (m_UseLongSeqIds) {
292  gi2id[gi] = theId->AsFastaString();
293  }
294  else {
295  gi2id[gi] = GetBareId(*theId);
296  }
297  }
298  }
299  m_Gi2SeqIdMap.first = m_Oid;
300  m_Gi2SeqIdMap.second.swap(gi2id);
301  return;
302 }
303 
305 {
306  if (m_Gi2TitleMap.first == m_Oid)
307  return;
308 
309  map<TGi, string> gi2title;
310  x_InitDefline();
312  TGi gi(INVALID_GI);
313  ITERATE(CBlast_def_line::TSeqid, id, ((*bd)->GetSeqid())) {
314  if ((*id)->IsGi()) {
315  gi = (*id)->GetGi();
316  break;
317  }
318  }
319  if (gi != INVALID_GI) {
320  gi2title[gi] = (*bd)->GetTitle();
321  }
322  }
323  m_Gi2TitleMap.first = m_Oid;
324  m_Gi2TitleMap.second.swap(gi2title);
325  return;
326 }
327 
329  if (m_Gi != ZERO_GI)
330  {
331  x_SetGi2AccMap();
332  return m_Gi2AccMap.second[m_Gi];
333  }
334 
336  if (theId->IsGeneral() && theId->GetGeneral().GetDb() == "BL_ORD_ID") {
337  return NOT_AVAILABLE;
338  }
339  string acc;
340  theId->GetLabel(&acc, CSeq_id::eContent);
341  return acc;
342 }
343 
345  if (m_Gi != ZERO_GI)
346  {
348  return m_Gi2SeqIdMap.second[m_Gi];
349  }
350 
352  if (theId->IsGeneral() && theId->GetGeneral().GetDb() == "BL_ORD_ID") {
353  return NOT_AVAILABLE;
354  }
355  string retval;
356 
357  if (m_UseLongSeqIds) {
358  retval = theId->AsFastaString();
359 
360  // Remove "lcl|" on local ID.
361  if(theId->IsLocal())
362  retval = retval.erase(0, 4);
363  }
364  else {
365  retval = GetBareId(*theId);
366  }
367 
368  return retval;
369 }
370 
372  if (m_Gi != ZERO_GI)
373  {
375  return m_Gi2TitleMap.second[m_Gi];
376  }
377 
378  ITERATE(list <CRef <CSeqdesc> >, itr, m_Bioseq->GetDescr().Get()) {
379  if ((*itr)->IsTitle()) {
380  return (*itr)->GetTitle();
381  }
382  }
383  return NOT_AVAILABLE;
384 }
385 
388 }
389 
391  set<TTaxId> taxids;
392  x_ExtractLeafTaxIds(taxids);
393  if (taxids.empty()) {
394  return ExtractTaxId();
395  }
396  string retval;
397  ITERATE(set<TTaxId>, taxids_iter, taxids) {
398  if (retval.empty()) {
399  retval = NStr::NumericToString(*taxids_iter);
400  } else {
401  retval += SEPARATOR + NStr::NumericToString(*taxids_iter);
402  }
403  }
404  return retval;
405 }
406 
408  const TTaxId kTaxID = x_ExtractTaxId();
409  SSeqDBTaxInfo tax_info;
410  string retval(NOT_AVAILABLE);
411  try {
412  m_BlastDb.GetTaxInfo(kTaxID, tax_info);
413  _ASSERT(kTaxID == tax_info.taxid);
414  retval = tax_info.common_name;
415  } catch (...) {}
416  return retval;
417 }
418 
420  set<TTaxId> taxids;
421  x_ExtractLeafTaxIds(taxids);
422  SSeqDBTaxInfo tax_info;
423  string retval;
424  ITERATE(set<TTaxId>, taxid_iter, taxids) {
425  const TTaxId kTaxID = *taxid_iter;
426  try {
427  m_BlastDb.GetTaxInfo(kTaxID, tax_info);
428  _ASSERT(kTaxID == tax_info.taxid);
429  if (retval.empty()) {
430  retval = tax_info.common_name;
431  } else {
432  retval += SEPARATOR + tax_info.common_name;
433  }
434  } catch (...) {}
435  }
436  if (retval.empty()) {
438  } else {
439  return retval;
440  }
441 }
442 
444  const TTaxId kTaxID = x_ExtractTaxId();
445  SSeqDBTaxInfo tax_info;
446  string retval(NOT_AVAILABLE);
447  try {
448  m_BlastDb.GetTaxInfo(kTaxID, tax_info);
449  _ASSERT(kTaxID == tax_info.taxid);
450  retval = tax_info.scientific_name;
451  } catch (...) {}
452  return retval;
453 }
454 
456  set<TTaxId> taxids;
457  x_ExtractLeafTaxIds(taxids);
458  SSeqDBTaxInfo tax_info;
459  string retval;
460  ITERATE(set<TTaxId>, taxid_iter, taxids) {
461  const TTaxId kTaxID = *taxid_iter;
462  try {
463  m_BlastDb.GetTaxInfo(kTaxID, tax_info);
464  _ASSERT(kTaxID == tax_info.taxid);
465  if (retval.empty()) {
466  retval = tax_info.scientific_name;
467  } else {
468  retval += SEPARATOR + tax_info.scientific_name;
469  }
470  } catch (...) {}
471  }
472  if (retval.empty()) {
473  return ExtractScientificName();
474  } else {
475  return retval;
476  }
477 }
478 
480  const TTaxId kTaxID = x_ExtractTaxId();
481  SSeqDBTaxInfo tax_info;
482  string retval(NOT_AVAILABLE);
483  try {
484  m_BlastDb.GetTaxInfo(kTaxID, tax_info);
485  _ASSERT(kTaxID == tax_info.taxid);
486  retval = tax_info.blast_name;
487  } catch (...) {}
488  return retval;
489 }
490 
491 //string CBlastDBExtractor::ExtractBlastName() {
492 // set<int> taxids;
493 // x_ExtractTaxIds(taxids);
494 // SSeqDBTaxInfo tax_info;
495 // string retval;
496 // ITERATE(set<int>, taxid_iter, taxids) {
497 // const int kTaxID = *taxid_iter;
498 // try {
499 // m_BlastDb.GetTaxInfo(kTaxID, tax_info);
500 // _ASSERT(kTaxID == tax_info.taxid);
501 // if (retval.empty()) {
502 // retval = tax_info.blast_name;
503 // } else {
504 // retval += SEPARATOR + tax_info.blast_name;
505 // }
506 // } catch (...) {}
507 // }
508 // if (retval.empty()) {
509 // return string(NOT_AVAILABLE);
510 // } else {
511 // return retval;
512 // }
513 //}
514 
516  const TTaxId kTaxID = x_ExtractTaxId();
517  SSeqDBTaxInfo tax_info;
518  string retval(NOT_AVAILABLE);
519  try {
520  m_BlastDb.GetTaxInfo(kTaxID, tax_info);
521  _ASSERT(kTaxID == tax_info.taxid);
522  retval = tax_info.s_kingdom;
523  } catch (...) {}
524  return retval;
525 }
526 
527 //string CBlastDBExtractor::ExtractSuperKingdom() {
528 // set<int> taxids;
529 // x_ExtractTaxIds(taxids);
530 // SSeqDBTaxInfo tax_info;
531 // string retval;
532 // ITERATE(set<int>, taxid_iter, taxids) {
533 // const int kTaxID = *taxid_iter;
534 // try {
535 // m_BlastDb.GetTaxInfo(kTaxID, tax_info);
536 // _ASSERT(kTaxID == tax_info.taxid);
537 // if (retval.empty()) {
538 // retval = tax_info.s_kingdom;
539 // } else {
540 // retval += SEPARATOR + tax_info.s_kingdom;
541 // }
542 // } catch (...) {}
543 // }
544 // if (retval.empty()) {
545 // return string(NOT_AVAILABLE);
546 // } else {
547 // return retval;
548 // }
549 //}
550 
551  static const string kNoMasksFound = "none";
553 #if ((defined(NCBI_COMPILER_WORKSHOP) && (NCBI_COMPILER_VERSION <= 550)) || \
554  defined(NCBI_COMPILER_MIPSPRO))
555  return kNoMasksFound;
556 #else
557  CSeqDB::TSequenceRanges masked_ranges;
558  x_ExtractMaskingData(masked_ranges, m_FmtAlgoId);
559  if (masked_ranges.empty()) return kNoMasksFound;
560 
562  ITERATE(CSeqDB::TSequenceRanges, range, masked_ranges) {
563  out << range->first << "-" << range->second << SEPARATOR;
564  }
566 #endif
567 }
568 
570  string seq;
571  try {
573  CSeqDB::TSequenceRanges masked_ranges;
574  x_ExtractMaskingData(masked_ranges, m_FiltAlgoId);
575  ITERATE(CSeqDB::TSequenceRanges, mask, masked_ranges) {
576  transform(&seq[mask->first], &seq[mask->second],
577  &seq[mask->first], (int (*)(int))::tolower);
578  }
579  if (m_Strand == eNa_strand_minus) {
581  0, static_cast<ncbi::TSeqPos>(seq.size()));
582  }
583  } catch (CSeqDBException& e) {
584  //FIXME: change the enumeration when it's availble
586  e.GetErrCode() == CSeqDBException::eFileErr/*eOutOfRange*/) {
587  NCBI_THROW(CInvalidDataException, eInvalidRange, e.GetMsg());
588  }
589  throw;
590  }
591  return seq;
592 }
593 
596 }
597 
599  string seq;
601  return NStr::IntToString(CBlastSeqUtil::GetSeqHash(seq.c_str(), static_cast<ncbi::TSeqPos>(seq.size())));
602 }
603 
604 #define CTRL_A "\001"
605 
607 {
608  static const string kTarget(" >gi|");
609  static const string kCtrlA = string(CTRL_A) + string("gi|");
610  NON_CONST_ITERATE(CSeq_descr::Tdata, desc, bioseq->SetDescr().Set()) {
611  if ((*desc)->Which() == CSeqdesc::e_Title) {
612  NStr::ReplaceInPlace((*desc)->SetTitle(), kTarget, kCtrlA);
613  break;
614  }
615  }
616 }
617 
618 static string s_GetTitle(const CBioseq & bioseq)
619 {
620  _ASSERT(bioseq.CanGetDescr());
621  ITERATE(CSeq_descr::Tdata, desc, bioseq.GetDescr().Get()) {
622  if ((*desc)->Which() == CSeqdesc::e_Title) {
623  return (*desc)->GetTitle();
624  }
625  }
626  return string();
627 }
628 
629 /// Auxiliary function to format the defline for FASTA output format
630 static string
631 s_ConfigureDeflineTitle(const string& title, bool use_ctrl_a)
632 {
633  static const string kStandardSeparator(" >");
634  const string kSeparator(use_ctrl_a ? CTRL_A : kStandardSeparator);
635  string retval;
636  list<string> tokens;
637  NStr::Split(title, kStandardSeparator, tokens, NStr::fSplit_ByPattern);
638  int idx = 0;
639  for (auto token : tokens) {
640  if (idx++ == 0) {
641  retval += token;
642  continue;
643  }
644  SIZE_TYPE pos = token.find(' ');
645  const string kPossibleId(token, 0, pos != NPOS ? pos : token.length());
646  CBioseq::TId seqids;
647 
648  try {
649  CSeq_id::ParseIDs(seqids, kPossibleId, CSeq_id::fParse_PartialOK);
650  } catch (const CException&) {}
651 
652  if (!seqids.empty()) {
653  retval += kSeparator;
655  retval += GetBareId(*id);
656  if (pos != NPOS)
657  retval += token.substr(pos, token.length() - pos);
658  } else {
659  retval += kStandardSeparator + token;
660  }
661  }
662  return retval;
663 }
664 
666  stringstream out("");
667 
668  CFastaOstream fasta(out);
669  fasta.SetWidth(m_LineWidth);
671 
672  SetSeqId(id, true);
673 
674  if (m_UseCtrlA && m_UseLongSeqIds) {
676  }
677 
679 
680  // Handle the case when a sequence range is provided
683  if (m_SeqRange.NotEmpty()) {
684  range.Reset(new CSeq_loc(*seqid, m_SeqRange.GetFrom(),
687  } else {
688  TSeqPos length = m_Bioseq->GetLength();
689  range.Reset(new CSeq_loc(*seqid, 0, length-1, m_Strand));
691  }
692  }
693  // Handle any requests for masked FASTA
694  static const CFastaOstream::EMaskType kMaskType = CFastaOstream::eSoftMask;
695  CSeqDB::TSequenceRanges masked_ranges;
696  x_ExtractMaskingData(masked_ranges, m_FiltAlgoId);
697  if (!masked_ranges.empty()) {
698  CRef<CSeq_loc> masks(new CSeq_loc);
699  ITERATE(CSeqDB::TSequenceRanges, itr, masked_ranges) {
700  CRef<CSeq_loc> mask(new CSeq_loc(*seqid, itr->first, itr->second -1));
701  masks->SetMix().Set().push_back(mask);
702  }
703  fasta.SetMask(kMaskType, masks);
704  }
705 
706  try {
707  if (m_UseLongSeqIds) {
708  if (seqid->IsLocal()) {
709  string lcl_tmp = seqid->AsFastaString();
710  lcl_tmp = lcl_tmp.erase(0, 4);
711  out << ">" << lcl_tmp << " " << s_GetTitle(*m_Bioseq) << '\n';
713  fasta.WriteSequence(scope.AddBioseq(*m_Bioseq), range);
714  }
715  else {
716  fasta.Write(*m_Bioseq, range);
717  }
718  }
719  else {
720 
721  out << '>';
722  CRef<CSeq_id> id = FindBestChoice(m_Bioseq->GetId(), CSeq_id::Score);
723  out << GetBareId(*id);
724 
725  string title = s_GetTitle(*m_Bioseq.GetNonNullPointer());
726  out << ' ' << s_ConfigureDeflineTitle(title, m_UseCtrlA);
727  out << endl;
728 
729  CScope scope(*CObjectManager::GetInstance());
730  fasta.WriteSequence(scope.AddBioseq(*m_Bioseq), range);
731  }
732  }
733  catch (const CObjmgrUtilException& e) {
734  if (e.GetErrCode() == CObjmgrUtilException::eBadLocation) {
735  NCBI_THROW(CInvalidDataException, eInvalidRange,
736  "Invalid sequence range");
737  }
738  }
739  return out.str();
740 }
741 
742 TTaxId CBlastDBExtractor::x_ExtractTaxId()
743 {
744  x_SetGi();
745 
746  if (m_Gi != ZERO_GI) {
747  if (m_Gi2TaxidMap.first != m_Oid)
748  {
749  m_Gi2TaxidMap.first = m_Oid;
750  m_BlastDb.GetTaxIDs(m_Oid, m_Gi2TaxidMap.second);
751  }
752  return m_Gi2TaxidMap.second[m_Gi];
753  }
754  // for database without Gi:
755  vector<TTaxId> taxid;
756  m_BlastDb.GetTaxIDs(m_Oid, taxid);
757  return taxid.size() ? taxid[0] : ZERO_TAX_ID;
758 }
759 
760 void CBlastDBExtractor::x_ExtractLeafTaxIds(set<TTaxId>& taxids)
761 {
762  x_SetGi();
763 
764  if (m_Gi != ZERO_GI) {
765  if (m_Gi2TaxidSetMap.first != m_Oid)
766  {
767  m_Gi2TaxidSetMap.first = m_Oid;
768  m_BlastDb.GetLeafTaxIDs(m_Oid, m_Gi2TaxidSetMap.second);
769  }
770  taxids.clear();
771  const set<TTaxId>& taxid_set = m_Gi2TaxidSetMap.second[m_Gi];
772  taxids.insert(taxid_set.begin(), taxid_set.end());
773  return;
774  }
775  // for database without Gi:
776  vector<TTaxId> taxid;
777  m_BlastDb.GetLeafTaxIDs(m_Oid, taxid);
778  taxids.clear();
779  taxids.insert(taxid.begin(), taxid.end());
780 }
781 
782 void
783 CBlastDBExtractor::x_ExtractMaskingData(CSeqDB::TSequenceRanges &ranges,
784  int algo_id)
785 {
786  ranges.clear();
787  if (algo_id != -1) {
788  m_BlastDb.GetMaskData(m_Oid, algo_id, ranges);
789  }
790 }
791 
792 void CBlastDBExtractor::SetConfig(TSeqRange range, objects::ENa_strand strand,
793  int filt_algo_id)
794 {
795  m_OrigSeqRange = range;
796  m_Strand = strand;
797  m_FiltAlgoId = filt_algo_id;
798 }
799 
800 void CBlastDeflineUtil::ExtractDataFromBlastDeflineSet(const CBlast_def_line_set & dl_set,
801  vector<string> & results,
802  BlastDeflineFields fields,
803  string target_id,
804  bool use_long_id)
805 {
806  CSeq_id target_seq_id (target_id, CSeq_id::fParse_PartialOK | CSeq_id::fParse_Default);
807  Int8 num_id = NStr::StringToNumeric<Int8>(target_id, NStr::fConvErr_NoThrow);
808  bool can_be_gi = errno ? false: true;
809  ITERATE(CBlast_def_line_set::Tdata, itr, dl_set.Get()) {
810  ITERATE(CBlast_def_line::TSeqid, id, (*itr)->GetSeqid()) {
811  if ((*id)->Match(target_seq_id) || (can_be_gi && (*id)->IsGi() && ((*id)->GetGi() == GI_FROM(TIntId, num_id)))) {
812  CBlastDeflineUtil::ExtractDataFromBlastDefline( **itr, results, fields, use_long_id);
813  return;
814  }
815  }
816  }
817 
818  NCBI_THROW(CException, eInvalid, "Failed to find target id " + target_id);
819 }
820 
821 static string s_CheckName(const string & name)
822 {
823  if(name == "-") return NOT_AVAILABLE;
824  if(name == "unclassified") return NOT_AVAILABLE;
825 
826  return name;
827 }
828 
829 void CBlastDeflineUtil::ExtractDataFromBlastDefline(const CBlast_def_line & dl,
830  vector<string> & results,
831  BlastDeflineFields fields,
832  bool use_long_id)
833 {
834  results.clear();
835  results.resize(CBlastDeflineUtil::max_index, kEmptyStr);
836  if (fields.gi == 1) {
837  results[CBlastDeflineUtil::gi] = NOT_AVAILABLE;
838  ITERATE(CBlast_def_line::TSeqid, id, dl.GetSeqid()) {
839  if ((*id)->IsGi()) {
840  TGi gi = (*id)->GetGi();
841  results[CBlastDeflineUtil::gi] = NStr::NumericToString(gi);
842  break;
843  }
844  }
845  }
846  if ((fields.accession == 1) || (fields.seq_id == 1)) {
847  CRef<CSeq_id> theId = FindBestChoice(dl.GetSeqid(), CSeq_id::WorstRank);
848  if(fields.seq_id == 1) {
849  results[CBlastDeflineUtil::seq_id] = theId->AsFastaString();
850  }
851  if(fields.accession == 1) {
852  results[CBlastDeflineUtil::accession] = GetBareId(*theId);
853  }
854  }
855  if(fields.title == 1) {
856  if(dl.IsSetTitle()) {
857  results[CBlastDeflineUtil::title] = dl.GetTitle();
858  }
859  else {
860  results[CBlastDeflineUtil::title] = NOT_AVAILABLE;
861  }
862  }
863  if ((fields.tax_id == 1) || (fields.tax_names == 1)) {
864  TTaxId tax_id = ZERO_TAX_ID;
865  if (dl.IsSetTaxid()) {
866  tax_id = dl.GetTaxid();
867  }
868 
869  if (fields.tax_id == 1) {
870  results[CBlastDeflineUtil::tax_id] = NStr::NumericToString(tax_id);
871  }
872 
873  if (fields.tax_names == 1) {
874  try {
875  SSeqDBTaxInfo taxinfo;
876  CSeqDB::GetTaxInfo(tax_id, taxinfo);
877  results[CBlastDeflineUtil::scientific_name] = taxinfo.scientific_name;
878  results[CBlastDeflineUtil::common_name] = taxinfo.common_name;
879  results[CBlastDeflineUtil::blast_name] = s_CheckName(taxinfo.blast_name);
880  results[CBlastDeflineUtil::super_kingdom] = s_CheckName(taxinfo.s_kingdom);
881  } catch (const CException&) {
882  results[CBlastDeflineUtil::scientific_name] = NOT_AVAILABLE;
883  results[CBlastDeflineUtil::common_name] = NOT_AVAILABLE;
884  results[CBlastDeflineUtil::blast_name] = NOT_AVAILABLE;
885  results[CBlastDeflineUtil::super_kingdom] = NOT_AVAILABLE;
886  }
887  }
888  }
889 
890  if ((fields.leaf_node_tax_ids == 1) || (fields.leaf_node_tax_names == 1)) {
891  set<TTaxId> tax_id_set = dl.GetLeafTaxIds();
892  if (tax_id_set.empty()) {
893  if (dl.IsSetTaxid()) {
894  tax_id_set.insert(dl.GetTaxid());
895  }
896  else {
897  tax_id_set.insert(ZERO_TAX_ID);
898  }
899  }
900 
901  string separator = kEmptyStr;
902  ITERATE(set<TTaxId>, itr, tax_id_set) {
903  if (fields.leaf_node_tax_names == 1) {
904  try {
905  SSeqDBTaxInfo taxinfo;
906  CSeqDB::GetTaxInfo(*itr, taxinfo);
907  results[CBlastDeflineUtil::leaf_node_scientific_names] += separator + taxinfo.scientific_name;
908  results[CBlastDeflineUtil::leaf_node_common_names] += separator + taxinfo.common_name;
909  } catch (const CException&) {
910  results[CBlastDeflineUtil::leaf_node_scientific_names] += separator + NOT_AVAILABLE;
911  results[CBlastDeflineUtil::leaf_node_common_names] += separator + NOT_AVAILABLE;
912  }
913  }
914  results[CBlastDeflineUtil::leaf_node_tax_ids] += separator + NStr::NumericToString(*itr);
915  separator = SEPARATOR;
916  }
917  }
918 
919  if (fields.membership == 1) {
920  int membership = 0;
921  if(dl.IsSetMemberships()) {
922  ITERATE(CBlast_def_line::TMemberships, memb_int, dl.GetMemberships()) {
923  membership += *memb_int;
924  }
925  }
926  results[CBlastDeflineUtil::membership] = NStr::NumericToString(membership);
927  }
928 
929  if (fields.pig == 1) {
930  int pig = -1;
931  if (dl.IsSetOther_info()) {
932  ITERATE(CBlast_def_line::TOther_info, itr, dl.GetOther_info()) {
933  if (*itr != -1) {
934  pig = *itr;
935  break;
936  }
937  }
938  }
939  results[CBlastDeflineUtil::pig] = NStr::NumericToString(pig);
940  }
941  if(fields.links == 1) {
942  if (dl.IsSetLinks()) {
943  ITERATE(CBlast_def_line::TLinks, links_int, dl.GetLinks()) {
944  results[CBlastDeflineUtil::links] += NStr::NumericToString(*links_int) + SEPARATOR;
945  }
946  }
947  else {
948  results[CBlastDeflineUtil::links] = NOT_AVAILABLE;
949  }
950  }
951 
952  if(fields.asn_defline == 1) {
953  CNcbiOstrstream tmp;
954  tmp << MSerial_AsnText << dl;
955  results[CBlastDeflineUtil::asn_defline] = CNcbiOstrstreamToString(tmp);
956  }
957 }
958 
959 void CBlastDeflineUtil::ProcessFastaDeflines(
960  CBioseq & bioseq,
961  string & out,
962  bool use_ctrla
963 )
964 {
965  out = kEmptyStr;
966  const CSeq_id* id = bioseq.GetFirstId();
967  if (!id) {
968  return;
969  }
970  if (id->IsGeneral() && id->GetGeneral().GetDb() == "BL_ORD_ID") {
971  out = ">" + s_GetTitle(bioseq) + '\n';
972  }
973  else if (id->IsLocal()) {
974  string lcl_tmp = id->AsFastaString();
975  lcl_tmp = lcl_tmp.erase(0,4);
976  out = ">" + lcl_tmp + ' ' + s_GetTitle(bioseq) + '\n';
977  } else {
978  out = '>';
979  id = FindBestChoice(bioseq.GetId(), CSeq_id::Score);
980  out += GetBareId(*id) + ' ';
981 
982  string title = s_GetTitle(bioseq);
983  out += s_ConfigureDeflineTitle(title, use_ctrla);
984  out += '\n';
985  }
986 }
987 
988 void CBlastDeflineUtil::ProcessFastaDeflines(
989  CBioseq & bioseq,
990  string & out,
991  bool use_ctrla,
992  const CSeq_loc* location,
993  ENa_strand strand
994 )
995 {
996  out = kEmptyStr;
997  const CSeq_id* id = bioseq.GetFirstId();
998  if (!id) {
999  return;
1000  }
1001  string range;
1002  if (location != NULL) {
1003  TSeqPos start = location->GetStart(eExtreme_Biological) + 1;
1004  TSeqPos stop = location->GetStop(eExtreme_Biological) + 1;
1005  if (strand == eNa_strand_minus) {
1006  range = ":c"
1007  + NStr::IntToString(stop) + "-" + NStr::IntToString(start)
1008  + " ";
1009  } else {
1010  range = ":"
1011  + NStr::IntToString(start) + "-" + NStr::IntToString(stop)
1012  + " ";
1013  }
1014  }
1015  if (id->IsGeneral() && id->GetGeneral().GetDb() == "BL_ORD_ID") {
1016  out = ">" + range + s_GetTitle(bioseq) + '\n';
1017  }
1018  else if (id->IsLocal()) {
1019  string lcl_tmp = id->AsFastaString();
1020  lcl_tmp = lcl_tmp.erase(0,4);
1021  out = ">" + lcl_tmp + (range.empty() ? " " : range)
1022  + s_GetTitle(bioseq) + '\n';
1023  } else {
1024  out = '>';
1025  id = FindBestChoice(bioseq.GetId(), CSeq_id::Score);
1026  out += GetBareId(*id) + (range.empty() ? " " : range);
1027 
1028  string title = s_GetTitle(bioseq);
1029  out += s_ConfigureDeflineTitle(title, use_ctrla);
1030  out += '\n';
1031  }
1032 }
1033 
1034 // Calculates hash for a buffer in IUPACna (NCBIeaa for proteins) format.
1035 // NOTE: if sequence is in a different format, the function below can be modified to convert
1036 // each byte into IUPACna encoding on the fly.
1037 Uint4 CBlastSeqUtil::GetSeqHash(const char* buffer, int length)
1038 {
1039  CChecksum crc(CChecksum::eCRC32ZIP);
1040 
1041  for(int ii = 0; ii < length; ii++) {
1042  if (buffer[ii] != '\n')
1043  crc.AddChars(buffer+ii,1);
1044  }
1045  return (crc.GetChecksum() ^ (0xFFFFFFFFL));
1046 }
1047 
1048 void CBlastSeqUtil::ApplySeqMask(string & seq, const CSeqDB::TSequenceRanges & masks, const TSeqRange r)
1049 {
1050  if(r.Empty()) {
1051  ITERATE(CSeqDB::TSequenceRanges, itr, masks) {
1052  transform(&seq[itr->first], &seq[itr->second],
1053  &seq[itr->first], (int (*)(int))::tolower);
1054  }
1055  }
1056  else {
1057  const TSeqPos r_from = r.GetFrom();
1058  ITERATE(CSeqDB::TSequenceRanges, itr, masks) {
1059  TSeqRange mask (*itr);
1060  if(mask.GetFrom() > r.GetTo()) {
1061  break;
1062  }
1063  TSeqRange tmp = r.IntersectionWith(mask);
1064  if(!tmp.Empty()) {
1065  transform(&seq[tmp.GetFrom() -r_from], &seq[tmp.GetToOpen() - r_from],
1066  &seq[tmp.GetFrom() -r_from], (int (*)(int))::tolower);
1067  }
1068  }
1069  }
1070 }
1071 
1072 void CBlastSeqUtil::GetReverseStrandSeq(string & seq)
1073 {
1074  CSeqManip::ReverseComplement(seq, CSeqUtil::e_Iupacna, 0, static_cast<ncbi::TSeqPos>(seq.size()));
1075 }
1076 
1077 string CBlastSeqUtil::GetMasksString(const CSeqDB::TSequenceRanges & masks)
1078 {
1079  if (masks.empty()) {
1080  return kNoMasksFound;
1081  }
1082  CNcbiOstrstream out;
1083  ITERATE(CSeqDB::TSequenceRanges, range, masks) {
1084  out << range->first << "-" << range->second << SEPARATOR;
1085  }
1086  return CNcbiOstrstreamToString(out);
1087 }
1088 
1089 END_NCBI_SCOPE
USING_SCOPE(objects)
static void s_ReplaceCtrlAsInTitle(CRef< CBioseq > bioseq)
static string s_GetTitle(const CBioseq &bioseq)
static string s_ConfigureDeflineTitle(const string &title, bool use_ctrl_a)
Auxiliary function to format the defline for FASTA output format.
static const string kNoMasksFound
#define NOT_AVAILABLE
string GetBareId(const CSeq_id &id)
Definition: seq_writer.cpp:256
#define CTRL_A
#define SEPARATOR
Declares classes which extract data from a BLAST database.
ncbi::TMaskedQueryRegions mask
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
Checksum and hash calculation classes.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
pair< TOID, CSeqDB::TPIG > m_Oid2Pig
Pair with a pig for one Oid.
pair< TOID, map< TGi, string > > m_Gi2SeqIdMap
void x_ExtractLeafTaxIds(set< TTaxId > &taxids)
int m_FmtAlgoId
filtering algorithm for outfmt
objects::ENa_strand m_Strand
strand
string ExtractLeafCommonTaxonomicNames()
void x_SetGi()
Setting the target_only m_Gi.
void x_SetGi2AccMap()
Sets the map.
int m_FiltAlgoId
filtering algorithm for sequence
pair< TOID, map< TGi, string > > m_Gi2AccMap
Pair with a gi2accession map for one Oid.
void x_SetGi2TitleMap()
Sets the map.
TOID m_Oid
OID of the record.
TSeqRange m_OrigSeqRange
sequence range
CRef< CBioseq > m_Bioseq
bioseq
int m_LineWidth
FASTA output line width.
void x_ExtractMaskingData(CSeqDB::TSequenceRanges &ranges, int algo_id)
bool m_UseCtrlA
Replace with ctrl_a? (used only with f)
bool m_UseLongSeqIds
Use long sequence ids (with gi and accessions with database source)
void x_InitDefline()
Initialize the cached defline.
CSeqDB & m_BlastDb
underlying Blast database
string ExtractFasta(const CBlastDBSeqId &seq_id)
bool m_TargetOnly
Should the record contain multiple seqids? (used only with f)
TSeqRange m_SeqRange
sequence range
CRef< CBlast_def_line_set > m_Defline
Cache the defline (for membership bits)
pair< TOID, map< TGi, string > > m_Gi2TitleMap
Pair with a gi2title map for one Oid.
void SetSeqId(const CBlastDBSeqId &seq_id, bool get_data=false)
Setting seqid.
Encapsulates identifier to retrieve data from a BLAST database.
static Uint4 GetSeqHash(const char *buffer, int length)
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
Defines invalid user input exceptions.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CScope –.
Definition: scope.hpp:92
CSeqDBException.
Definition: seqdbcommon.hpp:73
@ eFileErr
Files were missing or contents were incorrect.
Definition: seqdbcommon.hpp:81
@ eArgErr
Argument validation failed.
Definition: seqdbcommon.hpp:78
int TPIG
Sequence type accepted and returned for PIG indices.
Definition: seqdb.hpp:219
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
Definition: seqdb.cpp:790
bool PigToOid(int pig, int &oid) const
Translate a PIG to an OID.
Definition: seqdb.cpp:781
void GetSequenceAsString(int oid, CSeqUtil::ECoding coding, string &output, TSeqRange range=TSeqRange()) const
Get a sequence in a given encoding.
Definition: seqdb.cpp:1141
int GetSeqLength(int oid) const
Returns the sequence length in base pairs or residues.
Definition: seqdb.cpp:400
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
Definition: seqdb.cpp:504
CRef< CBioseq > GetBioseqNoData(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence without sequence data.
Definition: seqdb.cpp:514
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
Definition: seqdb.cpp:870
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
Definition: seqdb.cpp:1105
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
Definition: seqdb.cpp:418
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
Definition: seqdbvol.cpp:1247
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
Definition: seqdb.cpp:808
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
@ e_Iupacna
Definition: sequtil.hpp:47
Definition: map.hpp:338
bool empty() const
Definition: set.hpp:133
std::ofstream out("events_result.xml")
main entry point for tests
#define INVALID_GI
Definition: ncbimisc.hpp:1089
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
Int8 TIntId
Definition: ncbimisc.hpp:999
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ZERO_GI
Definition: ncbimisc.hpp:1088
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2613
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
static int WorstRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:776
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
Definition: Seq_id.hpp:772
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
Definition: Seq_id.hpp:80
@ fParse_Default
By default in ParseIDs and IsValid, allow raw parsable non-numeric accessions and plausible local acc...
Definition: Seq_id.hpp:102
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
void SetMask(EMaskType type, CConstRef< CSeq_loc > location)
Definition: sequence.cpp:3450
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2727
void SetWidth(TSeqPos width)
Definition: sequence.cpp:3456
EMaskType
Which residues to mask out in subsequent output.
Definition: sequence.hpp:847
virtual void WriteSequence(const CBioseq_Handle &handle, const CSeq_loc *location=0, CSeq_loc::EOpFlags merge_flags=CSeq_loc::fMerge_AbuttingOnly)
Definition: sequence.cpp:3322
void SetFlag(EFlags flag)
Definition: sequence.hpp:859
void SetAllFlags(TFlags flags)
Definition: sequence.hpp:858
void ResetFlag(EFlags flag)
Definition: sequence.hpp:860
@ fKeepGTSigns
don't convert '>' to '_' in title
Definition: sequence.hpp:777
@ fSuppressRange
never include location details in defline
Definition: sequence.hpp:775
@ fEnableGI
Use this flag to enable GI output in the defline.
Definition: sequence.hpp:786
@ eSoftMask
write as lowercase rather than uppercase
Definition: sequence.hpp:848
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
bool NotEmpty(void) const
Definition: range.hpp:152
static position_type GetPositionMax(void)
Definition: range.hpp:250
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5086
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
@ fSplit_ByPattern
Require full delimiter strings.
Definition: ncbistr.hpp:2504
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
list< CRef< CSeq_id > > TSeqid
const Tdata & Get(void) const
Get the member data.
list< CRef< CBlast_def_line > > Tdata
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool CanGetDescr(void) const
Check if it is safe to call GetDescr method.
Definition: Bioseq_.hpp:309
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
static string kSeparator
Definition: id2info.cpp:145
yy_size_t n
range(_Ty, _Ty) -> range< _Ty >
int tolower(Uchar c)
Definition: ncbictype.hpp:72
Useful/utility classes and methods.
The Object manager core.
bool IsStringId(const CSeq_id &id)
Determine if id is a string id.
List of sequence offset ranges.
Definition: seqdb.hpp:236
bool empty() const
Definition: seqdb.hpp:272
SSeqDBTaxInfo.
string common_name
Common name, such as "noisy night monkey".
string blast_name
A simple category name, such as "birds".
string s_kingdom
A string of length 1 indicating the "Super Kingdom".
string scientific_name
Scientific name, such as "Aotus vociferans".
TTaxId taxid
An identifier for this species or taxonomic group.
#define _ASSERT
Modified on Wed Sep 04 15:02:23 2024 by modify_doxy.py rev. 669887