NCBI C++ ToolKit
context.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: context.cpp 99483 2023-04-04 17:43:43Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aaron Ucko, NCBI
27 * Mati Shomrat
28 *
29 * File Description:
30 * new (early 2003) flat-file generator -- context needed when (pre)formatting
31 *
32 * ===========================================================================
33 */
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
38 #include <objects/seq/Bioseq.hpp>
39 #include <objects/seq/Seq_ext.hpp>
40 #include <objects/seq/Seg_ext.hpp>
50 
51 #include <objmgr/scope.hpp>
52 #include <objmgr/bioseq_handle.hpp>
54 #include <objmgr/seq_entry_ci.hpp>
55 #include <objmgr/seqdesc_ci.hpp>
56 #include <objmgr/util/sequence.hpp>
58 #include <objmgr/seq_map.hpp>
59 #include <objmgr/seq_map_ci.hpp>
60 #include <objmgr/feat_ci.hpp>
61 #include <objmgr/bioseq_ci.hpp>
62 #include <objmgr/annot_ci.hpp>
63 
65 
66 
69 USING_SCOPE(sequence);
70 
71 
72 /////////////////////////////////////////////////////////////////////////////
73 //
74 // CBioseqContext
75 
76 // constructor
78 (const CBioseq_Handle& seq,
79  CFlatFileContext& ffctx,
80  CMasterContext* mctx,
81  CTopLevelSeqEntryContext *tlsec) :
82  m_Handle(seq),
83  m_pOpticalMapPoints(nullptr),
84  m_Repr(CSeq_inst::eRepr_not_set),
85  m_Mol(CSeq_inst::eMol_not_set),
86  m_HasParts(false),
87  m_IsPart(false),
88  m_PartNumber(0),
89  m_IsDeltaLitOnly(false),
90  m_IsProt(false),
91  m_IsInSGS(false),
92  m_IsInGPS(false),
93  m_IsInNucProt(false),
94  m_IsGED(false),
95  m_IsGenbank(false),
96  m_IsEMBL(false),
97  m_IsDDBJ(false),
98  m_IsPDB(false),
99  m_IsSP(false),
100  m_IsTPA(false),
101  m_IsJournalScan(false),
102  m_IsRefSeq(false),
103  m_RefseqInfo(0),
104  m_IsGbGenomeProject(false), // GenBank Genome project data (AE)
105  m_IsNcbiCONDiv(false), // NCBI CON division (CH)
106  m_IsNcbiGenomes(false),
107  m_IsPatent(false),
108  m_IsGI(false),
109  m_IsWGS(false),
110  m_IsWGSMaster(false),
111  m_IsTSA(false),
112  m_IsTSAMaster(false),
113  m_IsTLS(false),
114  m_IsTLSMaster(false),
115  m_IsHup(false),
116  m_Gi(ZERO_GI),
117  m_ShowGBBSource(false),
118  m_PatSeqid(0),
119  m_HasOperon(false),
120  m_HasMultiIntervalGenes(true), // true is the safe choice if we're not sure
121  m_IsGenomeAssembly(false),
122  m_IsCrossKingdom(false),
123  m_UsePDBCompoundForComment(false),
124  m_fUnverified(fUnverified_None),
125  m_fUnreviewed(fUnreviewed_None),
126  m_ShowAnnotCommentAsCOMMENT(false),
127  m_ShowAnnotCommentAsCOMMENT_checked(false),
128  m_FFCtx(ffctx),
129  m_RefCache(nullptr),
130  m_Master(mctx),
131  m_TLSeqEntryCtx(tlsec)
132 {
133  x_Init(seq, m_FFCtx.GetLocation());
134 }
135 
136 
138 (const CBioseq_Handle& prev_seq,
139  const CBioseq_Handle& seq,
140  const CBioseq_Handle& next_seq,
141  CFlatFileContext& ffctx,
142  CMasterContext* mctx,
143  CTopLevelSeqEntryContext *tlsec) :
144  m_PrevHandle(prev_seq),
145  m_Handle(seq),
146  m_NextHandle(next_seq),
147  m_pOpticalMapPoints(nullptr),
148  m_Repr(CSeq_inst::eRepr_not_set),
149  m_Mol(CSeq_inst::eMol_not_set),
150  m_HasParts(false),
151  m_IsPart(false),
152  m_PartNumber(0),
153  m_IsDeltaLitOnly(false),
154  m_IsProt(false),
155  m_IsInSGS(false),
156  m_IsInGPS(false),
157  m_IsInNucProt(false),
158  m_IsGED(false),
159  m_IsGenbank(false),
160  m_IsEMBL(false),
161  m_IsDDBJ(false),
162  m_IsPDB(false),
163  m_IsSP(false),
164  m_IsTPA(false),
165  m_IsJournalScan(false),
166  m_IsRefSeq(false),
167  m_RefseqInfo(0),
168  m_IsGbGenomeProject(false), // GenBank Genome project data (AE)
169  m_IsNcbiCONDiv(false), // NCBI CON division (CH)
170  m_IsNcbiGenomes(false),
171  m_IsPatent(false),
172  m_IsGI(false),
173  m_IsWGS(false),
174  m_IsWGSMaster(false),
175  m_IsTSA(false),
176  m_IsTSAMaster(false),
177  m_IsTLS(false),
178  m_IsTLSMaster(false),
179  m_IsHup(false),
180  m_Gi(ZERO_GI),
181  m_ShowGBBSource(false),
182  m_PatSeqid(0),
183  m_HasOperon(false),
184  m_HasMultiIntervalGenes(true), // true is the safe choice if we're not sure
185  m_IsGenomeAssembly(false),
186  m_IsCrossKingdom(false),
187  m_UsePDBCompoundForComment(false),
188  m_fUnverified(fUnverified_None),
189  m_fUnreviewed(fUnreviewed_None),
190  m_ShowAnnotCommentAsCOMMENT(false),
191  m_ShowAnnotCommentAsCOMMENT_checked(false),
192  m_FFCtx(ffctx),
193  m_RefCache(nullptr),
194  m_Master(mctx),
195  m_TLSeqEntryCtx(tlsec)
196 {
197  x_Init(seq, m_FFCtx.GetLocation());
198 }
199 
200 
201 // destructor
203 {
204  if (m_Virtual) {
206  }
207 }
208 
209 
211 {
212  if ( id.IsGi() && id.GetGi() == m_Gi ) {
214  }
215 
216  CSeq_id_Handle idh =
218  return idh;
219 }
220 
221 
222 
223 // initialization
224 void CBioseqContext::x_Init(const CBioseq_Handle& seq, const CSeq_loc* user_loc)
225 {
226  _ASSERT(seq);
227  _ASSERT(seq.IsSetInst());
228 
229  // NB: order of execution is important
230  m_Repr = x_GetRepr();
231  m_Mol = seq.GetInst_Mol();
233  x_SetId();
234 
235  if ( IsSegmented() ) {
237  }
238  m_IsPart = x_IsPart();
239  if ( m_IsPart ) {
240  _ASSERT(m_Master);
242  }
243  if ( IsDelta() ) {
245  }
246 
248 
249  m_IsInSGS = x_IsInSGS();
250  m_IsInGPS = x_IsInGPS();
252 
253  x_SetLocation(user_loc);
255 
258 
259  // m_HasOperon = x_HasOperon();
260 
261  if (IsRefSeq()) {
263  }
264 
266  sel.SetResolveAll();
267 
268  // x_SetHasMultiIntervalGenes();
269 
270  // x_SetTaxname();
271 
273 }
274 
275 
277 {
278  CRef<CSeq_loc> loc;
279 
280  if (user_loc) {
281  // map the user location to the current bioseq
284  if ( !sequence::IsSameBioseq(idh1, idh2, &m_Handle.GetScope()) ) {
286  loc.Reset(mapper.Map(*user_loc));
287  } else {
288  loc.Reset(new CSeq_loc);
289  loc->Assign(*user_loc);
290  }
291 
292  if (loc) {
293  if (loc->IsWhole()) {
294  loc.Reset();
295  } else if (loc->IsInt()) {
297  if (!IsReverse(loc->GetStrand()) && range.GetFrom() == 0 && range.GetTo() == m_Handle.GetInst_Length() - 1) {
298  loc.Reset();
299  }
300  }
301  }
302  }
303 
304  // if no partial location specified do the entire bioseq
305  if (!loc) {
306  loc.Reset(new CSeq_loc);
307  loc->SetWhole(*m_PrimaryId);
308  } else {
309  x_SetMapper(*loc);
310  }
311 
312  m_Location = loc;
313 }
314 
315 
317 {
319 
320  // not covering the entire bioseq (may be multiple ranges)
321  CRef<CBioseq> vseq(new CBioseq(loc, GetAccession()));
322  vseq->SetInst().SetRepr(CSeq_inst::eRepr_virtual);
323  CBioseq_Handle vseqh = GetScope().AddBioseq(*vseq);
324 
325  if (vseqh) {
329  //m_Mapper->KeepNonmappingRanges();
330  }
331 }
332 
334 {
335  m_HasMultiIntervalGenes = false;
336 
338 
339  CFeat_CI gene_ci( m_Handle, sel );
340  for( ; gene_ci ; ++gene_ci ) {
341  switch( gene_ci->GetLocation().Which() ) {
344  case CSeq_loc::e_Mix:
345  case CSeq_loc::e_Equiv:
347  break;
348  default:
349  // do nothing
350  break;
351  }
353  break;
354  }
355  }
356 }
357 
359 {
360  if (UsingSeqEntryIndex()) {
362  if (! idx) return false;
364  if (! bsx) return false;
365  return bsx->HasMultiIntervalGenes();
366  }
367 
370 }
371 
373 {
374  // look for taxname in Seqdescs
375  int num_super_kingdom = 0;
376  bool super_kingdoms_different = false;
377  string super_kingdom_name;
379  for( ; desc_ci; ++desc_ci ) {
380  if( desc_ci->IsSource() ) {
381  const CBioSource &bsrc = desc_ci->GetSource();
382  if (bsrc.IsSetOrgname()) {
383  const COrgName& onp = bsrc.GetOrgname();
384  if (onp.IsSetName()) {
385  const COrgName::TName& nam = onp.GetName();
386  if (nam.IsPartial()) {
387  const CPartialOrgName& pon = nam.GetPartial();
388  if (pon.IsSet()) {
389  const CPartialOrgName::Tdata& tx = pon.Get();
390  ITERATE (CPartialOrgName::Tdata, itr, tx) {
391  const CTaxElement& te = **itr;
392  if (te.IsSetFixed_level()) {
393  if (te.GetFixed_level() == 0 && te.IsSetLevel()) {
394  const string& lvl = te.GetLevel();
395  if (NStr::EqualNocase (lvl, "superkingdom")) {
396  num_super_kingdom++;
397  if (super_kingdom_name.empty() && te.IsSetName()) {
398  super_kingdom_name = te.GetName();
399  } else if (te.IsSetName() && ! NStr::EqualNocase (super_kingdom_name, te.GetName())) {
400  super_kingdoms_different = true;
401  }
402  if (num_super_kingdom > 1 && super_kingdoms_different) {
403  m_IsCrossKingdom = true;
404  }
405  }
406  }
407  }
408  }
409  }
410  }
411  }
412  }
413  if( bsrc.IsSetTaxname() && ! bsrc.GetTaxname().empty() ) {
414  // we found a taxname; but need to look at all descriptors to set m_IsCrossKingdom, so keep going
415  m_Taxname = bsrc.GetTaxname();
416  // return;
417  }
418  }
419  }
420 
421  if (! m_Taxname.empty()) {
422  return;
423  }
424 
425  // fall back on the Seq-feats
426 
427  SAnnotSelector sel;
429 
430  CFeat_CI biosrc_ci( m_Handle, sel );
431  for( ; biosrc_ci ; ++biosrc_ci ) {
432  CConstRef<CSeq_feat> seq_feat = biosrc_ci->GetSeq_feat();
433  if( seq_feat && seq_feat->IsSetData() ) {
434  const CSeqFeatData & seq_feat_data = seq_feat->GetData();
435  if( seq_feat_data.IsBiosrc() ) {
436  const CBioSource & bsrc = seq_feat_data.GetBiosrc();
437  if( bsrc.IsSetTaxname() && ! bsrc.GetTaxname().empty() ) {
438  // we found a taxname; we're done
439  m_Taxname = bsrc.GetTaxname();
440  return;
441  }
442  }
443  }
444  }
445 }
446 
447 const string& CBioseqContext::GetTaxname(void) const
448 {
449  // check for indexed version first
450  if (UsingSeqEntryIndex()) {
452  if (idx) {
454  if (bsx) {
455  m_Taxname = bsx->GetTaxname();
456  }
457  }
458  return m_Taxname;
459  }
460 
461  x_SetTaxname();
462  return m_Taxname;
463 }
464 
465 
467 {
468  // check for indexed version first
469  if (UsingSeqEntryIndex()) {
471  if (idx) {
473  if (bsx) {
475  }
476  }
477  return m_IsCrossKingdom;
478  }
479 
480  x_SetTaxname();
481  return m_IsCrossKingdom;
482 }
483 
484 
486 {
488 }
489 
490 
492 {
494 }
495 
496 
498 {
499  if ( ! FIELD_IS_SET_AND_IS(uo, Type, Str) ||
500  ! NStr::EqualNocase(uo.GetType().GetStr(), "FileTrack"))
501  {
502  return;
503  }
504 
505  CConstRef<CUser_field> pFileTrackURLField = uo.GetFieldRef("FileTrackURL");
506  if( ! pFileTrackURLField ) {
507  pFileTrackURLField = uo.GetFieldRef("Map-FileTrackURL");
508  }
509  if ( pFileTrackURLField) {
510  if ( FIELD_IS_SET_AND_IS(*pFileTrackURLField, Data, Str) ) {
511  if ( ! pFileTrackURLField->GetData().GetStr().empty() ) {
512  m_FiletrackURL = pFileTrackURLField->GetData().GetStr();
513  }
514  } else if ( FIELD_IS_SET_AND_IS(*pFileTrackURLField, Data, Strs) ) {
515  const vector< string > & strs = pFileTrackURLField->GetData().GetStrs();
516  FOR_EACH_STRING_IN_VECTOR (itr, strs) {
517  string str = *itr;
518  if ( ! str.empty() ) {
520  }
521  }
522  }
523  }
524 
525  CConstRef<CUser_field> pBaseModURLField = uo.GetFieldRef("BaseModification-FileTrackURL");
526  if ( pBaseModURLField) {
527  if ( FIELD_IS_SET_AND_IS(*pBaseModURLField, Data, Str) ) {
528  if ( ! pBaseModURLField->GetData().GetStr().empty() ) {
529  m_BasemodURLs.push_back(pBaseModURLField->GetData().GetStr());
530  }
531  } else if ( FIELD_IS_SET_AND_IS(*pBaseModURLField, Data, Strs) ) {
532  m_BasemodURLs = pBaseModURLField->GetData().GetStrs();
533  }
534  }
535 }
536 
538 {
539  if ( ! FIELD_IS_SET_AND_IS(uo, Type, Str) ||
540  ! NStr::EqualNocase(uo.GetType().GetStr(), "AuthorizedAccess"))
541  {
542  return;
543  }
544  CConstRef<CUser_field> pAuthorizedAccessField =
545  uo.GetFieldRef("Study");
546  if( ! pAuthorizedAccessField ||
547  ! FIELD_IS_SET_AND_IS(*pAuthorizedAccessField, Data, Str) ||
548  pAuthorizedAccessField->GetData().GetStr().empty() )
549  {
550  return;
551  }
552  m_AuthorizedAccess = pAuthorizedAccessField->GetData().GetStr();
553 }
554 
556 {
557  if( GetRepr() != CSeq_inst::eRepr_map ||
558  ! FIELD_IS_SET_AND_IS(m_Handle, Inst_Ext, Map) )
559  {
560  return;
561  }
562 
563  const CMap_ext & map_ext = m_Handle.GetInst_Ext().GetMap();
564  FOR_EACH_SEQFEAT_ON_MAPEXT(feat_it, map_ext ) {
565  const CSeq_feat & feat = **feat_it;
566  if( ! FIELD_IS_SET_AND_IS(feat, Data, Rsite) ||
567  ! feat.IsSetLocation() )
568  {
569  continue;
570  }
571  const CSeq_loc & feat_loc = feat.GetLocation();
572  switch( feat_loc.Which() ) {
573  case CSeq_loc::e_Pnt: {
574  const CSeq_point & seq_point = feat_loc.GetPnt();
575 
576  if( seq_point.IsSetPoint() ) {
579  seq_point, Fuzz);
581  seq_point, Id);
583  seq_point, Strand);
584  m_pOpticalMapPointsDestroyer->AddPoint( seq_point.GetPoint() );
585 
587  }
588  break;
589  }
591  m_pOpticalMapPoints = & feat_loc.GetPacked_pnt();
592  // in case a previous iteration set this
594  break;
595  default:
596  // ignore other types
597  break;
598  }
599  }
600 }
601 
603 {
604  // translate finishing status
605  typedef SStaticPair<const char *, const char *> TFinStatElem;
606  static const TFinStatElem sc_finstat_map[] = {
607  { "Annotation-directed-improvement", "ANNOTATION_DIRECTED_IMPROVEMENT" },
608  { "High-quality-draft", "HIGH_QUALITY_DRAFT" },
609  { "Improved-high-quality-draft", "IMPROVED_HIGH_QUALITY_DRAFT" },
610  { "Noncontiguous-finished", "NONCONTIGUOUS_FINISHED" },
611  { "Standard-draft", "STANDARD_DRAFT" }
612  };
614  DEFINE_STATIC_ARRAY_MAP(TFinStatMap, sc_FinStatMap, sc_finstat_map);
615 
616  for (CSeqdesc_CI it(m_Handle, CSeqdesc::e_User); it; ++it) {
617  const CUser_object& uo = it->GetUser();
619  if (uo.IsSetType() && uo.GetType().IsStr()) {
621  if( uo.IsSetData() ) {
622  ITERATE( CUser_object::TData, field_iter, uo.GetData() ) {
623  const CUser_field &field = **field_iter;
624  if( ! field.IsSetData() || ! field.GetData().IsStr() ||
625  ! field.IsSetLabel() || ! field.GetLabel().IsStr() ) {
626  continue;
627  }
628  if( field.GetLabel().GetStr() == "StructuredCommentPrefix" &&
629  field.GetData().GetStr() == "##Genome-Assembly-Data-START##" )
630  {
631  m_IsGenomeAssembly = true;
632  }
633  if( field.GetLabel().GetStr() == "Current Finishing Status" )
634  {
635  string asn_fin_stat = field.GetData().GetStr();
636  replace( asn_fin_stat.begin(), asn_fin_stat.end(), ' ', '-' );
637  TFinStatMap::const_iterator new_fin_stat_iter = sc_FinStatMap.find(asn_fin_stat.c_str());
638  if( new_fin_stat_iter != sc_FinStatMap.end() ) {
639  m_FinishingStatus = new_fin_stat_iter->second;
640  }
641  }
642  }
643  }
644  } else if (utype == CUser_object::eObjectType_Unverified) {
645  if (uo.IsUnverifiedOrganism()) {
647  }
648  if (uo.IsUnverifiedFeature()) {
650  }
651  if (uo.IsUnverifiedMisassembled()) {
653  }
654  if (uo.IsUnverifiedContaminant()) {
656  }
657  // default in the past was to use feature
660  }
661  } else if (utype == CUser_object::eObjectType_Unreviewed) {
662  if (uo.IsUnreviewedUnannotated()) {
664  }
665  } else if ( utype == CUser_object::eObjectType_FileTrack ) {
666  x_SetFiletrackURL(uo);
667  } else if ( NStr::EqualNocase(uo.GetType().GetStr(), "AuthorizedAccess") ) {
669  } else if ( NStr::EqualNocase(uo.GetType().GetStr(), "ENCODE") ) {
670  x_SetEncode(uo);
671  }
672  }
673  }
674 }
675 
677 {
679  {
681  }
683 }
684 
686 {
688 
689  if (GetRepr() == CSeq_inst::eRepr_map) {
690  // TODO: is this right? Maybe handle it differently once
691  // CAnnot_CI is able to handle CSeq_inst::eRepr_map.
692  return;
693  }
694 
695  // JIRA SQD-4444 : copy annot selector from the one saved in this context structure
696  // SAnnotSelector sel = m_FFCtx.SetAnnotSelector();
697  SAnnotSelector sel;
699  CAnnot_CI annot_ci(m_Handle, sel);
700  for( ; annot_ci; ++annot_ci ) {
701  if( ! annot_ci->Seq_annot_IsSetDesc() ) {
702  continue;
703  }
704 
705  const CSeq_annot::TDesc & desc = annot_ci->Seq_annot_GetDesc();
706  ITERATE( CSeq_annot::TDesc::Tdata, one_desc_iter, desc.Get() ) {
707  const CAnnotdesc & one_desc = **one_desc_iter;
708  if( ! one_desc.IsUser() ) {
709  continue;
710  }
711 
712  // we finally got down to an annot desc user object. See if it indicates any
713  // relevant information
714  const CUser_object & user_obj = one_desc.GetUser();
715  if( ! user_obj.IsSetType() || ! user_obj.GetType().IsStr() ||
716  ! user_obj.IsSetData() ||
717  user_obj.GetType().GetStr() != "AnnotDescCommentPolicy" )
718  {
719  continue;
720  }
721 
722  // check policy flags
723  ITERATE( CUser_object::TData, policy_field_iter, user_obj.GetData() ) {
724  const CUser_field & policy_field = **policy_field_iter;
725  if( ! policy_field.IsSetLabel() || ! policy_field.GetLabel().IsStr() ||
726  ! policy_field.IsSetData() ||
727  policy_field.GetLabel().GetStr() != "Policy" )
728  {
729  continue;
730  }
731 
732  if( policy_field.GetData().IsStr() ) {
733  const string & policy_str = policy_field.GetData().GetStr();
734  if( policy_str == "ShowInComment" ) {
736  }
737  }
738  }
739  }
740  }
741 }
742 
743 
745 {
746  return CFeat_CI(m_Handle.GetScope(),
747  *m_Location,
749 }
750 
752 {
753  // check for indexed version first
754  if (UsingSeqEntryIndex()) {
756  if (! idx) return false;
758  if (! bsx) return false;
759  return bsx->HasOperon();
760  }
761 
763  return m_HasOperon;
764 }
765 
766 
768 {
772 
773  m_Accession.erase();
775 
776  // -----------------------------------------------------------------------
777  // Look for TPA assembly:
778  // -----------------------------------------------------------------------
779  bool bTpaAssemblyPresent = false;
780  for (CSeqdesc_CI it(m_Handle, CSeqdesc::e_User); it; ++it) {
781  const CUser_object& obj = it->GetUser();
782  if ( !obj.GetType().IsStr() ) {
783  continue;
784  }
785  if ( obj.GetType().GetStr() == "TpaAssembly" ) {
786  bTpaAssemblyPresent = true;
787  continue;
788  }
789  if ( obj.GetType().GetStr() == "GenomeProjectsDB" ) {
790  m_IsGbGenomeProject = true;
791  continue;
792  }
793  }
794 
795  ITERATE (CBioseq::TId, id_iter, m_Handle.GetBioseqCore()->GetId()) {
796  const CSeq_id& id = **id_iter;
797  const CTextseq_id* tsip = id.GetTextseq_Id();
798  const string& acc = (tsip && tsip->CanGetAccession()) ?
799  tsip->GetAccession() : kEmptyStr;
800 
801  CSeq_id::EAccessionInfo acc_info = id.IdentifyAccession();
802  unsigned int acc_div = acc_info & CSeq_id::eAcc_division_mask;
803 
804  switch ( id.Which() ) {
805  // Genbank, Embl or Ddbj
806  case CSeq_id::e_Embl:
807  m_IsEMBL = true;
808  break;
809  case CSeq_id::e_Ddbj:
810  m_IsDDBJ = true;
811  break;
812  case CSeq_id::e_Genbank:
813  m_IsGenbank = true;
814  switch (acc_info) {
816  m_IsGbGenomeProject = true;
817  break;
819  m_IsNcbiCONDiv = true;
820  break;
821  default:
822  break;
823  }
824  break;
825  // Patent
826  case CSeq_id::e_Patent:
827  m_IsPatent = true;
828  if (id.GetPatent().IsSetSeqid()) {
829  m_PatSeqid = id.GetPatent().GetSeqid();
830  }
831  break;
832  // RefSeq
833  case CSeq_id::e_Other:
834  m_IsRefSeq = true;
835  m_RefseqInfo = acc_info;
836  break;
837  // Gi
838  case CSeq_id::e_Gi:
839  m_IsGI = true;
840  m_Gi = id.GetGi();
841  break;
842  // PDB
843  case CSeq_id::e_Pdb:
844  m_IsPDB = true;
845  break;
846  // TPA
847  case CSeq_id::e_Tpg:
848  m_IsTPA = true;
849  m_IsGenbank = true;
850  break;
851  case CSeq_id::e_Tpe:
852  m_IsTPA = true;
853  m_IsEMBL = true;
854  break;
855  case CSeq_id::e_Tpd:
856  m_IsTPA = true;
857  m_IsDDBJ = true;
858  break;
859  case CSeq_id::e_General:
860  if ( id.GetGeneral().CanGetDb() ) {
861  if ( !NStr::CompareCase(id.GetGeneral().GetDb(), "BankIt") ) {
862  m_IsTPA = bTpaAssemblyPresent;
863  }
864  if( NStr::Equal(id.GetGeneral().GetDb(), "NCBI_GENOMES") ) {
865  m_IsNcbiGenomes = true;
866  }
867  }
868  break;
869  case CSeq_id::e_Gibbsq:
870  case CSeq_id::e_Gibbmt:
871  case CSeq_id::e_Giim:
872  m_IsJournalScan = true;
873  break;
875  m_IsSP = true;
876  break;
877  // nothing special
878  case CSeq_id::e_Pir:
879  case CSeq_id::e_not_set:
880  case CSeq_id::e_Local:
881  case CSeq_id::e_Prf:
882  default:
883  break;
884  }
885 
886  // WGS
887  m_IsWGS = m_IsWGS || (acc_div == CSeq_id::eAcc_wgs);
888 
889  if ( m_IsWGS && !acc.empty() ) {
890  /*
891  size_t len = acc.length();
892  m_IsWGSMaster =
893  ((len == 12 || len == 15) && NStr::EndsWith(acc, "000000")) ||
894  ((len == 14) && NStr::EndsWith(acc, "00000000")) ||
895  ((len == 13 || len == 16 || len == 17) && NStr::EndsWith(acc, "0000000"));
896  */
897  m_IsWGSMaster = (acc_info & CSeq_id::fAcc_master) != 0;
898  if ( m_IsWGSMaster ) {
899  m_WGSMasterAccn = acc;
900  m_WGSMasterName = tsip->CanGetName() ? tsip->GetName() : kEmptyStr;
901  }
902  }
903 
904  // TSA
906 
907  if ( m_IsTSA && !acc.empty() ) {
909  m_IsTSAMaster = true;
910  }
911  if ( m_IsTSAMaster ) {
912  m_TSAMasterAccn = acc;
913  m_TSAMasterName = tsip->CanGetName() ? tsip->GetName() : kEmptyStr;
914  }
915  }
916 
917  // TLS
919 
920  if ( m_IsTLS && !acc.empty() ) {
922  m_IsTLSMaster = true;
923  }
924  if ( m_IsTLSMaster ) {
925  m_TLSMasterAccn = acc;
926  m_TLSMasterName = tsip->CanGetName() ? tsip->GetName() : kEmptyStr;
927  }
928  }
929 
930 
931  // GBB source
933 
934  if (m_IsGenbank || m_IsEMBL || m_IsDDBJ) {
935  if (acc.length() == 6) {
936  char ch = acc[0];
937  if (ch == 'J' || ch == 'K' || ch == 'L' || ch == 'M') {
938  m_ShowGBBSource = true;
939  }
940  }
941  }
942  }
943 
944  // Genbank/Embl/Ddbj (GED)
946 }
947 
948 
950 {
951  return m_Handle.IsSetInst_Repr() ?
953 }
954 
955 
957 {
959  return desc ? &desc->GetMolinfo() : nullptr;
960 }
961 
962 
964 {
965  if ( m_Repr == CSeq_inst::eRepr_raw ||
969  const CSeq_entry_Handle& fftse = GetTopLevelEntry();
971  _ASSERT(eh && eh.IsSeq());
972 
973  if (fftse != eh) {
974  eh = eh.GetParentEntry();
975  if ( eh && eh.IsSet() ) {
976  CBioseq_set_Handle bsst = eh.GetSet();
977  if ( bsst.IsSetClass() &&
978  bsst.GetClass() == CBioseq_set::eClass_parts ) {
979  return true;
980  }
981  }
982  }
983  }
984  return false;
985 }
986 
987 
989 {
990  _ASSERT(IsSegmented());
991 
994  if ( !h ) {
995  return false;
996  }
997 
998  // make sure the segmented set contains our bioseq
999  {{
1000  bool has_seq = false;
1001  for ( CSeq_entry_CI it(h); it; ++it ) {
1002  if ( it->IsSeq() && it->GetSeq() == m_Handle ) {
1003  has_seq = true;
1004  break;
1005  }
1006  }
1007  if ( !has_seq ) {
1008  return false;
1009  }
1010  }}
1011 
1012  // find the parts set
1013  {{
1014  for ( CSeq_entry_CI it(h); it; ++it ) {
1015  if ( it->IsSet() && it->GetSet().IsSetClass() &&
1016  it->GetSet().GetClass() == CBioseq_set::eClass_parts ) {
1017  return true;
1018  }
1019  }
1020  }}
1021 
1022  return false;
1023 }
1024 
1025 
1027 {
1028  _ASSERT(IsDelta());
1029 
1030  if ( m_Handle.IsSetInst_Ext() ) {
1032  if ( ext.IsDelta() ) {
1033  ITERATE (CDelta_ext::Tdata, it, ext.GetDelta().Get()) {
1034  if ( (*it)->IsLoc() ) {
1035  const CSeq_loc& loc = (*it)->GetLoc();
1036  if (loc.IsNull()) continue;
1037  return false;
1038  }
1039  }
1040  }
1041  }
1042  return true;
1043 }
1044 
1045 
1047 {
1048  return m_Master ? m_Master->GetPartNumber(m_Handle) : 0;
1049 }
1050 
1051 
1053 {
1054  CSeq_entry_Handle e =
1056  return e;
1057 }
1058 
1059 
1061 {
1062  CSeq_entry_Handle e =
1064  return e;
1065 }
1066 
1067 
1069 {
1070  CSeq_entry_Handle e =
1072  return e;
1073 }
1074 
1075 
1077 {
1078  const CFlatFileConfig& cfg = Config();
1079  if ( cfg.IsStyleContig() ) {
1080  return true;
1081  } else if ( cfg.IsStyleNormal() ) {
1082  if ( (IsSegmented() && !HasParts()) ||
1083  (IsDelta() && !IsDeltaLitOnly()) ) {
1084  return true;
1085  }
1086  }
1087 
1088  return false;
1089 }
1090 
1091 
1093 {
1094  if (uo.IsSetType() && uo.GetType().IsStr()) {
1095  if (NStr::EqualNocase(uo.GetType().GetStr(), "ENCODE")) {
1096  m_Encode.Reset(&uo);
1097  }
1098  }
1099 }
1100 
1101 
1102 /////////////////////////////////////////////////////////////////////////////
1103 //
1104 // CMasterContext
1105 
1106 
1108  m_Handle(seq)
1109 {
1110  _ASSERT(seq);
1111  _ASSERT(seq.GetInst_Ext().IsSeg());
1112 
1113  x_SetNumParts();
1114  x_SetBaseName();
1115 }
1116 
1117 
1119 {
1120 }
1121 
1122 
1124 {
1125  if ( !part ) {
1126  return 0;
1127  }
1128  CScope& scope = m_Handle.GetScope();
1129 
1130  SIZE_TYPE serial = 1;
1132  if ((*it)->IsNull()) {
1133  continue;
1134  }
1135  const CSeq_id& id = GetId(**it, &m_Handle.GetScope());
1137  if (bsh &&
1138  bsh.IsSetInst_Repr() &&
1140  if (bsh == part) {
1141  return serial;
1142  }
1143  ++serial;
1144  }
1145  }
1146 
1147  return 0;
1148 }
1149 
1150 
1152 {
1153  CScope& scope = m_Handle.GetScope();
1154  SIZE_TYPE count = 0;
1155 
1156  // count only non-gap and non-virtual parts
1158  const CSeq_loc& loc = **it;
1159  if (loc.IsNull()) { // skip gaps
1160  continue;
1161  }
1162  // count only non-virtual
1163  const CSeq_id_Handle id = CSeq_id_Handle::GetHandle(GetId(loc, &scope));
1165  if (part &&
1166  part.IsSetInst_Repr() &&
1168  ++count;
1169  }
1170  }
1171  m_NumParts = count;
1172 }
1173 
1174 
1175 static void s_GetNameForBioseq(const CBioseq_Handle& seq, string& name)
1176 {
1177  name.erase();
1178 
1179  CConstRef<CSeq_id> sip;
1180  ITERATE (CBioseq_Handle::TId, it, seq.GetId()) {
1181  CConstRef<CSeq_id> id = it->GetSeqId();
1182  if (id->IsGenbank() || id->IsEmbl() || id->IsDdbj() ||
1183  id->IsTpg() || id->IsTpe() || id->IsTpd()) {
1184  sip = id;
1185  break;
1186  }
1187  }
1188 
1189  if (sip) {
1190  const CTextseq_id* tsip = sip->GetTextseq_Id();
1191  if (tsip && tsip->CanGetName()) {
1192  name = tsip->GetName();
1193  }
1194  }
1195 }
1196 
1197 
1199 {
1200  string parent_name;
1201  s_GetNameForBioseq(m_Handle, parent_name);
1202 
1203  // if there's no "SEG_" prefix just use the master's name
1204  if (!NStr::StartsWith(parent_name, "SEG_")) {
1205  m_BaseName = parent_name;
1206  return;
1207  }
1208 
1209  // otherwise, eliminate the prefix ...
1210  parent_name = parent_name.substr(4);
1211 
1212  // ... and calculate a base name
1213 
1214  // find the first segment
1215  CScope* scope = &m_Handle.GetScope();
1216  CBioseq_Handle segment;
1217  const CSeqMap& seqmap = m_Handle.GetSeqMap();
1218  CSeqMap_CI it = seqmap.BeginResolved(scope,
1219  SSeqMapSelector()
1220  .SetResolveCount(1)
1221  .SetFlags(CSeqMap::fFindRef));
1222  while (it) {
1223  CSeq_id_Handle id = it.GetRefSeqid();
1224  segment = scope->GetBioseqHandleFromTSE(id, m_Handle);
1225  if (segment) {
1226  break;
1227  }
1228  }
1229  string seg_name;
1230  if (segment) {
1231  s_GetNameForBioseq(segment, seg_name);
1232  }
1233 
1234  if (!seg_name.empty() && NStr::EndsWith(seg_name, '1') &&
1235  parent_name.length() == seg_name.length() &&
1236  NStr::EndsWith(parent_name, '1')) {
1237  size_t pos = parent_name.length() - 2;
1238  for ( /*noop*/; pos > 0; --pos) {
1239  if (parent_name[pos] != '0') {
1240  break;
1241  }
1242  }
1243  parent_name.erase(pos + 1);
1244  }
1245 
1246  m_BaseName = parent_name;
1247 }
1248 
1249 /////////////////////////////////////////////////////////////////////////////
1250 //
1251 // CTopLevelSeqEntryContext
1252 
1254 {
1255  if (sep.IsSeq()) {
1256  // Is Bioseq
1257  const CBioseq& bsp = sep.GetSeq();
1258  for (auto& sid : bsp.GetId()) {
1259  TSEQID_CHOICE chs = sid->Which();
1260  switch (chs) {
1262  case CSeq_id_Base::e_Tpg:
1263  // Genbank allows merging only if it's the old-style 1 + 5 accessions
1264  {
1265  const CTextseq_id* tsid = sid->GetTextseq_Id ();
1266  if (tsid && tsid->IsSetAccession() && tsid->GetAccession().length() == 6) {
1267  m_CanSourcePubsBeFused = true;
1268  }
1269  }
1270  break;
1271  case CSeq_id_Base::e_Embl:
1272  case CSeq_id_Base::e_Ddbj:
1273  case CSeq_id_Base::e_Pir:
1275  case CSeq_id_Base::e_Prf:
1276  case CSeq_id_Base::e_Pdb:
1277  case CSeq_id_Base::e_Tpe:
1278  case CSeq_id_Base::e_Tpd:
1279  case CSeq_id_Base::e_Gpipe:
1283  // with some types, it's okay to merge
1284  m_CanSourcePubsBeFused = true;
1285  break;
1286  default:
1287  break;
1288  }
1289  }
1290  } else if (sep.IsSet()) {
1291  // Is Bioseq-set
1292  const CBioseq_set& bssp = sep.GetSet();
1293  if (bssp.CanGetClass() && bssp.GetClass() == CBioseq_set::eClass_small_genome_set) {
1294  m_HasSmallGenomeSet = true;
1295  }
1296  for (auto& seqentry : bssp.GetSeq_set()) {
1297  // recursively explore current Bioseq-set
1298  x_InitSeqs(*seqentry);
1299  }
1300  }
1301 }
1302 
1303 CTopLevelSeqEntryContext::CTopLevelSeqEntryContext( const CSeq_entry_Handle &entry_handle, bool useIndexedFasterSets )
1304 {
1305  m_CanSourcePubsBeFused = false;
1306  m_HasSmallGenomeSet = false;
1307 
1308  if (useIndexedFasterSets) {
1309  CSeq_entry_Handle tseh = entry_handle.GetTopLevelEntry();
1311  CSeq_entry& topsep = const_cast<CSeq_entry&>(*tcsep);
1312  x_InitSeqs( topsep );
1313  return;
1314  }
1315 
1316  CBioseq_CI bioseq_iter( entry_handle.GetScope(), *entry_handle.GetSeq_entryCore() );
1317  for( ; bioseq_iter; ++bioseq_iter ) {
1318  ITERATE( CBioseq_Handle::TId, it, bioseq_iter->GetId() ) {
1319  CConstRef<CSeq_id> seqId = (*it).GetSeqIdOrNull();
1320  if( ! seqId.IsNull() ) {
1321  switch( seqId->Which() ) {
1324  case CSeq_id_Base::e_Embl:
1325  case CSeq_id_Base::e_Pir:
1328  case CSeq_id_Base::e_Ddbj:
1329  case CSeq_id_Base::e_Prf:
1330  case CSeq_id_Base::e_Pdb:
1331  case CSeq_id_Base::e_Tpe:
1332  case CSeq_id_Base::e_Tpd:
1333  case CSeq_id_Base::e_Gpipe:
1334  // with some types, it's okay to merge
1335  m_CanSourcePubsBeFused = true;
1336  break;
1338  case CSeq_id_Base::e_Tpg:
1339  // Genbank allows merging only if it's the old-style 1 + 5 accessions
1340  if( seqId->GetTextseq_Id() &&
1341  seqId->GetTextseq_Id()->IsSetAccession() &&
1342  seqId->GetTextseq_Id()->GetAccession().length() == 6 ) {
1343  m_CanSourcePubsBeFused = true;
1344  }
1345  break;
1347  case CSeq_id_Base::e_Local:
1348  case CSeq_id_Base::e_Other:
1350  case CSeq_id_Base::e_Giim:
1351  case CSeq_id_Base::e_Gi:
1352  break;
1353  default:
1354  break;
1355  }
1356  }
1357  }
1358  }
1359 
1360  // check all Bioseq-sets, if any
1361  if( entry_handle.IsSet() ) {
1362  if( entry_handle.GetSet().CanGetClass() &&
1364  {
1365  m_HasSmallGenomeSet = true;
1366  } else {
1367  CSeq_entry_CI seq_entry_ci( entry_handle, CSeq_entry_CI::eRecursive );
1368  for( ; seq_entry_ci && ! m_HasSmallGenomeSet; ++seq_entry_ci ) {
1369  if( seq_entry_ci->IsSet() && seq_entry_ci->GetSet().CanGetClass() &&
1371  {
1372  m_HasSmallGenomeSet = true;
1373  break;
1374  }
1375  }
1376  }
1377  }
1378 }
1379 
1380 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAnnot_CI –.
Definition: annot_ci.hpp:59
CAnnot_descr –.
Definition: Annot_descr.hpp:66
CAnnotdesc –.
Definition: Annotdesc.hpp:66
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
const COrgName & GetOrgname(void) const
Definition: BioSource.cpp:410
bool IsSetOrgname(void) const
Definition: BioSource.cpp:405
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
bool IsDeltaLitOnly(void) const
Definition: context.hpp:144
CRef< CSeq_loc_Mapper > m_Mapper
Definition: context.hpp:388
void x_SetLocation(const CSeq_loc *user_loc=nullptr)
Definition: context.cpp:276
void x_SetId(void)
Definition: context.cpp:767
CConstRef< CSeq_loc > m_Location
Definition: context.hpp:387
bool x_IsInGPS(void) const
Definition: context.cpp:1060
CScope & GetScope(void) const
Definition: context.hpp:102
bool m_IsCrossKingdom
Definition: context.hpp:377
bool HasMultiIntervalGenes(void) const
Definition: context.cpp:358
bool IsCrossKingdom(void) const
Definition: context.cpp:466
@ fUnreviewed_Unannotated
Definition: context.hpp:235
void x_SetOpticalMapPoints(void)
Definition: context.cpp:555
void SetPDBCompoundForComment(bool value)
Definition: context.cpp:491
CBioseq_Handle m_Virtual
Definition: context.hpp:389
bool x_HasOperon(void) const
Definition: context.cpp:744
CSeq_inst::TRepr x_GetRepr(void) const
Definition: context.cpp:949
string m_WGSMasterAccn
Definition: context.hpp:317
bool m_HasOperon
Definition: context.hpp:374
bool m_ShowAnnotCommentAsCOMMENT
Definition: context.hpp:381
SIZE_TYPE m_PartNumber
Definition: context.hpp:341
SAnnotSelector & SetAnnotSelector(void)
Definition: context.hpp:713
bool m_IsWGSMaster
Definition: context.hpp:365
bool UsingSeqEntryIndex(void) const
Definition: context.hpp:676
TUnreviewed m_fUnreviewed
Definition: context.hpp:380
string m_Taxname
Definition: context.hpp:324
CBioseqContext(const CBioseq_Handle &seq, CFlatFileContext &ffctx, CMasterContext *mctx=nullptr, CTopLevelSeqEntryContext *tlsec=nullptr)
Definition: context.cpp:78
bool m_UsePDBCompoundForComment
Definition: context.hpp:378
const CFlatFileConfig & Config(void) const
Definition: context.hpp:689
unique_ptr< CPacked_seqpnt > m_pOpticalMapPointsDestroyer
Definition: context.hpp:331
unsigned int m_RefseqInfo
Definition: context.hpp:358
bool m_IsNcbiGenomes
Definition: context.hpp:361
void x_SetDataFromUserObjects(void)
Definition: context.cpp:602
bool x_IsPart(void) const
Definition: context.cpp:963
CSeq_inst::TRepr GetRepr(void) const
Definition: context.hpp:117
CRef< CSeq_id > m_PrimaryId
Definition: context.hpp:315
bool x_HasParts(void) const
Definition: context.cpp:988
void x_SetAuthorizedAccess(const CUser_object &uo)
Definition: context.cpp:537
void x_SetEncode(const CUser_object &uo)
Definition: context.cpp:1092
string m_TSAMasterAccn
Definition: context.hpp:319
const CPacked_seqpnt * m_pOpticalMapPoints
Definition: context.hpp:328
const CSeq_entry_Handle & GetTopLevelEntry(void) const
Definition: context.hpp:701
bool HasOperon(void) const
Definition: context.cpp:751
bool IsRefSeq(void) const
Definition: context.hpp:194
bool m_IsTLSMaster
Definition: context.hpp:369
bool m_HasMultiIntervalGenes
Definition: context.hpp:375
CConstRef< CUser_object > m_Encode
Definition: context.hpp:384
string m_WGSMasterName
Definition: context.hpp:318
bool m_IsGbGenomeProject
Definition: context.hpp:359
CConstRef< CMolInfo > m_Molinfo
Definition: context.hpp:335
void x_Init(const CBioseq_Handle &seq, const CSeq_loc *user_loc)
Definition: context.cpp:224
bool ShowAnnotCommentAsCOMMENT() const
Definition: context.cpp:676
CSeq_inst::TRepr m_Repr
Definition: context.hpp:333
bool m_IsJournalScan
Definition: context.hpp:356
void x_CheckForShowComments() const
Definition: context.cpp:685
bool m_IsGenomeAssembly
Definition: context.hpp:376
bool m_IsGenbank
Definition: context.hpp:350
const CMolInfo * x_GetMolInfo(void) const
Definition: context.cpp:956
CSeq_id_Handle GetPreferredSynonym(const CSeq_id &id) const
Definition: context.cpp:210
bool IsDelta(void) const
Definition: context.hpp:143
bool DoContigStyle(void) const
Definition: context.cpp:1076
bool x_IsDeltaLitOnly(void) const
Definition: context.cpp:1026
CSeq_inst::TMol m_Mol
Definition: context.hpp:334
void x_SetFiletrackURL(const CUser_object &uo)
Definition: context.cpp:497
string m_FiletrackURL
Definition: context.hpp:325
void x_SetHasMultiIntervalGenes(void) const
Definition: context.cpp:333
bool m_ShowAnnotCommentAsCOMMENT_checked
Definition: context.hpp:382
CBioseq_Handle m_Handle
Definition: context.hpp:312
bool m_IsInNucProt
Definition: context.hpp:348
void x_SetMapper(const CSeq_loc &loc)
Definition: context.cpp:316
const string & GetTaxname(void) const
Empty string if unavailable.
Definition: context.cpp:447
string m_TSAMasterName
Definition: context.hpp:320
CMolInfo::TTech GetTech(void) const
Definition: context.hpp:732
string m_Accession
Definition: context.hpp:316
const CRef< CSeqEntryIndex > GetSeqEntryIndex(void) const
Definition: context.hpp:682
bool m_IsNcbiCONDiv
Definition: context.hpp:360
bool IsSegmented(void) const
Definition: context.hpp:125
bool UsePDBCompoundForComment(void) const
Definition: context.cpp:485
bool m_ShowGBBSource
Definition: context.hpp:372
CFlatFileContext & m_FFCtx
Definition: context.hpp:390
bool m_IsDeltaLitOnly
Definition: context.hpp:343
bool x_IsInNucProt(void) const
Definition: context.cpp:1068
TUnverified m_fUnverified
Definition: context.hpp:379
SIZE_TYPE x_GetPartNumber(void)
Definition: context.cpp:1046
string m_AuthorizedAccess
Definition: context.hpp:327
string m_TLSMasterName
Definition: context.hpp:322
bool HasParts(void) const
Definition: context.hpp:126
void x_SetTaxname(void) const
Definition: context.cpp:372
bool x_IsInSGS(void) const
Definition: context.cpp:1052
vector< string > m_BasemodURLs
Definition: context.hpp:326
string m_FinishingStatus
Definition: context.hpp:323
@ fUnverified_Contaminant
Definition: context.hpp:226
@ fUnverified_SequenceOrAnnotation
Definition: context.hpp:224
@ fUnverified_Organism
Definition: context.hpp:223
@ fUnverified_Misassembled
Definition: context.hpp:225
CRef< CMasterContext > m_Master
Definition: context.hpp:392
string m_TLSMasterAccn
Definition: context.hpp:321
bool m_IsTSAMaster
Definition: context.hpp:367
~CBioseqContext(void)
Definition: context.cpp:202
const string & GetAccession(void) const
Definition: context.hpp:110
bool HasOperon(void)
Definition: indexer.cpp:2988
bool IsCrossKingdom(void)
Definition: indexer.cpp:2646
bool HasMultiIntervalGenes(void)
Definition: indexer.cpp:3014
const string & GetTaxname(void)
Definition: indexer.cpp:2496
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CBioseq_set_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
void SetRefSeqConventions(void)
bool IsStyleNormal(void) const
bool IsStyleContig(void) const
CFlatFileConfig & SetConfig(void)
Definition: context.hpp:456
const CSeq_loc * GetLocation(void) const
Definition: context.hpp:463
CMap_ext –.
Definition: Map_ext.hpp:66
string m_BaseName
Definition: context.hpp:428
SIZE_TYPE m_NumParts
Definition: context.hpp:429
CBioseq_Handle m_Handle
Definition: context.hpp:427
CMasterContext(const CBioseq_Handle &master)
Definition: context.cpp:1107
void x_SetNumParts(void)
Definition: context.cpp:1151
void x_SetBaseName(void)
Definition: context.cpp:1198
~CMasterContext(void)
Definition: context.cpp:1118
SIZE_TYPE GetPartNumber(const CBioseq_Handle &part)
Definition: context.cpp:1123
CPartialOrgName –.
CScope –.
Definition: scope.hpp:92
CRef< CBioseqIndex > GetBioseqIndex(void)
Definition: indexer.cpp:114
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
CSeqMap –.
Definition: seq_map.hpp:93
CSeq_entry_CI –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_ext –.
Definition: Seq_ext.hpp:66
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool IsAa(void) const
Definition: Seq_inst.hpp:113
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
CTaxElement –.
Definition: TaxElement.hpp:66
CTopLevelSeqEntryContext(const CSeq_entry_Handle &entry_handle, bool useIndexedFasterSets=false)
Definition: context.cpp:1303
void x_InitSeqs(const CSeq_entry &sep)
Definition: context.cpp:1253
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
EObjectType
Object Type.
@ eObjectType_StructuredComment
bool IsUnverifiedMisassembled() const
bool IsUnverifiedOrganism() const
bool IsUnverifiedContaminant() const
bool IsUnverifiedFeature() const
bool IsUnreviewedUnannotated() const
EObjectType GetObjectType() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static const char * str(char *buf, int n)
Definition: stats.c:84
CRange< Position > Map(const CRange< Position > &target, const CRange< Position > &range)
Definition: blast_aux.cpp:826
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ZERO_GI
Definition: ncbimisc.hpp:1088
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
@ eAcc_wgs
Definition: Seq_id.hpp:290
@ eAcc_gb_con
Definition: Seq_id.hpp:367
@ eAcc_gsdb_dirsub
Definition: Seq_id.hpp:352
@ eAcc_gb_genome
Definition: Seq_id.hpp:365
@ eAcc_gb_segset
Definition: Seq_id.hpp:363
@ fAcc_master
Definition: Seq_id.hpp:256
@ eAcc_division_mask
Definition: Seq_id.hpp:299
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
CBioseq_Handle GetBioseqFromSeqLoc(const CSeq_loc &loc, CScope &scope, CScope::EGetBioseqFlag flag=CScope::eGetBioseq_Loaded)
Retrieve the Bioseq Handle from a location.
Definition: sequence.cpp:308
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
CSeq_loc_Mapper_Base & SetMergeAbutting(void)
Merge only abutting intervals, keep overlapping.
CSeq_loc_Mapper_Base & SetGapRemove(void)
@ eSeqMap_Up
map from segments to the top level bioseq
vector< CSeq_id_Handle > TId
TClass GetClass(void) const
const TInst_Ext & GetInst_Ext(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
bool CanGetClass(void) const
bool IsSetInst_Ext(void) const
TSet GetSet(void) const
TInst_Mol GetInst_Mol(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const CSeq_annot::TDesc & Seq_annot_GetDesc(void) const
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
TInst_Length GetInst_Length(void) const
bool IsSetInst(void) const
bool IsSetInst_Repr(void) const
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_entry_Handle GetExactComplexityLevel(CBioseq_set::EClass cls) const
Return level with exact complexity, or empty handle if not found.
bool IsSet(void) const
const CSeqMap & GetSeqMap(void) const
Get sequence map.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const TId & GetId(void) const
bool Seq_annot_IsSetDesc(void) const
CConstRef< CSeq_entry > GetSeq_entryCore(void) const
Get const reference to the seq-entry.
bool IsSeq(void) const
SAnnotSelector & SetFeatType(TFeatType type)
Set feature type (also set annotation type to feat)
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
CSeq_id_Handle GetRefSeqid(void) const
The following function makes sense only when the segment is a reference to another seq.
Definition: seq_map_ci.cpp:312
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
@ eRecursive
Deprecated.
CSeqMap_CI BeginResolved(CScope *scope) const
Definition: seq_map.cpp:837
@ fFindRef
Definition: seq_map.hpp:137
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive compare of a substring with another string.
Definition: ncbistr.cpp:135
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
The object itself. Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
const TStrs & GetStrs(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetLabel(void) const
Field label. Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
vector< CRef< CUser_field > > TData
TFixed_level GetFixed_level(void) const
Get the Fixed_level member data.
bool IsSetFixed_level(void) const
Check if a value has been assigned to Fixed_level data member.
bool IsPartial(void) const
Check if variant Partial is selected.
Definition: OrgName_.hpp:753
const TName & GetName(void) const
Get the Name member data.
Definition: OrgName_.hpp:771
const TLevel & GetLevel(void) const
Get the Level member data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const Tdata & Get(void) const
Get the member data.
const TName & GetName(void) const
Get the Name member data.
list< CRef< CTaxElement > > Tdata
bool IsSetLevel(void) const
Check if a value has been assigned to Level data member.
const TPartial & GetPartial(void) const
Get the variant data.
Definition: OrgName_.cpp:193
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
Definition: OrgName_.hpp:759
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
bool IsSetLocation(void) const
Feature made from. Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
bool IsGenbank(void) const
Check if variant Genbank is selected.
Definition: Seq_id_.hpp:841
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsTpg(void) const
Check if variant Tpg is selected.
Definition: Seq_id_.hpp:928
const TName & GetName(void) const
Get the Name member data.
bool IsSetPoint(void) const
Check if a value has been assigned to Point data member.
Definition: Seq_point_.hpp:284
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
bool IsTpd(void) const
Check if variant Tpd is selected.
Definition: Seq_id_.hpp:940
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
bool CanGetName(void) const
Check if it is safe to call GetName method.
bool IsEmbl(void) const
Check if variant Embl is selected.
Definition: Seq_id_.hpp:847
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:260
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
bool IsTpe(void) const
Check if variant Tpe is selected.
Definition: Seq_id_.hpp:934
const TAccession & GetAccession(void) const
Get the Accession member data.
bool IsDdbj(void) const
Check if variant Ddbj is selected.
Definition: Seq_id_.hpp:910
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Gpipe
Internal NCBI genome pipeline processing ID.
Definition: Seq_id_.hpp:113
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Prf
PRF SEQDB.
Definition: Seq_id_.hpp:108
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
@ e_Equiv
equivalent sets of locations
Definition: Seq_loc_.hpp:106
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool CanGetClass(void) const
Check if it is safe to call GetClass method.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
@ eClass_parts
parts for 2 or 3
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_segset
segmented sequence + parts
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
const TSeg & GetSeg(void) const
Get the variant data.
Definition: Seq_ext_.cpp:114
const Tdata & Get(void) const
Get the member data.
ERepr
representation class
Definition: Seq_inst_.hpp:91
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
const TMap & GetMap(void) const
Get the variant data.
Definition: Seq_ext_.cpp:158
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
const TUser & GetUser(void) const
Get the variant data.
Definition: Annotdesc_.cpp:184
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
bool IsSeg(void) const
Check if variant Seg is selected.
Definition: Seq_ext_.hpp:318
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
bool IsUser(void) const
Check if variant User is selected.
Definition: Annotdesc_.hpp:561
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
const Tdata & Get(void) const
Get the member data.
Definition: Seg_ext_.hpp:164
list< CRef< CSeq_loc > > Tdata
Definition: Seg_ext_.hpp:89
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
list< CRef< CAnnotdesc > > Tdata
@ eRepr_const
constructed sequence
Definition: Seq_inst_.hpp:96
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
@ eRepr_map
ordered map of any kind
Definition: Seq_inst_.hpp:99
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eRepr_virtual
no seq data
Definition: Seq_inst_.hpp:93
@ eRepr_not_set
empty
Definition: Seq_inst_.hpp:92
@ eTech_targeted
targeted locus sets/studies
Definition: MolInfo_.hpp:147
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
range(_Ty, _Ty) -> range< _Ty >
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define nullptr
Definition: ncbimisc.hpp:45
static void s_GetNameForBioseq(const CBioseq_Handle &seq, string &name)
Definition: context.cpp:1175
USING_SCOPE(sequence)
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define FOR_EACH_SEQFEAT_ON_MAPEXT(Itr, Var)
SEQANNOT_IS_SEQFEAT.
Definition: seq_macros.hpp:603
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.
#define ASSIGN_IF_SET_ELSE_RESET(DestVar, DestFld, SrcVar, SrcFld)
ASSIGN_IF_SET_ELSE_RESET.
#define CLONE_IF_SET_ELSE_RESET(DestVar, DestFld, SrcVar, SrcFld)
CLONE_IF_SET base macro (Useful to copy an object from a variable of one type to a variable of anoth...
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
Definition: static_set.hpp:888
SAnnotSelector –.
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
#define _ASSERT
#define Type
#define const
Definition: zconf.h:232
Modified on Sun Apr 14 05:26:46 2024 by modify_doxy.py rev. 669887