NCBI C++ ToolKit
indexer.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef FEATURE_INDEXER__HPP
2 #define FEATURE_INDEXER__HPP
3 
4 /*
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Jonathan Kans
30 *
31 */
32 
33 #include <corelib/ncbicntr.hpp>
34 
36 #include <objects/seq/MolInfo.hpp>
38 #include <objects/seq/Seq_gap.hpp>
42 
45 #include <objmgr/seq_vector.hpp>
46 #include <objmgr/util/feature.hpp>
47 
50 
51 
52 // forward declarations of the indexing classes
53 class CSeqEntryIndex;
54 class CSeqMasterIndex;
55 class CSeqsetIndex;
56 class CBioseqIndex;
57 class CGapIndex;
58 class CDescriptorIndex;
59 class CFeatureIndex;
60 
61 typedef void (*FAddSnpFunc)(CBioseq_Handle bsh, string& na_acc);
62 
63 // CSeqEntryIndex
64 //
65 // CSeqEntryIndex is the public, top-level Seq-entry exploration organizer. A variable
66 // is created using the top-level sequence object, with the constructors taking optional
67 // fetch policy and feature collection flags (the feature exploration depth is not a
68 // constructor argument; it is set separately with SetFeatDepth):
69 //
70 // CSeqEntryIndex idx(*m_entry, CSeqEntryIndex::eAdaptive);
71 //
72 // A Seq-entry wrapper is created if the top-level object is a Bioseq or Bioseq-set.
73 // Bioseqs within the Seq-entry are then indexed and added to a vector of CBioseqIndex.
74 //
75 // Bioseqs are explored with IterateBioseqs, or selected individually by GetBioseqIndex
76 // (given an accession, index number, or subregion):
77 //
78 // idx.IterateBioseqs([this](CBioseqIndex& bsx) {
79 // ...
80 // });
81 //
82 // The embedded lambda function statements are executed for each selected Bioseq.
83 //
84 // Internal indexing objects (i.e., CSeqMasterIndex, CSeqsetIndex, CBioseqIndex,
85 // CGapIndex, CDescriptorIndex, and CFeatureIndex) are generated by the indexing
86 // process, and should not be created by the application.
88 {
89 public:
90 
91  enum EPolicy {
92  // far feature fetch policy
93  eAdaptive = 0,
94  eInternal = 1,
95  eExternal = 2,
96  eExhaustive = 3,
97  eFtp = 4,
98  eWeb = 5,
99  eGenomes = 6
100  };
101 
102  enum EFlags {
103  fDefault = 0,
104  fHideImpFeats = 1,
105  fHideSNPFeats = 2,
106  fHideCDDFeats = 4,
107  fHideSTSFeats = 8,
108  fHideExonFeats = 16,
109  fHideIntronFeats = 32,
110  fHideMiscFeats = 64,
111  fShowSNPFeats = 128,
112  fShowCDDFeats = 256,
113  fGeneRNACDSOnly = 512,
114  fHideGapFeats = 1024
115  };
116  typedef int TFlags; // Binary "OR" of EFlags
117 
118 public:
119  // Constructors take the top-level sequence object
120 
121  // The primary constructor uses an existing CScope created by the application
122  CSeqEntryIndex (CSeq_entry_Handle& topseh, EPolicy policy = eAdaptive, TFlags flags = fDefault);
123  CSeqEntryIndex (CBioseq_Handle& bsh, EPolicy policy = eAdaptive, TFlags flags = fDefault);
124 
125  // Alternative constructors take an object and create a new local default CScope
126  CSeqEntryIndex (CSeq_entry& topsep, EPolicy policy = eAdaptive, TFlags flags = fDefault);
127  CSeqEntryIndex (CBioseq_set& seqset, EPolicy policy = eAdaptive, TFlags flags = fDefault);
128  CSeqEntryIndex (CBioseq& bioseq, EPolicy policy = eAdaptive, TFlags flags = fDefault);
129  CSeqEntryIndex (CSeq_submit& submit, EPolicy policy = eAdaptive, TFlags flags = fDefault);
130 
131  // Specialized constructors are for streaming through release files, one component at a time
132 
133  // Submit-block obtained from top of Seq-submit release file
134  CSeqEntryIndex (CSeq_entry& topsep, CSubmit_block &sblock, EPolicy policy = eAdaptive, TFlags flags = fDefault);
135  // Seq-descr chain obtained from top of Bioseq-set release file
136  CSeqEntryIndex (CSeq_entry& topsep, CSeq_descr &descr, EPolicy policy = eAdaptive, TFlags flags = fDefault);
137 
138 private:
139  // Prohibit copy constructor & assignment operator
140  CSeqEntryIndex (const CSeqEntryIndex&) = delete;
142 
143 public:
144  // Bioseq exploration iterator
145  template<typename Fnc> size_t IterateBioseqs (Fnc m);
146 
147  // GetBioseqIndex methods are provided for a variety of argument types
148 
149  // Get first Bioseq index
150  CRef<CBioseqIndex> GetBioseqIndex (void);
151  // Get Nth Bioseq index
152  CRef<CBioseqIndex> GetBioseqIndex (int n);
153  // Get Bioseq index by accession
154  CRef<CBioseqIndex> GetBioseqIndex (const string& accn);
155  // Get Bioseq index by handle
156  CRef<CBioseqIndex> GetBioseqIndex (CBioseq_Handle bsh);
157  // Get Bioseq index by mapped feature
158  CRef<CBioseqIndex> GetBioseqIndex (const CMappedFeat& mf);
159  // Get Bioseq index by sublocation
160  CRef<CBioseqIndex> GetBioseqIndex (const CSeq_loc& loc);
161 
162  // Seqset exploration iterator
163  template<typename Fnc> size_t IterateSeqsets (Fnc m);
164 
165  const vector<CRef<CBioseqIndex>>& GetBioseqIndices(void);
166 
167  const vector<CRef<CSeqsetIndex>>& GetSeqsetIndices(void);
168 
169  bool DistributedReferences(void);
170 
171  void SetSnpFunc(FAddSnpFunc* snp);
172 
173  FAddSnpFunc* GetSnpFunc(void);
174 
175  void SetFeatDepth(int featDepth);
176 
177  int GetFeatDepth(void);
178 
179  void SetGapDepth(int gapDepth);
180 
181  int GetGapDepth(void);
182 
183  // Check all Bioseqs for failure to fetch remote sequence components or feature annotation
184  bool IsFetchFailure(void);
185 
186  // Check for failure to create scope
187  bool IsIndexFailure (void);
188 
189  CRef<CSeqMasterIndex> GetMasterIndex(void) const { return m_Idx; }
190 
191 private:
192  // Implementation details are in a separate CSeqMasterIndex object wrapped in a CRef
194 };
195 
196 
197 // CSeqMasterIndex
198 //
199 // CSeqMasterIndex holds the implementation methods and variables for the CSeqEntryIndex
201 {
202 public:
203  // Constructor is separate from Initializers so that CSeqEntryIndex can capture a CRef to
204  // its CSeqMasterIndex, making CWeakRef<CSeqMasterIndex> available to GetFeatureForProduct
205  CSeqMasterIndex (void) { }
206 
207 public:
208  // Initializers take the top-level sequence object
209  void x_Initialize (CSeq_entry_Handle& topseh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
210  void x_Initialize (CBioseq_Handle& bsh, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
211 
212  void x_Initialize (CSeq_entry& topsep, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
213  void x_Initialize (CBioseq_set& seqset, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
214  void x_Initialize (CBioseq& bioseq, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
215  void x_Initialize (CSeq_submit& submit, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
216 
217  void x_Initialize (CSeq_entry& topsep, CSubmit_block &sblock, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
218  void x_Initialize (CSeq_entry& topsep, CSeq_descr &descr, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags);
219 
220 private:
221  // Prohibit copy constructor & assignment operator
222  CSeqMasterIndex (const CSeqMasterIndex&) = delete;
224 
225 public:
226  // Bioseq exploration iterator
227  template<typename Fnc> size_t IterateBioseqs (Fnc m);
228 
229  // Get first Bioseq index
230  CRef<CBioseqIndex> GetBioseqIndex (void);
231  // Get Nth Bioseq index
232  CRef<CBioseqIndex> GetBioseqIndex (int n);
233  // Get Bioseq index by accession
234  CRef<CBioseqIndex> GetBioseqIndex (const string& accn);
235  // Get Bioseq index by handle
236  CRef<CBioseqIndex> GetBioseqIndex (CBioseq_Handle bsh);
237  // Get Bioseq index by string
238  CRef<CBioseqIndex> GetBioseqIndex (string& str);
239  // Get Bioseq index by feature
240  CRef<CBioseqIndex> GetBioseqIndex (const CMappedFeat& mf);
241  // Get Bioseq index by sublocation
242  CRef<CBioseqIndex> GetBioseqIndex (const CSeq_loc& loc);
243 
244  // Seqset exploration iterator
245  template<typename Fnc> size_t IterateSeqsets (Fnc m);
246 
247  // Getters
248  CRef<CObjectManager> GetObjectManager (void) const { return m_Objmgr; }
249  CRef<CScope> GetScope (void) const { return m_Scope; }
250  CSeq_entry_Handle GetTopSEH (void) const { return m_Tseh; }
251  CConstRef<CSeq_entry> GetTopSEP (void) const { return m_Tsep; }
252  CConstRef<CSubmit_block> GetSbtBlk (void) const { return m_SbtBlk; }
253  CConstRef<CSeq_descr> GetTopDescr (void) const { return m_TopDescr; }
254  CRef<feature::CFeatTree> GetFeatTree (void) const { return m_FeatTree; }
255 
256  const vector<CRef<CBioseqIndex>>& GetBioseqIndices(void);
257 
258  const vector<CRef<CSeqsetIndex>>& GetSeqsetIndices(void);
259 
260  void SetHasOperon (bool hasOp) { m_HasOperon = hasOp; }
261  bool HasOperon (void) const { return m_HasOperon; }
262 
263  bool IsSmallGenomeSet (void) const { return m_IsSmallGenomeSet; }
264 
265  bool DistributedReferences (void) const { return m_DistributedReferences; }
266 
267  void SetSnpFunc(FAddSnpFunc* snp);
268 
269  FAddSnpFunc* GetSnpFunc(void);
270 
271  void SetFeatDepth(int featDepth);
272 
273  int GetFeatDepth(void);
274 
275  void SetGapDepth(int gapDepth);
276 
277  int GetGapDepth(void);
278 
279  // Check all Bioseqs for failure to fetch remote sequence components or remote feature annotation
280  bool IsFetchFailure(void);
281 
282  // Check for failure to create scope
283  bool IsIndexFailure (void) const { return m_IndexFailure; }
284  void SetIndexFailure (bool fails) { m_IndexFailure = fails; }
285 
286 private:
287  // Common initialization function called by each Initialize variant
288  void x_Init (void);
289 
290  // Recursive exploration to populate vector of index objects for Bioseqs in Seq-entry
291  void x_InitSeqs (const CSeq_entry& sep, CRef<CSeqsetIndex> prnt, int level = 0);
292 
293 private:
297 
302 
305 
306  vector<CRef<CBioseqIndex>> m_BsxList;
307 
308  // map from accession string to CBioseqIndex object
311 
312  // map from CBioseq_Handle to CBioseqIndex object via best Seq-id string
315 
316  vector<CRef<CSeqsetIndex>> m_SsxList;
317 
320 
322 
324 
327 
329 
331 };
332 
333 
334 // CSeqsetIndex
335 //
336 // CSeqsetIndex stores information about an element in the Bioseq-set hierarchy
338 {
339 public:
340  // Constructor
342  const CBioseq_set& bssp,
343  CRef<CSeqsetIndex> prnt);
344 
345 private:
346  // Prohibit copy constructor & assignment operator
347  CSeqsetIndex (const CSeqsetIndex&) = delete;
349 
350 public:
351  // Getters
352  CBioseq_set_Handle GetSeqsetHandle (void) const { return m_Ssh; }
353  const CBioseq_set& GetSeqset (void) const { return m_Bssp; }
354  CRef<CSeqsetIndex> GetParent (void) const { return m_Prnt; }
355 
356  CBioseq_set::TClass GetClass (void) const { return m_Class; }
357 
358 private:
362 
364 };
365 
366 
367 // CBioseqIndex
368 //
369 // CBioseqIndex is the exploration organizer for a given Bioseq. It provides methods to
370 // obtain descriptors and iterate through features that apply to the Bioseq. (These are
371 // stored in vectors, which are initialized upon first request.)
372 //
373 // CBioseqIndex also maintains a CFeatTree for its Bioseq, used to find the best gene for
374 // each feature.
375 //
376 // Descriptors are explored with:
377 //
378 // bsx.IterateDescriptors([this](CDescriptorIndex& sdx) {
379 // ...
380 // });
381 //
382 // and are presented based on the order of the descriptor chain hierarchy, starting with
383 // descriptors packaged on the Bioseq, then on its parent Bioseq-set, etc.
384 //
385 // Features are explored with:
386 //
387 // bsx.IterateFeatures([this](CFeatureIndex& sfx) {
388 // ...
389 // });
390 //
391 // and are presented in order of biological position along the parent sequence.
392 //
393 // Fetching external features uses SAnnotSelector adaptive depth unless explicitly overridden.
395 {
396 public:
397  // Constructor
399  const CBioseq& bsp,
400  CBioseq_Handle obsh,
401  CRef<CSeqsetIndex> prnt,
402  CSeq_entry_Handle tseh,
403  CRef<CScope> scope,
404  CSeqMasterIndex& idx,
407 
408  // Destructor
409  ~CBioseqIndex (void);
410 
411 private:
412  // Prohibit copy constructor & assignment operator
413  CBioseqIndex (const CBioseqIndex&) = delete;
415 
416 public:
417  // Gap exploration iterator
418  template<typename Fnc> size_t IterateGaps (Fnc m);
419 
420  // Descriptor exploration iterator
421  template<typename Fnc> size_t IterateDescriptors (Fnc m);
422 
423  // Feature exploration iterator
424  template<typename Fnc> size_t IterateFeatures (Fnc m);
425  template<typename Fnc> size_t IterateFeatures (CSeq_loc& slp, Fnc m);
426 
427  // Getters
428  CBioseq_Handle GetBioseqHandle (void) const { return m_Bsh; }
429  const CBioseq& GetBioseq (void) const { return m_Bsp; }
430  CBioseq_Handle GetOrigBioseqHandle (void) const { return m_OrigBsh; }
431  CRef<CSeqsetIndex> GetParent (void) const { return m_Prnt; }
432  CRef<CScope> GetScope (void) const { return m_Scope; }
433  CRef<CSeqVector> GetSeqVector (void) const { return m_SeqVec; }
434 
435  // Get master index
436  CWeakRef<CSeqMasterIndex> GetSeqMasterIndex (void) const { return m_Idx; }
437 
438  // Get sequence letters from Bioseq
439  string GetSequence (void);
440  void GetSequence (string& buffer);
441  // Get sequence letters from Bioseq subrange
442  string GetSequence (int from, int to);
443  void GetSequence (int from, int to, string& buffer);
444 
445  // Map from GetBestGene result to CFeatureIndex object
446  CRef<CFeatureIndex> GetFeatIndex (const CMappedFeat& mf);
447 
448  const vector<CRef<CGapIndex>>& GetGapIndices(void);
449 
450  const vector<CRef<CDescriptorIndex>>& GetDescriptorIndices(void);
451 
452  const vector<CRef<CFeatureIndex>>& GetFeatureIndices(void);
453 
454  // Get feature (CDS, mRNA, Prot) with product pointing to this Bioseq (protein, cDNA, peptide)
455  CRef<CFeatureIndex> GetFeatureForProduct(void);
456 
457  // Get Bioseq index containing feature with product pointing to this Bioseq
458  CWeakRef<CBioseqIndex> GetBioseqForProduct (void);
459 
460  // Get best (longest) protein feature on this protein Bioseq
461  CRef<CFeatureIndex> GetBestProteinFeature(void);
462 
463  // Flag to indicate failure to fetch remote sequence components or feature annotation
464  bool IsFetchFailure (void) const { return m_FetchFailure; }
465 
466  void SetFetchFailure (bool fails) { m_FetchFailure = fails; }
467 
468  void GetSelector(SAnnotSelector& sel);
469 
470  // Seq-inst fields
471  bool IsNA (void) const { return m_IsNA; }
472  bool IsAA (void) const { return m_IsAA; }
473  CSeq_inst::TTopology GetTopology (void) const { return m_Topology; }
474  CSeq_inst::TLength GetLength (void) const { return m_Length; }
475 
476  bool IsDelta (void) const { return m_IsDelta; }
477  bool IsDeltaLitOnly (void) const { return m_IsDeltaLitOnly; }
478  bool IsVirtual (void) const { return m_IsVirtual; }
479  bool IsMap (void) const { return m_IsMap; }
480 
481  // Seq-id fields
482  const string& GetAccession (void) const { return m_Accession; }
483 
484  bool IsRefSeq (void) const { return m_IsRefSeq; }
485  bool IsNC (void) const { return m_IsNC; }
486  bool IsNM (void) const { return m_IsNM; }
487  bool IsNR (void) const { return m_IsNR; }
488  bool IsNZ (void) const { return m_IsNZ; }
489  bool IsPatent (void) const { return m_IsPatent; }
490  bool IsPDB (void) const { return m_IsPDB; }
491  bool IsWP (void) const { return m_IsWP; }
492  bool IsThirdParty (void) const { return m_ThirdParty; }
493  bool IsWGSMaster (void) const { return m_WGSMaster; }
494  bool IsTSAMaster (void) const { return m_TSAMaster; }
495  bool IsTLSMaster (void) const { return m_TLSMaster; }
496 
497  string GetGeneralStr (void) const { return m_GeneralStr; }
498  int GetGeneralId (void) const { return m_GeneralId; }
499 
500  string GetPatentCountry (void) const { return m_PatentCountry; }
501  string GetPatentNumber (void) const { return m_PatentNumber; }
502  int GetPatentSequence (void) const { return m_PatentSequence; }
503 
504  int GetPDBChain (void) const { return m_PDBChain; }
505  string GetPDBChainID (void) const { return m_PDBChainID; }
506 
507  // Most important descriptor fields
508 
509  const string& GetTitle (void);
510 
512  CMolInfo::TBiomol GetBiomol (void);
513  CMolInfo::TTech GetTech (void);
514  CMolInfo::TCompleteness GetCompleteness (void);
515 
517  const string& GetTaxname (void);
518 
519  const string& GetDescTaxname (void);
520 
521  bool IsHTGTech (void);
522  bool IsHTGSUnfinished (void);
523  bool IsTLS (void);
524  bool IsTSA (void);
525  bool IsWGS (void);
526  bool IsEST_STS_GSS (void);
527 
528  bool IsUseBiosrc (void);
529 
530  const string& GetCommon (void);
531  const string& GetLineage (void);
532  TTaxId GetTaxid (void);
533  bool IsUsingAnamorph (void);
534 
535  CTempString GetGenus (void);
536  CTempString GetSpecies (void);
537  bool IsMultispecies (void);
538  CBioSource::TGenome GetGenome (void);
539  bool IsPlasmid (void);
540  bool IsChromosome (void);
541 
542  const string& GetOrganelle (void);
543 
544  string GetFirstSuperKingdom (void);
545  string GetSecondSuperKingdom (void);
546  bool IsCrossKingdom (void);
547 
548  CTempString GetChromosome (void);
549  CTempString GetLinkageGroup (void);
550  CTempString GetClone (void);
551  bool HasClone (void);
552  CTempString GetMap (void);
553  CTempString GetPlasmid (void);
554  CTempString GetSegment (void);
555 
556  CTempString GetBreed (void);
557  CTempString GetCultivar (void);
558  CTempString GetSpecimenVoucher (void);
559  CTempString GetIsolate (void);
560  CTempString GetStrain (void);
561  CTempString GetSubstrain (void);
562  CTempString GetMetaGenomeSource (void);
563 
564  bool IsHTGSCancelled (void);
565  bool IsHTGSDraft (void);
566  bool IsHTGSPooled (void);
567  bool IsTPAExp (void);
568  bool IsTPAInf (void);
569  bool IsTPAReasm (void);
570  bool IsUnordered (void);
571 
572  CTempString GetPDBCompound (void);
573 
574  bool IsForceOnlyNearFeats (void);
575 
576  bool IsUnverified (void);
577  bool IsUnverifiedFeature (void);
578  bool IsUnverifiedOrganism (void);
579  bool IsUnverifiedMisassembled (void);
580  bool IsUnverifiedContaminant (void);
581 
582  bool IsUnreviewed (void);
583  bool IsUnreviewedUnannotated (void);
584 
585  CTempString GetTargetedLocus (void);
586 
587  const string& GetComment (void);
588  bool IsPseudogene (void);
589 
590  bool HasOperon (void);
591  bool HasGene (void);
592  bool HasMultiIntervalGenes (void);
593  bool HasSource (void);
594 
595  string GetrEnzyme (void);
596 
597 private:
598  // Common gap collection, delayed until actually needed
599  void x_InitGaps (void);
600 
601  // Common descriptor collection, delayed until actually needed
602  void x_InitDescs (void);
603 
604  // Common feature collection, delayed until actually needed
605  void x_InitFeats (void);
606  void x_InitFeats (CSeq_loc& slp);
607 
608  void x_DefaultSelector(SAnnotSelector& sel, CSeqEntryIndex::EPolicy policy, CSeqEntryIndex::TFlags flags, bool onlyNear, CScope& scope);
609 
610  // common implementation method
611  void x_InitFeats (CSeq_loc* slpp);
612 
613  // Set BioSource flags
614  void x_InitSource (void);
615 
616 private:
618  const CBioseq& m_Bsp;
623 
625 
627  vector<CRef<CGapIndex>> m_GapList;
628 
630  vector<CRef<CDescriptorIndex>> m_SdxList;
631 
633  vector<CRef<CFeatureIndex>> m_SfxList;
634 
636 
639 
642 
643  // CFeatureIndex from CMappedFeat for use with GetBestGene
646 
648 
651 
653 
654 private:
655  // Seq-inst fields
656  bool m_IsNA;
657  bool m_IsAA;
660 
661  bool m_IsDelta;
664  bool m_IsMap;
665 
666  // Seq-id fields
667  string m_Accession;
668 
670  bool m_IsNC;
671  bool m_IsNM;
672  bool m_IsNR;
673  bool m_IsNZ;
675  bool m_IsPDB;
676  bool m_IsWP;
681 
682  string m_GeneralStr;
684 
688 
690  string m_PDBChainID;
691 
692  // Instantiated title
693  string m_Title;
694 
695  // MolInfo fields
700 
701  bool m_HTGTech;
703  bool m_IsTLS;
704  bool m_IsTSA;
705  bool m_IsWGS;
707 
709 
710  // BioSource fields
713 
715  string m_Taxname;
716 
717  string m_Common;
718  string m_Lineage;
721 
728 
729  string m_Organelle;
730 
734 
735  // Subsource fields
743 
744  // Orgmod fields
752 
753  // Keyword fields (genbank or embl blocks)
757  bool m_TPAExp;
758  bool m_TPAInf;
761 
762  // PDB block fields
764 
765  // User object fields
767 
774 
779 
780  // Comment fields
781  string m_Comment;
783 
784  // Feature fields
785  bool m_HasGene;
788 
789  // Map fields
790  string m_rEnzyme;
791 };
792 
793 
794 // CGapIndex
795 //
796 // CGapIndex stores information about an indexed gap
798 {
799 public:
800  // Constructor
801  CGapIndex (TSeqPos start,
802  TSeqPos end,
803  TSeqPos length,
804  const string& type,
805  const vector<string>& evidence,
806  bool isUnknownLength,
807  bool isAssemblyGap,
808  CBioseqIndex& bsx);
809 
810 private:
811  // Prohibit copy constructor & assignment operator
812  CGapIndex (const CGapIndex&) = delete;
813  CGapIndex& operator= (const CGapIndex&) = delete;
814 
815 public:
816  // Getters
817 
818  TSeqPos GetStart (void) const { return m_Start; }
819  TSeqPos GetEnd (void) const { return m_End; }
820  TSeqPos GetLength (void) const { return m_Length; }
821  const string GetGapType (void) const { return m_GapType; }
822  const vector<string>& GetGapEvidence (void) const { return m_GapEvidence; }
823  bool IsUnknownLength (void) const { return m_IsUnknownLength; }
824  bool IsAssemblyGap (void) const { return m_IsAssemblyGap; }
825 
826  // Get parent Bioseq index
827  CWeakRef<CBioseqIndex> GetBioseqIndex (void) const { return m_Bsx; }
828 
829 private:
831 
835 
836  string m_GapType;
837  vector<string> m_GapEvidence;
838 
841 };
842 
843 
844 // CDescriptorIndex
845 //
846 // CDescriptorIndex stores information about an indexed descriptor
848 {
849 public:
850  // Constructor
851  CDescriptorIndex (const CSeqdesc& sd,
852  CBioseqIndex& bsx);
853 
854 private:
855  // Prohibit copy constructor & assignment operator
858 
859 public:
860  // Getters
861  const CSeqdesc& GetSeqDesc (void) const { return m_Sd; }
862 
863  // Get parent Bioseq index
864  CWeakRef<CBioseqIndex> GetBioseqIndex (void) const { return m_Bsx; }
865 
866  // Get descriptor type (e.g., CSeqdesc::e_Molinfo)
867  CSeqdesc::E_Choice GetType (void) const { return m_Type; }
868 
869 private:
870  const CSeqdesc& m_Sd;
872 
874 };
875 
876 
877 // CFeatureIndex
878 //
879 // CFeatureIndex stores information about an indexed feature
881 {
882 public:
883  // Constructor
885  const CMappedFeat mf,
886  CConstRef<CSeq_loc> feat_loc,
887  CBioseqIndex& bsx);
888 
889 private:
890  // Prohibit copy constructor & assignment operator
891  CFeatureIndex (const CFeatureIndex&) = delete;
893 
894 public:
895  // Getters
896  CSeq_feat_Handle GetSeqFeatHandle (void) const { return m_Sfh; }
897  const CMappedFeat GetMappedFeat (void) const { return m_Mf; }
898  CRef<CSeqVector> GetSeqVector (void) const { return m_SeqVec; }
899 
900  CConstRef<CSeq_loc> GetMappedLocation(void) const { return m_Fl; }
901 
902  // Get parent Bioseq index
903  CWeakRef<CBioseqIndex> GetBioseqIndex (void) const { return m_Bsx; }
904 
905  // Get feature type (e.g. CSeqFeatData::e_Rna)
906  CSeqFeatData::E_Choice GetType (void) const { return m_Type; }
907 
908  // Get feature subtype (e.g. CSeqFeatData::eSubtype_mRNA)
909  CSeqFeatData::ESubtype GetSubtype (void) const { return m_Subtype; }
910 
911  TSeqPos GetStart (void) const { return m_Start; }
912  TSeqPos GetEnd (void) const { return m_End; }
913 
914  // Get sequence letters under feature intervals
915  string GetSequence (void);
916  void GetSequence (string& buffer);
917  // Get sequence letters under feature subrange
918  string GetSequence (int from, int to);
919  void GetSequence (int from, int to, string& buffer);
920 
921  // Map from feature to CFeatureIndex for best gene using CFeatTree in parent CBioseqIndex
922  CRef<CFeatureIndex> GetBestGene (void);
923 
924  // Map from feature to CFeatureIndex for best VDJC parent using CFeatTree in parent CBioseqIndex
925  CRef<CFeatureIndex> GetBestParent (void);
926 
927  // Find CFeatureIndex object for overlapping source feature using internal CFeatTree
929 
930 private:
931  void SetFetchFailure (bool fails);
932 
933 private:
939 
942 
945 };
946 
947 
948 // CWordPairIndexer
949 //
950 // CWordPairIndexer generates normalized terms and adjacent word pairs for Entrez indexing
952 {
953 public:
954  // Constructor
955  CWordPairIndexer (void) { }
956 
957 private:
958  // Prohibit copy constructor & assignment operator
961 
962 public:
963  void PopulateWordPairIndex (string str);
964 
965  template<typename Fnc> void IterateNorm (Fnc m);
966  template<typename Fnc> void IteratePair (Fnc m);
967 
968 public:
969  static string ConvertUTF8ToAscii(const string& str);
970  static string TrimPunctuation (const string& str);
971  static string TrimMixedContent (const string& str);
972  static bool IsStopWord(const string& str);
973 
974  const vector<string>& GetNorm (void) const { return m_Norm; }
975  const vector<string>& GetPair (void) const { return m_Pair; }
976 
977 private:
978  string x_AddToWordPairIndex (string item, string prev);
979 
980  vector<string> m_Norm;
981  vector<string> m_Pair;
982 };
983 
984 
985 // Inline lambda function implementations
986 
987 // Visit CBioseqIndex objects for all Bioseqs
988 template<typename Fnc>
989 inline
991 
992 {
993  return m_Idx->IterateBioseqs(m);
994 }
995 
996 template<typename Fnc>
997 inline
999 
1000 {
1001  int count = 0;
1002  for (auto& bsx : m_BsxList) {
1003  m(*bsx);
1004  count++;
1005  }
1006  return count;
1007 }
1008 
1009 // Visit CSeqsetIndex objects for all Seqsets
1010 template<typename Fnc>
1011 inline
1013 
1014 {
1015  return m_Idx->IterateSeqsets(m);
1016 }
1017 
1018 template<typename Fnc>
1019 inline
1021 
1022 {
1023  int count = 0;
1024  for (auto& ssx : m_SsxList) {
1025  m(*ssx);
1026  count++;
1027  }
1028  return count;
1029 }
1030 
1031 // Visit CGapIndex objects for all gaps
1032 template<typename Fnc>
1033 inline
1035 
1036 {
1037  int count = 0;
1038  try {
1039  // Delay gap collection until first request
1040  if (! m_GapsInitialized) {
1041  x_InitGaps();
1042  }
1043 
1044  for (auto& sgx : m_GapList) {
1045  count++;
1046  m(*sgx);
1047  }
1048  }
1049  catch (CException& e) {
1050  ERR_POST(Error << "Error in CBioseqIndex::IterateGaps: " << e.what());
1051  }
1052  return count;
1053 }
1054 
1055 // Visit CDescriptorIndex objects for all descriptors
1056 template<typename Fnc>
1057 inline
1059 
1060 {
1061  int count = 0;
1062  try {
1063  // Delay descriptor collection until first request
1064  if (! m_DescsInitialized) {
1065  x_InitDescs();
1066  }
1067 
1068  for (auto& sdx : m_SdxList) {
1069  count++;
1070  m(*sdx);
1071  }
1072  }
1073  catch (CException& e) {
1074  ERR_POST(Error << "Error in CBioseqIndex::IterateDescriptors: " << e.what());
1075  }
1076  return count;
1077 }
1078 
1079 // Visit CFeatureIndex objects for all features
1080 template<typename Fnc>
1081 inline
1083 
1084 {
1085  int count = 0;
1086  try {
1087  // Delay feature collection until first request
1088  if (! m_FeatsInitialized) {
1089  x_InitFeats();
1090  }
1091 
1092  for (auto& sfx : m_SfxList) {
1093  count++;
1094  m(*sfx);
1095  }
1096  }
1097  catch (CException& e) {
1098  ERR_POST(Error << "Error in CBioseqIndex::IterateFeatures: " << e.what());
1099  }
1100  return count;
1101 }
1102 
1103 template<typename Fnc>
1104 inline
1106 
1107 {
1108  int count = 0;
1109  try {
1110  // Delay feature collection until first request, but do not bail on m_FeatsInitialized flag
1111  x_InitFeats(slp);
1112 
1113  for (auto& sfx : m_SfxList) {
1114  count++;
1115  m(*sfx);
1116  }
1117  }
1118  catch (CException& e) {
1119  ERR_POST(Error << "Error in CBioseqIndex::IterateFeatures: " << e.what());
1120  }
1121  return count;
1122 }
1123 
1124 template<typename Fnc>
1125 inline
1127 
1128 {
1129  for (auto& str : m_Norm) {
1130  m(str);
1131  }
1132 }
1133 
1134 template<typename Fnc>
1135 inline
1137 
1138 {
1139  for (auto& str : m_Pair) {
1140  m(str);
1141  }
1142 }
1143 
1144 
1147 
1148 #endif /* FEATURE_INDEXER__HPP */
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAtomicCounter –.
Definition: ncbicntr.hpp:71
CBioSource::TGenome m_Genome
Definition: indexer.hpp:725
CRef< CScope > m_Scope
Definition: indexer.hpp:622
bool m_FeatForProdInitialized
Definition: indexer.hpp:637
bool m_HTGSPooled
Definition: indexer.hpp:756
bool m_IsTLS
Definition: indexer.hpp:703
bool m_IsUnverifiedMisassembled
Definition: indexer.hpp:771
bool m_BestProtFeatInitialized
Definition: indexer.hpp:640
int GetPatentSequence(void) const
Definition: indexer.hpp:502
bool m_has_clone
Definition: indexer.hpp:739
bool IsFetchFailure(void) const
Definition: indexer.hpp:464
bool m_ThirdParty
Definition: indexer.hpp:677
vector< CRef< CFeatureIndex > > m_SfxList
Definition: indexer.hpp:633
TFeatIndexMap m_FeatIndexMap
Definition: indexer.hpp:645
bool m_HasGene
Definition: indexer.hpp:785
CTempString m_Species
Definition: indexer.hpp:723
CTempString m_SpecimenVoucher
Definition: indexer.hpp:747
CSeq_inst::TLength GetLength(void) const
Definition: indexer.hpp:474
bool m_IsDeltaLitOnly
Definition: indexer.hpp:662
CSeq_inst::TTopology m_Topology
Definition: indexer.hpp:658
string m_SecondSuperKingdom
Definition: indexer.hpp:732
CRef< CSeqVector > m_SeqVec
Definition: indexer.hpp:647
void SetFetchFailure(bool fails)
Definition: indexer.hpp:466
CTempString m_Clone
Definition: indexer.hpp:738
string m_PDBChainID
Definition: indexer.hpp:690
bool m_IsPseudogene
Definition: indexer.hpp:782
void x_InitGaps(void)
Definition: indexer.cpp:1107
bool m_IsPatent
Definition: indexer.hpp:674
bool m_IsUnreviewed
Definition: indexer.hpp:775
string m_Lineage
Definition: indexer.hpp:718
string m_PatentCountry
Definition: indexer.hpp:685
bool m_ForceOnlyNearFeats
Definition: indexer.hpp:766
bool m_IsUnreviewedUnannotated
Definition: indexer.hpp:776
CConstRef< CBioSource > m_BioSource
Definition: indexer.hpp:714
bool m_UsingAnamorph
Definition: indexer.hpp:720
bool IsNR(void) const
Definition: indexer.hpp:487
map< CMappedFeat, CRef< CFeatureIndex > > TFeatIndexMap
Definition: indexer.hpp:644
string m_Title
Definition: indexer.hpp:693
bool m_FetchFailure
Definition: indexer.hpp:652
CConstRef< CMolInfo > m_MolInfo
Definition: indexer.hpp:696
bool m_IsUnverifiedFeature
Definition: indexer.hpp:769
bool IsWP(void) const
Definition: indexer.hpp:491
CTempString m_Chromosome
Definition: indexer.hpp:736
bool IsTSAMaster(void) const
Definition: indexer.hpp:494
const CBioseq & m_Bsp
Definition: indexer.hpp:618
CTempString m_Isolate
Definition: indexer.hpp:748
bool m_WGSMaster
Definition: indexer.hpp:678
bool m_UseBiosrc
Definition: indexer.hpp:708
bool m_IsMap
Definition: indexer.hpp:664
bool m_HTGTech
Definition: indexer.hpp:701
bool m_TSAMaster
Definition: indexer.hpp:679
bool IsNM(void) const
Definition: indexer.hpp:486
bool m_HTGSUnfinished
Definition: indexer.hpp:702
CRef< CScope > GetScope(void) const
Definition: indexer.hpp:432
bool m_IsUnverifiedContaminant
Definition: indexer.hpp:772
bool m_IsPDB
Definition: indexer.hpp:675
bool IsDelta(void) const
Definition: indexer.hpp:476
bool m_IsVirtual
Definition: indexer.hpp:663
bool m_IsDelta
Definition: indexer.hpp:661
void x_InitFeats(void)
Definition: indexer.cpp:2286
bool IsThirdParty(void) const
Definition: indexer.hpp:492
bool IsMap(void) const
Definition: indexer.hpp:479
bool IsAA(void) const
Definition: indexer.hpp:472
TTaxId m_Taxid
Definition: indexer.hpp:719
bool IsPatent(void) const
Definition: indexer.hpp:489
CBioseq_Handle GetBioseqHandle(void) const
Definition: indexer.hpp:428
bool m_IsRefSeq
Definition: indexer.hpp:669
string m_PatentNumber
Definition: indexer.hpp:686
CRef< CSeqVector > GetSeqVector(void) const
Definition: indexer.hpp:433
CRef< CSeqsetIndex > GetParent(void) const
Definition: indexer.hpp:431
string m_FirstSuperKingdom
Definition: indexer.hpp:731
CTempString m_Cultivar
Definition: indexer.hpp:746
size_t IterateGaps(Fnc m)
Definition: indexer.hpp:1034
string m_GeneralStr
Definition: indexer.hpp:682
bool IsWGSMaster(void) const
Definition: indexer.hpp:493
bool IsTLSMaster(void) const
Definition: indexer.hpp:495
bool m_HTGSCancelled
Definition: indexer.hpp:754
const string & GetAccession(void) const
Definition: indexer.hpp:482
bool m_Multispecies
Definition: indexer.hpp:724
int GetPDBChain(void) const
Definition: indexer.hpp:504
CWeakRef< CSeqMasterIndex > GetSeqMasterIndex(void) const
Definition: indexer.hpp:436
string GetPatentCountry(void) const
Definition: indexer.hpp:500
CWeakRef< CSeqMasterIndex > m_Idx
Definition: indexer.hpp:624
CTempString m_Substrain
Definition: indexer.hpp:750
CSeqEntryIndex::TFlags m_Flags
Definition: indexer.hpp:650
string GetPDBChainID(void) const
Definition: indexer.hpp:505
bool m_IsUnverifiedOrganism
Definition: indexer.hpp:770
CConstRef< CBioSource > m_DescBioSource
Definition: indexer.hpp:711
const CBioseq & GetBioseq(void) const
Definition: indexer.hpp:429
CSeq_inst::TLength m_Length
Definition: indexer.hpp:659
bool IsNA(void) const
Definition: indexer.hpp:471
string m_Organelle
Definition: indexer.hpp:729
CTempString m_UnverifiedPrefix
Definition: indexer.hpp:773
bool m_IsEST_STS_GSS
Definition: indexer.hpp:706
CTempString m_Breed
Definition: indexer.hpp:745
CBioseq_Handle m_Bsh
Definition: indexer.hpp:617
bool m_HasMultiIntervalGenes
Definition: indexer.hpp:786
bool m_TLSMaster
Definition: indexer.hpp:680
bool m_HasSource
Definition: indexer.hpp:787
string GetPatentNumber(void) const
Definition: indexer.hpp:501
bool m_IsTSA
Definition: indexer.hpp:704
bool m_IsWGS
Definition: indexer.hpp:705
CBioseq_Handle m_OrigBsh
Definition: indexer.hpp:619
bool m_FeatsInitialized
Definition: indexer.hpp:632
vector< CRef< CDescriptorIndex > > m_SdxList
Definition: indexer.hpp:630
CMolInfo::TTech m_Tech
Definition: indexer.hpp:698
string m_Accession
Definition: indexer.hpp:667
CTempString m_Segment
Definition: indexer.hpp:742
CMolInfo::TBiomol m_Biomol
Definition: indexer.hpp:697
size_t IterateDescriptors(Fnc m)
Definition: indexer.hpp:1058
CSeq_entry_Handle m_Tseh
Definition: indexer.hpp:621
size_t IterateFeatures(Fnc m)
Definition: indexer.hpp:1082
CTempString m_UnreviewedPrefix
Definition: indexer.hpp:777
bool m_SourcesInitialized
Definition: indexer.hpp:635
CSeq_inst::TTopology GetTopology(void) const
Definition: indexer.hpp:473
bool m_IsCrossKingdom
Definition: indexer.hpp:733
bool IsNZ(void) const
Definition: indexer.hpp:488
bool m_TPAExp
Definition: indexer.hpp:757
int GetGeneralId(void) const
Definition: indexer.hpp:498
string GetGeneralStr(void) const
Definition: indexer.hpp:497
bool IsVirtual(void) const
Definition: indexer.hpp:478
int m_GeneralId
Definition: indexer.hpp:683
string m_DescTaxname
Definition: indexer.hpp:712
vector< CRef< CGapIndex > > m_GapList
Definition: indexer.hpp:627
int m_PDBChain
Definition: indexer.hpp:689
CSeqEntryIndex::EPolicy m_Policy
Definition: indexer.hpp:649
CBioseqIndex(const CBioseqIndex &)=delete
int m_PatentSequence
Definition: indexer.hpp:687
CRef< CFeatureIndex > m_BestProteinFeature
Definition: indexer.hpp:641
string m_Comment
Definition: indexer.hpp:781
bool m_Unordered
Definition: indexer.hpp:760
CTempString m_TargetedLocus
Definition: indexer.hpp:778
CTempString m_MetaGenomeSource
Definition: indexer.hpp:751
bool IsDeltaLitOnly(void) const
Definition: indexer.hpp:477
CTempString m_Plasmid
Definition: indexer.hpp:741
bool m_IsUnverified
Definition: indexer.hpp:768
CTempString m_PDBCompound
Definition: indexer.hpp:763
bool m_HTGSDraft
Definition: indexer.hpp:755
CTempString m_Map
Definition: indexer.hpp:740
CBioseq_Handle GetOrigBioseqHandle(void) const
Definition: indexer.hpp:430
string m_Taxname
Definition: indexer.hpp:715
bool IsRefSeq(void) const
Definition: indexer.hpp:484
string m_rEnzyme
Definition: indexer.hpp:790
bool IsNC(void) const
Definition: indexer.hpp:485
CMolInfo::TCompleteness m_Completeness
Definition: indexer.hpp:699
CTempString m_Strain
Definition: indexer.hpp:749
CRef< CSeqsetIndex > m_Prnt
Definition: indexer.hpp:620
bool m_IsPlasmid
Definition: indexer.hpp:726
CRef< CFeatureIndex > m_FeatureForProduct
Definition: indexer.hpp:638
bool m_TPAReasm
Definition: indexer.hpp:759
void x_InitDescs(void)
Definition: indexer.cpp:1590
bool m_TPAInf
Definition: indexer.hpp:758
CTempString m_LinkageGroup
Definition: indexer.hpp:737
bool IsPDB(void) const
Definition: indexer.hpp:490
bool m_DescsInitialized
Definition: indexer.hpp:629
bool m_GapsInitialized
Definition: indexer.hpp:626
string m_Common
Definition: indexer.hpp:717
CTempString m_Genus
Definition: indexer.hpp:722
bool m_IsChromosome
Definition: indexer.hpp:727
CBioseq_Handle –.
CBioseq_set_Handle –.
const CSeqdesc & m_Sd
Definition: indexer.hpp:870
CSeqdesc::E_Choice m_Type
Definition: indexer.hpp:873
CSeqdesc::E_Choice GetType(void) const
Definition: indexer.hpp:867
CDescriptorIndex(const CDescriptorIndex &)=delete
CWeakRef< CBioseqIndex > GetBioseqIndex(void) const
Definition: indexer.hpp:864
const CSeqdesc & GetSeqDesc(void) const
Definition: indexer.hpp:861
CWeakRef< CBioseqIndex > m_Bsx
Definition: indexer.hpp:871
const CMappedFeat m_Mf
Definition: indexer.hpp:935
CSeqFeatData::ESubtype m_Subtype
Definition: indexer.hpp:941
CSeqFeatData::ESubtype GetSubtype(void) const
Definition: indexer.hpp:909
CSeqFeatData::E_Choice GetType(void) const
Definition: indexer.hpp:906
CSeq_feat_Handle m_Sfh
Definition: indexer.hpp:934
CFeatureIndex(const CFeatureIndex &)=delete
TSeqPos GetStart(void) const
Definition: indexer.hpp:911
CRef< CSeqVector > GetSeqVector(void) const
Definition: indexer.hpp:898
TSeqPos m_End
Definition: indexer.hpp:944
CRef< CSeqVector > m_SeqVec
Definition: indexer.hpp:937
CSeq_feat_Handle GetSeqFeatHandle(void) const
Definition: indexer.hpp:896
CSeqFeatData::E_Choice m_Type
Definition: indexer.hpp:940
const CMappedFeat GetMappedFeat(void) const
Definition: indexer.hpp:897
CConstRef< CSeq_loc > m_Fl
Definition: indexer.hpp:936
TSeqPos m_Start
Definition: indexer.hpp:943
CConstRef< CSeq_loc > GetMappedLocation(void) const
Definition: indexer.hpp:900
CWeakRef< CBioseqIndex > GetBioseqIndex(void) const
Definition: indexer.hpp:903
TSeqPos GetEnd(void) const
Definition: indexer.hpp:912
CWeakRef< CBioseqIndex > m_Bsx
Definition: indexer.hpp:938
const vector< string > & GetGapEvidence(void) const
Definition: indexer.hpp:822
const string GetGapType(void) const
Definition: indexer.hpp:821
CWeakRef< CBioseqIndex > GetBioseqIndex(void) const
Definition: indexer.hpp:827
bool m_IsAssemblyGap
Definition: indexer.hpp:840
bool IsAssemblyGap(void) const
Definition: indexer.hpp:824
TSeqPos GetLength(void) const
Definition: indexer.hpp:820
CGapIndex(const CGapIndex &)=delete
TSeqPos GetStart(void) const
Definition: indexer.hpp:818
bool m_IsUnknownLength
Definition: indexer.hpp:839
TSeqPos m_Start
Definition: indexer.hpp:832
TSeqPos m_End
Definition: indexer.hpp:833
vector< string > m_GapEvidence
Definition: indexer.hpp:837
bool IsUnknownLength(void) const
Definition: indexer.hpp:823
TSeqPos m_Length
Definition: indexer.hpp:834
string m_GapType
Definition: indexer.hpp:836
TSeqPos GetEnd(void) const
Definition: indexer.hpp:819
CWeakRef< CBioseqIndex > m_Bsx
Definition: indexer.hpp:830
CMappedFeat –.
Definition: mapped_feat.hpp:59
CObjectEx –.
Definition: ncbiobj.hpp:2531
CObject –.
Definition: ncbiobj.hpp:180
CScope –.
Definition: scope.hpp:92
CRef< CSeqMasterIndex > GetMasterIndex(void) const
Definition: indexer.hpp:189
CRef< CSeqMasterIndex > m_Idx
Definition: indexer.hpp:193
size_t IterateBioseqs(Fnc m)
Definition: indexer.hpp:990
size_t IterateSeqsets(Fnc m)
Definition: indexer.hpp:1012
CSeqEntryIndex(const CSeqEntryIndex &)=delete
bool m_DistributedReferences
Definition: indexer.hpp:321
CSeqEntryIndex::TFlags m_Flags
Definition: indexer.hpp:304
bool IsSmallGenomeSet(void) const
Definition: indexer.hpp:263
CConstRef< CSeq_descr > m_TopDescr
Definition: indexer.hpp:300
CRef< CObjectManager > GetObjectManager(void) const
Definition: indexer.hpp:248
FAddSnpFunc * m_SnpFunc
Definition: indexer.hpp:323
CSeqMasterIndex(const CSeqMasterIndex &)=delete
void SetIndexFailure(bool fails)
Definition: indexer.hpp:284
CConstRef< CSeq_entry > GetTopSEP(void) const
Definition: indexer.hpp:251
bool DistributedReferences(void) const
Definition: indexer.hpp:265
CSeqEntryIndex::EPolicy m_Policy
Definition: indexer.hpp:303
bool m_IndexFailure
Definition: indexer.hpp:330
TAccnIndexMap m_AccnIndexMap
Definition: indexer.hpp:310
CConstRef< CSeq_descr > GetTopDescr(void) const
Definition: indexer.hpp:253
CConstRef< CSubmit_block > m_SbtBlk
Definition: indexer.hpp:299
CConstRef< CSubmit_block > GetSbtBlk(void) const
Definition: indexer.hpp:252
CRef< CObjectManager > m_Objmgr
Definition: indexer.hpp:294
map< string, CRef< CBioseqIndex > > TAccnIndexMap
Definition: indexer.hpp:309
bool HasOperon(void) const
Definition: indexer.hpp:261
TBestIdIndexMap m_BestIdIndexMap
Definition: indexer.hpp:314
size_t IterateBioseqs(Fnc m)
Definition: indexer.hpp:998
size_t IterateSeqsets(Fnc m)
Definition: indexer.hpp:1020
CAtomicCounter m_Counter
Definition: indexer.hpp:328
CSeqMasterIndex(void)
Definition: indexer.hpp:205
map< string, CRef< CBioseqIndex > > TBestIdIndexMap
Definition: indexer.hpp:313
CSeq_entry_Handle GetTopSEH(void) const
Definition: indexer.hpp:250
vector< CRef< CBioseqIndex > > m_BsxList
Definition: indexer.hpp:306
CConstRef< CSeq_entry > m_Tsep
Definition: indexer.hpp:298
CRef< CScope > GetScope(void) const
Definition: indexer.hpp:249
CRef< feature::CFeatTree > GetFeatTree(void) const
Definition: indexer.hpp:254
void SetHasOperon(bool hasOp)
Definition: indexer.hpp:260
CRef< CScope > m_Scope
Definition: indexer.hpp:295
vector< CRef< CSeqsetIndex > > m_SsxList
Definition: indexer.hpp:316
bool m_IsSmallGenomeSet
Definition: indexer.hpp:319
bool IsIndexFailure(void) const
Definition: indexer.hpp:283
CSeq_entry_Handle m_Tseh
Definition: indexer.hpp:296
CRef< feature::CFeatTree > m_FeatTree
Definition: indexer.hpp:301
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
const CBioseq_set & m_Bssp
Definition: indexer.hpp:360
CRef< CSeqsetIndex > m_Prnt
Definition: indexer.hpp:361
CBioseq_set_Handle GetSeqsetHandle(void) const
Definition: indexer.hpp:352
CBioseq_set_Handle m_Ssh
Definition: indexer.hpp:359
const CBioseq_set & GetSeqset(void) const
Definition: indexer.hpp:353
CBioseq_set::TClass m_Class
Definition: indexer.hpp:363
CBioseq_set::TClass GetClass(void) const
Definition: indexer.hpp:356
CSeqsetIndex(const CSeqsetIndex &)=delete
CRef< CSeqsetIndex > GetParent(void) const
Definition: indexer.hpp:354
CSubmit_block –.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CWordPairIndexer(void)
Definition: indexer.hpp:955
vector< string > m_Pair
Definition: indexer.hpp:981
const vector< string > & GetPair(void) const
Definition: indexer.hpp:975
const vector< string > & GetNorm(void) const
Definition: indexer.hpp:974
void IteratePair(Fnc m)
Definition: indexer.hpp:1136
vector< string > m_Norm
Definition: indexer.hpp:980
CWordPairIndexer(const CWordPairIndexer &)=delete
void IterateNorm(Fnc m)
Definition: indexer.hpp:1126
Definition: map.hpp:338
bool IsCrossKingdom(const COrg_ref &org, string &first_kingdom, string &second_kingdom)
Definition: cleanup.cpp:2251
static uch flags
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static const char * str(char *buf, int n)
Definition: stats.c:84
static void GetSegment(const char *str, IndexblkPtr entry)
Definition: gb_index.cpp:158
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
CConstRef< CSeq_feat > GetOverlappingSource(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1593
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
Definition: seqtitle.cpp:106
const CBioSource * GetBioSource(const CBioseq &bioseq)
Retrieve the BioSource object for a given bioseq handle.
Definition: sequence.cpp:104
const CMolInfo * GetMolInfo(const CBioseq &bioseq)
Retrieve the MolInfo object for a given bioseq handle.
Definition: sequence.cpp:284
CObject & operator=(const CObject &src) THROWS_NONE
Assignment operator.
Definition: ncbiobj.hpp:482
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJUTIL_EXPORT
Definition: ncbi_export.h:1339
E_Choice
Choice variants.
E_Choice
Choice variants.
Definition: Seqdesc_.hpp:109
ETopology
topology of molecule
Definition: Seq_inst_.hpp:121
TSeqPos TLength
Definition: Seq_inst_.hpp:147
void(* FAddSnpFunc)(CBioseq_Handle bsh, string &na_acc)
Definition: indexer.hpp:61
yy_size_t n
The Object manager core.
static pcre_uint8 * buffer
Definition: pcretest.c:1051
bool IsUnverifiedMisassembled(const CBioseq &seq)
bool IsUnverifiedOrganism(const CBioseq &seq)
bool IsUnverifiedContaminant(const CBioseq &seq)
bool IsUnverifiedFeature(const CBioseq &seq)
SAnnotSelector –.
Definition: type.c:6
Modified on Wed Apr 17 13:08:23 2024 by modify_doxy.py rev. 669887