1 #ifndef FEATURE_INDEXER__HPP
2 #define FEATURE_INDEXER__HPP
109 fHideIntronFeats = 32,
113 fGeneRNACDSOnly = 512,
145 template<
typename Fnc>
size_t IterateBioseqs (Fnc m);
163 template<
typename Fnc>
size_t IterateSeqsets (Fnc m);
165 const vector<CRef<CBioseqIndex>>& GetBioseqIndices(
void);
167 const vector<CRef<CSeqsetIndex>>& GetSeqsetIndices(
void);
169 bool DistributedReferences(
void);
175 void SetFeatDepth(
int featDepth);
177 int GetFeatDepth(
void);
179 void SetGapDepth(
int gapDepth);
181 int GetGapDepth(
void);
184 bool IsFetchFailure(
void);
187 bool IsIndexFailure (
void);
227 template<
typename Fnc>
size_t IterateBioseqs (Fnc m);
245 template<
typename Fnc>
size_t IterateSeqsets (Fnc m);
256 const vector<CRef<CBioseqIndex>>& GetBioseqIndices(
void);
258 const vector<CRef<CSeqsetIndex>>& GetSeqsetIndices(
void);
271 void SetFeatDepth(
int featDepth);
273 int GetFeatDepth(
void);
275 void SetGapDepth(
int gapDepth);
277 int GetGapDepth(
void);
280 bool IsFetchFailure(
void);
418 template<
typename Fnc>
size_t IterateGaps (Fnc m);
421 template<
typename Fnc>
size_t IterateDescriptors (Fnc m);
424 template<
typename Fnc>
size_t IterateFeatures (Fnc m);
425 template<
typename Fnc>
size_t IterateFeatures (
CSeq_loc& slp, Fnc m);
448 const vector<CRef<CGapIndex>>& GetGapIndices(
void);
450 const vector<CRef<CDescriptorIndex>>& GetDescriptorIndices(
void);
452 const vector<CRef<CFeatureIndex>>& GetFeatureIndices(
void);
471 bool IsNA (
void)
const {
return m_IsNA; }
472 bool IsAA (
void)
const {
return m_IsAA; }
476 bool IsDelta (
void)
const {
return m_IsDelta; }
479 bool IsMap (
void)
const {
return m_IsMap; }
485 bool IsNC (
void)
const {
return m_IsNC; }
486 bool IsNM (
void)
const {
return m_IsNM; }
487 bool IsNR (
void)
const {
return m_IsNR; }
488 bool IsNZ (
void)
const {
return m_IsNZ; }
490 bool IsPDB (
void)
const {
return m_IsPDB; }
491 bool IsWP (
void)
const {
return m_IsWP; }
517 const string& GetTaxname (
void);
519 const string& GetDescTaxname (
void);
521 bool IsHTGTech (
void);
522 bool IsHTGSUnfinished (
void);
526 bool IsEST_STS_GSS (
void);
528 bool IsUseBiosrc (
void);
530 const string& GetCommon (
void);
531 const string& GetLineage (
void);
533 bool IsUsingAnamorph (
void);
537 bool IsMultispecies (
void);
539 bool IsPlasmid (
void);
540 bool IsChromosome (
void);
542 const string& GetOrganelle (
void);
544 string GetFirstSuperKingdom (
void);
545 string GetSecondSuperKingdom (
void);
551 bool HasClone (
void);
564 bool IsHTGSCancelled (
void);
565 bool IsHTGSDraft (
void);
566 bool IsHTGSPooled (
void);
567 bool IsTPAExp (
void);
568 bool IsTPAInf (
void);
569 bool IsTPAReasm (
void);
570 bool IsUnordered (
void);
574 bool IsForceOnlyNearFeats (
void);
576 bool IsUnverified (
void);
582 bool IsUnreviewed (
void);
583 bool IsUnreviewedUnannotated (
void);
587 const string& GetComment (
void);
588 bool IsPseudogene (
void);
590 bool HasOperon (
void);
592 bool HasMultiIntervalGenes (
void);
593 bool HasSource (
void);
595 string GetrEnzyme (
void);
599 void x_InitGaps (
void);
602 void x_InitDescs (
void);
605 void x_InitFeats (
void);
614 void x_InitSource (
void);
805 const vector<string>& evidence,
806 bool isUnknownLength,
931 void SetFetchFailure (
bool fails);
963 void PopulateWordPairIndex (
string str);
965 template<
typename Fnc>
void IterateNorm (Fnc m);
966 template<
typename Fnc>
void IteratePair (Fnc m);
969 static string ConvertUTF8ToAscii(
const string&
str);
970 static string TrimPunctuation (
const string&
str);
971 static string TrimMixedContent (
const string&
str);
972 static bool IsStopWord(
const string&
str);
974 const vector<string>&
GetNorm (
void)
const {
return m_Norm; }
975 const vector<string>&
GetPair (
void)
const {
return m_Pair; }
978 string x_AddToWordPairIndex (
string item,
string prev);
988 template<
typename Fnc>
996 template<
typename Fnc>
1010 template<
typename Fnc>
1018 template<
typename Fnc>
1032 template<
typename Fnc>
1056 template<
typename Fnc>
1080 template<
typename Fnc>
1103 template<
typename Fnc>
1124 template<
typename Fnc>
1134 template<
typename Fnc>
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CBioSource::TGenome m_Genome
bool m_FeatForProdInitialized
bool m_IsUnverifiedMisassembled
bool m_BestProtFeatInitialized
int GetPatentSequence(void) const
bool IsFetchFailure(void) const
vector< CRef< CFeatureIndex > > m_SfxList
TFeatIndexMap m_FeatIndexMap
CTempString m_SpecimenVoucher
CSeq_inst::TLength GetLength(void) const
CSeq_inst::TTopology m_Topology
string m_SecondSuperKingdom
CRef< CSeqVector > m_SeqVec
void SetFetchFailure(bool fails)
bool m_ForceOnlyNearFeats
bool m_IsUnreviewedUnannotated
CConstRef< CBioSource > m_BioSource
map< CMappedFeat, CRef< CFeatureIndex > > TFeatIndexMap
CConstRef< CMolInfo > m_MolInfo
bool m_IsUnverifiedFeature
bool IsTSAMaster(void) const
CRef< CScope > GetScope(void) const
bool m_IsUnverifiedContaminant
bool IsThirdParty(void) const
bool IsPatent(void) const
CBioseq_Handle GetBioseqHandle(void) const
CRef< CSeqVector > GetSeqVector(void) const
CRef< CSeqsetIndex > GetParent(void) const
string m_FirstSuperKingdom
size_t IterateGaps(Fnc m)
bool IsWGSMaster(void) const
bool IsTLSMaster(void) const
const string & GetAccession(void) const
int GetPDBChain(void) const
CWeakRef< CSeqMasterIndex > GetSeqMasterIndex(void) const
string GetPatentCountry(void) const
CWeakRef< CSeqMasterIndex > m_Idx
CSeqEntryIndex::TFlags m_Flags
string GetPDBChainID(void) const
bool m_IsUnverifiedOrganism
CConstRef< CBioSource > m_DescBioSource
const CBioseq & GetBioseq(void) const
CSeq_inst::TLength m_Length
CTempString m_UnverifiedPrefix
bool m_HasMultiIntervalGenes
string GetPatentNumber(void) const
vector< CRef< CDescriptorIndex > > m_SdxList
CMolInfo::TBiomol m_Biomol
size_t IterateDescriptors(Fnc m)
size_t IterateFeatures(Fnc m)
CTempString m_UnreviewedPrefix
bool m_SourcesInitialized
CSeq_inst::TTopology GetTopology(void) const
int GetGeneralId(void) const
string GetGeneralStr(void) const
bool IsVirtual(void) const
vector< CRef< CGapIndex > > m_GapList
CSeqEntryIndex::EPolicy m_Policy
CBioseqIndex(const CBioseqIndex &)=delete
CRef< CFeatureIndex > m_BestProteinFeature
CTempString m_TargetedLocus
CTempString m_MetaGenomeSource
bool IsDeltaLitOnly(void) const
CTempString m_PDBCompound
CBioseq_Handle GetOrigBioseqHandle(void) const
bool IsRefSeq(void) const
CMolInfo::TCompleteness m_Completeness
CRef< CSeqsetIndex > m_Prnt
CRef< CFeatureIndex > m_FeatureForProduct
CTempString m_LinkageGroup
CSeqdesc::E_Choice m_Type
CSeqdesc::E_Choice GetType(void) const
CDescriptorIndex(const CDescriptorIndex &)=delete
CWeakRef< CBioseqIndex > GetBioseqIndex(void) const
const CSeqdesc & GetSeqDesc(void) const
CWeakRef< CBioseqIndex > m_Bsx
CSeqFeatData::ESubtype m_Subtype
CSeqFeatData::ESubtype GetSubtype(void) const
CSeqFeatData::E_Choice GetType(void) const
CFeatureIndex(const CFeatureIndex &)=delete
TSeqPos GetStart(void) const
CRef< CSeqVector > GetSeqVector(void) const
CRef< CSeqVector > m_SeqVec
CSeq_feat_Handle GetSeqFeatHandle(void) const
CSeqFeatData::E_Choice m_Type
const CMappedFeat GetMappedFeat(void) const
CConstRef< CSeq_loc > m_Fl
CConstRef< CSeq_loc > GetMappedLocation(void) const
CWeakRef< CBioseqIndex > GetBioseqIndex(void) const
TSeqPos GetEnd(void) const
CWeakRef< CBioseqIndex > m_Bsx
const vector< string > & GetGapEvidence(void) const
const string GetGapType(void) const
CWeakRef< CBioseqIndex > GetBioseqIndex(void) const
bool IsAssemblyGap(void) const
TSeqPos GetLength(void) const
CGapIndex(const CGapIndex &)=delete
TSeqPos GetStart(void) const
vector< string > m_GapEvidence
bool IsUnknownLength(void) const
TSeqPos GetEnd(void) const
CWeakRef< CBioseqIndex > m_Bsx
CRef< CSeqMasterIndex > GetMasterIndex(void) const
CRef< CSeqMasterIndex > m_Idx
size_t IterateBioseqs(Fnc m)
size_t IterateSeqsets(Fnc m)
CSeqEntryIndex(const CSeqEntryIndex &)=delete
bool m_DistributedReferences
CSeqEntryIndex::TFlags m_Flags
bool IsSmallGenomeSet(void) const
CConstRef< CSeq_descr > m_TopDescr
CRef< CObjectManager > GetObjectManager(void) const
CSeqMasterIndex(const CSeqMasterIndex &)=delete
void SetIndexFailure(bool fails)
CConstRef< CSeq_entry > GetTopSEP(void) const
bool DistributedReferences(void) const
CSeqEntryIndex::EPolicy m_Policy
TAccnIndexMap m_AccnIndexMap
CConstRef< CSeq_descr > GetTopDescr(void) const
CConstRef< CSubmit_block > m_SbtBlk
CConstRef< CSubmit_block > GetSbtBlk(void) const
CRef< CObjectManager > m_Objmgr
map< string, CRef< CBioseqIndex > > TAccnIndexMap
bool HasOperon(void) const
TBestIdIndexMap m_BestIdIndexMap
size_t IterateBioseqs(Fnc m)
size_t IterateSeqsets(Fnc m)
map< string, CRef< CBioseqIndex > > TBestIdIndexMap
CSeq_entry_Handle GetTopSEH(void) const
vector< CRef< CBioseqIndex > > m_BsxList
CConstRef< CSeq_entry > m_Tsep
CRef< CScope > GetScope(void) const
CRef< feature::CFeatTree > GetFeatTree(void) const
void SetHasOperon(bool hasOp)
vector< CRef< CSeqsetIndex > > m_SsxList
bool IsIndexFailure(void) const
CRef< feature::CFeatTree > m_FeatTree
@Seq_descr.hpp User-defined methods of the data storage class.
const CBioseq_set & m_Bssp
CRef< CSeqsetIndex > m_Prnt
CBioseq_set_Handle GetSeqsetHandle(void) const
const CBioseq_set & GetSeqset(void) const
CBioseq_set::TClass m_Class
CBioseq_set::TClass GetClass(void) const
CSeqsetIndex(const CSeqsetIndex &)=delete
CRef< CSeqsetIndex > GetParent(void) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
const vector< string > & GetPair(void) const
const vector< string > & GetNorm(void) const
CWordPairIndexer(const CWordPairIndexer &)=delete
bool IsCrossKingdom(const COrg_ref &org, string &first_kingdom, string &second_kingdom)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static void GetSegment(char *str, IndexblkPtr entry)
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
unsigned int TSeqPos
Type for sequence locations and lengths.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
CConstRef< CSeq_feat > GetOverlappingSource(const CSeq_loc &loc, CScope &scope)
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
const CBioSource * GetBioSource(const CBioseq &bioseq)
Retrieve the BioSource object for a given bioseq handle.
const CMolInfo * GetMolInfo(const CBioseq &bioseq)
Retrieve the MolInfo object for a given bioseq handle.
CObject & operator=(const CObject &src) THROWS_NONE
Assignment operator.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
#define NCBI_XOBJUTIL_EXPORT
ETopology
topology of molecule
void(* FAddSnpFunc)(CBioseq_Handle bsh, string &na_acc)
static pcre_uint8 * buffer
bool IsUnverifiedMisassembled(const CBioseq &seq)
bool IsUnverifiedOrganism(const CBioseq &seq)
bool IsUnverifiedContaminant(const CBioseq &seq)
bool IsUnverifiedFeature(const CBioseq &seq)
static const char * str(char *buf, int n)