91 #define THIS_FILE "gb_ascii.cpp"
99 return (entry.
mOffset + div_shift);
107 if (condiv && ibp->
segnum != 0) {
126 }
else if (condiv && ibp->
is_contig ==
false && ibp->
origin ==
false) {
145 bool allow_crossdb_featloc;
160 for (q = p,
r = p; *q !=
'\0'; q++)
161 if (*q !=
'\n' && *q !=
'\t' && *q !=
' ')
165 for (q = p; *q !=
'\0'; q++)
166 if ((q[0] ==
',' && q[1] ==
',') || (q[0] ==
'(' && q[1] ==
',') ||
167 (q[0] ==
',' && q[1] ==
')'))
261 while (!
str.empty()) {
263 if (c ==
' ' || c ==
'\t' || c ==
'\n' || c ==
'.' || c ==
';')
295 bool pat_ref =
false;
296 bool est_kwd =
false;
297 bool sts_kwd =
false;
298 bool gss_kwd =
false;
299 bool htc_kwd =
false;
300 bool fli_kwd =
false;
301 bool wgs_kwd =
false;
302 bool tpa_kwd =
false;
303 bool tsa_kwd =
false;
304 bool tls_kwd =
false;
305 bool env_kwd =
false;
306 bool mga_kwd =
false;
321 if (
str.back() ==
'.') {
322 if (
str.size() >= 2 && *(
str.end() - 2) ==
'.')
326 gbb->SetSource(std::move(
str));
330 gbb->SetKeywords().swap(ibp->
keywords);
351 for (
const string&
key : gbb->GetKeywords()) {
352 fta_keywords_check(
key.c_str(), &est_kwd, &sts_kwd, &gss_kwd, &htc_kwd, &fli_kwd, &wgs_kwd, &tpa_kwd, &env_kwd, &mga_kwd, &tsa_kwd, &tls_kwd);
372 gbb->SetOrigin(
string(bptr, eptr));
385 string div_str(bptr, bptr + 3);
386 gbb->SetDiv(div_str);
393 const char* p_div = gbb->GetDiv().c_str();
414 if (!
HasHtg(gbb->GetKeywords())) {
449 if (ibp->
is_mga ==
false) {
468 if (ibp->
is_tsa ==
false) {
478 if (ibp->
is_tls ==
false) {
488 if (
i == 2 && ibp->
htg > 0 && env_kwd)
489 ErrPostStr(
SEV_WARNING,
ERR_KEYWORD_HTGPlusENV,
"This HTG record also has the ENV keyword, which is an unusual combination. Confirmation that isolation and cloning steps actually occured might be appropriate.");
490 else if ((
i == 2 && wgs_kwd && tpa_kwd) ||
491 (
i == 2 && tsa_kwd && tpa_kwd) ||
493 env_kwd && tpa_kwd)) {
494 }
else if (
i != 2 || env_kwd ==
false ||
495 (est_kwd ==
false && gss_kwd ==
false && wgs_kwd ==
false)) {
497 ibp->
is_tsa ==
false || env_kwd ==
false) {
499 (env_kwd ==
false && tpa_kwd ==
false)) {
500 ErrPostStr(
SEV_REJECT,
ERR_KEYWORD_ConflictingKeywords,
"This record contains more than one of the special keywords used to indicate that a sequence is an HTG, EST, GSS, STS, HTC, WGS, ENV, FLI_CDNA, TPA, CAGE, TSA or TLS sequence.");
511 wgs_kwd ==
false && tpa_kwd ==
false && env_kwd ==
false) {
526 if (! est_kwd && kw.find(
"EST") != string::npos) {
529 if (! sts_kwd && kw.find(
"STS") != string::npos) {
539 p_div = gbb->GetDiv();
541 check_div(ibp->
is_pat, pat_ref, est_kwd, sts_kwd, gss_kwd, if_cds, p_div, &tech, ibp->
bases, pp->
source, drop);
556 }
else if (gbb->GetDiv() ==
"CON") {
567 gbb->IsSetDiv() && gbb->GetDiv() !=
"EST") {
575 bool is_htc_div = gbb->IsSetDiv() && gbb->GetDiv() ==
"HTC",
576 has_htc =
HasHtc(gbb->GetKeywords());
578 if (is_htc_div && ! has_htc) {
583 if (! is_htc_div && has_htc) {
591 if (*p ==
'm' || *p ==
'r')
609 if (gbb->IsSetDiv()) {
610 if (gbb->GetDiv() ==
"EST") {
615 }
else if (gbb->GetDiv() ==
"STS") {
620 }
else if (gbb->GetDiv() ==
"GSS") {
625 }
else if (gbb->GetDiv() ==
"HTC") {
630 }
else if (gbb->GetDiv() ==
"SYN" && bio_src && bio_src->
IsSetOrigin() &&
659 for (
const auto& subtype : bio_src->
GetSubtype()) {
670 if (!
mod->IsSetSubtype())
687 }
else if (gbb->IsSetDiv() &&
697 ret.
Reset(gbb.Release());
714 char* molstr =
nullptr;
746 mol_info->SetTechexp(
"cage");
749 GetFlatBiomol(mol_info->SetBiomol(), mol_info->GetTech(), molstr, pp, entry, org_ref);
751 mol_info->ResetBiomol();
781 while (*bptr !=
' ' && *bptr !=
'\0')
831 org_ref.
SetOrgname().SetLineage(std::move(s));
836 bioseq.
SetDescr().Set().push_back(descr);
859 for (q = res, p = res; *p !=
'\0'; p++)
873 for (p = q; *p !=
'\0' && *p !=
'\n' && *p !=
';';)
879 cur_field->
SetLabel().SetStr(
"accession");
885 field_set->
SetData().SetFields().push_back(cur_field);
889 for (q = p; *p >=
'0' && *p <=
'9';)
896 cur_field->
SetNum(atoi(q));
897 field_set->
SetData().SetFields().push_back(cur_field);
902 root_field->
SetData().SetFields().push_back(cur_field);
910 user_obj.
SetData().push_back(root_field);
931 for (q = line; *q ==
' ' || *q ==
'\n';)
933 for (
r = res; *q !=
'\0';) {
938 while (*q ==
' ' || *q ==
'\n')
958 user_obj.
SetData().push_back(field);
976 user_obj->
SetType().SetStr(
"RefGeneTracking");
982 for (p += 12; *p ==
' ';)
984 for (
r = p; *p !=
'\0' && *p !=
'\n' && *p !=
' ';)
986 if (*p ==
'\0' || p ==
r)
1006 if (seq_entry.
IsSeq())
1032 id.SetStr(
"CAGE-Tag-List");
1036 field->
SetLabel().SetStr(
"CAGE_tag_total");
1038 user_obj->
SetData().push_back(field);
1042 field->
SetLabel().SetStr(
"CAGE_accession_first");
1044 user_obj->
SetData().push_back(field);
1048 field->
SetLabel().SetStr(
"CAGE_accession_last");
1050 user_obj->
SetData().push_back(field);
1057 descrs.push_back(descr);
1081 for (
auto& descr : bioseq.
SetDescr().Set()) {
1082 if (descr->IsSource()) {
1083 bio_src = &(descr->SetSource());
1085 org_ref = &bio_src->
SetOrg();
1103 if (!
str.empty() &&
str.front() ==
' ') {
1105 for (
char c :
str) {
1118 bioseq.
SetDescr().Set().push_back(descr);
1153 (title.empty() || (!
StringEquN(title.c_str(),
"TPA:", 4) &&
1154 !
StringEquN(title.c_str(),
"TPA_exp:", 8) &&
1155 !
StringEquN(title.c_str(),
"TPA_inf:", 8) &&
1156 !
StringEquN(title.c_str(),
"TPA_asm:", 8) &&
1157 !
StringEquN(title.c_str(),
"TPA_reasm:", 10)))) {
1163 (title.empty() || !
StringEquN(title.c_str(),
"TSA:", 4))) {
1168 if (ibp->
is_tls && (title.empty() || !
StringEquN(title.c_str(),
"TLS:", 4))) {
1179 for (; dbp; dbp = dbp->
mpNext) {
1187 bioseq.
SetDescr().Set().push_back(descr);
1192 for (; dbp; dbp = dbp->
mpNext) {
1200 bioseq.
SetDescr().Set().push_back(descr);
1212 mol_info->ResetTech();
1214 mol_info->SetTech(tech);
1217 if (mol_info->IsSetBiomol() || mol_info->IsSetTech()) {
1220 bioseq.
SetDescr().Set().push_back(descr);
1228 if (pp->
taxserver == 1 && gbbp->IsSetDiv())
1239 bioseq.
SetDescr().Set().push_back(descr);
1263 if (mol_info.
NotEmpty() && mol_info->IsSetTech() &&
1287 for (
auto& user_obj : user_objs) {
1290 bioseq.
SetDescr().Set().push_back(descr);
1294 for (q =
str, p = q; *p !=
'\0';) {
1295 if (*p ==
';' && (p[1] ==
' ' || p[1] ==
'~'))
1297 if (*p ==
'~' || *p ==
' ') {
1299 for (p++; *p ==
' ' || *p ==
'~';)
1310 bioseq.
SetDescr().Set().push_back(descr);
1327 }
else if (ibp->
lc.
date > 0) {
1338 bioseq.
SetDescr().Set().push_back(descr);
1351 StringCpy(division, locusText.substr(64, 3).c_str());
1370 int total_dropped = 0;
1379 unsigned char*
conv;
1385 bool seq_long =
false;
1396 for (
int i = 0;
i < imax;
i++) {
1419 ebp =
static_cast<EntryBlk*
>(pEntry->mpData);
1420 ptr = pEntry->mOffset;
1421 eptr = ptr + pEntry->len;
1428 if (ppCurrentEntry->lc.div > -1) {
1464 conv = protconv.get();
1467 conv = dnaconv.get();
1509 else if (ibp->
htg == 4 || ibp->
htg == 1 || ibp->
htg == 2 ||
1512 }
else if (ibp->
gaps)
1528 if (pEntry->mpQscore.empty() && pp->
accver) {
1551 pEntry->mpQscore.clear();
1555 id->SetPatent(*ibp->
psip);
1556 bioseq->
SetId().push_back(
id);
1563 pp->
debug ==
false &&
1571 }
else if (ibp->
is_wgs) {
1606 for (; j <=
i; j++) {
1618 for (j = segindx; j <=
i; j++) {
1625 seq_entries.clear();
1632 if (seq_entries.empty()) {
1636 for (; j <=
i; j++) {
1671 if (pp->
limit != 0) {
1674 for (; j <=
i; j++) {
1680 if (tibp->
htg == 1 || tibp->
htg == 2 || tibp->
htg == 4) {
1688 if (ibp->
htg == 1 || ibp->
htg == 2 || ibp->
htg == 4) {
1697 for (
auto pEntry : seq_entries) {
1706 if (pp->
qamode && ! seq_entries.empty())
1744 total_long += (
i - segindx + 1);
1751 total += (
i - segindx + 1);
1757 for (
int j = segindx; j <=
i; j++) {
1766 seq_entries.clear();
1789 int total_dropped = 0;
1790 unique_ptr<Entry> pEntry;
1791 unsigned char*
conv;
1803 for (
int i = 0;
i < imax;
i++) {
1839 auto lastType = pEntry->mSections.back()->mType;
1853 pEntry->mSeqEntry->SetSeq(*pBioseq);
1857 if (pEntry->IsAA()) {
1859 conv = protconv.get();
1862 conv = dnaconv.get();
1865 if (! pEntry->xInitSeqInst(
conv)) {
2299 for (CSeq_annot::C_Data::TFtable::iterator feat = feat_table.begin(); feat != feat_table.end();) {
2300 if ((*feat)->IsSetLocation() && (*feat)->GetLocation().GetId()) {
2309 feats.push_back(*feat);
2310 feat = feat_table.erase(feat);
2323 for (
auto& entry : seq_entries) {
2325 const CSeq_id& first_id = *(*bioseq->GetId().begin());
2326 if (
IsSegBioseq(first_id) || ! bioseq->IsSetAnnot())
2331 for (CBioseq::TAnnot::iterator annot = annots.begin(); annot != annots.end();) {
2332 if (! (*annot)->IsSetData() || ! (*annot)->GetData().IsFtable()) {
2340 if (! feat_table.empty()) {
2345 annot = annots.erase(annot);
2354 for (
auto& entry : seq_entries) {
2357 return bio_set.operator->();
2382 if (! feats_no_id.empty() && parts)
2385 for (
auto& annot : parts->
SetAnnot()) {
2386 if (! annot->IsFtable())
2389 annot->SetData().SetFtable().splice(annot->SetData().SetFtable().end(), feats_no_id);
2395 new_annot->
SetData().SetFtable().swap(feats_no_id);
2396 parts->
SetAnnot().push_back(new_annot);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool no_reference(const CBioseq &bioseq)
void SeqToDelta(CBioseq &bioseq, Int2 tech)
CMolInfo::TTech fta_check_con_for_wgs(CBioseq &bioseq)
bool fta_check_htg_kwds(TKeywordList &kwds, IndexblkPtr ibp, CMolInfo &mol_info)
void fta_set_molinfo_completeness(CBioseq &bioseq, const Indexblk *ibp)
void fta_add_hist(ParserPtr pp, CBioseq &bioseq, CGB_block::TExtra_accessions &extra_accs, Parser::ESource source, CSeq_id::E_Choice acctype, bool pricon, const char *acc)
void AssemblyGapsToDelta(CBioseq &bioseq, GapFeatsPtr gfp, bool *drop)
bool fta_parse_tpa_tsa_block(CBioseq &bioseq, char *offset, char *acnum, Int2 vernum, size_t len, Int2 col_data, bool tpa)
string GetQSFromFile(FILE *fd, const Indexblk *ibp)
void fta_get_project_user_object(TSeqdescList &descrs, char *offset, Parser::EFormat format, bool *drop, Parser::ESource source)
bool check_cds(const DataBlk &entry, Parser::EFormat format)
void fta_create_far_fetch_policy_user_object(CBioseq &bsp, Int4 num)
void fta_tsa_tls_comment_dblink_check(const CBioseq &bioseq, bool is_tsa)
void fta_remove_cleanup_user_object(CSeq_entry &seq_entry)
bool fta_dblink_has_sra(const CRef< CUser_object > &uop)
void GapsToDelta(CBioseq &bioseq, GapFeatsPtr gfp, bool *drop)
void fta_get_dblink_user_object(TSeqdescList &descrs, char *offset, size_t len, Parser::ESource source, bool *drop, CRef< CUser_object > &dbuop)
void err_install(const Indexblk *ibp, bool accver)
bool no_date(Parser::EFormat format, const TSeqdescList &descrs)
void fta_parse_structured_comment(char *str, bool &bad, TUserObjVector &objs)
Int4 fta_fix_seq_loc_id(TSeqLocList &locs, ParserPtr pp, const char *location, const char *name, bool iscon)
void GetGenBankSubBlock(const DataBlk &entry, size_t bases)
void StripSerialNumbers(TEntryList &seq_entries)
void AddNIDSeqId(CBioseq &bioseq, const DataBlk &entry, Int2 type, Int2 coldata, Parser::ESource source)
void fta_fix_orgref_div(const CBioseq::TAnnot &annots, COrg_ref *org_ref, CGB_block &gbb)
void DefVsHTGKeywords(CMolInfo::TTech tech, const DataBlk &entry, Int2 what, Int2 ori, bool cancelled)
bool IsSegBioseq(const CSeq_id &id)
void xGetGenBankSubBlocks(Entry &entry, size_t bases)
void fta_sort_seqfeat_cit(TEntryList &seq_entries)
void PackEntries(TEntryList &seq_entries)
void fta_set_strandedness(TEntryList &seq_entries)
void CheckHTGDivision(const char *div, CMolInfo::TTech tech)
unique_ptr< unsigned char[]> GetDNAConv(void)
unique_ptr< unsigned char[]> GetProteinConv(void)
char * GetDescrComment(char *offset, size_t len, Uint2 col_data, bool is_htg, bool is_pat)
void GetSequenceOfKeywords(const DataBlk &entry, int type, Uint2 col_data, TKeywordList &keywords)
void EntryCheckDivCode(TEntryList &seq_entries, ParserPtr pp)
char * GetGenBankBlock(DataBlkPtr *chain, char *ptr, Int2 *retkw, char *eptr)
void GetSeqExt(ParserPtr pp, CSeq_loc &seq_loc)
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
bool fta_EntryCheckGBBlock(TEntryList &seq_entries)
char * SrchNodeSubType(const DataBlk &entry, Int2 type, Int2 subtype, size_t *len)
void xGetGenBankBlocks(Entry &entry)
void fta_sort_descr(TEntryList &seq_entries)
void BuildBioSegHeader(ParserPtr pp, TEntryList &entries, const CSeq_loc &seqloc)
void GetExtraAccession(IndexblkPtr ibp, bool allow_uwsec, Parser::ESource source, TAccessionList &accessions)
bool check_div(bool pat_acc, bool pat_ref, bool est_kwd, bool sts_kwd, bool gss_kwd, bool if_cds, string &div, CMolInfo::TTech *tech, size_t bases, Parser::ESource source, bool &drop)
CRef< CBioseq > CreateEntryBioseq(ParserPtr pp)
void xFreeEntry(DataBlkPtr entry)
void ProcessCitations(TEntryList &seq_entries)
void SetToTime(const CTime &time, EPrecision prec=ePrecision_second)
static bool IsNa(EMol mol)
CUser_field & SetString(const char *value)
EntryPtr LoadEntryGenbank(ParserPtr pp, size_t offset, size_t len)
DataBlkPtr LoadEntry(ParserPtr pp, size_t offset, size_t len)
void g_InstantiateMissingProteins(CSeq_entry_Handle entryHandle)
void FinalCleanup(TEntryList &seq_entries)
#define ERR_SEQUENCE_BadData
#define ERR_TPA_TpaSpansMissing
#define ERR_ENTRY_LongSequence
#define ERR_FORMAT_MissingContigFeature
#define ERR_KEYWORD_ShouldNotBeTPA
#define ERR_DIVISION_BadTSADivcode
#define ERR_FORMAT_MissingSequenceData
#define ERR_DIVISION_InvalidHTCKeyword
#define ERR_KEYWORD_IllegalForCON
#define ERR_DIVISION_MissingHTGKeywords
#define ERR_QSCORE_FailedToParse
#define ERR_ENTRY_LongHTGSSequence
#define ERR_KEYWORD_MissingTSA
#define ERR_DIVISION_BadTPADivcode
#define ERR_REFERENCE_No_references
#define ERR_KEYWORD_ShouldNotBeTLS
#define ERR_ENTRY_GBBlock_not_Empty
#define ERR_KEYWORD_HTGPlusENV
#define ERR_DEFINITION_MissingTPA
#define ERR_ENTRY_Skipped
#define ERR_DEFINITION_MissingTLS
#define ERR_KEYWORD_ESTSubstring
#define ERR_KEYWORD_ConflictingKeywords
#define ERR_DIVISION_ConDivLacksContig
#define ERR_LOCATION_ContigHasNull
#define ERR_SEGMENT_OnlyOneMember
#define ERR_KEYWORD_ENV_NoMatchingQualifier
#define ERR_KEYWORD_ShouldNotBeTSA
#define ERR_KEYWORD_STSSubstring
#define ERR_DIVISION_UnknownDivCode
#define ERR_KEYWORD_MissingTLS
#define ERR_DEFINITION_ShouldNotBeTSA
#define ERR_SEGMENT_Rejected
#define ERR_DIVISION_MissingHTCKeyword
#define ERR_DIVISION_MappedtoCON
#define ERR_DIVISION_MappedtoEST
#define ERR_FORMAT_ContigWithSequenceData
#define ERR_KEYWORD_NoGeneExpressionKeywords
#define ERR_DEFINITION_MissingTSA
#define ERR_DEFINITION_ShouldNotBeTPA
#define ERR_FORMAT_MissingEnd
#define ERR_KEYWORD_MissingTPA
#define ERR_DIVISION_ConDivInSegset
#define ERR_ENTRY_ParsingComplete
#define ERR_DIVISION_Mismatch
#define ERR_ORGANISM_NoOrganism
#define ERR_DATE_IllegalDate
#define ERR_DIVISION_HTCWrongMolType
#define ERR_KEYWORD_ShouldNotBeCAGE
#define ERR_DEFINITION_ShouldNotBeTLS
#define ERR_TSA_UnexpectedPrimaryAccession
list< CRef< objects::CSeq_entry > > TEntryList
bool QscoreToSeqAnnot(const string &qscore, CBioseq &bioseq, char *acc, Int2 ver, bool check_minmax, bool allow_na)
std::list< CRef< objects::CSeq_feat > > TSeqFeatList
std::list< CRef< objects::CSeqdesc > > TSeqdescList
std::vector< CRef< objects::CUser_object > > TUserObjVector
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
void StringNCpy(char *d, const char *s, size_t n)
size_t StringLen(const char *s)
void MemCpy(void *p, const void *q, size_t sz)
char * StringNew(size_t sz)
void FtaDeletePrefix(int prefix)
void fta_find_pub_explore(ParserPtr pp, TEntryList &seq_entries)
static const char * str(char *buf, int n)
bool GetGenBankInstContig(const DataBlk &entry, CBioseq &bsp, ParserPtr pp)
static CRef< CGB_block > GetGBBlock(ParserPtr pp, const DataBlk &entry, CMolInfo &mol_info, CBioSource *bio_src)
static string GetGenBankLineage(string_view sv)
static void fta_get_str_user_field(char *line, const Char *tag, CUser_object &user_obj)
void CheckFeatSeqLoc(TEntryList &seq_entries)
static CRef< CMolInfo > GetGenBankMolInfo(ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
static void FindFeatSeqLoc(TEntryList &seq_entries, TSeqFeatList &feats)
static void FakeGenBankBioSources(const DataBlk &entry, CBioseq &bioseq)
static void CheckContigEverywhere(IndexblkPtr ibp, Parser::ESource source)
bool GenBankAsciiOrig(ParserPtr pp)
static void fta_get_user_object(CSeq_entry &seq_entry, const DataBlk &entry)
bool GenBankAscii(ParserPtr pp)
static void GenBankGetDivision(char *division, Int4 div, const DataBlk &entry)
static void fta_get_user_field(char *line, const Char *tag, CUser_object &user_obj)
static void SrchFeatSeqLoc(TSeqFeatList &feats, CSeq_annot::C_Data::TFtable &feat_table)
static CBioseq_set * GetParts(TEntryList &seq_entries)
static void fta_get_mga_user_object(TSeqdescList &descrs, char *offset, size_t len)
static char * GBDivOffset(const DataBlk &entry, Int4 div_shift)
static bool GetGenBankInst(ParserPtr pp, const DataBlk &entry, unsigned char *dnaconv)
static void xGenBankGetDivision(char *division, Int4 div, const string &locusText)
static void GetGenBankDescr(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
void DealWithGenes(TEntryList &seq_entries, ParserPtr pp)
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
@ eCurrent
Use current time. See also CCurrentTime.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
TGenome GetGenome(void) const
Get the Genome member data.
TOrigin GetOrigin(void) const
Get the Origin member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
@ eSubtype_environmental_sample
@ eOrigin_synthetic
purely synthetic
bool IsSetData(void) const
Check if a value has been assigned to Data data member (the object itself).
TStd & SetStd(void)
Select the variant.
TData & SetData(void)
Assign a value to Data data member.
void SetNum(TNum value)
Assign a value to Num data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TMod & GetMod(void) const
Get the Mod member data.
const TDiv & GetDiv(void) const
Get the Div member data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetDiv(void) const
Check if a value has been assigned to Div data member (the GenBank division code).
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
@ eSubtype_metagenome_source
@ eSeq_code_type_iupacaa
IUPAC 1 letter amino acid code.
@ eSeq_code_type_iupacna
IUPAC 1 letter nuc acid code.
bool IsMix(void) const
Check if variant Mix is selected.
const TMix & GetMix(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSeq(void) const
Check if variant Seq is selected.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq & SetSeq(void)
Select the variant.
@ eClass_parts
parts for 2 or 3
TRepr GetRepr(void) const
Get the Repr member data.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
TPub & SetPub(void)
Select the variant.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TGenbank & SetGenbank(void)
Select the variant.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
TTech GetTech(void) const
Get the Tech member data.
const Tdata & Get(void) const
Get the member data.
TComment & SetComment(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
void ResetTech(void)
Reset Tech data member.
TSource & SetSource(void)
Select the variant.
void SetTopology(TTopology value)
Assign a value to Topology data member.
ETopology
topology of molecule
void SetDescr(TDescr &value)
Assign a value to Descr data member.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
TUser & SetUser(void)
Select the variant.
void SetRepr(TRepr value)
Assign a value to Repr data member.
EStrand
strandedness in living organism
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetTech(TTech value)
Assign a value to Tech data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TMolinfo & SetMolinfo(void)
Select the variant.
TUpdate_date & SetUpdate_date(void)
Select the variant.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eRepr_virtual
no seq data
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_other
use Source.techexp
@ eTech_htc
high throughput cDNA
@ eTech_targeted
targeted locus sets/studies
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_survey
one-pass genomic sequence
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
CRef< CDate_std > GetUpdateDate(const char *ptr, Parser::ESource source)
int CheckTPG(const string &str)
int CheckSTRAND(const string &str)
Int4 IsNewAccessFormat(const Char *acnum)
Int2 CheckDIV(const char *str)
void GetFlatBiomol(CMolInfo::TBiomol &biomol, CMolInfo::TTech tech, char *molstr, ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
void LoadFeat(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
std::list< SeqLoc > TSeqLocList
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlkPtr dbp, Uint2 col_data)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CRef< objects::CSeq_entry > seq_entry
CRef< objects::CPatent_seq_id > psip
char *(* ff_get_qscore_pp)(const char *accession, Int2 v, Parser *pp)
vector< IndexblkPtr > entrylist
bool allow_crossdb_featloc
char *(* ff_get_qscore)(const char *accession, Int2 v)
bool GetGenomeInfo(CBioSource &bsp, string_view bptr)
void MaybeCutGbblockSource(TEntryList &seq_entries)
bool HasHtg(const TKeywordList &keywords)
bool HasHtc(const TKeywordList &keywords)
char * SrchTheChar(char *bptr, char *eptr, Char letter)
bool fta_tls_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void RemoveHtgPhase(TKeywordList &keywords)
string GetBlkDataReplaceNewLine(string_view instr, Uint2 indent)
void fta_remove_tsa_keywords(TKeywordList &kwds, Parser::ESource source)
void fta_remove_tpa_keywords(TKeywordList &kwds)
void fta_remove_keywords(CMolInfo::TTech tech, TKeywordList &kwds)
void fta_remove_tls_keywords(TKeywordList &kwds, Parser::ESource source)
char * xSrchNodeType(const DataBlk &entry, Int4 type, size_t *len)
void fta_keywords_check(const char *str, bool *estk, bool *stsk, bool *gssk, bool *htck, bool *flik, bool *wgsk, bool *tpak, bool *envk, bool *mgak, bool *tsak, bool *tlsk)
DataBlkPtr TrackNodeType(const DataBlk &entry, Int2 type)
void fta_remove_mag_keywords(TKeywordList &kwds)
bool IsCancelled(const TKeywordList &keywords)
bool fta_tsa_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void fta_remove_env_keywords(TKeywordList &kwds)
bool fta_tpa_keywords_check(const TKeywordList &kwds)
bool fta_check_mga_keywords(CMolInfo &mol_info, const TKeywordList &kwds)
CRef< CSeq_loc > xgbparseint_ver(string_view raw_intervals, bool &keep_rawPt, int &numErrors, const TSeqIdList &seq_ids, bool accver)
void XGappedSeqLocsToDeltaSeqs(const TSeqLocList &locs, TDeltaList &deltas)