91 #define THIS_FILE "gb_ascii.cpp"
99 return (entry.
mOffset + div_shift);
107 if (condiv && ibp->
segnum != 0) {
126 }
else if (condiv && ibp->
is_contig ==
false && ibp->
origin ==
false) {
145 bool allow_crossdb_featloc;
160 for (q = p,
r = p; *q !=
'\0'; q++)
161 if (*q !=
'\n' && *q !=
'\t' && *q !=
' ')
165 for (q = p; *q !=
'\0'; q++)
166 if ((q[0] ==
',' && q[1] ==
',') || (q[0] ==
'(' && q[1] ==
',') ||
167 (q[0] ==
',' && q[1] ==
')'))
268 for (p =
str; *p !=
'\0';)
275 if (*p !=
' ' && *p !=
'\t' && *p !=
'\n' && *p !=
'.' && *p !=
';') {
316 bool pat_ref =
false;
317 bool est_kwd =
false;
318 bool sts_kwd =
false;
319 bool gss_kwd =
false;
320 bool htc_kwd =
false;
321 bool fli_kwd =
false;
322 bool wgs_kwd =
false;
323 bool tpa_kwd =
false;
324 bool tsa_kwd =
false;
325 bool tls_kwd =
false;
326 bool env_kwd =
false;
327 bool mga_kwd =
false;
343 if (p && p >
str && p[1] ==
'\0' && *(p - 1) ==
'.')
351 gbb->SetKeywords().swap(ibp->
keywords);
372 for (
const string&
key : gbb->GetKeywords()) {
373 fta_keywords_check(
key.c_str(), &est_kwd, &sts_kwd, &gss_kwd, &htc_kwd, &fli_kwd, &wgs_kwd, &tpa_kwd, &env_kwd, &mga_kwd, &tsa_kwd, &tls_kwd);
393 gbb->SetOrigin(
string(bptr, eptr));
406 string div_str(bptr, bptr + 3);
407 gbb->SetDiv(div_str);
414 const char* p_div = gbb->GetDiv().c_str();
435 if (!
HasHtg(gbb->GetKeywords())) {
470 if (ibp->
is_mga ==
false) {
489 if (ibp->
is_tsa ==
false) {
499 if (ibp->
is_tls ==
false) {
509 if (
i == 2 && ibp->
htg > 0 && env_kwd)
510 ErrPostEx(
SEV_WARNING,
ERR_KEYWORD_HTGPlusENV,
"This HTG record also has the ENV keyword, which is an unusual combination. Confirmation that isolation and cloning steps actually occured might be appropriate.");
511 else if ((
i == 2 && wgs_kwd && tpa_kwd) ||
512 (
i == 2 && tsa_kwd && tpa_kwd) ||
514 env_kwd && tpa_kwd)) {
515 }
else if (
i != 2 || env_kwd ==
false ||
516 (est_kwd ==
false && gss_kwd ==
false && wgs_kwd ==
false)) {
518 ibp->
is_tsa ==
false || env_kwd ==
false) {
520 (env_kwd ==
false && tpa_kwd ==
false)) {
521 ErrPostEx(
SEV_REJECT,
ERR_KEYWORD_ConflictingKeywords,
"This record contains more than one of the special keywords used to indicate that a sequence is an HTG, EST, GSS, STS, HTC, WGS, ENV, FLI_CDNA, TPA, CAGE, TSA or TLS sequence.");
532 wgs_kwd ==
false && tpa_kwd ==
false && env_kwd ==
false) {
548 if (kwp && est_kwd ==
false) {
552 if (kwp && sts_kwd ==
false) {
563 p_div = gbb->GetDiv();
565 check_div(ibp->
is_pat, pat_ref, est_kwd, sts_kwd, gss_kwd, if_cds, p_div, &tech, ibp->
bases, pp->
source, drop);
580 }
else if (gbb->GetDiv() ==
"CON") {
591 gbb->IsSetDiv() && gbb->GetDiv() !=
"EST") {
599 bool is_htc_div = gbb->IsSetDiv() && gbb->GetDiv() ==
"HTC",
600 has_htc =
HasHtc(gbb->GetKeywords());
602 if (is_htc_div && ! has_htc) {
607 if (! is_htc_div && has_htc) {
615 if (*p ==
'm' || *p ==
'r')
633 if (gbb->IsSetDiv()) {
634 if (gbb->GetDiv() ==
"EST") {
639 }
else if (gbb->GetDiv() ==
"STS") {
644 }
else if (gbb->GetDiv() ==
"GSS") {
649 }
else if (gbb->GetDiv() ==
"HTC") {
654 }
else if (gbb->GetDiv() ==
"SYN" && bio_src && bio_src->
IsSetOrigin() &&
683 for (
const auto& subtype : bio_src->
GetSubtype()) {
694 if (!
mod->IsSetSubtype())
711 }
else if (gbb->IsSetDiv() &&
721 ret.
Reset(gbb.Release());
738 char* molstr =
nullptr;
770 mol_info->SetTechexp(
"cage");
773 GetFlatBiomol(mol_info->SetBiomol(), mol_info->GetTech(), molstr, pp, entry, org_ref);
775 mol_info->ResetBiomol();
806 while (*bptr !=
' ' && *bptr !=
'\0')
851 taxname = taxname.substr(0, taxname.size() - 1);
862 bioseq.
SetDescr().Set().push_back(descr);
885 for (q = res, p = res; *p !=
'\0'; p++)
899 for (p = q; *p !=
'\0' && *p !=
'\n' && *p !=
';';)
905 cur_field->
SetLabel().SetStr(
"accession");
911 field_set->
SetData().SetFields().push_back(cur_field);
915 for (q = p; *p >=
'0' && *p <=
'9';)
922 cur_field->
SetNum(atoi(q));
923 field_set->
SetData().SetFields().push_back(cur_field);
928 root_field->
SetData().SetFields().push_back(cur_field);
936 user_obj.
SetData().push_back(root_field);
957 for (q = line; *q ==
' ' || *q ==
'\n';)
959 for (
r = res; *q !=
'\0';) {
964 while (*q ==
' ' || *q ==
'\n')
984 user_obj.
SetData().push_back(field);
1006 user_obj->
SetType().SetStr(
"RefGeneTracking");
1012 for (p += 12; *p ==
' ';)
1014 for (
r = p; *p !=
'\0' && *p !=
'\n' && *p !=
' ';)
1016 if (*p ==
'\0' || p ==
r)
1036 if (seq_entry.
IsSeq())
1062 id.SetStr(
"CAGE-Tag-List");
1066 field->
SetLabel().SetStr(
"CAGE_tag_total");
1068 user_obj->
SetData().push_back(field);
1072 field->
SetLabel().SetStr(
"CAGE_accession_first");
1074 user_obj->
SetData().push_back(field);
1078 field->
SetLabel().SetStr(
"CAGE_accession_last");
1080 user_obj->
SetData().push_back(field);
1087 descrs.push_back(descr);
1112 for (
auto& descr : bioseq.
SetDescr().Set()) {
1113 if (descr->IsSource()) {
1114 bio_src = &(descr->SetSource());
1116 org_ref = &bio_src->
SetOrg();
1135 for (p =
str; *p ==
' ';)
1146 bioseq.
SetDescr().Set().push_back(descr);
1181 (title.empty() || (!
StringEquN(title.c_str(),
"TPA:", 4) &&
1182 !
StringEquN(title.c_str(),
"TPA_exp:", 8) &&
1183 !
StringEquN(title.c_str(),
"TPA_inf:", 8) &&
1184 !
StringEquN(title.c_str(),
"TPA_asm:", 8) &&
1185 !
StringEquN(title.c_str(),
"TPA_reasm:", 10)))) {
1191 (title.empty() || !
StringEquN(title.c_str(),
"TSA:", 4))) {
1196 if (ibp->
is_tls && (title.empty() || !
StringEquN(title.c_str(),
"TLS:", 4))) {
1207 for (; dbp; dbp = dbp->
mpNext) {
1215 bioseq.
SetDescr().Set().push_back(descr);
1220 for (; dbp; dbp = dbp->
mpNext) {
1228 bioseq.
SetDescr().Set().push_back(descr);
1240 mol_info->ResetTech();
1242 mol_info->SetTech(tech);
1245 if (mol_info->IsSetBiomol() || mol_info->IsSetTech()) {
1248 bioseq.
SetDescr().Set().push_back(descr);
1256 if (pp->
taxserver == 1 && gbbp->IsSetDiv())
1267 bioseq.
SetDescr().Set().push_back(descr);
1291 if (mol_info.
NotEmpty() && mol_info->IsSetTech() &&
1315 for (
auto& user_obj : user_objs) {
1318 bioseq.
SetDescr().Set().push_back(descr);
1322 for (q =
str, p = q; *p !=
'\0';) {
1323 if (*p ==
';' && (p[1] ==
' ' || p[1] ==
'~'))
1325 if (*p ==
'~' || *p ==
' ') {
1327 for (p++; *p ==
' ' || *p ==
'~';)
1338 bioseq.
SetDescr().Set().push_back(descr);
1355 }
else if (ibp->
lc.
date > 0) {
1366 bioseq.
SetDescr().Set().push_back(descr);
1379 StringCpy(division, locusText.substr(64, 3).c_str());
1398 int total_dropped = 0;
1407 unsigned char*
conv;
1413 bool seq_long =
false;
1424 for (
int i = 0;
i < imax;
i++) {
1447 ebp =
static_cast<EntryBlk*
>(pEntry->mpData);
1448 ptr = pEntry->mOffset;
1449 eptr = ptr + pEntry->len;
1456 if (ppCurrentEntry->lc.div > -1) {
1492 conv = protconv.get();
1495 conv = dnaconv.get();
1537 else if (ibp->
htg == 4 || ibp->
htg == 1 || ibp->
htg == 2 ||
1540 }
else if (ibp->
gaps)
1556 if (pEntry->mpQscore.empty() && pp->
accver) {
1579 pEntry->mpQscore.clear();
1583 id->SetPatent(*ibp->
psip);
1584 bioseq->
SetId().push_back(
id);
1591 pp->
debug ==
false &&
1599 }
else if (ibp->
is_wgs) {
1634 for (; j <=
i; j++) {
1646 for (j = segindx; j <=
i; j++) {
1653 seq_entries.clear();
1660 if (seq_entries.empty()) {
1664 for (; j <=
i; j++) {
1699 if (pp->
limit != 0) {
1702 for (; j <=
i; j++) {
1708 if (tibp->
htg == 1 || tibp->
htg == 2 || tibp->
htg == 4) {
1716 if (ibp->
htg == 1 || ibp->
htg == 2 || ibp->
htg == 4) {
1725 for (
auto pEntry : seq_entries) {
1734 if (pp->
qamode && ! seq_entries.empty())
1772 total_long += (
i - segindx + 1);
1779 total += (
i - segindx + 1);
1785 for (
int j = segindx; j <=
i; j++) {
1794 seq_entries.clear();
1817 int total_dropped = 0;
1818 unique_ptr<Entry> pEntry;
1819 unsigned char*
conv;
1831 for (
int i = 0;
i < imax;
i++) {
1867 auto lastType = pEntry->mSections.back()->mType;
1881 pEntry->mSeqEntry->SetSeq(*pBioseq);
1885 if (pEntry->IsAA()) {
1887 conv = protconv.get();
1890 conv = dnaconv.get();
1893 if (! pEntry->xInitSeqInst(
conv)) {
2327 for (CSeq_annot::C_Data::TFtable::iterator feat = feat_table.begin(); feat != feat_table.end();) {
2328 if ((*feat)->IsSetLocation() && (*feat)->GetLocation().GetId()) {
2337 feats.push_back(*feat);
2338 feat = feat_table.erase(feat);
2351 for (
auto& entry : seq_entries) {
2353 const CSeq_id& first_id = *(*bioseq->GetId().begin());
2354 if (
IsSegBioseq(first_id) || ! bioseq->IsSetAnnot())
2359 for (CBioseq::TAnnot::iterator annot = annots.begin(); annot != annots.end();) {
2360 if (! (*annot)->IsSetData() || ! (*annot)->GetData().IsFtable()) {
2368 if (! feat_table.empty()) {
2373 annot = annots.erase(annot);
2382 for (
auto& entry : seq_entries) {
2385 return bio_set.operator->();
2410 if (! feats_no_id.empty() && parts)
2413 for (
auto& annot : parts->
SetAnnot()) {
2414 if (! annot->IsFtable())
2417 annot->SetData().SetFtable().splice(annot->SetData().SetFtable().end(), feats_no_id);
2423 new_annot->
SetData().SetFtable().swap(feats_no_id);
2424 parts->
SetAnnot().push_back(new_annot);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool no_reference(const CBioseq &bioseq)
void SeqToDelta(CBioseq &bioseq, Int2 tech)
CMolInfo::TTech fta_check_con_for_wgs(CBioseq &bioseq)
bool fta_check_htg_kwds(TKeywordList &kwds, IndexblkPtr ibp, CMolInfo &mol_info)
void fta_set_molinfo_completeness(CBioseq &bioseq, const Indexblk *ibp)
void fta_add_hist(ParserPtr pp, CBioseq &bioseq, CGB_block::TExtra_accessions &extra_accs, Parser::ESource source, CSeq_id::E_Choice acctype, bool pricon, const char *acc)
void AssemblyGapsToDelta(CBioseq &bioseq, GapFeatsPtr gfp, bool *drop)
bool fta_parse_tpa_tsa_block(CBioseq &bioseq, char *offset, char *acnum, Int2 vernum, size_t len, Int2 col_data, bool tpa)
string GetQSFromFile(FILE *fd, const Indexblk *ibp)
void fta_get_project_user_object(TSeqdescList &descrs, char *offset, Parser::EFormat format, bool *drop, Parser::ESource source)
bool check_cds(const DataBlk &entry, Parser::EFormat format)
void fta_create_far_fetch_policy_user_object(CBioseq &bsp, Int4 num)
void fta_tsa_tls_comment_dblink_check(const CBioseq &bioseq, bool is_tsa)
void fta_remove_cleanup_user_object(CSeq_entry &seq_entry)
bool fta_dblink_has_sra(const CRef< CUser_object > &uop)
void GapsToDelta(CBioseq &bioseq, GapFeatsPtr gfp, bool *drop)
void fta_get_dblink_user_object(TSeqdescList &descrs, char *offset, size_t len, Parser::ESource source, bool *drop, CRef< CUser_object > &dbuop)
void err_install(const Indexblk *ibp, bool accver)
Int4 fta_fix_seq_loc_id(TSeqLocList &locs, ParserPtr pp, char *location, const char *name, bool iscon)
bool no_date(Parser::EFormat format, const TSeqdescList &descrs)
void fta_parse_structured_comment(char *str, bool &bad, TUserObjVector &objs)
void GetGenBankSubBlock(const DataBlk &entry, size_t bases)
void StripSerialNumbers(TEntryList &seq_entries)
void AddNIDSeqId(CBioseq &bioseq, const DataBlk &entry, Int2 type, Int2 coldata, Parser::ESource source)
void fta_fix_orgref_div(const CBioseq::TAnnot &annots, COrg_ref *org_ref, CGB_block &gbb)
char * GetDescrComment(char *offset, size_t len, Int2 col_data, bool is_htg, bool is_pat)
void DefVsHTGKeywords(CMolInfo::TTech tech, const DataBlk &entry, Int2 what, Int2 ori, bool cancelled)
bool IsSegBioseq(const CSeq_id &id)
void xGetGenBankSubBlocks(Entry &entry, size_t bases)
void fta_sort_seqfeat_cit(TEntryList &seq_entries)
void PackEntries(TEntryList &seq_entries)
void fta_set_strandedness(TEntryList &seq_entries)
void CheckHTGDivision(const char *div, CMolInfo::TTech tech)
unique_ptr< unsigned char[]> GetDNAConv(void)
unique_ptr< unsigned char[]> GetProteinConv(void)
void GetSequenceOfKeywords(const DataBlk &entry, int type, int col_data, TKeywordList &keywords)
void EntryCheckDivCode(TEntryList &seq_entries, ParserPtr pp)
char * GetGenBankBlock(DataBlkPtr *chain, char *ptr, Int2 *retkw, char *eptr)
void GetSeqExt(ParserPtr pp, CSeq_loc &seq_loc)
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
bool fta_EntryCheckGBBlock(TEntryList &seq_entries)
char * SrchNodeSubType(const DataBlk &entry, Int2 type, Int2 subtype, size_t *len)
void xGetGenBankBlocks(Entry &entry)
void fta_sort_descr(TEntryList &seq_entries)
void BuildBioSegHeader(ParserPtr pp, TEntryList &entries, const CSeq_loc &seqloc)
void GetExtraAccession(IndexblkPtr ibp, bool allow_uwsec, Parser::ESource source, TAccessionList &accessions)
bool check_div(bool pat_acc, bool pat_ref, bool est_kwd, bool sts_kwd, bool gss_kwd, bool if_cds, string &div, CMolInfo::TTech *tech, size_t bases, Parser::ESource source, bool &drop)
CRef< CBioseq > CreateEntryBioseq(ParserPtr pp)
void xFreeEntry(DataBlkPtr entry)
void ProcessCitations(TEntryList &seq_entries)
void SetToTime(const CTime &time, EPrecision prec=ePrecision_second)
static bool IsNa(EMol mol)
CUser_field & SetString(const char *value)
EntryPtr LoadEntryGenbank(ParserPtr pp, size_t offset, size_t len)
DataBlkPtr LoadEntry(ParserPtr pp, size_t offset, size_t len)
void g_InstantiateMissingProteins(CSeq_entry_Handle entryHandle)
void FinalCleanup(TEntryList &seq_entries)
#define ERR_SEQUENCE_BadData
#define ERR_TPA_TpaSpansMissing
#define ERR_ENTRY_LongSequence
#define ERR_FORMAT_MissingContigFeature
#define ERR_KEYWORD_ShouldNotBeTPA
#define ERR_DIVISION_BadTSADivcode
#define ERR_FORMAT_MissingSequenceData
#define ERR_DIVISION_InvalidHTCKeyword
#define ERR_KEYWORD_IllegalForCON
#define ERR_DIVISION_MissingHTGKeywords
#define ERR_QSCORE_FailedToParse
#define ERR_ENTRY_LongHTGSSequence
#define ERR_KEYWORD_MissingTSA
#define ERR_DIVISION_BadTPADivcode
#define ERR_REFERENCE_No_references
#define ERR_KEYWORD_ShouldNotBeTLS
#define ERR_ENTRY_GBBlock_not_Empty
#define ERR_KEYWORD_HTGPlusENV
#define ERR_DEFINITION_MissingTPA
#define ERR_ENTRY_Skipped
#define ERR_DEFINITION_MissingTLS
#define ERR_KEYWORD_ESTSubstring
#define ERR_KEYWORD_ConflictingKeywords
#define ERR_DIVISION_ConDivLacksContig
#define ERR_LOCATION_ContigHasNull
#define ERR_SEGMENT_OnlyOneMember
#define ERR_KEYWORD_ENV_NoMatchingQualifier
#define ERR_KEYWORD_ShouldNotBeTSA
#define ERR_KEYWORD_STSSubstring
#define ERR_DIVISION_UnknownDivCode
#define ERR_KEYWORD_MissingTLS
#define ERR_DEFINITION_ShouldNotBeTSA
#define ERR_SEGMENT_Rejected
#define ERR_DIVISION_MissingHTCKeyword
#define ERR_DIVISION_MappedtoCON
#define ERR_DIVISION_MappedtoEST
#define ERR_FORMAT_ContigWithSequenceData
#define ERR_KEYWORD_NoGeneExpressionKeywords
#define ERR_DEFINITION_MissingTSA
#define ERR_DEFINITION_ShouldNotBeTPA
#define ERR_FORMAT_MissingEnd
#define ERR_KEYWORD_MissingTPA
#define ERR_DIVISION_ConDivInSegset
#define ERR_ENTRY_ParsingComplete
#define ERR_DIVISION_Mismatch
#define ERR_ORGANISM_NoOrganism
#define ERR_DATE_IllegalDate
#define ERR_DIVISION_HTCWrongMolType
#define ERR_KEYWORD_ShouldNotBeCAGE
#define ERR_DEFINITION_ShouldNotBeTLS
#define ERR_TSA_UnexpectedPrimaryAccession
list< CRef< objects::CSeq_entry > > TEntryList
bool QscoreToSeqAnnot(const string &qscore, CBioseq &bioseq, char *acc, Int2 ver, bool check_minmax, bool allow_na)
std::list< CRef< objects::CSeq_feat > > TSeqFeatList
std::list< CRef< objects::CSeqdesc > > TSeqdescList
std::vector< CRef< objects::CUser_object > > TUserObjVector
char * StringSave(const char *s)
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
void StringNCpy(char *d, const char *s, size_t n)
size_t StringLen(const char *s)
void MemCpy(void *p, const void *q, size_t sz)
char * StringRChr(char *s, const char c)
void FtaDeletePrefix(int prefix)
void fta_find_pub_explore(ParserPtr pp, TEntryList &seq_entries)
bool GetGenBankInstContig(const DataBlk &entry, CBioseq &bsp, ParserPtr pp)
static CRef< CGB_block > GetGBBlock(ParserPtr pp, const DataBlk &entry, CMolInfo &mol_info, CBioSource *bio_src)
static void fta_get_str_user_field(char *line, const Char *tag, CUser_object &user_obj)
void CheckFeatSeqLoc(TEntryList &seq_entries)
static CRef< CMolInfo > GetGenBankMolInfo(ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
static void FindFeatSeqLoc(TEntryList &seq_entries, TSeqFeatList &feats)
static void FakeGenBankBioSources(const DataBlk &entry, CBioseq &bioseq)
static void CheckContigEverywhere(IndexblkPtr ibp, Parser::ESource source)
bool GenBankAsciiOrig(ParserPtr pp)
static void fta_get_user_object(CSeq_entry &seq_entry, const DataBlk &entry)
static char * GetGenBankLineage(char *start, char *end)
bool GenBankAscii(ParserPtr pp)
static void GenBankGetDivision(char *division, Int4 div, const DataBlk &entry)
static void fta_get_user_field(char *line, const Char *tag, CUser_object &user_obj)
static void SrchFeatSeqLoc(TSeqFeatList &feats, CSeq_annot::C_Data::TFtable &feat_table)
static CBioseq_set * GetParts(TEntryList &seq_entries)
static void fta_get_mga_user_object(TSeqdescList &descrs, char *offset, size_t len)
static char * GBDivOffset(const DataBlk &entry, Int4 div_shift)
static bool GetGenBankInst(ParserPtr pp, const DataBlk &entry, unsigned char *dnaconv)
static void xGenBankGetDivision(char *division, Int4 div, const string &locusText)
static void GetGenBankDescr(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
void DealWithGenes(TEntryList &seq_entries, ParserPtr pp)
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
@ eCurrent
Use current time. See also CCurrentTime.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
TGenome GetGenome(void) const
Get the Genome member data.
TOrigin GetOrigin(void) const
Get the Origin member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
@ eSubtype_environmental_sample
@ eOrigin_synthetic
purely synthetic
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
TStd & SetStd(void)
Select the variant.
TData & SetData(void)
Assign a value to Data data member.
void SetNum(TNum value)
Assign a value to Num data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TMod & GetMod(void) const
Get the Mod member data.
const TDiv & GetDiv(void) const
Get the Div member data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetDiv(void) const
GenBank division code Check if a value has been assigned to Div data member.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
@ eSubtype_metagenome_source
@ eSeq_code_type_iupacaa
IUPAC 1 letter amino acid code.
@ eSeq_code_type_iupacna
IUPAC 1 letter nuc acid code.
bool IsMix(void) const
Check if variant Mix is selected.
const TMix & GetMix(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSeq(void) const
Check if variant Seq is selected.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq & SetSeq(void)
Select the variant.
@ eClass_parts
parts for 2 or 3
TRepr GetRepr(void) const
Get the Repr member data.
void SetData(TData &value)
Assign a value to Data data member.
TId & SetId(void)
Assign a value to Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
TPub & SetPub(void)
Select the variant.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TGenbank & SetGenbank(void)
Select the variant.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
TTech GetTech(void) const
Get the Tech member data.
const Tdata & Get(void) const
Get the member data.
TComment & SetComment(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
void ResetTech(void)
Reset Tech data member.
TSource & SetSource(void)
Select the variant.
void SetTopology(TTopology value)
Assign a value to Topology data member.
ETopology
topology of molecule
void SetDescr(TDescr &value)
Assign a value to Descr data member.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
TUser & SetUser(void)
Select the variant.
void SetRepr(TRepr value)
Assign a value to Repr data member.
EStrand
strandedness in living organism
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetTech(TTech value)
Assign a value to Tech data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TMolinfo & SetMolinfo(void)
Select the variant.
TUpdate_date & SetUpdate_date(void)
Select the variant.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eRepr_virtual
no seq data
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_other
use Source.techexp
@ eTech_htc
high throughput cDNA
@ eTech_targeted
targeted locus sets/studies
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_survey
one-pass genomic sequence
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
CRef< CDate_std > GetUpdateDate(const char *ptr, Parser::ESource source)
int CheckTPG(const string &str)
int CheckSTRAND(const string &str)
Int4 IsNewAccessFormat(const Char *acnum)
Int2 CheckDIV(const char *str)
void GetFlatBiomol(CMolInfo::TBiomol &biomol, CMolInfo::TTech tech, char *molstr, ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
void LoadFeat(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
std::list< SeqLoc > TSeqLocList
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlkPtr dbp, Int4 col_data)
static const char * str(char *buf, int n)
CRef< objects::CSeq_entry > seq_entry
CRef< objects::CPatent_seq_id > psip
char *(* ff_get_qscore_pp)(const char *accession, Int2 v, Parser *pp)
vector< IndexblkPtr > entrylist
bool allow_crossdb_featloc
char *(* ff_get_qscore)(const char *accession, Int2 v)
void MaybeCutGbblockSource(TEntryList &seq_entries)
bool GetGenomeInfo(CBioSource &bsp, const Char *bptr)
bool HasHtg(const TKeywordList &keywords)
bool HasHtc(const TKeywordList &keywords)
char * GetBlkDataReplaceNewLine(char *bptr, char *eptr, Int2 start_col_data)
char * SrchTheChar(char *bptr, char *eptr, Char letter)
bool fta_tls_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void RemoveHtgPhase(TKeywordList &keywords)
void fta_remove_tsa_keywords(TKeywordList &kwds, Parser::ESource source)
void fta_remove_tpa_keywords(TKeywordList &kwds)
void fta_remove_keywords(CMolInfo::TTech tech, TKeywordList &kwds)
void fta_remove_tls_keywords(TKeywordList &kwds, Parser::ESource source)
char * xSrchNodeType(const DataBlk &entry, Int4 type, size_t *len)
void fta_keywords_check(const char *str, bool *estk, bool *stsk, bool *gssk, bool *htck, bool *flik, bool *wgsk, bool *tpak, bool *envk, bool *mgak, bool *tsak, bool *tlsk)
void fta_StringCpy(char *dst, const char *src)
DataBlkPtr TrackNodeType(const DataBlk &entry, Int2 type)
void fta_remove_mag_keywords(TKeywordList &kwds)
bool IsCancelled(const TKeywordList &keywords)
bool fta_tsa_keywords_check(const TKeywordList &kwds, Parser::ESource source)
void fta_remove_env_keywords(TKeywordList &kwds)
bool fta_tpa_keywords_check(const TKeywordList &kwds)
bool fta_check_mga_keywords(CMolInfo &mol_info, const TKeywordList &kwds)
CRef< CSeq_loc > xgbparseint_ver(const char *raw_intervals, bool &keep_rawPt, int &numErrors, const TSeqIdList &seq_ids, bool accver)
void XGappedSeqLocsToDeltaSeqs(const TSeqLocList &locs, TDeltaList &deltas)