38 #include <objtools/readers/source_mod_parser.hpp>
83 return st->IsSetSubtype() && (
st->GetSubtype() == m_st);
90 # error "STATIC_SMOD already defined"
// STATIC_SMOD(key_str) declares three constants for a single modifier key:
//   s_Mod_s_<key_str> : char array holding the stringized key text,
//   s_Mod_n_<key_str> : length of that text (sizeof minus the terminating NUL),
//   s_Mod_<key_str>   : a CTempString constructed from the array and the length.
// The expansion deliberately ends without a semicolon; the use site supplies it.
98 #define STATIC_SMOD(key_str) \
99 const char s_Mod_s_##key_str[] = #key_str; \
100 const size_t s_Mod_n_##key_str = sizeof(#key_str)-1; \
101 const CTempString s_Mod_##key_str(s_Mod_s_##key_str, s_Mod_n_##key_str)
193 template<
typename TEnum,
196 TSModEnumMap * s_InitSmodToEnumMap(
199 const TSModNameSet & skip_enum_names,
201 const TEnumNameToValMap & extra_enum_names_to_vals )
203 unique_ptr<TSModEnumMap> smod_enum_map(
new TSModEnumMap);
206 const string & enum_name = it->first;
207 const TEnum enum_val =
static_cast<TEnum
>(it->second);
208 if( skip_enum_names.find(enum_name.c_str()) !=
209 skip_enum_names.end() )
214 auto emplace_result =
215 smod_enum_map->emplace(
218 if( ! emplace_result.second) {
220 "s_InitSmodToEnumMap " << enum_name);
224 for(
auto extra_smod_to_enum : extra_enum_names_to_vals) {
225 auto emplace_result =
226 smod_enum_map->emplace(
228 extra_smod_to_enum.second);
230 if( ! emplace_result.second) {
232 "s_InitSmodToEnumMap " << extra_smod_to_enum.first);
236 return smod_enum_map.release();
241 TSModOrgSubtypeMap * s_InitSModOrgSubtypeMap(
void)
243 const TSModNameSet kDeprecatedOrgSubtypes{
244 "dosage",
"old-lineage",
"old-name"};
251 return s_InitSmodToEnumMap<COrgMod::ESubtype>(
253 kDeprecatedOrgSubtypes,
254 extra_smod_to_enum_names
266 TSModSubSrcSubtype * s_InitSModSubSrcSubtypeMap(
void)
270 TSModNameSet skip_enum_names {
272 "fwd_primer_seq",
"rev_primer_seq",
273 "fwd_primer_name",
"rev_primer_name",
274 "fwd_PCR_primer_seq",
"rev_PCR_primer_seq",
275 "fwd_PCR_primer_name",
"rev_PCR_primer_name",
279 "insertion_seq_name",
286 return s_InitSmodToEnumMap<CSubSource::ESubtype>(
289 extra_smod_to_enum_names);
293 s_InitSModSubSrcSubtypeMap,
nullptr);
295 bool x_FindBrackets(
const CTempString&
str,
size_t& start,
size_t& stop,
size_t& eq_pos)
300 const char* s =
str.data() + start;
302 int nested_brackets = -1;
303 while (
i <
str.size())
309 if (nested_brackets == 0)
315 if (nested_brackets >= 0)
320 if (nested_brackets == 0)
328 if (nested_brackets < 0)
340 void x_AppendIfNonEmpty(
string& s,
const CTempString& o)
357 "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
358 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
359 "-!\"#$%&'()*+,-./0123456789:;<=>?"
360 "@abcdefghijklmnopqrstuvwxyz[\\]^-"
361 "`abcdefghijklmnopqrstuvwxyz{|}~\x7F"
362 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
363 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
364 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
365 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
366 "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
367 "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
368 "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
369 "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
408 if (d->IsUser() && d->GetUser().IsDBLink())
410 for (
auto& u : d->SetUser().SetData())
412 if (u->IsSetLabel() && u->GetLabel().IsStr() &&
484 return * operator->();
496 if (!m_bioseq.Empty())
497 m_descr = &m_bioseq->SetDescr();
499 if (!m_bioset.Empty())
500 m_descr = &m_bioset->SetDescr();
511 m_ptr = &Set().SetSource();
517 m_ptr = &Set().SetMolinfo();
523 m_ptr = &Set().SetGenbank();
529 size_t iMaxModsToParse )
532 string stripped_title;
539 size_t iModsFoundSoFar = 0;
540 for (; (pos < title.
size()) && (iModsFoundSoFar < iMaxModsToParse);
543 size_t lb_pos, end_pos, eq_pos;
545 if (x_FindBrackets(title, lb_pos, end_pos, eq_pos))
549 if (eq_pos < end_pos) {
560 x_AppendIfNonEmpty(stripped_title, skipped);
567 x_AppendIfNonEmpty(stripped_title, rest);
572 return stripped_title;
593 bool had_ftable =
false;
597 if ((*it)->GetData().IsFtable()) {
611 feat->
SetData().SetGene(*gene);
613 ftable->SetData().SetFtable().push_back(feat);
623 if (
prot.IsInitialized() ) {
627 ftable->SetData().SetFtable().push_back(feat);
631 if ( !had_ftable &&
ftable.IsInitialized() ) {
684 seq.
SetDescr().Set().push_back(desc);
696 seq.
SetDescr().Set().push_back(desc);
758 if ((
mod =
FindMod(s_Mod_topology, s_Mod_top)) !=
nullptr) {
772 bool bMolSetViaMolMod =
false;
775 if ((
mod =
FindMod(s_Mod_molecule, s_Mod_mol)) !=
nullptr) {
778 bMolSetViaMolMod =
true;
781 bMolSetViaMolMod =
true;
790 if( ! bMolSetViaMolMod ) {
791 if ((
mod =
FindMod(s_Mod_moltype, s_Mod_mol_type)) !=
nullptr) {
793 if (it == sc_BiomolMap.end()) {
797 seq.
SetInst().SetMol(it->second.m_eMol);
804 if ((
mod =
FindMod(s_Mod_strand)) !=
nullptr) {
817 if ((
mod =
FindMod(s_Mod_comment)) !=
nullptr) {
820 seq.
SetDescr().Set().push_back(desc);
827 vector<string>
names;
832 const auto num_names =
names.size();
833 const auto num_seqs = seqs.size();
834 const auto num_primers =
max(num_names, num_seqs);
836 for(
size_t i=0;
i<num_primers; ++
i) {
840 primer->SetName().Set(
names[
i]);
843 primer->SetSeq().Set(seqs[
i]);
845 primer_set.
Set().push_back(primer);
852 vector<pair<string, string>>& reaction_info)
854 reaction_info.clear();
855 vector<string>
names;
864 if (seqs.front().front() ==
'(') {
865 seqs.front().erase(0,1);
867 if (seqs.back().back() ==
')') {
868 seqs.back().erase(seqs.back().size()-1, 1);
873 const auto num_names =
names.size();
874 const auto num_seqs = seqs.size();
875 const auto num_reactions =
max(num_names, num_seqs);
877 for (
int i=0;
i<num_reactions; ++
i) {
878 const string name = (
i<num_names) ?
names[
i] :
"";
879 const string seq = (
i<num_seqs) ? seqs[
i] :
"";
880 reaction_info.push_back(make_pair(name, seq));
887 using TNameSeqPair = pair<string, string>;
889 const SMod* pNameMod =
nullptr;
890 const SMod* pSeqMod =
nullptr;
892 pNameMod =
FindMod(s_Mod_fwd_primer_name, s_Mod_fwd_pcr_primer_name);
893 pSeqMod =
FindMod(s_Mod_fwd_primer_seq, s_Mod_fwd_pcr_primer_seq);
894 vector<TNameSeqPair> fwd_primer_info;
898 pNameMod =
FindMod(s_Mod_rev_primer_name, s_Mod_rev_pcr_primer_name);
899 pSeqMod =
FindMod(s_Mod_rev_primer_seq, s_Mod_rev_pcr_primer_seq);
900 vector<TNameSeqPair> rev_primer_info;
903 if (fwd_primer_info.empty() &&
904 rev_primer_info.empty()) {
908 auto num_fwd_primer_info = fwd_primer_info.size();
909 auto num_rev_primer_info = rev_primer_info.size();
911 if (num_fwd_primer_info == num_rev_primer_info) {
912 for (
auto i=0;
i<num_fwd_primer_info; ++
i) {
916 pcr_reaction_set->
Set().push_back(pcr_reaction);
920 if (num_fwd_primer_info > num_rev_primer_info) {
921 auto diff = num_fwd_primer_info - num_rev_primer_info;
922 for (
int i=0;
i<diff; ++
i) {
925 pcr_reaction_set->
Set().push_back(pcr_reaction);
928 for (
int i=diff;
i<num_fwd_primer_info; ++
i) {
932 pcr_reaction_set->
Set().push_back(pcr_reaction);
936 if (num_fwd_primer_info < num_rev_primer_info) {
937 for (
int i=0;
i<num_fwd_primer_info; ++
i) {
941 pcr_reaction_set->
Set().push_back(pcr_reaction);
944 for (
int i=num_fwd_primer_info;
i<num_rev_primer_info; ++
i) {
947 pcr_reaction_set->
Set().push_back(pcr_reaction);
957 bool reset_taxid =
false;
960 if (organism.
empty())
962 if ((
mod =
FindMod(s_Mod_organism, s_Mod_org)) !=
nullptr) {
963 organism =
mod->value;
966 if ((
mod =
FindMod(s_Mod_taxname)) !=
nullptr) {
967 organism =
mod->value;
971 if ( !organism.
empty())
973 if (!(bsrc->GetOrg().IsSetTaxname() &&
NStr::EqualNocase(bsrc->GetOrg().GetTaxname(), organism)))
975 if (bsrc->GetOrg().IsSetTaxname())
980 bsrc->SetOrg().SetTaxname(organism);
986 if ((
mod =
FindMod(s_Mod_location)) !=
nullptr) {
998 ->FindValue(
mod->value));
1006 if ((
mod =
FindMod(s_Mod_origin)) !=
nullptr) {
1015 ->FindValue(
mod->value));
1023 for(
const auto & smod_orgsubtype : kSModOrgSubtypeMap.Get()) {
1024 const SMod & smod = smod_orgsubtype.first;
1030 bsrc->SetOrg().SetOrgname().SetMod().push_back(org_mod);
1036 for(
const auto & smod_subsrcsubtype : kSModSubSrcSubtypeMap.Get() ) {
1037 const SMod & smod = smod_subsrcsubtype.first;
1040 auto& subtype = bsrc->SetSubtype();
1057 remove_if(subtype.begin(), subtype.end(),
1058 equal_subtype(e_subtype)),
1062 subtype.push_back(subsource);
1071 if (!bsrc->IsSetPcr_primers()) {
1072 bsrc->SetPcr_primers(*pcr_reaction_set);
1075 bsrc->SetPcr_primers().
Set().splice(
1076 bsrc->SetPcr_primers().
Set().end(),
1077 pcr_reaction_set->
Set());
1085 for(
TModsCI db_xref_iter = db_xref_mods_range.first;
1086 db_xref_iter != db_xref_mods_range.second;
1090 const CTempString db_xref_str = db_xref_iter->value;
1093 size_t colon_location = db_xref_str.
find(
":");
1094 if (colon_location == string::npos) {
1096 new_db->
SetDb() =
"?";
1100 db_xref_str.
Copy(new_db->
SetDb(), 0, colon_location);
1104 new_db->
SetTag( *object_id );
1106 bsrc->SetOrg().SetDb().push_back( new_db );
1110 if ((
mod =
FindMod(s_Mod_division, s_Mod_div)) !=
nullptr) {
1111 bsrc->SetOrg().SetOrgname().SetDiv(
mod->value );
1115 if ((
mod =
FindMod(s_Mod_lineage)) !=
nullptr) {
1116 bsrc->SetOrg().SetOrgname().SetLineage(
mod->value );
1120 if ((
mod =
FindMod(s_Mod_gcode)) !=
nullptr) {
1125 if ((
mod =
FindMod(s_Mod_mgcode)) !=
nullptr) {
1130 if ((
mod =
FindMod(s_Mod_pgcode)) !=
nullptr) {
1138 for (
size_t i = 0;
i < 2;
i++)
1144 new_subsource->
SetName(it->value);
1145 bsrc->SetSubtype().push_back(new_subsource);
1150 if ((
mod =
FindMod(s_Mod_focus)) !=
nullptr) {
1152 bsrc->SetIs_focus();
1157 if ((
mod =
FindMod(s_Mod_taxid)) !=
nullptr) {
1161 if (reset_taxid && bsrc->IsSetOrgname() && bsrc->GetOrg().GetTaxId() !=
ZERO_TAX_ID) {
1217 if ((
mod =
FindMod(s_Mod_moltype, s_Mod_mol_type)) !=
nullptr) {
1219 if (it == sc_BiomolMap.end()) {
1224 mi->SetBiomol(it->second.m_eBiomol);
1229 if ((
mod =
FindMod(s_Mod_tech)) !=
nullptr) {
1231 if (it == sc_TechMap.end()) {
1234 mi->SetTech(it->second);
1239 if ((
mod =
FindMod(s_Mod_completeness, s_Mod_completedness)) !=
nullptr) {
1241 if (it == sc_CompletenessMap.end()) {
1244 mi->SetCompleteness(it->second);
1254 if ((
mod =
FindMod(s_Mod_gene)) !=
nullptr) {
1255 gene->SetLocus(
mod->value);
1259 if ((
mod =
FindMod(s_Mod_allele)) !=
nullptr) {
1260 gene->SetAllele(
mod->value );
1264 if ((
mod =
FindMod(s_Mod_gene_syn, s_Mod_gene_synonym)) !=
nullptr) {
1265 gene->SetSyn().push_back(
mod->value );
1269 if ((
mod =
FindMod(s_Mod_locus_tag)) !=
nullptr) {
1270 gene->SetLocus_tag(
mod->value );
1280 if ((
mod =
FindMod(s_Mod_protein, s_Mod_prot)) !=
nullptr) {
1281 prot->SetName().push_back(
mod->value);
1285 if ((
mod =
FindMod(s_Mod_prot_desc, s_Mod_protein_desc)) !=
nullptr) {
1290 if ((
mod =
FindMod(s_Mod_EC_number)) !=
nullptr) {
1291 prot->SetEc().push_back(
mod->value );
1295 if ((
mod =
FindMod(s_Mod_activity, s_Mod_function)) !=
nullptr) {
1296 prot->SetActivity().push_back(
mod->value );
1306 if ((
mod =
FindMod(s_Mod_secondary_accession,
1307 s_Mod_secondary_accessions)) !=
nullptr)
1309 list<CTempString> ranges;
1311 ITERATE (list<CTempString>, it, ranges) {
1316 gbb->SetExtra_accessions().push_back(*it2);
1319 gbb->SetExtra_accessions().push_back(s);
1325 if ((
mod =
FindMod(s_Mod_keyword, s_Mod_keywords)) !=
nullptr) {
1326 list<string> keywordList;
1331 gbb->SetKeywords().push_back( *keyword_iter );
1342 if ((
mod =
FindMod(s_Mod_secondary_accession,
1343 s_Mod_secondary_accessions)) !=
nullptr)
1345 list<CTempString> ranges;
1347 ITERATE (list<CTempString>, it, ranges) {
1352 hist->SetReplaces().SetIds().push_back(it2.GetID());
1356 hist->SetReplaces().SetIds()
1421 if ((
mod =
FindMod(s_Mod_primary, s_Mod_primary_accessions)) !=
nullptr) {
1423 list<CTempString> accns;
1425 ITERATE (list<CTempString>, it, accns) {
1427 field->SetLabel().SetId(0);
1428 subfield->
SetLabel().SetStr(
"accession");
1430 field->SetData().SetFields().push_back(subfield);
1431 data.push_back(field);
1434 if ( !
data.empty() ) {
1447 (
const_cast<CBioseq_set&
>(*pParentSet)).SetDescr() :
1451 for (
auto pDesc : descriptors.
Set()) {
1452 if (pDesc->IsUser() && pDesc->GetUser().IsDBLink()) {
1459 descriptors.
Set().push_back(pDBLinkDesc);
1465 const list<CTempString>& vals,
1472 auto& user_obj = dblink_desc.
SetUser();
1474 if (user_obj.IsSetData()) {
1475 for (
auto pUserField : user_obj.SetData()) {
1476 if (pUserField->IsSetLabel() &&
1477 pUserField->GetLabel().IsStr() &&
1479 pField = pUserField;
1488 user_obj.SetData().push_back(pField);
1491 pField->
SetData().SetStrs().clear();
1492 for (
const auto&
val : vals) {
1504 list<CTempString> value_list;
1506 for (
auto&
val : value_list) {
1510 if (value_list.empty()) {
1532 if ((
mod =
FindMod(s_Mod_bioproject)) !=
nullptr) {
1536 if ((
mod =
FindMod(s_Mod_biosample)) !=
nullptr) {
1549 if ((
mod =
FindMod(s_Mod_project, s_Mod_projects)) !=
nullptr) {
1551 list<CTempString> ids;
1553 ITERATE (list<CTempString>, it, ids) {
1558 field->SetLabel().SetId(0);
1559 subfield->
SetLabel().SetStr(
"ProjectID");
1560 subfield->
SetData().SetInt(
id);
1561 field->SetData().SetFields().push_back(subfield);
1563 subfield->
SetLabel().SetStr(
"ParentID");
1564 subfield->
SetData().SetInt(0);
1565 field->SetData().SetFields().push_back(subfield);
1566 data.push_back(field);
1570 if ( !
data.empty() ) {
1581 it !=
range.second; ++it) {
1587 bioseq.
SetDescr().Set().push_back(pubdesc);
1600 const SMod & badMod,
1601 const string & sAllowedValues )
1602 : runtime_error(x_CalculateErrorString(badMod, sAllowedValues)),
1603 m_BadMod(badMod), m_sAllowedValues(sAllowedValues)
1609 const SMod & badMod,
1610 const string & sAllowedValues )
1612 stringstream str_strm;
1613 str_strm <<
"Bad modifier value at seqid '"
1615 <<
"'. '" << badMod.
key <<
"' cannot have value '" << badMod.
value
1616 <<
"'. Accepted values are [" << sAllowedValues <<
"]";
1617 return str_strm.str();
1621 const SMod& unkMod )
1622 : runtime_error(x_CalculateErrorString(unkMod)), m_UnkMod(unkMod)
1629 stringstream str_strm;
1630 str_strm <<
"Bad modifier key at seqid '"
1632 <<
"'. '" << unkMod.
key <<
"' is not a recognized modifier key";
1633 return str_strm.str();
1669 for (
int tries = 0; tries < 2; ++tries) {
1671 if( modkey.
empty() ) {
1682 const_cast<SMod&
>(*it).
used =
true;
1702 SMod alt_smod(alt_key);
1714 for (
r.second =
r.first;
1722 const_cast<SMod&
>(*
r.second).used =
true;
1735 string delim = s->empty() ?
kEmptyStr :
" ";
1739 *s += delim +
'[' + it->key +
'=' + it->value +
']';
1755 static TMapModToValidValues s_mapModToValidValues;
1758 TMapModToValidValues::const_iterator find_iter =
1759 s_mapModToValidValues.find(
mod);
1760 if( find_iter != s_mapModToValidValues.end() ) {
1761 return find_iter->second;
1770 set_valid_values.
insert(
"linear");
1771 set_valid_values.
insert(
"circular");
1773 set_valid_values.
insert(
"rna");
1774 set_valid_values.
insert(
"dna");
1779 set_valid_values.
insert(map_iter->first);
1783 set_valid_values.
insert(
"single");
1784 set_valid_values.
insert(
"double");
1785 set_valid_values.
insert(
"mixed");
1787 set_valid_values.
insert(
"mitochondrial");
1788 set_valid_values.
insert(
"provirus");
1789 set_valid_values.
insert(
"extrachromosomal");
1790 set_valid_values.
insert(
"insertion sequence");
1792 set_valid_values.
insert(
"natural mutant");
1793 set_valid_values.
insert(
"mutant");
1795 set_valid_values.
insert( enum_iter->first );
1799 set_valid_values.
insert(tech_it->first);
1803 set_valid_values.
insert(comp_it->first);
1806 set_valid_values.
insert(
"ERROR TRYING TO DETERMINE ALLOWED VALUES");
1809 return set_valid_values;
1822 static TMapModNameToStringOfAllAllowedValues mapModNameToStringOfAllAllowedValues;
1825 TMapModNameToStringOfAllAllowedValues::const_iterator find_iter =
1826 mapModNameToStringOfAllAllowedValues.
find(
mod);
1827 if( find_iter != mapModNameToStringOfAllAllowedValues.end() ) {
1828 return find_iter->second;
1832 string & sAllValuesAsOneString =
1833 mapModNameToStringOfAllAllowedValues[
mod];
1836 if( ! sAllValuesAsOneString.empty() ) {
1837 sAllValuesAsOneString +=
", ";
1839 sAllValuesAsOneString +=
"'" + *value_it +
"'";
1842 return sAllValuesAsOneString;
1862 cerr << badModError.what() << endl;
1894 cerr << unkModError.what() << endl;
1922 "Error allowance exceeded",
1956 const_cast<SMod&
>(*it).
used =
false;
1964 newmod.
used =
false;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
CRef< CSeqdesc > m_user_obj
bool IsInitialised() const
CRef< CUser_field > m_dblink
CAutoAddDBLink(CBioseq &seq, const CTempString &id)
CRef< CSeq_descr > m_descr
CSeqdesc & Set(bool skip_lookup=false)
CAutoInitDesc(CSeq_descr &descr, CSeqdesc::E_Choice which)
CRef< CBioseq_set > m_bioset
bool IsInitialized(void) const
void Set(T *object)
Initialize with an existing object.
CConstRef< CBioseq_set > GetParentSet(void) const
void Throw(void) const
Use this function to throw this object.
static CObjReaderLineException * Create(EDiagSev eSeverity, unsigned int uLine, const std::string &strMessage, EProblem eProblem=eProblem_GeneralParsingError, const std::string &strSeqId=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), CObjReaderLineException::EErrCode eErrCode=eFormat, const TVecOfLines &vecOfOtherLines=TVecOfLines())
Please use this instead of the constructor because the ctor is protected.
@OrgMod.hpp User-defined methods of the data storage class.
@Seq_descr.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
Root class for all serialization exceptions.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
TBase::const_iterator const_iterator
static bool IsMultipleValuesAllowed(TSubtype)
static bool NeedsNoText(const TSubtype &subtype)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
virtual bool PutError(const ILineError &)=0
Store error in the container, and return true if error was stored fine, and return false if the calle...
@ eProblem_GeneralParsingError
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
parent_type::const_iterator const_iterator
static bool key_equal(const KEY_T *a, const KEY_T *b)
static const struct name_t names[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
static const char location[]
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
@ eDiag_Warning
Warning message.
@ eDiag_Critical
Critical error message.
#define NCBI_USER_THROW_FMT(message)
Throw a "user exception" with message processed as output to ostream.
list< pair< string, TEnumValueType > > TValues
const TValues & GetValues(void) const
Get the list of name-value pairs.
#define ENUM_METHOD_NAME(EnumName)
void SetAllUnused()
Set all mods to unused.
static const unsigned char kKeyCanonicalizationTable[257]
pair< TModsCI, TModsCI > TModsRange
std::string x_CalculateErrorString(const SMod &badMod, const string &sAllowedValues)
ILineErrorListener * m_pErrorListener
void ApplyMods(CBioSource &bsrc, CTempString organism=kEmptyStr)
const SMod * FindMod(const CTempString &key, const CTempString &alt_key=CTempString())
If a modifier with either key is present, mark it as used and return it; otherwise,...
bool AddMods(const CTempString &name, const CTempString &value)
void x_ApplyDBLinkMods(CBioseq &bioseq)
void x_ProcessError(CObjReaderLineException &err)
TModsRange FindAllMods(const CTempString &key, const CTempString &alt_key)
Return all modifiers with the given key (e.g., db_xref), marking them as used along the way.
void x_AddPCRPrimers(CAutoInitRef< CPCRReactionSet > &pcr_reaction_set)
static const set< string > & GetModAllowedValues(const string &mod)
Given a mod name (e.g.
EHandleBadMod m_HandleBadMod
void ApplyPubMods(CBioseq &seq)
CRef< CModFilter > m_pModFilter
CConstRef< CSeq_id > seqid
static bool EqualKeys(const CTempString &lhs, const CTempString &rhs)
void GetLabel(string *s, TWhichMods which=fAllMods) const
Append a representation of the specified modifiers to s, with a space in between if s is not empty an...
CBadModError(const SMod &badMod, const std::string &sAllowedValues)
std::string x_CalculateErrorString(const SMod &unkMod)
void x_HandleUnkModValue(const SMod &mod)
static CSafeStatic< CSourceModParser::SMod > kEmptyMod
Used for passing an empty mod to some funcs without having to constantly recreate an empty one.
void x_HandleBadModValue(const SMod &mod)
static const string & GetModAllowedValuesAsOneString(const string &mod)
Same as GetModAllowedValues, but returns one string with all the values.
void x_ApplyTPAMods(CAutoInitRef< CUser_object > &tpa)
CUnkModError(const SMod &unkMod)
void ApplyAllMods(CBioseq &seq, CTempString organism=kEmptyStr, CConstRef< CSeq_loc > location=CConstRef< CSeq_loc >())
Apply previously extracted modifiers to the given object, marking all relevant ones as used.
void x_ApplyGenomeProjectsDBMods(CAutoInitRef< CUser_object > &gpdb)
string ParseTitle(const CTempString &title, CConstRef< CSeq_id > seqid, size_t iMaxModsToParse=std::numeric_limits< size_t >::max())
Extract and store bracketed modifiers from a title string, returning a stripped version (which may we...
void x_ApplyMods(CAutoInitDesc< CBioSource > &bsrc, CTempString organism)
TMods::const_iterator TModsCI
TMods GetMods(TWhichMods which=fAllMods) const
Return all modifiers matching the given criteria (if any) without affecting their status (used vs.
@ eHandleBadMod_PrintToCerr
@ eHandleBadMod_ErrorListener
const string AsFastaString(void) const
static int BestRank(const CRef< CSeq_id > &id)
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
size_type length(void) const
Return the length of the represented array.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
size_type size(void) const
Return the length of the represented array.
void Copy(string &dst, size_type pos, size_type len) const
Copy a substring into a string. Somewhat similar to basic_string::assign().
static const size_type npos
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
static const char label[]
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
void SetForward(TForward &value)
Assign a value to Forward data member.
void SetReverse(TReverse &value)
Assign a value to Reverse data member.
void SetName(const TName &value)
Assign a value to Name data member.
Tdata & Set(void)
Assign a value to data member.
@ eSubtype_lat_lon
+/- decimal degrees
@ eOrigin_mut
artificially mutagenized
@ eOrigin_natmut
naturally occurring mutant
bool IsStr(void) const
Check if variant Str is selected.
void SetTag(TTag &value)
Assign a value to Tag data member.
const TStrs & GetStrs(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
E_Choice Which(void) const
Which variant is currently selected.
TData & SetData(void)
Assign a value to Data data member.
void SetNum(TNum value)
Assign a value to Num data member.
const TStr & GetStr(void) const
Get the variant data.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
const TType & GetType(void) const
Get the Type member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
vector< CRef< CUser_field > > TData
@ e_not_set
No variant selected.
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
void SetSubname(const TSubname &value)
Assign a value to Subname data member.
@ eSubtype_nat_host
natural host of this specimen
TPmid & SetPmid(void)
Select the variant.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TClass GetClass(void) const
Get the Class member data.
@ eClass_nuc_prot
nuc acid and coded proteins
void SetPub(TPub &value)
Assign a value to Pub data member.
const TInst & GetInst(void) const
Get the Inst member data.
TPub & SetPub(void)
Select the variant.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
bool IsSetHist(void) const
Sequence history. Check if a value has been assigned to Hist data member.
TComment & SetComment(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
EMol
molecule class in living organism
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TUser & SetUser(void)
Select the variant.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_annot > > TAnnot
@ eCompleteness_has_left
5' or NH3 end present
@ eCompleteness_complete
complete biological entity
@ eCompleteness_has_right
3' or COOH end present
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_physmap
from physical mapping techniques
@ eTech_htc
high throughput cDNA
@ eTech_both
concept transl. w/ partial pept. seq.
@ eTech_targeted
targeted locus sets/studies
@ eTech_seq_pept_homol
sequenced peptide, ordered by homology
@ eTech_composite_wgs_htgs
composite of WGS and HTGS
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_seq_pept_overlap
sequenced peptide, ordered by overlap
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_concept_trans
conceptual translation
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_standard
standard sequencing
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_seq_pept
peptide was sequenced
@ eTech_survey
one-pass genomic sequence
@ eTech_barcode
barcode of life project
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_derived
derived from other data, not a primary entity
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
@ eTech_concept_trans_a
conceptual transl. supplied by author
@ eTech_genemap
from genetic mapping techniques
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ eBiomol_other_genetic
other genetic material
@ e_Genbank
GenBank specific info.
@ e_Molinfo
info on the molecule and techniques
@ e_not_set
No variant selected.
@ e_Source
source of materials, includes Org-ref
@ eStrand_ds
double strand
@ eStrand_ss
single strand
static const CS_INT unused
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Useful/utility classes and methods.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static const GLdouble origin[]
#define FIELD_CHAIN_OF_2_IS_SET(Var, Fld1, Fld2)
FIELD_CHAIN_OF_2_IS_SET.
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
SStaticPair< const char *, SMolTypeInfo > TBiomolMapEntry
static void s_SetDBLinkField(const string &label, const string &vals, CRef< CSeqdesc > &pDBLinkDesc, CBioseq &bioseq)
DEFINE_STATIC_ARRAY_MAP(TBiomolMap, sc_BiomolMap, sc_BiomolArray)
CStaticPairArrayMap< const char *, CMolInfo::TCompleteness, CSourceModParser::PKeyCompare > TCompletenessMap
static const TTechMapEntry sc_TechArray[]
static void s_GetPrimerInfo(const CSourceModParser::SMod *pNamesMod, const CSourceModParser::SMod *pSeqsMod, vector< pair< string, string >> &reaction_info)
static void s_AddPrimers(const pair< string, string > &primer_info, CPCRPrimerSet &primer_set)
static void s_PopulateUserObject(CUser_object &uo, const string &type, CUser_object::TData &data)
static CRef< CSeqdesc > s_SetDBLinkDesc(CBioseq &bioseq)
static const TBiomolMapEntry sc_BiomolArray[]
SStaticPair< const char *, CMolInfo::TTech > TTechMapEntry
static void s_ApplyPubMods(CBioseq &bioseq, const CSourceModParser::TModsRange &range)
static void s_SetDBLinkFieldVals(const string &label, const list< CTempString > &vals, CSeqdesc &dblink_desc)
static const TCompletenessMapEntry sc_CompletenessArray[]
SStaticPair< const char *, CMolInfo::TCompleteness > TCompletenessMapEntry
CSafeStaticRef< CSeq_descr > fake_descr
CStaticPairArrayMap< const char *, SMolTypeInfo, CSourceModParser::PKeyCompare > TBiomolMap
CStaticPairArrayMap< const char *, CMolInfo::TTech, CSourceModParser::PKeyCompare > TTechMap
#define STATIC_SMOD(key_str)
SMolTypeInfo(EShown eShown, CMolInfo::TBiomol eBiomol, CSeq_inst::EMol eMol)
CMolInfo::TBiomol m_eBiomol
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason for introducing this...