63 #include <unordered_map>
64 #include <unordered_set>
172 function<
bool(
const CSeqdesc&)> f_verify,
176 function<
bool(
const CSeqdesc&)> f_verify,
199 m_fReportError(fReportError),
200 m_SkippedMods(skipped_mods)
217 static const unordered_map<string,TMemFuncPtr>
234 auto it = s_MethodMap.find(mod_name);
235 if (it != s_MethodMap.end()) {
236 auto mem_func_ptr = it->second;
237 (this->*mem_func_ptr)(mod_entry);
248 if (name ==
"location") {
252 if (it == s_GenomeStringToEnum.end()) {
260 if (name ==
"origin") {
264 if (it == s_OriginStringToEnum.end()) {
272 if (name ==
"focus") {
313 for (
const auto&
mod : mod_entry.second) {
321 if (
mod.IsSetAttrib()) {
322 pSubSource->SetAttrib(
mod.GetAttrib());
324 m_pDescrCache->SetSubtype().push_back(std::move(pSubSource));
331 const auto set_size = primer_set.
Get().size();
332 vector<string>
names;
334 const auto num_names =
names.size();
336 auto it = primer_set.
Set().begin();
337 for (
size_t i=0;
i<num_names; ++
i) {
342 (*it)->SetName().Set(
names[
i]);
347 pPrimer->SetName().Set(
names[
i]);
348 primer_set.
Set().push_back(std::move(pPrimer));
356 const auto set_size = primer_set.
Get().size();
359 const auto num_seqs = seqs.size();
361 auto it = primer_set.
Set().begin();
362 for (
size_t i=0;
i<num_seqs; ++
i) {
367 (*it)->SetSeq().Set(seqs[
i]);
372 pPrimer->SetSeq().Set(seqs[
i]);
373 primer_set.
Set().push_back(std::move(pPrimer));
381 vector<string>
names;
383 reaction_names.insert(reaction_names.end(),
names.begin(),
names.end());
391 if (seqs.size() > 1) {
392 if (seqs.front().front() ==
'(') {
393 seqs.front().erase(0,1);
395 if (seqs.back().back() ==
')') {
396 seqs.back().erase(seqs.back().size()-1,1);
400 for (
auto& seq : seqs) {
411 if (mod_name ==
"fwd-primer-name") {
412 vector<string>
names;
413 for (
const auto&
mod : mod_entry.second)
419 auto it = pcr_reaction_set.Set().begin();
420 for (
const auto& reaction_names :
names) {
421 if (it == pcr_reaction_set.Set().end()) {
424 pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
434 if (mod_name ==
"fwd-primer-seq") {
436 for (
const auto&
mod : mod_entry.second)
441 auto it = pcr_reaction_set.Set().begin();
442 for (
const auto& reaction_seqs : seqs) {
443 if (it == pcr_reaction_set.Set().end()) {
446 pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
456 if(mod_name ==
"rev-primer-name")
458 vector<string>
names;
459 for (
const auto&
mod : mod_entry.second) {
462 if (!
names.empty()) {
464 const size_t num_reactions = pcr_reaction_set.Get().size();
465 const size_t num_names =
names.size();
466 if (num_names <= num_reactions) {
467 auto it = pcr_reaction_set.Set().rbegin();
468 for(
int i=num_names-1;
i>=0; --
i) {
474 auto it = pcr_reaction_set.Set().begin();
475 for (
size_t i=0;
i<num_reactions; ++
i) {
479 for (
auto i=num_reactions;
i<num_names; ++
i) {
482 pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
490 if(mod_name ==
"rev-primer-seq")
493 for (
const auto&
mod : mod_entry.second) {
498 const size_t num_reactions = pcr_reaction_set.Get().size();
499 const size_t num_seqs = seqs.size();
500 if (num_seqs <= num_reactions) {
501 auto it = pcr_reaction_set.Set().rbegin();
502 for(
int i=num_seqs-1;
i>=0; --
i) {
507 auto it = pcr_reaction_set.Set().begin();
508 for (
size_t i=0;
i<num_reactions; ++
i) {
512 for (
auto i=num_reactions;
i<num_seqs; ++
i) {
515 pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
529 if (name ==
"taxname") {
532 if (!preserve_taxid &&
540 if (name ==
"taxid") {
544 taxid = NStr::StringToNumeric<TTaxId>(
value);
551 preserve_taxid =
true;
556 if (name ==
"common") {
562 if (name ==
"dbxref") {
576 vector<CRef<CDbtag>> dbtags;
577 for (
const auto& value_attrib : mod_entry.second) {
578 const auto&
value = value_attrib.GetValue();
580 auto colon_pos =
value.find(
":");
583 if (colon_pos < (
value.length()-1)) {
593 pDbtag->SetTag().SetStr(
tag);
594 dbtags.push_back(std::move(pDbtag));
604 if (name ==
"lineage") {
610 if (name ==
"division") {
618 using TFunction =
function<void(
COrgName&,
int)>;
620 unordered_map<string, TFunction>
621 s_GetCodeSetterMethods =
626 auto it = s_GetCodeSetterMethods.find(name);
627 if (it != s_GetCodeSetterMethods.end()) {
655 for (
const auto&
mod : mod_entry.second) {
658 if (
mod.IsSetAttrib()) {
659 pOrgMod->SetAttrib(
mod.GetAttrib());
669 static const unordered_map<string, string> s_NameToLabel =
670 {{
"sra",
"Sequence Read Archive"},
671 {
"biosample",
"BioSample"},
672 {
"bioproject",
"BioProject"}};
674 const auto&
label = s_NameToLabel.at(name);
684 list<CTempString> value_list;
685 for (
const auto&
mod : mod_entry.second) {
686 list<CTempString> value_sublist;
687 const auto& vals =
mod.GetValue();
689 value_list.splice(value_list.end(), value_sublist);
692 if (value_list.empty()) {
700 const list<CTempString>& vals,
709 for (
auto pUserField : dblink.
SetData()) {
711 pUserField->IsSetLabel() &&
712 pUserField->GetLabel().IsStr() &&
723 dblink.
SetData().push_back(pField);
726 pField->
SetData().SetStrs().assign(vals.begin(), vals.end());
769 for (
const auto&
mod : mod_entry.second) {
770 vals.push_back(
mod.GetValue());
773 string label = (mod_entry.first ==
"ft-map") ?
775 "BaseModification-FileTrackURL";
777 for (
auto val : vals) {
780 pField->SetLabel().SetStr(
label);
782 pField->SetData().SetStr(
val);
783 user.SetData().push_back(pField);
790 list<CStringUTF8> accession_list;
791 for (
const auto&
mod : mod_entry.second) {
792 list<CTempString> value_sublist;
793 const auto& vals =
mod.GetValue();
796 list<CStringUTF8> accession_sublist;
798 transform(value_sublist.begin(), value_sublist.end(), back_inserter(accession_sublist),
805 accession_list.splice(accession_list.end(), accession_sublist);
808 if (accession_list.empty()) {
812 auto make_user_field = [](
const CStringUTF8& accession) {
814 pField->SetLabel().SetId(0);
816 pSubfield->SetLabel().SetStr(
"accession");
817 pSubfield->SetData().SetStr(accession);
818 pField->SetData().SetFields().push_back(std::move(pSubfield));
823 user.SetData().resize(accession_list.size());
824 transform(accession_list.begin(), accession_list.end(),
825 user.SetData().begin(), make_user_field);
831 list<string> id_list;
832 for (
const auto&
mod : mod_entry.second) {
833 list<CTempString> value_sublist;
834 const auto& vals =
mod.GetValue();
836 for (
const auto&
val : value_sublist) {
840 id_list.insert(id_list.end(),idrange.
begin(), idrange.
end());
844 id_list.push_back(
value);
849 gb_block.SetExtra_accessions().assign(id_list.begin(), id_list.end());
855 list<CTempString> value_list;
856 for (
const auto&
mod : mod_entry.second) {
857 list<CTempString> value_sublist;
858 const auto& vals =
mod.GetValue();
860 value_list.splice(value_list.end(), value_sublist);
862 if (value_list.empty()) {
865 m_pDescrCache->SetGBblock().SetKeywords().assign(value_list.begin(), value_list.end());
874 for (
const auto&
mod : mod_entry.second) {
875 list<CTempString> value_sublist;
876 const auto& vals =
mod.GetValue();
878 list<int> id_sublist;
880 transform(value_sublist.begin(), value_sublist.end(), back_inserter(id_sublist),
887 id_list.splice(id_list.end(), id_sublist);
889 if (id_list.empty()) {
893 auto make_user_field = [](
const int& id) {
896 pField->SetLabel().SetId(0);
897 pSubfield->SetLabel().SetStr(
"ProjectID");
898 pSubfield->SetData().SetInt(
id);
899 pField->SetData().SetFields().push_back(pSubfield);
901 pSubfield->SetLabel().SetStr(
"ParentID");
902 pSubfield->SetData().SetInt(0);
903 pField->SetData().SetFields().push_back(pSubfield);
908 user.SetData().resize(id_list.size());
909 transform(id_list.begin(), id_list.end(),
910 user.SetData().begin(), make_user_field);
916 for (
const auto&
mod : mod_entry.second) {
925 for (
const auto&
mod : mod_entry.second)
930 pmid = NStr::StringToNumeric<TEntrezId>(
value);
937 pPub->SetPmid().Set(pmid);
941 .push_back(std::move(pPub));
959 const string& add_msg)
961 const auto& mod_name = mod_data.
GetName();
962 const auto& mod_value = mod_data.
GetValue();
963 string msg =
"Invalid value: " + mod_name +
"=" + mod_value +
".";
965 msg +=
" " + add_msg;
986 template<
class TObject>
1012 pParentSet->IsSetClass() &&
1015 auto& bioseq_set =
const_cast<CBioseq_set&
>(*pParentSet);
1042 auto& pub_desc = pDesc->
SetPub();
1044 return pub_desc.GetPub().Get().empty();
1061 return pDesc->SetPub();
1078 return pDesc->SetComment();
1150 pDesc->SetGenbank();
1165 pDesc->SetMolinfo();
1222 function<
bool(
const CSeqdesc&)> f_verify,
1230 function<
bool(
const CSeqdesc&)> f_verify,
1234 auto it =
m_Cache.find(eChoice);
1236 return *(it->second);
1241 if (pDescrContainer->
IsSet()) {
1242 for (
auto& pDesc : pDescrContainer->
SetDescr().
Set()) {
1243 if (pDesc.NotEmpty() && f_verify(*pDesc)) {
1244 m_Cache.insert(make_pair(eChoice, pDesc));
1250 auto pDesc = f_create();
1251 m_Cache.insert(make_pair(eChoice, pDesc));
1252 pDescrContainer->
SetDescr().
Set().push_back(pDesc);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void transform(Container &c, UnaryFunction *op)
CConstRef< CBioseq_set > GetParentSet(void) const
CPCRReactionSet * m_pPCRReactionSet
CBioSource::TSubtype TSubtype
SDescrContainer_Base TDescrContainer
unique_ptr< TDescrContainer > m_pNucProtSetContainer
void x_SetUserType(const string &type, CUser_object &user_object)
CUser_object & SetFileTrack(void)
unordered_map< EChoice, CRef< CSeqdesc >, hash< underlying_type< EChoice >::type > > TMap
CPCRReactionSet & SetPCR_primers(void)
CDescrCache(CBioseq &bioseq)
CGB_block & SetGBblock(void)
CSeqdesc & x_SetDescriptor(const EChoice eChoice, function< bool(const CSeqdesc &)> f_verify, function< CRef< CSeqdesc >(void)> f_create)
unique_ptr< TDescrContainer > m_pBioseqContainer
CMolInfo & SetMolInfo(void)
TDescrContainer * m_pPrimaryContainer
CUser_object & SetDBLink(void)
CBioSource & SetBioSource(void)
CPubdesc & SetPubdesc(void)
CUser_object & SetGenomeProjects(void)
string & SetComment(void)
CUser_object & SetTpaAssembly(void)
TSubtype & SetSubtype(void)
TOrgMods & SetOrgMods(void)
CDescrContainer(TObject &object)
CSeq_descr & SetDescr(void)
void x_SetMolInfoType(const TModEntry &mod_entry)
void x_SetOrgMod(const TModEntry &mod_entry)
CDescrModApply(CBioseq &bioseq, FReportError fReportError, TSkippedMods &skipped_mods)
void x_SetMolInfoCompleteness(const TModEntry &mod_entry)
bool x_TryBioSourceMod(const TModEntry &mod_entry, bool &preserve_taxid)
bool x_TryOrgNameMod(const TModEntry &mod_entry)
virtual ~CDescrModApply()
bool x_TryOrgRefMod(const TModEntry &mod_entry, bool &preserve_taxid)
TSkippedMods & m_SkippedMods
void x_SetTpaAssembly(const TModEntry &mod_entry)
bool x_TryPCRPrimerMod(const TModEntry &mod_entry)
CModAdder::FReportError FReportError
void x_SetFileTrack(const TModEntry &mod_entry)
CModAdder::TSkippedMods TSkippedMods
void x_SetPMID(const TModEntry &mod_entry)
void x_SetComment(const TModEntry &mod_entry)
CModHandler::TMods::value_type TModEntry
void x_SetGBblockKeywords(const TModEntry &mod_entry)
void x_SetGBblockIds(const TModEntry &mod_entry)
static const string & x_GetModValue(const TModEntry &mod_entry)
void x_SetDBLink(const TModEntry &mod_entry)
FReportError m_fReportError
unique_ptr< CDescrCache > m_pDescrCache
void x_SetDBLinkFieldVals(const string &label, const list< CTempString > &vals, CUser_object &db_link)
void x_SetDBLinkField(const string &label, const TModEntry &mod_entry, CDescrCache &descr_cache)
void x_SetDBxref(const TModEntry &mod_entry)
static const string & x_GetModName(const TModEntry &mod_entry)
void x_SetMolInfoTech(const TModEntry &mod_entry)
void x_ReportInvalidValue(const CModData &mod_data, const string &add_msg="")
void x_SetSubtype(const TModEntry &mod_entry)
bool Apply(const TModEntry &mod_entry)
void x_SetGenomeProjects(const TModEntry &mod_entry)
const string & GetValue(void) const
const string & GetName(void) const
static const string & GetCanonicalName(const TModEntry &mod_entry)
static const string & AssertReturnSingleValue(const TModEntry &mod_entry)
@OrgMod.hpp User-defined methods of the data storage class.
@Pubdesc.hpp User-defined methods of the data storage class.
@Seq_descr.hpp User-defined methods of the data storage class.
static bool NeedsNoText(const TSubtype &subtype)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const auto s_OrgModStringToEnum
static const unordered_map< string, CMolInfo::TTech > s_TechStringToEnum
static bool s_EmptyAfterRemovingPMID(CRef< CSeqdesc > &pDesc)
static const auto s_SubSourceStringToEnum
static void s_SetPrimerNames(const string &primer_names, CPCRPrimerSet &primer_set)
static const unordered_map< string, CMolInfo::TCompleteness > s_CompletenessStringToEnum
static bool s_IsUserType(const CUser_object &user_object, const string &type)
static void s_AppendPrimerNames(const string &mod, vector< string > &reaction_names)
static void s_AppendPrimerSeqs(const string &mod, vector< string > &reaction_seqs)
static void s_SetPrimerSeqs(const string &primer_seqs, CPCRPrimerSet &primer_set)
static const struct name_t names[]
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
SStrictId_Tax::TId TTaxId
Taxon id type.
@ eDiag_Error
Error message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const_iterator end(void) const
const_iterator begin(void) const
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
static const char label[]
const Tdata & Get(void) const
Get the member data.
list< CRef< CSubSource > > TSubtype
void SetPcr_primers(TPcr_primers &value)
Assign a value to Pcr_primers data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
Tdata & Set(void)
Assign a value to data member.
Tdata & Set(void)
Assign a value to data member.
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
TData & SetData(void)
Assign a value to Data data member.
const TStr & GetStr(void) const
Get the variant data.
void SetLabel(TLabel &value)
Assign a value to Label data member.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
const TType & GetType(void) const
Get the Type member data.
TMgcode & SetMgcode(void)
Assign a value to Mgcode data member.
TPgcode & SetPgcode(void)
Assign a value to Pgcode data member.
list< CRef< COrgMod > > TMod
TGcode & SetGcode(void)
Assign a value to Gcode data member.
bool IsPmid(void) const
Check if variant Pmid is selected.
@ eClass_nuc_prot
nuc acid and coded proteins
bool IsGenbank(void) const
Check if variant Genbank is selected.
const TUser & GetUser(void) const
Get the variant data.
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
void SetPub(TPub &value)
Assign a value to Pub data member.
TPub & SetPub(void)
Select the variant.
bool IsComment(void) const
Check if variant Comment is selected.
bool IsSource(void) const
Check if variant Source is selected.
bool IsPub(void) const
Check if variant Pub is selected.
Tdata & Set(void)
Assign a value to data member.
bool IsUser(void) const
Check if variant User is selected.
@ eCompleteness_has_left
5' or NH3 end present
@ eCompleteness_complete
complete biological entity
@ eCompleteness_has_right
3' or COOH end present
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_physmap
from physical mapping techniques
@ eTech_htc
high throughput cDNA
@ eTech_both
concept transl. w/ partial pept. seq.
@ eTech_targeted
targeted locus sets/studies
@ eTech_seq_pept_homol
sequenced peptide, ordered by homology
@ eTech_composite_wgs_htgs
composite of WGS and HTGS
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_seq_pept_overlap
sequenced peptide, ordered by overlap
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_concept_trans
conceptual translation
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_standard
standard sequencing
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_seq_pept
peptide was sequenced
@ eTech_survey
one-pass genomic sequence
@ eTech_barcode
barcode of life project
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_derived
derived from other data, not a primary entity
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
@ eTech_concept_trans_a
conceptual transl. supplied by author
@ eTech_genemap
from genetic mapping techniques
unsigned int
A callback function used to compare two keys in a database.
use only n Cassandra database for the lookups</td > n</tr > n< tr > n< td > yes</td > n< td > do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
const TStringToEnumMap< CMolInfo::TBiomol > g_BiomolStringToEnum
TStringToEnumMap< CSubSource::ESubtype > g_InitModNameSubSrcSubtypeMap(void)
TStringToEnumMap< COrgMod::ESubtype > g_InitModNameOrgSubtypeMap(void)
string g_GetNormalizedModVal(const string &unnormalized)
TStringToEnumMap< CBioSource::EGenome > g_InitModNameGenomeMap(void)
TStringToEnumMap< CBioSource::EOrigin > g_InitModNameOriginMap(void)
const GenericPointer< typename T::ValueType > T2 value
@ eModSubcode_InvalidValue
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
virtual ~SDescrContainer_Base(void)=default
virtual bool IsSet(void) const =0
virtual CSeq_descr & SetDescr(void)=0