47 #include <unordered_map>
48 #include <unordered_set>
64 {
"moltype",
"mol-type"},
65 {
"fwd-pcr-primer-name",
"fwd-primer-name"},
66 {
"fwd-pcr-primer-names",
"fwd-primer-name"},
67 {
"fwd-primer-names",
"fwd-primer-name"},
68 {
"fwd-pcr-primer-seq",
"fwd-primer-seq"},
69 {
"fwd-pcr-primer-seqs",
"fwd-primer-seq"},
70 {
"fwd-primer-seqs",
"fwd-primer-seq"},
71 {
"rev-pcr-primer-name",
"rev-primer-name"},
72 {
"rev-pcr-primer-names",
"rev-primer-name"},
73 {
"rev-primer-names",
"rev-primer-name"},
74 {
"rev-pcr-primer-seq",
"rev-primer-seq"},
75 {
"rev-pcr-primer-seqs",
"rev-primer-seq"},
76 {
"rev-primer-seqs",
"rev-primer-seq"},
78 {
"organism",
"taxname"},
81 {
"completedness",
"completeness"},
82 {
"gene-syn",
"gene-synonym"},
83 {
"genesyn",
"gene-synonym"},
84 {
"genesynonym",
"gene-synonym"},
86 {
"prot-desc",
"protein-desc"},
87 {
"function",
"activity"},
88 {
"secondary",
"secondary-accession"},
89 {
"secondary-accessions",
"secondary-accession"},
90 {
"keywords",
"keyword"},
91 {
"primary",
"primary-accession"},
92 {
"primary-accessions",
"primary-accession"},
93 {
"projects",
"project"},
94 {
"db-xref",
"dbxref"},
96 {
"ft-url-mod",
"ft-mod"},
98 {
"geo-loc-name",
"country"}
108 "insertion-seq-name",
201 transform(excluded_mods.begin(), excluded_mods.end(),
203 [](
const string& mod_name) { return GetCanonicalName(mod_name); });
209 transform(ignored_mods.begin(), ignored_mods.end(),
211 [](
const string& mod_name) { return GetCanonicalName(mod_name); });
226 rejected_mods.clear();
228 unordered_set<string> current_set;
230 TMods conflicting_mods;
232 for (
const auto&
mod : mods) {
238 !allow_multiple_values)) {
246 rejected_mods.push_back(
mod);
252 string message =
"The following modifier is unsupported in this context and will be ignored: " +
mod.GetName() +
".";
256 rejected_mods.push_back(
mod);
261 string message =
"Use of the following modifier in a sequence file is discouraged and the information will be ignored: " +
mod.GetName() +
".";
265 rejected_mods.push_back(
mod);
269 const auto first_occurrence = current_set.insert(canonical_name).second;
272 if (!first_occurrence) {
277 auto it = accepted_mods.
find(canonical_name);
278 if (it != accepted_mods.
end() &&
281 msg =
"Duplicated modifier value detected, ignoring duplicate, no action required: "
282 +
mod.GetName() +
"=" +
mod.GetValue() +
".";
287 if (!allow_multiple_values) {
288 msg =
"Conflicting modifiers detected. Provide one modifier with one value for: " +
mod.GetName() +
".";
292 if (it != accepted_mods.
end()) {
293 conflicting_mods[canonical_name] = it->second;
294 accepted_mods.
erase(it);
296 conflicting_mods[canonical_name].push_back(
mod);
300 accepted_mods[canonical_name].push_back(
mod);
310 fPostMessage(reportMod,
msg, sev, subcode);
316 accepted_mods[canonical_name].push_back(
mod);
319 for (
auto& conflicts : conflicting_mods) {
320 rejected_mods.splice(rejected_mods.end(), conflicts.second);
330 for (
auto& mod_entry : mods) {
331 const auto& canonical_name = mod_entry.first;
332 dest[canonical_name] = mod_entry.second;
337 dest.
insert(make_move_iterator(mods.begin()),
338 make_move_iterator(mods.end()));
342 for (
auto& mod_entry : mods) {
343 const auto& canonical_name = mod_entry.first;
344 auto& dest_mod_list = dest[canonical_name];
346 dest_mod_list.splice(
348 std::move(mod_entry.second));
351 dest_mod_list = std::move(mod_entry.second);
357 for (
auto& mod_entry : mods) {
358 const auto& canonical_name = mod_entry.first;
359 auto& dest_mod_list = dest[canonical_name];
360 if (dest_mod_list.empty()) {
361 dest_mod_list = std::move(mod_entry.second);
365 dest_mod_list.splice(
367 std::move(mod_entry.second));
395 return mod_entry.first;
401 assert(mod_entry.second.size() == 1);
402 return mod_entry.second.front().GetValue();
413 return normalized_name;
426 string normalized = unnormalized;
429 auto new_end = unique(normalized.begin(),
432 return ((a==
'-' || a==
'_' || a==
' ') &&
433 (b==
'-' || b==
'_' || b==
' ')); });
435 normalized.erase(new_end, normalized.end());
436 for (
char& c : normalized) {
437 if (c ==
'_' || c ==
' ') {
455 Apply(mod_handler, bioseq, skipped_mods,
false, fPostMessage);
465 skipped_mods.clear();
475 list<string> applied_mods;
476 for (
const auto& mod_entry : mod_handler.
GetMods()) {
478 bool applied =
false;
479 if (descr_mod_apply.
Apply(mod_entry)) {
481 if (mod_name ==
"secondary-accession"){
484 else if (mod_name ==
"mol-type") {
495 feat_mod_apply.
Apply(mod_entry)) {
508 skipped_mods.insert(skipped_mods.end(),
509 mod_entry.second.begin(),
510 mod_entry.second.end());
512 for (
const auto& modData : mod_entry.second) {
513 string msg =
"Unrecognized modifier: " + modData.GetName() +
".";
519 string msg =
"Unrecognized modifier: " + canonicalName +
".";
523 skipped_mods.insert(skipped_mods.end(),
524 mod_entry.second.begin(),
525 mod_entry.second.end());
536 if (!applied_mods.empty()) {
537 string msg =
"Applied mods: ";
538 for (
const auto& applied_mod : applied_mods) {
539 msg +=
" " + applied_mod;
550 const auto& mod_name = mod_data.
GetName();
551 const auto& mod_value = mod_data.
GetValue();
552 string msg =
"Invalid value: " + mod_name +
"=" + mod_value +
".";
556 skipped_mods.push_back(mod_data);
585 if (mod_name ==
"strand") {
586 x_SetStrand(mod_entry, seq_inst, skipped_mods, fPostMessage);
590 if (mod_name ==
"molecule") {
591 x_SetMolecule(mod_entry, seq_inst, skipped_mods, fPostMessage);
595 if (mod_name ==
"topology") {
596 x_SetTopology(mod_entry, seq_inst, skipped_mods, fPostMessage);
637 seq_inst.
SetMol(it->second);
672 list<string> id_list;
673 for (
const auto&
mod : mod_entry.second) {
674 const auto& vals =
mod.GetValue();
675 list<CTempString> value_sublist;
677 for (
const auto&
val : value_sublist) {
681 id_list.insert(id_list.end(), idrange.
begin(), idrange.
end());
685 id_list.push_back(
value);
690 if (id_list.empty()) {
694 list<CRef<CSeq_id>> secondary_ids;
696 transform(id_list.begin(), id_list.end(), back_inserter(secondary_ids),
697 [](
const string& id_string) {
return Ref(
new CSeq_id(id_string)); });
699 seq_inst.
SetHist().SetReplaces().SetIds() = std::move(secondary_ids);
709 m_pMessageListener(pMessageListener)
758 size_t start_pos = 0;
759 while(start_pos < title.
size()) {
760 size_t lb_pos, end_pos, eq_pos;
763 if (eq_pos < end_pos) {
764 if ((lb_pos > start_pos) ) {
766 if (!left_remainder.empty()) {
767 if (!remainder.empty()) {
768 remainder.append(
" ");
770 remainder.append(left_remainder);
775 mods.emplace_back(name,
value);
777 start_pos = end_pos+1;
781 if (!right_remainder.empty()) {
782 if (!remainder.empty()) {
783 remainder.append(
" ");
785 remainder.append(right_remainder);
795 size_t start_pos = 0;
796 while (start_pos < title.
size()) {
797 size_t lb_pos, end_pos, eq_pos;
800 if (eq_pos < end_pos) {
803 start_pos = end_pos+1;
818 const char* s = line.
data() + start;
820 int num_unmatched_left_brackets = 0;
821 while (
i < line.
size())
826 num_unmatched_left_brackets++;
827 if (num_unmatched_left_brackets == 1)
838 if (num_unmatched_left_brackets == 1)
841 return (eq_pos<stop);
844 if (num_unmatched_left_brackets == 0) {
849 num_unmatched_left_brackets--;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void transform(Container &c, UnaryFunction *op)
IObjtoolsListener * m_pMessageListener
CDefaultModErrorReporter(const string &seqId, int lineNum, IObjtoolsListener *pMessageListener)
void operator()(const CModData &mod, const string &msg, EDiagSev sev, EModSubcode subcode)
bool Apply(const TModEntry &mod_entry)
bool Apply(const TModEntry &mod_entry)
static CLineErrorEx * Create(EProblem eProblem, EDiagSev eSeverity, int code, int subcode, const std::string &strSeqId, unsigned int uLine, const std::string &strErrorMessage=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
static void Apply(const CModHandler &mod_handler, CBioseq &bioseq, TSkippedMods &skipped_mods, FPostMessage fPostMessage=nullptr)
static void x_SetMoleculeFromMolType(const TModEntry &mod_entry, CSeq_inst &seq_inst)
static void x_ReportInvalidValue(const CModData &mod_data, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
static void x_SetStrand(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
static const string & x_GetModName(const TModEntry &mod_entry)
static void x_SetHist(const TModEntry &mod_entry, CSeq_inst &seq_inst)
static void x_SetMolecule(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
list< CModData > TSkippedMods
static bool x_TrySeqInstMod(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
CModHandler::FReportError FReportError
CModHandler::TModEntry TModEntry
static const string & x_GetModValue(const TModEntry &mod_entry)
static void x_SetTopology(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
const string & GetValue(void) const
const string & GetName(void) const
void SetExcludedMods(const vector< string > &excluded_mods)
function< void(const CModData &mod, const string &message, EDiagSev severity, EModSubcode subcode)> FReportError
static bool x_MultipleValuesAllowed(const string &canonical_name)
static const string & GetCanonicalName(const TModEntry &mod_entry)
void SetIgnoredMods(const list< string > &ignored_mods)
TNameSet m_IgnoredModifiers
const TMods & GetMods(void) const
list< CModData > TModList
void AddMods(const TModList &mods, EHandleExisting handle_existing, TModList &rejected_mods, FReportError fReportError=nullptr)
TNameSet m_ExcludedModifiers
static const TNameSet sm_MultipleValuesForbidden
static const TNameSet sm_DeprecatedModifiers
static const string & AssertReturnSingleValue(const TModEntry &mod_entry)
static string x_GetNormalizedString(const string &name)
unordered_set< string > TNameSet
void SetMods(const TMods &mods)
TMods::value_type TModEntry
void x_SaveMods(TMods &&mods, EHandleExisting handle_existing, TMods &dest)
static bool x_IsDeprecated(const string &canonical_name)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
CModHandler::TModList TModList
static bool HasMods(const CTempString &title)
static bool x_FindBrackets(const CTempString &line, size_t &start, size_t &stop, size_t &eq_pos)
static void Apply(const CTempString &title, TModList &mods, string &remainder)
@ eProblem_GeneralParsingError
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
const_iterator end(void) const
const_iterator begin(void) const
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
const char * data(void) const
Return a pointer to the array represented.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
size_type size(void) const
Return the length of the represented array.
static string & ToLower(string &str)
Convert string to lower case – string& version.
static const size_type npos
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
void SetHist(THist &value)
Assign a value to Hist data member.
void SetInst(TInst &value)
Assign a value to Inst data member.
void SetTopology(TTopology value)
Assign a value to Topology data member.
EMol
molecule class in living organism
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eTopology_tandem
some part of tandem repeat
@ eMol_na
just a nucleic acid
@ eStrand_other
default ds for DNA, ss for RNA, pept
@ eStrand_ds
double strand
@ eStrand_ss
single strand
static const unordered_map< string, CSeq_inst::ETopology > s_TopologyStringToEnum
static string s_GetNormalizedString(const string &unnormalized)
static const unordered_map< string, string > s_ModNameMap
static const unordered_map< string, CSeq_inst::EMol > s_MolStringToEnum
static const unordered_map< string, CSeq_inst::EStrand > s_StrandStringToEnum
const unordered_map< CMolInfo::TBiomol, CSeq_inst::EMol > g_BiomolEnumToMolEnum
const TStringToEnumMap< CMolInfo::TBiomol > g_BiomolStringToEnum
string g_GetNormalizedModVal(const string &unnormalized)
const GenericPointer< typename T::ValueType > T2 value
@ eModSubcode_Unrecognized
@ eModSubcode_ConflictingValues
@ eModSubcode_InvalidValue
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)