NCBI C++ ToolKit
mod_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: mod_reader.cpp 101390 2023-12-07 22:42:38Z kans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Justin Foley
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
35 #include <objects/seq/Bioseq.hpp>
36 #include <objects/seq/MolInfo.hpp>
37 #include <objects/seq/Seq_inst.hpp>
38 #include <objects/seq/Seq_hist.hpp>
46 #include <map>
47 #include <unordered_map>
48 #include <unordered_set>
49 #include <cassert>
50 //#include <util/compile_time.hpp>
51 
52 #include "mod_to_enum.hpp"
53 #include "descr_mod_apply.hpp"
54 #include "feature_mod_apply.hpp"
55 
58 
59 //MAKE_CONST_MAP(s_ModNameMap, NStr::eCase, const char*, const char*,
60 
// Alias table: maps an accepted alternative spelling of a modifier name to
// its canonical name. CModHandler::GetCanonicalName() looks up the
// normalized name here and falls back to the normalized name itself when
// there is no entry.
static const unordered_map<string, string> s_ModNameMap = {
    { "top",                   "topology" },
    { "mol",                   "molecule" },
    { "moltype",               "mol-type" },
    { "fwd-pcr-primer-name",   "fwd-primer-name" },
    { "fwd-pcr-primer-names",  "fwd-primer-name" },
    { "fwd-primer-names",      "fwd-primer-name" },
    { "fwd-pcr-primer-seq",    "fwd-primer-seq" },
    { "fwd-pcr-primer-seqs",   "fwd-primer-seq" },
    { "fwd-primer-seqs",       "fwd-primer-seq" },
    { "rev-pcr-primer-name",   "rev-primer-name" },
    { "rev-pcr-primer-names",  "rev-primer-name" },
    { "rev-primer-names",      "rev-primer-name" },
    { "rev-pcr-primer-seq",    "rev-primer-seq" },
    { "rev-pcr-primer-seqs",   "rev-primer-seq" },
    { "rev-primer-seqs",       "rev-primer-seq" },
    { "org",                   "taxname" },
    { "organism",              "taxname" },
    { "div",                   "division" },
    { "notes",                 "note" },
    { "completedness",         "completeness" },
    { "gene-syn",              "gene-synonym" },
    { "genesyn",               "gene-synonym" },
    { "genesynonym",           "gene-synonym" },
    { "prot",                  "protein" },
    { "prot-desc",             "protein-desc" },
    { "function",              "activity" },
    { "secondary",             "secondary-accession" },
    { "secondary-accessions",  "secondary-accession" },
    { "keywords",              "keyword" },
    { "primary",               "primary-accession" },
    { "primary-accessions",    "primary-accession" },
    { "projects",              "project" },
    { "db-xref",               "dbxref" },
    { "pubmed",                "pmid" },
    { "ft-url-mod",            "ft-mod" },
    { "ft-url",                "ft-map" },
    { "geo-loc-name",          "country" }
};
100 //);
101 
102 
104 {
105  "dosage",
106  "transposon-name",
107  "plastid-name",
108  "insertion-seq-name",
109  "old-lineage",
110  "old-name",
111  "gene",
112  "gene-synonym",
113  "allele",
114  "locus-tag"
115 };
116 
117 
119 {
120  "topology", // Seq-inst
121  "molecule",
122  "strand",
123  "gene", // Gene-ref
124  "allele",
125  "locus-tag",
126  "protein-desc",// Protein-ref
127  "mol-type", // MolInfo descriptor
128  "tech",
129  "completeness",
130  "location", // Biosource descriptor
131  "origin",
132  "focus",
133  "taxname", // Biosource - Org-ref
134  "common",
135  "lineage", // Biosource - Org-ref - OrgName
136  "division",
137  "gcode",
138  "mgcode",
139  "pgcode"
140 };
141 
142 
143 //MAKE_CONST_MAP(s_StrandStringToEnum, NStr::eCase, const char*, CSeq_inst::EStrand,
144 static const unordered_map<string, CSeq_inst::EStrand> s_StrandStringToEnum =
145 {{"single", CSeq_inst::eStrand_ss},
146  {"double", CSeq_inst::eStrand_ds},
147  {"mixed", CSeq_inst::eStrand_mixed},
148  {"other", CSeq_inst::eStrand_other}
149  };
150 //);
151 
152 
153 //MAKE_CONST_MAP(s_MolStringToEnum, NStr::eCase, const char*, CSeq_inst::EMol,
154 static const unordered_map<string, CSeq_inst::EMol> s_MolStringToEnum =
155 {{"dna", CSeq_inst::eMol_dna},
156  {"rna", CSeq_inst::eMol_rna},
157  {"aa", CSeq_inst::eMol_aa},
158  {"na", CSeq_inst::eMol_na},
159  {"other", CSeq_inst::eMol_other}
160  };
161  //);
162 
163 
164 //MAKE_CONST_MAP(s_TopologyStringToEnum, NStr::eCase, const char*, CSeq_inst::ETopology,
165 static const unordered_map<string, CSeq_inst::ETopology> s_TopologyStringToEnum =
166 {{"linear", CSeq_inst::eTopology_linear},
167  {"circular", CSeq_inst::eTopology_circular},
168  {"tandem", CSeq_inst::eTopology_tandem},
169  {"other", CSeq_inst::eTopology_other}
170  };
171  //);
172 
173 /*
174 MAKE_CONST_MAP(s_BiomolEnumToMolEnum, NStr::eNocase, CMolInfo::TBiomol, CSeq_inst::EMol,
175 {{ CMolInfo::eBiomol_genomic, CSeq_inst::eMol_dna},
176  { CMolInfo::eBiomol_pre_RNA, CSeq_inst::eMol_rna},
177  { CMolInfo::eBiomol_mRNA, CSeq_inst::eMol_rna },
178  { CMolInfo::eBiomol_rRNA, CSeq_inst::eMol_rna},
179  { CMolInfo::eBiomol_tRNA, CSeq_inst::eMol_rna},
180  { CMolInfo::eBiomol_snRNA, CSeq_inst::eMol_rna},
181  { CMolInfo::eBiomol_scRNA, CSeq_inst::eMol_rna},
182  { CMolInfo::eBiomol_genomic_mRNA, CSeq_inst::eMol_rna },
183  { CMolInfo::eBiomol_cRNA, CSeq_inst::eMol_rna },
184  { CMolInfo::eBiomol_snoRNA, CSeq_inst::eMol_rna},
185  { CMolInfo::eBiomol_transcribed_RNA, CSeq_inst::eMol_rna},
186  { CMolInfo::eBiomol_ncRNA, CSeq_inst::eMol_rna},
187  { CMolInfo::eBiomol_tmRNA, CSeq_inst::eMol_rna},
188  { CMolInfo::eBiomol_peptide, CSeq_inst::eMol_aa},
189  { CMolInfo::eBiomol_other_genetic, CSeq_inst::eMol_other},
190  { CMolInfo::eBiomol_other, CSeq_inst::eMol_other}
191 });
192 */
193 
194 
196 
197 
198 void CModHandler::SetExcludedMods(const vector<string>& excluded_mods)
199 {
200  m_ExcludedModifiers.clear();
201  transform(excluded_mods.begin(), excluded_mods.end(),
202  inserter(m_ExcludedModifiers, m_ExcludedModifiers.end()),
203  [](const string& mod_name) { return GetCanonicalName(mod_name); });
204 }
205 
206 void CModHandler::SetIgnoredMods(const list<string>& ignored_mods)
207 {
208  m_IgnoredModifiers.clear();
209  transform(ignored_mods.begin(), ignored_mods.end(),
210  inserter(m_IgnoredModifiers, m_IgnoredModifiers.end()),
211  [](const string& mod_name) { return GetCanonicalName(mod_name); });
212 }
213 
214 
215 void CModHandler::SetMods(const TMods& mods)
216 {
217  m_Mods = mods;
218 }
219 
220 
222  EHandleExisting handle_existing,
223  TModList& rejected_mods,
224  FReportError fPostMessage)
225 {
226  rejected_mods.clear();
227 
228  unordered_set<string> current_set;
229  TMods accepted_mods;
230  TMods conflicting_mods;
231 
232  for (const auto& mod : mods) {
233  const auto& canonical_name = GetCanonicalName(mod.GetName());
234  const auto allow_multiple_values = x_MultipleValuesAllowed(canonical_name);
235  // Don't want to check for errors if we're not going to keep the modifier
236  if (handle_existing == ePreserve ||
237  (handle_existing == eAppendPreserve &&
238  !allow_multiple_values)) {
239  if (m_Mods.find(canonical_name) != m_Mods.end()) {
240  continue;
241  }
242  }
243 
244  if (m_IgnoredModifiers.find(canonical_name) !=
245  m_IgnoredModifiers.end()) {
246  rejected_mods.push_back(mod);
247  continue;
248  }
249 
250  if (m_ExcludedModifiers.find(canonical_name) !=
251  m_ExcludedModifiers.end()) {
252  string message = "The following modifier is unsupported in this context and will be ignored: " + mod.GetName() + ".";
253  if (fPostMessage) {
254  fPostMessage(mod, message, eDiag_Warning, eModSubcode_Excluded);
255  }
256  rejected_mods.push_back(mod);
257  continue;
258  }
259 
260  if (x_IsDeprecated(canonical_name)) {
261  string message = "Use of the following modifier in a sequence file is discouraged and the information will be ignored: " + mod.GetName() + ".";
262  if (fPostMessage) {
263  fPostMessage(mod, message, eDiag_Warning, eModSubcode_Deprecated);
264  }
265  rejected_mods.push_back(mod);
266  continue;
267  }
268 
269  const auto first_occurrence = current_set.insert(canonical_name).second;
270 
271  // Put this in its own method
272  if (!first_occurrence) {
273  string msg;
274  EDiagSev sev;
275  EModSubcode subcode;
276 
277  auto it = accepted_mods.find(canonical_name);
278  if (it != accepted_mods.end() &&
279  NStr::EqualNocase(it->second.front().GetValue(),
280  mod.GetValue())) {
281  msg = "Duplicated modifier value detected, ignoring duplicate, no action required: "
282  + mod.GetName() + "=" + mod.GetValue() + ".";
283  sev = eDiag_Warning;
284  subcode = eModSubcode_Duplicate;
285  }
286  else
287  if (!allow_multiple_values) {
288  msg = "Conflicting modifiers detected. Provide one modifier with one value for: " + mod.GetName() + ".";
289  sev = eDiag_Error;
291 
292  if (it != accepted_mods.end()) {
293  conflicting_mods[canonical_name] = it->second;
294  accepted_mods.erase(it);
295  }
296  conflicting_mods[canonical_name].push_back(mod);
297  }
298  else
299  {
300  accepted_mods[canonical_name].push_back(mod);
301  continue;
302  }
303 
304  CModData reportMod =
305  (subcode == eModSubcode_Duplicate) ?
306  mod :
307  CModData( mod.GetName(), kEmptyStr);
308 
309  if (fPostMessage) {
310  fPostMessage(reportMod, msg, sev, subcode);
311  continue;
312  }
313  NCBI_THROW(CModReaderException, eMultipleValuesForbidden, msg);
314  }
315 
316  accepted_mods[canonical_name].push_back(mod);
317  }
318 
319  for (auto& conflicts : conflicting_mods) {
320  rejected_mods.splice(rejected_mods.end(), conflicts.second);
321  }
322 
323  x_SaveMods(std::move(accepted_mods), handle_existing, m_Mods);
324 }
325 
326 
327 void CModHandler::x_SaveMods(TMods&& mods, EHandleExisting handle_existing, TMods& dest)
328 {
329  if (handle_existing == eReplace) {
330  for (auto& mod_entry : mods) {
331  const auto& canonical_name = mod_entry.first;
332  dest[canonical_name] = mod_entry.second;
333  }
334  }
335  else
336  if (handle_existing == ePreserve) {
337  dest.insert(make_move_iterator(mods.begin()),
338  make_move_iterator(mods.end()));
339  }
340  else
341  if (handle_existing == eAppendReplace) {
342  for (auto& mod_entry : mods) {
343  const auto& canonical_name = mod_entry.first;
344  auto& dest_mod_list = dest[canonical_name];
345  if (x_MultipleValuesAllowed(canonical_name)){
346  dest_mod_list.splice(
347  dest_mod_list.end(),
348  std::move(mod_entry.second));
349  }
350  else {
351  dest_mod_list = std::move(mod_entry.second);
352  }
353  }
354  }
355  else
356  if (handle_existing == eAppendPreserve) {
357  for (auto& mod_entry : mods) {
358  const auto& canonical_name = mod_entry.first;
359  auto& dest_mod_list = dest[canonical_name];
360  if (dest_mod_list.empty()) {
361  dest_mod_list = std::move(mod_entry.second);
362  }
363  else
364  if (x_MultipleValuesAllowed(canonical_name)){
365  dest_mod_list.splice(
366  dest_mod_list.end(),
367  std::move(mod_entry.second));
368  }
369  }
370  }
371 }
372 
373 
374 bool CModHandler::x_MultipleValuesAllowed(const string& canonical_name)
375 {
376  return (sm_MultipleValuesForbidden.find(canonical_name) ==
378 }
379 
380 
382 {
383  return m_Mods;
384 }
385 
386 
388 {
389  m_Mods.clear();
390 }
391 
392 
393 const string& CModHandler::GetCanonicalName(const TModEntry& mod_entry)
394 {
395  return mod_entry.first;
396 }
397 
398 
399 const string& CModHandler::AssertReturnSingleValue(const TModEntry& mod_entry)
400 {
401  assert(mod_entry.second.size() == 1);
402  return mod_entry.second.front().GetValue();
403 }
404 
405 string CModHandler::GetCanonicalName(const string& name)
406 {
407  const auto normalized_name = x_GetNormalizedString(name);
408  const auto it = s_ModNameMap.find(normalized_name);
409  if (it != s_ModNameMap.end()) {
410  return it->second;
411  }
412 
413  return normalized_name;
414 }
415 
416 
417 bool CModHandler::x_IsDeprecated(const string& canonical_name)
418 {
419  return (sm_DeprecatedModifiers.find(canonical_name) !=
420  sm_DeprecatedModifiers.end());
421 }
422 
423 
424 static string s_GetNormalizedString(const string& unnormalized)
425 {
426  string normalized = unnormalized;
427  NStr::ToLower(normalized);
428  NStr::TruncateSpacesInPlace(normalized);
429  auto new_end = unique(normalized.begin(),
430  normalized.end(),
431  [](char a, char b) {
432  return ((a=='-' || a=='_' || a==' ') &&
433  (b=='-' || b=='_' || b==' ')); });
434 
435  normalized.erase(new_end, normalized.end());
436  for (char& c : normalized) {
437  if (c == '_' || c == ' ') {
438  c = '-';
439  }
440  }
441  return normalized;
442 }
443 
444 string CModHandler::x_GetNormalizedString(const string& name)
445 {
446  return s_GetNormalizedString(name);
447 }
448 
449 
450 void CModAdder::Apply(const CModHandler& mod_handler,
451  CBioseq& bioseq,
452  TSkippedMods& skipped_mods,
453  FReportError fPostMessage)
454 {
455  Apply(mod_handler, bioseq, skipped_mods, false, fPostMessage);
456 }
457 
458 
459 void CModAdder::Apply(const CModHandler& mod_handler,
460  CBioseq& bioseq,
461  TSkippedMods& skipped_mods,
462  bool logInfo,
463  FReportError fPostMessage)
464 {
465  skipped_mods.clear();
466 
467  CDescrModApply descr_mod_apply(bioseq,
468  fPostMessage,
469  skipped_mods);
470 
471  CFeatModApply feat_mod_apply(bioseq,
472  fPostMessage,
473  skipped_mods);
474 
475  list<string> applied_mods;
476  for (const auto& mod_entry : mod_handler.GetMods()) {
477  try {
478  bool applied = false;
479  if (descr_mod_apply.Apply(mod_entry)) {
480  const string& mod_name = x_GetModName(mod_entry);
481  if (mod_name == "secondary-accession"){
482  x_SetHist(mod_entry, bioseq.SetInst());
483  }
484  else if (mod_name == "mol-type") {
485  // mol-type appears before molecule in the default-ordered
486  // map keys. Therefore, if both mol-type and molecule are
487  // specified, molecule will take precedence over (or, more precisly, overwrite)
488  // the information extracted from mol-type when setting Seq-inst::mol
489  x_SetMoleculeFromMolType(mod_entry, bioseq.SetInst());
490  }
491  applied = true;
492  }
493  else
494  if (x_TrySeqInstMod(mod_entry, bioseq.SetInst(), skipped_mods, fPostMessage) ||
495  feat_mod_apply.Apply(mod_entry)) {
496  applied = true;
497  }
498 
499  if (applied) {
500  if (logInfo) {
501  applied_mods.push_back(x_GetModName(mod_entry));
502  }
503  continue;
504  }
505 
506  // Report unrecognised modifier
507  if (fPostMessage) {
508  skipped_mods.insert(skipped_mods.end(),
509  mod_entry.second.begin(),
510  mod_entry.second.end());
511 
512  for (const auto& modData : mod_entry.second) {
513  string msg = "Unrecognized modifier: " + modData.GetName() + ".";
514  fPostMessage(modData, msg, eDiag_Warning, eModSubcode_Unrecognized);
515  }
516  continue;
517  }
518  string canonicalName = x_GetModName(mod_entry);
519  string msg = "Unrecognized modifier: " + canonicalName + ".";
520  NCBI_THROW(CModReaderException, eUnknownModifier, msg);
521  }
522  catch(const CModReaderException& e) {
523  skipped_mods.insert(skipped_mods.end(),
524  mod_entry.second.begin(),
525  mod_entry.second.end());
526  if (fPostMessage) {
527  string canonicalName = x_GetModName(mod_entry);
528  fPostMessage(CModData( canonicalName, kEmptyStr), e.GetMsg(), eDiag_Error, eModSubcode_Undefined);
529  }
530  else {
531  throw; // rethrow e
532  }
533  }
534  }
535 
536  if (!applied_mods.empty()) {
537  string msg = "Applied mods: ";
538  for (const auto& applied_mod : applied_mods) {
539  msg += " " + applied_mod;
540  }
541  fPostMessage(CModData("",""), msg, eDiag_Info, eModSubcode_Applied);
542  }
543 }
544 
545 
547  TSkippedMods& skipped_mods,
548  FReportError fPostMessage)
549 {
550  const auto& mod_name = mod_data.GetName();
551  const auto& mod_value = mod_data.GetValue();
552  string msg = "Invalid value: " + mod_name + "=" + mod_value + ".";
553 
554  if (fPostMessage) {
555  fPostMessage(mod_data, msg, eDiag_Error, eModSubcode_InvalidValue);
556  skipped_mods.push_back(mod_data);
557  return;
558  }
559 
560  NCBI_THROW(CModReaderException, eInvalidValue, msg);
561 }
562 
563 
564 
565 const string& CModAdder::x_GetModName(const TModEntry& mod_entry)
566 {
567  return CModHandler::GetCanonicalName(mod_entry);
568 }
569 
570 
571 const string& CModAdder::x_GetModValue(const TModEntry& mod_entry)
572 {
573  return CModHandler::AssertReturnSingleValue(mod_entry);
574 }
575 
576 
578  const TModEntry& mod_entry,
579  CSeq_inst& seq_inst,
580  TSkippedMods& skipped_mods,
581  FReportError fPostMessage)
582 {
583  const auto& mod_name = x_GetModName(mod_entry);
584 
585  if (mod_name == "strand") {
586  x_SetStrand(mod_entry, seq_inst, skipped_mods, fPostMessage);
587  return true;
588  }
589 
590  if (mod_name == "molecule") {
591  x_SetMolecule(mod_entry, seq_inst, skipped_mods, fPostMessage);
592  return true;
593  }
594 
595  if (mod_name == "topology") {
596  x_SetTopology(mod_entry, seq_inst, skipped_mods, fPostMessage);
597  return true;
598  }
599 
600 // Note that we do not check for the 'secondary-accession' modifier here.
601 // secondary-accession also modifies the GB_block descriptor
602 // The check for secondary-accession and any resulting call
603 // to x_SetHist is performed before x_TrySeqInstMod
604 // is invoked.
605 
606  return false;
607 }
608 
609 
610 
611 void CModAdder::x_SetStrand(const TModEntry& mod_entry,
612  CSeq_inst& seq_inst,
613  TSkippedMods& skipped_mods,
614  FReportError fPostMessage)
615 {
616  string value = x_GetModValue(mod_entry);
617  const auto it = s_StrandStringToEnum.find(g_GetNormalizedModVal(value));
618  if (it == s_StrandStringToEnum.end()) {
619  x_ReportInvalidValue(mod_entry.second.front(), skipped_mods, fPostMessage);
620  return;
621  }
622  seq_inst.SetStrand(it->second);
623 }
624 
625 
626 void CModAdder::x_SetMolecule(const TModEntry& mod_entry,
627  CSeq_inst& seq_inst,
628  TSkippedMods& skipped_mods,
629  FReportError fPostMessage)
630 {
631  string value = x_GetModValue(mod_entry);
632  const auto it = s_MolStringToEnum.find(g_GetNormalizedModVal(value));
633  if (it == s_MolStringToEnum.end()) {
634  x_ReportInvalidValue(mod_entry.second.front(), skipped_mods, fPostMessage);
635  return;
636  }
637  seq_inst.SetMol(it->second);
638 }
639 
640 
641 void CModAdder::x_SetMoleculeFromMolType(const TModEntry& mod_entry, CSeq_inst& seq_inst)
642 {
643  string value = x_GetModValue(mod_entry);
645  if (it == g_BiomolStringToEnum.end()) {
646  // No need to report an error here.
647  // The error is reported in x_SetMolInfoType
648  return;
649  }
650  CSeq_inst::EMol mol = g_BiomolEnumToMolEnum.at(it->second);
651  seq_inst.SetMol(mol);
652 }
653 
654 
655 void CModAdder::x_SetTopology(const TModEntry& mod_entry,
656  CSeq_inst& seq_inst,
657  TSkippedMods& skipped_mods,
658  FReportError fPostMessage)
659 {
660  string value = x_GetModValue(mod_entry);
661  const auto it = s_TopologyStringToEnum.find(g_GetNormalizedModVal(value));
662  if (it == s_TopologyStringToEnum.end()) {
663  x_ReportInvalidValue(mod_entry.second.front(), skipped_mods, fPostMessage);
664  return;
665  }
666  seq_inst.SetTopology(it->second);
667 }
668 
669 
670 void CModAdder::x_SetHist(const TModEntry& mod_entry, CSeq_inst& seq_inst)
671 {
672  list<string> id_list;
673  for (const auto& mod : mod_entry.second) {
674  const auto& vals = mod.GetValue();
675  list<CTempString> value_sublist;
676  NStr::Split(vals, ",; \t", value_sublist, NStr::fSplit_Tokenize);
677  for (const auto& val : value_sublist) {
679  try {
680  SSeqIdRange idrange(value);
681  id_list.insert(id_list.end(), idrange.begin(), idrange.end());
682  }
683  catch (...)
684  {
685  id_list.push_back(value);
686  }
687  }
688  }
689 
690  if (id_list.empty()) {
691  return;
692  }
693 
694  list<CRef<CSeq_id>> secondary_ids;
695  // try catch statement
696  transform(id_list.begin(), id_list.end(), back_inserter(secondary_ids),
697  [](const string& id_string) { return Ref(new CSeq_id(id_string)); });
698 
699  seq_inst.SetHist().SetReplaces().SetIds() = std::move(secondary_ids);
700 }
701 
702 
704  const string& seqId,
705  int lineNum,
706  IObjtoolsListener* pMessageListener)
707  : m_SeqId(seqId),
708  m_LineNum(lineNum),
709  m_pMessageListener(pMessageListener)
710  {}
711 
712 
714  const CModData& mod,
715  const string& msg,
716  EDiagSev sev,
717  EModSubcode subcode)
718 {
719  if (!m_pMessageListener) {
720  if (sev == eDiag_Info) {
721  return;
722  }
723  if (sev == eDiag_Warning) {
724  ERR_POST(Warning << msg);
725  return;
726  }
727  NCBI_THROW2(CObjReaderParseException, eFormat, msg, 0);
728  }
729 
730 
731  if (!m_pMessageListener->SevEnabled(sev)) {
732  return;
733  }
734 
738  sev,
740  subcode,
741  m_SeqId,
742  m_LineNum,
743  msg,
744  "",
745  mod.GetName(),
746  mod.GetValue()));
747 
748  if (!m_pMessageListener->PutMessage(*pErr)) {
749  NCBI_THROW2(CObjReaderParseException, eFormat, msg, 0);
750  }
751 }
752 
753 
754 void CTitleParser::Apply(const CTempString& title, TModList& mods, string& remainder)
755 {
756  mods.clear();
757  remainder.clear();
758  size_t start_pos = 0;
759  while(start_pos < title.size()) {
760  size_t lb_pos, end_pos, eq_pos;
761  lb_pos = start_pos;
762  if (x_FindBrackets(title, lb_pos, end_pos, eq_pos)) {
763  if (eq_pos < end_pos) {
764  if ((lb_pos > start_pos) ) {
765  auto left_remainder = NStr::TruncateSpaces_Unsafe(title.substr(start_pos, lb_pos-start_pos));
766  if (!left_remainder.empty()) {
767  if (!remainder.empty()) {
768  remainder.append(" ");
769  }
770  remainder.append(left_remainder);
771  }
772  }
773  auto name = NStr::TruncateSpaces_Unsafe(title.substr(lb_pos+1, eq_pos-(lb_pos+1)));
774  auto value = NStr::TruncateSpaces_Unsafe(title.substr(eq_pos+1, end_pos-(eq_pos+1)));
775  mods.emplace_back(name, value);
776  }
777  start_pos = end_pos+1;
778  }
779  else {
780  auto right_remainder = NStr::TruncateSpaces_Unsafe(title.substr(start_pos));
781  if (!right_remainder.empty()) {
782  if (!remainder.empty()) {
783  remainder.append(" ");
784  }
785  remainder.append(right_remainder);
786  }
787  return;
788  }
789  }
790 }
791 
792 
794 {
795  size_t start_pos = 0;
796  while (start_pos < title.size()) {
797  size_t lb_pos, end_pos, eq_pos;
798  lb_pos = start_pos;
799  if (x_FindBrackets(title, lb_pos, end_pos, eq_pos)) {
800  if (eq_pos < end_pos) {
801  return true;
802  }
803  start_pos = end_pos+1;
804  }
805  else {
806  return false;
807  }
808  }
809  return false;
810 }
811 
812 
813 bool CTitleParser::x_FindBrackets(const CTempString& line, size_t& start, size_t& stop, size_t& eq_pos)
814 { // Copied from CSourceModParser
815  size_t i = start;
816 
817  eq_pos = CTempString::npos;
818  const char* s = line.data() + start;
819 
820  int num_unmatched_left_brackets = 0;
821  while (i < line.size())
822  {
823  switch (*s)
824  {
825  case '[':
826  num_unmatched_left_brackets++;
827  if (num_unmatched_left_brackets == 1)
828  {
829  start = i;
830  }
831  break;
832  case '=':
833  if (num_unmatched_left_brackets > 0 && eq_pos == CTempString::npos) {
834  eq_pos = i;
835  }
836  break;
837  case ']':
838  if (num_unmatched_left_brackets == 1)
839  {
840  stop = i;
841  return (eq_pos<stop);
842  }
843  else
844  if (num_unmatched_left_brackets == 0) {
845  return false;
846  }
847  else
848  {
849  num_unmatched_left_brackets--;
850  }
851  }
852  i++; s++;
853  }
854  return false;
855 };
856 
857 
860 
861 
862 
#define static
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
AutoPtr –.
Definition: ncbimisc.hpp:401
IObjtoolsListener * m_pMessageListener
Definition: mod_reader.hpp:226
CDefaultModErrorReporter(const string &seqId, int lineNum, IObjtoolsListener *pMessageListener)
Definition: mod_reader.cpp:703
void operator()(const CModData &mod, const string &msg, EDiagSev sev, EModSubcode subcode)
Definition: mod_reader.cpp:713
bool Apply(const TModEntry &mod_entry)
bool Apply(const TModEntry &mod_entry)
static CLineErrorEx * Create(EProblem eProblem, EDiagSev eSeverity, int code, int subcode, const std::string &strSeqId, unsigned int uLine, const std::string &strErrorMessage=string(""), const std::string &strFeatureName=string(""), const std::string &strQualifierName=string(""), const std::string &strQualifierValue=string(""), const TVecOfLines &vecOfOtherLines=TVecOfLines())
Use this because the constructor is protected.
Definition: line_error.cpp:103
static void Apply(const CModHandler &mod_handler, CBioseq &bioseq, TSkippedMods &skipped_mods, FPostMessage fPostMessage=nullptr)
Definition: mod_reader.cpp:450
static void x_SetMoleculeFromMolType(const TModEntry &mod_entry, CSeq_inst &seq_inst)
Definition: mod_reader.cpp:641
static void x_ReportInvalidValue(const CModData &mod_data, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
Definition: mod_reader.cpp:546
static void x_SetStrand(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
Definition: mod_reader.cpp:611
static const string & x_GetModName(const TModEntry &mod_entry)
Definition: mod_reader.cpp:565
static void x_SetHist(const TModEntry &mod_entry, CSeq_inst &seq_inst)
Definition: mod_reader.cpp:670
static void x_SetMolecule(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
Definition: mod_reader.cpp:626
list< CModData > TSkippedMods
Definition: mod_reader.hpp:155
static bool x_TrySeqInstMod(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
Definition: mod_reader.cpp:577
CModHandler::FReportError FReportError
Definition: mod_reader.hpp:156
CModHandler::TModEntry TModEntry
Definition: mod_reader.hpp:154
static const string & x_GetModValue(const TModEntry &mod_entry)
Definition: mod_reader.cpp:571
static void x_SetTopology(const TModEntry &mod_entry, CSeq_inst &seq_inst, TSkippedMods &skipped_mods, FPostMessage fPostMessage)
Definition: mod_reader.cpp:655
const string & GetValue(void) const
Definition: mod_reader.hpp:76
const string & GetName(void) const
Definition: mod_reader.hpp:72
void SetExcludedMods(const vector< string > &excluded_mods)
Definition: mod_reader.cpp:198
function< void(const CModData &mod, const string &message, EDiagSev severity, EModSubcode subcode)> FReportError
Definition: mod_reader.hpp:105
static bool x_MultipleValuesAllowed(const string &canonical_name)
Definition: mod_reader.cpp:374
static const string & GetCanonicalName(const TModEntry &mod_entry)
Definition: mod_reader.cpp:393
void SetIgnoredMods(const list< string > &ignored_mods)
Definition: mod_reader.cpp:206
TNameSet m_IgnoredModifiers
Definition: mod_reader.hpp:140
const TMods & GetMods(void) const
Definition: mod_reader.cpp:381
list< CModData > TModList
Definition: mod_reader.hpp:94
void AddMods(const TModList &mods, EHandleExisting handle_existing, TModList &rejected_mods, FReportError fReportError=nullptr)
Definition: mod_reader.cpp:221
void Clear(void)
Definition: mod_reader.cpp:387
TNameSet m_ExcludedModifiers
Definition: mod_reader.hpp:139
static const TNameSet sm_MultipleValuesForbidden
Definition: mod_reader.hpp:137
static const TNameSet sm_DeprecatedModifiers
Definition: mod_reader.hpp:138
static const string & AssertReturnSingleValue(const TModEntry &mod_entry)
Definition: mod_reader.cpp:399
static string x_GetNormalizedString(const string &name)
Definition: mod_reader.cpp:444
unordered_set< string > TNameSet
Definition: mod_reader.hpp:135
void SetMods(const TMods &mods)
Definition: mod_reader.cpp:215
TMods::value_type TModEntry
Definition: mod_reader.hpp:104
void x_SaveMods(TMods &&mods, EHandleExisting handle_existing, TMods &dest)
Definition: mod_reader.cpp:327
static bool x_IsDeprecated(const string &canonical_name)
Definition: mod_reader.cpp:417
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CModHandler::TModList TModList
Definition: mod_reader.hpp:233
static bool HasMods(const CTempString &title)
Definition: mod_reader.cpp:793
static bool x_FindBrackets(const CTempString &line, size_t &start, size_t &stop, size_t &eq_pos)
Definition: mod_reader.cpp:813
static void Apply(const CTempString &title, TModList &mods, string &remainder)
Definition: mod_reader.cpp:754
@ eProblem_GeneralParsingError
Definition: line_error.hpp:106
virtual bool SevEnabled(EDiagSev severity) const
Definition: listener.cpp:43
virtual bool PutMessage(const IObjtoolsMessage &message)=0
void erase(iterator pos)
Definition: map.hpp:167
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
const_iterator end(void) const
Definition: Seq_id.hpp:979
const_iterator begin(void) const
Definition: Seq_id.hpp:977
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3191
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
static const size_type npos
Definition: tempstr.hpp:72
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
void SetHist(THist &value)
Assign a value to Hist data member.
Definition: Seq_inst_.cpp:164
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetTopology(TTopology value)
Assign a value to Topology data member.
Definition: Seq_inst_.hpp:739
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
void SetStrand(TStrand value)
Assign a value to Strand data member.
Definition: Seq_inst_.hpp:786
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
@ eTopology_tandem
some part of tandem repeat
Definition: Seq_inst_.hpp:125
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
@ eStrand_other
default ds for DNA, ss for RNA, pept
Definition: Seq_inst_.hpp:138
@ eStrand_ds
double strand
Definition: Seq_inst_.hpp:136
@ eStrand_ss
single strand
Definition: Seq_inst_.hpp:135
int i
static const unordered_map< string, CSeq_inst::ETopology > s_TopologyStringToEnum
Definition: mod_reader.cpp:165
static string s_GetNormalizedString(const string &unnormalized)
Definition: mod_reader.cpp:424
static const unordered_map< string, string > s_ModNameMap
Definition: mod_reader.cpp:61
static const unordered_map< string, CSeq_inst::EMol > s_MolStringToEnum
Definition: mod_reader.cpp:154
static const unordered_map< string, CSeq_inst::EStrand > s_StrandStringToEnum
Definition: mod_reader.cpp:144
const unordered_map< CMolInfo::TBiomol, CSeq_inst::EMol > g_BiomolEnumToMolEnum
const TStringToEnumMap< CMolInfo::TBiomol > g_BiomolStringToEnum
string g_GetNormalizedModVal(const string &unnormalized)
Definition: mod_to_enum.cpp:42
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
unsigned int a
Definition: ncbi_localip.c:102
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
@ eReader_Mods
@ eModSubcode_Unrecognized
@ eModSubcode_Duplicate
@ eModSubcode_Deprecated
@ eModSubcode_Undefined
@ eModSubcode_Excluded
@ eModSubcode_Applied
@ eModSubcode_ConflictingValues
@ eModSubcode_InvalidValue
#define assert(x)
Definition: srv_diag.hpp:58
SSeqIdRange –.
Definition: Seq_id.hpp:895
#define const
Definition: zconf.h:232
Modified on Wed Apr 17 13:10:34 2024 by modify_doxy.py rev. 669887