NCBI C++ ToolKit
descr_mod_apply.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: descr_mod_apply.cpp 100615 2023-08-17 18:09:35Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Justin Foley
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
36 #include <objects/seq/Bioseq.hpp>
39 #include <objects/seq/Seqdesc.hpp>
40 #include <objects/seq/MolInfo.hpp>
56 #include <objects/seq/Pubdesc.hpp>
58 #include <objects/pub/Pub.hpp>
59 
62 #include <map>
63 #include <unordered_map>
64 #include <unordered_set>
65 #include <cassert>
68 #include "mod_to_enum.hpp"
69 #include "descr_mod_apply.hpp"
70 //#include <util/compile_time.hpp>
71 
74 
75 //MAKE_CONST_MAP(s_TechStringToEnum, NStr::eCase, const char*, CMolInfo::TTech,
76 static const unordered_map<string,CMolInfo::TTech> s_TechStringToEnum =
77 { { "?", CMolInfo::eTech_unknown },
78  { "barcode", CMolInfo::eTech_barcode },
79  { "both", CMolInfo::eTech_both },
80  { "compositewgshtgs", CMolInfo::eTech_composite_wgs_htgs },
81  { "concepttrans", CMolInfo::eTech_concept_trans },
82  { "concepttransa", CMolInfo::eTech_concept_trans_a },
83  { "derived", CMolInfo::eTech_derived },
84  { "est", CMolInfo::eTech_est },
85  { "flicdna", CMolInfo::eTech_fli_cdna },
86  { "geneticmap", CMolInfo::eTech_genemap },
87  { "htc", CMolInfo::eTech_htc },
88  { "htgs0", CMolInfo::eTech_htgs_0 },
89  { "htgs1", CMolInfo::eTech_htgs_1 },
90  { "htgs2", CMolInfo::eTech_htgs_2 },
91  { "htgs3", CMolInfo::eTech_htgs_3 },
92  { "physicalmap", CMolInfo::eTech_physmap },
93  { "seqpept", CMolInfo::eTech_seq_pept },
94  { "seqpepthomol", CMolInfo::eTech_seq_pept_homol },
95  { "seqpeptoverlap", CMolInfo::eTech_seq_pept_overlap },
96  { "standard", CMolInfo::eTech_standard },
97  { "sts", CMolInfo::eTech_sts },
98  { "survey", CMolInfo::eTech_survey },
99  { "targeted", CMolInfo::eTech_targeted },
100  { "tsa", CMolInfo::eTech_tsa },
101  { "wgs", CMolInfo::eTech_wgs }
102 };
103 //);
104 
105 
106 //MAKE_CONST_MAP(s_CompletenessStringToEnum, NStr::eCase, const char*, CMolInfo::TCompleteness,
107 
108 static const unordered_map<string,CMolInfo::TCompleteness> s_CompletenessStringToEnum =
109 { { "complete", CMolInfo::eCompleteness_complete },
110  { "hasleft", CMolInfo::eCompleteness_has_left },
111  { "hasright", CMolInfo::eCompleteness_has_right },
112  { "noends", CMolInfo::eCompleteness_no_ends },
113  { "noleft", CMolInfo::eCompleteness_no_left },
114  { "noright", CMolInfo::eCompleteness_no_right },
115  { "partial", CMolInfo::eCompleteness_partial }
116 };
117 //);
118 
119 
121 
123 
124 
126 {
127 public:
128 
130  virtual ~SDescrContainer_Base(void) = default;
131  virtual bool IsSet(void) const = 0;
132  virtual CSeq_descr& SetDescr(void) = 0;
133  };
134 
139 
140  CDescrCache(CBioseq& bioseq);
141 
142  CUser_object& SetDBLink(void);
145  CUser_object& SetFileTrack(void);
146 
147  CGB_block& SetGBblock(void);
148  CMolInfo& SetMolInfo(void);
149  CBioSource& SetBioSource(void);
150 
151  string& SetComment(void);
152  CPubdesc& SetPubdesc(void);
153 
154  TSubtype& SetSubtype(void);
155  TOrgMods& SetOrgMods(void);
157 
158 private:
159  enum EChoice : size_t {
160  eDBLink = 1,
161  eTpa = 2,
163  eMolInfo = 4,
164  eGBblock = 5,
167  };
168 
169  void x_SetUserType(const string& type, CUser_object& user_object);
170 
171  CSeqdesc& x_SetDescriptor(const EChoice eChoice,
172  function<bool(const CSeqdesc&)> f_verify,
173  function<CRef<CSeqdesc>(void)> f_create);
174 
175  CSeqdesc& x_SetDescriptor(const EChoice eChoice,
176  function<bool(const CSeqdesc&)> f_verify,
177  function<CRef<CSeqdesc>(void)> f_create,
178  TDescrContainer* pDescrContainer);
179 
180  TSubtype* m_pSubtype = nullptr;
181  TOrgMods* m_pOrgMods = nullptr;
183  bool m_FirstComment = true;
184  bool m_FirstPubdesc = true;
185  bool m_HasSetTaxid = false;
186  using TMap = unordered_map<EChoice, CRef<CSeqdesc>, hash<underlying_type<EChoice>::type>>;
188 
190  unique_ptr<TDescrContainer> m_pNucProtSetContainer;
191  unique_ptr<TDescrContainer> m_pBioseqContainer;
192 };
193 
194 
196  FReportError fReportError,
197  TSkippedMods& skipped_mods) :
198 m_pDescrCache(new CDescrCache(bioseq)),
199 m_fReportError(fReportError),
200 m_SkippedMods(skipped_mods)
201 {}
202 
203 
205 
206 
207 bool CDescrModApply::Apply(const TModEntry& mod_entry)
208 {
209  if (x_TryBioSourceMod(mod_entry, m_PreserveTaxId)) {
210  return true;
211  }
212 
213  {
214  using TMemFuncPtr = void (CDescrModApply::*)(const TModEntry&);
215 
216 
217  static const unordered_map<string,TMemFuncPtr>
218  s_MethodMap = {{"sra", &CDescrModApply::x_SetDBLink},
219  {"bioproject", &CDescrModApply::x_SetDBLink},
220  {"biosample", &CDescrModApply::x_SetDBLink},
221  {"mol-type", &CDescrModApply::x_SetMolInfoType},
222  {"completeness", &CDescrModApply::x_SetMolInfoCompleteness},
224  {"primary-accession", &CDescrModApply::x_SetTpaAssembly},
225  {"secondary-accession", &CDescrModApply::x_SetGBblockIds},
228  {"comment", &CDescrModApply::x_SetComment},
229  {"pmid", &CDescrModApply::x_SetPMID},
230  {"ft-map", &CDescrModApply::x_SetFileTrack},
231  {"ft-mod", &CDescrModApply::x_SetFileTrack}
232  };
233  const auto& mod_name = x_GetModName(mod_entry);
234  auto it = s_MethodMap.find(mod_name);
235  if (it != s_MethodMap.end()) {
236  auto mem_func_ptr = it->second;
237  (this->*mem_func_ptr)(mod_entry);
238  return true;
239  }
240  }
241  return false;
242 }
243 
244 
245 bool CDescrModApply::x_TryBioSourceMod(const TModEntry& mod_entry, bool& preserve_taxid)
246 {
247  const auto& name = x_GetModName(mod_entry);
248  if (name == "location") {
249  const auto& value = x_GetModValue(mod_entry);
250  static const auto s_GenomeStringToEnum = g_InitModNameGenomeMap();
251  auto it = s_GenomeStringToEnum.find(g_GetNormalizedModVal(value));
252  if (it == s_GenomeStringToEnum.end()) {
253  x_ReportInvalidValue(mod_entry.second.front());
254  return true;
255  }
256  m_pDescrCache->SetBioSource().SetGenome(it->second);
257  return true;
258  }
259 
260  if (name == "origin") {
261  const auto& value = x_GetModValue(mod_entry);
262  static const auto s_OriginStringToEnum = g_InitModNameOriginMap();
263  auto it = s_OriginStringToEnum.find(g_GetNormalizedModVal(value));
264  if (it == s_OriginStringToEnum.end()) {
265  x_ReportInvalidValue(mod_entry.second.front());
266  return true;
267  }
268  m_pDescrCache->SetBioSource().SetOrigin(it->second);
269  return true;
270  }
271 
272  if (name == "focus") {
273  const auto& value = x_GetModValue(mod_entry);
274  if (NStr::EqualNocase(value, "true")) {
275  m_pDescrCache->SetBioSource().SetIs_focus();
276  }
277  else
278  if (NStr::EqualNocase(value, "false")) {
279  x_ReportInvalidValue(mod_entry.second.front());
280  }
281  return true;
282  }
283 
284 
285  { // check to see if this is a subsource mod
286  auto it = s_SubSourceStringToEnum.find(name);
287  if (it != s_SubSourceStringToEnum.end()) {
288  x_SetSubtype(mod_entry);
289  return true;
290  }
291  }
292 
293  if (x_TryPCRPrimerMod(mod_entry)) {
294  return true;
295  }
296 
297  if (x_TryOrgRefMod(mod_entry, preserve_taxid)) {
298  return true;
299  }
300  return false;
301 }
302 
303 
305 {
306  const auto& mod_name = x_GetModName(mod_entry);
307  const auto subtype = s_SubSourceStringToEnum.at(mod_name);
308  if (subtype == CSubSource::eSubtype_plasmid_name) {
309  m_pDescrCache->SetBioSource().SetGenome(CBioSource::eGenome_plasmid);
310  }
311  const auto needs_no_text = CSubSource::NeedsNoText(subtype);
312  CBioSource::TSubtype subsources;
313  for (const auto& mod : mod_entry.second) {
314  const auto& value = mod.GetValue();
315  if (needs_no_text &&
316  !NStr::EqualNocase(value, "true")) {
318  return;
319  }
320  auto pSubSource = Ref(new CSubSource(subtype,value));
321  if (mod.IsSetAttrib()) {
322  pSubSource->SetAttrib(mod.GetAttrib());
323  }
324  m_pDescrCache->SetSubtype().push_back(std::move(pSubSource));
325  }
326 }
327 
328 
329 static void s_SetPrimerNames(const string& primer_names, CPCRPrimerSet& primer_set)
330 {
331  const auto set_size = primer_set.Get().size();
332  vector<string> names;
333  NStr::Split(primer_names, ":", names, NStr::fSplit_Tokenize);
334  const auto num_names = names.size();
335 
336  auto it = primer_set.Set().begin();
337  for (size_t i=0; i<num_names; ++i) {
338  if (NStr::IsBlank(names[i])) {
339  continue;
340  }
341  if (i<set_size) {
342  (*it)->SetName().Set(names[i]);
343  ++it;
344  }
345  else {
346  auto pPrimer = Ref(new CPCRPrimer());
347  pPrimer->SetName().Set(names[i]);
348  primer_set.Set().push_back(std::move(pPrimer));
349  }
350  }
351 }
352 
353 
354 static void s_SetPrimerSeqs(const string& primer_seqs, CPCRPrimerSet& primer_set)
355 {
356  const auto set_size = primer_set.Get().size();
357  vector<string> seqs;
358  NStr::Split(primer_seqs, ":", seqs, NStr::fSplit_Tokenize);
359  const auto num_seqs = seqs.size();
360 
361  auto it = primer_set.Set().begin();
362  for (size_t i=0; i<num_seqs; ++i) {
363  if (NStr::IsBlank(seqs[i])) {
364  continue;
365  }
366  if (i<set_size) {
367  (*it)->SetSeq().Set(seqs[i]);
368  ++it;
369  }
370  else {
371  auto pPrimer = Ref(new CPCRPrimer());
372  pPrimer->SetSeq().Set(seqs[i]);
373  primer_set.Set().push_back(std::move(pPrimer));
374  }
375  }
376 }
377 
378 
379 static void s_AppendPrimerNames(const string& mod, vector<string>& reaction_names)
380 {
381  vector<string> names;
383  reaction_names.insert(reaction_names.end(), names.begin(), names.end());
384 }
385 
386 
387 static void s_AppendPrimerSeqs(const string& mod, vector<string>& reaction_seqs)
388 {
389  vector<string> seqs;
391  if (seqs.size() > 1) {
392  if (seqs.front().front() == '(') {
393  seqs.front().erase(0,1);
394  }
395  if (seqs.back().back() == ')') {
396  seqs.back().erase(seqs.back().size()-1,1);
397  }
398  }
399 
400  for (auto& seq : seqs) {
401  reaction_seqs.push_back(NStr::ToLower(seq));
402  }
403 }
404 
405 
407 {
408  const auto& mod_name = x_GetModName(mod_entry);
409 
410  // Refactor to eliminate duplicated code
411  if (mod_name == "fwd-primer-name") {
412  vector<string> names;
413  for (const auto& mod : mod_entry.second)
414  {
415  s_AppendPrimerNames(mod.GetValue(), names);
416  }
417 
418  auto& pcr_reaction_set = m_pDescrCache->SetPCR_primers();
419  auto it = pcr_reaction_set.Set().begin();
420  for (const auto& reaction_names : names) {
421  if (it == pcr_reaction_set.Set().end()) {
422  auto pPCRReaction = Ref(new CPCRReaction());
423  s_SetPrimerNames(reaction_names, pPCRReaction->SetForward());
424  pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
425  }
426  else {
427  s_SetPrimerNames(reaction_names, (*it++)->SetForward());
428  }
429  }
430  return true;
431  }
432 
433 
434  if (mod_name == "fwd-primer-seq") {
435  vector<string> seqs;
436  for (const auto& mod : mod_entry.second)
437  {
438  s_AppendPrimerSeqs(mod.GetValue(), seqs);
439  }
440  auto& pcr_reaction_set = m_pDescrCache->SetPCR_primers();
441  auto it = pcr_reaction_set.Set().begin();
442  for (const auto& reaction_seqs : seqs) {
443  if (it == pcr_reaction_set.Set().end()) {
444  auto pPCRReaction = Ref(new CPCRReaction());
445  s_SetPrimerSeqs(reaction_seqs, pPCRReaction->SetForward());
446  pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
447  }
448  else {
449  s_SetPrimerSeqs(reaction_seqs, (*it++)->SetForward());
450  }
451  }
452  return true;
453  }
454 
455 
456  if(mod_name == "rev-primer-name")
457  {
458  vector<string> names;
459  for (const auto& mod : mod_entry.second) {
460  s_AppendPrimerNames(mod.GetValue(), names);
461  }
462  if (!names.empty()) {
463  auto& pcr_reaction_set = m_pDescrCache->SetPCR_primers();
464  const size_t num_reactions = pcr_reaction_set.Get().size();
465  const size_t num_names = names.size();
466  if (num_names <= num_reactions) {
467  auto it = pcr_reaction_set.Set().rbegin();
468  for(int i=num_names-1; i>=0; --i) { // don't use auto here. i stops when at -1.
469  s_SetPrimerNames(names[i], (*it++)->SetReverse());
470  }
471  }
472  else {
473 
474  auto it = pcr_reaction_set.Set().begin();
475  for (size_t i=0; i<num_reactions; ++i) {
476  s_SetPrimerNames(names[i], (*it++)->SetReverse());
477  }
478 
479  for (auto i=num_reactions; i<num_names; ++i) {
480  auto pPCRReaction = Ref(new CPCRReaction());
481  s_SetPrimerNames(names[i], pPCRReaction->SetReverse());
482  pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
483  }
484  }
485  }
486  return true;
487  }
488 
489 
490  if(mod_name == "rev-primer-seq")
491  {
492  vector<string> seqs;
493  for (const auto& mod : mod_entry.second) {
494  s_AppendPrimerSeqs(mod.GetValue(), seqs);
495  }
496  if (!seqs.empty()) {
497  auto& pcr_reaction_set = m_pDescrCache->SetPCR_primers();
498  const size_t num_reactions = pcr_reaction_set.Get().size();
499  const size_t num_seqs = seqs.size();
500  if (num_seqs <= num_reactions) {
501  auto it = pcr_reaction_set.Set().rbegin();
502  for(int i=num_seqs-1; i>=0; --i) { // don't use auto here. i stops at -1.
503  s_SetPrimerSeqs(seqs[i], (*it++)->SetReverse());
504  }
505  }
506  else {
507  auto it = pcr_reaction_set.Set().begin();
508  for (size_t i=0; i<num_reactions; ++i) {
509  s_SetPrimerSeqs(seqs[i], (*it++)->SetReverse());
510  }
511 
512  for (auto i=num_reactions; i<num_seqs; ++i) {
513  auto pPCRReaction = Ref(new CPCRReaction());
514  s_SetPrimerSeqs(seqs[i], pPCRReaction->SetReverse());
515  pcr_reaction_set.Set().push_back(std::move(pPCRReaction));
516  }
517  }
518  }
519  return true;
520  }
521 
522  return false;
523 }
524 
525 
526 bool CDescrModApply::x_TryOrgRefMod(const TModEntry& mod_entry, bool& preserve_taxid)
527 {
528  const auto& name = x_GetModName(mod_entry);
529  if (name == "taxname") {
530  const auto& value = x_GetModValue(mod_entry);
531  m_pDescrCache->SetBioSource().SetOrg().SetTaxname(value);
532  if (!preserve_taxid &&
533  m_pDescrCache->SetBioSource().GetOrg().GetTaxId() != ZERO_TAX_ID) {
534  // clear taxid if it does not occur in this modifier set
535  m_pDescrCache->SetBioSource().SetOrg().SetTaxId(ZERO_TAX_ID);
536  }
537  return true;
538  }
539 
540  if (name == "taxid") {
541  const auto& value = x_GetModValue(mod_entry);
542  TTaxId taxid;
543  try {
544  taxid = NStr::StringToNumeric<TTaxId>(value);
545  }
546  catch (...) {
547  x_ReportInvalidValue(mod_entry.second.front(), "Integer value expected.");
548  return true;
549  }
550  m_pDescrCache->SetBioSource().SetOrg().SetTaxId(taxid);
551  preserve_taxid = true;
552  return true;
553  }
554 
555 
556  if (name == "common") {
557  const auto& value = x_GetModValue(mod_entry);
558  m_pDescrCache->SetBioSource().SetOrg().SetCommon(value);
559  return true;
560  }
561 
562  if (name == "dbxref") {
563  x_SetDBxref(mod_entry);
564  return true;
565  }
566 
567  if (x_TryOrgNameMod(mod_entry)) {
568  return true;
569  }
570  return false;
571 }
572 
573 
575 {
576  vector<CRef<CDbtag>> dbtags;
577  for (const auto& value_attrib : mod_entry.second) {
578  const auto& value = value_attrib.GetValue();
579 
580  auto colon_pos = value.find(":");
581  string database;
582  string tag;
583  if (colon_pos < (value.length()-1)) {
584  database = value.substr(0, colon_pos);
585  tag = value.substr(colon_pos+1);
586  }
587  else {
588  database = "?";
589  tag = value;
590  }
591  auto pDbtag = Ref(new CDbtag());
592  pDbtag->SetDb(database);
593  pDbtag->SetTag().SetStr(tag);
594  dbtags.push_back(std::move(pDbtag));
595  }
596 
597  m_pDescrCache->SetBioSource().SetOrg().SetDb() = dbtags;
598 }
599 
600 
602 {
603  const auto& name = x_GetModName(mod_entry);
604  if (name == "lineage") {
605  const auto& value = x_GetModValue(mod_entry);
606  m_pDescrCache->SetBioSource().SetOrg().SetOrgname().SetLineage(value);
607  return true;
608  }
609 
610  if (name == "division") {
611  const auto& value = x_GetModValue(mod_entry);
612  m_pDescrCache->SetBioSource().SetOrg().SetOrgname().SetDiv(value);
613  return true;
614  }
615 
616  // check for gcode, mgcode, pgcode
617  using TSetCodeMemFn = void (COrgName::*)(int);
618  using TFunction = function<void(COrgName&, int)>;
619  static const
620  unordered_map<string, TFunction>
621  s_GetCodeSetterMethods =
622  {{"gcode", TFunction(static_cast<TSetCodeMemFn>(&COrgName::SetGcode))},
623  {"mgcode", TFunction(static_cast<TSetCodeMemFn>(&COrgName::SetMgcode))},
624  {"pgcode", TFunction(static_cast<TSetCodeMemFn>(&COrgName::SetPgcode))}};
625 
626  auto it = s_GetCodeSetterMethods.find(name);
627  if (it != s_GetCodeSetterMethods.end()) {
628  const auto& value = x_GetModValue(mod_entry);
629  int code;
630  try {
632  }
633  catch (...) {
634  x_ReportInvalidValue(mod_entry.second.front(), "Integer value expected.");
635  return true;
636  }
637  it->second(m_pDescrCache->SetBioSource().SetOrg().SetOrgname(), code);
638  return true;
639  }
640 
641  { // check for orgmod
642  auto it = s_OrgModStringToEnum.find(name);
643  if (it != s_OrgModStringToEnum.end()) {
644  x_SetOrgMod(mod_entry);
645  return true;
646  }
647  }
648  return false;
649 }
650 
651 
653 {
654  const auto& subtype = s_OrgModStringToEnum.at(x_GetModName(mod_entry));
655  for (const auto& mod : mod_entry.second) {
656  const auto& subname = mod.GetValue();
657  auto pOrgMod = Ref(new COrgMod(subtype,subname));
658  if (mod.IsSetAttrib()) {
659  pOrgMod->SetAttrib(mod.GetAttrib());
660  }
661  m_pDescrCache->SetOrgMods().push_back(std::move(pOrgMod));
662  }
663 }
664 
665 
667 {
668  const auto& name = x_GetModName(mod_entry);
669  static const unordered_map<string, string> s_NameToLabel =
670  {{"sra", "Sequence Read Archive"},
671  {"biosample", "BioSample"},
672  {"bioproject", "BioProject"}};
673 
674  const auto& label = s_NameToLabel.at(name);
675 
676  x_SetDBLinkField(label, mod_entry, *m_pDescrCache);
677 }
678 
679 
681  const TModEntry& mod_entry,
682  CDescrCache& descr_cache)
683 {
684  list<CTempString> value_list;
685  for (const auto& mod : mod_entry.second) {
686  list<CTempString> value_sublist;
687  const auto& vals = mod.GetValue();
688  NStr::Split(vals, ",; \t", value_sublist, NStr::fSplit_Tokenize);
689  value_list.splice(value_list.end(), value_sublist);
690  }
691 
692  if (value_list.empty()) {
693  return;
694  }
695  x_SetDBLinkFieldVals(label, value_list, descr_cache.SetDBLink());
696 }
697 
698 
700  const list<CTempString>& vals,
701  CUser_object& dblink)
702 {
703  if (vals.empty()) {
704  return;
705  }
706 
707  CRef<CUser_field> pField;
708  if (dblink.IsSetData()) {
709  for (auto pUserField : dblink.SetData()) {
710  if (pUserField &&
711  pUserField->IsSetLabel() &&
712  pUserField->GetLabel().IsStr() &&
713  NStr::EqualNocase(pUserField->GetLabel().GetStr(), label)) {
714  pField = pUserField;
715  break;
716  }
717  }
718  }
719 
720  if (!pField) {
721  pField = Ref(new CUser_field());
722  pField->SetLabel().SetStr() = label;
723  dblink.SetData().push_back(pField);
724  }
725 
726  pField->SetData().SetStrs().assign(vals.begin(), vals.end());
727 }
728 
729 
731 {
732  string value = x_GetModValue(mod_entry);
734  if (it != g_BiomolStringToEnum.end()) {
735  m_pDescrCache->SetMolInfo().SetBiomol(it->second);
736  return;
737  }
738  x_ReportInvalidValue(mod_entry.second.front());
739 }
740 
741 
743 {
744  string value = x_GetModValue(mod_entry);
746  if (it != s_TechStringToEnum.end()) {
747  m_pDescrCache->SetMolInfo().SetTech(it->second);
748  return;
749  }
750  x_ReportInvalidValue(mod_entry.second.front());
751 }
752 
753 
755 {
756  string value = x_GetModValue(mod_entry);
758  if (it != s_CompletenessStringToEnum.end()) {
759  m_pDescrCache->SetMolInfo().SetCompleteness(it->second);
760  return;
761  }
762  x_ReportInvalidValue(mod_entry.second.front());
763 }
764 
765 
767 {
768  list<string> vals;
769  for (const auto& mod : mod_entry.second) {
770  vals.push_back(mod.GetValue());
771  }
772 
773  string label = (mod_entry.first == "ft-map") ?
774  "Map-FileTrackURL" :
775  "BaseModification-FileTrackURL";
776 
777  for (auto val : vals) {
778  auto& user = m_pDescrCache->SetFileTrack();
779  auto pField = Ref(new CUser_field());
780  pField->SetLabel().SetStr(label);
781  pField->SetNum(1);
782  pField->SetData().SetStr(val);
783  user.SetData().push_back(pField);
784  }
785 }
786 
787 
789 {
790  list<CStringUTF8> accession_list;
791  for (const auto& mod : mod_entry.second) {
792  list<CTempString> value_sublist;
793  const auto& vals = mod.GetValue();
794  NStr::Split(vals, ",; \t", value_sublist, NStr::fSplit_Tokenize);
795 
796  list<CStringUTF8> accession_sublist;
797  try {
798  transform(value_sublist.begin(), value_sublist.end(), back_inserter(accession_sublist),
799  [](const CTempString& val) { return CUtf8::AsUTF8(val, eEncoding_UTF8); });
800  }
801  catch (...) {
803  continue;
804  }
805  accession_list.splice(accession_list.end(), accession_sublist);
806  }
807 
808  if (accession_list.empty()) {
809  return;
810  }
811 
812  auto make_user_field = [](const CStringUTF8& accession) {
813  auto pField = Ref(new CUser_field());
814  pField->SetLabel().SetId(0);
815  auto pSubfield = Ref(new CUser_field());
816  pSubfield->SetLabel().SetStr("accession");
817  pSubfield->SetData().SetStr(accession);
818  pField->SetData().SetFields().push_back(std::move(pSubfield));
819  return pField;
820  };
821 
822  auto& user = m_pDescrCache->SetTpaAssembly();
823  user.SetData().resize(accession_list.size());
824  transform(accession_list.begin(), accession_list.end(),
825  user.SetData().begin(), make_user_field);
826 }
827 
828 
830 {
831  list<string> id_list;
832  for (const auto& mod : mod_entry.second) {
833  list<CTempString> value_sublist;
834  const auto& vals = mod.GetValue();
835  NStr::Split(vals, ",; \t", value_sublist, NStr::fSplit_Tokenize);
836  for (const auto& val : value_sublist) {
838  try {
839  SSeqIdRange idrange(value);
840  id_list.insert(id_list.end(),idrange.begin(), idrange.end());
841  }
842  catch(...)
843  {
844  id_list.push_back(value);
845  }
846  }
847  }
848  auto& gb_block = m_pDescrCache->SetGBblock();
849  gb_block.SetExtra_accessions().assign(id_list.begin(), id_list.end());
850 }
851 
852 
854 {
855  list<CTempString> value_list;
856  for (const auto& mod : mod_entry.second) {
857  list<CTempString> value_sublist;
858  const auto& vals = mod.GetValue();
859  NStr::Split(vals, ",; \t", value_sublist, NStr::fSplit_Tokenize);
860  value_list.splice(value_list.end(), value_sublist);
861  }
862  if (value_list.empty()) {
863  return;
864  }
865  m_pDescrCache->SetGBblock().SetKeywords().assign(value_list.begin(), value_list.end());
866 }
867 
868 
869 
870 
872 {
873  list<int> id_list;
874  for (const auto& mod : mod_entry.second) {
875  list<CTempString> value_sublist;
876  const auto& vals = mod.GetValue();
877  NStr::Split(vals, ",; \t", value_sublist, NStr::fSplit_Tokenize);
878  list<int> id_sublist;
879  try {
880  transform(value_sublist.begin(), value_sublist.end(), back_inserter(id_sublist),
881  [](const CTempString& val) { return NStr::StringToUInt(val); });
882  }
883  catch (...) {
885  continue;
886  }
887  id_list.splice(id_list.end(), id_sublist);
888  }
889  if (id_list.empty()) {
890  return;
891  }
892 
893  auto make_user_field = [](const int& id) {
894  auto pField = Ref(new CUser_field());
895  auto pSubfield = Ref(new CUser_field());
896  pField->SetLabel().SetId(0);
897  pSubfield->SetLabel().SetStr("ProjectID");
898  pSubfield->SetData().SetInt(id);
899  pField->SetData().SetFields().push_back(pSubfield);
900  pSubfield.Reset(new CUser_field());
901  pSubfield->SetLabel().SetStr("ParentID");
902  pSubfield->SetData().SetInt(0);
903  pField->SetData().SetFields().push_back(pSubfield);
904  return pField;
905  };
906 
907  auto& user = m_pDescrCache->SetGenomeProjects();
908  user.SetData().resize(id_list.size());
909  transform(id_list.begin(), id_list.end(),
910  user.SetData().begin(), make_user_field);
911 }
912 
913 
915 {
916  for (const auto& mod : mod_entry.second) {
917  m_pDescrCache->SetComment() = mod.GetValue();
918  }
919 
920 }
921 
922 
923 void CDescrModApply::x_SetPMID(const TModEntry& mod_entry)
924 {
925  for (const auto& mod : mod_entry.second)
926  {
927  const auto& value = mod.GetValue();
928  TEntrezId pmid;
929  try {
930  pmid = NStr::StringToNumeric<TEntrezId>(value);
931  }
932  catch(...) {
933  x_ReportInvalidValue(mod_entry.second.front(), "Expected integer value.");
934  continue;
935  }
936  auto pPub = Ref(new CPub());
937  pPub->SetPmid().Set(pmid);
938  m_pDescrCache->SetPubdesc()
939  .SetPub()
940  .Set()
941  .push_back(std::move(pPub));
942  }
943 }
944 
945 
946 const string& CDescrModApply::x_GetModName(const TModEntry& mod_entry)
947 {
948  return CModHandler::GetCanonicalName(mod_entry);
949 }
950 
951 
952 const string& CDescrModApply::x_GetModValue(const TModEntry& mod_entry)
953 {
954  return CModHandler::AssertReturnSingleValue(mod_entry);
955 }
956 
957 
959  const string& add_msg)
960 {
961  const auto& mod_name = mod_data.GetName();
962  const auto& mod_value = mod_data.GetValue();
963  string msg = "Invalid value: " + mod_name + "=" + mod_value + ".";
964  if (!NStr::IsBlank(add_msg)) {
965  msg += " " + add_msg;
966  }
967 
968  if (m_fReportError) {
970  m_SkippedMods.push_back(mod_data);
971  return;
972  }
973 
974  NCBI_THROW(CModReaderException, eInvalidValue, msg);
975 }
976 
977 
978 static bool s_IsUserType(const CUser_object& user_object, const string& type)
979 {
980  return (user_object.IsSetType() &&
981  user_object.GetType().IsStr() &&
982  user_object.GetType().GetStr() == type);
983 }
984 
985 
986 template<class TObject>
988 {
989 public:
990  CDescrContainer(TObject& object) : m_Object(object) {}
991 
992  bool IsSet(void) const {
993  return m_Object.IsSetDescr();
994  }
995 
997  return m_Object.SetDescr();
998  }
999 
1000 private:
1001  TObject& m_Object;
1002 };
1003 
1004 //}
1005 
1007  : m_pBioseqContainer(new CDescrContainer<CBioseq>(bioseq))
1008 {
1009  auto pParentSet = bioseq.GetParentSet();
1010 
1011  if (pParentSet &&
1012  pParentSet->IsSetClass() &&
1013  pParentSet->GetClass() == CBioseq_set::eClass_nuc_prot)
1014  {
1015  auto& bioseq_set = const_cast<CBioseq_set&>(*pParentSet);
1018  }
1019  else {
1021  }
1022 
1024 }
1025 
1026 
1027 
1029  CUser_object& user_object)
1030 {
1031  user_object.SetType().SetStr(type);
1032 }
1033 
1034 
1036 {
1037  if (!pDesc ||
1038  !pDesc->IsPub()) {
1039  return false;
1040  }
1041 
1042  auto& pub_desc = pDesc->SetPub();
1043  pub_desc.SetPub().Set().remove_if([](const CRef<CPub>& pPub) { return (pPub && pPub->IsPmid()); });
1044  return pub_desc.GetPub().Get().empty();
1045 }
1046 
1047 
1049 {
1051 
1052  if (m_FirstPubdesc) {
1053  if (m_pPrimaryContainer->IsSet()) { // Probably need to change this
1055  }
1056  m_FirstPubdesc = false;
1057  }
1058 
1059  auto pDesc = Ref(new CSeqdesc());
1060  m_pPrimaryContainer->SetDescr().Set().push_back(pDesc);
1061  return pDesc->SetPub();
1062 }
1063 
1064 
1066 {
1068 
1069  if (m_FirstComment) {
1070  if (m_pPrimaryContainer->IsSet()) {
1071  m_pPrimaryContainer->SetDescr().Set().remove_if([](const CRef<CSeqdesc>& pDesc) { return (pDesc && pDesc->IsComment()); });
1072  }
1073  m_FirstComment = false;
1074  }
1075 
1076  auto pDesc = Ref(new CSeqdesc());
1077  m_pPrimaryContainer->SetDescr().Set().push_back(pDesc);
1078  return pDesc->SetComment();
1079 }
1080 
1081 
1083 {
1084  return x_SetDescriptor(eDBLink,
1085  [](const CSeqdesc& desc) {
1086  return (desc.IsUser() && desc.GetUser().IsDBLink());
1087  },
1088  []() {
1089  auto pDesc = Ref(new CSeqdesc());
1090  pDesc->SetUser().SetObjectType(CUser_object::eObjectType_DBLink);
1091  return pDesc;
1092  }).SetUser();
1093 }
1094 
1095 
1096 
1098 {
1099  return x_SetDescriptor(eFileTrack,
1100  [](const CSeqdesc& desc) {
1101  return (desc.IsUser() && s_IsUserType(desc.GetUser(), "FileTrack"));
1102  },
1103  [this]() {
1104  auto pDesc = Ref(new CSeqdesc());
1105  x_SetUserType("FileTrack", pDesc->SetUser());
1106  return pDesc;
1107  }
1108  ).SetUser();
1109 }
1110 
1111 
1113 {
1114  return x_SetDescriptor(eTpa,
1115  [](const CSeqdesc& desc) {
1116  return (desc.IsUser() && s_IsUserType(desc.GetUser(), "TpaAssembly"));
1117  },
1118  [this]() {
1119  auto pDesc = Ref(new CSeqdesc());
1120  x_SetUserType("TpaAssembly", pDesc->SetUser());
1121  return pDesc;
1122  }
1123  ).SetUser();
1124 }
1125 
1126 
1128 {
1130  [](const CSeqdesc& desc) {
1131  return (desc.IsUser() && s_IsUserType(desc.GetUser(), "GenomeProjectsDB"));
1132  },
1133  [this]() {
1134  auto pDesc = Ref(new CSeqdesc());
1135  x_SetUserType("GenomeProjectsDB", pDesc->SetUser());
1136  return pDesc;
1137  }
1138  ).SetUser();
1139 }
1140 
1141 
1143 {
1144  return x_SetDescriptor(eGBblock,
1145  [](const CSeqdesc& desc) {
1146  return desc.IsGenbank();
1147  },
1148  []() {
1149  auto pDesc = Ref(new CSeqdesc());
1150  pDesc->SetGenbank();
1151  return pDesc;
1152  }
1153  ).SetGenbank();
1154 }
1155 
1156 
1158 { // MolInfo is a Bioseq descriptor
1159  return x_SetDescriptor(eMolInfo,
1160  [](const CSeqdesc& desc) {
1161  return desc.IsMolinfo();
1162  },
1163  []() {
1164  auto pDesc = Ref(new CSeqdesc());
1165  pDesc->SetMolinfo();
1166  return pDesc;
1167  },
1168  m_pBioseqContainer.get()
1169  ).SetMolinfo();
1170 }
1171 
1172 
1174 {
1175  return x_SetDescriptor(eBioSource,
1176  [](const CSeqdesc& desc) {
1177  return desc.IsSource();
1178  },
1179  []() {
1180  auto pDesc = Ref(new CSeqdesc());
1181  pDesc->SetSource();
1182  return pDesc;
1183  }
1184  ).SetSource();
1185 }
1186 
1187 
1189 {
1190  if (!m_pSubtype) {
1192  m_pSubtype->clear();
1193  }
1194 
1195  return *m_pSubtype;
1196 }
1197 
1198 
1200 {
1201  if (!m_pOrgMods) {
1202  m_pOrgMods = &(SetBioSource().SetOrg().SetOrgname().SetMod());
1203  m_pOrgMods->clear();
1204  }
1205 
1206  return *m_pOrgMods;
1207 }
1208 
1209 
1210 
1212 {
1213  if (!m_pPCRReactionSet) {
1215  m_pPCRReactionSet->Set().clear();
1216  }
1217  return *m_pPCRReactionSet;
1218 }
1219 
1220 
1222  function<bool(const CSeqdesc&)> f_verify,
1223  function<CRef<CSeqdesc>(void)> f_create)
1224 {
1225  return x_SetDescriptor(eChoice, f_verify, f_create, m_pPrimaryContainer);
1226 }
1227 
1228 
1230  function<bool(const CSeqdesc&)> f_verify,
1231  function<CRef<CSeqdesc>(void)> f_create,
1232  TDescrContainer* pDescrContainer)
1233 {
1234  auto it = m_Cache.find(eChoice);
1235  if (it != m_Cache.end()) {
1236  return *(it->second);
1237  }
1238 
1239 
1240  // Search for descriptor on Bioseq
1241  if (pDescrContainer->IsSet()) {
1242  for (auto& pDesc : pDescrContainer->SetDescr().Set()) {
1243  if (pDesc.NotEmpty() && f_verify(*pDesc)) {
1244  m_Cache.insert(make_pair(eChoice, pDesc));
1245  return *pDesc;
1246  }
1247  }
1248  }
1249 
1250  auto pDesc = f_create();
1251  m_Cache.insert(make_pair(eChoice, pDesc));
1252  pDescrContainer->SetDescr().Set().push_back(pDesc);
1253  return *pDesc;
1254 }
1255 
1256 
1259 
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
CConstRef< CBioseq_set > GetParentSet(void) const
Definition: Bioseq_set.cpp:312
Definition: Dbtag.hpp:53
CPCRReactionSet * m_pPCRReactionSet
CBioSource::TSubtype TSubtype
SDescrContainer_Base TDescrContainer
unique_ptr< TDescrContainer > m_pNucProtSetContainer
void x_SetUserType(const string &type, CUser_object &user_object)
CUser_object & SetFileTrack(void)
TSubtype * m_pSubtype
unordered_map< EChoice, CRef< CSeqdesc >, hash< underlying_type< EChoice >::type > > TMap
CPCRReactionSet & SetPCR_primers(void)
CDescrCache(CBioseq &bioseq)
CGB_block & SetGBblock(void)
TOrgMods * m_pOrgMods
CSeqdesc & x_SetDescriptor(const EChoice eChoice, function< bool(const CSeqdesc &)> f_verify, function< CRef< CSeqdesc >(void)> f_create)
unique_ptr< TDescrContainer > m_pBioseqContainer
CMolInfo & SetMolInfo(void)
TDescrContainer * m_pPrimaryContainer
CUser_object & SetDBLink(void)
CBioSource & SetBioSource(void)
CPubdesc & SetPubdesc(void)
CUser_object & SetGenomeProjects(void)
string & SetComment(void)
CUser_object & SetTpaAssembly(void)
COrgName::TMod TOrgMods
TSubtype & SetSubtype(void)
TOrgMods & SetOrgMods(void)
bool IsSet(void) const
CDescrContainer(TObject &object)
CSeq_descr & SetDescr(void)
void x_SetMolInfoType(const TModEntry &mod_entry)
void x_SetOrgMod(const TModEntry &mod_entry)
CDescrModApply(CBioseq &bioseq, FReportError fReportError, TSkippedMods &skipped_mods)
void x_SetMolInfoCompleteness(const TModEntry &mod_entry)
bool x_TryBioSourceMod(const TModEntry &mod_entry, bool &preserve_taxid)
bool x_TryOrgNameMod(const TModEntry &mod_entry)
virtual ~CDescrModApply()
bool x_TryOrgRefMod(const TModEntry &mod_entry, bool &preserve_taxid)
TSkippedMods & m_SkippedMods
void x_SetTpaAssembly(const TModEntry &mod_entry)
bool x_TryPCRPrimerMod(const TModEntry &mod_entry)
CModAdder::FReportError FReportError
void x_SetFileTrack(const TModEntry &mod_entry)
CModAdder::TSkippedMods TSkippedMods
void x_SetPMID(const TModEntry &mod_entry)
void x_SetComment(const TModEntry &mod_entry)
CModHandler::TMods::value_type TModEntry
void x_SetGBblockKeywords(const TModEntry &mod_entry)
void x_SetGBblockIds(const TModEntry &mod_entry)
static const string & x_GetModValue(const TModEntry &mod_entry)
void x_SetDBLink(const TModEntry &mod_entry)
FReportError m_fReportError
unique_ptr< CDescrCache > m_pDescrCache
void x_SetDBLinkFieldVals(const string &label, const list< CTempString > &vals, CUser_object &db_link)
void x_SetDBLinkField(const string &label, const TModEntry &mod_entry, CDescrCache &descr_cache)
void x_SetDBxref(const TModEntry &mod_entry)
static const string & x_GetModName(const TModEntry &mod_entry)
void x_SetMolInfoTech(const TModEntry &mod_entry)
void x_ReportInvalidValue(const CModData &mod_data, const string &add_msg="")
void x_SetSubtype(const TModEntry &mod_entry)
bool Apply(const TModEntry &mod_entry)
void x_SetGenomeProjects(const TModEntry &mod_entry)
const string & GetValue(void) const
Definition: mod_reader.hpp:76
const string & GetName(void) const
Definition: mod_reader.hpp:72
static const string & GetCanonicalName(const TModEntry &mod_entry)
Definition: mod_reader.cpp:393
static const string & AssertReturnSingleValue(const TModEntry &mod_entry)
Definition: mod_reader.cpp:399
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
CPCRPrimerSet –.
CPCRPrimer –.
Definition: PCRPrimer.hpp:66
CPCRReactionSet –.
CPCRReaction –.
Definition: PCRReaction.hpp:66
Definition: Pub.hpp:56
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
static bool NeedsNoText(const TSubtype &subtype)
Definition: SubSource.cpp:233
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
bool IsDBLink() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const auto s_OrgModStringToEnum
static const unordered_map< string, CMolInfo::TTech > s_TechStringToEnum
static bool s_EmptyAfterRemovingPMID(CRef< CSeqdesc > &pDesc)
static const auto s_SubSourceStringToEnum
static void s_SetPrimerNames(const string &primer_names, CPCRPrimerSet &primer_set)
static const unordered_map< string, CMolInfo::TCompleteness > s_CompletenessStringToEnum
static bool s_IsUserType(const CUser_object &user_object, const string &type)
static void s_AppendPrimerNames(const string &mod, vector< string > &reaction_names)
static void s_AppendPrimerSeqs(const string &mod, vector< string > &reaction_seqs)
static void s_SetPrimerSeqs(const string &primer_seqs, CPCRPrimerSet &primer_set)
static const struct name_t names[]
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const_iterator end(void) const
Definition: Seq_id.hpp:979
const_iterator begin(void) const
Definition: Seq_id.hpp:977
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
std::string CStringUTF8
Definition: ncbistl.hpp:254
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3191
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
Definition: ncbistr.hpp:3889
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eEncoding_UTF8
Definition: ncbistr.hpp:201
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
static const char label[]
const Tdata & Get(void) const
Get the member data.
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
void SetPcr_primers(TPcr_primers &value)
Assign a value to Pcr_primers data member.
Definition: BioSource_.cpp:124
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
Tdata & Set(void)
Assign a value to data member.
Tdata & Set(void)
Assign a value to data member.
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
TData & SetData(void)
Assign a value to Data data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
void SetLabel(TLabel &value)
Assign a value to Label data member.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
const TType & GetType(void) const
Get the Type member data.
TMgcode & SetMgcode(void)
Assign a value to Mgcode data member.
Definition: OrgName_.hpp:981
TPgcode & SetPgcode(void)
Assign a value to Pgcode data member.
Definition: OrgName_.hpp:1075
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
TGcode & SetGcode(void)
Assign a value to Gcode data member.
Definition: OrgName_.hpp:934
bool IsPmid(void) const
Check if variant Pmid is selected.
Definition: Pub_.hpp:677
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
bool IsGenbank(void) const
Check if variant Genbank is selected.
Definition: Seqdesc_.hpp:1090
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
Definition: Seqdesc_.hpp:1196
void SetPub(TPub &value)
Assign a value to Pub data member.
Definition: Pubdesc_.cpp:72
TPub & SetPub(void)
Select the variant.
Definition: Seqdesc_.cpp:362
bool IsComment(void) const
Check if variant Comment is selected.
Definition: Seqdesc_.hpp:1052
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
bool IsPub(void) const
Check if variant Pub is selected.
Definition: Seqdesc_.hpp:1096
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
bool IsUser(void) const
Check if variant User is selected.
Definition: Seqdesc_.hpp:1122
@ eCompleteness_has_left
5' or NH3 end present
Definition: MolInfo_.hpp:161
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_has_right
3' or COOH end present
Definition: MolInfo_.hpp:162
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_partial
partial but no details given
Definition: MolInfo_.hpp:157
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ eTech_htgs_2
ordered High Throughput sequence contig
Definition: MolInfo_.hpp:138
@ eTech_physmap
from physical mapping techniques
Definition: MolInfo_.hpp:129
@ eTech_htc
high throughput cDNA
Definition: MolInfo_.hpp:142
@ eTech_both
concept transl. w/ partial pept. seq.
Definition: MolInfo_.hpp:133
@ eTech_targeted
targeted locus sets/studies
Definition: MolInfo_.hpp:147
@ eTech_seq_pept_homol
sequenced peptide, ordered by homology
Definition: MolInfo_.hpp:135
@ eTech_composite_wgs_htgs
composite of WGS and HTGS
Definition: MolInfo_.hpp:145
@ eTech_sts
Sequence Tagged Site.
Definition: MolInfo_.hpp:126
@ eTech_htgs_3
finished High Throughput sequence
Definition: MolInfo_.hpp:139
@ eTech_seq_pept_overlap
sequenced peptide, ordered by overlap
Definition: MolInfo_.hpp:134
@ eTech_htgs_1
unordered High Throughput sequence contig
Definition: MolInfo_.hpp:137
@ eTech_concept_trans
conceptual translation
Definition: MolInfo_.hpp:131
@ eTech_tsa
transcriptome shotgun assembly
Definition: MolInfo_.hpp:146
@ eTech_standard
standard sequencing
Definition: MolInfo_.hpp:124
@ eTech_wgs
whole genome shotgun sequencing
Definition: MolInfo_.hpp:143
@ eTech_seq_pept
peptide was sequenced
Definition: MolInfo_.hpp:132
@ eTech_survey
one-pass genomic sequence
Definition: MolInfo_.hpp:127
@ eTech_barcode
barcode of life project
Definition: MolInfo_.hpp:144
@ eTech_htgs_0
single genomic reads for coordination
Definition: MolInfo_.hpp:141
@ eTech_derived
derived from other data, not a primary entity
Definition: MolInfo_.hpp:130
@ eTech_fli_cdna
full length insert cDNA
Definition: MolInfo_.hpp:140
@ eTech_est
Expressed Sequence Tag.
Definition: MolInfo_.hpp:125
@ eTech_concept_trans_a
conceptual transl. supplied by author
Definition: MolInfo_.hpp:136
@ eTech_genemap
from genetic mapping techniques
Definition: MolInfo_.hpp:128
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
use only n Cassandra database for the lookups</td > n</tr > n< tr > n< td > yes</td > n< td > do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
int i
static char * subname
Definition: mdb_load.c:26
const TStringToEnumMap< CMolInfo::TBiomol > g_BiomolStringToEnum
TStringToEnumMap< CSubSource::ESubtype > g_InitModNameSubSrcSubtypeMap(void)
TStringToEnumMap< COrgMod::ESubtype > g_InitModNameOrgSubtypeMap(void)
string g_GetNormalizedModVal(const string &unnormalized)
Definition: mod_to_enum.cpp:42
TStringToEnumMap< CBioSource::EGenome > g_InitModNameGenomeMap(void)
TStringToEnumMap< CBioSource::EOrigin > g_InitModNameOriginMap(void)
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const char * tag
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
@ eModSubcode_InvalidValue
#define assert(x)
Definition: srv_diag.hpp:58
virtual ~SDescrContainer_Base(void)=default
virtual bool IsSet(void) const =0
virtual CSeq_descr & SetDescr(void)=0
SSeqIdRange –.
Definition: Seq_id.hpp:895
Definition: inftrees.h:24
Definition: _hash_fun.h:40
Definition: type.c:6
#define const
Definition: zconf.h:232
Modified on Fri May 24 14:53:39 2024 by modify_doxy.py rev. 669887