NCBI C++ ToolKit
macro_editor_context.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: macro_editor_context.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrea Asztalos
27 
28  */
29 
30 
31 #include <ncbi_pch.hpp>
37 
38 #include <util/static_map.hpp>
40 #include <gui/objutils/utils.hpp>
53 
56 
58 {
59  m_DescriptorMap.clear();
60  using TPos = TBimapStrings::value_type;
61  m_DescriptorMap.insert(TPos("Any", kEmptyStr));
65  m_DescriptorMap.insert(TPos("Numbering", CSeqdesc::SelectionName(CSeqdesc::e_Num)));
69  m_DescriptorMap.insert(TPos("Publication", "Publication"));
71  m_DescriptorMap.insert(TPos("User", "User"));
72  m_DescriptorMap.insert(TPos("SWISS-PROT", CSeqdesc::SelectionName(CSeqdesc::e_Sp)));
79  m_DescriptorMap.insert(TPos("Heterogen", CSeqdesc::SelectionName(CSeqdesc::e_Het)));
80  m_DescriptorMap.insert(TPos("BioSource", "BioSource"));
81  m_DescriptorMap.insert(TPos("MolInfo", "MolInfo"));
82  m_DescriptorMap.insert(TPos("StructuredComment", "StructuredComment"));
83  m_DescriptorMap.insert(TPos("DBLink", "DBLink"));
84 
86  set<string> existing;
87 
88  vector<const CFeatListItem *> featlist = GetSortedFeatList(seh);
89  ITERATE(vector<const CFeatListItem *>, feat_it, featlist) {
90  const CFeatListItem& item = **feat_it;
91  string desc = item.GetDescription();
92  int feat_type = item.GetType();
93  int feat_subtype = item.GetSubtype();
94  if (existing.find(desc) == existing.end()) {
95  existing.insert(desc);
96  m_Featuretypes.push_back(desc);
97  m_FeatureMap.insert(make_pair(desc, make_pair(feat_type, feat_subtype)));
98  }
99  }
100 
101  copy(m_SourceTextKeys.begin(), m_SourceTextKeys.end(), back_inserter(m_SourceKeywords));
102  copy(m_SourceTaxKeys.begin(), m_SourceTaxKeys.end(), back_inserter(m_SourceKeywords));
106  for (auto& it : m_PubdescKeywords) {
107  it = "pub " + it;
108  }
109  for (auto& it : m_SourceTextKeys) {
110  m_SourceKeywords.push_back(it);
111  }
113 }
114 
115 string CMacroEditorContext::FindInBimapOrEmpty(const string& type, const TBimapStrings& str_bimap)
116 {
117  auto left_it = str_bimap.left.find(type);
118  if (left_it != str_bimap.left.end()) {
119  return left_it->second;
120  }
121 
122  auto right_it = str_bimap.right.find(type);
123  if (right_it != str_bimap.right.end()) {
124  return right_it->second;
125  }
126  return kEmptyStr;
127 }
128 
130 {
131  bool allow_other = false;
132  switch (type) {
134  return x_GetBsrcTextFieldnames(false);
136  return x_GetBsrcTextFieldnames(true);
138  return x_GetBsrcTaxFieldnames();
144  return x_GetAllBsrcFieldnames();
146  return m_MolinfoKeywords;
160  return m_DBLinkKeywords;
162  return m_MiscKeywords;
164  return m_MiscKeywordsToRmv;
166  return CPubFieldType::GetFieldNames(true);
170  return x_GetDescriptorFieldnames();
172  return x_GetStrCommFieldnames();
174  return m_GeneKeywords;
176  return x_GetProteinFieldnames();
180  return m_CdsGeneProtKeywords;
182  return{ "Local id", "Definition line" };
184  return m_BsrcAutodefWords;
186  return x_GetSetClassFieldnames(true);
188  return x_GetSetClassFieldnames(false);
189  default:
190  break;
191  }
192  return vector<string>();
193 }
194 
195 vector<string> CMacroEditorContext::x_GetBsrcTextFieldnames(bool complete) const
196 {
197  vector<string> fieldnames;
198  // subsource modifiers
199  for (size_t i = 0; i < CSubSource::eSubtype_other; i++) {
200  try {
201  string qual_name = CSubSource::GetSubtypeName(static_cast<CSubSource::TSubtype>(i));
202  if (!NStr::IsBlank(qual_name) && !CSubSource::IsDiscouraged(static_cast<CSubSource::TSubtype>(i))) {
203  fieldnames.push_back((qual_name));
204  }
205 
206  }
207  catch (const exception&) {
208  }
209  }
210  fieldnames.push_back(kSubSourceNote);
211 
212  // orgmod modifiers
213  for (size_t i = 0; i < COrgMod::eSubtype_other; i++) {
214  try {
215  string qual_name = COrgMod::GetSubtypeName(static_cast<COrgMod::TSubtype>(i));
216  if (!NStr::IsBlank(qual_name)) {
217  // special case for nat-host
218  if (NStr::EqualNocase(qual_name, kNatHost)) {
219  fieldnames.push_back(kHost);
220  }
221  else {
222  fieldnames.push_back(qual_name);
223  if (complete &&
227  fieldnames.push_back(qual_name + macro::kColl_suffix);
228  fieldnames.push_back(qual_name + macro::kInst_suffix);
229  fieldnames.push_back(qual_name + macro::kSpecid_suffix);
230  }
231  }
232  }
233 
234  }
235  catch (const exception&) {
236  }
237  }
238  fieldnames.push_back(kOrgModNote);
239 
240  // primers
241  fieldnames.push_back(kFwdPrimerName);
242  fieldnames.push_back(kFwdPrimerSeq);
243  fieldnames.push_back(kRevPrimerName);
244  fieldnames.push_back(kRevPrimerSeq);
245 
246  fieldnames.push_back(kDbXref);
247 
248  sort(fieldnames.begin(), fieldnames.end());
249  return fieldnames;
250 }
251 
253 {
254  return { "taxname", "common name", "division", "lineage" };
255 }
256 
258 {
259  vector<string> fieldnames = x_GetBsrcTaxFieldnames();
260  vector<string> text_names = x_GetBsrcTextFieldnames(true);
261  fieldnames.reserve(fieldnames.size() + text_names.size() + 2);
262  move(text_names.begin(), text_names.end(), inserter(fieldnames, fieldnames.end()));
263  fieldnames.push_back("location");
264  fieldnames.push_back("origin");
265  return fieldnames;
266 }
267 
269 {
270  return { "Field", "Database name", "Field name" };
271 }
272 
274 {
275  vector<string> fieldnames;
276  for (auto& it : m_DescriptorMap.left) {
277  fieldnames.push_back(it.first);
278  }
279  sort(fieldnames.begin(), fieldnames.end());
280  return fieldnames;
281 }
282 
284 {
285  return {
286  "protein name",
287  "protein description",
288  "protein EC number",
289  "protein activity",
290  "protein comment",
291  };
292 }
293 
295 {
296  vector<string> fieldnames;
297  if (all) {
298  CEnumeratedTypeValues::TValues values = CBioseq_set::ENUM_METHOD_NAME(EClass)()->GetValues();
299  for (auto& it : values) {
300  if (it.first == "gi" || it.first == "gibb") {
301  it.first += "-set";
302  }
303  toupper((unsigned char)it.first[0]);
304  fieldnames.push_back(it.first);
305  }
306  }
307  else {
308  fieldnames.push_back(CBioseq_set::ENUM_METHOD_NAME(EClass)()->FindName(CBioseq_set::eClass_genbank, true));
309  fieldnames.push_back(CBioseq_set::ENUM_METHOD_NAME(EClass)()->FindName(CBioseq_set::eClass_mut_set, true));
310  fieldnames.push_back(CBioseq_set::ENUM_METHOD_NAME(EClass)()->FindName(CBioseq_set::eClass_pop_set, true));
311  fieldnames.push_back(CBioseq_set::ENUM_METHOD_NAME(EClass)()->FindName(CBioseq_set::eClass_phy_set, true));
312  fieldnames.push_back(CBioseq_set::ENUM_METHOD_NAME(EClass)()->FindName(CBioseq_set::eClass_eco_set, true));
313  fieldnames.push_back(CBioseq_set::ENUM_METHOD_NAME(EClass)()->FindName(CBioseq_set::eClass_wgs_set, true));
314  fieldnames.push_back(CBioseq_set::ENUM_METHOD_NAME(EClass)()->FindName(CBioseq_set::eClass_small_genome_set, true));
315  for (auto&& it : fieldnames) {
316  toupper((unsigned char)it[0]);
317  }
318  }
319  return fieldnames;
320 }
321 
322 vector<string> CMacroEditorContext::GetFeatureTypes(bool for_removal, bool to_create, bool include_all) const
323 {
324  // used for feature removal
325  if (for_removal) {
326  return m_Featuretypes;
327  }
328 
329  vector<string> feat_list;
330  if (to_create) {
331  feat_list.push_back("Gene");
332  feat_list.push_back("misc_feature");
333  }
334 
335  // used for removing feature qualifier
336  if (include_all) {
337  feat_list.push_back("All");
338  }
339  for (auto& it : m_Featuretypes) {
343  if (!to_create) {
344  feat_list.push_back(it);
345  } else if (type == CSeqFeatData::e_Imp && find(feat_list.begin(), feat_list.end(), it) == feat_list.end()) {
346  feat_list.push_back(it);
347  }
348  }
349  }
350  return feat_list;
351 }
352 
353 pair<int, int> CMacroEditorContext::GetFeatureType(const string& name) const
354 {
355  auto it = m_FeatureMap.find(name);
356  return (it != m_FeatureMap.end()) ? it->second : pair<int, int>();
357 }
358 
359 vector<string> CMacroEditorContext::GetLegalQualifiers(const string& feat_name) const
360 {
362  auto quals = CSeqFeatData::GetLegalQualifiers(subtype);
363  vector<string> qual_names;
364  for (auto it : quals) {
365  qual_names.push_back(CSeqFeatData::GetQualifierAsString(it));
366  }
367  return qual_names;
368 }
369 
371 {
373  vector<string> names;
374  FromArrayString(qual_list, names);
375  return names;
376 }
377 
378 vector<string> CMacroEditorContext::GetRNATypes() const
379 {
381 }
382 
383 vector<string> CMacroEditorContext::GetncRNAClassTypes(bool include_any) const
384 {
385  if (!include_any) {
387  }
388 
389  vector<string> types{ "any" };
390  vector<string> class_types = CRNA_gen::GetncRNAClassList();
391  types.reserve(class_types.size() + 1);
392  copy(class_types.begin(), class_types.end(), back_inserter(types));
393  return types;
394 }
395 
396 vector<string> CMacroEditorContext::GetRNAProductnames(const string& rna_type) const
397 {
398  if (NStr::EqualNocase(rna_type, "rRNA")) {
399  return {
400  "4.5S ribosomal RNA",
401  "5S ribosomal RNA",
402  "5.8S ribosomal RNA",
403  "12S ribosomal RNA",
404  "15S ribosomal RNA",
405  "16S ribosomal RNA",
406  "18S ribosomal RNA",
407  "21S ribosomal RNA",
408  "23S ribosomal RNA",
409  "25S ribosomal RNA",
410  "26S ribosomal RNA",
411  "28S ribosomal RNA",
412  "large subunit ribosomal RNA",
413  "small subunit ribosomal RNA" };
414  }
415  else if (NStr::EqualNocase(rna_type, "tRNA")) {
416  return{
417  "A Alanine",
418  "B Asp or Asn",
419  "C Cysteine",
420  "D Aspartic Acid",
421  "E Glutamic Acid",
422  "F Phenylalanine",
423  "G Glycine",
424  "H Histidine",
425  "I Isoleucine",
426  "J Leu or Ile",
427  "K Lysine",
428  "L Leucine",
429  "M Methionine",
430  "N Asparagine",
431  "O Pyrrolysine",
432  "P Proline",
433  "Q Glutamine",
434  "R Arginine",
435  "S Serine",
436  "T Threonine",
437  "U Selenocysteine",
438  "V Valine",
439  "W Tryptophan",
440  "X Undetermined",
441  "Y Tyrosine",
442  "Z Glu or Gln",
443  "* Stop Codon" };
444  }
445  return {};
446 }
447 
448 vector<string> CMacroEditorContext::GetBondTypes() const
449 {
450  const CBondList* list = CSeqFeatData::GetBondList();
451  vector<string> bond_list;
452  for (auto it = list->begin(); it != list->end(); ++it) {
453  bond_list.push_back(it->first);
454  }
455  return bond_list;
456 }
457 
458 vector<string> CMacroEditorContext::GetSiteTypes() const
459 {
460  const CSiteList* list = CSeqFeatData::GetSiteList();
461  vector<string> site_list;
462  for (auto it = list->begin(); it != list->end(); ++it) {
463  site_list.push_back(it->first);
464  }
465  return site_list;
466 }
467 
468 string CMacroEditorContext::GetConversionDescr(const string& from_feat, const string& to_feat)
469 {
472  CRef<CConvertFeatureBase> converter = CConvertFeatureBaseFactory::Create(from_subtype, to_subtype);
473  return converter->GetDescription();
474 }
475 
477 {
478  switch (type) {
480  return m_SourceTextKeys;
482  return m_SourceTaxKeys;
484  return m_MolinfoKeywords;
486  return m_PubdescKeywords;
488  return m_DBLinkKeywords;
490  return m_MiscKeywords;
492  return m_MiscKeywordsToRmv;
494  return m_DescrKeywords;
496  return m_GeneKeywords;
498  return m_RnaKeywords;
500  return m_ProteinKeywords;
502  return m_FeatQualKeywords;
504  return m_CdsGeneProtKeywords;
506  return m_BsrcAutodefWords;
507  default:
508  break;
509  }
510  return m_EmptyKeywords;
511 }
512 
513 string CMacroEditorContext::GetAsnPathToFieldName(const string& feat, const string& qual)
514 {
515  return x_GetAsnPathToFeatQuals(feat, qual);
516 }
517 
518 string CMacroEditorContext::x_GetAsnPathToFeatQuals(const string& feat, const string& qual)
519 {
520  string qual_name(qual);
521  NStr::ReplaceInPlace(qual_name, "-", "_");
522 
523  string path;
524  if (NStr::EqualNocase(feat, "gene")) {
525  if (qual_name == "description") {
526  path = "data.gene.desc";
527  } else if (qual_name == "locus") {
528  path = "data.gene.locus";
529  } else if (macro::NMacroUtil::StringsAreEquivalent(qual_name, "locus-tag")) {
530  path = "data.gene.locus-tag";
531  } else if (qual_name == "synonym" || macro::NMacroUtil::StringsAreEquivalent(qual_name, "gene_synonym")) {
532  path = "data.gene.syn";
533  } else if (qual_name == "allele") {
534  path = "data.gene.allele";
535  }
536  }
537 
538  if (NStr::EqualNocase(feat, "protein") || NStr::EqualNocase(feat, "Proprotein")) {
539  if (qual_name == "product" || qual_name == "name") {
540  path = "data.prot.name";
541  } else if (qual_name == "description") {
542  path = "data.prot.desc";
543  } else if (qual_name == "activity") {
544  path = "data.prot.activity";
545  } else if (macro::NMacroUtil::StringsAreEquivalent(qual_name, "EC-number")) {
546  path = "data.prot.ec";
547  }
548  }
549 
550  if (qual_name == "product") {
551  if (feat.find("RNA") != NPOS) {
552  if (NStr::EqualNocase(feat, "mRNA") ||
553  NStr::EqualNocase(feat, "rRNA") ||
554  NStr::EqualNocase(feat, "preRNA") ||
555  NStr::EqualNocase(feat, "precursor_RNA")) {
556  path = "data.rna.ext.name";
557  }
558  else if (NStr::EqualNocase(feat, "misc_RNA") ||
559  NStr::EqualNocase(feat, "ncRNA") ||
560  NStr::EqualNocase(feat, "scRNA") ||
561  NStr::EqualNocase(feat, "snRNA") ||
562  NStr::EqualNocase(feat, "snoRNA") ||
563  NStr::EqualNocase(feat, "tmRNA")) {
564  path = "data.rna.ext.gen.product";
565  }
566  }
567  }
568 
569  if (NStr::EqualNocase(feat, "cds")) {
570  if (qual_name == "product") {
571  path = "data.prot.name";
572  } else if (qual_name == "activity") {
573  path = "data.prot.activity";
574  }
575  else if (macro::NMacroUtil::StringsAreEquivalent(qual_name, "EC-number")) {
576  path = "data.prot.ec";
577  }
578  else if (macro::NMacroUtil::StringsAreEquivalent(qual_name, "codon-start")) {
579  path = "data.cdregion.frame";
580  }
581  }
582 
583  if (qual_name == "note" || qual_name == "comment") {
584  path = "comment";
585  }
586 
587  if (qual_name == "db_xref") {
588  path = "dbxref";
589  }
590 
591  if (feat == "ncRNA" && macro::NMacroUtil::StringsAreEquivalent(qual_name, "ncRNA-class")) {
592  path = "data.rna.ext.gen.class";
593  }
594  else if (feat == "tmRNA" && macro::NMacroUtil::StringsAreEquivalent(qual_name, "tag_peptide")) {
595  path = "data.rna.ext.gen.quals,tag_peptide";
596  }
597 
598  if (path.empty()) {
599  path = "qual," + qual_name;
600  }
601 
602  return path;
603 }
604 
605 string CMacroEditorContext::GetAsnPathToFeature(const string& feat_name) const
606 {
608  string asn_path_to_feature;
609  switch (subtype) {
626  break;
627  default:
628  asn_path_to_feature = "\"data.imp.key\", \"" + CSeqFeatData::SubtypeValueToName(subtype) + "\"";
629  break;
630  }
631  return asn_path_to_feature;
632 }
633 
634 string CMacroEditorContext::GetAsnPathToAuthorFieldName(const string& author_name)
635 {
636  string val;
637  if (NStr::FindNoCase(author_name, "last") != NPOS)
638  val = "last";
639  else if (NStr::FindNoCase(author_name, "first") != NPOS)
640  val = "first";
641  else if (NStr::FindNoCase(author_name, "suffix") != NPOS)
642  val = "suffix";
643  else if (NStr::FindNoCase(author_name, "initials") != NPOS)
644  val = "initials";
645  else if (NStr::FindNoCase(author_name, "consortium") != NPOS)
646  val = "consortium";
647  return val;
648 }
649 
650 string CMacroEditorContext::GetGUIAuthorName(const string& author_part)
651 {
653  if (NStr::EqualNocase(author_part, "first"))
655  else if (NStr::EqualNocase(author_part, "last"))
657  else if (NStr::EqualNocase(author_part, "suffix"))
659  else if (NStr::EqualNocase(author_part, "consortium"))
661  else if (NStr::EqualNocase(author_part, "initials"))
663 
664  return GetGUIPubField(type);
665 }
666 
668 {
671 }
672 
673 string CMacroEditorContext::GetAsnPathToFieldName(const string& field, EMacroFieldType type, const string& target)
674 {
675  m_Field = field;
676 
677  switch (type) {
680  return x_GetAsnPathToBsrcText();
682  return x_GetAsnPathToBsrcTax();
684  return "origin";
686  return "genome";
688  return x_GetAsnPathToAllBsrc();
690  return x_GetAsnPathToMolinfo(target);
695  return x_GetAsnPathToPub();
697  return x_GetAsnPathToAffilFields();
699  return "PUB_AFFIL()";
701  return "PUB_AFFIL()";
703  return x_GetAsnPathToDBLinkField(target);
706  return x_GetAsnPathToMiscDescr(target);
708  return x_GetAsnPathToDescriptors();
710  return x_GetAsnPathtoGene();
712  return x_GetAsnPathToRna();
714  return x_GetAsnPathToCDS();
716  return x_GetAsnPathToCdsGeneProt();
718  return x_GetAsnPathToProtein();
720  return x_GetAsnPathToFeatQuals();
722  return x_GetAsnPathToMiscFields();
725  return x_GetAsnPathToSet();
727  return macro::CMacroFunction_SeqID::GetFuncName() + "()";
728  default:
729  break;
730  }
731  return kEmptyStr;
732 }
733 
735 {
736  string path;
737  if (NStr::EqualNocase(m_Field, "taxname")) {
738  path = "org.taxname";
739  }
740  else if (NStr::EqualNocase(m_Field, "common name")) {
741  path = "org.common";
742  }
743  else if (NStr::EqualNocase(m_Field, "division")) {
744  path = "org.orgname.div";
745  }
746  else if (NStr::EqualNocase(m_Field, "lineage")) {
747  path = "org.orgname.lineage";
748  }
749  return path;
750 }
751 
753 {
754  string path;
757  path = "org.orgname.mod,nat-host";
758  }
760  path = "org.orgname.mod,other";
761  }
763  path = "subtype,other";
764  }
766  path = "pcr-primers..forward..seq";
767  }
769  path = "pcr-primers..reverse..seq";
770  }
772  path = "pcr-primers..forward..name";
773  }
775  path = "pcr-primers..reverse..name";
776  }
778  path = "org.db";
779  }
781  path = "org.orgname.mod," + m_Field;
782  }
784  path = "subtype," + m_Field;
785  }
786 
787  return path;
788 }
789 
791 {
792  string path = x_GetAsnPathToBsrcTax();
793  if (path.empty()) {
794  if (NStr::EqualNocase(m_Field, "origin"))
795  path = "origin";
796  else if (NStr::EqualNocase(m_Field, "location") || (NStr::EqualNocase(m_Field, "genome")))
797  path = "genome";
798  else {
799  path = x_GetAsnPathToBsrcText();
800  }
801  }
802  return path;
803 }
804 
805 string CMacroEditorContext::GetGUIFieldName(const string& macro_field, const string& target, EMacroFieldType& type, EMSection clause)
806 {
807  string field_name;
809 
810  m_MacroField = macro_field;
813  return "Local id";
814  }
815  else if (NStr::EqualNocase(m_MacroField, "defline") || NStr::EqualNocase(m_MacroField, "descr..title")) {
817  return "definition line";
818  }
819  else if (NStr::EqualNocase(m_MacroField, "SeqId")) {
821  return "SeqId";
822  }
823 
824  if (NStr::EqualNocase(target, macro::CMacroBioData::sm_BioSource)) {
825  if (!x_GetGUIFieldForBsrcTax(field_name, type)) {
826  if (NStr::EqualNocase(m_MacroField, "origin")) {
828  field_name = "origin";
829  }
830  else if (NStr::EqualNocase(m_MacroField, "genome")) {
832  field_name = "location";
833  }
834  else {
835  x_GetGUIFieldForBsrcText(field_name, type);
836  }
837  }
838  if (field_name.empty()) {
839  if (!x_GetGUIFieldForStructComm(field_name, type, clause)) {
840  x_IsDescriptor(field_name, type);
841  }
842  }
843  }
844  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_Seq) ||
845  NStr::EqualNocase(target, macro::CMacroBioData::sm_SeqNa) ||
846  NStr::EqualNocase(target, macro::CMacroBioData::sm_SeqAa)) {
847  if (NStr::StartsWith(m_MacroField, "descr..molinfo.", NStr::eNocase)) {
848  m_MacroField = m_MacroField.substr(CTempString("descr..molinfo.").length(), NPOS);
849  }
850 
851  if (m_MacroField == "inst.length" || m_MacroField == "inst.repr") {
852  field_name = m_MacroField;
853  }
854  else {
855  bool ret = x_GetGUIFieldForMolinfo(field_name, type);
856  if (!ret) {
857  ret = x_GetGUIFieldForDBLink(field_name, type);
858  }
859  if (!ret) {
860  ret = x_GetGUIFieldForStructComm(field_name, type, clause);
861  }
862  if (!ret) {
863  x_GetGUIFieldForMiscDescr(field_name, type);
864  }
865  }
866  }
867  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_MolInfo)) {
868  bool ret = x_GetGUIFieldForMolinfo(field_name, type);
869  if (!ret) {
870  x_IsDescriptor(field_name, type);
871  }
872  }
873  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_Pubdesc)) {
874  if (!x_GetGUIFieldForPubdesc(field_name, type)) {
875  x_IsDescriptor(field_name, type);
876  }
877  }
878  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_StrComm)) {
879  bool ret = x_GetGUIFieldForStructComm(field_name, type, clause);
880  if (!ret) {
881  x_IsDescriptor(field_name, type);
882  }
883  }
884  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_DBLink)) {
885  bool ret = x_GetGUIFieldForDBLink(field_name, type);
886  if (!ret) {
887  x_IsDescriptor(field_name, type);
888  }
889  }
890  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_UserObject)) {
891  x_IsDescriptor(field_name, type);
892  }
893  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_Seqdesc)) {
895  if (!field_name.empty()) {
897  }
898  }
899  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_Gene)) {
900  field_name = x_GetGUIFieldForGene();
901  if (!field_name.empty()) {
903  }
904  else {
906  }
907  }
908  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_Protein)) {
909  field_name = x_GetGUIFieldForProtein();
910  if (!field_name.empty()) {
912  }
913  else {
915  }
916  }
917  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_RNA) ||
918  NStr::EqualNocase(target, macro::CMacroBioData::sm_miscRNA) ||
919  NStr::EqualNocase(target, macro::CMacroBioData::sm_rRNA) ||
920  NStr::EqualNocase(target, macro::CMacroBioData::sm_mRNA)) {
921  field_name = x_GetGUIFieldForRNA();
922  if (!field_name.empty()) {
924  }
925  else {
927  }
928  }
929  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_CdRegion)) {
930  field_name = x_GetGUIFieldForCDS();
931  if (!field_name.empty()) {
933  } else {
935  }
936  }
937  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_SeqFeat)) {
938  if (NStr::StartsWith(m_MacroField, "protein,", NStr::eNocase)) {
939  field_name = x_GetGUIFieldForProtein();
940  }
941  else {
942  field_name = x_GetGUIFieldForProtein();
943  if (!field_name.empty()) {
944  if (!NStr::StartsWith(m_MacroField, "protein,", NStr::eNocase)) {
945  NStr::ReplaceInPlace(field_name, "protein ", "");
946  }
947  }
948  else if (NStr::StartsWith(m_MacroField, "gene,", NStr::eNocase)) {
949  field_name = x_GetGUIFieldForGene();
950  }
951  else if (NStr::StartsWith(m_MacroField, "mRNA,", NStr::eNocase)) {
952  m_MacroField = m_MacroField.substr(CTempString("mRNA,").length(), NPOS);
953  field_name = x_GetGUIFieldForRNA();
954  if (!field_name.empty()) {
955  field_name = "mRNA " + field_name;
956  }
957  }
958  else if (NStr::StartsWith(m_MacroField, "cds", NStr::eNocase)) {
959  field_name = x_GetGUIFieldForCDS();
960  }
961  }
962 
963  if (!field_name.empty()) {
965  }
966  }
967  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_SeqSet)) {
968  if (m_MacroField == "class") {
969  field_name = m_MacroField;
970  }
971  else {
972  if (CBioseq_set::ENUM_METHOD_NAME(EClass)()->IsValidName(m_MacroField)) {
973  field_name = m_MacroField;
974  if (m_MacroField == "gi" || m_MacroField == "gibb") {
975  field_name = m_MacroField + "-set";
976  }
977  }
978  }
979  }
980  else if (NStr::EqualNocase(target, macro::CMacroBioData::sm_AutodefOpts)) {
981  field_name = m_MacroField;
982  }
983 
984  if (field_name.empty() && macro::CMacroBioData::s_IsFeatSelector(target)) {
985  if (NStr::EqualNocase(m_MacroField, "except-text") ||
986  NStr::EqualNocase(m_MacroField, "except") ||
987  m_MacroField == "product") {
988  field_name = m_MacroField;
989  }
990  else if (NStr::EqualNocase(m_MacroField, "comment")) {
991  field_name = "note";
992  }
993  else if (NStr::EqualNocase(m_MacroField, "dbxref")) {
994  field_name = "db-xref";
995  }
996  else if (s_IsGeneQualifier(m_MacroField)) {
997  field_name = m_MacroField;
998  NStr::ReplaceInPlace(field_name, "_", "-");
999  }
1000 
1001  if (field_name.empty()) {
1002  auto pos = m_MacroField.find(",");
1003  if (pos != NPOS) {
1004  auto tmp = m_MacroField.substr(pos + 1, NPOS);
1005  if (NStr::StartsWith(m_MacroField, "qual,") &&
1006  (CSeqFeatData::GetQualifierType(tmp) != CSeqFeatData::eQual_bad /*|| tmp == "codons_recognized" */)) {
1007  field_name = tmp;
1008  NStr::ReplaceInPlace(field_name, "_", "-");
1009  }
1010  else if (NStr::StartsWith(m_MacroField, "gene,", NStr::eNocase)) {
1011  field_name = x_GetGUIFieldForGene();
1012  NStr::ReplaceInPlace(field_name, "gene ", "");
1013  }
1014  else {
1015  pos = tmp.find(",");
1016  if (pos != NPOS) {
1017  tmp = tmp.substr(pos + 1, NPOS);
1018  }
1020  field_name = tmp;
1021  }
1022  }
1023  }
1024  }
1025  if (!field_name.empty()) {
1027  }
1028  }
1029 
1030  return field_name;
1031 }
1032 
1034 {
1035  if (!field_name.empty())
1036  return true;
1037 
1039  field_name = x_GetGUIFieldForCDS();
1040  }
1041  else if (NStr::StartsWith(m_MacroField, "protein,", NStr::eNocase)) {
1042  field_name = x_GetGUIFieldForProtein();
1043  }
1044  else if (NStr::StartsWith(m_MacroField, "gene,", NStr::eNocase)) {
1045  field_name = x_GetGUIFieldForGene();
1046  }
1047  else if (NStr::StartsWith(m_MacroField, "mat_peptide,", NStr::eNocase)) {
1048  m_MacroField = m_MacroField.substr(CTempString("mat_peptide,").length(), NPOS);
1049  field_name = x_GetGUIFieldForProtein();
1050  if (!field_name.empty()) {
1051  NStr::ReplaceInPlace(field_name, "protein", "mat_peptide");
1052  }
1053  }
1054  else if (NStr::StartsWith(m_MacroField, "mRNA,", NStr::eNocase)) {
1055  m_MacroField = m_MacroField.substr(CTempString("mRNA,").length(), NPOS);
1056  field_name = x_GetGUIFieldForRNA();
1057  if (!field_name.empty()) {
1058  field_name = "mRNA " + field_name;
1059  }
1060  }
1061  if (!field_name.empty()) {
1063  }
1064  return (!field_name.empty());
1065 }
1066 
1067 string CMacroEditorContext::GetGUIFeatQualifier(const string& macro_field)
1068 {
1069  string field_name;
1070  if (macro_field == "data.gene.allele") {
1071  field_name = "allele";
1072  }
1073  else if (macro_field == "data.gene.syn") {
1074  field_name = "gene_synonym";
1075  }
1076  else if (macro_field == "data.gene.locus-tag") {
1077  field_name = "locus_tag";
1078  }
1079  return field_name;
1080 }
1081 
1083 {
1084  return (macro::NMacroUtil::StringsAreEquivalent(field, "locus_tag") ||
1085  field == "locus" ||
1086  field == "allele" ||
1087  field == "maploc" ||
1088  field == "desc"); // maybe syn?
1089 }
1090 
1091 
1093 {
1095  if (!field_name.empty()) {
1097  return true;
1098  }
1099  return false;
1100 }
1101 
1102 
1104 {
1105  static const TBimapStrings::relation bsrc_tax_bm[] = {
1106  {"taxname", "org.taxname"},
1107  {"common name", "org.common"},
1108  {"division", "org.orgname.div"},
1109  {"lineage", "org.orgname.lineage"} };
1110  TBimapStrings const data{ begin(bsrc_tax_bm), end(bsrc_tax_bm) };
1111 
1112  field_name = FindInBimapOrEmpty(m_MacroField, data);
1113  if (!field_name.empty()) {
1115  return true;
1116  }
1117  return false;
1118 }
1119 
1121 {
1122  static const TBimapStrings::relation bsrc_text_bm[] = {
1123  {kHost, "org.orgname.mod,nat-host"},
1124  {kOrgModNote, "org.orgname.mod,other"},
1125  {kSubSourceNote, "subtype,other"},
1126  {kFwdPrimerSeq, "pcr-primers..forward..seq"},
1127  {kRevPrimerSeq, "pcr-primers..reverse..seq"},
1128  {kFwdPrimerName, "pcr-primers..forward..name"},
1129  {kRevPrimerName, "pcr-primers..reverse..name"},
1130  {kDbXref, "org.db"} };
1131 
1132  TBimapStrings const data{ begin(bsrc_text_bm), end(bsrc_text_bm) };
1133 
1134  string ret = FindInBimapOrEmpty(m_MacroField, data);
1135  if (ret.empty() && (m_MacroField == "org.db.db")) {
1136  ret = kDbXref;
1137  }
1138  if (ret.empty()) {
1139  string tmp = m_MacroField;
1140  string sv_part;
1141 
1142  auto pos = m_MacroField.find(",");
1143  if (pos != NPOS) {
1144  tmp = m_MacroField.substr(pos + 1, NPOS);
1145  pos = tmp.find(",");
1146  if (pos != NPOS) {
1147  sv_part = "-" + tmp.substr(pos + 1, NPOS);
1148  tmp = tmp.substr(0, pos);
1149  }
1150  }
1152  ret = tmp;
1153  }
1154  else if (CSubSource::IsValidSubtypeName(tmp) && !NStr::StartsWith(m_MacroField, "org.orgname.mod,")) {
1155  ret = tmp;
1156  }
1157  if (!ret.empty() && pos != NPOS && macro::NMacroUtil::IsStructVoucherPart(sv_part)) {
1158  ret = ret + sv_part;
1159  }
1160  }
1161  if (!ret.empty()) {
1162  field_name = ret;
1164  return true;
1165  }
1166  return false;
1167 }
1168 
1170 {
1171  static const TBimapStrings::relation struct_comm_bm[] = {
1172  {"Field", macro::kStrCommFieldValue},
1173  {"Database name", macro::kStrCommDbname},
1174  {"Field name", macro::kStrCommFieldName} };
1175 
1176  TBimapStrings const data{ begin(struct_comm_bm), end(struct_comm_bm) };
1177  return FindInBimapOrEmpty(value, data);
1178 }
1179 
1181 {
1183  if (!field_name.empty()) {
1185  if (clause == EMSection::eWhereSection) {
1186  field_name = "Structured comment " + field_name;
1187  }
1188  return true;
1189  }
1190  return false;
1191 }
1192 
1194 {
1195  string path;
1196  if (NStr::EqualNocase(m_Field, "molecule")) {
1197  path = "biomol";
1198  }
1199  else if (NStr::EqualNocase(m_Field, "technique")) {
1200  path = "tech";
1201  }
1202  else if (NStr::EqualNocase(m_Field, "completedness")) {
1203  path = "completeness";
1204  }
1205 
1206 
1207  if ((target == macro::CMacroBioData::sm_Seq ||
1208  target == macro::CMacroBioData::sm_SeqNa ||
1209  target == macro::CMacroBioData::sm_SeqAa) && !path.empty()) {
1210  path = "descr..molinfo." + path;
1211  }
1212 
1213  if (NStr::EqualNocase(m_Field, "class")) {
1214  path = "inst.mol";
1215  }
1216  else if (NStr::EqualNocase(m_Field, "topology")) {
1217  path = "inst.topology";
1218  }
1219  else if (NStr::EqualNocase(m_Field, "strand")) {
1220  path = "inst.strand";
1221  }
1222  return path;
1223 }
1224 
1226 {
1227  static const TBimapStrings::relation molinfo_descr_bm[] = {
1228  {"biomol", "molecule"},
1229  {"tech", "technique"},
1230  {"completeness", "completedness"},
1231  {"inst.mol", "class"},
1232  {"inst.topology", "topology"},
1233  {"inst.strand", "strand"} };
1234 
1235  TBimapStrings const data{ begin(molinfo_descr_bm), end(molinfo_descr_bm) };
1236  field_name = FindInBimapOrEmpty(m_MacroField, data);
1237  if (!field_name.empty()) {
1239  return true;
1240  }
1241  return false;
1242 }
1243 
// Body of a bool-returning member; presumably x_GetGUIFieldForDBLink
// (field_name, type) -- TODO confirm, the signature line is missing from this
// listing, as is the statement at listing line 1251.
// Returns true, with field_name set to m_MacroField, when m_MacroField matches
// an entry of m_DBLinkKeywords ignoring case; returns false otherwise.
1245 {
1246  const auto it = find_if(m_DBLinkKeywords.begin(), m_DBLinkKeywords.end(),
1247  [this](const auto& elem) { return NStr::EqualNocase(elem, m_MacroField); });
1248 
1249  if (it != m_DBLinkKeywords.end()) {
      // The macro-side spelling is copied as is, not the keyword that matched.
1250  field_name = m_MacroField;
1252  return true;
1253  }
1254  return false;
1255 }
1256 
// Body of a bool-returning member; presumably x_GetGUIFieldForPubdesc
// (field_name, type) -- TODO confirm, the signature line is missing from this
// listing.
// NOTE(review): the bodies of the four explicit branches below (listing lines
// 1261, 1264, 1267, 1270) and the statement at listing line 1280 are also
// missing, so what those branches assign cannot be told from here.
// Visible behaviour: field_name is cleared first; any non-empty result is
// passed through CPubFieldType::NormalizePubFieldName and true is returned;
// otherwise false is returned.
1258 {
1259  field_name.clear();
1260  if (m_MacroField == "affil") {
1262  }
1263  else if (m_MacroField == "div") {
1265  }
1266  else if (m_MacroField == "sub") {
1268  }
1269  else if (m_MacroField == "postal-code") {
1271  }
1272  else {
      // Fallback: accept the macro field verbatim when it is one of the
      // names returned by CPubFieldType::GetFieldNames(true).
1273  const auto fields = CPubFieldType::GetFieldNames(true);
1274  if (find(fields.begin(), fields.end(), m_MacroField) != fields.end())
1275  field_name = m_MacroField;
1276  }
1277 
1278  if (!field_name.empty()) {
1279  field_name = CPubFieldType::NormalizePubFieldName(field_name);
1281  return true;
1282  }
1283  return false;
1284 }
1285 
// Returns the GUI field name that x_GetGUIFieldForPubdesc() produces for
// macro_field. The argument is stored in m_MacroField first, so this call
// changes object state. The bool result of x_GetGUIFieldForPubdesc() is
// ignored; field_name is returned as that call left it.
// NOTE(review): the declaration of `type` (listing line 1290) is missing from
// this listing.
1286 string CMacroEditorContext::GetGUIAffilField(const string& macro_field)
1287 {
1288  m_MacroField = macro_field;
1289  string field_name;
1291  x_GetGUIFieldForPubdesc(field_name, type);
1292  return field_name;
1293 }
1294 
// NOTE(review): the body statements (listing lines 1297-1299) are missing from
// this listing; only the signature and braces survive, so the behaviour of
// this function cannot be documented from here.
1295 string CMacroEditorContext::GetGUIDateField(const string& macro_field)
1296 {
1300 }
1301 
// Body of a string-returning function; presumably
// s_GetGUIMappedMiscDescr(const string& value) -- TODO confirm, the signature
// line is missing from this listing.
// Looks `value` up in a table pairing "title", "comment" and
// "genbank.keywords" with the corresponding label constants, and returns
// whatever FindInBimapOrEmpty() yields.
1303 {
1304  static const TBimapStrings::relation misc_descr_bm[] = {
1305  {"title", kDefinitionLineLabel},
1306  {"comment", kCommentDescriptorLabel},
1307  {"genbank.keywords", kGenbankBlockKeyword} };
1308 
      // The bimap is a non-static local, rebuilt from the static table on
      // every call.
1309  TBimapStrings const data{ begin(misc_descr_bm), end(misc_descr_bm) };
1310  return FindInBimapOrEmpty(value, data);
1311 }
1312 
// Body of a bool-returning member; presumably x_GetGUIFieldForMiscDescr
// (field_name, type) -- TODO confirm, the signature line is missing from this
// listing, as is the statement at listing line 1322.
// Sets field_name to the result of s_GetGUIMappedMiscDescr() for m_MacroField
// and returns true when that result is non-empty.
1314 {
      // Work on a copy so that m_MacroField itself is left untouched; a
      // leading "descr.." is dropped before the lookup.
1315  string tmp = m_MacroField;
1316  if (NStr::StartsWith(m_MacroField, "descr..")) {
1317  tmp = tmp.substr(CTempString("descr..").length(), NPOS);
1318  }
1319 
1320  field_name = s_GetGUIMappedMiscDescr(tmp);
1321  if (!field_name.empty()) {
1323  return true;
1324  }
1325  return false;
1326 }
1327 
// Body of a string-returning member; presumably x_GetAsnPathToMiscDescr(target)
// -- TODO confirm, the signature line is missing from this listing.
// NOTE(review): the three conditions that select between "title", "comment"
// and "genbank.keywords" (listing lines 1331, 1334, 1337) are missing, so the
// inputs that pick each value cannot be told from here.
1329 {
1330  string path;
1332  path = "title";
1333  }
1335  path = "comment";
1336  }
1338  path = "genbank.keywords";
1339  }
1340 
      // For the three sequence targets, a non-empty path is qualified with
      // the "descr.." prefix.
1341  if ((target == macro::CMacroBioData::sm_Seq ||
1342  target == macro::CMacroBioData::sm_SeqNa ||
1343  target == macro::CMacroBioData::sm_SeqAa) && !path.empty()) {
1344  path = "descr.." + path;
1345  }
1346  return path;
1347 }
1348 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Returns "localid" or "defline", or an empty string when neither branch is
// taken. NOTE(review): the condition guarding the "localid" branch (listing
// line 1352) is missing.
1350 {
1351  string path;
1353  path = "localid";
1354  }
      // Case-insensitive match on the GUI label "definition line".
1355  else if (NStr::EqualNocase(m_Field, "definition line")) {
1356  path = "defline";
1357  }
1358  return path;
1359 }
1360 
// Body of a string-returning member; presumably
// x_GetAsnPathToDBLinkField(target) -- TODO confirm, the signature line is
// missing from this listing.
// For the sm_SeqNa target the result is the text of a call expression:
// the name from CMacroFunction_GetDBLink::GetFuncName() followed by m_Field
// in double quotes and parentheses. For any other target the fixed path
// "data.data.strs" is returned.
1362 {
1363  if (target == macro::CMacroBioData::sm_SeqNa) {
1364  return macro::CMacroFunction_GetDBLink::GetFuncName() + "(\"" + m_Field + "\")";
1365  }
1366  return "data.data.strs";
1367 }
1368 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Returns CPubFieldType::GetMacroLabelForType(field_type).
// NOTE(review): the statement that defines field_type (listing line 1371) is
// missing.
1370 {
1372  return CPubFieldType::GetMacroLabelForType(field_type);
1373 }
1374 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Switches on `type` and returns one of "affil", "div", "sub" or
// "postal-code"; every other value reaches the default branch and the
// function returns m_Field unchanged.
// NOTE(review): the statements that define `type` (listing lines 1377-1378)
// and all four case labels are missing, so which enumerator selects which
// string cannot be told from here.
1376 {
1379  switch (type) {
1381  return "affil";
1383  return "div";
1385  return "sub";
1387  return "postal-code";
1388  default:
1389  break;
1390  }
1391  return m_Field;
1392 }
1393 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Looks m_Field up in the left view of m_DescriptorMap and returns the paired
// right-hand value, or kEmptyStr when m_Field is not present.
1395 {
1396  auto it = m_DescriptorMap.left.find(m_Field);
1397  if (it != m_DescriptorMap.left.end()) {
1398  return it->second;
1399  }
1400  return kEmptyStr;
1401 }
1402 
1403 string CMacroEditorContext::GetGUIDescriptor(const string& macro_field)
1404 {
1405  return FindInBimapOrEmpty(macro_field, m_DescriptorMap);
1406 }
1407 
// File-local helper built around a table that pairs gene GUI labels
// ("gene locus", "gene description", ...) with path strings
// ("data.gene.locus", "data.gene.desc", ...).
// NOTE(review): the return statement (listing line 1421) is missing from this
// listing; presumably it looks `value` up in `data` -- confirm against the
// original file.
1408 static string s_GetGeneField(const string& value)
1409 {
1410  static const CMacroEditorContext::TBimapStrings::relation gene_qual_bm[] = {
1411  {"gene locus", "data.gene.locus"},
1412  {"gene description", "data.gene.desc"},
1413  {"gene comment", "comment"},
1414  {"gene allele", "data.gene.allele"},
1415  {"gene maploc", "data.gene.maploc"},
1416  {"gene locus tag", "data.gene.locus-tag"},
1417  {"gene synonym", "data.gene.syn"}
1418  };
1419 
      // The bimap is a non-static local, rebuilt from the static table on
      // every call.
1420  CMacroEditorContext::TBimapStrings const data{ begin(gene_qual_bm), end(gene_qual_bm) };
1422 }
1423 
1424 
// Body of a string-returning member; presumably x_GetAsnPathtoGene() -- TODO
// confirm, the signature line is missing from this listing.
// Returns s_GetGeneField(m_Field) when that is non-empty. Otherwise, when
// m_Field starts with "gene " (ignoring case), returns "qual," followed by
// the text after the first space. Returns an empty string in all other cases.
1426 {
1427 
1428  string path = s_GetGeneField(m_Field);
1429  if (path.empty() && NStr::StartsWith(m_Field, "gene ", NStr::eNocase)) {
      // The prefix test above guarantees that a space is present.
1430  SIZE_TYPE pos = m_Field.find(" ");
1431  path = "qual," + m_Field.substr(pos + 1);
1432  }
1433  return path;
1434 }
1435 
// Body of a string-returning member; presumably x_GetGUIFieldForGene() -- TODO
// confirm, the signature line is missing from this listing.
// Returns s_GetGeneField(m_MacroField) when non-empty, otherwise one of two
// hard-coded labels for inference / old_locus_tag, otherwise an empty string.
1437 {
      // A leading "gene," (any case) is removed from the member itself, so
      // m_MacroField is modified by this call.
1438  if (NStr::StartsWith(m_MacroField, "gene,", NStr::eNocase)) {
1439  m_MacroField = m_MacroField.substr(CTempString("gene,").length(), NPOS);
1440  }
1441 
1442  string gui_elem = s_GetGeneField(m_MacroField);
1443  if (gui_elem.empty()) {
      // Note the mixed comparisons: "inference" and "old_locus_tag" are
      // matched exactly, "qual,inference" ignoring case.
1444  if (m_MacroField == "inference" || NStr::EqualNocase(m_MacroField, "qual,inference")) {
1445  gui_elem = "gene inference";
1446  }
1447  else if (m_MacroField == "old_locus_tag") {
1448  gui_elem = "gene old_locus_tag";
1449  }
1450  }
1451  return gui_elem;
1452 }
1453 
// Body of a string-returning member; presumably x_GetAsnPathToCDS() -- TODO
// confirm, the signature line is missing from this listing.
// Maps the GUI name in m_Field to a path: "cds comment" -> "comment",
// "cds inference" -> "qual,inference" (both ignoring case), and a name that
// macro::NMacroUtil::StringsAreEquivalent() accepts for "codon-start" ->
// "data.cdregion.frame". Returns an empty string otherwise.
1455 {
1456  string path;
1457  if (NStr::EqualNocase(m_Field, "cds comment")) {
1458  path = "comment";
1459  }
1460  else if (NStr::EqualNocase(m_Field, "cds inference")) {
1461  path = "qual,inference";
1462  }
1463  else if (macro::NMacroUtil::StringsAreEquivalent(m_Field, "codon-start")) {
1464  path = "data.cdregion.frame";
1465  }
1466  return path;
1467 }
1468 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Maps m_MacroField back to a CDS GUI label: "comment" -> "cds comment",
// "qual,inference" or "inference" -> "cds inference" (all ignoring case), and
// exactly "data.cdregion.frame" -> "codon-start". Returns an empty string
// otherwise.
// NOTE(review): the condition that guards the prefix removal (listing line
// 1472) is missing; the visible statement drops as many leading characters
// from m_MacroField as "cds," is long, and modifies the member.
1470 {
1471  string gui_elem;
1473  m_MacroField = m_MacroField.substr(string("cds,").length(), NPOS);
1474  }
1475  if (NStr::EqualNocase(m_MacroField, "comment")) {
1476  gui_elem = "cds comment";
1477  }
1478  else if (NStr::EqualNocase(m_MacroField, "qual,inference") || NStr::EqualNocase(m_MacroField, "inference")) {
1479  gui_elem = "cds inference";
1480  }
1481  else if (m_MacroField == "data.cdregion.frame") {
1482  gui_elem = "codon-start";
1483  }
1484  return gui_elem;
1485 }
1486 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Dispatches on m_Field: names starting with "gene" (ignoring case) go to
// x_GetAsnPathtoGene(); anything else is split at its first space into an RNA
// type and a field name for x_GetAsnPathToRnaFieldName(). Returns kEmptyStr
// when m_Field has no space.
1488 {
1489  if (NStr::StartsWith(m_Field, "gene", NStr::eNocase)) {
1490  return x_GetAsnPathtoGene();
1491  }
1492  else {
1493  SIZE_TYPE pos = m_Field.find(" ");
1494  if (pos != NPOS) {
1495  return x_GetAsnPathToRnaFieldName(m_Field.substr(0, pos), m_Field.substr(pos + 1));
1496  }
1497  }
1498  return kEmptyStr;
1499 }
1500 
// File-local helper built around a table that pairs protein GUI labels
// ("protein name", "protein EC number", ...) with path strings
// ("data.prot.name", "data.prot.ec", ...).
// NOTE(review): the return statement (listing line 1512) is missing from this
// listing; presumably it looks `value` up in `data` -- confirm against the
// original file.
1501 static string s_GetProteinField(const string& value)
1502 {
1503  static const CMacroEditorContext::TBimapStrings::relation protein_qual_bm[] = {
1504  {"protein comment", "comment"},
1505  {"protein name", "data.prot.name"},
1506  {"protein description", "data.prot.desc"},
1507  {"protein EC number", "data.prot.ec"},
1508  {"protein activity", "data.prot.activity"}
1509  };
1510 
      // The bimap is a non-static local, rebuilt from the static table on
      // every call.
1511  CMacroEditorContext::TBimapStrings const data{ begin(protein_qual_bm), end(protein_qual_bm) };
1513 }
1514 
// Body of a string-returning member; presumably x_GetAsnPathToProtein() --
// TODO confirm, the signature line is missing from this listing.
// Returns s_GetProteinField(m_Field) unchanged.
1516 {
1517  return s_GetProteinField(m_Field);
1518 }
1519 
// Body of a member whose signature line is missing from this listing (name
// unknown from here).
// Removes a leading "protein," (any case) from m_MacroField, modifying the
// member. NOTE(review): the return statement (listing line 1525) is missing,
// so the value produced cannot be told from here.
1521 {
1522  if (NStr::StartsWith(m_MacroField, "protein,", NStr::eNocase)) {
1523  m_MacroField = m_MacroField.substr(CTempString("protein,").length(), NPOS);
1524  }
1526 }
1527 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Dispatches on the prefix of m_Field (ignoring case) to the protein, CDS,
// gene or mRNA path helpers, and returns kEmptyStr when no prefix matches.
// m_Field itself may be rewritten by the first step.
1529 {
      // "mat_peptide" names are rewritten so that they take the protein branch.
      // NOTE(review): the prefix test ignores case while the replacement is
      // given the literal "mat_peptide" -- confirm that mixed-case input is
      // handled as intended.
1530  if (NStr::StartsWith(m_Field, "mat_peptide", NStr::eNocase)) {
1531  NStr::ReplaceInPlace(m_Field, "mat_peptide", "protein");
1532  }
1533 
1534  if (NStr::StartsWith(m_Field, "protein", NStr::eNocase)) {
1535  return x_GetAsnPathToProtein();
1536  }
      // NOTE(review): the second half of this condition (listing line 1538)
      // is missing from this listing.
1537  else if (NStr::StartsWith(m_Field, "CDS", NStr::eNocase) ||
1539  return x_GetAsnPathToCDS();
1540  }
1541  else if (NStr::StartsWith(m_Field, "gene", NStr::eNocase)) {
1542  return x_GetAsnPathtoGene();
1543  }
1544  else if (NStr::StartsWith(m_Field, "mRNA", NStr::eNocase)) {
      // The text after the first space is the RNA field name. When there is
      // no space, find() returns NPOS and the substr() call below is
      // substr(pos + 1) with pos == NPOS -- NOTE(review): confirm this yields
      // the whole string as intended.
1545  SIZE_TYPE pos = m_Field.find(" ");
1546  return x_GetAsnPathToRnaFieldName("mRNA", m_Field.substr(pos + 1));
1547  }
1548  return kEmptyStr;
1549 }
1550 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Splits m_Field on " " and, only when exactly two tokens result, returns
// x_GetAsnPathToFeatQuals(tokens[0], tokens[1]); otherwise returns kEmptyStr.
1552 {
1553  vector<string> tokens;
1554  NStr::Split(m_Field, " ", tokens);
1555  if (tokens.size() == 2) {
1556  return x_GetAsnPathToFeatQuals(tokens[0], tokens[1]);
1557  }
1558  return kEmptyStr;
1559 }
1560 
1561 string CMacroEditorContext::x_GetAsnPathToRnaFieldName(const string& rna_type, const string& field)
1562 {
1563  string path;
1564 
1565  if (field == "comment" || field == "note") {
1566  path = "comment";
1567  }
1568 
1569  if (field == "product") {
1570  if (NStr::EqualNocase(rna_type, "mRNA") ||
1571  NStr::EqualNocase(rna_type, "rRNA") ||
1572  NStr::EqualNocase(rna_type, "preRNA")) {
1573  path = "data.rna.ext.name";
1574  }
1575  else if (NStr::EqualNocase(rna_type, "miscRNA") ||
1576  NStr::EqualNocase(rna_type, "ncRNA") ||
1577  NStr::EqualNocase(rna_type, "tmRNA")) {
1578  path = "data.rna.ext.gen.product";
1579  }
1580  else if (NStr::EqualNocase(rna_type, "tRNA") ||
1581  NStr::EqualNocase(rna_type, "any")) {
1582  path = rna_type + "::" + field;
1583  }
1584  }
1585 
1586  if (NStr::EqualNocase(field, "ncRNA class") && NStr::EqualNocase(rna_type, "ncRNA")) {
1587  path = "data.rna.ext.gen.class";
1588  }
1589 
1590  if (macro::NMacroUtil::StringsAreEquivalent(field, "codons-recognized")) {
1591  if (NStr::EqualNocase(rna_type, "tRNA")) {
1592  // only to remove
1593  path = "data.rna.ext.tRNA.codon";
1594  }
1595  /*else {
1596  path = "qual,codons_recognized";
1597  }
1598  */
1599  }
1600 
1601  if (NStr::EqualNocase(field, "tag-peptide")) {
1602  if (NStr::EqualNocase(rna_type, "tmRNA")) {
1603  path = "data.rna.ext.gen.quals,tag_peptide";
1604  }
1605  else {
1606  path = "qual,tag_peptide";
1607  }
1608  }
1609 
1610  if (NStr::EqualNocase(field, "anticodon")) {
1611  if (NStr::EqualNocase(rna_type, "tRNA")) {
1612  // only to remove
1613  path = "data.rna.ext.tRNA.anticodon";
1614  }
1615  else {
1616  path = "qual," + field;
1617  }
1618  }
1619 
1620  return path;
1621 }
1622 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Maps m_MacroField back to an RNA GUI field label ("product", "comment",
// "tag-peptide", "anticodon", "codons recognized", "ncRNA class"); returns
// kEmptyStr when nothing matches. m_MacroField may be modified on the way.
1624 {
      // Gene fields are handled by the gene helper.
1625  if (NStr::StartsWith(m_MacroField, "gene,", NStr::eNocase)) {
1626  return x_GetGUIFieldForGene();
1627  }
1628 
      // Drop a leading "<type>," when the part before the first comma ends in
      // "RNA" (ignoring case) or is exactly "any".
1629  size_t pos = m_MacroField.find(",");
1630  if (pos != NPOS) {
1631  string tmp = m_MacroField.substr(0, pos);
1632  if (NStr::EndsWith(tmp, "RNA", NStr::eNocase) || tmp == "any") {
1633  m_MacroField = m_MacroField.substr(pos + 1, NPOS);
1634  }
1635  }
1636 
      // All comparisons below are exact (case-sensitive).
1637  if (m_MacroField == "product" ||
1638  m_MacroField == "any::product" ||
1639  m_MacroField == "tRNA::product" ||
1640  m_MacroField == "data.rna.ext.name" ||
1641  m_MacroField == "data.rna.ext.gen.product")
1642  return "product";
1643  else if (m_MacroField == "comment")
1644  return m_MacroField;
1645  else if (m_MacroField == "tag_peptide" || m_MacroField == "qual,tag_peptide" || m_MacroField == "data.rna.ext.gen.quals,tag_peptide")
1646  return "tag-peptide";
1647  else if (m_MacroField == "anticodon" || m_MacroField == "qual,anticodon" || m_MacroField == "data.rna.ext.tRNA.anticodon")
1648  return "anticodon";
1649  else if (m_MacroField == "codons recognized" || m_MacroField == "data.rna.ext.tRNA.codon")
1650  return "codons recognized";
1651  else if (m_MacroField == "data.rna.ext.gen.class")
1652  return "ncRNA class";
1653 
1654  return kEmptyStr;
1655 }
1656 
// File-local helper built around a table that pairs short forms ("na", "aa",
// "ds", "ss", "pre-RNA") with longer spellings ("nucleotide", "protein",
// "double", "single", "precursor RNA").
// NOTE(review): the return statement (listing line 1666) is missing from this
// listing; presumably it looks `value` up in `data` -- confirm against the
// original file.
1657 static string s_MapStrandValues(const string& value)
1658 {
1659  static const CMacroEditorContext::TBimapStrings::relation strand_bm[] = {
1660  {"na", "nucleotide"},
1661  {"aa", "protein"},
1662  {"ds", "double"},
1663  {"ss", "single"},
1664  {"pre-RNA", "precursor RNA"} };
      // The bimap is a non-static local, rebuilt from the static table on
      // every call.
1665  CMacroEditorContext::TBimapStrings const data{ begin(strand_bm), end(strand_bm) };
1667 }
1668 
// Converts `choice` for the molinfo field `molinfo_field` into an enum value
// name. The visible steps run in this order:
//  1. a non-empty s_MapStrandValues(choice) result is returned as is;
//  2. the single-space choice " " becomes "not-set", "unknown" or kEmptyStr
//     depending on the field type;
//  3. for the e_Repr field type, five long names are shortened;
//  4. otherwise the enum's value list is searched with
//     edit::CFieldHandler::QualifierNamesAreEquivalent and the matching name
//     is returned.
// When nothing matches, an error is logged and kEmptyStr is returned.
// NOTE(review): this listing is missing the definition of field_type (listing
// line 1677), the declaration of `values` (line 1708) and every case label of
// both switches, so which field type selects which branch cannot be told
// from here.
1669 string CMacroEditorContext::GetAsnMolinfoValue(const string& molinfo_field, const string& choice)
1670 {
1671  // handle outliers first:
1672  string ret = s_MapStrandValues(choice);
1673  if (!ret.empty())
1674  return ret;
1675 
1676 
      // A single space, not an empty string, is the value tested here.
1678  if (choice == " ") {
1679  switch (field_type) {
1684  return "not-set";
1688  return "unknown";
1689  default:
1690  return kEmptyStr;
1691  }
1692  }
1693 
      // Long representation names are shortened; other e_Repr choices fall
      // through to the generic enum search below.
1694  if (field_type == CMolInfoFieldType::e_Repr) {
1695  if (choice == "consensus")
1696  return "consen";
1697  else if (choice == "constructed")
1698  return "const";
1699  else if (choice == "ordered map")
1700  return "map";
1701  else if (choice == "reference to another")
1702  return "ref";
1703  else if (choice == "segmented")
1704  return "seg";
1705  }
1706 
1707 
      // Pick the value list of the enum that belongs to the field type.
1709  switch (field_type) {
1711  values = CSeq_inst::ENUM_METHOD_NAME(EMol)()->GetValues();
1712  break;
1714  values = CMolInfo::ENUM_METHOD_NAME(ECompleteness)()->GetValues();
1715  break;
1717  values = CMolInfo::ENUM_METHOD_NAME(EBiomol)()->GetValues();
1718  break;
1720  values = CSeq_inst::ENUM_METHOD_NAME(EStrand)()->GetValues();
1721  break;
1723  values = CMolInfo::ENUM_METHOD_NAME(ETech)()->GetValues();
1724  break;
1726  values = CSeq_inst::ENUM_METHOD_NAME(ETopology)()->GetValues();
1727  break;
      // No break after the ERepr assignment: control falls into `default`,
      // which only breaks, so the effect is the same.
1729  values = CSeq_inst::ENUM_METHOD_NAME(ERepr)()->GetValues();
1730  default:
1731  break;
1732  }
1733 
1734  auto it = find_if(values.begin(), values.end(), [&choice](const pair<string, TEnumValueType>& elem)
1735  { return edit::CFieldHandler::QualifierNamesAreEquivalent(elem.first, choice); });
1736 
1737  if (it != values.end()) {
1738  return it->first;
1739  }
1740 
1741  LOG_POST(Error << choice << " could not be mapped onto an enum value");
1742  return kEmptyStr;
1743 }
1744 
// Converts the macro-side value `macro_field` of the molinfo field
// `molinfo_field` into the text used on the GUI side. The visible steps:
//  1. a non-empty s_MapStrandValues(macro_field) result is returned as is;
//  2. "not-set" and "unknown" both become a single space;
//  3. otherwise macro_field is looked up in the name-to-value map of the enum
//     that belongs to the field type, and `ret` is returned.
// NOTE(review): this listing is missing the definition of field_type (listing
// line 1756), every case label, and all statements inside the second switch
// except the breaks, so how `ret` is filled in after a successful lookup
// cannot be told from here.
1745 string CMacroEditorContext::GetGUIMolinfoValue(const string& macro_field, const string& molinfo_field)
1746 {
1747  string ret = s_MapStrandValues(macro_field);
1748  if (!ret.empty())
1749  return ret;
1750 
1751  if (macro_field == "not-set" || macro_field == "unknown") {
1752  // pay attention as this is not an empty string
1753  return " ";
1754  }
1755 
1757  CEnumeratedTypeValues::TNameToValue name_to_values;
      // Pick the name-to-value map of the enum that belongs to the field type.
1758  switch (field_type) {
1760  name_to_values = CSeq_inst::ENUM_METHOD_NAME(EMol)()->NameToValue();
1761  break;
1763  name_to_values = CMolInfo::ENUM_METHOD_NAME(ECompleteness)()->NameToValue();
1764  break;
1766  name_to_values = CMolInfo::ENUM_METHOD_NAME(EBiomol)()->NameToValue();
1767  break;
1769  name_to_values = CSeq_inst::ENUM_METHOD_NAME(EStrand)()->NameToValue();
1770  break;
1772  name_to_values = CMolInfo::ENUM_METHOD_NAME(ETech)()->NameToValue();
1773  break;
1775  name_to_values = CSeq_inst::ENUM_METHOD_NAME(ETopology)()->NameToValue();
1776  break;
      // No break after the ERepr assignment: control falls into `default`,
      // which only breaks, so the effect is the same.
1778  name_to_values = CSeq_inst::ENUM_METHOD_NAME(ERepr)()->NameToValue();
1779  default:
1780  break;
1781  }
1782 
1783  auto it = name_to_values.find(macro_field);
1784  if (it != name_to_values.end()) {
1785  switch (field_type) {
1788  break;
1791  break;
1794  break;
1797  break;
1800  break;
1803  break;
1806  default:
1807  break;
1808  }
1809  }
      // `ret` is still empty here unless one of the missing statements in the
      // switch above assigned it.
1810  return ret;
1811 }
1812 
1813 
// Body of a string-returning member whose signature line is missing from this
// listing (name unknown from here).
// Lower-cases the first character of m_Field, shortens "gi-set" and
// "gibb-set" to the part before the dash, and returns m_Field. The member
// itself is modified.
// NOTE(review): m_Field[0] is written without an emptiness check -- confirm
// that callers never reach this with an empty m_Field.
1815 {
1816  m_Field[0] = tolower((unsigned char)m_Field[0]);
1817  if (m_Field == "gi-set" || m_Field == "gibb-set") {
1818  m_Field = m_Field.substr(0, m_Field.find("-"));
1819  }
1820  return m_Field;
1821 }
1822 
1823 
1824 
1825 
User-defined methods of the data storage class.
const_iterator end() const
const_iterator begin() const
static CRef< CConvertFeatureBase > Create(objects::CSeqFeatData::ESubtype subtype_from, objects::CSeqFeatData::ESubtype subtype_to)
virtual string GetDescription()
static vector< string > GetFieldNames()
CFeatListItem - basic configuration data for one "feature" type.
int GetSubtype() const
string GetDescription() const
int GetType() const
static wxArrayString s_FillFeatQualList(objects::CSeqFeatData::ESubtype subtype)
static bool s_IsRarelyUsedOrDiscouragedFeatureType(int subtype)
static bool s_IsGeneQualifier(const string &field)
vector< string > m_GeneKeywords
vector< string > m_RnaKeywords
vector< string > m_DescrKeywords
string GetGUIFieldName(const string &macro_field, const string &target, EMacroFieldType &type, EMSection clause=EMSection::eDoSection)
vector< string > x_GetProteinFieldnames() const
boost::bimap< boost::bimaps::set_of< string, PNocase >, boost::bimaps::set_of< string, PNocase > > TBimapStrings
vector< string > m_EmptyKeywords
static string s_GetGUIMappedMiscDescr(const string &value)
vector< string > GetFeatureTypes(bool for_removal=false, bool to_create=false, bool include_all=false) const
string GetGUIDateField(const string &macro_field)
string GetGUIPubField(CPubFieldType::EPubFieldType)
vector< string > m_ProteinKeywords
vector< string > x_GetStrCommFieldnames() const
vector< string > GetRNATypes() const
pair< int, int > GetFeatureType(const string &name) const
string GetAsnPathToFieldName(const string &field, EMacroFieldType type, const string &target=kEmptyStr)
vector< string > m_DBLinkKeywords
string x_GetAsnPathToMolinfo(const string &target)
string x_GetAsnPathToMiscDescr(const string &target)
vector< string > m_FeatQualKeywords
static string s_GetGUIMappedStructCommField(const string &value)
vector< string > m_SourceTaxKeys
vector< string > m_Featuretypes
string GetGUIMolinfoValue(const string &macro_field, const string &molinfo_field)
vector< string > m_BsrcAutodefWords
vector< string > m_MiscKeywordsToRmv
const vector< string > & GetKeywords(EMacroFieldType type) const
vector< string > GetncRNAClassTypes(bool include_any=true) const
vector< string > m_SourceKeywords
bool x_GetGUIFieldForDBLink(string &field_name, EMacroFieldType &type)
vector< string > m_SourceTextKeys
bool x_GetGUIFieldForCdsGeneProtRna(string &field_name, EMacroFieldType &type)
string GetConversionDescr(const string &from_feat, const string &to_feat)
string GetGUIAuthorName(const string &author_part)
string GetGUIFeatQualifier(const string &macro_field)
string GetGUIAffilField(const string &macro_field)
vector< string > x_GetBsrcTextFieldnames(bool complete=false) const
vector< string > x_GetDescriptorFieldnames() const
vector< string > GetFeatQualifiers() const
map< string, pair< int, int > > m_FeatureMap
bool x_GetGUIFieldForBsrcText(string &field_name, EMacroFieldType &type)
vector< string > m_PubdescKeywords
vector< string > m_MiscKeywords
vector< string > x_GetBsrcTaxFieldnames() const
string x_GetAsnPathToDBLinkField(const string &target)
bool x_GetGUIFieldForPubdesc(string &field_name, EMacroFieldType &type)
vector< string > m_CdsGeneProtKeywords
bool x_GetGUIFieldForMiscDescr(string &field_name, EMacroFieldType &type)
bool x_IsDescriptor(string &field_name, EMacroFieldType &type)
vector< string > x_GetAllBsrcFieldnames() const
string GetAsnPathToFeature(const string &feat_name) const
vector< string > m_MolinfoKeywords
bool x_GetGUIFieldForStructComm(string &field_name, EMacroFieldType &type, EMSection clause=EMSection::eDoSection)
bool x_GetGUIFieldForMolinfo(string &field_name, EMacroFieldType &type)
string GetAsnPathToAuthorFieldName(const string &author_name)
static string FindInBimapOrEmpty(const string &type, const TBimapStrings &str_bimap)
string GetGUIDescriptor(const string &macro_field)
string GetAsnMolinfoValue(const string &molinfo_field, const string &choice)
vector< string > GetBondTypes() const
vector< string > GetRNAProductnames(const string &rna_type) const
bool x_GetGUIFieldForBsrcTax(string &field_name, EMacroFieldType &type)
vector< string > GetLegalQualifiers(const string &feat_name) const
vector< string > x_GetSetClassFieldnames(bool all=true) const
vector< string > GetFieldNames(EMacroFieldType type) const
vector< string > GetSiteTypes() const
string x_GetAsnPathToRnaFieldName(const string &rna_type, const string &field)
static string GetBiomolLabel(objects::CMolInfo::TBiomol biomol)
static vector< string > GetFieldNames()
static string GetTechLabel(objects::CMolInfo::TTech tech)
static EMolInfoFieldType GetFieldType(const string &field_name)
static string GetReprLabel(objects::CSeq_inst::TRepr val)
static string GetStrandLabel(objects::CSeq_inst::TStrand val)
static string GetMolLabel(objects::CSeq_inst::TMol val)
static string GetTopologyLabel(objects::CSeq_inst::TTopology tech)
static vector< string > GetChoicesForField(CMolInfoFieldType::EMolInfoFieldType field_type, bool &allow_other)
static string GetCompletenessLabel(objects::CMolInfo::TCompleteness tech)
@ eVocabulary_insdc
Definition: OrgMod.hpp:69
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:86
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: OrgMod.cpp:108
@ ePubFieldType_AuthorLastName
@ ePubFieldType_AuthorFirstName
@ ePubFieldType_AuthorConsortium
@ ePubFieldType_AuthorMiddleInitial
@ ePubFieldType_AffilPostalCode
static EPubFieldType GetTypeForLabel(string label)
static string NormalizePubFieldName(string orig_label)
static string GetLabelForType(EPubFieldType field_type)
static vector< string > GetFieldNames(bool extended=false)
static vector< string > GetChoicesForField(EPubFieldType field_type, bool &allow_other)
static string GetMacroLabelForType(EPubFieldType field_type)
static vector< string > GetRNAFields()
static vector< string > GetRNATypes()
static vector< string > GetncRNAClassList()
Definition: RNA_gen.cpp:86
static const CSiteList * GetSiteList()
const TLegalQualifiers & GetLegalQualifiers(void) const
Get a list of all the legal qualifiers for the feature.
static E_Choice GetTypeFromSubtype(ESubtype subtype)
@ eSubtype_transit_peptide_aa
static EQualifier GetQualifierType(CTempString qual)
convert qual string to enumerated value
static const CBondList * GetBondList()
static CTempString GetQualifierAsString(EQualifier qual)
Convert a qualifier from an enumerated value to a string representation or empty if not found.
static ESubtype SubtypeNameToValue(CTempString sName)
Turn a string into its ESubtype which is NOT necessarily related to the identifier of the enum.
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
static bool IsRegulatory(ESubtype subtype)
CSeq_entry_Handle –.
const_iterator begin() const
const_iterator end() const
static vector< string > s_GetSourceLocationOptions()
static vector< string > s_GetSourceOriginOptions()
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:157
@ eVocabulary_insdc
Definition: SubSource.hpp:83
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:185
static bool IsDiscouraged(const TSubtype subtype)
Definition: SubSource.cpp:247
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
const char * kDefinitionLineLabel
const char * kOrgModNote
const char * kFwdPrimerSeq
const char * kFwdPrimerName
const char * kDbXref
const char * kSubSourceNote
const char * kNatHost
const char * kCommentDescriptorLabel
const char * kHost
const char * kRevPrimerName
const char * kGenbankBlockKeyword
const char * kRevPrimerSeq
static const struct name_t names[]
static const struct type types[]
Definition: type.c:22
static char tmp[3200]
Definition: utf8.c:42
char data[12]
Definition: iconv.c:80
EStrand
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
list< pair< string, TEnumValueType > > TValues
Definition: enumvalues.hpp:54
bool StringsAreEquivalent(const string &name1, const string &name2)
Definition: macro_util.cpp:397
const char * kColl_suffix
Definition: macro_util.cpp:933
objects::CSeqFeatData::ESubtype GetFeatSubtype(const string &feat_type)
Definition: macro_util.cpp:350
const char * kStrCommFieldValue
Definition: macro_util.cpp:940
bool IsStructVoucherPart(const string &field)
Definition: macro_util.cpp:826
const char * kStrCommFieldName
Definition: macro_util.cpp:942
const char * kInst_suffix
Definition: macro_util.cpp:932
bool IsSatelliteSubfield(const string &field)
Definition: macro_util.cpp:903
bool IsMobileElementTSubfield(const string &field)
Definition: macro_util.cpp:908
const char * kSpecid_suffix
Definition: macro_util.cpp:934
const char * kStrCommDbname
Definition: macro_util.cpp:941
vector< const objects::CFeatListItem * > GetSortedFeatList(objects::CSeq_entry_Handle seh, size_t max=numeric_limits< size_t >::max())
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_bio_material
Definition: OrgMod_.hpp:119
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
E_Choice
Choice variants.
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_wgs_set
whole genome shotgun project
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_genbank
converted genbank
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
ERepr
representation class
Definition: Seq_inst_.hpp:91
ETopology
topology of molecule
Definition: Seq_inst_.hpp:121
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
EStrand
strandedness in living organism
Definition: Seq_inst_.hpp:133
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
Definition: Seqdesc_.cpp:218
@ e_Embl
EMBL specific information.
Definition: Seqdesc_.hpp:127
@ e_Het
cofactor, etc associated but not bound
Definition: Seqdesc_.hpp:132
@ e_Num
a numbering system
Definition: Seqdesc_.hpp:118
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Pir
PIR specific info.
Definition: Seqdesc_.hpp:120
@ e_Genbank
GenBank specific info.
Definition: Seqdesc_.hpp:121
@ e_Prf
PRF specific information.
Definition: Seqdesc_.hpp:130
@ e_Sp
SWISSPROT specific info.
Definition: Seqdesc_.hpp:125
@ e_Dbxref
xref to other databases
Definition: Seqdesc_.hpp:126
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Region
overall region (globin locus)
Definition: Seqdesc_.hpp:123
@ e_Maploc
map location of this sequence
Definition: Seqdesc_.hpp:119
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ e_Pdb
PDB specific information.
Definition: Seqdesc_.hpp:131
@ e_Name
a name for this sequence
Definition: Seqdesc_.hpp:114
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
int i
USING_SCOPE(objects)
static string s_MapStrandValues(const string &value)
static string s_GetGeneField(const string &value)
static string s_GetProteinField(const string &value)
constexpr auto sort(_Init &&init)
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int toupper(Uchar c)
Definition: ncbictype.hpp:73
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
bool QualifierNamesAreEquivalent(string name1, string name2)
Definition: type.c:6
void FromArrayString(const wxArrayString &in, vector< string > &out)
Definition: wx_utils.cpp:343
Modified on Wed Apr 17 13:08:09 2024 by modify_doxy.py rev. 669887