NCBI C++ ToolKit
so_map.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: so_map.cpp 91102 2020-09-04 15:05:02Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Sequence Ontology Type Mapping
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
34 #include <objects/seq/so_map.hpp>
42 
45 
46 // ----------------------------------------------------------------------------
48  const string& lhs,
49  const string& rhs) const
50 // ----------------------------------------------------------------------------
51 {
52  string::const_iterator pLhs = lhs.begin();
53  string::const_iterator pRhs = rhs.begin();
54  while (pLhs != lhs.end() && pRhs != rhs.end() &&
55  tolower(*pLhs) == tolower(*pRhs)) {
56  ++pLhs;
57  ++pRhs;
58  }
59  if (pLhs == lhs.end()) {
60  return (pRhs != rhs.end());
61  }
62  if (pRhs == rhs.end()) {
63  return false;
64  }
65  return (tolower(*pLhs) < tolower(*pRhs));
66 }
67 
68 // ----------------------------------------------------------------------------
70  const CSeq_feat& feature,
71  const string& qualName)
72 // ----------------------------------------------------------------------------
73 {
74  string namedQual;
75  const auto& quals = feature.GetQual();
76  for (const auto& qual: quals) {
77  if (!qual->CanGetQual() || !qual->CanGetVal()) {
78  continue;
79  }
80  if (qual->GetQual() != qualName) {
81  continue;
82  }
83  if (namedQual.empty()) {
84  namedQual = qual->GetVal();
85  continue;
86  }
87  if (namedQual != qual->GetVal()) {
88  return "";
89  }
90  }
91  return namedQual;
92 }
93 
94 // ----------------------------------------------------------------------------
96 // ----------------------------------------------------------------------------
97 
98 // ----------------------------------------------------------------------------
100 // ----------------------------------------------------------------------------
101  {"SO:0000001", "region"},
102  {"SO:0000002", "sequece_secondary_structure"},
103  {"SO:0000005", "satellite_DNA"},
104  {"SO:0000013", "scRNA"},
105  {"SO:0000035", "riboswitch"},
106  {"SO:0000036", "matrix_attachment_site"},
107  {"SO:0000037", "locus_control_region"},
108  {"SO:0000104", "polypeptide"},
109  {"SO:0000110", "sequence_feature"},
110  {"SO:0000139", "ribosome_entry_site"},
111  {"SO:0000140", "attenuator"},
112  {"SO:0000141", "terminator"},
113  {"SO:0000147", "exon"},
114  {"SO:0000165", "enhancer"},
115  {"SO:0000167", "promoter"},
116  {"SO:0000172", "CAAT_signal"},
117  {"SO:0000173", "GC_rich_promoter_region"},
118  {"SO:0000174", "TATA_box"},
119  {"SO:0000175", "minus_10_signal"},
120  {"SO:0000176", "minus_35_signal"},
121  {"SO:0000178", "operon"},
122  {"SO:0000185", "primary_transcript"},
123  {"SO:0000188", "intron"},
124  {"SO:0000204", "five_prime_UTR"},
125  {"SO:0000205", "three_prime_UTR"},
126  {"SO:0000234", "mRNA"},
127  {"SO:0000252", "rRNA"},
128  {"SO:0000253", "tRNA"},
129  {"SO:0000274", "snRNA"},
130  {"SO:0000275", "snoRNA"},
131  {"SO:0000276", "miRNA"},
132  {"SO:0000286", "long_terminal_repeat"},
133  {"SO:0000289", "microsatellite"},
134  {"SO:0000294", "inverted_repeat"},
135  {"SO:0000296", "origin_of_replication"},
136  {"SO:0000297", "D_loop"},
137  {"SO:0000298", "recombination_feature"},
138  {"SO:0000305", "modified_DNA_base"},
139  {"SO:0000313", "stem_loop"},
140  {"SO:0000314", "direct_repeat"},
141  {"SO:0000315", "TSS"},
142  {"SO:0000316", "CDS"},
143  {"SO:0000330", "conserved_region"},
144  {"SO:0000331", "STS"},
145  {"SO:0000336", "pseudogene"},
146  {"SO:0000374", "ribozyme"},
147  {"SO:0000380", "hammerhead_ribozyme"},
148  {"SO:0000385", "RNase_MRP_RNA"},
149  {"SO:0000386", "RNase_P_RNA"},
150  {"SO:0000404", "vault_RNA"},
151  {"SO:0000405", "Y_RNA"},
152  {"SO:0000409", "binding_site"},
153  {"SO:0000410", "protein_binding_site"},
154  {"SO:0000413", "sequence_difference"},
155  {"SO:0000418", "signal_peptide"},
156  {"SO:0000419", "mature_protein_region"},
157  {"SO:0000433", "non_LTR_retrotransposon_polymeric_tract"},
158  {"SO:0000454", "rasiRNA"},
159  {"SO:0000458", "D_gene_segment"},
160  {"SO:0000466", "V_gene_segment"},
161  {"SO:0000470", "J_gene_segment"},
162  {"SO:0000478", "C_gene_segment"},
163  {"SO:0000507", "pseudogenic_exon"},
164  {"SO:0000516", "pseudogenic_transcript"},
165  {"SO:0000551", "polyA_signal_sequence"},
166  {"SO:0000553", "polyA_site"},
167  {"SO:0000577", "centromere"},
168  {"SO:0000584", "tmRNA"},
169  {"SO:0000588", "autocatalytically_spliced_intron"},
170  {"SO:0000590", "SRP_RNA"},
171  {"SO:0000602", "guide_RNA"},
172  {"SO:0000624", "telomere"},
173  {"SO:0000625", "silencer"},
174  {"SO:0000627", "insulator"},
175  {"SO:0000644", "antisense_RNA"},
176  {"SO:0000646", "siRNA"},
177  {"SO:0000655", "ncRNA"},
178  {"SO:0000657", "repeat_region"},
179  {"SO:0000658", "dispersed_repeat"},
180  {"SO:0000673", "transcript"},
181  {"SO:0000685", "DNAsel_hypersensitive_site"},
182  {"SO:0000704", "gene"},
183  {"SO:0000705", "tandem_repeat"},
184  {"SO:0000714", "nucleotide_motif"},
185  {"SO:0000723", "iDNA"},
186  {"SO:0000724", "oriT"},
187  {"SO:0000725", "transit_peptide"},
188  {"SO:0000730", "gap"},
189  {"SO:0000777", "pseudogenic_rRNA"},
190  {"SO:0000778", "pseudogenic_tRNA"},
191  {"SO:0001021", "chromosome_preakpoint"},
192  {"SO:0001035", "piRNA"},
193  {"SO:0001037", "mobile_genetic_element"},
194  {"SO:0001055", "transcriptional_cis_regulatory_region"},
195  {"SO:0001059", "sequence_alteration"},
196  {"SO:0001062", "propeptide"},
197  {"SO:0001086", "sequence_uncertainty"},
198  {"SO:0001087", "cross_link"},
199  {"SO:0001088", "disulfide_bond"},
200  {"SO:0001268", "recoding_stimulatory_region"},
201  {"SO:0001411", "biological_region"},
202  {"SO:0001484", "X_element_combinatorical_repeat"},
203  {"SO:0001485", "Y_prime_element"},
204  {"SO:0001496", "telomeric_repeat"},
205  {"SO:0001649", "nested_repeat"},
206  {"SO:0001682", "replication_regulatory_region"},
207  {"SO:0001720", "epigenetically_modified_region"},
208  {"SO:0001797", "centromeric_repeat"},
209  {"SO:0001833", "V_region"},
210  {"SO:0001835", "N_region"},
211  {"SO:0001836", "S_region"},
212  {"SO:0001877", "lnc_RNA"},
213  {"SO:0001917", "CAGE_cluster"},
214  {"SO:0002020", "boundary_element"},
215  {"SO:0002072", "sequence_comparison"},
216  {"SO:0002087", "pseudogenic_CDS"},
217  {"SO:0002094", "non_allelic_homologous_recombination_region"},
218  {"SO:0002154", "mitotic_recombination_region"},
219  {"SO:0002155", "meiotic_recombination_region"},
220  {"SO:0002190", "enhancer_blocking_element"},
221  {"SO:0002191", "imprinting_control_region"},
222  {"SO:0002205", "response_element"},
223  {"SO:0005836", "regulatory_region"},
224  {"SO:0005850", "primary_binding_site"},
225 
226  {"SO:0000000", ""},
227  //{"SO:UNKNOWN", "replication_start_site"},
228  //{"SO:UNKNOWN", "nucleotide_site"},
229  //{"SO:UNKNOWN", "nucleotide_cleavage_site"},
230  //{"SO:UNKNOWN", "repeat_instability_region"},
231 };
232 
233 // ----------------------------------------------------------------------------
235 // ----------------------------------------------------------------------------
236  {"CAGE_cluster", CSoMap::xFeatureMakeMiscFeature},
237  {"CAAT_signal", CSoMap::xFeatureMakeRegulatory},
238  {"CDS", CSoMap::xFeatureMakeCds},
239  {"C_gene_segment", CSoMap::xFeatureMakeImp},
240  {"DNAsel_hypersensitive_site", CSoMap::xFeatureMakeRegulatory},
241  {"D_loop", CSoMap::xFeatureMakeImp},
242  {"D_gene_segment", CSoMap::xFeatureMakeImp},
243  {"GC_rich_promoter_region", CSoMap::xFeatureMakeRegulatory},
244  {"J_gene_segment", CSoMap::xFeatureMakeImp},
245  {"N_region", CSoMap::xFeatureMakeImp},
246  {"RNase_MRP_RNA", CSoMap::xFeatureMakeNcRna},
247  {"RNase_P_RNA", CSoMap::xFeatureMakeNcRna},
248  {"SRP_RNA", CSoMap::xFeatureMakeNcRna},
249  {"STS", CSoMap::xFeatureMakeImp},
250  {"S_region", CSoMap::xFeatureMakeImp},
251  {"TATA_box", CSoMap::xFeatureMakeRegulatory},
252  {"V_gene_segment", CSoMap::xFeatureMakeImp},
253  {"V_region", CSoMap::xFeatureMakeImp},
254  {"X_element_combinatorical_repeat", CSoMap::xFeatureMakeRepeatRegion},
255  {"Y_RNA", CSoMap::xFeatureMakeNcRna},
256  {"Y_prime_element", CSoMap::xFeatureMakeRepeatRegion},
257  {"antisense_RNA", CSoMap::xFeatureMakeNcRna},
258  {"attenuator", CSoMap::xFeatureMakeRegulatory},
259  {"autocatalytically_spliced_intron", CSoMap::xFeatureMakeNcRna},
260  {"binding_site", CSoMap::xFeatureMakeImp},
261  {"biological_region", CSoMap::xFeatureMakeRegion},
262  {"boundary_element", CSoMap::xFeatureMakeRegulatory},
263  {"centromere", CSoMap::xFeatureMakeImp},
264  {"centromeric_repeat", CSoMap::xFeatureMakeRepeatRegion},
265  {"chromosome_breakpoint", CSoMap::xFeatureMakeMiscRecomb},
266  {"conserved_region", CSoMap::xFeatureMakeMiscFeature},
267  {"direct_repeat", CSoMap::xFeatureMakeRepeatRegion},
268  {"dispersed_repeat", CSoMap::xFeatureMakeRepeatRegion},
269  {"enhancer", CSoMap::xFeatureMakeRegulatory},
270  {"enhancer_blocking_element", CSoMap::xFeatureMakeRegulatory},
271  {"epigenetically_modified_region", CSoMap::xFeatureMakeRegulatory},
272  {"exon", CSoMap::xFeatureMakeImp},
273  {"five_prime_UTR", CSoMap::xFeatureMakeImp},
274  {"gap", CSoMap::xFeatureMakeImp},
275  {"gene", CSoMap::xFeatureMakeGene},
276  {"guide_RNA", CSoMap::xFeatureMakeNcRna},
277  {"hammerhead_ribozyme", CSoMap::xFeatureMakeNcRna},
278  {"iDNA", CSoMap::xFeatureMakeImp},
279  {"immature_peptide_region", CSoMap::xFeatureMakeProt},
280  {"imprinting_control_region", CSoMap::xFeatureMakeRegulatory},
281  {"insulator", CSoMap::xFeatureMakeRegulatory},
282  {"intron", CSoMap::xFeatureMakeImp},
283  {"inverted_repeat", CSoMap::xFeatureMakeRepeatRegion},
284  {"lnc_RNA", CSoMap::xFeatureMakeNcRna},
285  {"locus_control_region", CSoMap::xFeatureMakeRegulatory},
286  {"long_terminal_repeat", CSoMap::xFeatureMakeRepeatRegion},
287  {"mRNA", CSoMap::xFeatureMakeRna},
288  {"matrix_attachment_region", CSoMap::xFeatureMakeRegulatory},
289  {"mature_protein_region", CSoMap::xFeatureMakeImp},
290  {"meiotic_recombination_region", CSoMap::xFeatureMakeMiscRecomb},
291  {"miRNA", CSoMap::xFeatureMakeNcRna},
292  {"microsatellite", CSoMap::xFeatureMakeRepeatRegion},
293  {"minisatellite", CSoMap::xFeatureMakeRepeatRegion},
294  {"minus_10_signal", CSoMap::xFeatureMakeRegulatory},
295  {"minus_35_signal", CSoMap::xFeatureMakeRegulatory},
296  {"mitotic_recombination_region", CSoMap::xFeatureMakeMiscRecomb},
297  {"mobile_genetic_element", CSoMap::xFeatureMakeImp},
298  {"modified_DNA_base", CSoMap::xFeatureMakeImp},
299  {"ncRNA", CSoMap::xFeatureMakeNcRna},
300  {"nested_repeat", CSoMap::xFeatureMakeRepeatRegion},
301  {"non_allelic_homologous_recombination", CSoMap::xFeatureMakeMiscRecomb},
302  {"non_LTR_retrotransposon_polymeric_tract", CSoMap::xFeatureMakeRepeatRegion},
303  {"nucleotide_motif", CSoMap::xFeatureMakeMiscFeature},
304  {"nucleotide_cleavage_site", CSoMap::xFeatureMakeMiscFeature},
305  {"nucleotide_site", CSoMap::xFeatureMakeMiscFeature},
306  {"operon", CSoMap::xFeatureMakeImp},
307  {"oriT", CSoMap::xFeatureMakeImp},
308  {"origin_of_replication", CSoMap::xFeatureMakeImp},
309  {"piRNA", CSoMap::xFeatureMakeNcRna},
310  {"polyA_signal_sequence", CSoMap::xFeatureMakeRegulatory},
311  {"polyA_site", CSoMap::xFeatureMakeImp},
312  {"primary_transcript", CSoMap::xFeatureMakeImp},
313  {"primer_binding_site", CSoMap::xFeatureMakeImp},
314  {"promoter", CSoMap::xFeatureMakeRegulatory},
315  {"protein_binding_site", CSoMap::xFeatureMakeImp},
316  {"pseudogene", CSoMap::xFeatureMakeGene},
317  {"pseudogenic_exon", CSoMap::xFeatureMakeImp},
318  {"pseudogenic_CDS", CSoMap::xFeatureMakeCds},
319  {"pseudogenic_rRNA", CSoMap::xFeatureMakeRna},
320  {"pseudogenic_tRNA", CSoMap::xFeatureMakeRna},
321  {"pseudogenic_transcript", CSoMap::xFeatureMakeMiscRna},
322  {"rRNA", CSoMap::xFeatureMakeRna},
323  {"rasiRNA", CSoMap::xFeatureMakeNcRna},
324  {"recoding_stimulatory_region", CSoMap::xFeatureMakeRegulatory},
325  {"recombination_feature", CSoMap::xFeatureMakeMiscRecomb},
326  {"region", CSoMap::xFeatureMakeImp},
327  {"regulatory_region", CSoMap::xFeatureMakeRegulatory},
328  {"repeat_instability_region", CSoMap::xFeatureMakeMiscFeature},
329  {"repeat_region", CSoMap::xFeatureMakeRepeatRegion},
330  {"replication_regulatory_region", CSoMap::xFeatureMakeRegulatory},
331  {"replication_start_site", CSoMap::xFeatureMakeMiscFeature},
332  {"response_element", CSoMap::xFeatureMakeRegulatory},
333  {"ribosome_entry_site", CSoMap::xFeatureMakeRegulatory},
334  {"riboswitch", CSoMap::xFeatureMakeRegulatory},
335  {"ribozyme", CSoMap::xFeatureMakeNcRna},
336  {"satellite_DNA", CSoMap::xFeatureMakeRepeatRegion},
337  {"scRNA", CSoMap::xFeatureMakeNcRna},
338  {"sequence_alteration", CSoMap::xFeatureMakeImp},
339  {"sequence_comparison", CSoMap::xFeatureMakeMiscFeature},
340  {"sequence_difference", CSoMap::xFeatureMakeImp},
341  {"sequence_feature", CSoMap::xFeatureMakeMiscFeature},
342  {"sequence_secondary_structure", CSoMap::xFeatureMakeImp},
343  {"sequence_uncertainty", CSoMap::xFeatureMakeImp},
344  {"siRNA", CSoMap::xFeatureMakeNcRna},
345  {"signal_peptide", CSoMap::xFeatureMakeImp},
346  {"silencer", CSoMap::xFeatureMakeRegulatory},
347  {"snRNA", CSoMap::xFeatureMakeNcRna},
348  {"snoRNA", CSoMap::xFeatureMakeNcRna},
349  {"stem_loop", CSoMap::xFeatureMakeImp},
350  {"tRNA", CSoMap::xFeatureMakeRna},
351  {"tandem_repeat", CSoMap::xFeatureMakeRepeatRegion},
352  {"telomerase_RNA", CSoMap::xFeatureMakeNcRna},
353  {"telomere", CSoMap::xFeatureMakeImp},
354  {"telomeric_repeat", CSoMap::xFeatureMakeRepeatRegion},
355  {"terminator", CSoMap::xFeatureMakeRegulatory},
356  {"tmRNA", CSoMap::xFeatureMakeRna},
357  {"transcript", CSoMap::xFeatureMakeMiscRna},
358  {"transcriptional_cis_regulatory_region", CSoMap::xFeatureMakeRegulatory},
359  {"transcription_start_site", CSoMap::xFeatureMakeMiscFeature},
360  {"transit_peptide", CSoMap::xFeatureMakeImp},
361  {"three_prime_UTR", CSoMap::xFeatureMakeImp},
362  {"vault_RNA", CSoMap::xFeatureMakeNcRna},
363 };
364 
365 
366 // ----------------------------------------------------------------------------
368  vector<string>& supported_terms)
369 // ----------------------------------------------------------------------------
370 {
371  supported_terms.clear();
372  for (auto term: CSoMap::mMapFeatFunc) {
373  supported_terms.push_back(term.first);
374  }
375  std::sort(supported_terms.begin(), supported_terms.end());
376  return true;
377 }
378 
379 // ----------------------------------------------------------------------------
381  const string& sofa_id)
382 // ----------------------------------------------------------------------------
383 {
384  TYPEENTRY type_it = CSoMap::mMapSoIdToType.find(sofa_id);
385  if (type_it == CSoMap::mMapSoIdToType.end()) {
386  return "";
387  }
388  return type_it->second;
389 }
390 
391 // ----------------------------------------------------------------------------
// Looks so_type up in mMapSoTypeToId and returns the mapped string, or an
// empty string when so_type has no entry.
// mMapSoTypeToId is filled from mMapSoIdToType with keys and values swapped.
// NOTE(review): the declarator line naming this function is absent from this
// listing.
393  const string& so_type)
394 // ----------------------------------------------------------------------------
395 {
// NOTE(review): listing line 396 is missing here. The unmatched closing brace
// on listing line 402 suggests it opened a block guarding the fill loop
// below (presumably a "fill only once" test) -- confirm against the real file.
397  for (TYPEENTRY cit = CSoMap::mMapSoIdToType.begin();
398  cit != CSoMap::mMapSoIdToType.end();
399  ++cit) {
// reverse direction: mapped value becomes the key, key becomes the value
400  CSoMap::mMapSoTypeToId[cit->second] = cit->first;
401  }
402  }
403  TYPEENTRY id_it = mMapSoTypeToId.find(so_type);
404  if (id_it == CSoMap::mMapSoTypeToId.end()) {
// unknown type name
405  return "";
406  }
407  return id_it->second;
408 }
409 
410 // ----------------------------------------------------------------------------
412  const string& so_type,
413  CSeq_feat& feature,
414  bool invalidToRegion)
415 // ----------------------------------------------------------------------------
416 {
417  string resolved_so_type = ResolveSoAlias(so_type);
418  FEATFUNCENTRY it = mMapFeatFunc.find(resolved_so_type);
419  if (it != mMapFeatFunc.end()) {
420  return (it->second)(resolved_so_type, feature);
421  }
422  if (invalidToRegion) {
423  return xFeatureMakeRegion(so_type, feature);
424  }
425  return false;
426 }
427 
428 // ----------------------------------------------------------------------------
430  const string& so_type,
431  CSeq_feat& feature)
432 // ----------------------------------------------------------------------------
433 {
434  feature.SetData().SetGene();
435  if (so_type == "pseudogene") {
436  feature.SetPseudo(true);
437  }
438  return true;
439 }
440 
441 // ----------------------------------------------------------------------------
443  const string& so_type,
444  CSeq_feat& feature)
445 // ----------------------------------------------------------------------------
446 {
447  static const map<string, CRNA_ref::EType, CompareNoCase> mTypeToRna = {
448  {"mRNA", CRNA_ref::eType_mRNA},
449  {"rRNA", CRNA_ref::eType_rRNA},
450  {"pseudogenic_rRNA", CRNA_ref::eType_rRNA},
451  {"tRNA", CRNA_ref::eType_tRNA},
452  {"pseudogenic_tRNA", CRNA_ref::eType_tRNA},
453  {"tmRNA", CRNA_ref::eType_tmRNA},
454  };
455  auto it = mTypeToRna.find(so_type);
456  feature.SetData().SetRna().SetType(it->second);
457  if(NStr::StartsWith(so_type, "pseudogenic_")) {
458  feature.SetPseudo(true);
459  }
460  return true;
461 }
462 
463 // ----------------------------------------------------------------------------
465  const string& so_type,
466  CSeq_feat& feature)
467 // ----------------------------------------------------------------------------
468 {
469  static const TYPEMAP mTypeToClass = {
470  {"ncRNA", "other"},
471  };
472  feature.SetData().SetRna().SetType(CRNA_ref::eType_ncRNA);
473  auto normalizedType = so_type;
474  auto it = mTypeToClass.find(so_type);
475  if (it != mTypeToClass.end()) {
476  normalizedType = it->second;
477  }
478  feature.SetData().SetRna().SetExt().SetGen().SetClass(normalizedType);
479  return true;
480 }
481 
482 // ----------------------------------------------------------------------------
484  const string& so_type,
485  CSeq_feat& feature)
486 // ----------------------------------------------------------------------------
487 {
488  feature.SetData().SetCdregion();
489  if (so_type=="pseudogenic_CDS") {
490  feature.SetPseudo(true);
491  }
492  return true;
493 }
494 
495 // ----------------------------------------------------------------------------
// Gives `feature` protein data and sets its "processed" state from so_type.
// Returns false, without touching `feature`, when so_type is not a key of
// mTypeToProcessed; returns true otherwise.
// NOTE(review): the declarator line naming this function is absent from this
// listing.
497  const string& so_type,
498  CSeq_feat& feature)
499 // ----------------------------------------------------------------------------
500 {
// NOTE(review): listing line 501, which declares the type of
// mTypeToProcessed, is missing; only the initializer is visible.
502  mTypeToProcessed = {
503  {"mature_protein_region", CProt_ref::eProcessed_mature},
504  {"immature_peptide_region", CProt_ref::eProcessed_preprotein},
505  };
506  auto cit = mTypeToProcessed.find(so_type);
507  if (cit == mTypeToProcessed.end()) {
// unsupported term
508  return false;
509  }
510  feature.SetData().SetProt().SetProcessed(cit->second);
511  return true;
512 }
513 
514 // ----------------------------------------------------------------------------
516  const string& so_type,
517  CSeq_feat& feature)
518 // ----------------------------------------------------------------------------
519 {
520  static const TYPEMAP mapTypeToQual = {
521  {"TSS", "transcription_start_site"},
522  };
523  feature.SetData().SetImp().SetKey("misc_feature");
524  if (so_type == "sequence_feature") {
525  return true;
526  }
527  CRef<CGb_qual> feat_class(new CGb_qual);
528  feat_class->SetQual("feat_class");
529  auto cit = mapTypeToQual.find(so_type);
530  if (cit == mapTypeToQual.end()) {
531  feat_class->SetVal(so_type);
532  }
533  else {
534  feat_class->SetVal(cit->second);
535  }
536  feature.SetQual().push_back(feat_class);
537  return true;
538 }
539 
540 // ----------------------------------------------------------------------------
542  const string& so_type,
543  CSeq_feat& feature)
544 // ----------------------------------------------------------------------------
545 {
546  static const TYPEMAP mapTypeToQual = {
547  {"meiotic_recombination_region", "meiotic"},
548  {"mitotic_recombination_region", "mitotic"},
549  {"non_allelic_homologous_recombination", "non_allelic_homologous"},
550  {"recombination_feature", "other"},
551  };
552  feature.SetData().SetImp().SetKey("misc_recomb");
553  CRef<CGb_qual> recombination_class(new CGb_qual);
554  recombination_class->SetQual("recombination_class");
555  auto cit = mapTypeToQual.find(so_type);
556  if (cit == mapTypeToQual.end()) {
557  recombination_class->SetVal(so_type);
558  }
559  else {
560  recombination_class->SetVal(cit->second);
561  }
562  feature.SetQual().push_back(recombination_class);
563  return true;
564 }
565 
566 // ----------------------------------------------------------------------------
568  const string& so_type,
569  CSeq_feat& feature)
570 // ----------------------------------------------------------------------------
571 {
572  feature.SetData().SetImp().SetKey("misc_RNA");
573  if (so_type=="pseudogenic_transcript") {
574  feature.SetPseudo(true);
575  }
576  return true;
577 }
578 
579 // ----------------------------------------------------------------------------
581  const string& so_type,
582  CSeq_feat& feature)
583 // ----------------------------------------------------------------------------
584 {
585  static const TYPEMAP mapTypeToKey = {
586  {"C_gene_segment", "C_region"},
587  {"D_gene_segment", "D_segment"},
588  {"D_loop", "D-loop"},
589  {"J_gene_segment", "J_segment"},
590  {"V_gene_segment", "V_segment"},
591  {"binding_site", "misc_binding"},
592  {"five_prime_UTR", "5\'UTR"},
593  {"long_terminal_repeat", "LTR"},
594  {"mature_protein_region", "mat_peptide"},
595  {"mobile_genetic_element", "mobile_element"},
596  {"modified_DNA_base", "modified_base"},
597  {"origin_of_replication", "rep_origin"},
598  {"primary_transcript", "prim_transcript"},
599  {"primer_binding_site", "primer_bind"},
600  {"protein_binding_site", "protein_bind"},
601  {"pseudogenic_exon", "exon"},
602  {"region", "source"},
603  {"sequence_alteration", "variation"},
604  {"sequence_difference", "misc_difference"},
605  {"sequence_secondary_structure", "misc_structure"},
606  {"sequence_uncertainty", "unsure"},
607  {"signal_peptide", "sig_peptide"},
608  {"three_prime_UTR", "3\'UTR"},
609  };
610  auto cit = mapTypeToKey.find(so_type);
611  if (cit == mapTypeToKey.end()) {
612  feature.SetData().SetImp().SetKey(so_type);
613  }
614  else {
615  feature.SetData().SetImp().SetKey(cit->second);
616  }
617  if(NStr::StartsWith(so_type, "pseudogenic_")) {
618  feature.SetPseudo(true);
619  }
620  return true;
621 }
622 
623 // ----------------------------------------------------------------------------
625  const string& so_type,
626  CSeq_feat& feature)
627 // ----------------------------------------------------------------------------
628 {
629  feature.SetData().SetRegion();
630  CRef<CGb_qual> qual(new CGb_qual("SO_type", so_type));
631  feature.SetQual().push_back(qual);
632  return true;
633 }
634 
635 // ----------------------------------------------------------------------------
637  const string& so_type,
638  CSeq_feat& feature)
639 // ----------------------------------------------------------------------------
640 {
641  static const TYPEMAP mapTypeToQual = {
642  {"DNAsel_hypersensitive_site", "DNase_I_hypersensitive_site"},
643  {"GC_rich_promoter_region", "GC_signal"},
644  {"boundary_element", "insulator"},
645  {"regulatory_region", "other"},
646  {"ribosome_entry_site", "ribosome_binding_site"},
647  };
648  feature.SetData().SetImp().SetKey("regulatory");
649  CRef<CGb_qual> regulatory_class(new CGb_qual);
650  regulatory_class->SetQual("regulatory_class");
651  auto cit = mapTypeToQual.find(so_type);
652  if (cit == mapTypeToQual.end()) {
653  regulatory_class->SetVal(so_type);
654  }
655  else {
656  regulatory_class->SetVal(cit->second);
657  }
658  feature.SetQual().push_back(regulatory_class);
659  return true;
660 }
661 
662 // ----------------------------------------------------------------------------
664  const string& so_type,
665  CSeq_feat& feature)
666 // ----------------------------------------------------------------------------
667 {
668  static const TYPEMAP mapTypeToSatellite = {
669  {"microsatellite", "microsatellite"},
670  {"minisatellite", "minisatellite"},
671  {"satellite_DNA", "satellite"},
672  };
673  static const TYPEMAP mapTypeToRptType = {
674  {"tandem_repeat", "tandem"},
675  {"inverted_repeat", "inverted"},
676  {"direct_repeat", "direct"},
677  {"nested_repeat", "nested"},
678  {"non_LTR_retrotransposon_polymeric_tract", "non_ltr_retrotransposon_polymeric_tract"},
679  {"X_element_combinatorial_repeat", "x_element_combinatorial_repeat"},
680  {"Y_prime_element", "y_prime_element"},
681  {"repeat_region", "other"},
682  };
683  feature.SetData().SetImp().SetKey("repeat_region");
684 
685  CRef<CGb_qual> qual(new CGb_qual);
686  auto cit = mapTypeToSatellite.find(so_type);
687  if (cit != mapTypeToSatellite.end()) {
688  qual->SetQual("satellite");
689  qual->SetVal(cit->second);
690  }
691  else {
692  qual->SetQual("rpt_type");
693  cit = mapTypeToRptType.find(so_type);
694  if (cit == mapTypeToRptType.end()) {
695  qual->SetVal(so_type);
696  }
697  else {
698  qual->SetVal(cit->second);
699  }
700  }
701  feature.SetQual().push_back(qual);
702  return true;
703 }
704 
705 
706 // ----------------------------------------------------------------------------
708 // ----------------------------------------------------------------------------
// NOTE(review): what follows is only a remnant of a table keyed by
// CSeqFeatData subtype whose values are mapping routines; the listing skips
// from line 708 to 773 and omits further lines, including the declarator.
// Presumably this is the initializer of mMapTypeFunc -- confirm against the
// real file.
773 
775  {CSeqFeatData::eSubtype_enhancer, xMapGeneric},
776  {CSeqFeatData::eSubtype_promoter, xMapGeneric},
778 
780 };
781 
782 // ----------------------------------------------------------------------------
784  const CSeq_feat& feature,
785  string& so_type)
786 // ----------------------------------------------------------------------------
787 {
788  auto original_type = feature.GetNamedQual("SO_type");
789  if (!original_type.empty()) {
790  so_type = original_type;
791  return true;
792  }
793  auto subtype = feature.GetData().GetSubtype();
794  TYPEFUNCENTRY cit = mMapTypeFunc.find(subtype);
795  if (cit == mMapTypeFunc.end()) {
796  return false;
797  }
798  return (cit->second)(feature, so_type);
799 }
800 
801 // ----------------------------------------------------------------------------
803  const CSeq_feat& feature,
804  string& so_type)
805 // ----------------------------------------------------------------------------
806 {
807  static const map<CSeqFeatData::ESubtype, string> mapSubtypeToSoType = {
808  {CSeqFeatData::eSubtype_3UTR, "three_prime_UTR"},
809  {CSeqFeatData::eSubtype_5UTR, "five_prime_UTR"},
810  {CSeqFeatData::eSubtype_assembly_gap, "assemply_gap"},
811  {CSeqFeatData::eSubtype_C_region, "C_gene_segment"},
812  {CSeqFeatData::eSubtype_centromere, "centromere"},
813  {CSeqFeatData::eSubtype_conflict, "sequence_conflict"},
814  {CSeqFeatData::eSubtype_D_loop, "D_loop"},
815  {CSeqFeatData::eSubtype_D_segment, "D_gene_segment"},
816  {CSeqFeatData::eSubtype_exon, "exon"},
817  {CSeqFeatData::eSubtype_enhancer, "enhancer"},
819  {CSeqFeatData::eSubtype_iDNA, "iDNA"},
820  {CSeqFeatData::eSubtype_intron, "intron"},
821  {CSeqFeatData::eSubtype_J_segment, "J_gene_segment"},
822  {CSeqFeatData::eSubtype_LTR, "long_terminal_repeat"},
823  {CSeqFeatData::eSubtype_mat_peptide, "mature_protein_region"},
824  {CSeqFeatData::eSubtype_mat_peptide_aa, "mature_protein_region"},
825  {CSeqFeatData::eSubtype_misc_binding, "binding_site"},
826  {CSeqFeatData::eSubtype_misc_difference, "sequence_difference"},
827  {CSeqFeatData::eSubtype_misc_structure, "sequence_secondary_structure"},
828  {CSeqFeatData::eSubtype_mobile_element, "mobile_genetic_element"},
829  {CSeqFeatData::eSubtype_modified_base, "modified_DNA_base"},
830  {CSeqFeatData::eSubtype_mRNA, "mRNA"},
831  {CSeqFeatData::eSubtype_N_region, "N_region"},
832  {CSeqFeatData::eSubtype_operon, "operon"},
833  {CSeqFeatData::eSubtype_oriT, "oriT"},
834  {CSeqFeatData::eSubtype_otherRNA, "transcript"},
835  {CSeqFeatData::eSubtype_polyA_site, "polyA_site"},
836  {CSeqFeatData::eSubtype_precursor_RNA, "primary_transcript"},
837  {CSeqFeatData::eSubtype_preRNA, "primary_transcript"},
838  {CSeqFeatData::eSubtype_preprotein, "immature_peptide_region"},
839  {CSeqFeatData::eSubtype_prim_transcript, "primary_transcript"},
840  {CSeqFeatData::eSubtype_primer_bind, "primer_binding_site"},
841  {CSeqFeatData::eSubtype_promoter, "promoter"},
842  {CSeqFeatData::eSubtype_propeptide, "propeptide"},
843  {CSeqFeatData::eSubtype_prot, "polypeptide"},
844  {CSeqFeatData::eSubtype_protein_bind, "protein_binding_site"},
845  {CSeqFeatData::eSubtype_rep_origin, "origin_of_replication"},
846  {CSeqFeatData::eSubtype_S_region, "S_region"},
847  {CSeqFeatData::eSubtype_sig_peptide, "signal_peptide"},
848  {CSeqFeatData::eSubtype_sig_peptide_aa, "signal_peptide"},
849  {CSeqFeatData::eSubtype_source, "region"},
850  {CSeqFeatData::eSubtype_stem_loop, "stem_loop"},
852  {CSeqFeatData::eSubtype_telomere, "telomere"},
853  {CSeqFeatData::eSubtype_terminator, "terminator"},
854  {CSeqFeatData::eSubtype_tmRNA, "tmRNA"},
855  {CSeqFeatData::eSubtype_transit_peptide, "transit_peptide"},
856  {CSeqFeatData::eSubtype_transit_peptide_aa, "transit_peptide"},
857  {CSeqFeatData::eSubtype_unsure, "sequence_uncertainty"},
858  {CSeqFeatData::eSubtype_V_region, "V_region"},
859  {CSeqFeatData::eSubtype_V_segment, "V_gene_segment"},
860  {CSeqFeatData::eSubtype_variation, "sequence_alteration"},
861  //{CSeqFeatData::eSubtype_attenuator, "attenuator"},
862  };
863  auto subtype = feature.GetData().GetSubtype();
864  auto cit = mapSubtypeToSoType.find(subtype);
865  if (cit != mapSubtypeToSoType.end()) {
866  so_type = cit->second;
867  return true;
868  }
869  return false;
870 }
871 
872 // ----------------------------------------------------------------------------
874  const CSeq_feat& feature,
875  string& so_type)
876 // ----------------------------------------------------------------------------
877 {
878  so_type = "biological_region";
879  return true;
880 }
881 
882 // ----------------------------------------------------------------------------
884  const CSeq_feat& feature,
885  string& so_type)
886 // ----------------------------------------------------------------------------
887 {
888  if (feature.IsSetPseudo() && feature.GetPseudo()) {
889  so_type = "pseudogenic_CDS";
890  return true;
891  }
892  for (auto qual: feature.GetQual()) {
893  if (qual->GetQual() == "pseudo" || qual->GetQual() == "pseudogene") {
894  so_type = "pseudogenic_CDS";
895  return true;
896  }
897  }
898  so_type = "CDS";
899  return true;
900 }
901 
// xMapGene (its signature line, listing line 903, is absent from this listing).
// Writes "pseudogene" to `so_type` when the feature is flagged pseudo,
// otherwise "gene". Always returns true.
902 // ----------------------------------------------------------------------------
904  const CSeq_feat& feature,
905  string& so_type)
906 // ----------------------------------------------------------------------------
907 {
     // The explicit pseudo flag, when set and true, decides immediately.
908  if (feature.IsSetPseudo() && feature.GetPseudo()) {
909  so_type = "pseudogene";
910  return true;
911  }
     // Otherwise any qualifier whose qual string is "pseudo" or "pseudogene"
     // marks the gene as a pseudogene; the qualifier's value is not examined.
912  for (auto qual: feature.GetQual()) {
913  if (qual->GetQual() == "pseudo" || qual->GetQual() == "pseudogene") {
914  so_type = "pseudogene";
915  return true;
916  }
917  }
918  so_type = "gene";
919  return true;
920 }
921 
// xMapRna (its signature line, listing line 923, is absent from this listing).
// Maps the feature's subtype (misc_RNA, rRNA or tRNA) to an SO type, choosing
// between a regular and a "pseudogenic_" table.
// Returns true and sets `so_type` when the subtype is present in the chosen
// table; returns false and leaves `so_type` untouched otherwise.
922 // ----------------------------------------------------------------------------
924  const CSeq_feat& feature,
925  string& so_type)
926 // ----------------------------------------------------------------------------
927 {
     // SO types for features that are not pseudo.
928  static const map<CSeqFeatData::ESubtype, string> mapSubtypeStraight = {
929  {CSeqFeatData::eSubtype_misc_RNA, "transcript"},
930  {CSeqFeatData::eSubtype_rRNA, "rRNA"},
931  {CSeqFeatData::eSubtype_tRNA, "tRNA"},
932  };
     // SO types for the same subtypes when the feature is pseudo.
933  static const map<CSeqFeatData::ESubtype, string> mapSubtypePseudo = {
934  {CSeqFeatData::eSubtype_misc_RNA, "pseudogenic_transcript"},
935  {CSeqFeatData::eSubtype_rRNA, "pseudogenic_rRNA"},
936  {CSeqFeatData::eSubtype_tRNA, "pseudogenic_tRNA"},
937  };
938 
939  auto subtype = feature.GetData().GetSubtype();
     // Pseudo flag explicitly set to true: use the pseudo table.
940  if (feature.IsSetPseudo() && feature.GetPseudo()) {
941  auto cit = mapSubtypePseudo.find(subtype);
942  if (cit == mapSubtypePseudo.end()) {
943  return false;
944  }
945  so_type = cit->second;
946  return true;
947  }
     // Pseudo flag explicitly set to false: use the regular table. Note that
     // this branch returns before the qualifier scan below, so "pseudo" /
     // "pseudogene" qualifiers are ignored in this case.
948  if (feature.IsSetPseudo() && !feature.GetPseudo()) {
949  auto cit = mapSubtypeStraight.find(subtype);
950  if (cit == mapSubtypeStraight.end()) {
951  return false;
952  }
953  so_type = cit->second;
954  return true;
955  }
956 
     // Pseudo flag not set: the presence of a "pseudo" or "pseudogene"
     // qualifier (value not examined) selects the pseudo table.
957  for (auto qual: feature.GetQual()) {
958  if (qual->GetQual() == "pseudo" || qual->GetQual() == "pseudogene") {
959  auto cit = mapSubtypePseudo.find(subtype);
960  if (cit == mapSubtypePseudo.end()) {
961  return false;
962  }
963  so_type = cit->second;
964  return true;
965  }
966  }
     // No pseudo indication at all: regular table.
967  auto cit = mapSubtypeStraight.find(subtype);
968  if (cit == mapSubtypeStraight.end()) {
969  return false;
970  }
971  so_type = cit->second;
972  return true;
973 }
974 
// xMapMiscFeature (its signature line, listing line 976, is absent from this
// listing).
// Derives the SO type from the value GetUnambiguousNamedQual() yields for
// "feat_class". Always returns true.
//   - empty value            -> "sequence_feature"
//   - value found in table   -> the table's SO type
//   - any other value        -> the value itself, passed through unchanged
// NOTE(review): key matching (e.g. case sensitivity) depends on TYPEMAP's
// comparator, which is declared outside this view -- confirm.
975 // ----------------------------------------------------------------------------
977  const CSeq_feat& feature,
978  string& so_type)
979 // ----------------------------------------------------------------------------
980 {
981  static const TYPEMAP mapFeatClassToSoType = {
982  {"transcription_start_site", "TSS"},
983  {"other", "sequence_feature"},
984  };
985  string feat_class = GetUnambiguousNamedQual(feature, "feat_class");
986  if (feat_class.empty()) {
987  so_type = "sequence_feature";
988  return true;
989  }
990  auto cit = mapFeatClassToSoType.find(feat_class);
991  if (cit == mapFeatClassToSoType.end()) {
     // Unknown class: no validation is done here, the raw value is reported.
992  so_type = feat_class;
993  return true;
994  }
995  so_type = cit->second;
996  return true;
997 }
998 
// xMapMiscRecomb (its signature line, listing line 1000, is absent from this
// listing).
// Derives the SO type from the value GetUnambiguousNamedQual() yields for
// "recombination_class". Always returns true.
//   - empty value                                   -> "recombination_feature"
//   - value found in table                          -> the table's SO type
//   - value listed by GetRecombinationClassList()   -> the value itself
//   - anything else                                 -> "recombination_feature"
999 // ----------------------------------------------------------------------------
1001  const CSeq_feat& feature,
1002  string& so_type)
1003 // ----------------------------------------------------------------------------
1004 {
      // Both the short class names and their "_recombination" forms are accepted.
1005  static const TYPEMAP mapRecombClassToSoType = {
1006  {"meiotic", "meiotic_recombination_region"},
1007  {"mitotic", "mitotic_recombination_region"},
1008  {"non_allelic_homologous", "non_allelic_homologous_recombination_region"},
1009  {"meiotic_recombination", "meiotic_recombination_region"},
1010  {"mitotic_recombination", "mitotic_recombination_region"},
1011  {"non_allelic_homologous_recombination", "non_allelic_homologous_recombination_region"},
1012  {"other", "recombination_feature"},
1013  };
1014  string recomb_class = GetUnambiguousNamedQual(feature, "recombination_class");
1015  if (recomb_class.empty()) {
1016  so_type = "recombination_feature";
1017  return true;
1018  }
1019  auto cit = mapRecombClassToSoType.find(recomb_class);
1020  if (cit == mapRecombClassToSoType.end()) {
      // Not in the table: pass the value through only if it appears in the
      // list returned by CSeqFeatData; otherwise fall back to the generic term.
1021  auto validClasses = CSeqFeatData::GetRecombinationClassList();
1022  auto valid = std::find(validClasses.begin(), validClasses.end(), recomb_class);
1023  if (valid == validClasses.end()) {
1024  so_type = "recombination_feature";
1025  }
1026  else {
1027  so_type = recomb_class;
1028  }
1029  return true;
1030  }
1031  so_type = cit->second;
1032  return true;
1033 }
1034 
// xMapOtherRna (its signature line, listing line 1036, is absent from this
// listing).
// Unconditionally reports the SO type "transcript"; `feature` is not
// inspected. Always returns true.
1035 // ----------------------------------------------------------------------------
1037  const CSeq_feat& feature,
1038  string& so_type)
1039 // ----------------------------------------------------------------------------
1040 {
1041  so_type = "transcript";
1042  return true;
1043 }
1044 
// xMapNcRna (its signature line, listing line 1046, is absent from this
// listing).
// Determines the SO type of a non-coding RNA feature and writes it to
// `so_type`. Always returns true.
// The ncRNA class is taken from the first of these sources that yields a
// non-empty string:
//   1. the value GetUnambiguousNamedQual() yields for "ncRNA_class",
//   2. the class stored in the RNA-ref's Gen extension,
//   3. the name of the RNA-ref's type (CRNA_ref::GetRnaTypeName).
// A class found in the table maps to the table's SO type; every other
// outcome, including "no class at all", yields the generic "ncRNA".
1045 // ----------------------------------------------------------------------------
1047  const CSeq_feat& feature,
1048  string& so_type)
1049 // ----------------------------------------------------------------------------
1050 {
1051  static const TYPEMAP mapNcRnaClassToSoType = {
1052  {"antisense_RNA", "antisense_RNA"},
1053  {"autocatalytically_spliced_intron", "autocatalytically_spliced_intron"},
1054  {"guide_RNA", "guide_RNA"},
1055  {"hammerhead_ribozyme", "hammerhead_ribozyme"},
1056  {"lncRNA", "lnc_RNA"},
1057  {"miRNA", "miRNA"},
1058  {"other", "ncRNA"},
1059  {"piRNA", "piRNA"},
1060  {"rasiRNA", "rasiRNA"},
1061  {"ribozyme", "ribozyme"},
1062  {"RNase_MRP_RNA", "RNase_MRP_RNA"},
1063  {"RNase_P_RNA", "RNase_P_RNA"},
1064  {"scRNA", "scRNA"},
1065  {"siRNA", "siRNA"},
1066  {"snRNA", "snRNA"},
1067  {"snoRNA", "snoRNA"},
1068  {"SRP_RNA", "SRP_RNA"},
1069  {"telomerase_RNA", "telomerase_RNA"},
1070  {"vault_RNA", "vault_RNA"},
1071  {"Y_RNA", "Y_RNA"},
1072  };
1073  string ncrna_class = GetUnambiguousNamedQual(feature, "ncRNA_class");
1074  if (ncrna_class.empty()) {
      // Second source: the class recorded in the RNA-ref's Gen extension.
1075  if (feature.IsSetData() &&
1076  feature.GetData().IsRna() &&
1077  feature.GetData().GetRna().IsSetExt() &&
1078  feature.GetData().GetRna().GetExt().IsGen() &&
1079  feature.GetData().GetRna().GetExt().GetGen().IsSetClass()) {
1080  ncrna_class = feature.GetData().GetRna().GetExt().GetGen().GetClass();
      // "classRNA" is rewritten to "ncRNA", which is not a table key, so it
      // ends up at the generic "ncRNA" fallback below.
1081  if (ncrna_class == "classRNA") {
1082  ncrna_class = "ncRNA";
1083  }
1084  }
1085  }
1086  if (ncrna_class.empty()) {
      // Third source: the name of the RNA-ref's type.
1087  if (feature.IsSetData() &&
1088  feature.GetData().IsRna() &&
1089  feature.GetData().GetRna().IsSetType()) {
1090  auto ncrna_type = feature.GetData().GetRna().GetType();
1091  ncrna_class = CRNA_ref::GetRnaTypeName(ncrna_type);
1092  }
1093  }
1094  if (ncrna_class.empty()) {
      // No class could be determined: report the generic SO term. The result
      // must go to the out-parameter `so_type`; assigning the local
      // `ncrna_class` here would return true with `so_type` never written.
1095  so_type = "ncRNA";
1096  return true;
1097  }
1098  auto cit = mapNcRnaClassToSoType.find(ncrna_class);
1099  if (cit == mapNcRnaClassToSoType.end()) {
1100  so_type = "ncRNA";
1101  return true;
1102  }
1103  so_type = cit->second;
1104  return true;
1105 }
1106 
// xMapRegulatory (its signature line, listing line 1108, is absent from this
// listing).
// Derives the SO type from the value GetUnambiguousNamedQual() yields for
// "regulatory_class". Always returns true.
//   - empty value                                -> "regulatory_region"
//   - value found in table                       -> the table's SO type
//   - value listed by GetRegulatoryClassList()   -> the value itself
//   - anything else                              -> "regulatory_region"
1107 // ----------------------------------------------------------------------------
1109  const CSeq_feat& feature,
1110  string& so_type)
1111 // ----------------------------------------------------------------------------
1112 {
      // Only classes whose SO name differs from the class name (plus a few
      // identity entries) are listed; the rest are handled by the list check.
1113  static const TYPEMAP mapRegulatoryClassToSoType = {
1114  {"DNase_I_hypersensitive_site", "DNaseI_hypersensitive_site"},
1115  {"GC_signal", "GC_rich_promoter_region"},
1116  {"enhancer_blocking_element", "enhancer_blocking_element"},
1117  {"epigenetically_modified_region", "epigenetically_modified_region"},
1118  {"imprinting_control_region", "imprinting_control_region"},
1119  {"matrix_attachment_region", "matrix_attachment_site"},
1120  {"other", "regulatory_region"},
1121  {"response_element", "response_element"},
1122  {"ribosome_binding_site", "ribosome_entry_site"},
1123  };
1124 
1125  string regulatory_class = GetUnambiguousNamedQual(feature, "regulatory_class");
1126  if (regulatory_class.empty()) {
1127  so_type = "regulatory_region";
1128  return true;
1129  }
1130  auto cit = mapRegulatoryClassToSoType.find(regulatory_class);
1131  if (cit == mapRegulatoryClassToSoType.end()) {
      // Not in the table: pass the value through only if it appears in the
      // list returned by CSeqFeatData; otherwise fall back to the generic term.
1132  auto validClasses = CSeqFeatData::GetRegulatoryClassList();
1133  auto valid = std::find(
1134  validClasses.begin(), validClasses.end(), regulatory_class);
1135  if (valid == validClasses.end()) {
1136  so_type = "regulatory_region";
1137  }
1138  else {
1139  so_type = regulatory_class;
1140  }
1141  return true;
1142  }
1143  so_type = cit->second;
1144  return true;
1145 }
1146 
// xMapBond (its signature line, listing line 1148, is absent from this
// listing).
// Derives the SO type from the value GetUnambiguousNamedQual() yields for
// "bond_type".
//   - empty value            -> returns false, `so_type` is left untouched
//   - value found in table   -> the table's SO type, returns true
//   - any other value        -> the value itself, returns true
1147 // ----------------------------------------------------------------------------
1149  const CSeq_feat& feature,
1150  string& so_type)
1151 // ----------------------------------------------------------------------------
1152 {
1153  static const TYPEMAP mapBondTypeToSoType = {
1154  {"disulfide", "disulfide_bond"},
1155  {"xlink", "cross_link"},
1156  };
1157  string bond_type = GetUnambiguousNamedQual(feature, "bond_type");
1158  if (bond_type.empty()) {
      // Unlike the sibling mappers in this file, there is no generic fallback
      // term here: a missing bond type is reported as failure.
1159  return false;
1160  }
1161  auto cit = mapBondTypeToSoType.find(bond_type);
1162  if (cit == mapBondTypeToSoType.end()) {
1163  so_type = bond_type;
1164  return true;
1165  }
1166  so_type = cit->second;
1167  return true;
1168 }
1169 
1170 
// xMapRepeatRegion (its signature line, listing line 1172, is absent from this
// listing).
// Derives the SO type of a repeat feature from two values obtained through
// GetUnambiguousNamedQual(): "satellite" first, then "rpt_type".
//   - non-empty satellite value: the table's SO type (returns true), or
//     returns false with `so_type` untouched if the value is not in the table;
//     "rpt_type" is not consulted in either case.
//   - empty rpt_type          -> "repeat_region", returns true
//   - rpt_type found in table -> the table's SO type, returns true
//   - any other rpt_type      -> the value itself, returns true
1171 // ----------------------------------------------------------------------------
1173  const CSeq_feat& feature,
1174  string& so_type)
1175 // ----------------------------------------------------------------------------
1176 {
1177  static const TYPEMAP mapSatelliteToSoType = {
1178  {"satellite", "satellite_DNA"},
1179  {"microsatellite", "microsatellite"},
1180  {"minisatellite", "minisatellite"},
1181  };
1182  string satellite = GetUnambiguousNamedQual(feature, "satellite");
1183  if (!satellite.empty()) {
1184  auto cit = mapSatelliteToSoType.find(satellite);
1185  if (cit == mapSatelliteToSoType.end()) {
1186  return false;
1187  }
1188  so_type = cit->second;
1189  return true;
1190  }
1191 
      // No satellite value: fall back to the repeat type. "flanking",
      // "terminal" and "other" all collapse to the generic "repeat_region".
1192  static const TYPEMAP mapRptTypeToSoType = {
1193  {"tandem", "tandem_repeat"},
1194  {"inverted", "inverted_repeat"},
1195  {"flanking", "repeat_region"},
1196  {"terminal", "repeat_region"},
1197  {"direct", "direct_repeat"},
1198  {"dispersed", "dispersed_repeat"},
1199  {"nested", "nested_repeat"},
1200  {"non_ltr_retrotransposon_polymeric_tract", "non_LTR_retrotransposon_polymeric_tract"},
1201  {"x_element_combinatorical_repeat", "X_element_combinatorical_repeat"},
1202  {"y_prime_element", "Y_prime_element"},
1203  {"other", "repeat_region"},
1204  };
1205  string rpt_type = GetUnambiguousNamedQual(feature, "rpt_type");
1206  if (rpt_type.empty()) {
1207  so_type = "repeat_region";
1208  return true;
1209  }
1210  auto cit = mapRptTypeToSoType.find(rpt_type);
1211  if (cit == mapRptTypeToSoType.end()) {
1212  so_type = rpt_type;
1213  return true;
1214  }
1215  so_type = cit->second;
1216  return true;
1217 }
1218 
// Alias table: each entry pairs an alternative feature name (first string)
// with the SO term it resolves to (second string).
// NOTE(review): the declaration line (listing line 1220) is absent from this
// listing; presumably this is the initializer of mMapSoAliases, the table
// that ResolveSoAlias() searches -- confirm against the full source.
1219 // ----------------------------------------------------------------------------
1221 // ----------------------------------------------------------------------------
1222  {"-10_signal", "minus_10_signal"},
1223  {"-35_signal", "minus_35_signal"},
1224  {"3'UTR", "three_prime_UTR"},
1225  {"3'clip", "three_prime_clip"},
1226  {"5'UTR", "five_prime_UTR"},
1227  {"5'clip", "five_prime_clip"},
1228  {"C_region", "C_gene_segment"},
1229  {"D-loop", "D_loop"},
1230  {"D_segment", "D_gene_segment"},
1231  {"GC_signal", "GC_rich_promoter_region"},
1232  {"J_segment", "J_gene_segment"},
1233  {"LTR", "long_terminal_repeat"},
1234  {"RBS", "ribosome_entry_site"},
1235  {"TATA_signal", "TATA_box"},
1236  {"V_segment", "V_gene_segment"},
1237  {"assembly_gap", "gap"},
1238  {"Comment", "remark"},
1239  {"conflict", "sequence_conflict"},
1240  {"mat_peptide_nt", "mature_protein_region"},
1241  {"mat_peptide", "mature_protein_region"},
1242  {"misc_binding", "binding_site"},
1243  {"misc_difference", "sequence_difference"},
1244  {"misc_feature", "sequence_feature"},
1245  {"misc_recomb", "recombination_feature"},
1246  {"misc_signal", "regulatory_region"},
1247  {"misc_structure", "sequence_secondary_structure"},
1248  {"mobile_element", "mobile_genetic_element"},
1249  {"modified_base", "modified_DNA_base"},
1250  {"misc_RNA", "transcript"},
1251  {"polyA_signal", "polyA_signal_sequence"},
1252  {"pre_RNA", "primary_transcript"},
1253  {"precursor_RNA", "primary_transcript"},
1254  {"proprotein", "immature_peptide_region"},
1255  {"prim_transcript", "primary_transcript"},
1256  {"primer_bind", "primer_binding_site"},
1257  {"Protein", "polypeptide"},
1258  {"protein_bind", "protein_binding_site"},
1259  {"SecStr", "sequence_secondary_structure"},
1260  {"regulatory", "regulatory_region"},
1261  {"rep_origin", "origin_of_replication"},
1262  {"Rsite", "restriction_enzyme_cut_site"},
1263  {"satellite", "satellite_DNA"},
1264  {"Shine_Dalgarno_sequence", "ribosome_entry_site"},
1265  {"sig_peptide_nt", "signal_peptide"},
1266  {"sig_peptide", "signal_peptide"},
1267  {"Site", "site"},
1268  {"Site-ref", "site"},
1269  {"transit_peptide_nt", "transit_peptide"},
1270  {"unsure", "sequence_uncertainty"},
1271  {"variation", "sequence_alteration"},
1272  {"VariationRef", "sequence_alteration"},
1273  {"virion", "viral_sequence"},
1274 };
1275 
// ResolveSoAlias (its signature line, listing line 1277, is absent from this
// listing).
// Looks `alias` up in mMapSoAliases and returns the mapped string; when there
// is no entry, `alias` itself is returned unchanged.
1276 // ----------------------------------------------------------------------------
1278  const string& alias)
1279 // ----------------------------------------------------------------------------
1280 {
1281  ALIASENTRY cit = mMapSoAliases.find(alias);
1282  if (cit == mMapSoAliases.end()) {
      // Not an alias: hand the input back as-is.
1283  return alias;
1284  }
1285  return cit->second;
1286 }
1287 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
static string GetRnaTypeName(const CRNA_ref::EType rna_type)
Definition: RNA_ref.cpp:73
ESubtype GetSubtype(void) const
static const vector< string > & GetRecombinationClassList()
@ eSubtype_transit_peptide
@ eSubtype_misc_difference
@ eSubtype_prim_transcript
@ eSubtype_transit_peptide_aa
static const vector< string > & GetRegulatoryClassList()
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
static TYPEFUNCMAP mMapTypeFunc
Definition: so_map.hpp:119
static bool xMapRegion(const CSeq_feat &, string &)
Definition: so_map.cpp:873
static bool xFeatureMakeCds(const string &, CSeq_feat &)
Definition: so_map.cpp:483
static bool xMapRna(const CSeq_feat &, string &)
Definition: so_map.cpp:923
static bool xFeatureMakeRepeatRegion(const string &, CSeq_feat &)
Definition: so_map.cpp:663
static bool SoTypeToFeature(const string &, CSeq_feat &, bool=false)
Definition: so_map.cpp:411
static FEATFUNCMAP mMapFeatFunc
Definition: so_map.hpp:114
static bool xMapRegulatory(const CSeq_feat &, string &)
Definition: so_map.cpp:1108
static bool xMapMiscRecomb(const CSeq_feat &, string &)
Definition: so_map.cpp:1000
static bool xFeatureMakeRegulatory(const string &, CSeq_feat &)
Definition: so_map.cpp:636
static bool xFeatureMakeRegion(const string &, CSeq_feat &)
Definition: so_map.cpp:624
static bool xFeatureMakeProt(const string &, CSeq_feat &)
Definition: so_map.cpp:496
static bool FeatureToSoType(const CSeq_feat &, string &)
Definition: so_map.cpp:783
static string ResolveSoAlias(const string &)
Definition: so_map.cpp:1277
static bool xFeatureMakeMiscRna(const string &, CSeq_feat &)
Definition: so_map.cpp:567
static TYPEMAP mMapSoIdToType
Definition: so_map.hpp:109
static bool xFeatureMakeMiscRecomb(const string &, CSeq_feat &)
Definition: so_map.cpp:541
static bool xFeatureMakeGene(const string &, CSeq_feat &)
Definition: so_map.cpp:429
static bool xMapMiscFeature(const CSeq_feat &, string &)
Definition: so_map.cpp:976
static std::string SoTypeToId(const std::string &)
Definition: so_map.cpp:392
static bool xMapGene(const CSeq_feat &, string &)
Definition: so_map.cpp:903
static TYPEMAP mMapSoTypeToId
Definition: so_map.hpp:108
TYPEFUNCMAP::const_iterator TYPEFUNCENTRY
Definition: so_map.hpp:118
FEATFUNCMAP::const_iterator FEATFUNCENTRY
Definition: so_map.hpp:113
static bool xMapNcRna(const CSeq_feat &, string &)
Definition: so_map.cpp:1046
SOALIASMAP::const_iterator ALIASENTRY
Definition: so_map.hpp:122
static SOALIASMAP mMapSoAliases
Definition: so_map.hpp:123
static bool xFeatureMakeMiscFeature(const string &, CSeq_feat &)
Definition: so_map.cpp:515
static bool xMapOtherRna(const CSeq_feat &, string &)
Definition: so_map.cpp:1036
static bool xMapBond(const CSeq_feat &, string &)
Definition: so_map.cpp:1148
static bool xFeatureMakeRna(const string &, CSeq_feat &)
Definition: so_map.cpp:442
static bool xMapGeneric(const CSeq_feat &, string &)
Definition: so_map.cpp:802
static bool xMapCds(const CSeq_feat &, string &)
Definition: so_map.cpp:883
static bool xFeatureMakeNcRna(const string &, CSeq_feat &)
Definition: so_map.cpp:464
static bool GetSupportedSoTerms(vector< string > &)
Definition: so_map.cpp:367
static bool xFeatureMakeImp(const string &, CSeq_feat &)
Definition: so_map.cpp:580
TYPEMAP::const_iterator TYPEENTRY
Definition: so_map.hpp:107
static std::string SoIdToType(const std::string &)
Definition: so_map.cpp:380
static bool xMapRepeatRegion(const CSeq_feat &, string &)
Definition: so_map.cpp:1172
bool operator()(const string &x, const string &y) const
Definition: so_map.cpp:47
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
TType GetType(void) const
Get the Type member data.
Definition: RNA_ref_.hpp:529
bool IsSetExt(void) const
Generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
bool IsGen(void) const
Check if variant Gen is selected.
Definition: RNA_ref_.hpp:504
const TGen & GetGen(void) const
Get the variant data.
Definition: RNA_ref_.cpp:156
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: RNA_ref_.hpp:510
bool IsSetClass(void) const
For ncRNAs, the class of non-coding RNA; examples: antisense_RNA, guide_RNA, snRNA. Check if a value has been assigned to Class data member.
Definition: RNA_gen_.hpp:247
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TClass & GetClass(void) const
Get the Class member data.
Definition: RNA_gen_.hpp:259
@ eType_ncRNA
non-coding RNA; subsumes snRNA, scRNA, snoRNA
Definition: RNA_ref_.hpp:104
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:268
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
Definition: Seq_feat_.hpp:1374
const TRna & GetRna(void) const
Get the variant data.
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
bool IsRna(void) const
Check if variant Rna is selected.
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
int tolower(Uchar c)
Definition: ncbictype.hpp:72
USING_SCOPE(objects)
string GetUnambiguousNamedQual(const CSeq_feat &feature, const string &qualName)
Definition: so_map.cpp:69
Modified on Wed Apr 17 13:08:14 2024 by modify_doxy.py rev. 669887