NCBI C++ ToolKit
SeqFeatData.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: SeqFeatData.cpp 99881 2023-05-18 17:44:07Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the ASN data definition file
34  * 'seqfeat.asn'.
35  */
36 
37 // standard includes
38 
39 // generated includes
40 #include <ncbi_pch.hpp>
44 #include <objects/seq/Pubdesc.hpp>
48 
49 #include <algorithm>
50 #include <util/static_map.hpp>
51 #include <cassert>
52 
53 // generated classes
54 
56 
57 BEGIN_objects_SCOPE // namespace ncbi::objects::
58 
59 // constructor
61 {
63 }
64 
65 
// One row of kImportTable: a feature key name paired with the subtype it
// denotes (the subtype member is used by the table initializers and lookups
// in this file; its declaration is not visible in this listing).
66 struct SImportEntry {
67  const char* m_Name; // feature key text, compared byte-wise with strcmp()
69 
    // Orders entries by strcmp() of their names, which is the ordering the
    // sorted table relies on when it is searched with lower_bound.
70  bool operator<(const SImportEntry& e) const {
71  return strcmp(m_Name, e.m_Name) < 0;
72  }
73 };
74 
// Import-feature keys and the CSeqFeatData subtype each key maps to.
75 // NOTE: these must stay in ASCIIbetical order!
// The table is searched with lower_bound using SImportEntry::operator<
// (a strcmp() comparison), so an out-of-order row would make lookups of
// some keys fail. Upper-case letters and '_' sort before lower-case letters.
76 static const SImportEntry kImportTable[] = {
77  { "-10_signal", CSeqFeatData::eSubtype_10_signal },
78  { "-35_signal", CSeqFeatData::eSubtype_35_signal },
79  { "3'UTR", CSeqFeatData::eSubtype_3UTR },
80  { "3'clip", CSeqFeatData::eSubtype_3clip },
81  { "5'UTR", CSeqFeatData::eSubtype_5UTR },
82  { "5'clip", CSeqFeatData::eSubtype_5clip },
83  { "CAAT_signal", CSeqFeatData::eSubtype_CAAT_signal },
84  { "C_region", CSeqFeatData::eSubtype_C_region },
85  { "D-loop", CSeqFeatData::eSubtype_D_loop },
86  { "D_segment", CSeqFeatData::eSubtype_D_segment },
87  { "GC_signal", CSeqFeatData::eSubtype_GC_signal },
88  { "Imp_CDS", CSeqFeatData::eSubtype_Imp_CDS },
89  { "J_segment", CSeqFeatData::eSubtype_J_segment },
90  { "LTR", CSeqFeatData::eSubtype_LTR },
91  { "N_region", CSeqFeatData::eSubtype_N_region },
92  { "RBS", CSeqFeatData::eSubtype_RBS },
93  { "STS", CSeqFeatData::eSubtype_STS },
94  { "S_region", CSeqFeatData::eSubtype_S_region },
95  { "TATA_signal", CSeqFeatData::eSubtype_TATA_signal },
96  { "V_region", CSeqFeatData::eSubtype_V_region },
97  { "V_segment", CSeqFeatData::eSubtype_V_segment },
98  { "allele", CSeqFeatData::eSubtype_allele },
99  { "assembly_gap", CSeqFeatData::eSubtype_assembly_gap },
100  { "attenuator", CSeqFeatData::eSubtype_attenuator },
101  { "centromere", CSeqFeatData::eSubtype_centromere },
102  { "conflict", CSeqFeatData::eSubtype_conflict },
103  { "enhancer", CSeqFeatData::eSubtype_enhancer },
104  { "exon", CSeqFeatData::eSubtype_exon },
105  { "gap", CSeqFeatData::eSubtype_gap },
106  { "iDNA", CSeqFeatData::eSubtype_iDNA },
107  { "import", CSeqFeatData::eSubtype_imp },
108  { "intron", CSeqFeatData::eSubtype_intron },
109  { "mat_peptide", CSeqFeatData::eSubtype_mat_peptide },
110  { "misc_RNA", CSeqFeatData::eSubtype_misc_RNA },
111  { "misc_binding", CSeqFeatData::eSubtype_misc_binding },
112  { "misc_difference", CSeqFeatData::eSubtype_misc_difference },
113  { "misc_feature", CSeqFeatData::eSubtype_misc_feature },
114  { "misc_recomb", CSeqFeatData::eSubtype_misc_recomb },
115  { "misc_signal", CSeqFeatData::eSubtype_misc_signal },
116  { "misc_structure", CSeqFeatData::eSubtype_misc_structure },
117  { "mobile_element", CSeqFeatData::eSubtype_mobile_element },
118  { "modified_base", CSeqFeatData::eSubtype_modified_base },
119  { "mutation", CSeqFeatData::eSubtype_mutation },
120  { "old_sequence", CSeqFeatData::eSubtype_old_sequence },
121  { "operon", CSeqFeatData::eSubtype_operon },
122  { "oriT", CSeqFeatData::eSubtype_oriT },
123  { "polyA_signal", CSeqFeatData::eSubtype_polyA_signal },
124  { "polyA_site", CSeqFeatData::eSubtype_polyA_site },
125  { "precursor_RNA", CSeqFeatData::eSubtype_precursor_RNA },
126  { "prim_transcript", CSeqFeatData::eSubtype_prim_transcript },
127  { "primer_bind", CSeqFeatData::eSubtype_primer_bind },
128  { "promoter", CSeqFeatData::eSubtype_promoter },
129  { "propeptide", CSeqFeatData::eSubtype_propeptide },
130  { "protein_bind", CSeqFeatData::eSubtype_protein_bind },
131  { "regulatory", CSeqFeatData::eSubtype_regulatory },
132  { "rep_origin", CSeqFeatData::eSubtype_rep_origin },
133  { "repeat_region", CSeqFeatData::eSubtype_repeat_region },
134  { "repeat_unit", CSeqFeatData::eSubtype_repeat_unit },
135  { "satellite", CSeqFeatData::eSubtype_satellite },
136  { "sig_peptide", CSeqFeatData::eSubtype_sig_peptide },
137  { "site_ref", CSeqFeatData::eSubtype_site_ref },
138  { "source", CSeqFeatData::eSubtype_source },
139  { "stem_loop", CSeqFeatData::eSubtype_stem_loop },
140  { "telomere", CSeqFeatData::eSubtype_telomere },
141  { "terminator", CSeqFeatData::eSubtype_terminator },
142  { "transit_peptide", CSeqFeatData::eSubtype_transit_peptide },
143  { "unsure", CSeqFeatData::eSubtype_unsure },
144  { "variation", CSeqFeatData::eSubtype_variation },
145  { "virion", CSeqFeatData::eSubtype_virion }
146 };
147 
// One-past-the-last pointer for kImportTable, derived from the array size so
// it tracks rows added to or removed from the table. Used as the "not found"
// sentinel and as the loop bound when the table is scanned.
148 static const SImportEntry* const kImportTableEnd
149  = kImportTable + sizeof(kImportTable)/sizeof(kImportTable[0]);
150 
151 // Feat info table
153 
// FEAT_INFO_PAIR(type, subtype, key_full, key_gb) expands to one initializer
// of the form
//   { CSeqFeatData::e_<type>, { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb } }
// i.e. a feature choice paired with its subtype and its two key strings
// (full key first, GenBank key second).
154 #define FEAT_INFO_PAIR(type, subtype, key_full, key_gb) \
155  { CSeqFeatData::e_##type, \
156  { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
157 
// Rows for the feature choices that need no further inspection of the data.
// NOTE(review): TInfoPair and the map that the lookup code searches
// (sc_InfoPairs) are declared on lines not visible in this listing;
// presumably that map is built from this array - confirm.
158 static const TInfoPair kInfoPairs[] = {
159  FEAT_INFO_PAIR(Gene, gene, "Gene", "gene"),
160  FEAT_INFO_PAIR(Org, org, "Org", "source"),
161  FEAT_INFO_PAIR(Cdregion, cdregion, "CDS", "CDS"),
162  FEAT_INFO_PAIR(Pub, pub, "Cit", "misc_feature"),
163  FEAT_INFO_PAIR(Seq, seq, "Xref", "misc_feature"),
164  FEAT_INFO_PAIR(Region, region, "Region", "misc_feature"),
165  FEAT_INFO_PAIR(Comment, comment, "Comment", "misc_feature"),
166  FEAT_INFO_PAIR(Bond, bond, "Bond", "misc_feature"),
167  FEAT_INFO_PAIR(Rsite, rsite, "Rsite", "misc_feature"),
168  FEAT_INFO_PAIR(User, user, "User", "misc_feature"),
169  FEAT_INFO_PAIR(Txinit, txinit, "TxInit", "promoter"),
170  FEAT_INFO_PAIR(Num, num, "Num", "misc_feature"),
171  FEAT_INFO_PAIR(Psec_str, psec_str, "SecStr", "SecStr"),
172  FEAT_INFO_PAIR(Non_std_residue, non_std_residue, "NonStdRes", "NonStdRes"),
173  FEAT_INFO_PAIR(Het, het, "Het", "Het"),
174  FEAT_INFO_PAIR(Biosrc, biosrc, "Src", "source"),
175  FEAT_INFO_PAIR(Clone, clone, "CloneRef", "misc_feature"),
176  FEAT_INFO_PAIR(Variation, variation_ref, "Variation", "variation")
177 };
178 
182 
183 
184 // e_Prot info table
186 
// PROT_INFO_PAIR(proc, subtype, key_full, key_gb) expands to one initializer
// keyed by CProt_ref::eProcessed_<proc>, carrying
// { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb }.
187 #define PROT_INFO_PAIR(proc, subtype, key_full, key_gb) \
188  { CProt_ref::eProcessed_##proc, \
189  { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
190 
// Rows for protein features, keyed by processing state. Every row uses
// "Prot" as the full key; only the GenBank key and subtype differ.
// Processing states with no row here are handled by the caller's fallback.
191 static const TProtInfoPair kProtInfoPairs[] = {
192  PROT_INFO_PAIR(preprotein, preprotein, "Prot", "proprotein"),
193  PROT_INFO_PAIR(mature, mat_peptide_aa, "Prot", "mat_peptide"),
194  PROT_INFO_PAIR(signal_peptide, sig_peptide_aa, "Prot", "sig_peptide"),
195  PROT_INFO_PAIR(transit_peptide, transit_peptide_aa, "Prot", "transit_peptide"),
196  PROT_INFO_PAIR(propeptide, propeptide_aa, "Prot", "propeptide")
197 };
198 
202 
203 
204 // e_Site info table
206 
// SITE_INFO_PAIR(site, subtype, key_full, key_gb) expands to one initializer
// keyed by CSeqFeatData::eSite_<site>, carrying
// { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb }.
207 #define SITE_INFO_PAIR(site, subtype, key_full, key_gb) \
208  { CSeqFeatData::eSite_##site, \
209  { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
210 
// Rows for site features. All rows share subtype eSubtype_site and full key
// "Site"; only the GenBank key varies with the kind of site.
211 static const TSiteInfoPair kSiteInfoPairs[] = {
212  SITE_INFO_PAIR(binding, site, "Site", "misc_binding"),
213  SITE_INFO_PAIR(metal_binding, site, "Site", "misc_binding"),
214  SITE_INFO_PAIR(lipid_binding, site, "Site", "misc_binding"),
215  SITE_INFO_PAIR(np_binding, site, "Site", "protein_bind"),
216  SITE_INFO_PAIR(dna_binding, site, "Site", "primer_bind"),
217  SITE_INFO_PAIR(signal_peptide, site, "Site", "sig_peptide"),
218  SITE_INFO_PAIR(transit_peptide, site, "Site", "transit_peptide")
219 };
220 
224 
225 
226 // e_Rna info table
228 
// RNA_INFO_PAIR(rna, subtype, key_full, key_gb) expands to one initializer
// keyed by CRNA_ref::eType_<rna>, carrying
// { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb }.
229 #define RNA_INFO_PAIR(rna, subtype, key_full, key_gb) \
230  { CRNA_ref::eType_##rna, \
231  { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
232 
// Rows for RNA features, keyed by RNA type. In every row the full key and
// the GenBank key are the same string. RNA types with no row here are
// handled by the caller's fallback.
233 static const TRnaInfoPair kRnaInfoPairs[] = {
234  RNA_INFO_PAIR(premsg, preRNA, "precursor_RNA", "precursor_RNA"),
235  RNA_INFO_PAIR(mRNA, mRNA, "mRNA", "mRNA"),
236  RNA_INFO_PAIR(tRNA, tRNA, "tRNA", "tRNA"),
237  RNA_INFO_PAIR(rRNA, rRNA, "rRNA", "rRNA"),
238  RNA_INFO_PAIR(snRNA, snRNA, "snRNA", "snRNA"),
239  RNA_INFO_PAIR(scRNA, scRNA, "scRNA", "scRNA"),
240  RNA_INFO_PAIR(snoRNA, snoRNA, "snoRNA", "snoRNA"),
241  RNA_INFO_PAIR(ncRNA, ncRNA, "ncRNA", "ncRNA"),
242  RNA_INFO_PAIR(tmRNA, tmRNA, "tmRNA", "tmRNA")
243 };
244 
248 
249 
251 {
    // Fills m_FeatDataInfo (subtype, full key, GenBank key) according to the
    // currently selected choice, Which(). The two keys start out as
    // placeholders; each case below overwrites them.
    // NOTE: several statements of this function are missing from this
    // listing (for example the iterator declarations and the statements in
    // the fallback branches that precede the key assignments).
252  m_FeatDataInfo.m_Key_gb = "misc_feature"; // ???
253  m_FeatDataInfo.m_Key_full = "???";
254  switch (Which()) {
    // Protein: look the processing state up in sc_ProtInfoPairs; a state
    // without a row falls back to the generic "Prot" / "Protein" keys.
255  case e_Prot:
256  {
258  sc_ProtInfoPairs.find(GetProt().GetProcessed());
259  if (it != sc_ProtInfoPairs.end()) {
260  m_FeatDataInfo = it->second;
261  }
262  else {
264  m_FeatDataInfo.m_Key_full = "Prot";
265  m_FeatDataInfo.m_Key_gb = "Protein";
266  }
267  break;
268  }
    // Site: look the site kind up in sc_SiteInfoPairs; a kind without a row
    // falls back to "Site" / "misc_feature".
269  case e_Site: // Is this correct, or are these encoded as Imp?
270  {
271  TSiteInfoMap::const_iterator it = sc_SiteInfoPairs.find(GetSite());
272  if (it != sc_SiteInfoPairs.end()) {
273  m_FeatDataInfo = it->second;
274  }
275  else {
277  m_FeatDataInfo.m_Key_full = "Site";
278  m_FeatDataInfo.m_Key_gb = "misc_feature";
279  }
280  break;
281  }
    // RNA: types with a row in sc_RnaInfoPairs take that row. Otherwise the
    // full key is derived from the ext name when it is exactly "ncRNA" or
    // "tmRNA"; failing that it is "RNA" for eType_other and "misc_RNA" for
    // anything else. The GenBank key of every fallback is "misc_RNA".
282  case e_Rna:
283  {
284  CRNA_ref_Base::TType rna_type = GetRna().GetType();
286  sc_RnaInfoPairs.find(rna_type);
287  if (it != sc_RnaInfoPairs.end()) {
288  m_FeatDataInfo = it->second;
289  }
290  else {
        // Only read the ext name when ext is available and holds a name;
        // otherwise compare against the empty string.
291  bool can_get_name = (GetRna().CanGetExt()
292  && GetRna().GetExt().IsName());
293  const string& ext_name = (can_get_name
294  ? GetRna().GetExt().GetName()
295  : kEmptyStr);
296  if (ext_name == "ncRNA") {
298  m_FeatDataInfo.m_Key_full = "ncRNA";
299  } else if (ext_name == "tmRNA") {
301  m_FeatDataInfo.m_Key_full = "tmRNA";
302  } else {
304  bool other = GetRna().GetType() == CRNA_ref::eType_other;
305  m_FeatDataInfo.m_Key_full = other ? "RNA" : "misc_RNA";
306  }
307  m_FeatDataInfo.m_Key_gb = "misc_RNA";
308  }
309  break;
310  }
    // Import feature: binary-search kImportTable for the Imp key.
    // lower_bound returns the first row not less than the key, so the
    // strcmp() check is needed to tell an exact match from a near miss.
    // On a match the row's subtype is taken (the statement executed on a
    // miss is not visible in this listing). Both keys are set to the Imp
    // key text itself.
    // NOTE(review): if m_Key_gb / m_Key_full are plain pointers (their type
    // is not visible here), they alias the buffer of the Imp key string and
    // are only valid while that string is unchanged - confirm the cache is
    // invalidated whenever the key is modified.
311  case e_Imp:
312  {
313  const string& key = GetImp().GetKey();
314  SImportEntry key2 = { key.c_str(), eSubtype_imp };
315  const SImportEntry* result = lower_bound(kImportTable,
317  key2);
318  if ( result == kImportTableEnd ||
319  strcmp(key2.m_Name, result->m_Name) ) {
321  } else {
322  m_FeatDataInfo.m_Subtype = result->m_Subtype;
323  }
324  m_FeatDataInfo.m_Key_gb = key.c_str(); // "Imp"?;
325  m_FeatDataInfo.m_Key_full = key.c_str();
326  break;
327  }
    // Every other choice: direct lookup by choice in sc_InfoPairs, with the
    // placeholder keys re-applied when the choice has no row.
328  default:
329  {
330  TInfoMap::const_iterator it = sc_InfoPairs.find(Which());
331  if (it != sc_InfoPairs.end()) {
332  m_FeatDataInfo = it->second;
333  }
334  else {
336  m_FeatDataInfo.m_Key_full = "???";
337  m_FeatDataInfo.m_Key_gb = "misc_feature"; // ???
338  }
339  }
340  }
341 }
342 
343 
344 // destructor
346 {
347 }
348 
349 
350 // ASCII representation of subtype (GenBank feature key, e.g.)
352 {
355  }
356  return (vocab == eVocabulary_genbank) ?
358 }
359 
360 
362 {
    // Returns the subtype stored in m_FeatDataInfo. A stored value of
    // eSubtype_any is treated as "unknown"; the statement executed in that
    // case is not visible in this listing (presumably it fills
    // m_FeatDataInfo before the return - confirm).
363  if (m_FeatDataInfo.m_Subtype == eSubtype_any) { // unknown
365  }
366  return m_FeatDataInfo.m_Subtype;
367 }
368 
369 
372 {
    // Invalidate the cache first, then let the parent class perform the
    // actual assignment from `source` using mode `how`.
373  InvalidateCache();
374  Tparent::Assign(source, how);
375 }
376 
377 
// Calls InvalidateCache() on this object. Declared const, so the
// invalidation must act on mutable state.
// NOTE(review): presumably invoked by the serialization layer after an
// object has been read - confirm against the caller.
378 void CSeqFeatData::PostRead(void) const
379 {
380  InvalidateCache();
381 }
382 
383 
// Mutex held (via CMutexGuard) while the subtype table is being built.
384 DEFINE_STATIC_MUTEX(sx_InitTablesMutex);
385 
386 
// Table of feature choices that is indexed by subtype value.
387 typedef vector<CSeqFeatData::E_Choice> TSubtypesTable;
// Becomes true (stored with release ordering) once the subtype table has
// been filled; readers test it with acquire loads before using the table.
389 static atomic<bool> sx_SubtypesTableInitialized;
390 
392 {
    // Returns the entry of *sx_SubtypesTable at index `subtype`.
    // The acquire load tests whether the table has been initialized; the
    // statement executed when it has not is missing from this listing
    // (presumably the call that builds the table - confirm).
    // NOTE(review): `subtype` is used as an index without a range check;
    // presumably callers only pass valid subtype values - confirm.
393  if ( !sx_SubtypesTableInitialized.load(memory_order_acquire) ) {
395  }
396  return (*sx_SubtypesTable)[subtype];
397 }
398 
400 {
401  { "-10_signal", CSeqFeatData::eSubtype_10_signal },
402  { "-35_signal", CSeqFeatData::eSubtype_35_signal },
403  { "3'UTR", CSeqFeatData::eSubtype_3UTR },
404  { "3'clip", CSeqFeatData::eSubtype_3clip },
405  { "5'UTR", CSeqFeatData::eSubtype_5UTR },
406  { "5'clip", CSeqFeatData::eSubtype_5clip },
407  { "Bond", CSeqFeatData::eSubtype_bond },
408  { "CAAT_signal", CSeqFeatData::eSubtype_CAAT_signal },
410  { "C_region", CSeqFeatData::eSubtype_C_region },
411  { "Cit", CSeqFeatData::eSubtype_pub },
412  { "CloneRef", CSeqFeatData::eSubtype_clone },
413  { "Comment", CSeqFeatData::eSubtype_comment },
414  { "D-loop", CSeqFeatData::eSubtype_D_loop },
415  { "D_segment", CSeqFeatData::eSubtype_D_segment },
416  { "GC_signal", CSeqFeatData::eSubtype_GC_signal },
417  { "Het", CSeqFeatData::eSubtype_het },
418  { "J_segment", CSeqFeatData::eSubtype_J_segment },
419  { "LTR", CSeqFeatData::eSubtype_LTR },
420  { "N_region", CSeqFeatData::eSubtype_N_region },
422  { "Num", CSeqFeatData::eSubtype_num },
423  { "Protein", CSeqFeatData::eSubtype_prot },
424  { "RBS", CSeqFeatData::eSubtype_RBS },
425  { "REFERENCE", CSeqFeatData::eSubtype_pub },
426  { "Region", CSeqFeatData::eSubtype_region },
427  { "Rsite", CSeqFeatData::eSubtype_rsite },
428  { "STS", CSeqFeatData::eSubtype_STS },
429  { "S_region", CSeqFeatData::eSubtype_S_region },
430  { "SecStr", CSeqFeatData::eSubtype_psec_str },
431  { "Site", CSeqFeatData::eSubtype_site },
432  { "Site-ref", CSeqFeatData::eSubtype_site_ref },
434  { "TATA_signal", CSeqFeatData::eSubtype_TATA_signal },
435  { "TxInit", CSeqFeatData::eSubtype_txinit },
436  { "User", CSeqFeatData::eSubtype_user },
437  { "V_region", CSeqFeatData::eSubtype_V_region },
438  { "V_segment", CSeqFeatData::eSubtype_V_segment },
439  { "VariationRef", CSeqFeatData::eSubtype_variation_ref },
440  { "Xref", CSeqFeatData::eSubtype_seq },
441  { "assembly_gap", CSeqFeatData::eSubtype_assembly_gap },
442  { "attenuator", CSeqFeatData::eSubtype_attenuator },
443  { "centromere", CSeqFeatData::eSubtype_centromere },
444  { "conflict", CSeqFeatData::eSubtype_conflict },
445  { "enhancer", CSeqFeatData::eSubtype_enhancer },
446  { "exon", CSeqFeatData::eSubtype_exon },
447  { "gap", CSeqFeatData::eSubtype_gap },
448  { "gene", CSeqFeatData::eSubtype_gene },
449  { "iDNA", CSeqFeatData::eSubtype_iDNA },
450  { "intron", CSeqFeatData::eSubtype_intron },
451  { "mRNA", CSeqFeatData::eSubtype_mRNA },
452  { "mat_peptide", CSeqFeatData::eSubtype_mat_peptide_aa },
453  { "mat_peptide_nt", CSeqFeatData::eSubtype_mat_peptide },
454  { "misc_RNA", CSeqFeatData::eSubtype_otherRNA },
455  { "misc_binding", CSeqFeatData::eSubtype_misc_binding },
456  { "misc_difference", CSeqFeatData::eSubtype_misc_difference },
457  { "misc_feature", CSeqFeatData::eSubtype_misc_feature },
458  { "misc_recomb", CSeqFeatData::eSubtype_misc_recomb },
459  { "misc_signal", CSeqFeatData::eSubtype_misc_signal },
460  { "misc_structure", CSeqFeatData::eSubtype_misc_structure },
461  { "mobile_element", CSeqFeatData::eSubtype_mobile_element },
462  { "modified_base", CSeqFeatData::eSubtype_modified_base },
463  { "ncRNA", CSeqFeatData::eSubtype_ncRNA },
464  { "old_sequence", CSeqFeatData::eSubtype_old_sequence },
465  { "operon", CSeqFeatData::eSubtype_operon },
466  { "oriT", CSeqFeatData::eSubtype_oriT },
467  { "polyA_signal", CSeqFeatData::eSubtype_polyA_signal },
468  { "polyA_site", CSeqFeatData::eSubtype_polyA_site },
469  { "precursor_RNA", CSeqFeatData::eSubtype_preRNA },
470  { "prim_transcript", CSeqFeatData::eSubtype_prim_transcript },
471  { "primer_bind", CSeqFeatData::eSubtype_primer_bind },
472  { "promoter", CSeqFeatData::eSubtype_promoter },
473  { "propeptide", CSeqFeatData::eSubtype_propeptide_aa },
474  { "propeptide_nt", CSeqFeatData::eSubtype_propeptide },
475  { "proprotein", CSeqFeatData::eSubtype_preprotein },
476  { "protein_bind", CSeqFeatData::eSubtype_protein_bind },
477  { "rRNA", CSeqFeatData::eSubtype_rRNA },
478  { "regulatory", CSeqFeatData::eSubtype_regulatory },
479  { "rep_origin", CSeqFeatData::eSubtype_rep_origin },
480  { "repeat_region", CSeqFeatData::eSubtype_repeat_region },
481  { "repeat_unit", CSeqFeatData::eSubtype_repeat_unit },
482  { "satellite", CSeqFeatData::eSubtype_satellite },
483  { "scRNA", CSeqFeatData::eSubtype_scRNA },
484  { "sig_peptide", CSeqFeatData::eSubtype_sig_peptide_aa },
485  { "sig_peptide_nt", CSeqFeatData::eSubtype_sig_peptide },
486  { "snRNA", CSeqFeatData::eSubtype_snRNA },
487  { "snoRNA", CSeqFeatData::eSubtype_snoRNA },
488  { "source", CSeqFeatData::eSubtype_biosrc },
489  { "stem_loop", CSeqFeatData::eSubtype_stem_loop },
490  { "tRNA", CSeqFeatData::eSubtype_tRNA },
491  { "telomere", CSeqFeatData::eSubtype_telomere },
492  { "terminator", CSeqFeatData::eSubtype_terminator },
493  { "tmRNA", CSeqFeatData::eSubtype_tmRNA },
494  { "transit_peptide", CSeqFeatData::eSubtype_transit_peptide_aa },
495  { "transit_peptide_nt", CSeqFeatData::eSubtype_transit_peptide },
496  { "unsure", CSeqFeatData::eSubtype_unsure },
497  { "variation", CSeqFeatData::eSubtype_variation },
498  { "virion", CSeqFeatData::eSubtype_virion }
499 })
500 
502 {
    // Look the key text up in the name -> subtype half of sm_FeatKeys.
    // A name with no entry yields eSubtype_bad.
503  auto it = sm_FeatKeys.first.find(sName);
504  if (it == sm_FeatKeys.first.end())
505  return eSubtype_bad;
506 
    // NOTE(review): this special case is only reached when the lookup above
    // succeeded. No "pre_RNA" row is visible in the sm_FeatKeys initializer
    // (only "precursor_RNA" maps to eSubtype_preRNA), so this branch looks
    // unreachable and "pre_RNA" currently returns eSubtype_bad. Decide
    // whether the check should move above the lookup (to accept "pre_RNA"
    // as an alias) or be removed as dead code.
507  if (sName == "pre_RNA")
508  return eSubtype_preRNA;
509 
510  return it->second;
511 }
512 
514 {
    // Reverse lookup: find the subtype in the subtype -> name half of
    // sm_FeatKeys and return the mapped name. A subtype with no entry
    // yields kEmptyStr.
515  auto it = sm_FeatKeys.second.find(eSubtype);
516  if (it == sm_FeatKeys.second.end())
517  return kEmptyStr;
518 
519  return it->second;
520 }
521 
523 {
    // Returns false for the subtypes listed in the switch and true for every
    // other subtype. (At least one case label is missing from this listing,
    // so the visible list is not necessarily complete.)
524  switch(subtype) {
526  case eSubtype_biosrc:
527  case eSubtype_centromere:
528  case eSubtype_gap:
529  case eSubtype_operon:
530  case eSubtype_rep_origin:
531  case eSubtype_telomere:
532  return false;
533  default:
534  break;
535  }
536  return true;
537 }
538 
// File-local, value-initialized qualifier set. It is returned as the
// fallback result when a subtype has no entry in a qualifier map
// (see the `return empty_quals;` statement later in this file).
539 namespace
540 {
541  static constexpr CSeqFeatData::TLegalQualifiers empty_quals{};
542 }
543 
545 {
547  {
556  // although INSDC indicates that mol_type should be mandatory on
557  // source subtype, we do not do so since there are legacy
558  // records for which a mol_type could cause loss of information.
563  });
564 
565  auto iter = sx_MandatoryQuals.find(subtype);
566  if (iter == sx_MandatoryQuals.end()) {
567  return empty_quals;
568  }
569  return iter->second;
570 }
571 
572 
573 #ifdef _DEBUG
// Debug-only reference data: one row per subtype recording the feature
// choice it belongs to, the subtype itself, its expected numeric value and
// its spelled-out name. The subtype-table initialization asserts that each
// row's m_Type still equals GetTypeFromSubtype(m_Subtype).
// (The m_Type and m_Subtype member declarations are missing from this
// listing; they correspond to the first two initializer values.)
574 struct SSubtypeInfo {
577  int m_Value;
578  const char* m_Name;
579 };
// Builds one SSubtypeInfo row; #subtype stringizes the enumerator name so
// m_Name always matches the identifier used in the row.
580 #define SUBTYPE_INFO(type, subtype, value) \
581  { CSeqFeatData::type, CSeqFeatData::subtype, value, #subtype }
582 static const SSubtypeInfo s_subtype_info[] = {
583  SUBTYPE_INFO( e_not_set, eSubtype_bad, 0),
584  SUBTYPE_INFO( e_Gene, eSubtype_gene, 1),
585  SUBTYPE_INFO( e_Org, eSubtype_org, 2),
586  SUBTYPE_INFO( e_Cdregion, eSubtype_cdregion, 3),
587  SUBTYPE_INFO( e_Prot, eSubtype_prot, 4),
588  SUBTYPE_INFO( e_Prot, eSubtype_preprotein, 5),
589  SUBTYPE_INFO( e_Prot, eSubtype_mat_peptide_aa, 6),
590  SUBTYPE_INFO( e_Prot, eSubtype_sig_peptide_aa, 7),
591  SUBTYPE_INFO( e_Prot, eSubtype_transit_peptide_aa, 8),
592  SUBTYPE_INFO( e_Rna, eSubtype_preRNA, 9),
593  SUBTYPE_INFO( e_Rna, eSubtype_mRNA, 10),
594  SUBTYPE_INFO( e_Rna, eSubtype_tRNA, 11),
595  SUBTYPE_INFO( e_Rna, eSubtype_rRNA, 12),
596  SUBTYPE_INFO( e_Rna, eSubtype_snRNA, 13),
597  SUBTYPE_INFO( e_Rna, eSubtype_scRNA, 14),
598  SUBTYPE_INFO( e_Rna, eSubtype_snoRNA, 15),
599  SUBTYPE_INFO( e_Rna, eSubtype_otherRNA, 16),
600  SUBTYPE_INFO( e_Pub, eSubtype_pub, 17),
601  SUBTYPE_INFO( e_Seq, eSubtype_seq, 18),
602  SUBTYPE_INFO( e_Imp, eSubtype_imp, 19),
603  SUBTYPE_INFO( e_Imp, eSubtype_allele, 20),
604  SUBTYPE_INFO( e_Imp, eSubtype_attenuator, 21),
605  SUBTYPE_INFO( e_Imp, eSubtype_C_region, 22),
606  SUBTYPE_INFO( e_Imp, eSubtype_CAAT_signal, 23),
607  SUBTYPE_INFO( e_Imp, eSubtype_Imp_CDS, 24),
608  SUBTYPE_INFO( e_Imp, eSubtype_conflict, 25),
609  SUBTYPE_INFO( e_Imp, eSubtype_D_loop, 26),
610  SUBTYPE_INFO( e_Imp, eSubtype_D_segment, 27),
611  SUBTYPE_INFO( e_Imp, eSubtype_enhancer, 28),
612  SUBTYPE_INFO( e_Imp, eSubtype_exon, 29),
613  SUBTYPE_INFO( e_Imp, eSubtype_EC_number, 30),
614  SUBTYPE_INFO( e_Imp, eSubtype_GC_signal, 31),
615  SUBTYPE_INFO( e_Imp, eSubtype_iDNA, 32),
616  SUBTYPE_INFO( e_Imp, eSubtype_intron, 33),
617  SUBTYPE_INFO( e_Imp, eSubtype_J_segment, 34),
618  SUBTYPE_INFO( e_Imp, eSubtype_LTR, 35),
619  SUBTYPE_INFO( e_Imp, eSubtype_mat_peptide, 36),
620  SUBTYPE_INFO( e_Imp, eSubtype_misc_binding, 37),
621  SUBTYPE_INFO( e_Imp, eSubtype_misc_difference, 38),
622  SUBTYPE_INFO( e_Imp, eSubtype_misc_feature, 39),
623  SUBTYPE_INFO( e_Imp, eSubtype_misc_recomb, 40),
624  SUBTYPE_INFO( e_Imp, eSubtype_misc_RNA, 41),
625  SUBTYPE_INFO( e_Imp, eSubtype_misc_signal, 42),
626  SUBTYPE_INFO( e_Imp, eSubtype_misc_structure, 43),
627  SUBTYPE_INFO( e_Imp, eSubtype_modified_base, 44),
628  SUBTYPE_INFO( e_Imp, eSubtype_mutation, 45),
629  SUBTYPE_INFO( e_Imp, eSubtype_N_region, 46),
630  SUBTYPE_INFO( e_Imp, eSubtype_old_sequence, 47),
631  SUBTYPE_INFO( e_Imp, eSubtype_polyA_signal, 48),
632  SUBTYPE_INFO( e_Imp, eSubtype_polyA_site, 49),
633  SUBTYPE_INFO( e_Imp, eSubtype_precursor_RNA, 50),
634  SUBTYPE_INFO( e_Imp, eSubtype_prim_transcript, 51),
635  SUBTYPE_INFO( e_Imp, eSubtype_primer_bind, 52),
636  SUBTYPE_INFO( e_Imp, eSubtype_promoter, 53),
637  SUBTYPE_INFO( e_Imp, eSubtype_protein_bind, 54),
638  SUBTYPE_INFO( e_Imp, eSubtype_RBS, 55),
639  SUBTYPE_INFO( e_Imp, eSubtype_repeat_region, 56),
640  SUBTYPE_INFO( e_Imp, eSubtype_repeat_unit, 57),
641  SUBTYPE_INFO( e_Imp, eSubtype_rep_origin, 58),
642  SUBTYPE_INFO( e_Imp, eSubtype_S_region, 59),
643  SUBTYPE_INFO( e_Imp, eSubtype_satellite, 60),
644  SUBTYPE_INFO( e_Imp, eSubtype_sig_peptide, 61),
645  SUBTYPE_INFO( e_Imp, eSubtype_source, 62),
646  SUBTYPE_INFO( e_Imp, eSubtype_stem_loop, 63),
647  SUBTYPE_INFO( e_Imp, eSubtype_STS, 64),
648  SUBTYPE_INFO( e_Imp, eSubtype_TATA_signal, 65),
649  SUBTYPE_INFO( e_Imp, eSubtype_terminator, 66),
650  SUBTYPE_INFO( e_Imp, eSubtype_transit_peptide, 67),
651  SUBTYPE_INFO( e_Imp, eSubtype_unsure, 68),
652  SUBTYPE_INFO( e_Imp, eSubtype_V_region, 69),
653  SUBTYPE_INFO( e_Imp, eSubtype_V_segment, 70),
654  SUBTYPE_INFO( e_Imp, eSubtype_variation, 71),
655  SUBTYPE_INFO( e_Imp, eSubtype_virion, 72),
656  SUBTYPE_INFO( e_Imp, eSubtype_3clip, 73),
657  SUBTYPE_INFO( e_Imp, eSubtype_3UTR, 74),
658  SUBTYPE_INFO( e_Imp, eSubtype_5clip, 75),
659  SUBTYPE_INFO( e_Imp, eSubtype_5UTR, 76),
660  SUBTYPE_INFO( e_Imp, eSubtype_10_signal, 77),
661  SUBTYPE_INFO( e_Imp, eSubtype_35_signal, 78),
662  SUBTYPE_INFO( e_Imp, eSubtype_gap, 79),
663  SUBTYPE_INFO( e_Imp, eSubtype_operon, 80),
664  SUBTYPE_INFO( e_Imp, eSubtype_oriT, 81),
665  SUBTYPE_INFO( e_Imp, eSubtype_site_ref, 82),
666  SUBTYPE_INFO( e_Region, eSubtype_region, 83),
667  SUBTYPE_INFO( e_Comment, eSubtype_comment, 84),
668  SUBTYPE_INFO( e_Bond, eSubtype_bond, 85),
669  SUBTYPE_INFO( e_Site, eSubtype_site, 86),
670  SUBTYPE_INFO( e_Rsite, eSubtype_rsite, 87),
671  SUBTYPE_INFO( e_User, eSubtype_user, 88),
672  SUBTYPE_INFO( e_Txinit, eSubtype_txinit, 89),
673  SUBTYPE_INFO( e_Num, eSubtype_num, 90),
674  SUBTYPE_INFO( e_Psec_str, eSubtype_psec_str, 91),
675  SUBTYPE_INFO( e_Non_std_residue, eSubtype_non_std_residue, 92),
676  SUBTYPE_INFO( e_Het, eSubtype_het, 93),
677  SUBTYPE_INFO( e_Biosrc, eSubtype_biosrc, 94),
678  SUBTYPE_INFO( e_Rna, eSubtype_ncRNA, 95),
679  SUBTYPE_INFO( e_Rna, eSubtype_tmRNA, 96),
680  SUBTYPE_INFO( e_Clone, eSubtype_clone, 97),
681  SUBTYPE_INFO( e_Variation, eSubtype_variation_ref, 98),
682  SUBTYPE_INFO( e_Imp, eSubtype_mobile_element, 99),
683  SUBTYPE_INFO( e_Imp, eSubtype_centromere, 100),
684  SUBTYPE_INFO( e_Imp, eSubtype_telomere, 101),
685  SUBTYPE_INFO( e_Imp, eSubtype_assembly_gap, 102),
686  SUBTYPE_INFO( e_Imp, eSubtype_regulatory, 103),
687  SUBTYPE_INFO( e_Imp, eSubtype_propeptide, 104),
688  SUBTYPE_INFO( e_Prot, eSubtype_propeptide_aa, 105),
689  SUBTYPE_INFO( e_not_set, eSubtype_max, 106),
690  SUBTYPE_INFO( e_not_set, eSubtype_any, 255)
691 };
// Number of rows in s_subtype_info, derived from the array size.
692 static const size_t s_subtype_count =
693  sizeof(s_subtype_info)/sizeof(s_subtype_info[0]);
694 #endif
695 
696 
698 {
    // One-time construction of the subtype -> feature-choice table.
    // Fast path: nothing to do when the flag already reads true.
699  if (sx_SubtypesTableInitialized.load(memory_order_acquire)) {
700  return;
701  }
    // Slow path: take the init mutex and test the flag again, since another
    // thread may have finished the initialization while this one waited.
702  CMutexGuard guard(sx_InitTablesMutex);
703  if (sx_SubtypesTableInitialized.load(memory_order_acquire)) {
704  return;
705  }
    // (The declaration of `table` is missing from this listing.)
    // Size the table to cover every value up to eSubtype_any and default
    // every slot to e_not_set.
707  table.assign(eSubtype_any + 1, e_not_set);
708 
    // Contiguous enumerator ranges are filled by loops. (Further
    // assignments between these statements are missing from this listing.)
728  for (int sub = eSubtype_prot; sub <= eSubtype_transit_peptide_aa; ++sub) {
729  table[ESubtype(sub)] = e_Prot;
730  }
731  for (int sub = eSubtype_preRNA; sub <= eSubtype_otherRNA; ++sub) {
732  table[ESubtype(sub)] = e_Rna;
733  }
736  for (int sub = eSubtype_imp; sub <= eSubtype_site_ref; ++sub) {
737  table[ESubtype(sub)] = e_Imp;
738  }
    // Every subtype that appears in kImportTable is an import feature, which
    // also covers import subtypes outside the contiguous range above.
739  for ( const SImportEntry* p = kImportTable; p != kImportTableEnd; ++p ) {
740  table[p->m_Subtype] = e_Imp;
741  }
744 
    // Publish: the release store pairs with the acquire loads of the flag.
745  sx_SubtypesTableInitialized.store(true, memory_order_release);
746 
747 #ifdef _DEBUG
    // Disabled generator: flip the condition to print a fresh definition of
    // s_subtype_info[] in the SUBTYPE_INFO(...) row format.
748  if ( false ) { // print new definition of s_subtype_info[]
749  NcbiCout << "static const SSubtypeInfo s_subtype_info[] = {\n";
750  for ( size_t i = 0; i < s_subtype_count; ++i ) {
751  const SSubtypeInfo& info = s_subtype_info[i];
752  string type = SelectionName(GetTypeFromSubtype(info.m_Subtype));
        // Turn the selection name into an enumerator spelling:
        // "not set" -> "not_set"; otherwise capitalize the first letter and
        // replace '-' with '_'. The "e_" prefix is then prepended.
753  if ( type == "not set" ) {
754  type = "not_set";
755  }
756  else {
757  type[0] = toupper(type[0]);
758  NStr::ReplaceInPlace(type, "-", "_");
759  }
760  type = "e_"+type;
761  NcbiCout << " SUBTYPE_INFO("
762  << setw(20) << type << ", "
763  << setw(30) << info.m_Name << ", "
764  << setw(3) << info.m_Subtype << ")";
765  if ( i != s_subtype_count-1 ) NcbiCout << ",";
766  NcbiCout << "\n";
767  }
768  NcbiCout << "};" << NcbiEndl;
769  }
770  // check if type/subtype values didn't change
771  for ( size_t i = 0; i < s_subtype_count; ++i ) {
772  const SSubtypeInfo& info = s_subtype_info[i];
773  _ASSERT(info.m_Type == GetTypeFromSubtype(info.m_Subtype));
774  }
775 #endif
776 }
777 
779 {
780  MAKE_CONST_MAP(legal_quals_proxy, ESubtype, TLegalQualifiers,
781  {
782 { eSubtype_gene, {
783  eQual_allele,
790  eQual_gene,
793  eQual_label,
795  eQual_map,
797  eQual_note,
799  eQual_operon,
802  eQual_pseudo,
805  eQual_usedin,
806 } },
807 
808 //{ eSubtype_org, {
809 //},
810 
813  eQual_allele,
817  eQual_codon,
825  eQual_gene,
829  eQual_map,
830  eQual_note,
831  eQual_number,
833  eQual_operon,
837  eQual_pseudo,
845  eQual_usedin,
846 } },
847 
848 { eSubtype_prot, {
851  eQual_allele,
859  eQual_gene,
862  eQual_label,
864  eQual_map,
865  eQual_name,
866  eQual_note,
870  eQual_pseudo,
873  eQual_usedin,
874 } },
875 
878  eQual_allele,
885  eQual_gene,
888  eQual_label,
890  eQual_map,
891  eQual_name,
892  eQual_note,
896  eQual_pseudo,
899  eQual_usedin,
900 } },
901 
904  eQual_allele,
913  eQual_gene,
916  eQual_label,
918  eQual_map,
919  eQual_name,
920  eQual_note,
924  eQual_pseudo,
927  eQual_usedin,
928 } },
929 
932  eQual_allele,
940  eQual_gene,
943  eQual_label,
945  eQual_map,
946  eQual_name,
947  eQual_note,
951  eQual_pseudo,
954  eQual_usedin,
955 } },
956 
958  eQual_allele,
966  eQual_gene,
969  eQual_label,
971  eQual_map,
972  eQual_name,
973  eQual_note,
977  eQual_pseudo,
980  eQual_usedin,
981 } },
982 
983 { eSubtype_preRNA, {
984  eQual_allele,
990  eQual_gene,
993  eQual_label,
995  eQual_map,
996  eQual_note,
998  eQual_operon,
1000  eQual_pseudo,
1004  eQual_usedin,
1005 } },
1006 
1007 { eSubtype_mRNA, {
1008  eQual_allele,
1011  eQual_db_xref,
1016  eQual_gene,
1020  eQual_map,
1021  eQual_note,
1023  eQual_operon,
1024  eQual_partial,
1025  eQual_product,
1026  eQual_pseudo,
1031  eQual_usedin,
1032 } },
1033 
1034 { eSubtype_tRNA, {
1035  eQual_allele,
1038  eQual_db_xref,
1042  eQual_gene,
1045  eQual_label,
1047  eQual_map,
1048  eQual_note,
1050  eQual_operon,
1051  eQual_product,
1052  eQual_pseudo,
1056  eQual_usedin,
1057 } },
1058 
1059 { eSubtype_rRNA, {
1060  eQual_allele,
1062  eQual_db_xref,
1067  eQual_gene,
1070  eQual_label,
1072  eQual_map,
1073  eQual_note,
1075  eQual_operon,
1076  eQual_product,
1077  eQual_pseudo,
1080  eQual_usedin,
1081 } },
1082 
1083 { eSubtype_snRNA, {
1084  eQual_allele,
1086  eQual_db_xref,
1091  eQual_gene,
1094  eQual_label,
1096  eQual_map,
1097  eQual_note,
1099  eQual_product,
1100  eQual_pseudo,
1103  eQual_usedin,
1104 } },
1105 
1106 { eSubtype_scRNA, {
1107  eQual_allele,
1109  eQual_db_xref,
1114  eQual_gene,
1117  eQual_label,
1119  eQual_map,
1120  eQual_note,
1122  eQual_product,
1123  eQual_pseudo,
1126  eQual_usedin,
1127 } },
1128 
1129 { eSubtype_snoRNA, {
1130  eQual_allele,
1132  eQual_db_xref,
1137  eQual_gene,
1140  eQual_label,
1142  eQual_map,
1143  eQual_note,
1145  eQual_product,
1146  eQual_pseudo,
1149  eQual_usedin,
1150 } },
1151 
1152 //a.k.a.misc_RNA
1153 { eSubtype_otherRNA, {
1154  eQual_allele,
1156  eQual_db_xref,
1161  eQual_gene,
1164  eQual_label,
1166  eQual_map,
1168  eQual_note,
1170  eQual_operon,
1171  eQual_product,
1172  eQual_pseudo,
1176  eQual_usedin,
1177 } },
1178 
1179 //{ eSubtype_pub, {
1180 //},
1181 
1182 //{ eSubtype_seq, {
1183 //},
1184 
1185 //{ eSubtype_imp, {
1186 //},
1187 
1188 //{ eSubtype_allele, {
1189 //},
1190 
1192  eQual_allele,
1194  eQual_db_xref,
1198  eQual_gene,
1201  eQual_label,
1203  eQual_map,
1204  eQual_note,
1206  eQual_operon,
1208  eQual_pseudo,
1211  eQual_usedin,
1212 } },
1213 
1214 { eSubtype_C_region, {
1215  eQual_allele,
1217  eQual_db_xref,
1221  eQual_gene,
1224  eQual_label,
1226  eQual_map,
1227  eQual_note,
1229  eQual_product,
1230  eQual_pseudo,
1233  eQual_usedin,
1234 } },
1235 
1237  eQual_allele,
1239  eQual_db_xref,
1243  eQual_gene,
1246  eQual_label,
1248  eQual_map,
1249  eQual_note,
1251  eQual_pseudo,
1254  eQual_usedin,
1255 } },
1256 
1257 { eSubtype_Imp_CDS, {
1259  eQual_allele,
1262  eQual_codon,
1264  eQual_db_xref,
1269  eQual_gene,
1272  eQual_label,
1274  eQual_map,
1275  eQual_note,
1276  eQual_number,
1278  eQual_operon,
1279  eQual_product,
1281  eQual_pseudo,
1287  eQual_usedin,
1288 } },
1289 
1290 { eSubtype_conflict, {
1291  eQual_allele,
1293  eQual_compare,
1294  eQual_db_xref,
1298  eQual_gene,
1301  eQual_label,
1303  eQual_map,
1304  eQual_note,
1306  eQual_replace,
1307  eQual_usedin,
1308 } },
1309 
1310 { eSubtype_D_loop, {
1311  eQual_allele,
1313  eQual_db_xref,
1317  eQual_gene,
1320  eQual_label,
1322  eQual_map,
1323  eQual_note,
1325  eQual_usedin,
1326 } },
1327 
1328 { eSubtype_D_segment, {
1329  eQual_allele,
1331  eQual_db_xref,
1335  eQual_gene,
1338  eQual_label,
1340  eQual_map,
1341  eQual_note,
1343  eQual_product,
1344  eQual_pseudo,
1347  eQual_usedin,
1348 } },
1349 
1350 { eSubtype_enhancer, {
1351  eQual_allele,
1354  eQual_db_xref,
1358  eQual_gene,
1361  eQual_label,
1363  eQual_map,
1364  eQual_note,
1366  eQual_pseudo,
1370  eQual_usedin,
1371 } },
1372 
1373 { eSubtype_exon, {
1375  eQual_allele,
1377  eQual_db_xref,
1382  eQual_gene,
1386  eQual_map,
1387  eQual_note,
1388  eQual_number,
1390  eQual_partial,
1391  eQual_product,
1392  eQual_pseudo,
1396  eQual_usedin,
1397 } },
1398 
1399 { eSubtype_GC_signal, {
1400  eQual_allele,
1402  eQual_db_xref,
1406  eQual_gene,
1409  eQual_label,
1411  eQual_map,
1412  eQual_note,
1414  eQual_pseudo,
1417  eQual_usedin,
1418 } },
1419 
1420 { eSubtype_iDNA, {
1421  eQual_allele,
1423  eQual_db_xref,
1428  eQual_gene,
1431  eQual_label,
1433  eQual_map,
1434  eQual_note,
1435  eQual_number,
1437  eQual_pseudo,
1440  eQual_usedin,
1441 } },
1442 
1443 { eSubtype_intron, {
1444  eQual_allele,
1447  eQual_db_xref,
1452  eQual_gene,
1456  eQual_map,
1457  eQual_note,
1458  eQual_number,
1460  eQual_partial,
1461  eQual_pseudo,
1465  eQual_usedin,
1466 } },
1467 
1468 { eSubtype_J_segment, {
1469  eQual_allele,
1471  eQual_db_xref,
1475  eQual_gene,
1478  eQual_label,
1480  eQual_map,
1481  eQual_note,
1483  eQual_product,
1484  eQual_pseudo,
1487  eQual_usedin,
1488 } },
1489 
1490 { eSubtype_LTR, {
1491  eQual_allele,
1493  eQual_db_xref,
1498  eQual_gene,
1501  eQual_label,
1503  eQual_map,
1504  eQual_note,
1507  eQual_usedin,
1508 } },
1509 
1512  eQual_allele,
1514  eQual_db_xref,
1519  eQual_gene,
1522  eQual_label,
1524  eQual_map,
1525  eQual_note,
1527  eQual_product,
1528  eQual_pseudo,
1531  eQual_usedin,
1532 } },
1533 
1535  eQual_allele,
1538  eQual_db_xref,
1543  eQual_gene,
1546  eQual_label,
1548  eQual_map,
1549  eQual_note,
1551  eQual_usedin,
1552 } },
1553 
1555  eQual_allele,
1557  eQual_clone,
1558  eQual_compare,
1559  eQual_db_xref,
1563  eQual_gene,
1566  eQual_label,
1568  eQual_map,
1569  eQual_note,
1572  eQual_replace,
1574  eQual_usedin,
1575 } },
1576 
1578  eQual_allele,
1580  eQual_db_xref,
1586  eQual_gene,
1589  eQual_label,
1591  eQual_map,
1592  eQual_note,
1593  eQual_number,
1596  eQual_product,
1597  eQual_pseudo,
1599  eQual_SO_type,
1601  eQual_usedin,
1602 } },
1603 
1605  eQual_allele,
1607  eQual_db_xref,
1611  eQual_gene,
1614  eQual_label,
1616  eQual_map,
1617  eQual_note,
1621  eQual_usedin,
1622 } },
1623 
1624 { eSubtype_misc_RNA, {
1625  eQual_allele,
1627  eQual_db_xref,
1632  eQual_gene,
1635  eQual_label,
1637  eQual_map,
1638  eQual_note,
1640  eQual_operon,
1641  eQual_product,
1643  eQual_usedin,
1644 } },
1645 
1647  eQual_allele,
1649  eQual_db_xref,
1654  eQual_gene,
1657  eQual_label,
1659  eQual_map,
1660  eQual_note,
1662  eQual_operon,
1664  eQual_pseudo,
1668  eQual_usedin,
1669 } },
1670 
1672  eQual_allele,
1674  eQual_db_xref,
1679  eQual_gene,
1682  eQual_label,
1684  eQual_map,
1685  eQual_note,
1688  eQual_usedin,
1689 } },
1690 
1692  eQual_allele,
1694  eQual_db_xref,
1698  eQual_gene,
1701  eQual_label,
1703  eQual_map,
1705  eQual_note,
1707  eQual_usedin,
1708 } },
1709 
1710 //{ eSubtype_mutation, {
1711 //},
1712 
1713 { eSubtype_N_region, {
1714  eQual_allele,
1716  eQual_db_xref,
1720  eQual_gene,
1723  eQual_label,
1725  eQual_map,
1726  eQual_note,
1728  eQual_product,
1729  eQual_pseudo,
1732  eQual_usedin,
1733 } },
1734 
1736  eQual_allele,
1738  eQual_compare,
1739  eQual_db_xref,
1743  eQual_gene,
1746  eQual_label,
1748  eQual_map,
1749  eQual_note,
1751  eQual_replace,
1752  eQual_usedin,
1753 } },
1754 
1756  eQual_allele,
1758  eQual_db_xref,
1762  eQual_gene,
1765  eQual_label,
1767  eQual_map,
1768  eQual_note,
1770  eQual_pseudo,
1773  eQual_usedin,
1774 } },
1775 
1777  eQual_allele,
1779  eQual_db_xref,
1783  eQual_gene,
1786  eQual_label,
1788  eQual_map,
1789  eQual_note,
1791  eQual_usedin,
1792 } },
1793 
1795  eQual_allele,
1797  eQual_db_xref,
1803  eQual_gene,
1806  eQual_label,
1808  eQual_map,
1809  eQual_note,
1811  eQual_operon,
1812  eQual_product,
1815  eQual_usedin,
1816 } },
1817 
1819  eQual_allele,
1821  eQual_db_xref,
1826  eQual_gene,
1829  eQual_label,
1831  eQual_map,
1832  eQual_note,
1834  eQual_operon,
1836  eQual_usedin,
1837 } },
1838 
1841  eQual_allele,
1843  eQual_db_xref,
1847  eQual_gene,
1850  eQual_label,
1852  eQual_map,
1853  eQual_note,
1856  eQual_usedin,
1857 } },
1858 
1859 { eSubtype_promoter, {
1860  eQual_allele,
1863  eQual_db_xref,
1868  eQual_gene,
1871  eQual_label,
1873  eQual_map,
1874  eQual_note,
1876  eQual_operon,
1878  eQual_pseudo,
1882  eQual_usedin,
1883 } },
1884 
1886  eQual_allele,
1889  eQual_db_xref,
1894  eQual_gene,
1897  eQual_label,
1899  eQual_map,
1900  eQual_note,
1902  eQual_operon,
1904  eQual_usedin,
1905 } },
1906 
1907 { eSubtype_RBS, {
1908  eQual_allele,
1910  eQual_db_xref,
1914  eQual_gene,
1917  eQual_label,
1919  eQual_map,
1920  eQual_note,
1922  eQual_pseudo,
1926  eQual_usedin,
1927 } },
1928 
1930  eQual_allele,
1932  eQual_db_xref,
1937  eQual_gene,
1941  eQual_label,
1943  eQual_map,
1945  eQual_note,
1947  eQual_partial,
1956  eQual_usedin,
1957 } },
1958 
1960  eQual_allele,
1962  eQual_db_xref,
1967  eQual_gene,
1970  eQual_label,
1972  eQual_map,
1973  eQual_note,
1980  eQual_usedin,
1981 } },
1982 
1984  eQual_allele,
1986  eQual_db_xref,
1992  eQual_gene,
1995  eQual_label,
1997  eQual_map,
1998  eQual_note,
2001  eQual_usedin,
2002 } },
2003 
2004 { eSubtype_S_region, {
2005  eQual_allele,
2007  eQual_db_xref,
2011  eQual_gene,
2014  eQual_label,
2016  eQual_map,
2017  eQual_note,
2019  eQual_product,
2020  eQual_pseudo,
2023  eQual_usedin,
2024 } },
2025 
2026 { eSubtype_satellite, {
2027  eQual_allele,
2029  eQual_db_xref,
2033  eQual_gene,
2036  eQual_label,
2038  eQual_map,
2039  eQual_note,
2047  eQual_usedin,
2048 } },
2049 
2051  eQual_allele,
2053  eQual_db_xref,
2058  eQual_gene,
2061  eQual_label,
2063  eQual_map,
2064  eQual_note,
2066  eQual_product,
2067  eQual_pseudo,
2070  eQual_usedin,
2071 } },
2072 
2073 { eSubtype_source, {
2083  eQual_clone,
2087  eQual_country,
2091  eQual_db_xref,
2093  eQual_ecotype,
2096  eQual_focus,
2101  eQual_host,
2103  eQual_isolate,
2107  eQual_label,
2108  eQual_lat_lon,
2111  eQual_map,
2117  eQual_note,
2120  eQual_plasmid,
2124  eQual_segment,
2127  eQual_serovar,
2128  eQual_sex,
2130  eQual_strain,
2140  eQual_usedin,
2141  eQual_variety,
2142  eQual_virion,
2144 } },
2145 
2146 { eSubtype_stem_loop, {
2147  eQual_allele,
2149  eQual_db_xref,
2153  eQual_gene,
2156  eQual_label,
2158  eQual_map,
2159  eQual_note,
2161  eQual_operon,
2163  eQual_usedin,
2164 } },
2165 
2166 { eSubtype_STS, {
2167  eQual_allele,
2169  eQual_db_xref,
2172  eQual_gene,
2175  eQual_label,
2177  eQual_map,
2178  eQual_note,
2181  eQual_usedin,
2182 } },
2183 
2185  eQual_allele,
2187  eQual_db_xref,
2190  eQual_gene,
2193  eQual_label,
2195  eQual_map,
2196  eQual_note,
2198  eQual_pseudo,
2201  eQual_usedin,
2202 } },
2203 
2205  eQual_allele,
2207  eQual_db_xref,
2211  eQual_gene,
2214  eQual_label,
2216  eQual_map,
2217  eQual_note,
2219  eQual_operon,
2220  eQual_pseudo,
2224  eQual_usedin,
2225 } },
2226 
2228  eQual_allele,
2230  eQual_db_xref,
2234  eQual_gene,
2237  eQual_label,
2239  eQual_map,
2240  eQual_note,
2242  eQual_product,
2243  eQual_pseudo,
2246  eQual_usedin,
2247 } },
2248 
2249 { eSubtype_unsure, {
2250  eQual_allele,
2252  eQual_compare,
2253  eQual_db_xref,
2256  eQual_gene,
2259  eQual_label,
2261  eQual_map,
2262  eQual_note,
2264  eQual_replace,
2265  eQual_usedin,
2266 } },
2267 
2268 { eSubtype_V_region, {
2269  eQual_allele,
2271  eQual_db_xref,
2274  eQual_gene,
2277  eQual_label,
2279  eQual_map,
2280  eQual_note,
2282  eQual_product,
2283  eQual_pseudo,
2286  eQual_usedin,
2287 } },
2288 
2289 { eSubtype_V_segment, {
2290  eQual_allele,
2292  eQual_db_xref,
2295  eQual_gene,
2298  eQual_label,
2300  eQual_map,
2301  eQual_note,
2303  eQual_product,
2304  eQual_pseudo,
2307  eQual_usedin,
2308 } },
2309 
2310 { eSubtype_variation, {
2311  eQual_allele,
2313  eQual_compare,
2314  eQual_db_xref,
2318  eQual_gene,
2321  eQual_label,
2323  eQual_map,
2324  eQual_note,
2327  eQual_product,
2328  eQual_replace,
2330  eQual_usedin,
2331 } },
2332 
2333 //{ eSubtype_virion, {
2334 
2335 { eSubtype_3clip, {
2336  eQual_allele,
2338  eQual_db_xref,
2342  eQual_gene,
2345  eQual_label,
2347  eQual_map,
2348  eQual_note,
2352  eQual_usedin,
2353 } },
2354 
2355 { eSubtype_3UTR, {
2356  eQual_allele,
2358  eQual_db_xref,
2362  eQual_gene,
2365  eQual_label,
2367  eQual_map,
2368  eQual_note,
2372  eQual_usedin,
2373 } },
2374 
2375 { eSubtype_5clip, {
2376  eQual_allele,
2378  eQual_db_xref,
2382  eQual_gene,
2385  eQual_label,
2387  eQual_map,
2388  eQual_note,
2392  eQual_usedin,
2393 } },
2394 
2395 { eSubtype_5UTR, {
2396  eQual_allele,
2398  eQual_db_xref,
2402  eQual_gene,
2405  eQual_label,
2407  eQual_map,
2408  eQual_note,
2412  eQual_usedin,
2413 } },
2414 
2415 { eSubtype_10_signal, {
2416  eQual_allele,
2418  eQual_db_xref,
2421  eQual_gene,
2424  eQual_label,
2426  eQual_map,
2427  eQual_note,
2429  eQual_operon,
2430  eQual_pseudo,
2434  eQual_usedin,
2435 } },
2436 
2437 { eSubtype_35_signal, {
2438  eQual_allele,
2440  eQual_db_xref,
2443  eQual_gene,
2446  eQual_label,
2448  eQual_map,
2449  eQual_note,
2451  eQual_operon,
2452  eQual_pseudo,
2456  eQual_usedin,
2457 } },
2458 
2459 { eSubtype_gap, {
2464  eQual_map,
2465  eQual_note,
2466 } },
2467 
2468 { eSubtype_operon, {
2469  eQual_allele,
2471  eQual_db_xref,
2476  eQual_label,
2477  eQual_map,
2478  eQual_note,
2479  eQual_operon,
2481  eQual_pseudo,
2484  eQual_usedin,
2485 } },
2486 
2487 { eSubtype_oriT, {
2488  eQual_allele,
2491  eQual_db_xref,
2495  eQual_gene,
2498  eQual_label,
2500  eQual_map,
2501  eQual_note,
2508  eQual_usedin,
2509 } },
2510 
2511 //{ eSubtype_site_ref, {
2512 //},
2513 
2514 { eSubtype_region, {
2515  eQual_allele,
2517  eQual_db_xref,
2521  eQual_gene,
2524  eQual_label,
2526  eQual_map,
2527  eQual_note,
2528  eQual_number,
2531  eQual_product,
2532  eQual_pseudo,
2535  eQual_SO_type,
2537  eQual_usedin,
2538 } },
2539 
2540 //sameasmisc_feature
2541 { eSubtype_comment, {
2542  eQual_allele,
2544  eQual_db_xref,
2548  eQual_gene,
2551  eQual_label,
2553  eQual_map,
2554  eQual_note,
2555  eQual_number,
2558  eQual_product,
2559  eQual_pseudo,
2562  eQual_usedin,
2563 } },
2564 
2565 //sameasmisc_feature
2566 { eSubtype_bond, {
2567  eQual_allele,
2570  eQual_db_xref,
2574  eQual_gene,
2577  eQual_label,
2579  eQual_map,
2580  eQual_note,
2581  eQual_number,
2584  eQual_product,
2585  eQual_pseudo,
2588  eQual_usedin,
2589 } },
2590 
2591 //sameasmisc_feature
2592 { eSubtype_site, {
2593  eQual_allele,
2595  eQual_db_xref,
2599  eQual_gene,
2602  eQual_label,
2604  eQual_map,
2605  eQual_note,
2606  eQual_number,
2608  eQual_pseudo,
2612  eQual_usedin,
2613 } },
2614 
2615 //{ eSubtype_rsite, {
2616 //},
2617 
2618 //{ eSubtype_user, {
2619 //},
2620 
2621 //{ eSubtype_txinit, {
2622 //},
2623 
2624 //{ eSubtype_num, {
2625 //},
2626 
2627 //sameasmisc_feature???
2628 { eSubtype_psec_str, {
2629  eQual_allele,
2631  eQual_db_xref,
2635  eQual_gene,
2638  eQual_label,
2640  eQual_map,
2641  eQual_note,
2642  eQual_number,
2645  eQual_product,
2646  eQual_pseudo,
2650  eQual_usedin,
2651 } },
2652 
2654  eQual_allele,
2656  eQual_db_xref,
2660  eQual_gene,
2663  eQual_label,
2665  eQual_map,
2667  eQual_note,
2668  eQual_number,
2671  eQual_product,
2672  eQual_pseudo,
2675  eQual_usedin,
2676 } },
2677 
2678 //sameasmisc_feature
2679 { eSubtype_het, {
2680  eQual_allele,
2682  eQual_db_xref,
2686  eQual_gene,
2690  eQual_label,
2692  eQual_map,
2693  eQual_note,
2694  eQual_number,
2697  eQual_product,
2698  eQual_pseudo,
2701  eQual_usedin,
2702 } },
2703 
2704 { eSubtype_biosrc, {
2714  eQual_clone,
2718  eQual_country,
2722  eQual_db_xref,
2724  eQual_ecotype,
2727  eQual_focus,
2732  eQual_host,
2734  eQual_isolate,
2738  eQual_label,
2739  eQual_lat_lon,
2742  eQual_map,
2748  eQual_note,
2751  eQual_plasmid,
2755  eQual_segment,
2758  eQual_serovar,
2759  eQual_sex,
2761  eQual_strain,
2771  eQual_usedin,
2772  eQual_variety,
2773  eQual_virion,
2775 } },
2776 
2777 { eSubtype_ncRNA, {
2778  eQual_allele,
2780  eQual_db_xref,
2785  eQual_gene,
2788  eQual_label,
2790  eQual_map,
2792  eQual_note,
2794  eQual_operon,
2795  eQual_product,
2796  eQual_pseudo,
2800  eQual_usedin,
2801 } },
2802 
2803 { eSubtype_tmRNA, {
2804  eQual_allele,
2806  eQual_db_xref,
2811  eQual_gene,
2814  eQual_label,
2816  eQual_map,
2817  eQual_note,
2819  eQual_operon,
2820  eQual_product,
2821  eQual_pseudo,
2825  eQual_usedin,
2826 } },
2827 
2828 //{ eSubtype_clone, {
2829 //},
2830 
2832  eQual_allele,
2834  eQual_compare,
2835  eQual_db_xref,
2839  eQual_gene,
2842  eQual_label,
2844  eQual_map,
2845  eQual_note,
2848  eQual_product,
2849  eQual_replace,
2851  eQual_usedin,
2852 } },
2853 
2855  eQual_allele,
2857  eQual_db_xref,
2862  eQual_gene,
2866  eQual_label,
2868  eQual_map,
2870  eQual_note,
2876  eQual_usedin,
2877 } },
2878 
2881  eQual_db_xref,
2885  eQual_note,
2887 } },
2888 
2889 { eSubtype_telomere, {
2891  eQual_db_xref,
2895  eQual_note,
2900 } },
2901 
2907 } },
2908 
2910  eQual_allele,
2913  eQual_db_xref,
2917  eQual_gene,
2920  eQual_label,
2922  eQual_map,
2923  eQual_note,
2925  eQual_operon,
2927  eQual_pseudo,
2931 } },
2932 
2935  eQual_allele,
2938  eQual_db_xref,
2944  eQual_gene,
2947  eQual_label,
2949  eQual_map,
2950  eQual_name,
2951  eQual_note,
2953  eQual_product,
2955  eQual_pseudo,
2958  eQual_usedin,
2959 } },
2960 
2963  eQual_allele,
2966  eQual_db_xref,
2972  eQual_gene,
2975  eQual_label,
2977  eQual_map,
2978  eQual_name,
2979  eQual_note,
2981  eQual_product,
2983  eQual_pseudo,
2986  eQual_usedin,
2987 } },
2989 });
2990 
2991  static constexpr TSubTypeQualifiersMap g_legal_quals{legal_quals_proxy};
2992 
2993  return g_legal_quals;
2994 }
2995 
2996 
2998 {
2999  auto it = s_GetLegalQualMap().find(subtype);
3000  if (it == s_GetLegalQualMap().end())
3001  return empty_quals;
3002 
3003  return it->second;
3004 }
3005 
3007 {
3008  auto it = s_GetLegalQualMap().find(subtype);
3009  if (it == s_GetLegalQualMap().end())
3010  return false;
3011 
3012  return it->second.test(qual);
3013 }
3014 
3015 // This is a compile-time two-way map between qualifier enum values and their names.
3016 // There is no need to pre-sort the items in any particular order.
3017 // A duplicate item is overridden by the later one, i.e. it is permitted to add aliases (see eQual_host).
3019 {
3020  { CSeqFeatData::eQual_bad, "bad" },
3021  { CSeqFeatData::eQual_allele, "allele" },
3022  { CSeqFeatData::eQual_altitude, "altitude" },
3023  { CSeqFeatData::eQual_anticodon, "anticodon" },
3024  { CSeqFeatData::eQual_artificial_location, "artificial_location" },
3025  { CSeqFeatData::eQual_bio_material, "bio_material" },
3026  { CSeqFeatData::eQual_bond_type, "bond_type" },
3027  { CSeqFeatData::eQual_bound_moiety, "bound_moiety" },
3028  { CSeqFeatData::eQual_calculated_mol_wt, "calculated_mol_wt" },
3029  { CSeqFeatData::eQual_cell_line, "cell_line" },
3030  { CSeqFeatData::eQual_cell_type, "cell_type" },
3031  { CSeqFeatData::eQual_chloroplast, "chloroplast" },
3032  { CSeqFeatData::eQual_chromoplast, "chromoplast" },
3033  { CSeqFeatData::eQual_chromosome, "chromosome" },
3034  { CSeqFeatData::eQual_citation, "citation" },
3035  { CSeqFeatData::eQual_clone, "clone" },
3036  { CSeqFeatData::eQual_clone_lib, "clone_lib" },
3037  { CSeqFeatData::eQual_coded_by, "coded_by" },
3038  { CSeqFeatData::eQual_codon, "codon" },
3039  { CSeqFeatData::eQual_codon_start, "codon_start" },
3040  { CSeqFeatData::eQual_collected_by, "collected_by" },
3041  { CSeqFeatData::eQual_collection_date, "collection_date" },
3042  { CSeqFeatData::eQual_compare, "compare" },
3043  { CSeqFeatData::eQual_cons_splice, "cons_splice" },
3044  { CSeqFeatData::eQual_country, "country" },
3045  { CSeqFeatData::eQual_cultivar, "cultivar" },
3046  { CSeqFeatData::eQual_culture_collection, "culture_collection" },
3047  { CSeqFeatData::eQual_cyanelle, "cyanelle" },
3048  { CSeqFeatData::eQual_db_xref, "db_xref" },
3049  { CSeqFeatData::eQual_derived_from, "derived_from" },
3050  { CSeqFeatData::eQual_dev_stage, "dev_stage" },
3051  { CSeqFeatData::eQual_direction, "direction" },
3052  { CSeqFeatData::eQual_EC_number, "EC_number" },
3053  { CSeqFeatData::eQual_ecotype, "ecotype" },
3054  { CSeqFeatData::eQual_environmental_sample, "environmental_sample" },
3055  { CSeqFeatData::eQual_estimated_length, "estimated_length" },
3056  { CSeqFeatData::eQual_evidence, "evidence" },
3057  { CSeqFeatData::eQual_exception, "exception" },
3058  { CSeqFeatData::eQual_experiment, "experiment" },
3059  { CSeqFeatData::eQual_feat_class, "feat_class" },
3060  { CSeqFeatData::eQual_focus, "focus" },
3061  { CSeqFeatData::eQual_frequency, "frequency" },
3062  { CSeqFeatData::eQual_function, "function" },
3063  { CSeqFeatData::eQual_gap_type, "gap_type" },
3064  { CSeqFeatData::eQual_gdb_xref, "gdb_xref" },
3065  { CSeqFeatData::eQual_gene, "gene" },
3066  { CSeqFeatData::eQual_gene_synonym, "gene_synonym" },
3067  { CSeqFeatData::eQual_germline, "germline" },
3068  { CSeqFeatData::eQual_haplogroup, "haplogroup" },
3069  { CSeqFeatData::eQual_haplotype, "haplotype" },
3070  { CSeqFeatData::eQual_heterogen, "heterogen" },
3071  { CSeqFeatData::eQual_host, "specific_host" }, // this is supported
3072  { CSeqFeatData::eQual_host, "host" }, // the second will override previous
3073  { CSeqFeatData::eQual_identified_by, "identified_by" },
3074  { CSeqFeatData::eQual_inference, "inference" },
3075  { CSeqFeatData::eQual_insertion_seq, "insertion_seq" },
3076  { CSeqFeatData::eQual_isolate, "isolate" },
3077  { CSeqFeatData::eQual_isolation_source, "isolation_source" },
3078  { CSeqFeatData::eQual_kinetoplast, "kinetoplast" },
3079  { CSeqFeatData::eQual_lab_host, "lab_host" },
3080  { CSeqFeatData::eQual_label, "label" },
3081  { CSeqFeatData::eQual_lat_lon, "lat_lon" },
3082  { CSeqFeatData::eQual_linkage_evidence, "linkage_evidence" },
3083  { CSeqFeatData::eQual_linkage_group, "linkage_group" },
3084  { CSeqFeatData::eQual_locus_tag, "locus_tag" },
3085  { CSeqFeatData::eQual_macronuclear, "macronuclear" },
3086  { CSeqFeatData::eQual_map, "map" },
3087  { CSeqFeatData::eQual_mating_type, "mating_type" },
3088  { CSeqFeatData::eQual_metagenome_source, "metagenome_source" },
3089  { CSeqFeatData::eQual_metagenomic, "metagenomic" },
3090  { CSeqFeatData::eQual_mitochondrion, "mitochondrion" },
3091  { CSeqFeatData::eQual_mobile_element, "mobile_element" },
3092  { CSeqFeatData::eQual_mobile_element_type, "mobile_element_type" },
3093  { CSeqFeatData::eQual_mod_base, "mod_base" },
3094  { CSeqFeatData::eQual_mol_type, "mol_type" },
3095  { CSeqFeatData::eQual_name, "name" },
3096  { CSeqFeatData::eQual_nomenclature, "nomenclature" },
3097  { CSeqFeatData::eQual_non_std_residue, "non_std_residue" },
3098  { CSeqFeatData::eQual_ncRNA_class, "ncRNA_class" },
3099  { CSeqFeatData::eQual_note, "note" },
3100  { CSeqFeatData::eQual_number, "number" },
3101  { CSeqFeatData::eQual_old_locus_tag, "old_locus_tag" },
3102  { CSeqFeatData::eQual_operon, "operon" },
3103  { CSeqFeatData::eQual_organelle, "organelle" },
3104  { CSeqFeatData::eQual_organism, "organism" },
3105  { CSeqFeatData::eQual_partial, "partial" },
3106  { CSeqFeatData::eQual_PCR_conditions, "PCR_conditions" },
3107  { CSeqFeatData::eQual_PCR_primers, "PCR_primers" },
3108  { CSeqFeatData::eQual_phenotype, "phenotype" },
3109  { CSeqFeatData::eQual_plasmid, "plasmid" },
3110  { CSeqFeatData::eQual_pop_variant, "pop_variant" },
3111  { CSeqFeatData::eQual_product, "product" },
3112  { CSeqFeatData::eQual_protein_id, "protein_id" },
3113  { CSeqFeatData::eQual_proviral, "proviral" },
3114  { CSeqFeatData::eQual_pseudo, "pseudo" },
3115  { CSeqFeatData::eQual_pseudogene, "pseudogene" },
3116  { CSeqFeatData::eQual_rearranged, "rearranged" },
3117  { CSeqFeatData::eQual_recombination_class, "recombination_class" },
3118  { CSeqFeatData::eQual_region_name, "region_name" },
3119  { CSeqFeatData::eQual_regulatory_class, "regulatory_class" },
3120  { CSeqFeatData::eQual_replace, "replace" },
3121  { CSeqFeatData::eQual_ribosomal_slippage, "ribosomal_slippage" },
3122  { CSeqFeatData::eQual_rpt_family, "rpt_family" },
3123  { CSeqFeatData::eQual_rpt_type, "rpt_type" },
3124  { CSeqFeatData::eQual_rpt_unit, "rpt_unit" },
3125  { CSeqFeatData::eQual_rpt_unit_range, "rpt_unit_range" },
3126  { CSeqFeatData::eQual_rpt_unit_seq, "rpt_unit_seq" },
3127  { CSeqFeatData::eQual_satellite, "satellite" },
3128  { CSeqFeatData::eQual_sec_str_type, "sec_str_type" },
3129  { CSeqFeatData::eQual_segment, "segment" },
3130  { CSeqFeatData::eQual_sequenced_mol, "sequenced_mol" },
3131  { CSeqFeatData::eQual_serotype, "serotype" },
3132  { CSeqFeatData::eQual_serovar, "serovar" },
3133  { CSeqFeatData::eQual_sex, "sex" },
3134  { CSeqFeatData::eQual_site_type, "site_type" },
3135  { CSeqFeatData::eQual_SO_type, "SO_type" },
3136  { CSeqFeatData::eQual_specimen_voucher, "specimen_voucher" },
3137  { CSeqFeatData::eQual_standard_name, "standard_name" },
3138  { CSeqFeatData::eQual_strain, "strain" },
3139  { CSeqFeatData::eQual_submitter_seqid, "submitter_seqid" },
3140  { CSeqFeatData::eQual_sub_clone, "sub_clone" },
3141  { CSeqFeatData::eQual_sub_species, "sub_species" },
3142  { CSeqFeatData::eQual_sub_strain, "sub_strain" },
3143  { CSeqFeatData::eQual_tag_peptide, "tag_peptide" },
3144  { CSeqFeatData::eQual_tissue_lib, "tissue_lib" },
3145  { CSeqFeatData::eQual_tissue_type, "tissue_type" },
3146  { CSeqFeatData::eQual_trans_splicing, "trans_splicing" },
3147  { CSeqFeatData::eQual_transcript_id, "transcript_id" },
3148  { CSeqFeatData::eQual_transgenic, "transgenic" },
3149  { CSeqFeatData::eQual_translation, "translation" },
3150  { CSeqFeatData::eQual_transl_except, "transl_except" },
3151  { CSeqFeatData::eQual_transl_table, "transl_table" },
3152  { CSeqFeatData::eQual_transposon, "transposon" },
3153  { CSeqFeatData::eQual_type_material, "type_material" },
3154  { CSeqFeatData::eQual_UniProtKB_evidence, "UniProtKB_evidence" },
3155  { CSeqFeatData::eQual_usedin, "usedin" },
3156  { CSeqFeatData::eQual_variety, "variety" },
3157  { CSeqFeatData::eQual_virion, "virion" },
3158  { CSeqFeatData::eQual_whole_replicon, "whole_replicon" }
3159 })
3160 
3162 {
3163  auto iter = sc_QualPairs.first.find(qual);
3164  if (iter == sc_QualPairs.first.end())
3165  return kEmptyStr;
3166  else
3167  return iter->second;
3168 }
3169 
3171 {
3172  auto iter = sc_QualPairs.second.find(qual);
3173  if (iter != sc_QualPairs.second.end())
3174  return iter->second;
3175 
3176  return CSeqFeatData::eQual_bad;
3177 }
3178 
3179  std::pair<CSeqFeatData::EQualifier, CTempString> CSeqFeatData::GetQualifierTypeAndValue(CTempString qual)
3180 {
3181  auto iter = sc_QualPairs.second.find(qual);
3182  if (iter != sc_QualPairs.second.end())
3183  {
3184  CTempString value = iter->first;
3185  return { iter->second, value };
3186  }
3187 
3188  return { CSeqFeatData::eQual_bad, kEmptyStr };
3189 }
3190 
3191 namespace
3192 {
// Expands to one brace-initialized pair of subtypes followed by a comma:
// ADD_XREF_PAIR(a, b) -> {CSeqFeatData::eSubtype_a, CSeqFeatData::eSubtype_b },
// The trailing comma is part of the expansion, so entries are listed without separators.
3193 #define ADD_XREF_PAIR(x, y) {CSeqFeatData::eSubtype_ ## x, CSeqFeatData::eSubtype_ ## y },
3194 
3195  template<typename _Ty, size_t _Width>
3196  class TPairsMatrix
3197  {
3198  public:
3199  static constexpr size_t width = _Width;
3200  using TBitset = ct::const_bitset<width, _Ty>;
3201  using table_t = ct_const_array<TBitset, width>;
3202 
3203  using init_t = std::pair<_Ty, _Ty>;
3204  using non_empty_pair = std::pair<_Ty, TBitset>;
3205 
3206  template<size_t N>
3207  constexpr TPairsMatrix(const init_t(&init)[N])
3208  {
3209  using row_t = ct_const_array<char, width>;
3210  using init_matrix_t = ct_const_array<row_t, width>;
3211 
3212  init_matrix_t matrix{};
3213  for (const auto& rec : init)
3214  {
3215  matrix[rec.first][rec.second] = '1';
3216  matrix[rec.second][rec.first] = '1';
3217  }
3218  m_table = assemble_table(matrix, std::make_index_sequence<width>{});
3219  size_t last = 0;
3220  for (size_t i = 0; i < width; ++i)
3221  {
3222  if (!m_table[i].empty())
3223  m_non_empty_indices[last++] = i;
3224  }
3225  m_non_empty_count = last;
3226  }
3227 
3228  constexpr size_t NonEmptyCount() const
3229  {
3230  return m_non_empty_count;
3231  }
3232 
3233  template<size_t N>
3234  static bool Check(const ct_const_array<non_empty_pair, N>& in, _Ty v1, _Ty v2)
3235  {
3236  auto it = std::lower_bound(in.begin(), in.end(), v1, [](auto left, auto right)
3237  {
3238  return left.first < right;
3239  });
3240  if (it != in.end())
3241  {
3242  return it->second.test(v2);
3243  }
3244  return false;
3245  }
3246  static bool Check(const table_t& table, _Ty v1, _Ty v2)
3247  {
3248  return table[v1].test(v2);
3249  }
3250  // returns ordered collection of non_empty bitsets, suitable for binary search
3251  template<size_t N>
3252  constexpr auto select_bitsets() const
3253  {
3254  return select_bitsets(std::make_index_sequence<N>{});
3255  }
3256  // returns all bitsets, suitable for constant-time lookups
3257  constexpr auto get_bitsets() const
3258  {
3259  return m_table;
3260  }
3261  protected:
3262  template<typename _Matrix, size_t...Ints>
3263  static constexpr auto assemble_table(const _Matrix& init, std::index_sequence<Ints...>)
3264  -> table_t
3265  {
3266  return { { TBitset{init[Ints]} ... } };
3267  }
3268  template<size_t I>
3269  constexpr non_empty_pair make_row() const
3270  {
3271  return { _Ty(m_non_empty_indices[I]), m_table[m_non_empty_indices[I]] };
3272  }
3273  template<size_t...Ints>
3274  constexpr auto select_bitsets(std::index_sequence<Ints...>) const
3275  -> ct_const_array<non_empty_pair, sizeof...(Ints) >
3276  {
3277  return { { make_row<Ints>() ... } };
3278  }
3279 
3280  table_t m_table{};
3281  ct_const_array<size_t, width> m_non_empty_indices{};
3282  size_t m_non_empty_count{ 0 };
3283  };
3284  using CAssembleSubTypePairs = TPairsMatrix<CSeqFeatData::ESubtype, CSeqFeatData::eSubtype_max>;
3285 
// List of subtype pairs named "allowed"; each ADD_XREF_PAIR entry expands to
// one {eSubtype_x, eSubtype_y} initializer of CAssembleSubTypePairs::init_t.
// The TPairsMatrix constructor stores every pair in both directions, so the
// order of the two subtypes inside an entry is not significant there.
// NOTE(review): presumably consumed to build the matrix of permitted feature
// cross-references - the consumer is outside this chunk, confirm.
3286  static constexpr CAssembleSubTypePairs::init_t g_allowed_pairs[] = {
3287  ADD_XREF_PAIR(ncRNA, preRNA)
3288  ADD_XREF_PAIR(S_region, mRNA)
3289  ADD_XREF_PAIR(gene, preRNA)
3290  ADD_XREF_PAIR(J_segment, gene)
3291  ADD_XREF_PAIR(exon, tmRNA)
3292  ADD_XREF_PAIR(N_region, exon)
3293  ADD_XREF_PAIR(V_region, cdregion)
3294  ADD_XREF_PAIR(intron, preRNA)
3295  ADD_XREF_PAIR(V_segment, preRNA)
3296  ADD_XREF_PAIR(otherRNA, polyA_signal)
3297  ADD_XREF_PAIR(S_region, exon)
3298  ADD_XREF_PAIR(gene, tmRNA)
3299  ADD_XREF_PAIR(otherRNA, preRNA)
3300  ADD_XREF_PAIR(exon, preRNA)
3301  ADD_XREF_PAIR(5UTR, intron)
3302  ADD_XREF_PAIR(mRNA, tmRNA)
3303  ADD_XREF_PAIR(3UTR, intron)
3304  ADD_XREF_PAIR(5UTR, preRNA)
3305  ADD_XREF_PAIR(otherRNA, polyA_site)
3306  ADD_XREF_PAIR(N_region, cdregion)
3307  ADD_XREF_PAIR(N_region, gene)
3308  ADD_XREF_PAIR(V_region, mRNA)
3309  ADD_XREF_PAIR(V_segment, mRNA)
3310  ADD_XREF_PAIR(cdregion, mRNA)
3311  ADD_XREF_PAIR(gene, ncRNA)
3312  ADD_XREF_PAIR(C_region, mRNA)
3313  ADD_XREF_PAIR(exon, tRNA)
3314  ADD_XREF_PAIR(gene, mRNA)
3315  ADD_XREF_PAIR(exon, misc_RNA)
3316  ADD_XREF_PAIR(ncRNA, polyA_signal)
3317  ADD_XREF_PAIR(3UTR, preRNA)
3318  ADD_XREF_PAIR(preRNA, rRNA)
3319  ADD_XREF_PAIR(exon, mRNA)
3320  ADD_XREF_PAIR(gene, rRNA)
3321  ADD_XREF_PAIR(intron, otherRNA)
3322  ADD_XREF_PAIR(V_segment, cdregion)
3323  ADD_XREF_PAIR(N_region, preRNA)
3324  ADD_XREF_PAIR(J_segment, preRNA)
3325  ADD_XREF_PAIR(5UTR, exon)
3326  ADD_XREF_PAIR(gene, polyA_site)
3327  ADD_XREF_PAIR(preRNA, tRNA)
3328  ADD_XREF_PAIR(polyA_signal, preRNA)
3329  ADD_XREF_PAIR(D_segment, cdregion)
3330  ADD_XREF_PAIR(V_region, preRNA)
3331  ADD_XREF_PAIR(cdregion, tmRNA)
3332  ADD_XREF_PAIR(N_region, intron)
3333  ADD_XREF_PAIR(V_region, exon)
3334  ADD_XREF_PAIR(5UTR, gene)
3335  ADD_XREF_PAIR(gene, tRNA)
3336  ADD_XREF_PAIR(TATA_signal, gene)
3337  ADD_XREF_PAIR(D_segment, mRNA)
3338  ADD_XREF_PAIR(tRNA, tmRNA)
3339  ADD_XREF_PAIR(V_segment, exon)
3340  ADD_XREF_PAIR(V_segment, intron)
3341  ADD_XREF_PAIR(cdregion, gene)
3342  ADD_XREF_PAIR(mRNA, preRNA)
3343  ADD_XREF_PAIR(gene, otherRNA)
3344  ADD_XREF_PAIR(enhancer, gene)
3345  ADD_XREF_PAIR(misc_RNA, polyA_signal)
3346  ADD_XREF_PAIR(ncRNA, polyA_site)
3347  ADD_XREF_PAIR(intron, rRNA)
3348  ADD_XREF_PAIR(35_signal, gene)
3349  ADD_XREF_PAIR(misc_RNA, preRNA)
3350  ADD_XREF_PAIR(10_signal, gene)
3351  ADD_XREF_PAIR(preRNA, tmRNA)
3352  ADD_XREF_PAIR(intron, ncRNA)
3353  ADD_XREF_PAIR(misc_RNA, polyA_site)
3354  ADD_XREF_PAIR(5UTR, mRNA)
3355  ADD_XREF_PAIR(J_segment, cdregion)
3356  ADD_XREF_PAIR(C_region, cdregion)
3357  ADD_XREF_PAIR(intron, misc_RNA)
3358  ADD_XREF_PAIR(TATA_signal, preRNA)
3359  ADD_XREF_PAIR(exon, ncRNA)
3360  ADD_XREF_PAIR(3UTR, gene)
3361  ADD_XREF_PAIR(S_region, preRNA)
3362  ADD_XREF_PAIR(exon, rRNA)
3363  ADD_XREF_PAIR(mRNA, regulatory)
3364  ADD_XREF_PAIR(J_segment, intron)
3365  ADD_XREF_PAIR(intron, tRNA)
3366  ADD_XREF_PAIR(S_region, cdregion)
3367  ADD_XREF_PAIR(V_region, gene)
3368  ADD_XREF_PAIR(C_region, intron)
3369  ADD_XREF_PAIR(C_region, exon)
3370  ADD_XREF_PAIR(D_segment, intron)
3371  ADD_XREF_PAIR(C_region, preRNA)
3372  ADD_XREF_PAIR(3UTR, mRNA)
3373  ADD_XREF_PAIR(intron, mRNA)
3374  ADD_XREF_PAIR(J_segment, mRNA)
3375  ADD_XREF_PAIR(mRNA, polyA_site)
3376  ADD_XREF_PAIR(V_segment, gene)
3377  ADD_XREF_PAIR(3UTR, exon)
3378  ADD_XREF_PAIR(V_region, intron)
3379  ADD_XREF_PAIR(D_segment, exon)
3380  ADD_XREF_PAIR(J_segment, exon)
3381  ADD_XREF_PAIR(gene, promoter)
3382  ADD_XREF_PAIR(D_segment, preRNA)
3383  ADD_XREF_PAIR(gene, misc_RNA)
3384  ADD_XREF_PAIR(exon, gene)
3385  ADD_XREF_PAIR(N_region, mRNA)
3386  ADD_XREF_PAIR(exon, otherRNA)
3387  ADD_XREF_PAIR(C_region, gene)
3388  ADD_XREF_PAIR(S_region, gene)
3389  ADD_XREF_PAIR(D_segment, gene)
3390  ADD_XREF_PAIR(gene, polyA_signal)
3391  ADD_XREF_PAIR(intron, tmRNA)
3392  ADD_XREF_PAIR(gene, intron)
3393  ADD_XREF_PAIR(gene, regulatory)
3394  ADD_XREF_PAIR(mRNA, polyA_signal)
3395  ADD_XREF_PAIR(polyA_site, preRNA)
3396  ADD_XREF_PAIR(S_region, intron)
3397  };
3398 
3399  static constexpr CAssembleSubTypePairs::init_t g_prohibited_pairs[] = {
3400  ADD_XREF_PAIR(3UTR, promoter)
3401  ADD_XREF_PAIR(enhancer, rRNA)
3402  ADD_XREF_PAIR(3UTR, 5UTR)
3403  ADD_XREF_PAIR(cdregion, cdregion)
3404  ADD_XREF_PAIR(otherRNA, otherRNA)
3405  ADD_XREF_PAIR(35_signal, D_segment)
3406  ADD_XREF_PAIR(polyA_site, regulatory)
3407  ADD_XREF_PAIR(N_region, promoter)
3408  ADD_XREF_PAIR(cdregion, regulatory)
3409  ADD_XREF_PAIR(35_signal, misc_RNA)
3410  ADD_XREF_PAIR(mRNA, otherRNA)
3411  ADD_XREF_PAIR(V_region, polyA_signal)
3412  ADD_XREF_PAIR(35_signal, J_segment)
3413  ADD_XREF_PAIR(rRNA, rRNA)
3414  ADD_XREF_PAIR(C_region, otherRNA)
3415  ADD_XREF_PAIR(TATA_signal, V_segment)
3416  ADD_XREF_PAIR(ncRNA, regulatory)
3417  ADD_XREF_PAIR(intron, polyA_site)
3418  ADD_XREF_PAIR(5UTR, C_region)
3419  ADD_XREF_PAIR(intron, intron)
3420  ADD_XREF_PAIR(35_signal, V_segment)
3421  ADD_XREF_PAIR(5UTR, D_segment)
3422  ADD_XREF_PAIR(10_signal, S_region)
3423  ADD_XREF_PAIR(J_segment, tRNA)
3424  ADD_XREF_PAIR(V_region, promoter)
3425  ADD_XREF_PAIR(5UTR, rRNA)
3426  ADD_XREF_PAIR(35_signal, C_region)
3427  ADD_XREF_PAIR(polyA_signal, tRNA)
3428  ADD_XREF_PAIR(J_segment, enhancer)
3429  ADD_XREF_PAIR(cdregion, ncRNA)
3430  ADD_XREF_PAIR(10_signal, rRNA)
3431  ADD_XREF_PAIR(J_segment, tmRNA)
3432  ADD_XREF_PAIR(mRNA, ncRNA)
3433  ADD_XREF_PAIR(N_region, regulatory)
3434  ADD_XREF_PAIR(TATA_signal, cdregion)
3435  ADD_XREF_PAIR(10_signal, polyA_site)
3436  ADD_XREF_PAIR(10_signal, V_segment)
3437  ADD_XREF_PAIR(S_region, tmRNA)
3438  ADD_XREF_PAIR(S_region, misc_RNA)
3439  ADD_XREF_PAIR(mRNA, tRNA)
3440  ADD_XREF_PAIR(TATA_signal, exon)
3441  ADD_XREF_PAIR(10_signal, tRNA)
3442  ADD_XREF_PAIR(35_signal, cdregion)
3443  ADD_XREF_PAIR(35_signal, tRNA)
3444  ADD_XREF_PAIR(cdregion, otherRNA)
3445  ADD_XREF_PAIR(35_signal, enhancer)
3446  ADD_XREF_PAIR(10_signal, tmRNA)
3447  ADD_XREF_PAIR(35_signal, V_region)
3448  ADD_XREF_PAIR(C_region, C_region)
3449  ADD_XREF_PAIR(enhancer, promoter)
3450  ADD_XREF_PAIR(3UTR, tmRNA)
3451  ADD_XREF_PAIR(D_segment, regulatory)
3452  ADD_XREF_PAIR(35_signal, otherRNA)
3453  ADD_XREF_PAIR(otherRNA, regulatory)
3454  ADD_XREF_PAIR(V_region, V_region)
3455  ADD_XREF_PAIR(35_signal, rRNA)
3456  ADD_XREF_PAIR(J_segment, S_region)
3457  ADD_XREF_PAIR(misc_RNA, ncRNA)
3458  ADD_XREF_PAIR(V_region, polyA_site)
3459  ADD_XREF_PAIR(S_region, tRNA)
3460  ADD_XREF_PAIR(V_segment, rRNA)
3461  ADD_XREF_PAIR(N_region, misc_RNA)
3462  ADD_XREF_PAIR(J_segment, misc_RNA)
3463  ADD_XREF_PAIR(mRNA, mRNA)
3464  ADD_XREF_PAIR(5UTR, V_segment)
3465  ADD_XREF_PAIR(N_region, tmRNA)
3466  ADD_XREF_PAIR(N_region, ncRNA)
3467  ADD_XREF_PAIR(3UTR, enhancer)
3468  ADD_XREF_PAIR(TATA_signal, tmRNA)
3469  ADD_XREF_PAIR(D_segment, tRNA)
3470  ADD_XREF_PAIR(enhancer, preRNA)
3471  ADD_XREF_PAIR(D_segment, polyA_site)
3472  ADD_XREF_PAIR(3UTR, cdregion)
3473  ADD_XREF_PAIR(ncRNA, rRNA)
3474  ADD_XREF_PAIR(promoter, tmRNA)
3475  ADD_XREF_PAIR(N_region, polyA_signal)
3476  ADD_XREF_PAIR(S_region, otherRNA)
3477  ADD_XREF_PAIR(35_signal, S_region)
3478  ADD_XREF_PAIR(10_signal, V_region)
3479  ADD_XREF_PAIR(misc_RNA, regulatory)
3480  ADD_XREF_PAIR(C_region, promoter)
3481  ADD_XREF_PAIR(otherRNA, tRNA)
3482  ADD_XREF_PAIR(J_segment, promoter)
3483  ADD_XREF_PAIR(polyA_site, tmRNA)
3484  ADD_XREF_PAIR(preRNA, promoter)
3485  ADD_XREF_PAIR(otherRNA, rRNA)
3486  ADD_XREF_PAIR(10_signal, D_segment)
3487  ADD_XREF_PAIR(35_signal, promoter)
3488  ADD_XREF_PAIR(enhancer, tRNA)
3489  ADD_XREF_PAIR(10_signal, misc_RNA)
3490  ADD_XREF_PAIR(rRNA, regulatory)
3491  ADD_XREF_PAIR(10_signal, N_region)
3492  ADD_XREF_PAIR(5UTR, enhancer)
3493  ADD_XREF_PAIR(TATA_signal, promoter)
3494  ADD_XREF_PAIR(D_segment, tmRNA)
3495  ADD_XREF_PAIR(misc_RNA, otherRNA)
3496  ADD_XREF_PAIR(D_segment, V_region)
3497  ADD_XREF_PAIR(35_signal, preRNA)
3498  ADD_XREF_PAIR(3UTR, otherRNA)
3499  ADD_XREF_PAIR(polyA_signal, promoter)
3500  ADD_XREF_PAIR(S_region, regulatory)
3501  ADD_XREF_PAIR(misc_RNA, misc_RNA)
3502  ADD_XREF_PAIR(10_signal, cdregion)
3503  ADD_XREF_PAIR(5UTR, S_region)
3504  ADD_XREF_PAIR(10_signal, J_segment)
3505  ADD_XREF_PAIR(ncRNA, otherRNA)
3506  ADD_XREF_PAIR(otherRNA, tmRNA)
3507  ADD_XREF_PAIR(cdregion, polyA_signal)
3508  ADD_XREF_PAIR(J_segment, polyA_site)
3509  ADD_XREF_PAIR(cdregion, enhancer)
3510  ADD_XREF_PAIR(J_segment, J_segment)
3511  ADD_XREF_PAIR(regulatory, tmRNA)
3512  ADD_XREF_PAIR(S_region, polyA_site)
3513  ADD_XREF_PAIR(35_signal, tmRNA)
3514  ADD_XREF_PAIR(D_segment, polyA_signal)
3515  ADD_XREF_PAIR(35_signal, exon)
3516  ADD_XREF_PAIR(intron, regulatory)
3517  ADD_XREF_PAIR(enhancer, enhancer)
3518  ADD_XREF_PAIR(10_signal, polyA_signal)
3519  ADD_XREF_PAIR(rRNA, tmRNA)
3520  ADD_XREF_PAIR(D_segment, ncRNA)
3521  ADD_XREF_PAIR(N_region, tRNA)
3522  ADD_XREF_PAIR(cdregion, preRNA)
3523  ADD_XREF_PAIR(enhancer, regulatory)
3524  ADD_XREF_PAIR(D_segment, promoter)
3525  ADD_XREF_PAIR(5UTR, V_region)
3526  ADD_XREF_PAIR(35_signal, polyA_signal)
3527  ADD_XREF_PAIR(10_signal, intron)
3528  ADD_XREF_PAIR(J_segment, polyA_signal)
3529  ADD_XREF_PAIR(V_region, otherRNA)
3530  ADD_XREF_PAIR(polyA_site, polyA_site)
3531  ADD_XREF_PAIR(mRNA, promoter)
3532  ADD_XREF_PAIR(enhancer, ncRNA)
3533  ADD_XREF_PAIR(tRNA, tRNA)
3534  ADD_XREF_PAIR(3UTR, V_region)
3535  ADD_XREF_PAIR(C_region, S_region)
3536  ADD_XREF_PAIR(D_segment, S_region)
3537  ADD_XREF_PAIR(D_segment, N_region)
3538  ADD_XREF_PAIR(polyA_site, tRNA)
3539  ADD_XREF_PAIR(C_region, misc_RNA)
3540  ADD_XREF_PAIR(10_signal, regulatory)
3541  ADD_XREF_PAIR(35_signal, polyA_site)
3542  ADD_XREF_PAIR(5UTR, misc_RNA)
3543  ADD_XREF_PAIR(J_segment, rRNA)
3544  ADD_XREF_PAIR(5UTR, polyA_site)
3545  ADD_XREF_PAIR(misc_RNA, tmRNA)
3546  ADD_XREF_PAIR(C_region, J_segment)
3547  ADD_XREF_PAIR(V_segment, polyA_signal)
3548  ADD_XREF_PAIR(V_region, misc_RNA)
3549  ADD_XREF_PAIR(V_region, regulatory)
3550  ADD_XREF_PAIR(10_signal, mRNA)
3551  ADD_XREF_PAIR(misc_RNA, rRNA)
3552  ADD_XREF_PAIR(TATA_signal, V_region)
3553  ADD_XREF_PAIR(J_segment, N_region)
3554  ADD_XREF_PAIR(ncRNA, promoter)
3555  ADD_XREF_PAIR(S_region, polyA_signal)
3556  ADD_XREF_PAIR(D_segment, rRNA)
3557  ADD_XREF_PAIR(polyA_site, rRNA)
3558  ADD_XREF_PAIR(V_region, tRNA)
3559  ADD_XREF_PAIR(D_segment, D_segment)
3560  ADD_XREF_PAIR(J_segment, otherRNA)
3561  ADD_XREF_PAIR(V_segment, polyA_site)
3562  ADD_XREF_PAIR(5UTR, otherRNA)
3563  ADD_XREF_PAIR(exon, exon)
3564  ADD_XREF_PAIR(exon, intron)
3565  ADD_XREF_PAIR(promoter, promoter)
3566  ADD_XREF_PAIR(cdregion, polyA_site)
3567  ADD_XREF_PAIR(V_region, enhancer)
3568  ADD_XREF_PAIR(TATA_signal, misc_RNA)
3569  ADD_XREF_PAIR(TATA_signal, rRNA)
3570  ADD_XREF_PAIR(3UTR, J_segment)
3571  ADD_XREF_PAIR(J_segment, regulatory)
3572  ADD_XREF_PAIR(intron, polyA_signal)
3573  ADD_XREF_PAIR(C_region, N_region)
3574  ADD_XREF_PAIR(N_region, enhancer)
3575  ADD_XREF_PAIR(TATA_signal, enhancer)
3576  ADD_XREF_PAIR(preRNA, preRNA)
3577  ADD_XREF_PAIR(3UTR, misc_RNA)
3578  ADD_XREF_PAIR(C_region, D_segment)
3579  ADD_XREF_PAIR(V_segment, otherRNA)
3580  ADD_XREF_PAIR(5UTR, N_region)
3581  ADD_XREF_PAIR(35_signal, mRNA)
3582  ADD_XREF_PAIR(3UTR, TATA_signal)
3583  ADD_XREF_PAIR(V_region, ncRNA)
3584  ADD_XREF_PAIR(10_signal, preRNA)
3585  ADD_XREF_PAIR(enhancer, otherRNA)
3586  ADD_XREF_PAIR(10_signal, exon)
3587  ADD_XREF_PAIR(3UTR, V_segment)
3588  ADD_XREF_PAIR(misc_RNA, tRNA)
3589  ADD_XREF_PAIR(cdregion, exon)
3590  ADD_XREF_PAIR(10_signal, TATA_signal)
3591  ADD_XREF_PAIR(5UTR, polyA_signal)
3592  ADD_XREF_PAIR(tmRNA, tmRNA)
3593  ADD_XREF_PAIR(5UTR, cdregion)
3594  ADD_XREF_PAIR(35_signal, 35_signal)
3595  ADD_XREF_PAIR(TATA_signal, intron)
3596  ADD_XREF_PAIR(ncRNA, ncRNA)
3597  ADD_XREF_PAIR(promoter, tRNA)
3598  ADD_XREF_PAIR(3UTR, ncRNA)
3599  ADD_XREF_PAIR(35_signal, N_region)
3600  ADD_XREF_PAIR(N_region, otherRNA)
3601  ADD_XREF_PAIR(C_region, tRNA)
3602  ADD_XREF_PAIR(5UTR, 5UTR)
3603  ADD_XREF_PAIR(N_region, S_region)
3604  ADD_XREF_PAIR(TATA_signal, regulatory)
3605  ADD_XREF_PAIR(V_segment, tRNA)
3606  ADD_XREF_PAIR(ncRNA, tRNA)
3607  ADD_XREF_PAIR(5UTR, ncRNA)
3608  ADD_XREF_PAIR(3UTR, S_region)
3609  ADD_XREF_PAIR(V_segment, regulatory)
3610  ADD_XREF_PAIR(3UTR, D_segment)
3611  ADD_XREF_PAIR(35_signal, 5UTR)
3612  ADD_XREF_PAIR(3UTR, C_region)
3613  ADD_XREF_PAIR(cdregion, intron)
3614  ADD_XREF_PAIR(cdregion, rRNA)
3615  ADD_XREF_PAIR(TATA_signal, TATA_signal)
3616  ADD_XREF_PAIR(35_signal, intron)
3617  ADD_XREF_PAIR(N_region, rRNA)
3618  ADD_XREF_PAIR(35_signal, TATA_signal)
3619  ADD_XREF_PAIR(mRNA, rRNA)
3620  ADD_XREF_PAIR(10_signal, 10_signal)
3621  ADD_XREF_PAIR(C_region, V_segment)
3622  ADD_XREF_PAIR(rRNA, tRNA)
3623  ADD_XREF_PAIR(TATA_signal, tRNA)
3624  ADD_XREF_PAIR(TATA_signal, otherRNA)
3625  ADD_XREF_PAIR(C_region, rRNA)
3626  ADD_XREF_PAIR(35_signal, ncRNA)
3627  ADD_XREF_PAIR(3UTR, N_region)
3628  ADD_XREF_PAIR(3UTR, polyA_site)
3629  ADD_XREF_PAIR(S_region, TATA_signal)
3630  ADD_XREF_PAIR(V_region, V_segment)
3631  ADD_XREF_PAIR(N_region, N_region)
3632  ADD_XREF_PAIR(cdregion, promoter)
3633  ADD_XREF_PAIR(D_segment, J_segment)
3634  ADD_XREF_PAIR(5UTR, tmRNA)
3635  ADD_XREF_PAIR(enhancer, exon)
3636  ADD_XREF_PAIR(S_region, S_region)
3637  ADD_XREF_PAIR(5UTR, regulatory)
3638  ADD_XREF_PAIR(enhancer, misc_RNA)
3639  ADD_XREF_PAIR(exon, polyA_signal)
3640  ADD_XREF_PAIR(3UTR, rRNA)
3641  ADD_XREF_PAIR(gene, gene)
3642  ADD_XREF_PAIR(TATA_signal, polyA_site)
3643  ADD_XREF_PAIR(enhancer, polyA_site)
3644  ADD_XREF_PAIR(5UTR, tRNA)
3645  ADD_XREF_PAIR(enhancer, polyA_signal)
3646  ADD_XREF_PAIR(D_segment, otherRNA)
3647  ADD_XREF_PAIR(C_region, polyA_site)
3648  ADD_XREF_PAIR(J_segment, TATA_signal)
3649  ADD_XREF_PAIR(polyA_signal, polyA_site)
3650  ADD_XREF_PAIR(10_signal, enhancer)
3651  ADD_XREF_PAIR(TATA_signal, ncRNA)
3652  ADD_XREF_PAIR(promoter, regulatory)
3653  ADD_XREF_PAIR(S_region, V_segment)
3654  ADD_XREF_PAIR(S_region, V_region)
3655  ADD_XREF_PAIR(J_segment, ncRNA)
3656  ADD_XREF_PAIR(TATA_signal, polyA_signal)
3657  ADD_XREF_PAIR(C_region, enhancer)
3658  ADD_XREF_PAIR(ncRNA, tmRNA)
3659  ADD_XREF_PAIR(D_segment, V_segment)
3660  ADD_XREF_PAIR(C_region, polyA_signal)
3661  ADD_XREF_PAIR(polyA_site, promoter)
3662  ADD_XREF_PAIR(35_signal, regulatory)
3663  ADD_XREF_PAIR(enhancer, mRNA)
3664  ADD_XREF_PAIR(preRNA, regulatory)
3665  ADD_XREF_PAIR(10_signal, 3UTR)
3666  ADD_XREF_PAIR(10_signal, C_region)
3667  ADD_XREF_PAIR(polyA_signal, regulatory)
3668  ADD_XREF_PAIR(C_region, tmRNA)
3669  ADD_XREF_PAIR(3UTR, polyA_signal)
3670  ADD_XREF_PAIR(regulatory, regulatory)
3671  ADD_XREF_PAIR(V_segment, V_segment)
3672  ADD_XREF_PAIR(10_signal, 35_signal)
3673  ADD_XREF_PAIR(D_segment, enhancer)
3674  ADD_XREF_PAIR(V_segment, ncRNA)
3675  ADD_XREF_PAIR(V_segment, promoter)
3676  ADD_XREF_PAIR(V_segment, misc_RNA)
3677  ADD_XREF_PAIR(D_segment, TATA_signal)
3678  ADD_XREF_PAIR(N_region, V_region)
3679  ADD_XREF_PAIR(N_region, polyA_site)
3680  ADD_XREF_PAIR(C_region, ncRNA)
3681  ADD_XREF_PAIR(5UTR, promoter)
3682  ADD_XREF_PAIR(C_region, TATA_signal)
3683  ADD_XREF_PAIR(exon, polyA_site)
3684  ADD_XREF_PAIR(D_segment, misc_RNA)
3685  ADD_XREF_PAIR(S_region, rRNA)
3686  ADD_XREF_PAIR(S_region, ncRNA)
3687  ADD_XREF_PAIR(S_region, promoter)
3688  ADD_XREF_PAIR(5UTR, TATA_signal)
3689  ADD_XREF_PAIR(N_region, TATA_signal)
3690  ADD_XREF_PAIR(V_region, rRNA)
3691  ADD_XREF_PAIR(otherRNA, promoter)
3692  ADD_XREF_PAIR(exon, promoter)
3693  ADD_XREF_PAIR(polyA_signal, rRNA)
3694  ADD_XREF_PAIR(V_segment, tmRNA)
3695  ADD_XREF_PAIR(J_segment, V_region)
3696  ADD_XREF_PAIR(cdregion, tRNA)
3697  ADD_XREF_PAIR(enhancer, intron)
3698  ADD_XREF_PAIR(regulatory, tRNA)
3699  ADD_XREF_PAIR(C_region, regulatory)
3700  ADD_XREF_PAIR(polyA_signal, tmRNA)
3701  ADD_XREF_PAIR(misc_RNA, promoter)
3702  ADD_XREF_PAIR(J_segment, V_segment)
3703  ADD_XREF_PAIR(N_region, V_segment)
3704  ADD_XREF_PAIR(intron, promoter)
3705  ADD_XREF_PAIR(V_segment, enhancer)
3706  ADD_XREF_PAIR(10_signal, otherRNA)
3707  ADD_XREF_PAIR(TATA_signal, mRNA)
3708  ADD_XREF_PAIR(S_region, enhancer)
3709  ADD_XREF_PAIR(3UTR, tRNA)
3710  ADD_XREF_PAIR(V_region, tmRNA)
3711  ADD_XREF_PAIR(C_region, V_region)
3712  ADD_XREF_PAIR(3UTR, regulatory)
3713  ADD_XREF_PAIR(10_signal, ncRNA)
3714  ADD_XREF_PAIR(10_signal, 5UTR)
3715  ADD_XREF_PAIR(polyA_signal, polyA_signal)
3716  ADD_XREF_PAIR(3UTR, 3UTR)
3717  ADD_XREF_PAIR(35_signal, 3UTR)
3718  ADD_XREF_PAIR(enhancer, tmRNA)
3719  ADD_XREF_PAIR(10_signal, promoter)
3720  ADD_XREF_PAIR(5UTR, J_segment)
3721  ADD_XREF_PAIR(cdregion, misc_RNA)
3722  ADD_XREF_PAIR(exon, regulatory)
3723  ADD_XREF_PAIR(mRNA, misc_RNA)
3724  ADD_XREF_PAIR(promoter, rRNA)
3725  };
3726 
3727  // Three steps initialization is still required until C++17 is engaged
3728  // this doesn't impact performance or memory footprints
3729  static constexpr CAssembleSubTypePairs g_allowed_init(g_allowed_pairs);
3730  static constexpr CAssembleSubTypePairs g_prohibited_init(g_prohibited_pairs);
3731 
3732 // constant time access tables use 2544 bytes each, regardless of the number of pairs used
3733 // binary search table requires 928 bytes each, and depends on number of pairs used
3734 #if 1 // use tables with constant time access
3735  static constexpr auto g_allowed_xrefs = g_allowed_init.get_bitsets();
3736  static constexpr auto g_prohibited_xrefs = g_prohibited_init.get_bitsets();
3737 #else // or use binary search tables
3738  static constexpr auto g_allowed_xrefs = g_allowed_init.select_bitsets<g_allowed_init.NonEmptyCount()>();
3739  static constexpr auto g_prohibited_xrefs = g_prohibited_init.select_bitsets<g_prohibited_init.NonEmptyCount()>();
3740 #endif
3741 
3742 #undef ADD_XREF_PAIR
3743 }
3744 
3746 {
     // Look the (subtype1, subtype2) pair up in the g_allowed_xrefs table
     // through the shared CAssembleSubTypePairs::Check helper.
3747  return CAssembleSubTypePairs::Check(g_allowed_xrefs, subtype1, subtype2);
3748 }
3750 {
     // Look the (subtype1, subtype2) pair up in the g_prohibited_xrefs table
     // through the shared CAssembleSubTypePairs::Check helper.
3751  return CAssembleSubTypePairs::Check(g_prohibited_xrefs, subtype1, subtype2);
3752 }
3753 
3754 /////////////////// end of CSeqFeatData methods
3755 
3756 
3758 {
3759  static unique_ptr<CFeatList> theFeatList;
3760 
3761  if ( !theFeatList.get() ) {
3762  DEFINE_STATIC_MUTEX(s_Mutex);
3763  CMutexGuard LOCK(s_Mutex);
3764  if ( !theFeatList.get() ) {
3765  theFeatList.reset(new CFeatList());
3766  }
3767  }
3768  return theFeatList.get();
3769 }
3770 
3772 {
3773  static unique_ptr<CBondList> theBondList;
3774 
3775  if ( !theBondList.get() ) {
3776  DEFINE_STATIC_MUTEX(s_Mutex);
3777  CMutexGuard LOCK(s_Mutex);
3778  if ( !theBondList.get() ) {
3779  theBondList.reset(new CBondList());
3780  }
3781  }
3782  return theBondList.get();
3783 }
3784 
3785 
3787 {
3788  static unique_ptr<CSiteList> theSiteList;
3789 
3790  if ( !theSiteList.get() ) {
3791  DEFINE_STATIC_MUTEX(s_Mutex);
3792  CMutexGuard LOCK(s_Mutex);
3793  if ( !theSiteList.get() ) {
3794  theSiteList.reset(new CSiteList());
3795  }
3796  }
3797  return theSiteList.get();
3798 }
3799 
3801 {
3802  const TSubtypeSet & regulatory_subtypes_set = GetSetOfRegulatorySubtypes();
3803  return (
3804  regulatory_subtypes_set.find(subtype) !=
3805  regulatory_subtypes_set.end() );
3806 }
3807 
3808 
3810 {
     // Maps a feature subtype to its regulatory-class name.
     // Yields kEmptyStr when the subtype is not regulatory, when it is
     // eSubtype_misc_signal, or when it is absent from the map built below.

3811  // Special cases where subtype does not translate to its
3812  // SubtypeValueToName equivalent.
3813  typedef map<ESubtype, string> TSubtypeToNameMap;
3814 
     // Factory handed to CSafeStatic below; it builds the subtype -> name map.
3815  struct FCreateSubtypeNameMap {
3816  static TSubtypeToNameMap * Create() {
3817 
3818  // start with SubtypeValueToName() for every subtype in the
3819  // regulatory set; special cases are overridden further down
3820  AutoPtr<TSubtypeToNameMap> p_new_map(new TSubtypeToNameMap);
3821 
3822  const TSubtypeSet & regulatory_subtypes_set =
3824  ITERATE(TSubtypeSet, subtype_iter, regulatory_subtypes_set) {
3825  (*p_new_map)[*subtype_iter] = SubtypeValueToName(*subtype_iter);
3826  }
3827 
     // one map entry per element of the set
3828  _ASSERT( regulatory_subtypes_set.size() == p_new_map->size() );
3829 
3830  // override for special cases
3831  typedef SStaticPair<ESubtype, const char *> TSubtypeNameElem;
3832  static const TSubtypeNameElem sc_subtype_name_map[] = {
3833  { CSeqFeatData::eSubtype_polyA_signal, "polyA_signal_sequence"},
3834  { CSeqFeatData::eSubtype_RBS, "ribosome_binding_site"},
3835  { CSeqFeatData::eSubtype_TATA_signal, "TATA_box"},
3836  { CSeqFeatData::eSubtype_35_signal, "minus_35_signal"},
3837  { CSeqFeatData::eSubtype_10_signal, "minus_10_signal"},
3838  };
3839 
3840  ITERATE_0_IDX(special_case_idx, ArraySize(sc_subtype_name_map)) {
3841  const TSubtypeNameElem & subtype_name_elem =
3842  sc_subtype_name_map[special_case_idx];
3843 
3844  (*p_new_map)[subtype_name_elem.first] =
3845  subtype_name_elem.second;
3846  }
3847 
     // the overrides must only replace existing entries: the size has to
     // stay equal to that of the regulatory set
3848  _ASSERT( regulatory_subtypes_set.size() == p_new_map->size() );
3849  return p_new_map.release();
3850  }
3851  };
3852 
3853  static CSafeStatic<TSubtypeToNameMap> sc_SubtypeToNameMap(
3854  FCreateSubtypeNameMap::Create);
3855 
3856  if( ! IsRegulatory(subtype) ) {
3857  return kEmptyStr;
3858  } else if( subtype == eSubtype_misc_signal ) {
3859  // this subtype does not have a string equivalent
3860  return kEmptyStr;
3861  } else {
3862  TSubtypeToNameMap::const_iterator find_iter =
3863  sc_SubtypeToNameMap->find(subtype);
3864  if( find_iter != sc_SubtypeToNameMap->end() ) {
3865  return find_iter->second;
3866  }
3867  }
3868 
3869  // give up
3870  return kEmptyStr;
3871 }
3872 
3874 CSeqFeatData::GetRegulatoryClass(const string & class_name )
3875 {
     // Reverse lookup: regulatory-class name -> subtype.
     // Yields eSubtype_bad when class_name is not a key of the map.
     // The lookup uses map<string, ...>::find, i.e. an exact string match.
3876  typedef map<string, ESubtype> TNameToSubtypeMap;
3877 
3878  // to avoid getting out of sync, this map is created via the
3879  // inverse of the subtype-to-name map.
3880  struct FCreateNameToSubtypeMap {
3881  static TNameToSubtypeMap * Create() {
3882  AutoPtr<TNameToSubtypeMap> p_new_map(new TNameToSubtypeMap);
3883 
3884  const TSubtypeSet & regulatory_subtypes_set =
3886  ITERATE(TSubtypeSet, subtype_iter, regulatory_subtypes_set) {
     // key is whatever the subtype-taking overload returns for this
     // subtype (that may be the empty string)
3887  const string & class_name =
3888  GetRegulatoryClass(*subtype_iter);
3889  (*p_new_map)[class_name] = *subtype_iter;
3890  }
3891 
     // every subtype must have produced a distinct name
3892  _ASSERT( p_new_map->size() == regulatory_subtypes_set.size() );
3893  return p_new_map.release();
3894  }
3895  };
3896 
3897  static CSafeStatic<TNameToSubtypeMap> ms_NameToSubtypeMap(
3898  FCreateNameToSubtypeMap::Create);
3899 
3900  TNameToSubtypeMap::const_iterator find_iter =
3901  ms_NameToSubtypeMap->find(class_name);
3902  if( find_iter != ms_NameToSubtypeMap->end() ) {
3903  return find_iter->second;
3904  }
3905 
     // unknown class name
3906  return eSubtype_bad;
3907 }
3908 
3909 
3911 {
     // List of regulatory-class names, kept in a function-local static
     // vector that is built from the literals below and then returned.
3912  static vector<string> choices = {
3913  "promoter",
3914  "ribosome_binding_site",
3915  "attenuator",
3916  "CAAT_signal",
3917  "DNase_I_hypersensitive_site",
3918  "enhancer",
3919  "enhancer_blocking_element",
3920  "GC_signal",
3921  "imprinting_control_region",
3922  "insulator",
3923  "locus_control_region",
3924  "matrix_attachment_region",
3925  "minus_10_signal",
3926  "minus_35_signal",
3927  "polyA_signal_sequence",
3928  "recoding_stimulatory_region",
3929  "recombination_enhancer",
3930  "replication_regulatory_region",
3931  "response_element",
3932  "riboswitch",
3933  "silencer",
3934  "TATA_box",
3935  "terminator",
3936  "transcriptional_cis_regulatory_region",
3937  "uORF",
3938  };
3939 
3940  return choices;
3941 }
3942 
3944 {
3945  static vector<string> regulatory_class_values;
3946  if (regulatory_class_values.empty()) {
3947  regulatory_class_values = GetRegulatoryClassList();
3948  }
3949 
3950  string original = val;
3951 
3952  const string* valid_val = NStr::FindNoCase(regulatory_class_values, val);
3953  if (valid_val != nullptr) {
3954  val = *valid_val;
3955  }
3956 
3957  return original != val;
3958 }
3959 
3961 {
     // List of recombination-class names, kept in a function-local static
     // vector that is built from the literals below and then returned.
3962  static vector<string> choices = {
3963  "meiotic",
3964  "mitotic",
3965  "non_allelic_homologous",
3966  "chromosome_breakpoint",
3967  };
3968 
3969  return choices;
3970 }
3971 
3973 {
     // Compile-time set of subtypes; the function reports whether
     // `subtype` is a member of it.
3974  static constexpr TSubtypes discouraged_subtypes {
3983  eSubtype_LTR,
3988  eSubtype_RBS,
3998  };
3999 
     // test() result is returned as a plain true/false
4000  if (discouraged_subtypes.test(subtype))
4001  return true;
4002  else
4003  return false;
4004 }
4005 
4007 {
     // Compile-time set of qualifiers; the function reports whether
     // `qual` is a member of it.
4008  static constexpr TQualifiers discouraged_quals {
4013  };
4014 
     // test() result is returned as a plain true/false
4015  if (discouraged_quals.test(qual))
4016  return true;
4017  else
4018  return false;
4019 }
4020 
4021 
4022 //////////////////////////////////////////////////////////////////////////////
4023 
4024 
4028  const char* m_Description;
4029  const char* m_StorageKey;
4030 };
4031 
4037 
4038  { CSeqFeatData::e_Prot, CSeqFeatData::eSubtype_any, "Protein, All", "Prot Master" },
4039  { CSeqFeatData::e_Prot, CSeqFeatData::eSubtype_prot, "Protein", "Prot" },
4040  { CSeqFeatData::e_Prot, CSeqFeatData::eSubtype_preprotein, "ProProtein", "ProProtein" },
4041  { CSeqFeatData::e_Prot, CSeqFeatData::eSubtype_mat_peptide_aa, "Mature Peptide AA", "Mat-Peptide AA" },
4042  { CSeqFeatData::e_Prot, CSeqFeatData::eSubtype_sig_peptide_aa, "Signal Peptide AA", "Sig-Peptide AA" },
4043  { CSeqFeatData::e_Prot, CSeqFeatData::eSubtype_transit_peptide_aa, "Transit Peptide AA", "Transit-Peptide AA" },
4044  { CSeqFeatData::e_Prot, CSeqFeatData::eSubtype_propeptide_aa, "ProPeptide AA", "ProPeptide" },
4045 
4046  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_any, "RNA, All" , "RNA Master" },
4047  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_preRNA, "precursor_RNA", "precursor_RNA" },
4048  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_mRNA, "mRNA", "mRNA" },
4049  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_tRNA, "tRNA", "tRNA" },
4050  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_rRNA, "rRNA", "rRNA" },
4051  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_snRNA, "snRNA", "snRNA" },
4052  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_scRNA, "scRNA", "scRNA" },
4053  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_snoRNA, "sno_RNA", "sno_RNA" },
4054  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_ncRNA, "ncRNA", "ncRNA" },
4055  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_tmRNA, "tmRNA", "tmRNA" },
4056  { CSeqFeatData::e_Rna, CSeqFeatData::eSubtype_otherRNA, "misc_RNA", "misc_RNA" },
4057 
4060 
4061  { CSeqFeatData::e_Imp, CSeqFeatData::eSubtype_any, "Import All", "Import Master" },
4062 
4063  { CSeqFeatData::e_Region, CSeqFeatData::eSubtype_region, "region", "region" },
4064  { CSeqFeatData::e_Comment, CSeqFeatData::eSubtype_comment, "comment", "comment" },
4067  { CSeqFeatData::e_Rsite, CSeqFeatData::eSubtype_rsite, "rsite", "rsite" },
4069  { CSeqFeatData::e_Txinit, CSeqFeatData::eSubtype_txinit, "txinit", "txinit" },
4071  { CSeqFeatData::e_Psec_str, CSeqFeatData::eSubtype_psec_str, "psec_str", "psec_str" },
4072  { CSeqFeatData::e_Non_std_residue, CSeqFeatData::eSubtype_non_std_residue, "non_std_residue", "non_std_residue" },
4074  { CSeqFeatData::e_Biosrc, CSeqFeatData::eSubtype_biosrc, "biosrc", "biosrc" },
4075  { CSeqFeatData::e_Clone, CSeqFeatData::eSubtype_clone, "clone", "clone" },
4077 };
4078 
4079 
4080 /**
4081  CFeatListItem comparator
4082  to sort the set properly.
4083 */
4085 {
     // Primary key is m_Type; m_Subtype breaks ties, with special handling
     // of CSeqFeatData::eSubtype_any.
4086  if (m_Type == rhs.m_Type) {
4087  // the 'Any' subtype should sort lower than anything else in that type.
4089  return rhs.m_Subtype != CSeqFeatData::eSubtype_any;
4090  }
     // an 'Any' right-hand side is never greater than this item
4091  if ( rhs.m_Subtype == CSeqFeatData::eSubtype_any) {
4092  return false;
4093  }
     // otherwise order by subtype value
4094  return m_Subtype < rhs.m_Subtype;
4095  }
     // different types: order by type value
4096  return m_Type < rhs.m_Type;
4097 }
4098 
4099 
4100 /*****
4101  CFeatList definitions.
4102 *****/
4103 
4105 {
4106  x_Init();
4107 }
4108 
4109 
4111 {
4112 }
4113 
4114 bool CFeatList::TypeValid(int type, int subtype) const
4115 {
4116  const_iterator ci_it = m_FeatTypes.find(CFeatListItem(type, subtype, "", ""));
4117  if (ci_it == m_FeatTypes.end()) {
4118  return false;
4119  }
4120  return true;
4121 }
4122 
4123 
4124 bool CFeatList::GetItem(int type, int subtype, CFeatListItem& config_item) const
4125 {
4126  const_iterator ci_it = m_FeatTypes.find(CFeatListItem(type, subtype, "", ""));
4127  if (ci_it == m_FeatTypes.end()) {
4128  return false;
4129  }
4130  config_item = *ci_it;
4131  return true;
4132 }
4133 
4134 
4135 bool CFeatList::GetItemBySubtype(int subtype, CFeatListItem& config_item) const
4136 {
     // fm_it is compared against m_FeatTypeMap.end(): a miss leaves
     // config_item untouched and returns false.
4138  if (fm_it == m_FeatTypeMap.end()) {
4139  return false;
4140  }
     // hit: hand back a copy of the mapped item
4141  config_item = fm_it->second;
4142  return true;
4143 }
4144 
4145 
4146 bool CFeatList::GetItemByDescription(const string& desc, CFeatListItem& config_item) const
4147 {
4148  const_iterator ci_it = begin();
4149  for (; ci_it != end(); ++ci_it) {
4150  if (NStr::EqualNocase(ci_it->GetDescription(), desc)) {
4151  config_item = *ci_it;
4152  return true;
4153  }
4154  }
4155  return false;
4156 }
4157 
4158 
4159 bool CFeatList::GetTypeSubType(const string& desc, int& type, int& subtype) const
4160 {
4161  CFeatListItem config_item;
4162  if ( GetItemByDescription(desc, config_item) ) {
4163  type = config_item.GetType();
4164  subtype = config_item.GetSubtype();
4165  return true;
4166  }
4167  return false;
4168 }
4169 
4170 bool CFeatList::GetItemByKey(const string& key, CFeatListItem& config_item) const
4171 {
4172  const_iterator ci_it = begin();
4173  for (; ci_it != end(); ++ci_it) {
4174  if (ci_it->GetStoragekey() == key) {
4175  config_item = *ci_it;
4176  return true;
4177  }
4178  }
4179  return false;
4180 }
4181 
4182 
4183 string CFeatList::GetDescription(int type, int subtype) const
4184 {
4185  CFeatListItem config_item;
4186  if (!GetItem(type, subtype, config_item)) {
4187  return kEmptyStr;
4188  }
4189  return config_item.GetDescription();
4190 }
4191 
4192 
4193 string CFeatList::GetStoragekey(int type, int subtype) const
4194 {
4195  CFeatListItem config_item;
4196  if (!GetItem(type, subtype, config_item)) {
4197  return kEmptyStr;
4198  }
4199  return config_item.GetStoragekey();
4200 }
4201 
4202 
4203 string CFeatList::GetStoragekey(int subtype) const
4204 {
4205  CFeatListItem config_item;
4206  if (!GetItemBySubtype(subtype, config_item)) {
4207  return kEmptyStr;
4208  }
4209  return config_item.GetStoragekey();
4210 }
4211 
4212 
4213 vector<string> CFeatList::GetStoragekeys(int subtype) const
4214 {
     // Collects storage keys related to `subtype` into a vector.
4215  vector<string> keys;
     // For a concrete subtype (anything but eSubtype_any) that is known to
     // GetItemBySubtype(): first the key of the eSubtype_any item of the
     // same type (if one exists), then the key of the item itself.
4217  if (subtype != CSeqFeatData::eSubtype_any) {
4218  CFeatListItem item;
4219  if (GetItemBySubtype(subtype, item)) {
4220  CFeatListItem sub_master_item;
4221  if (GetItem(item.GetType(), CSeqFeatData::eSubtype_any, sub_master_item)) {
4222  keys.push_back(sub_master_item.GetStoragekey());
4223  }
4224  keys.push_back(item.GetStoragekey());
4225  }
4226  }
4227 
4228  return keys;
4229 }
4230 
4232 {
     // Step 1: one item per row of the static sc_ConfigItemInit table.
4233  const size_t config_item_size = sizeof(sc_ConfigItemInit)/sizeof(sc_ConfigItemInit[0]);
4234  for (size_t i = 0; i < config_item_size; ++i ) {
4235  CFeatListItem item(sc_ConfigItemInit[i].m_Type,
4236  sc_ConfigItemInit[i].m_Subtype,
4237  sc_ConfigItemInit[i].m_Description,
4238  sc_ConfigItemInit[i].m_StorageKey);
     // the .second of the insert result is checked through _VERIFY
4239  _VERIFY(m_FeatTypes.insert(item).second);
4240  }
4241 
     // Step 2: one item per kImportTable entry; the entry name serves as
     // both description and storage key, the type is derived from the subtype.
4242  for (const SImportEntry* iep = kImportTable; iep < kImportTableEnd; ++iep) {
4243  CFeatListItem item(CSeqFeatData::GetTypeFromSubtype(iep->m_Subtype),
4244  iep->m_Subtype, iep->m_Name, iep->m_Name);
4245  _VERIFY(m_FeatTypes.insert(item).second);
4246  }
4247 
     // Step 3: build the subtype -> item index (m_FeatTypeMap) from
     // everything inserted above.
4248  ITERATE(CFeatList, it, m_FeatTypes) {
4249  const CFeatListItem& item = *it;
4250  int subtype = item.GetSubtype();
4251  if (subtype != CSeqFeatData::eSubtype_any || item.GetType() == CSeqFeatData::e_not_set) {
4252  // only enter the main Master item, no other master items.
4253  // else subtypes are not unique.
4254  m_FeatTypeMap[subtype] = item;
4255  }
4256  }
4257 }
4258 
4259 
4260 /// return a list of all the feature descriptions for a menu or other control.
4261 void CFeatList::GetDescriptions(vector<string> &descs, bool hierarchical) const
4262 {
     // descs is an output parameter: previous content is discarded and one
     // string is appended per visited item.
4263  descs.clear();
4264 
4265 
4267  string this_desc = iter->GetDescription();
4268 
     // In hierarchical mode the result has the form "<parent>/<own>".
4269  if (hierarchical) {
4270  string parent_desc;
     // concrete subtype: parent is the eSubtype_any item of the same type
4271  if (iter->GetSubtype() != CSeqFeatData::eSubtype_any) {
4272  parent_desc = GetDescription(iter->GetType(), CSeqFeatData::eSubtype_any);
     // eSubtype_any item of a set type: it acts as its own parent
4273  } else if (iter->GetType() != CSeqFeatData::e_not_set) {
4274  parent_desc = this_desc;
4275  }
4276 
     // no prefix is added when parent_desc stayed empty
4277  if ( ! parent_desc.empty()) {
4278  this_desc = parent_desc + "/" + this_desc;
4279  }
4280  }
4281 
4282  descs.push_back(this_desc);
4283  }
4284 }
4285 
/// Return a copy of str1 in which every space character (' ') has been
/// replaced by a dash ('-').  All other characters are left as they are.
///
/// @param str1  text to convert (taken by value, so the caller's string
///              is not modified)
/// @return      the converted text
string x_SpaceToDash(string str1)
{
    // Single in-place pass with the standard algorithm instead of a
    // repeated substring search for a one-character pattern.
    std::replace(str1.begin(), str1.end(), ' ', '-');
    return str1;
}
4294 
4295 /////////////////////////////////////////////////////////////////////////////
4296 
4298  { "disulfide", CSeqFeatData::eBond_disulfide },
4299  { "other", CSeqFeatData::eBond_other },
4300  { "thioether", CSeqFeatData::eBond_thioether },
4301  { "thiolester", CSeqFeatData::eBond_thiolester },
4302  { "xlink", CSeqFeatData::eBond_xlink }
4303 };
4305 
4307 {
4308 }
4309 
4310 
4312 {
4313 }
4314 
4315 
4316 bool CBondList::IsBondName(string str) const
4317 {
4318  const_iterator ci_it = sm_BondKeys.find (x_SpaceToDash(str).c_str());
4319  if (ci_it != sm_BondKeys.end ()) {
4320  return true;
4321  } else {
4322  return false;
4323  }
4324 }
4325 
4326 
4327 bool CBondList::IsBondName (string str, CSeqFeatData::EBond& bond_type) const
4328 {
4329  const_iterator ci_it = sm_BondKeys.find (x_SpaceToDash(str).c_str());
4330  if (ci_it != sm_BondKeys.end ()) {
4331  bond_type = ci_it->second;
4332  return true;
4333  } else {
4334  return false;
4335  }
4336 }
4337 
4338 
4340 {
4341  const_iterator ci_it = sm_BondKeys.find (x_SpaceToDash(str).c_str());
4342  if (ci_it == sm_BondKeys.end()) {
4343  NCBI_THROW(CException, eUnknown, "Not a valid bond type!");
4344  } else {
4345  return ci_it->second;
4346  }
4347 }
4348 /////////////////////////////////////////////////////////////////////////////
4349 
4350 
4352  { "acetylation", CSeqFeatData::eSite_acetylation },
4353  { "active", CSeqFeatData::eSite_active },
4354  { "amidation", CSeqFeatData::eSite_amidation },
4355  { "binding", CSeqFeatData::eSite_binding },
4356  { "blocked", CSeqFeatData::eSite_blocked },
4357  { "cleavage", CSeqFeatData::eSite_cleavage },
4358  { "DNA binding", CSeqFeatData::eSite_dna_binding },
4359  { "gamma carboxyglutamic acid", CSeqFeatData::eSite_gamma_carboxyglutamic_acid },
4360  { "glycosylation", CSeqFeatData::eSite_glycosylation },
4361  { "hydroxylation", CSeqFeatData::eSite_hydroxylation },
4362  { "inhibit", CSeqFeatData::eSite_inhibit },
4363  { "lipid binding", CSeqFeatData::eSite_lipid_binding },
4364  { "metal binding", CSeqFeatData::eSite_metal_binding },
4365  { "methylation", CSeqFeatData::eSite_methylation },
4366  { "modified", CSeqFeatData::eSite_modified },
4367  { "mutagenized", CSeqFeatData::eSite_mutagenized },
4368  { "myristoylation", CSeqFeatData::eSite_myristoylation },
4369  { "nitrosylation", CSeqFeatData::eSite_nitrosylation },
4370  { "np binding", CSeqFeatData::eSite_np_binding },
4371  { "other", CSeqFeatData::eSite_other },
4372  { "oxidative deamination", CSeqFeatData::eSite_oxidative_deamination },
4373  { "phosphorylation", CSeqFeatData::eSite_phosphorylation },
4374  { "pyrrolidone carboxylic acid", CSeqFeatData::eSite_pyrrolidone_carboxylic_acid },
4375  { "signal peptide", CSeqFeatData::eSite_signal_peptide },
4376  { "sulfatation", CSeqFeatData::eSite_sulfatation },
4377  { "transit peptide", CSeqFeatData::eSite_transit_peptide },
4378  { "transmembrane region", CSeqFeatData::eSite_transmembrane_region },
4379  { "unclassified", CSeqFeatData::eSite_other }
4380 };
4381 
4383 
4385 {
4386 }
4387 
4388 
4390 {
4391 }
4392 
4393 
4394 bool CSiteList::IsSiteName(string str) const
4395 {
4396  const_iterator ci_it = sm_SiteKeys.find (x_SpaceToDash(str).c_str());
4397  if (ci_it != sm_SiteKeys.end ()) {
4398  return true;
4399  } else {
4400  return false;
4401  }
4402 }
4403 
4404 
4405 bool CSiteList::IsSiteName (string str, CSeqFeatData::ESite& site_type) const
4406 {
4407  const_iterator ci_it = sm_SiteKeys.find (x_SpaceToDash(str).c_str());
4408  if (ci_it != sm_SiteKeys.end ()) {
4409  site_type = ci_it->second;
4410  return true;
4411  } else {
4412  return false;
4413  }
4414 }
4415 
4416 
4418 {
4419  const_iterator ci_it = sm_SiteKeys.find (x_SpaceToDash(str).c_str());
4420  if (ci_it == sm_SiteKeys.end()) {
4421  NCBI_THROW(CException, eUnknown, "Not a valid site type!");
4422  } else {
4423  return ci_it->second;
4424  }
4425 }
4426 
4429 {
     // Static array of subtypes from which sc_RegulatorySubtypes is declared.
4430  static ESubtype const regulatory_subtypes [] = {
4435  eSubtype_LTR,
4439  eSubtype_RBS,
4444  };
4445 
4447  TSubtypeSet, sc_RegulatorySubtypes, regulatory_subtypes);
4448 
     // the set object itself is handed to the caller
4449  return sc_RegulatorySubtypes;
4450 }
4451 
4452 
4454 {
     // Classifies `subtype` by group: each run of case labels below shares
     // one outcome, and the value chosen is returned through rval.
4456  switch (subtype) {
4457  case eSubtype_prot:
4458  case eSubtype_preprotein:
4463  case eSubtype_bond:
4464  case eSubtype_psec_str:
4467  break;
4468  case eSubtype_region:
4469  case eSubtype_pub:
4470  case eSubtype_site:
4472  break;
     // pseudo-subtypes (any / bad / max) form their own group
4473  case eSubtype_any:
4474  case eSubtype_bad:
4475  case eSubtype_max:
4477  break;
     // every subtype not named above
4478  default:
4480  break;
4481  }
4482 
4483  return rval;
4484 }
4485 
4486 
4488 {
     // Yields true for the subtypes named by the case labels below and
     // false for every other subtype.
4489  bool rval = false;
4490 
4491  switch (subtype) {
4492  case eSubtype_regulatory:
4493  case eSubtype_protein_bind:
4494  case eSubtype_misc_feature:
4496  case eSubtype_rep_origin:
4497  case eSubtype_misc_recomb:
4498  case eSubtype_S_region:
4499  case eSubtype_centromere:
4500  case eSubtype_telomere:
4501  case eSubtype_variation:
4502  case eSubtype_misc_binding:
4506  case eSubtype_assembly_gap:
4507  case eSubtype_LTR:
4508  rval = true;
4509  break;
4510  default:
4511  rval = false;
4512  break;
4513  }
4514 
4515  return rval;
4516 }
4517 
4518 
4520 {
     // Default answer is true; the subtypes matched by the switch clear it.
4521  bool required = true;
4522  switch (feat_subtype)
4523  {
4528  required = false;
4529  break;
     // all other subtypes keep the default
4530  default:
4531  break;
4532  }
4533  return required;
4534 }
4535 
4536 
4538 {
     // Default answer is false; the subtypes matched by the switch set it.
4539  bool allowed = false;
4540  switch (feat_subtype)
4541  {
4546  allowed = true;
4547  break;
     // all other subtypes keep the default
4548  default:
4549  break;
4550  }
4551  return allowed;
4552 }
4553 
4554 
4556 {
     // Decides, for a (feat_subtype, qual_type) pair, whether to answer true or
     // false. Presumably this is the two-argument ShouldRepresentAsGbqual that
     // the CGb_qual overload in this file delegates to -- confirm.
     //
     // The checks run strictly in order and the first one that matches returns,
     // so later lists are only consulted for pairs that earlier checks let
     // through. Anything that no check rejects yields true.
     // NOTE(review): one condition line of the general exclusion list may be
     // missing from this listing.
4557  // experiment and inference get their own panels
4558  if (qual_type == CSeqFeatData::eQual_experiment || qual_type == CSeqFeatData::eQual_inference) {
4559  return false;
4560  }
4561  // pseudo and pseudogene are handled separately
4562  if (qual_type == CSeqFeatData::eQual_pseudogene || qual_type == CSeqFeatData::eQual_pseudo) {
4563  return false;
4564  }
4565 
     // product: true only for the feature subtypes listed here, false for all
     // others. Either way this branch returns, so eQual_product never reaches
     // the checks further down.
4566  if (qual_type == CSeqFeatData::eQual_product) {
4567  if (feat_subtype == CSeqFeatData::eSubtype_mat_peptide
4568  || feat_subtype == CSeqFeatData::eSubtype_sig_peptide
4569  || feat_subtype == CSeqFeatData::eSubtype_transit_peptide
4570  || feat_subtype == CSeqFeatData::eSubtype_propeptide
4571  || feat_subtype == CSeqFeatData::eSubtype_C_region
4572  || feat_subtype == CSeqFeatData::eSubtype_D_segment
4573  || feat_subtype == CSeqFeatData::eSubtype_exon
4574  || feat_subtype == CSeqFeatData::eSubtype_J_segment
4575  || feat_subtype == CSeqFeatData::eSubtype_misc_feature
4576  || feat_subtype == CSeqFeatData::eSubtype_N_region
4577  || feat_subtype == CSeqFeatData::eSubtype_S_region
4578  || feat_subtype == CSeqFeatData::eSubtype_V_region
4579  || feat_subtype == CSeqFeatData::eSubtype_V_segment
4580  || feat_subtype == CSeqFeatData::eSubtype_variation) {
4581  return true;
4582  } else {
4583  return false;
4584  }
4585  }
4586 
     // gene features: true only for allele, gene_synonym, locus_tag and map;
     // every other qualifier on a gene yields false. This branch always returns,
     // so all checks below apply to non-gene subtypes only.
4587  if (feat_subtype == CSeqFeatData::eSubtype_gene)
4588  {
4589  if ( qual_type == CSeqFeatData::eQual_allele ||
4590  qual_type == CSeqFeatData::eQual_gene_synonym ||
4591  qual_type == CSeqFeatData::eQual_locus_tag ||
4592  qual_type == CSeqFeatData::eQual_map
4593  )
4594  return true;
4595  else
4596  return false;
4597  }
4598 
4599 
     // General exclusion list: these qualifiers yield false for every remaining
     // (non-gene) subtype.
     // NOTE(review): eQual_product, eQual_pseudo and eQual_experiment can never
     // match here because the checks above have already returned for them; the
     // entries are harmless but dead.
4600  if (qual_type == CSeqFeatData::eQual_citation
4601  || qual_type == CSeqFeatData::eQual_db_xref
4602  || qual_type == CSeqFeatData::eQual_evidence
4603  || qual_type == CSeqFeatData::eQual_exception
4604  || qual_type == CSeqFeatData::eQual_gene
4605  || qual_type == CSeqFeatData::eQual_gene_synonym
4606  || qual_type == CSeqFeatData::eQual_insertion_seq
4607  || qual_type == CSeqFeatData::eQual_label
4608  || qual_type == CSeqFeatData::eQual_locus_tag
4609  || qual_type == CSeqFeatData::eQual_note
4610  || qual_type == CSeqFeatData::eQual_partial
4611  || qual_type == CSeqFeatData::eQual_product
4612  || qual_type == CSeqFeatData::eQual_pseudo
4613  || qual_type == CSeqFeatData::eQual_rpt_unit
4614  || qual_type == CSeqFeatData::eQual_transposon
4615  || qual_type == CSeqFeatData::eQual_experiment
4616  || qual_type == CSeqFeatData::eQual_trans_splicing
4618  || qual_type == CSeqFeatData::eQual_standard_name
4619  || qual_type == CSeqFeatData::eQual_usedin) {
4620  return false;
4621  }
     // Additional exclusions that apply to cdregion features only.
     // NOTE(review): eQual_translation is tested twice in this list; the second
     // occurrence is redundant. The transl_except entry is deliberately
     // commented out in the original.
4622  if (feat_subtype == CSeqFeatData::eSubtype_cdregion) {
4623  if (qual_type == CSeqFeatData::eQual_codon_start
4624  || qual_type == CSeqFeatData::eQual_codon
4625  || qual_type == CSeqFeatData::eQual_EC_number
4626  || qual_type == CSeqFeatData::eQual_gdb_xref
4627  || qual_type == CSeqFeatData::eQual_number
4628  || qual_type == CSeqFeatData::eQual_protein_id
4629  //|| qual_type == CSeqFeatData::eQual_transl_except
4630  || qual_type == CSeqFeatData::eQual_transl_table
4631  || qual_type == CSeqFeatData::eQual_translation
4632  || qual_type == CSeqFeatData::eQual_allele
4633  || qual_type == CSeqFeatData::eQual_translation
4634  || qual_type == CSeqFeatData::eQual_function
4635  || qual_type == CSeqFeatData::eQual_old_locus_tag) {
4636  return false;
4637  }
4638  }
4639 
4640 
4641 
     // map: among the subtypes that reach this point, only repeat_region and
     // gap are accepted (gene was handled by its own branch above).
4642  if (qual_type == CSeqFeatData::eQual_map
4643  && feat_subtype != CSeqFeatData::eSubtype_repeat_region
4644  && feat_subtype != CSeqFeatData::eSubtype_gap) {
4645  return false;
4646  }
     // operon: accepted only on operon features.
4647  if (qual_type == CSeqFeatData::eQual_operon
4648  && feat_subtype != CSeqFeatData::eSubtype_operon) {
4649  return false;
4650  }
     // Nothing rejected the pair.
4651  return true;
4652 }
4653 
4654 
4656 {
     // Convenience form that takes a qualifier object instead of a qualifier
     // type. Returns false when `qual` has no qualifier name set; otherwise the
     // name is converted with CSeqFeatData::GetQualifierType and the decision is
     // delegated to the (feat_subtype, qualifier-type) form of
     // ShouldRepresentAsGbqual.
4657  if (!qual.IsSetQual()) {
4658  return false;
4659  }
4660  return ShouldRepresentAsGbqual(feat_subtype, CSeqFeatData::GetQualifierType(qual.GetQual()));
4661 }
4662 
4663 
4665 {
     // Normalizes `key` in place and reports whether it was changed.
     //
     // Step 1: a few alias keys are replaced by a different key (matching is
     // case-insensitive):
     //   allele, mutation -> variation
     //   Import, virion   -> misc_feature
     //   repeat_unit      -> repeat_region
     //   misc_bind        -> misc_binding
     // Step 2: otherwise the key is looked up case-insensitively in
     // kImportTable; if it matches an entry but differs in letter case, it is
     // replaced by the table's spelling.
     //
     // Returns true only when `key` was rewritten; false when it already had the
     // table's exact spelling or matched nothing at all.
4666  if (NStr::EqualNocase(key, "allele") ||
4667  NStr::EqualNocase(key, "mutation")) {
4668  key = "variation";
4669  return true;
4670  } else if (NStr::EqualNocase(key, "Import") ||
4671  NStr::EqualNocase(key, "virion")) {
4672  key = "misc_feature";
4673  return true;
4674  } else if (NStr::EqualNocase(key, "repeat_unit")) {
4675  key = "repeat_region";
4676  return true;
4677  } else if (NStr::EqualNocase(key, "misc_bind")) {
4678  key = "misc_binding";
4679  return true;
4680  }
     // Linear scan over [kImportTable, kImportTableEnd); stops at the first
     // case-insensitive match.
4681  const SImportEntry* start = kImportTable;
4682  while (start < kImportTableEnd) {
4683  if (NStr::EqualNocase(key, start->m_Name)) {
     // Same letters, different case: adopt the table's spelling.
4684  if (!NStr::Equal(key, start->m_Name)) {
4685  key = start->m_Name;
4686  return true;
4687  } else {
     // Already spelled exactly as in the table: nothing to fix.
4688  return false;
4689  }
4690  }
4691  ++start;
4692  }
     // Not an alias and not in the table: left untouched.
4693  return false;
4694 }
4695 
4696 
4698  const string& productName)
4699 {
     // Returns true when `productName` either equals one of the whole-name
     // phrases in matchPhrases or contains one of the fragments in
     // containedPhrases as a substring; returns false otherwise. Both tests use
     // the plain `==` / `find` operations, so no case folding is applied here.
     // NOTE(review): the first line of this function's signature is not visible
     // in this listing.
     // NOTE(review): neither static vector is modified in this body; they could
     // be declared const.
4700  static vector<string> matchPhrases{ "IS150 protein InsAB", "PCRF domain-containing protein" };
4701  static vector<string> containedPhrases{ "transposase", "chain release" };
4702 
     // Whole-name matches.
4703  for (const auto& phrase: matchPhrases) {
4704  if (phrase == productName) {
4705  return true;
4706  }
4707  }
     // Substring matches.
4708  for (const auto& phrase: containedPhrases) {
4709  if (productName.find(phrase) != string::npos) {
4710  return true;
4711  }
4712  }
4713  return false;
4714 }
4715 
4716 
4717 END_objects_SCOPE // namespace ncbi::objects::
4718 
4720 
4721 /* Original file checksum: lines: 61, chars: 1894, CRC32: 86fb976 */
MAKE_TWOWAY_CONST_MAP(sm_FeatKeys, ct::tagStrNocase, CSeqFeatData::ESubtype, { { "-10_signal", CSeqFeatData::eSubtype_10_signal }, { "-35_signal", CSeqFeatData::eSubtype_35_signal }, { "3'UTR", CSeqFeatData::eSubtype_3UTR }, { "3'clip", CSeqFeatData::eSubtype_3clip }, { "5'UTR", CSeqFeatData::eSubtype_5UTR }, { "5'clip", CSeqFeatData::eSubtype_5clip }, { "Bond", CSeqFeatData::eSubtype_bond }, { "CAAT_signal", CSeqFeatData::eSubtype_CAAT_signal }, { "CDS", CSeqFeatData::eSubtype_cdregion }, { "C_region", CSeqFeatData::eSubtype_C_region }, { "Cit", CSeqFeatData::eSubtype_pub }, { "CloneRef", CSeqFeatData::eSubtype_clone }, { "Comment", CSeqFeatData::eSubtype_comment }, { "D-loop", CSeqFeatData::eSubtype_D_loop }, { "D_segment", CSeqFeatData::eSubtype_D_segment }, { "GC_signal", CSeqFeatData::eSubtype_GC_signal }, { "Het", CSeqFeatData::eSubtype_het }, { "J_segment", CSeqFeatData::eSubtype_J_segment }, { "LTR", CSeqFeatData::eSubtype_LTR }, { "N_region", CSeqFeatData::eSubtype_N_region }, { "NonStdRes", CSeqFeatData::eSubtype_non_std_residue }, { "Num", CSeqFeatData::eSubtype_num }, { "Protein", CSeqFeatData::eSubtype_prot }, { "RBS", CSeqFeatData::eSubtype_RBS }, { "REFERENCE", CSeqFeatData::eSubtype_pub }, { "Region", CSeqFeatData::eSubtype_region }, { "Rsite", CSeqFeatData::eSubtype_rsite }, { "STS", CSeqFeatData::eSubtype_STS }, { "S_region", CSeqFeatData::eSubtype_S_region }, { "SecStr", CSeqFeatData::eSubtype_psec_str }, { "Site", CSeqFeatData::eSubtype_site }, { "Site-ref", CSeqFeatData::eSubtype_site_ref }, { "Src", CSeqFeatData::eSubtype_biosrc }, { "TATA_signal", CSeqFeatData::eSubtype_TATA_signal }, { "TxInit", CSeqFeatData::eSubtype_txinit }, { "User", CSeqFeatData::eSubtype_user }, { "V_region", CSeqFeatData::eSubtype_V_region }, { "V_segment", CSeqFeatData::eSubtype_V_segment }, { "VariationRef", CSeqFeatData::eSubtype_variation_ref }, { "Xref", CSeqFeatData::eSubtype_seq }, { "assembly_gap", CSeqFeatData::eSubtype_assembly_gap }, { "attenuator", 
CSeqFeatData::eSubtype_attenuator }, { "centromere", CSeqFeatData::eSubtype_centromere }, { "conflict", CSeqFeatData::eSubtype_conflict }, { "enhancer", CSeqFeatData::eSubtype_enhancer }, { "exon", CSeqFeatData::eSubtype_exon }, { "gap", CSeqFeatData::eSubtype_gap }, { "gene", CSeqFeatData::eSubtype_gene }, { "iDNA", CSeqFeatData::eSubtype_iDNA }, { "intron", CSeqFeatData::eSubtype_intron }, { "mRNA", CSeqFeatData::eSubtype_mRNA }, { "mat_peptide", CSeqFeatData::eSubtype_mat_peptide_aa }, { "mat_peptide_nt", CSeqFeatData::eSubtype_mat_peptide }, { "misc_RNA", CSeqFeatData::eSubtype_otherRNA }, { "misc_binding", CSeqFeatData::eSubtype_misc_binding }, { "misc_difference", CSeqFeatData::eSubtype_misc_difference }, { "misc_feature", CSeqFeatData::eSubtype_misc_feature }, { "misc_recomb", CSeqFeatData::eSubtype_misc_recomb }, { "misc_signal", CSeqFeatData::eSubtype_misc_signal }, { "misc_structure", CSeqFeatData::eSubtype_misc_structure }, { "mobile_element", CSeqFeatData::eSubtype_mobile_element }, { "modified_base", CSeqFeatData::eSubtype_modified_base }, { "ncRNA", CSeqFeatData::eSubtype_ncRNA }, { "old_sequence", CSeqFeatData::eSubtype_old_sequence }, { "operon", CSeqFeatData::eSubtype_operon }, { "oriT", CSeqFeatData::eSubtype_oriT }, { "polyA_signal", CSeqFeatData::eSubtype_polyA_signal }, { "polyA_site", CSeqFeatData::eSubtype_polyA_site }, { "precursor_RNA", CSeqFeatData::eSubtype_preRNA }, { "prim_transcript", CSeqFeatData::eSubtype_prim_transcript }, { "primer_bind", CSeqFeatData::eSubtype_primer_bind }, { "promoter", CSeqFeatData::eSubtype_promoter }, { "propeptide", CSeqFeatData::eSubtype_propeptide_aa }, { "propeptide_nt", CSeqFeatData::eSubtype_propeptide }, { "proprotein", CSeqFeatData::eSubtype_preprotein }, { "protein_bind", CSeqFeatData::eSubtype_protein_bind }, { "rRNA", CSeqFeatData::eSubtype_rRNA }, { "regulatory", CSeqFeatData::eSubtype_regulatory }, { "rep_origin", CSeqFeatData::eSubtype_rep_origin }, { "repeat_region", 
CSeqFeatData::eSubtype_repeat_region }, { "repeat_unit", CSeqFeatData::eSubtype_repeat_unit }, { "satellite", CSeqFeatData::eSubtype_satellite }, { "scRNA", CSeqFeatData::eSubtype_scRNA }, { "sig_peptide", CSeqFeatData::eSubtype_sig_peptide_aa }, { "sig_peptide_nt", CSeqFeatData::eSubtype_sig_peptide }, { "snRNA", CSeqFeatData::eSubtype_snRNA }, { "snoRNA", CSeqFeatData::eSubtype_snoRNA }, { "source", CSeqFeatData::eSubtype_biosrc }, { "stem_loop", CSeqFeatData::eSubtype_stem_loop }, { "tRNA", CSeqFeatData::eSubtype_tRNA }, { "telomere", CSeqFeatData::eSubtype_telomere }, { "terminator", CSeqFeatData::eSubtype_terminator }, { "tmRNA", CSeqFeatData::eSubtype_tmRNA }, { "transit_peptide", CSeqFeatData::eSubtype_transit_peptide_aa }, { "transit_peptide_nt", CSeqFeatData::eSubtype_transit_peptide }, { "unsure", CSeqFeatData::eSubtype_unsure }, { "variation", CSeqFeatData::eSubtype_variation }, { "virion", CSeqFeatData::eSubtype_virion } }) CSeqFeatData
vector< CSeqFeatData::E_Choice > TSubtypesTable