// Expands to one brace-initialized table entry keyed by a feature choice:
//   { CSeqFeatData::e_<type>,
//     { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb } }
// `type` and `subtype` are given as bare suffixes; the e_ / eSubtype_
// prefixes are token-pasted on by the macro.  key_full and key_gb are passed
// through unchanged.
// NOTE(review): presumably key_full is the full feature key and key_gb the
// GenBank key -- confirm against the entry struct, which is not visible here.
154 #define FEAT_INFO_PAIR(type, subtype, key_full, key_gb) \
155 { CSeqFeatData::e_##type, \
156 { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
172 FEAT_INFO_PAIR(Non_std_residue, non_std_residue,
"NonStdRes",
"NonStdRes"),
// Expands to one brace-initialized table entry keyed by a protein
// "processed" value:
//   { CProt_ref::eProcessed_<proc>,
//     { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb } }
// `proc` and `subtype` are bare suffixes; the eProcessed_ / eSubtype_
// prefixes are token-pasted on by the macro.  The entries built with it are
// looked up by GetProt().GetProcessed() in sc_ProtInfoPairs.
187 #define PROT_INFO_PAIR(proc, subtype, key_full, key_gb) \
188 { CProt_ref::eProcessed_##proc, \
189 { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
194 PROT_INFO_PAIR(signal_peptide, sig_peptide_aa,
"Prot",
"sig_peptide"),
195 PROT_INFO_PAIR(transit_peptide, transit_peptide_aa,
"Prot",
"transit_peptide"),
// Expands to one brace-initialized table entry keyed by a site value:
//   { CSeqFeatData::eSite_<site>,
//     { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb } }
// `site` and `subtype` are bare suffixes; the eSite_ / eSubtype_ prefixes
// are token-pasted on by the macro.
// NOTE(review): presumably these entries populate sc_SiteInfoPairs -- the
// table definition itself is not visible here.
207 #define SITE_INFO_PAIR(site, subtype, key_full, key_gb) \
208 { CSeqFeatData::eSite_##site, \
209 { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
// Expands to one brace-initialized table entry keyed by an RNA type:
//   { CRNA_ref::eType_<rna>,
//     { CSeqFeatData::eSubtype_<subtype>, key_full, key_gb } }
// `rna` and `subtype` are bare suffixes; the eType_ / eSubtype_ prefixes are
// token-pasted on by the macro.
// NOTE(review): presumably these entries populate sc_RnaInfoPairs, which is
// searched by RNA type -- the table definition itself is not visible here.
229 #define RNA_INFO_PAIR(rna, subtype, key_full, key_gb) \
230 { CRNA_ref::eType_##rna, \
231 { CSeqFeatData::eSubtype_##subtype, key_full, key_gb } }
234 RNA_INFO_PAIR(premsg, preRNA,
"precursor_RNA",
"precursor_RNA"),
258 sc_ProtInfoPairs.find(
GetProt().GetProcessed());
259 if (it != sc_ProtInfoPairs.end()) {
272 if (it != sc_SiteInfoPairs.end()) {
286 sc_RnaInfoPairs.find(rna_type);
287 if (it != sc_RnaInfoPairs.end()) {
293 const string& ext_name = (can_get_name
296 if (ext_name ==
"ncRNA") {
299 }
else if (ext_name ==
"tmRNA") {
331 if (it != sc_InfoPairs.end()) {
503 auto it = sm_FeatKeys.first.find(sName);
504 if (it == sm_FeatKeys.first.end())
507 if (sName ==
"pre_RNA")
508 return eSubtype_preRNA;
515 auto it = sm_FeatKeys.second.
find(eSubtype);
516 if (it == sm_FeatKeys.second.end())
565 auto iter = sx_MandatoryQuals.find(subtype);
566 if (iter == sx_MandatoryQuals.end()) {
// Expands to one four-field brace initializer:
//   { CSeqFeatData::<type>, CSeqFeatData::<subtype>, value, "<subtype>" }
// Unlike the *_INFO_PAIR macros, no prefix is pasted on: callers pass the
// complete enumerator names (e.g. e_Non_std_residue,
// eSubtype_non_std_residue).  The last field is the subtype enumerator name
// stringized with '#', so the textual name always matches the identifier.
// NOTE(review): `value` looks like the numeric value of the subtype
// enumerator -- confirm against the ESubtype declaration.
580 #define SUBTYPE_INFO(type, subtype, value) \
581 { CSeqFeatData::type, CSeqFeatData::subtype, value, #subtype }
675 SUBTYPE_INFO( e_Non_std_residue, eSubtype_non_std_residue, 92),
749 NcbiCout <<
"static const SSubtypeInfo s_subtype_info[] = {\n";
753 if (
type ==
"not set" ) {
762 << setw(20) <<
type <<
", "
763 << setw(30) <<
info.m_Name <<
", "
764 << setw(3) <<
info.m_Subtype <<
")";
2993 return g_legal_quals;
3012 return it->second.test(qual);
3163 auto iter = sc_QualPairs.first.
find(qual);
3164 if (iter == sc_QualPairs.first.end())
3167 return iter->second;
3172 auto iter = sc_QualPairs.second.find(qual);
3173 if (iter != sc_QualPairs.second.end())
3174 return iter->second;
3181 auto iter = sc_QualPairs.second.find(qual);
3182 if (iter != sc_QualPairs.second.end())
3185 return { iter->second,
value };
// Expands to one subtype pair initializer followed by a comma:
//   {CSeqFeatData::eSubtype_<x>, CSeqFeatData::eSubtype_<y> },
// The trailing comma is part of the expansion, so consecutive uses are
// written without separators between them.  The macro is #undef'd after the
// xref tables are defined.
// NOTE(review): presumably used to fill g_allowed_pairs and
// g_prohibited_pairs -- their initializer bodies are not visible here.
3193 #define ADD_XREF_PAIR(x, y) {CSeqFeatData::eSubtype_ ## x, CSeqFeatData::eSubtype_ ## y },
3195 template<
typename _Ty,
size_t _W
idth>
3199 static constexpr
size_t width = _Width;
3201 using table_t = ct_const_array<TBitset, width>;
3203 using init_t = std::pair<_Ty, _Ty>;
3204 using non_empty_pair = std::pair<_Ty, TBitset>;
3207 constexpr TPairsMatrix(
const init_t(&
init)[
N])
3209 using row_t = ct_const_array<char, width>;
3210 using init_matrix_t = ct_const_array<row_t, width>;
3212 init_matrix_t matrix{};
3213 for (
const auto& rec :
init)
3215 matrix[rec.first][rec.second] =
'1';
3216 matrix[rec.second][rec.first] =
'1';
3218 m_table = assemble_table(matrix, std::make_index_sequence<width>{});
3220 for (
size_t i = 0;
i < width; ++
i)
3223 m_non_empty_indices[
last++] =
i;
3225 m_non_empty_count =
last;
3228 constexpr
size_t NonEmptyCount()
const
3230 return m_non_empty_count;
3234 static bool Check(
const ct_const_array<non_empty_pair, N>&
in, _Ty v1, _Ty
v2)
3236 auto it = std::lower_bound(
in.begin(),
in.end(), v1, [](
auto left,
auto right)
3238 return left.first < right;
3242 return it->second.test(
v2);
3246 static bool Check(
const table_t&
table, _Ty v1, _Ty
v2)
3252 constexpr
auto select_bitsets()
const
3254 return select_bitsets(std::make_index_sequence<N>{});
3257 constexpr
auto get_bitsets()
const
3262 template<
typename _Matrix,
size_t...Ints>
3263 static constexpr
auto assemble_table(
const _Matrix&
init, std::index_sequence<Ints...>)
3266 return { { TBitset{
init[Ints]} ... } };
3269 constexpr non_empty_pair make_row()
const
3271 return { _Ty(m_non_empty_indices[I]), m_table[m_non_empty_indices[I]] };
3273 template<
size_t...Ints>
3274 constexpr
auto select_bitsets(std::index_sequence<Ints...>)
const
3277 return { { make_row<Ints>() ... } };
3281 ct_const_array<size_t, width> m_non_empty_indices{};
3282 size_t m_non_empty_count{ 0 };
3284 using CAssembleSubTypePairs = TPairsMatrix<CSeqFeatData::ESubtype, CSeqFeatData::eSubtype_max>;
3286 static constexpr CAssembleSubTypePairs::init_t g_allowed_pairs[] = {
3399 static constexpr CAssembleSubTypePairs::init_t g_prohibited_pairs[] = {
3729 static constexpr CAssembleSubTypePairs g_allowed_init(g_allowed_pairs);
3730 static constexpr CAssembleSubTypePairs g_prohibited_init(g_prohibited_pairs);
3735 static constexpr
auto g_allowed_xrefs = g_allowed_init.get_bitsets();
3736 static constexpr
auto g_prohibited_xrefs = g_prohibited_init.get_bitsets();
3738 static constexpr
auto g_allowed_xrefs = g_allowed_init.select_bitsets<g_allowed_init.NonEmptyCount()>();
3739 static constexpr
auto g_prohibited_xrefs = g_prohibited_init.select_bitsets<g_prohibited_init.NonEmptyCount()>();
3742 #undef ADD_XREF_PAIR
3759 static unique_ptr<CFeatList> theFeatList;
3761 if ( !theFeatList.get() ) {
3764 if ( !theFeatList.get() ) {
3768 return theFeatList.get();
3773 static unique_ptr<CBondList> theBondList;
3775 if ( !theBondList.get() ) {
3778 if ( !theBondList.get() ) {
3782 return theBondList.get();
3788 static unique_ptr<CSiteList> theSiteList;
3790 if ( !theSiteList.get() ) {
3793 if ( !theSiteList.get() ) {
3797 return theSiteList.get();
3804 regulatory_subtypes_set.
find(subtype) !=
3805 regulatory_subtypes_set.
end() );
3815 struct FCreateSubtypeNameMap {
3816 static TSubtypeToNameMap * Create() {
3828 _ASSERT( regulatory_subtypes_set.
size() == p_new_map->size() );
3832 static const TSubtypeNameElem sc_subtype_name_map[] = {
3841 const TSubtypeNameElem & subtype_name_elem =
3842 sc_subtype_name_map[special_case_idx];
3844 (*p_new_map)[subtype_name_elem.first] =
3845 subtype_name_elem.second;
3848 _ASSERT( regulatory_subtypes_set.
size() == p_new_map->size() );
3854 FCreateSubtypeNameMap::Create);
3862 TSubtypeToNameMap::const_iterator find_iter =
3863 sc_SubtypeToNameMap->find(subtype);
3864 if( find_iter != sc_SubtypeToNameMap->end() ) {
3865 return find_iter->second;
3880 struct FCreateNameToSubtypeMap {
3881 static TNameToSubtypeMap * Create() {
3887 const string & class_name =
3889 (*p_new_map)[class_name] = *subtype_iter;
3892 _ASSERT( p_new_map->size() == regulatory_subtypes_set.
size() );
3898 FCreateNameToSubtypeMap::Create);
3900 TNameToSubtypeMap::const_iterator find_iter =
3901 ms_NameToSubtypeMap->find(class_name);
3902 if( find_iter != ms_NameToSubtypeMap->end() ) {
3903 return find_iter->second;
3912 static vector<string> choices = {
3914 "ribosome_binding_site",
3917 "DNase_I_hypersensitive_site",
3919 "enhancer_blocking_element",
3921 "imprinting_control_region",
3923 "locus_control_region",
3924 "matrix_attachment_region",
3927 "polyA_signal_sequence",
3928 "recoding_stimulatory_region",
3929 "recombination_enhancer",
3930 "replication_regulatory_region",
3936 "transcriptional_cis_regulatory_region",
3945 static vector<string> regulatory_class_values;
3946 if (regulatory_class_values.empty()) {
3950 string original =
val;
3953 if (valid_val !=
nullptr) {
3957 return original !=
val;
3962 static vector<string> choices = {
3965 "non_allelic_homologous",
3966 "chromosome_breakpoint",
3974 static constexpr
TSubtypes discouraged_subtypes {
4000 if (discouraged_subtypes.test(subtype))
4015 if (discouraged_quals.test(qual))
4130 config_item = *ci_it;
4141 config_item = fm_it->second;
4149 for (; ci_it !=
end(); ++ci_it) {
4151 config_item = *ci_it;
4173 for (; ci_it !=
end(); ++ci_it) {
4174 if (ci_it->GetStoragekey() ==
key) {
4175 config_item = *ci_it;
4215 vector<string> keys;
4234 for (
size_t i = 0;
i < config_item_size; ++
i ) {
4244 iep->m_Subtype, iep->m_Name, iep->m_Name);
4267 string this_desc = iter->GetDescription();
4274 parent_desc = this_desc;
4277 if ( ! parent_desc.empty()) {
4278 this_desc = parent_desc +
"/" + this_desc;
4282 descs.push_back(this_desc);
4288 string::size_type pos = 0;
4289 while ((pos =
NStr::Find(str1,
" ", pos)) != NCBI_NS_STD::string::npos) {
4319 if (ci_it != sm_BondKeys.end ()) {
4330 if (ci_it != sm_BondKeys.end ()) {
4331 bond_type = ci_it->second;
4342 if (ci_it == sm_BondKeys.end()) {
4345 return ci_it->second;
4397 if (ci_it != sm_SiteKeys.end ()) {
4408 if (ci_it != sm_SiteKeys.end ()) {
4409 site_type = ci_it->second;
4420 if (ci_it == sm_SiteKeys.end()) {
4423 return ci_it->second;
4430 static ESubtype const regulatory_subtypes [] = {
4447 TSubtypeSet, sc_RegulatorySubtypes, regulatory_subtypes);
4449 return sc_RegulatorySubtypes;
4521 bool required =
true;
4522 switch (feat_subtype)
4539 bool allowed =
false;
4540 switch (feat_subtype)
4672 key =
"misc_feature";
4675 key =
"repeat_region";
4678 key =
"misc_binding";
4698 const string& productName)
4700 static vector<string> matchPhrases{
"IS150 protein InsAB",
"PCRF domain-containing protein" };
4701 static vector<string> containedPhrases{
"transposase",
"chain release" };
4703 for (
const auto& phrase: matchPhrases) {
4704 if (phrase == productName) {
4708 for (
const auto& phrase: containedPhrases) {
4709 if (productName.find(phrase) != string::npos) {
MAKE_TWOWAY_CONST_MAP(sm_FeatKeys, ct::tagStrNocase, CSeqFeatData::ESubtype, { { "-10_signal", CSeqFeatData::eSubtype_10_signal }, { "-35_signal", CSeqFeatData::eSubtype_35_signal }, { "3'UTR", CSeqFeatData::eSubtype_3UTR }, { "3'clip", CSeqFeatData::eSubtype_3clip }, { "5'UTR", CSeqFeatData::eSubtype_5UTR }, { "5'clip", CSeqFeatData::eSubtype_5clip }, { "Bond", CSeqFeatData::eSubtype_bond }, { "CAAT_signal", CSeqFeatData::eSubtype_CAAT_signal }, { "CDS", CSeqFeatData::eSubtype_cdregion }, { "C_region", CSeqFeatData::eSubtype_C_region }, { "Cit", CSeqFeatData::eSubtype_pub }, { "CloneRef", CSeqFeatData::eSubtype_clone }, { "Comment", CSeqFeatData::eSubtype_comment }, { "D-loop", CSeqFeatData::eSubtype_D_loop }, { "D_segment", CSeqFeatData::eSubtype_D_segment }, { "GC_signal", CSeqFeatData::eSubtype_GC_signal }, { "Het", CSeqFeatData::eSubtype_het }, { "J_segment", CSeqFeatData::eSubtype_J_segment }, { "LTR", CSeqFeatData::eSubtype_LTR }, { "N_region", CSeqFeatData::eSubtype_N_region }, { "NonStdRes", CSeqFeatData::eSubtype_non_std_residue }, { "Num", CSeqFeatData::eSubtype_num }, { "Protein", CSeqFeatData::eSubtype_prot }, { "RBS", CSeqFeatData::eSubtype_RBS }, { "REFERENCE", CSeqFeatData::eSubtype_pub }, { "Region", CSeqFeatData::eSubtype_region }, { "Rsite", CSeqFeatData::eSubtype_rsite }, { "STS", CSeqFeatData::eSubtype_STS }, { "S_region", CSeqFeatData::eSubtype_S_region }, { "SecStr", CSeqFeatData::eSubtype_psec_str }, { "Site", CSeqFeatData::eSubtype_site }, { "Site-ref", CSeqFeatData::eSubtype_site_ref }, { "Src", CSeqFeatData::eSubtype_biosrc }, { "TATA_signal", CSeqFeatData::eSubtype_TATA_signal }, { "TxInit", CSeqFeatData::eSubtype_txinit }, { "User", CSeqFeatData::eSubtype_user }, { "V_region", CSeqFeatData::eSubtype_V_region }, { "V_segment", CSeqFeatData::eSubtype_V_segment }, { "VariationRef", CSeqFeatData::eSubtype_variation_ref }, { "Xref", CSeqFeatData::eSubtype_seq }, { "assembly_gap", CSeqFeatData::eSubtype_assembly_gap }, { "attenuator", 
CSeqFeatData::eSubtype_attenuator }, { "centromere", CSeqFeatData::eSubtype_centromere }, { "conflict", CSeqFeatData::eSubtype_conflict }, { "enhancer", CSeqFeatData::eSubtype_enhancer }, { "exon", CSeqFeatData::eSubtype_exon }, { "gap", CSeqFeatData::eSubtype_gap }, { "gene", CSeqFeatData::eSubtype_gene }, { "iDNA", CSeqFeatData::eSubtype_iDNA }, { "intron", CSeqFeatData::eSubtype_intron }, { "mRNA", CSeqFeatData::eSubtype_mRNA }, { "mat_peptide", CSeqFeatData::eSubtype_mat_peptide_aa }, { "mat_peptide_nt", CSeqFeatData::eSubtype_mat_peptide }, { "misc_RNA", CSeqFeatData::eSubtype_otherRNA }, { "misc_binding", CSeqFeatData::eSubtype_misc_binding }, { "misc_difference", CSeqFeatData::eSubtype_misc_difference }, { "misc_feature", CSeqFeatData::eSubtype_misc_feature }, { "misc_recomb", CSeqFeatData::eSubtype_misc_recomb }, { "misc_signal", CSeqFeatData::eSubtype_misc_signal }, { "misc_structure", CSeqFeatData::eSubtype_misc_structure }, { "mobile_element", CSeqFeatData::eSubtype_mobile_element }, { "modified_base", CSeqFeatData::eSubtype_modified_base }, { "ncRNA", CSeqFeatData::eSubtype_ncRNA }, { "old_sequence", CSeqFeatData::eSubtype_old_sequence }, { "operon", CSeqFeatData::eSubtype_operon }, { "oriT", CSeqFeatData::eSubtype_oriT }, { "polyA_signal", CSeqFeatData::eSubtype_polyA_signal }, { "polyA_site", CSeqFeatData::eSubtype_polyA_site }, { "precursor_RNA", CSeqFeatData::eSubtype_preRNA }, { "prim_transcript", CSeqFeatData::eSubtype_prim_transcript }, { "primer_bind", CSeqFeatData::eSubtype_primer_bind }, { "promoter", CSeqFeatData::eSubtype_promoter }, { "propeptide", CSeqFeatData::eSubtype_propeptide_aa }, { "propeptide_nt", CSeqFeatData::eSubtype_propeptide }, { "proprotein", CSeqFeatData::eSubtype_preprotein }, { "protein_bind", CSeqFeatData::eSubtype_protein_bind }, { "rRNA", CSeqFeatData::eSubtype_rRNA }, { "regulatory", CSeqFeatData::eSubtype_regulatory }, { "rep_origin", CSeqFeatData::eSubtype_rep_origin }, { "repeat_region", 
CSeqFeatData::eSubtype_repeat_region }, { "repeat_unit", CSeqFeatData::eSubtype_repeat_unit }, { "satellite", CSeqFeatData::eSubtype_satellite }, { "scRNA", CSeqFeatData::eSubtype_scRNA }, { "sig_peptide", CSeqFeatData::eSubtype_sig_peptide_aa }, { "sig_peptide_nt", CSeqFeatData::eSubtype_sig_peptide }, { "snRNA", CSeqFeatData::eSubtype_snRNA }, { "snoRNA", CSeqFeatData::eSubtype_snoRNA }, { "source", CSeqFeatData::eSubtype_biosrc }, { "stem_loop", CSeqFeatData::eSubtype_stem_loop }, { "tRNA", CSeqFeatData::eSubtype_tRNA }, { "telomere", CSeqFeatData::eSubtype_telomere }, { "terminator", CSeqFeatData::eSubtype_terminator }, { "tmRNA", CSeqFeatData::eSubtype_tmRNA }, { "transit_peptide", CSeqFeatData::eSubtype_transit_peptide_aa }, { "transit_peptide_nt", CSeqFeatData::eSubtype_transit_peptide }, { "unsure", CSeqFeatData::eSubtype_unsure }, { "variation", CSeqFeatData::eSubtype_variation }, { "virion", CSeqFeatData::eSubtype_virion } }) CSeqFeatData
vector< CSeqFeatData::E_Choice > TSubtypesTable
static CSafeStatic< TSubtypesTable > sx_SubtypesTable
string x_SpaceToDash(string str1)