// Preprocessor alias: every later use of the identifier CompressSpaces is
// replaced with x_CompressSpaces.
87 #define CompressSpaces x_CompressSpaces
// Locks m_mutex by forwarding to its Lock() member.
98 void Lock(
void) { m_mutex.Lock(); }
// Unlocks m_mutex by forwarding to its Unlock() member.
99 void Unlock(
void) { m_mutex.Unlock(); }
109 template<
typename TLockableObj>
113 CLockingRef(TLockableObj *pLockableObj) :
114 m_pLockableObj(pLockableObj)
116 m_pLockableObj->Lock();
120 m_pLockableObj->Unlock();
// Member-access operator: returns the pointer obtained from
// m_pLockableObj.GetPointer(), so calls made with -> reach the wrapped object.
// NOTE(review): other visible lines of this class call Lock() and Unlock() on
// m_pLockableObj; presumably the object stays locked while this wrapper
// exists -- confirm against the full constructor/destructor.
123 TLockableObj * operator->(
void) {
return m_pLockableObj.GetPointer(); }
// Short name for a CLockingRef wrapping a CRegexpWithLock.
128 typedef CLockingRef<CRegexpWithLock> CCachedRegexp;
// Cache key: a pattern given as a C string pointer paired with its
// CRegexp::TCompile flags.
// NOTE(review): the key holds the raw pointer rather than a copy of the
// pattern text; whether two keys match by pointer value or by string content
// depends on how the cache orders this pair -- confirm.
133 typedef pair<const char *, CRegexp::TCompile> TRegexpKey;
// Cache value: alias for TRegexpWithLockRef.
134 typedef TRegexpWithLockRef TRegexpValue;
136 class CRegexpCacheHandler :
140 TRegexpValue
CreateValue(
const TRegexpKey & regexp_key )
142 return Ref(
new CRegexpWithLock(
143 regexp_key.first, regexp_key.second));
153 CCachedRegexp
Get(
const char * pattern,
156 TRegexpKey regexpKey(pattern,
flags);
157 TRegexpWithLockRef regexpLockRef = m_Cache[regexpKey];
158 return CCachedRegexp(regexpLockRef.GetPointer());
// CCache instantiation mapping TRegexpKey to TRegexpValue, with
// CRegexpCacheHandler supplied as the handler type.
162 typedef CCache<TRegexpKey, TRegexpValue,
163 CRegexpCacheHandler> TUnderlyingCache;
// The cache instance itself.
164 TUnderlyingCache m_Cache;
// CRegexpCache object; later code in this file obtains regular expressions
// from it via regexpCache.Get(...).
168 CRegexpCache regexpCache;
173 : m_Changes(changes),
324 f->SetData().SetBiosrc().Assign(src);
327 src.
Assign(
f->GetData().GetBiosrc());
383 edit_handle.
Swap( new_sah );
400 edit_handle.
Replace( *new_seq_feat );
454 SSeqEntryInfo seqEntryInfo;
455 if( ! m_SeqEntryInfoStack.empty() ) {
457 seqEntryInfo = m_SeqEntryInfoStack.top();
459 seqEntryInfo.m_IsEmblOrDdbj =
false;
460 seqEntryInfo.m_StripSerial =
true;
474 m_SeqEntryInfoStack.push( m_SeqEntryInfoStack.top() );
490 const CSeq_id& sid = **sid_itr;
497 const string& acc =
GET_FIELD (tsid, Accession);
498 if (acc.length() == 6) {
499 seqEntryInfo.m_StripSerial =
false;
506 seqEntryInfo.m_StripSerial =
false;
507 seqEntryInfo.m_IsEmblOrDdbj =
true;
524 seqEntryInfo.m_StripSerial =
false;
532 m_SeqEntryInfoStack.push(seqEntryInfo);
542 m_SeqEntryInfoStack.pop();
589 const size_t old_str_size =
str.length();
591 if( old_str_size !=
str.length() ) {
598 const size_t old_str_size =
str.length();
600 if( old_str_size !=
str.length() ) {
608 int& num_nucs,
int& num_prots)
614 const auto& inst = bioseq.
GetInst();
617 }
else if (inst.IsAa()) {
633 int& num_nucs,
int& num_prots,
bool& hasInvalidSubset)
636 for (
const auto& pSubEntry : bioseqSet.
GetSeq_set()) {
638 if (pSubEntry->IsSeq()) {
641 const auto& bioseqSet = pSubEntry->GetSet();
643 hasInvalidSubset =
true;
661 bool hasInvalidSubset{
false};
664 return (!hasInvalidSubset && numNucs == 1 && numProts > 0);
688 if (partial5 && partial3) {
690 }
else if (partial5) {
692 }
else if (partial3) {
694 }
else if (partial) {
720 bool has_gibbsq =
false;
722 if ((*id)->IsGibbsq()) {
735 bool make_partial5 =
false;
736 bool make_partial3 =
false;
738 if (dit->IsTitle()) {
739 if (
NStr::Find(dit->GetTitle(),
"{C-terminal}") != string::npos) {
740 make_partial5 =
true;
742 if (
NStr::Find(dit->GetTitle(),
"{N-terminal}") != string::npos) {
743 make_partial3 =
true;
749 if (!make_partial5 && !make_partial3) {
758 if (ait->IsSetData() && ait->GetData().IsFtable()) {
759 for (
auto fi : ait->SetData().SetFtable()) {
760 if (
fi->IsSetData() &&
762 fi->IsSetPartial() &&
fi->GetPartial() &&
763 fi->IsSetLocation() &&
779 for (
auto ds : bs.
SetDescr().Set()) {
780 if (ds->IsMolinfo() &&
781 (!ds->GetMolinfo().IsSetCompleteness() ||
782 ds->GetMolinfo().GetCompleteness() != wanted)) {
783 ds->SetMolinfo().SetCompleteness(wanted);
804 if( ! pObjectId || !
FIELD_IS(*pObjectId, Str) ) {
824 const char *search_pattern,
825 const char *replacement,
830 size_t num_replacements = replacer.
Replace( search_pattern, replacement,
835 return ( num_replacements > 0 );
841 template <
class Iter1,
class Iter2,
class Compare>
843 Iter1 first1, Iter1 last1,
844 Iter2 first2, Iter2 last2,
847 for( ; first1 != last1 && first2 != last2 ; ++first1, ++first2 ) {
848 int comparison = compare( *first1, *first2 );
849 if( comparison != 0 ) {
854 if( first1 == last1 ) {
855 if( first2 == last2 ) {
901 s1.begin(), s1.end(),
902 s2.begin(), s2.end(),
912 const static string apicoplast(
"apicoplast");
918 const static string chloroplast(
"chloroplast");
924 const static string chromoplast(
"chromoplast");
930 const static string kinetoplast(
"kinetoplast");
936 const static string leucoplast(
"leucoplast");
942 const static string plastid(
"plastid");
948 const static string proplastid(
"proplastid");
986 for( ; pos <
str.length(); ++pos ) {
1066 CCachedRegexp reassembly_regex
1067 = regexpCache.Get(
"^tpa(?:_|[_:]re)assembly$",
1070 string & sKeyword = *keyword_it;
1071 if( reassembly_regex->IsMatch(sKeyword) ) {
1072 sKeyword =
"TPA:assembly";
1125 template<
typename TMapType >
1130 const string *
str = &str_arg;
1134 unique_ptr<string> temp_str;
1138 for( ; first_bad_char < str_arg.length(); ++first_bad_char ) {
1139 const char ch = str_arg[first_bad_char];
1140 if( !
isalnum(ch) && ch !=
'-' && ch !=
'_' && ch !=
' ' ) {
1141 temp_str.reset(
new string(str_arg, 0, first_bad_char) );
1142 str = temp_str.get();
1147 typename TMapType::const_iterator it = the_map.lower_bound( *
str );
1148 if( it != the_map.begin() && ( it == the_map.end() || !
NStr::EqualNocase(*
str, it->first) ) ) {
1154 return the_map.end();
1158 template<
typename TSetType >
1161 typename TSetType::const_iterator it = the_set.lower_bound(
str );
1168 return the_set.end();
1178 if (pos != 0 && pos !=
NPOS
1179 && (pos2 =
str.find_first_not_of(
": =", pos)) !=
NPOS) {
1181 string val =
str.substr(0, pos);
1184 out_subtype = subtype;
1185 out_val_start_pos = pos2;
1199 if (pos != 0 && pos !=
NPOS
1200 && (pos2 =
str.find_first_not_of(
": =", pos)) !=
NPOS) {
1202 string val =
str.substr(0, pos);
1214 out_subtype = subtype;
1215 out_val_start_pos = pos2;
1225 out_subtype = subtype;
1226 out_val_start_pos =
str.length();
1250 if (chs1 < chs2)
return true;
1251 if (chs1 > chs2)
return false;
1277 if (chs1 != chs2)
return false;
1296 auto& mod_set = biosrc.
SetOrg().SetOrgname().SetMod();
1297 auto mod_it = mod_set.begin();
1298 while (mod_it != mod_set.end()) {
1309 if (pFirstOtherOrgMod) {
1312 mod_it = mod_set.erase(mod_it);
1315 pFirstOtherOrgMod.
Reset(&orgmod);
1333 if( pFirstOtherSubSource ) {
1339 pFirstOtherSubSource.
Reset( &subsrc );
1346 if( ! pFirstOtherSubSource ) {
1374 #ifndef NCBI_OS_MSWIN
1388 NStr::ReplaceInPlace (
str,
"the SMART approach (~http://www.evrogen.com",
"the SMART approach (http://www.evrogen.com");
1437 const string& n1 = s1.
GetName();
1438 const string& n2 = s2.
GetName();
1462 CBioSource::TSubtype::iterator s = biosrc.
SetSubtype().begin();
1463 CBioSource::TSubtype::iterator s_next = s;
1465 while (s_next != biosrc.
SetSubtype().end()) {
1479 vector<string> components;
1482 if (components.size() == 1) {
1486 if (components.size() == 2) {
1487 string dat = components[0];
1488 string tim = components[1];
1489 size_t zee = tim.length();
1490 if (zee > 4 && tim[zee-1] ==
'Z' && tim[1] ==
':') {
1491 return dat +
"T" +
"0" + tim;
1500 vector<string> pieces;
1503 if (pieces.size() == 1) {
1507 if (pieces.size() == 2) {
1510 return fstdate +
"/" + scddate;
1518 string north_or_south;
1520 string east_or_west;
1526 if (ch < '0' || ch >
'9') {
1532 lat_lon_stream >> lat;
1533 lat_lon_stream >> north_or_south;
1534 lat_lon_stream >> lon;
1535 lat_lon_stream >> east_or_west;
1536 if( lat_lon_stream.bad() ) {
1540 if( north_or_south !=
"N" && north_or_south !=
"S" ) {
1544 if( east_or_west !=
"E" && east_or_west !=
"W" ) {
1550 size_t len = lat.length();
1551 if (pos + 9 <
len) {
1558 size_t len = lon.length();
1559 if (pos + 9 <
len) {
1564 return lat +
" " + north_or_south +
" " + lon +
" " + east_or_west;
1583 SUBSOURCE_ON_BIOSOURCE_Type::iterator
prev =
1609 static const string kUSPrefix(
"United States:" );
1617 country.replace( 0, kUSPrefix.length(),
"USA:" );
1630 CCachedRegexp altitude_regex = regexpCache.Get(
1631 "^([+-]?[0-9]+(\\.[0-9]+)?) ?(m|meter[s]?|metre[s]?)\\.?$",
1634 if( altitude_regex->IsMatch(altitude) ) {
1635 string new_altitude = altitude_regex->GetSub(altitude, 1);
1636 new_altitude +=
" m";
1637 if( altitude != new_altitude ) {
1638 altitude = new_altitude;
1674 coll_date = new_date;
1682 const string before =
GET_FIELD (sbs, Name);
1684 const string& after =
GET_FIELD (sbs, Name);
1685 if ( before != after ) {
1705 const string &name =
GET_FIELD(sbs, Name);
1708 if ( (chs == prev_chs) &&
1717 }
else if ( (chs == prev_chs) &&
1721 (**prev).Assign( sbs );
1746 auto& orgname = biosrc.
SetOrg().SetOrgname();
1747 if (orgname.IsSetMod()) {
1748 auto& mod_set = orgname.SetMod();
1749 for (
auto& orgmod_it : mod_set) {
1750 COrgMod & orgmod = *orgmod_it;
1813 bool plasmid_subsource_found =
false;
1831 plasmid_subsource_found =
true;
1836 if( plasmid_subsource_found ) {
1892 if (
GET_FIELD (oid, Id) == 0)
return true;
1922 vector< CRef< CDbtag > > new_dbtags;
1928 if( ! new_dbtags.empty() ) {
1929 copy( new_dbtags.begin(), new_dbtags.end(), back_inserter( org.
SetDb() ) );
1982 if (subtype1 < subtype2)
return true;
1983 if (subtype1 > subtype2)
return false;
1986 const string& subname1 =
GET_FIELD (omd1, Subname);
1987 const string& subname2 =
GET_FIELD (omd2, Subname);
1989 if( subname_comparison < 0 ) {
1991 }
else if( subname_comparison > 0 ) {
2000 if (attrib_comparison < 0) {
2019 const string& subname1 =
GET_FIELD (omd1, Subname);
2020 const string& subname2 =
GET_FIELD (omd2, Subname);
2029 if (chs1 == chs2)
return true;
2057 bool do_erase =
false;
2058 string val_name, otherval;
2064 if ((*match_it)->GetSubtype() == subtype
2131 const string::size_type old_len =
subname.length();
2134 if( old_len !=
subname.length() ) {
2147 if (
str.length() == 0) {
2150 bool all_digits =
true;
2171 size_t len = db.length();
2173 if (
len != db.length()) {
2186 db =
"UniProtKB/Swiss-Prot";
2191 db =
"UniProtKB/TrEMBL";
2231 db =
"ATCC(in host)";
2241 const string& db = dbtag.
GetDb();
2276 dbtag.
SetTag().SetStr(
"MGI:");
2279 string newstr =
"MGI:" +
str;
2280 dbtag.
SetTag().SetStr(newstr);
2285 string newstr =
"HGNC:" +
str;
2286 dbtag.
SetTag().SetStr(newstr);
2295 string newstr =
"VGNC:" +
str;
2296 dbtag.
SetTag().SetStr(newstr);
2345 for (
auto p : pubdesc.
SetPub().Set()) {
2354 typedef pair<string, CRef<CPub> >
TCit;
2361 if( label_compare_no_case != 0 ) {
2362 return (label_compare_no_case < 0);
2367 if( label_compare_case != 0 ) {
2368 return (label_compare_case < 0);
2391 return e1.second == e2;
2397 if( ! pub_set.
IsPub() ) {
2405 for (
auto cit_it : pub_set.
GetPub()) {
2412 auto& publist = pub_set.
SetPub();
2414 if ( cit_set.size() != publist.size() ||
2415 ! equal(cit_set.begin(), cit_set.end(), publist.begin(),
cmpSortedvsOld) )
2419 ITERATE (TCitSet, citset_it, cit_set) {
2420 publist.push_back(citset_it->second);
2450 satellite_qual->
SetQual(
"satellite");
2460 feat.
SetQual().push_back( satellite_qual );
2461 }
else if (
key ==
"LTR" ) {
2466 rpt_type_qual->
SetQual(
"rpt_type" );
2467 rpt_type_qual->
SetVal(
"long_terminal_repeat" );
2469 feat.
SetQual().push_back( rpt_type_qual );
2478 regulatory_class_qual->
SetQual(
"regulatory_class");
2480 regulatory_class_qual->
SetVal(
"other" );
2482 regulatory_class_qual->
SetVal( regulatory_class );
2484 feat.
SetQual().push_back( regulatory_class_qual );
2492 if( !
val.empty() ) {
2494 satellite_qual->
SetQual(
"satellite");
2497 feat.
SetQual().push_back( satellite_qual );
2502 if(
key ==
"CDS" ) {
2514 feat.
SetData().SetCdregion(*new_cdregion);
2530 if (
key ==
"precursor_RNA" ) {
2532 }
else if (
key ==
"mRNA" ) {
2534 }
else if (
key ==
"tRNA" ) {
2536 }
else if (
key ==
"rRNA" ) {
2538 }
else if (
key ==
"snRNA" ) {
2540 }
else if (
key ==
"scRNA" ) {
2542 }
else if (
key ==
"snoRNA" ) {
2544 }
else if (
key ==
"misc_RNA" ) {
2549 new_rna_ref->
SetType( rna_ref_type );
2550 feat.
SetData().SetRna( *new_rna_ref );
2556 if (
key ==
"proprotein" ||
key ==
"preprotein" ) {
2558 }
else if (
key ==
"mat_peptide" ) {
2560 }
else if (
key ==
"sig_peptide" ) {
2562 }
else if (
key ==
"transit_peptide" ) {
2564 }
else if (
key ==
"propeptide" ) {
2569 if( location_seq_id ) {
2571 if ( bioseq_handle && bioseq_handle.
IsAa() ) {
2578 feat.
SetData().SetProt( *new_prot_ref );
2638 const string& comment =
GET_FIELD(feat, Comment);
2640 if ( it != sc_SiteMap.end() ) {
2641 feat.
SetData().SetSite(it->second);
2655 switch (loc.
Which()) {
2665 if (ints.size() == 1) {
2705 TMixList& sl_list = loc.
SetMix().Set();
2706 TMixList::iterator sl_it = sl_list.begin();
2707 while (sl_it != sl_list.end()) {
2708 if ((*sl_it)->IsNull()) {
2709 sl_it = sl_list.erase(sl_it);
2717 if( sl_list.size() > 0 ) {
2718 sl_it = sl_list.end();
2719 while (sl_it != sl_list.begin()) {
2721 if ( ! (*sl_it)->IsNull()) {
2726 if (sl_it != sl_list.end()) {
2727 sl_list.erase(sl_it, sl_list.end());
2732 if (sl_list.size() == 0) {
2735 }
else if (sl_list.size() == 1) {
2779 auto& interval = loc.
SetInt();
2780 interval.SetId(*
id);
2781 interval.SetFrom(0);
2782 interval.SetTo(bs_len - 1);
2791 bool any_nulls_seen )
2795 if( old_piece->
IsNull() ) {
2797 }
else if( old_piece->
IsMix() ) {
2801 if( any_nulls_seen && ! new_mix_pieces.empty() ) {
2804 new_mix_pieces.push_back( null_piece );
2806 new_mix_pieces.push_back( old_piece );
2813 if( ! loc_mix.
IsSet() || loc_mix.
Set().empty() ) {
2820 bool have_seen_inner_mix =
false;
2821 bool any_nulls_seen =
false;
2822 bool alternates_not_null_then_null =
true;
2825 if( (mix_pieces.size() % 2) == 0 ) {
2828 alternates_not_null_then_null =
false;
2831 bool last_piece_was_null =
true;
2833 const CSeq_loc &this_piece = **outer_mix_iter;
2834 const bool this_piece_is_null = this_piece.
IsNull();
2837 if( this_piece_is_null ) {
2838 any_nulls_seen =
true;
2842 if( alternates_not_null_then_null ) {
2843 if( this_piece_is_null == last_piece_was_null ) {
2845 alternates_not_null_then_null =
false;
2850 if( this_piece.
IsMix() ) {
2851 have_seen_inner_mix =
true;
2852 alternates_not_null_then_null =
false;
2854 if( ! any_nulls_seen ) {
2856 for( ; inner_ci; ++inner_ci ) {
2858 any_nulls_seen =
true;
2865 last_piece_was_null = this_piece_is_null;
2870 if( have_seen_inner_mix ||
2871 (any_nulls_seen && ! alternates_not_null_then_null) )
2880 loc_mix.
Set().swap( new_mix_pieces );
2888 const char& ch = *str_itr;
2889 if (ch >
' ' && ch !=
'"' && ch !=
'\'')
return false;
2906 const string::size_type old_length = gbq.
GetVal().length();
2910 if (gbq.
GetVal().length() != old_length) {
2927 gbq.
SetQual(
"rpt_unit_range");
2969 SET_FIELD( gbq, Qual,
"mobile_element_type" );
2983 static const char *allowed_types[] = {
2984 "-10_signal",
"-35_signal",
"3'UTR",
"3'clip",
"5'UTR",
2985 "5'clip",
"CAAT_signal",
"CDS",
"C_region",
"D-loop",
2986 "D_segment",
"GC_signal",
"Import",
"J_segment",
"LTR",
2987 "N_region",
"RBS",
"STS",
"S_region",
"Site-ref",
2988 "TATA_signal",
"V_region",
"V_segment",
"allele",
"attenuator",
2989 "centromere",
"conflict",
"enhancer",
"exon",
"gap",
2990 "iDNA",
"intron",
"mat_peptide",
"misc_RNA",
"misc_binding",
2991 "misc_difference",
"misc_feature",
"misc_recomb",
"misc_signal",
"misc_structure",
2992 "mobile_element",
"modified_base",
"mutation",
"old_sequence",
"operon",
2993 "oriT",
"polyA_signal",
"polyA_site",
"precursor_RNA",
"prim_transcript",
2994 "primer_bind",
"promoter",
"protein_bind",
"regulatory",
"rep_origin",
2995 "repeat_region",
"repeat_unit",
"satellite",
"sig_peptide",
"source",
2996 "stem_loop",
"telomere",
"terminator",
"transit_peptide",
"unsure",
2997 "variation",
"virion"
2999 static const int kAllowedTypesNumElems = (
sizeof(allowed_types) /
sizeof(allowed_types[0]));
3001 static const char *kFeatBad =
"???";
3006 if( binary_search( allowed_types, allowed_types + kAllowedTypesNumElems,
3019 static const char *kFeatBad =
"???";
3022 switch (fdata.
Which()) {
3035 return "proprotein";
3037 return "mat_peptide";
3039 return "sig_peptide";
3041 return "transit_peptide";
3043 return "propeptide";
3052 switch (
rna.GetType() )
3057 return "precursor_RNA";
3078 const string &name =
rna.GetExt().GetName();
3122 return "VariationRef";
3148 string exc = gb_qual.
GetQual();
3250 string::size_type colon_pos =
val.find_first_of(
":");
3252 string comment =
val.substr( colon_pos + 1 );
3253 val.resize( colon_pos );
3291 string new_val =
val;
3293 if( new_val !=
val ) {
3308 xref->SetData().SetGene().SetLocus(
val);
3320 }
else if( qual ==
"satellite" ) {
3346 qual =
"mobile_element_type";
3347 data.
SetImp().SetKey(
"mobile_element" );
3363 data.
SetImp().SetKey(
"misc_difference");
3372 if( qual.empty() &&
val.empty() ) {
3382 if (string::npos == pos) {
3388 if (start < 1 || stop < 1) {
3401 if (string::npos == pos) {
3407 if (start < 1 || stop < 1) {
3419 if (
val.length() > 25) {
3436 gbq.
SetQual(
"rpt_unit_range");
3454 static const string integronValues[] = {
3456 "class II integron",
3457 "class III integron",
3462 static const string* endIntegronValues
3463 = integronValues +
sizeof(integronValues)/
sizeof(*integronValues);
3466 SET_FIELD( gbq, Qual,
"mobile_element");
3469 const string* pValue = std::find(integronValues, endIntegronValues,
GET_FIELD(gbq, Val) );
3470 if ( pValue != endIntegronValues ) {
3471 string::size_type cutoff = pValue->find(
" integron" );
3472 _ASSERT( cutoff != string::npos );
3473 SET_FIELD( gbq, Val,
string(
"integron: ") + pValue->substr(0, cutoff) );
3492 gbq.
SetQual(
"mobile_element");
3499 const string&
value )
3513 bool last_char_was_close_paren =
false;
3514 string::const_iterator s =
value.begin();
3516 while (s !=
value.end()) {
3519 }
else if (last_char_was_close_paren) {
3521 }
else if (*s ==
')') {
3522 last_char_was_close_paren =
true;
3532 CSeq_feat::TQual::iterator& it,
3548 string qual_type = qual.
GetQual();
3565 vector< string > newValues;
3566 string valueList =
val.substr(1,
val.length() - 2);
3569 qual.
SetVal( newValues[0] );
3571 for (
size_t i=1;
i < newValues.size(); ++
i ) {
3573 newQual->
SetQual( qual_type );
3574 newQual->
SetVal( newValues[
i] );
3575 new_quals.push_back( newQual );
3590 if( (
val.length() > 1) && (
val[0] ==
'{') &&
3591 (
val[
val.length()-1] ==
'}') )
3594 val[
val.length()-1] =
')';
3617 if ( ! new_quals.empty() ) {
3618 quals.insert(quals.end(), new_quals.begin(), new_quals.end());
3629 const string& qual =
GET_FIELD(gb_qual, Qual);
3636 bool change_made =
false;
3668 const string& qual = gb_qual.
GetQual();
3697 int transl_table = 1;
3713 cds.
SetCode().Set().push_back(gc);
3746 for (
auto ait : pseq->
GetAnnot()) {
3747 if (ait->IsFtable()) {
3748 for (
auto fit : ait->GetData().GetFtable()) {
3759 bool push_back_xref_on_success =
false;
3767 if( (*xref_iter)->IsSetData() && (*xref_iter)->GetData().IsProt() ) {
3774 xref->SetData().SetProt( *prot_ref );
3776 push_back_xref_on_success =
true;
3778 prot_ref.
Reset( &xref->SetData().SetProt() );
3798 if( push_back_xref_on_success ) {
3799 feat.
SetXref().push_back( xref );
3819 {
"Arginine",
'R' },
3822 {
"Asp or Asn",
'B' },
3823 {
"Asparagine",
'N' },
3824 {
"Aspartate",
'D' },
3825 {
"Aspartic",
'D' },
3826 {
"Aspartic Acid",
'D' },
3829 {
"Cysteine",
'C' },
3833 {
"Glu or Gln",
'Z' },
3834 {
"Glutamate",
'E' },
3835 {
"Glutamic",
'E' },
3836 {
"Glutamic Acid",
'E' },
3837 {
"Glutamine",
'Q' },
3842 {
"Histidine",
'H' },
3846 {
"Isoleucine",
'I' },
3848 {
"Leu or Ile",
'J' },
3853 {
"Methionine",
'M' },
3856 {
"Phenylalanine",
'F' },
3860 {
"Pyrrolysine",
'O' },
3862 {
"Selenocysteine",
'U' },
3867 {
"Termination",
'*' },
3869 {
"Threonine",
'T' },
3871 {
"Tryptophan",
'W' },
3873 {
"Tyrosine",
'Y' },
3890 for( ; ii < num_keys; ++ii ) {
3908 if (pos_end != string::npos) {
3910 string pos_str =
str.substr (5, pos_end - 5);
3912 if (aa_start != string::npos) {
3913 string abbrev = pos_str.substr (aa_start + 3);
3920 aa->SetIupacaa (t_iter->second);
3922 pos_str = pos_str.substr (0, aa_start);
3925 pos_str = pos_str.substr (0, pos_str.length() - 1);
3949 trna->SetAnticodon(*anticodon);
3959 if (
str.empty() )
return '\0';
3963 if(
tmp.length() == 1 ) {
3965 const char aminoAcidLetter =
toupper(
tmp[0]);
3967 return aminoAcidLetter;
3973 return trna_iter->second;
3983 copy( list_of_characters.begin(), list_of_characters.end(),
3998 out_string_list.clear();
3999 if ( tRNA_string.empty() )
return;
4004 CCachedRegexp valid_sgd_regex = regexpCache.Get(
4005 "^[Tt][A-Za-z]\\(...\\)[A-Za-z]\\d?\\d?$");
4006 if ( valid_sgd_regex->IsMatch(tRNA_string) ) {
4009 string &new_SGD_tRNA_anticodon = out_string_list.back();
4010 string raw_codon_part = tRNA_string.substr(3,3);
4017 out_string_list.push_back(tRNA_string.substr(1,1));
4021 string tRNA_string_copy = tRNA_string;
4023 replace_if( tRNA_string_copy.begin(), tRNA_string_copy.end(),
4026 vector<string> tRNA_tokens;
4032 string &tRNA_token = *tRNA_token_iter;
4035 tRNA_token = tRNA_token.substr(4);
4037 CCachedRegexp threeLettersPlusDigits = regexpCache.Get(
4038 "^[A-Za-z][A-Za-z][A-Za-z]\\d*$");
4039 if (! tRNA_token.empty() ) {
4040 if ( threeLettersPlusDigits->IsMatch(tRNA_token) ) {
4041 tRNA_token = tRNA_token.substr(0, 3);
4043 out_string_list.push_back(tRNA_token);
4053 if (out_justTrnaText) {
4054 *out_justTrnaText =
false;
4058 if ( comment.empty() )
return '\0';
4066 list<string>::const_iterator head_iter =
head.begin();
4067 bool is_ambig =
false;
4068 for( ; head_iter !=
head.end(); ++head_iter ) {
4069 const string &
str = *head_iter;
4070 if(
str.empty() )
continue;
4072 if (noSingleLetter &&
str.length() == 1) {
4077 if(curraa !=
'\0') {
4080 }
else if( curraa != aa) {
4096 if( comment.find_first_of(
"0123456789") != string::npos ) {
4100 if (out_justTrnaText) {
4101 *out_justTrnaText = justt;
4130 string name =
rna.GetExt().GetName();
4131 bool justTrnaText =
false;
4139 trp.
SetAa().SetNcbieaa(aa);
4143 }
else if (is_iMet) {
4146 }
else if (aa ==
'I') {
4157 bool justTrnaText =
false;
4163 trna.
SetAa().SetNcbieaa(aa);
4181 }
else if (aa ==
'I') {
4219 bool justTrnaText =
false;
4222 trp.
SetAa().SetNcbieaa(aa);
4263 const string &gb_qual_qual = gb_qual.
GetQual();
4264 string &gb_qual_val = gb_qual.
SetVal();
4280 if (
rna.IsSetExt() &&
rna.GetExt().IsName() ) {
4281 const string &name =
rna.SetExt().SetName();
4282 if ( name.empty() ) {
4291 if (!
rna.IsSetExt()) {
4293 rna.SetRnaProductName(gb_qual_val, remainder);
4302 if(
rna.GetExt().IsGen() ) {
4311 if (
rna.GetExt().IsName() &&
NStr::Equal(
rna.GetExt().GetName(), gb_qual_val)) {
4315 const string &name = (
rna.IsSetExt() ?
rna.GetExt().GetName() :
kEmptyStr );
4316 if (! name.empty() ) {
4318 if (rDNA_pos !=
NPOS) {
4319 gb_qual_val[rDNA_pos+1] =
'R';
4343 rna.SetExt().SetName( gb_qual_val );
4362 if (trna->IsSetAa() || trna->IsSetAnticodon()) {
4364 bool apply_aa =
false;
4365 bool apply_anticodon =
false;
4366 bool ok_to_apply =
true;
4369 if (!
rna.IsSetExt() || !
rna.GetExt().IsTRNA()) {
4370 if (trna->IsSetAa()) {
4373 if (trna->IsSetAnticodon()) {
4374 apply_anticodon =
true;
4378 if (trna->IsSetAa()) {
4379 if (
rna.GetExt().GetTRNA().IsSetAa()) {
4380 if (
rna.GetExt().GetTRNA().GetAa().IsIupacaa()) {
4381 if (trna->GetAa().GetIupacaa() !=
rna.GetExt().GetTRNA().GetAa().GetIupacaa()) {
4382 ok_to_apply =
false;
4391 if (trna->IsSetAnticodon()) {
4392 if (
rna.GetExt().GetTRNA().IsSetAnticodon()) {
4395 ok_to_apply =
false;
4398 apply_anticodon =
true;
4404 rna.SetExt().SetTRNA().SetAa().SetIupacaa(trna->GetAa().GetNcbieaa());
4407 if (apply_anticodon) {
4409 anticodon->
Add(trna->GetAnticodon());
4410 rna.SetExt().SetTRNA().SetAnticodon(*anticodon);
4424 const string& qual = gb_qual.
GetQual();
4443 static const char *
const ignored_quals[] =
4444 {
"label",
"allele",
"experiment",
"inference",
"UniProtKB_evidence",
4445 "dbxref",
"replace",
"rpt_unit_seq",
"rpt_unit_range" };
4451 static CMutex ignored_quals_raw_initialization_mutex;
4453 CMutexGuard guard(ignored_quals_raw_initialization_mutex);
4454 if (ignored_quals_raw.
empty()) {
4455 copy(ignored_quals, ignored_quals +
sizeof(ignored_quals) /
sizeof(ignored_quals[0]),
4456 inserter(ignored_quals_raw, ignored_quals_raw.
begin()));
4460 if (ignored_quals_raw.
find(qual) != ignored_quals_raw.
end()) {
4484 auto& org = biosrc.
SetOrg();
4487 if ( org.IsSetOrgname()) {
4488 const auto& orgname = org.GetOrgname();
4489 bool needs_env_sample =
false;
4490 bool needs_metagenomic =
false;
4491 if (orgname.IsSetLineage()) {
4492 string lineage = orgname.GetLineage();
4494 needs_env_sample =
true;
4497 needs_metagenomic =
true;
4500 if (orgname.IsSetDiv()
4502 needs_env_sample =
true;
4505 if (needs_env_sample || needs_metagenomic) {
4506 bool has_env_sample =
false;
4507 bool has_metagenomic =
false;
4510 if ((*it)->IsSetSubtype()) {
4512 has_env_sample =
true;
4515 has_metagenomic =
true;
4520 if (needs_env_sample && !has_env_sample) {
4525 if (needs_metagenomic && !has_metagenomic) {
4540 return (
mod->IsSetSubtype() &&
4542 mod->IsSetSubname() &&
4554 size_t before = modset.size();
4555 modset.erase(
std::remove_if(modset.begin(), modset.end(), matcher), modset.end());
4556 if (before != modset.size()) {
4559 if (modset.empty()) {
4574 if ((*it)->IsSetSubtype() &&
4578 (*it)->IsSetSubname() &&
4590 return (
mod->IsSetSubtype() &&
4592 mod->IsSetSubname() &&
4606 size_t before = modset.size();
4607 modset.erase(
std::remove_if(modset.begin(), modset.end(), matcher), modset.end());
4608 if (before != modset.size()) {
4611 if (modset.empty()) {
4619 void CNewCleanup_imp::x_FlattenPubEquiv(
CPub_equiv& pub_equiv)
4624 if(
FIELD_IS(**pub_iter, Equiv) ) {
4626 x_FlattenPubEquiv(equiv);
4627 copy(equiv.
Set().begin(), equiv.
Set().end(), back_inserter(data));
4689 if (start !=
NPOS) {
4692 string replace_val =
str.substr(start + 1, (end - start) - 1);
4731 switch (loc.
Which()) {
4790 if(
tag.GetStr().find(
":") == string::npos ) {
4796 string db = dbt.
GetDb();
4807 vector<string> tags;
4812 tag.SetStr( tags.front() );
4813 vector<string>::const_iterator str_iter = tags.begin() + 1;
4814 for( ; str_iter != tags.end(); ++str_iter ) {
4817 new_tag->
SetTag().SetStr( *str_iter );
4818 out_new_dbtags.push_back( new_tag );
4827 return (codon1 < codon2);
4833 return (codon1 == codon2);
4839 char temp_aa =
'\0';
4841 size_t num_converted = 0;
4843 switch( trna_aa.
Which() ) {
4864 *out_aa_char = temp_aa;
4866 if( num_converted > 0 ) {
4877 tRNA.
SetAa().SetNcbieaa( old_value );
4899 if( !component )
return;
4900 if ( component->empty() )
return;
4902 string component_copy = *component;
4904 const string::size_type
len = component_copy.length();
4905 if (
len > 1 && component_copy[0] ==
'(' && component_copy[
len - 1] ==
')' && component_copy.find(
'(', 1) == string::npos ) {
4906 component_copy = component_copy.substr( 1, component_copy.length() - 2 );
4918 const string* fwd_seq,
4919 const string* rev_seq,
4920 const string* fwd_name,
4921 const string* rev_name) :
4961 out_pcr_set.clear();
4963 const string* fwd_primer_seq =
nullptr;
4964 const string* rev_primer_seq =
nullptr;
4965 const string* fwd_primer_name =
nullptr;
4966 const string* rev_primer_name =
nullptr;
4969 #define PARSEPCRSET_CASE(Subtype) \
4970 case NCBI_SUBSOURCE(Subtype): \
4971 if( (*subsrc_iter)->IsSetName() ) { \
4972 Subtype = &((*subsrc_iter)->GetName()); \
4988 #undef PARSEPCRSET_CASE
4991 vector<string> fwd_seq_list;
4993 vector<string> rev_seq_list;
4995 vector<string> fwd_name_list;
4997 vector<string> rev_name_list;
5000 vector<string>::iterator curr_fwd_seq = fwd_seq_list.begin();
5001 vector<string>::iterator curr_rev_seq = rev_seq_list.begin();
5002 vector<string>::iterator curr_fwd_name = fwd_name_list.begin();
5003 vector<string>::iterator curr_rev_name = rev_name_list.begin();
5005 while (curr_fwd_seq != fwd_seq_list.end() ||
5006 curr_rev_seq != rev_seq_list.end() ||
5007 curr_fwd_name != fwd_name_list.end() ||
5008 curr_rev_name != rev_name_list.end() )
5010 const string* fwd_seq = ( curr_fwd_seq != fwd_seq_list.end() ? &*curr_fwd_seq++ :
nullptr );
5011 const string* rev_seq = ( curr_rev_seq != rev_seq_list.end() ? &*curr_rev_seq++ :
nullptr );
5012 const string* fwd_name = ( curr_fwd_name != fwd_name_list.end() ? &*curr_fwd_name++ :
nullptr );
5013 const string* rev_name = ( curr_rev_name != rev_name_list.end() ? &*curr_rev_name++ :
nullptr );
5015 out_pcr_set.push_back(
CPCRParsedSet(fwd_seq, rev_seq, fwd_name, rev_name) );
5026 if( str_iter->empty() ) {
5038 list< CRef< CPCRPrimer > > &primer_list =
return_value->Set();
5040 vector<string> seq_list;
5042 vector<string> name_list;
5045 vector<string>::const_iterator name_iter = name_list.begin();
5052 const string* curr_name =
nullptr;
5053 if ( name_iter != name_list.end() ) {
5054 curr_name = &*name_iter;
5059 curr_primer->
SetSeq().Set( *seq_iter );
5061 curr_primer->
SetName().Set( *curr_name );
5063 primer_list.push_back( curr_primer );
5064 last_primer = curr_primer;
5069 for ( ; name_iter != name_list.end() ; ++name_iter ) {
5070 last_primer->
SetName().Set() +=
":" + *name_iter;
5075 for ( ; name_iter != name_list.end() ; ++name_iter ) {
5077 curr_primer->
SetName().Set( *name_iter );
5078 primer_list.push_back( curr_primer );
5083 if( primer_list.empty() ) {