97 #define NCBI_USE_ERRCODE_X ObjMgr_SeqUtil
108 if ((**it).IsSource())
109 return &(**it).GetSource();
147 cleaned_location->
Assign(*cds_loc);
156 cleaned_location->
Assign(*rev_loc);
181 return scope.GetTaxId(idh);
187 const auto& bioSource = pSourceFeat->GetData().GetBiosrc();
188 if (!bioSource.CanGetOrg()) {
191 return bioSource.GetOrg().GetTaxId();
201 const auto& bioSource = pSourceFeat->GetData().GetBiosrc();
202 if (bioSource.CanGetOrg()) {
203 pOrgRef = &bioSource.GetOrg();
214 if (!pSource || !pSource->CanGetOrg()) {
217 return &pSource->GetOrg();
225 return &pSourceFeat->GetData().GetBiosrc();
247 vector<CSeqdesc::E_Choice>
types;
256 if ( desc.
IsOrg() ) {
288 if ((**it).IsMolinfo())
289 return &(**it).GetMolinfo();
298 for ( ; desc_iter; ++desc_iter) {
348 }
catch (exception&) {
366 "the sequence is not a protein");
370 vector<CMappedFeat> best_feats;
373 if (
range.GetToOpen() > seq_length ) {
374 range.SetToOpen(seq_length);
377 if ( length > best_length ) {
378 best_length = length;
381 if ( length == best_length ) {
382 best_feats.push_back(*it);
385 if ( best_feats.empty() ) {
388 "the sequence does't have prot feature");
390 if ( best_feats.size() > 1 ) {
393 "the sequence have ambiguous prot feature");
396 best_feats[0].GetData().GetProt().GetLabel(&ret);
400 "the prot feature doesn't return name");
408 switch (GetErrCode()) {
420 (
const_cast<CSeq_id*
>(
id.GetPointer()));
429 (
const_cast<CSeq_id*
>(
id.GetPointer()));
438 (
const_cast<CSeq_id*
>(
id.GetPointer()));
447 (
const_cast<CSeq_id*
>(
id.GetPointer()));
456 (
const_cast<CSeq_id*
>(
id.GetPointer()));
479 "sequence::GetId(): gi seq-id not found in the list");
494 "sequence::GetId(): text seq-id not found in the list");
562 if (!idh)
return ret;
582 idh.
IsGi())
return idh;
587 if (db ==
"ti" || db ==
"SRA")
return idh;
605 catch (exception& e) {
606 ERR_POST(
"sequence::GetId(): exception: "<<e.what());
615 "sequence::GetId(): seq-id not found in the scope");
631 "Unable to get Seq-id from handle");
654 catch (exception& e) {
662 "sequence::GetGiForAccession(): invalid seq-id type");
680 "sequence::GetGiForId(): seq-id not found in the scope");
702 "sequence::GetAccessionForGi(): seq-id not found in the scope");
723 "sequence::GetAccessionForId(): seq-id not found in the scope");
746 if ( rec.
GetIds().empty() ) {
792 CScope* scope,
int* frame)
805 if (base_frame > 0) {
809 *frame = (3 + rl.
m_Ranges.front()->GetFrom() - base_frame) % 3 + 1;
820 <<
"SourceToProduct:"
821 " parent and child have opposite orientations");
825 (*it)->SetFrom(((*it)->GetFrom() - base_frame) / 3);
826 (*it)->SetTo (((*it)->GetTo() - base_frame) / 3);
837 return rl.
Resolve(scope, rl_flags);
851 if (base_frame > 0) {
854 TSeqPos nuc_length, prot_length;
871 from = (*it)->GetFrom() * 3 + base_frame;
876 to = (*it)->GetTo() * 3 + base_frame + 2;
878 (*it)->SetFrom(from);
890 template <
class T,
class U>
893 bool operator()(
const pair<T,U>& p1,
const pair<T,U>& p2)
const
895 return p1.first < p2.first;
899 template <
class T,
class U>
902 bool operator()(
const pair<T,U>& p1,
const pair<T,U>& p2)
const
904 return p1.second < p2.second;
917 if( gene1.first != gene2.first ) {
918 return gene1.first < gene2.first;
921 const CSeq_loc &loc1 = gene1.second->GetLocation();
922 const CSeq_loc &loc2 = gene2.second->GetLocation();
927 if( gene1.second->IsSetData() && gene1.second->GetData().IsGene() &&
928 gene2.second->IsSetData() && gene2.second->GetData().IsGene() )
933 gene1.second->GetData().GetGene().
GetLabel( &gene1_label );
934 gene2.second->GetData().GetGene().GetLabel( &gene2_label );
935 return gene1_label < gene2_label;
954 bool revert_locations =
false;
956 switch (overlap_type) {
968 revert_locations =
true;
1004 if ( bioseq_handle ) {
1008 circular_id = bioseq_handle.
GetSeqId();
1013 const CSeq_id* loc_id =
nullptr;
1017 catch (exception&) {
1021 circular_id.
Reset(loc_id);
1030 _TRACE(
"test for circularity failed: " << e.what()) ;
1035 if (circular_id &&
range.GetFrom() >
range.GetTo()) {
1040 sub_loc->
SetId().Assign(*circular_id);
1049 sub_loc->
SetId().Assign(*circular_id);
1071 if( ! circular_id &&
range.GetFrom() >
range.GetTo() ) {
1080 unique_ptr<CFeat_CI> feat_it_ptr;
1083 circular_length,
range, loc, sel, scope, strand);
1085 if ( circular_loc ) {
1086 if ( !bioseq_handle ) {
1089 feat_it_ptr.reset(
new CFeat_CI(scope, *circular_loc, sel) );
1091 else if ( bioseq_handle ) {
1092 feat_it_ptr.reset(
new CFeat_CI(bioseq_handle,
range, strand, sel) );
1096 feat_it_ptr.reset(
new CFeat_CI(scope, loc, sel) );
1103 cleaned_loc->
Assign( loc );
1109 plugin->
processLoc( bioseq_handle, cleaned_loc, circular_length );
1112 for ( ; feat_it; ++feat_it) {
1119 EOverlapType overlap_type_this_iteration = overlap_type;
1120 bool revert_locations_this_iteration = revert_locations;
1123 bool shouldContinueToNextIteration =
false;
1125 shouldContinueToNextIteration,
1126 cleaned_loc_this_iteration,
1128 overlap_type_this_iteration,
1129 revert_locations_this_iteration,
1133 annot_overlap_type);
1134 if( shouldContinueToNextIteration ) {
1142 if ( !revert_locations_this_iteration ) {
1145 *cleaned_loc_this_iteration,
1146 overlap_type_this_iteration,
1152 *cleaned_loc_this_iteration,
1153 overlap_type_this_iteration,
1161 *candidate_feat_loc,
1162 overlap_type_this_iteration,
1168 *candidate_feat_loc,
1169 overlap_type_this_iteration,
1177 candidate_feat_loc, scope, sel, circular_length );
1191 feats.push_back(sc);
1199 catch (exception&) {
1200 _TRACE(
"GetOverlappingFeatures(): error: feature iterator failed");
1203 std::stable_sort(feats.begin(), feats.end(),
1218 overlap_type, scores, scope, opts, plugin );
1219 if (scores.size()) {
1221 return scores.back().second;
1223 return scores.front().second;
1240 overlap_type, scores, scope, opts, plugin );
1242 if (scores.size()) {
1244 return scores.back().second;
1246 return scores.front().second;
1265 bool has_xref =
false;
1284 if ((*it)->IsSetId() && (*it)->GetId().IsLocal()) {
1308 bool search_both_strands =
true)
1316 if (scores.size()) {
1317 overlap = scores.front().second;
1320 if (search_both_strands && !overlap) {
1336 if (scores.size()) {
1337 overlap = scores.front().second;
1348 bool search_both_strands)
1351 scope, search_both_strands);
1358 bool search_both_strands)
1362 search_both_strands);
1370 switch ( eTransSplicing ) {
1392 if ( !it ) ret.
Reset();
1406 if ( it ) ret.
Reset();
1447 if ((*it)->IsSetData() && (*it)->GetData().IsGene() &&
1448 (*it)->GetData().GetGene().IsSetPseudo() &&
1449 (*it)->GetData().GetGene().GetPseudo()) {
1455 if (gene &&
IsPseudo(*gene, scope)) {
1480 return p->GetSeq_feat();
1495 return p->GetSeq_feat();
1538 if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() && (*xit)->GetData().GetGene().IsSuppressed()) {
1541 if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal() &&
1542 (!(*xit)->IsSetData() || (*xit)->GetData().IsGene())) {
1545 if (far_feats.size() > 0) {
1546 return far_feats.front().GetSeq_feat();
1550 if ((*xit)->IsSetData() && (*xit)->GetData().IsGene()) {
1553 }
else if ((*xit)->IsSetData() && (*xit)->GetData().IsGene()) {
1554 const CGene_ref& gene = (*xit)->GetData().GetGene();
1631 feats, scope, opts, plugin );
1633 if (feats.size() < 2) {
1634 if (feats.size() == 1) {
1635 mrna_feat = feats.front().second;
1648 const CSeq_feat& feat = *feat_iter->second;
1657 for ( ; obj_iter; ++obj_iter) {
1658 if (obj_iter->IsSetType() &&
1659 obj_iter->GetType().IsStr() &&
1660 obj_iter->GetType().GetStr() ==
"MrnaProteinLink") {
1661 string prot_id_str = obj_iter->GetField(
"protein seqID")
1662 .GetData().GetStr();
1664 vector<CSeq_id_Handle> ids = scope.
GetIds(prot_id);
1666 ITERATE (vector<CSeq_id_Handle>, id_iter, ids) {
1667 if (product_id.
Match(*id_iter->GetSeqId())) {
1668 mrna_feat.
Reset(&feat);
1676 catch (exception&) {
1693 const CSeq_feat& mrna = *feat_iter->second;
1710 for ( ; other_iter && !mrna_feat; ++other_iter) {
1718 if ( !prot_handle ) {
1722 if (prot_handle.
IsSynonym(product_id)) {
1724 matching_feats.push_back(*feat_iter);
1729 if ( !matching_feats.empty() ) {
1731 feats.swap(matching_feats);
1732 if ( feats.size() == 1 ) {
1733 mrna_feat = feats.front().second;
1738 catch (exception&) {
1743 string transcript_id = cds_feat.
GetNamedQual(
"transcript_id");
1744 if ( !transcript_id.empty() ) {
1745 ITERATE (vector<TFeatScore>, feat_iter, feats) {
1746 const CSeq_feat& feat = *feat_iter->second;
1747 string other_transcript_id =
1749 if (transcript_id == other_transcript_id) {
1750 mrna_feat.
Reset(&feat);
1762 mrna_feat = feats.back().second;
1764 mrna_feat = feats.front().second;
1793 unique_ptr<CFeat_CI> &feat_ci,
1803 feat_ci, circular_length,
range, loc, sel, scope, strand);
1806 if ( bioseq_handle ) {
1807 feat_ci.reset(
new CFeat_CI(bioseq_handle,
range, strand, sel));
1809 feat_ci.reset(
new CFeat_CI(scope, loc, sel));
1824 bool &shouldContinueToNextIteration,
1828 bool &revert_locations_this_iteration,
1846 cleaned_loc_this_iteration, candidate_feat_loc,
1847 overlap_type_this_iteration,
1848 revert_locations_this_iteration,
1849 bioseq_handle, feat, circular_length, annot_overlap_type);
1863 cleaned_loc, candidate_feat_loc,
1864 scope, sel, circular_length);
1882 unique_ptr<CGetOverlappingFeaturesPlugin> cds_plugin(
1892 feats, scope, opts, cds_plugin.get());
1895 if (feats.size() < 2) {
1896 if (feats.size() == 1) {
1897 cds_feat = feats.front().second;
1908 for ( ; obj_iter; ++obj_iter) {
1909 if (obj_iter->IsSetType() &&
1910 obj_iter->GetType().IsStr() &&
1911 obj_iter->GetType().GetStr() ==
"MrnaProteinLink") {
1912 prot_id_str = obj_iter->GetField(
"protein seqID").GetData().GetStr();
1916 if ( !prot_id_str.empty() ) {
1918 vector<CSeq_id_Handle> ids = scope.
GetIds(prot_id);
1924 const CSeq_feat& feat = *feat_iter->second;
1930 ITERATE (vector<CSeq_id_Handle>, id_iter, ids) {
1931 if (
id.Match(*id_iter->GetSeqId())) {
1932 cds_feat.
Reset(&feat);
1938 catch (exception&) {
1965 for ( ; iter; ++iter) {
1975 if ( !protein_id ) {
1979 TFeatScores::const_iterator feat_iter = feats.begin();
1980 TFeatScores::const_iterator feat_end = feats.end();
1981 for ( ; feat_iter != feat_end && !cds_feat; ++feat_iter) {
1985 const CSeq_feat& cds = *feat_iter->second;
1992 if ( !prot_handle ) {
1996 if (prot_handle.
IsSynonym(*protein_id)) {
1998 cds_feat.
Reset(&cds);
2003 catch ( exception& ) {
2011 string transcript_id = mrna_feat.
GetNamedQual(
"transcript_id");
2012 if ( !transcript_id.empty() ) {
2014 const CSeq_feat& feat = *feat_iter->second;
2015 string other_transcript_id =
2017 if (transcript_id == other_transcript_id) {
2018 cds_feat.
Reset(&feat);
2030 cds_feat = feats.back().second;
2032 cds_feat = feats.front().second;
2054 feats, scope, opts, plugin );
2056 if (feats.size() < 2) {
2057 if (feats.size() == 1) {
2058 gene_feat = feats.front().second;
2077 const CSeq_feat& feat = *feat_it->second;
2079 string other_ref_str;
2080 other_ref.
GetLabel(&other_ref_str);
2081 if (ref_str == other_ref_str) {
2094 if ((*dbxref)->GetDb() ==
"GeneID" ||
2095 (*dbxref)->GetDb() ==
"LocusID") {
2096 gene_id = (*dbxref)->GetTag().GetId();
2103 const CSeq_feat& feat = *feat_it->second;
2105 const string& db = (*dbxref)->GetDb();
2106 if ((db ==
"GeneID" || db ==
"LocusID") &&
2107 (*dbxref)->GetTag().GetId() == gene_id) {
2118 gene_feat = feats.back().second;
2120 gene_feat = feats.front().second;
2143 feats, scope, opts, plugin );
2145 if (feats.size() < 2) {
2146 if (feats.size() == 1) {
2147 feat_ref = feats.front().second;
2161 const CSeq_feat& feat = *feat_it->second;
2167 string other_ref_str;
2168 other_ref.
GetLabel(&other_ref_str);
2169 if (ref_str == other_ref_str) {
2189 feat_ref = feats.front().second;
2218 for ( ; feat_it; ++feat_it) {
2226 string other_ref_str;
2227 other_ref->
GetLabel(&other_ref_str);
2228 if (other_ref_str != ref_str) {
2241 mrna_feats.push_back(feat_ref);
2257 if ((*dbxref)->GetDb() ==
"GeneID" ||
2258 (*dbxref)->GetDb() ==
"LocusID") {
2259 gene_id = (*dbxref)->GetTag().GetId();
2268 for ( ; feat_it; ++feat_it) {
2289 if (((*dbxref)->GetDb() ==
"GeneID" ||
2290 (*dbxref)->GetDb() ==
"LocusID") &&
2291 (*dbxref)->GetTag().GetId() == gene_id) {
2292 mrna_feats.push_back(ref);
2311 scope, opts, plugin );
2313 mrna_feats.push_back(feat);
2324 list< CConstRef<CSeq_feat> > mrna_feats;
2326 if (mrna_feats.size()) {
2330 cds_feats.push_back(cds);
2338 scope, opts, plugin );
2340 cds_feats.push_back(feat);
2355 switch (feat_type) {
2359 overlap_type, scope, opts, plugin );
2364 overlap_type, scope, opts, plugin );
2370 overlap_type, scope, opts, plugin );
2378 (feat.
GetLocation(), feat_type, overlap_type, scope, opts, plugin );
2430 (feat.
GetLocation(), subtype, overlap_type, scope, opts, plugin );
2448 if (
id.IsLocal() ) {
2450 if ( obj_id.
IsId() ) {
2451 int local_id = obj_id.
GetId();
2454 if ( feat_handle ) {
2562 return &
f.GetOriginalFeature();
2607 return &(
fi->GetOriginalFeature());
2634 return &(
fi->GetOriginalFeature());
2715 m_Flags(fInstantiateGaps | fAssembleParts | fEnableGI),
2716 m_GapMode(eGM_letters)
2718 m_Gen.reset(
new sequence::CDeflineGenerator);
2734 loc2.
SetWhole().Assign(*it->GetSeqId());
2750 const string& custom_title)
2762 if (! desc.
IsUser())
continue;
2766 if (! oi.
IsStr())
continue;
2787 const CSeq_id& sid = **id_itr;
2788 switch (sid.
Which()) {
2795 const string& db = dbtag.
GetDb();
2818 if ((*id)->IsGi()) {
2829 switch (best_id->
Which())
2851 for (
const auto& pId : bioseq.
GetId()) {
2852 if (pId->IsGeneral()) {
2856 if (pId->IsGenbank()) {
2872 return (pAccession || pGnlId);
2885 bool hide_prefix =
false;
2936 pair<TSeq_id_HandleSet::iterator, bool> p
2940 "Duplicate Seq-id " + (*id)->AsFastaString()
2941 +
" in FASTA output");
2947 if (!(m_Flags & fIgnoreOriginalID) &&
2948 s_ShouldUseOriginalID(bioseq)) {
2949 string origID = s_FastaGetOriginalID(bioseq);
2950 if (! NStr::IsBlank(origID)) {
2951 m_Out << "lcl|" << origID;
2953 x_WriteAsFasta(bioseq);
2956 x_WriteAsFasta(bioseq);
2961 for (CSeq_loc_CI it(*location); it; ++it) {
2962 CSeq_loc::TRange range = it.GetRange();
2963 TSeqPos from = range.GetFrom() + 1, to = range.GetTo() + 1;
2964 _ASSERT(from <= to);
2966 if (it.IsSetStrand() && IsReverse(it.GetStrand())) {
2967 m_Out << 'c
' << to << '-
' << from;
2969 m_Out << from << '-
' << to;
// Translates this stream's m_Flags bits into the corresponding
// sequence::CDeflineGenerator title flags.
// NOTE(review): the function braces and the final return are not visible in
// this excerpt; presumably title_flags is what gets returned -- confirm.
2977 sequence::CDeflineGenerator::TUserFlags
2978 CFastaOstream::x_GetTitleFlags(void) const
// Start from an empty flag set; fFastaFormat is always requested.
2980 sequence::TGetTitleFlags title_flags = 0;
2981 title_flags |= sequence::CDeflineGenerator::fFastaFormat;
// Each of the following stream flags is forwarded one-to-one.
2983 if ((m_Flags & fNoExpensiveOps) != 0) {
2984 title_flags |= sequence::CDeflineGenerator::fNoExpensiveOps;
2986 if ((m_Flags & fShowModifiers) != 0) {
2987 title_flags |= sequence::CDeflineGenerator::fShowModifiers;
2989 if ((m_Flags & fDoNotUseAutoDef) != 0) {
2990 title_flags |= sequence::CDeflineGenerator::fDoNotUseAutoDef;
// Complementary case: when fDoNotUseAutoDef is clear, fUseAutoDef is set
// instead.
2993 if ((m_Flags & fDoNotUseAutoDef) == 0) {
2994 title_flags |= sequence::CDeflineGenerator::fUseAutoDef;
// Writes the title text for a sequence to m_Out.
// @param bioseq_handle  sequence for which a defline is generated when no
//                       custom title is supplied
// @param custom_title   used verbatim as the title source when non-empty
3000 void CFastaOstream::x_WriteSeqTitle(const CBioseq_Handle & bioseq_handle,
3001 const string& custom_title)
// A non-empty custom_title takes precedence; otherwise m_Gen generates the
// defline using the flags computed by x_GetTitleFlags().
3003 string safe_title = (!custom_title.empty()) ? custom_title
3004 : m_Gen->GenerateDefline(bioseq_handle, x_GetTitleFlags());
// Nothing below runs for an empty title.
3006 if ( !safe_title.empty() ) {
// Unless fKeepGTSigns is set, every ">" in the title is replaced with "_".
3007 if ( !(m_Flags & fKeepGTSigns) ) {
3008 NStr::ReplaceInPlace(safe_title, ">", "_");
// NOTE(review): the statement guarded by this leading-space check is not
// visible in this excerpt -- presumably it emits a separator before the
// title; confirm against the full source.
3010 if (safe_title[0] != ' ') {
// Optional HTML encoding is applied after the '>' replacement above.
3014 if ((m_Flags & fHTMLEncode) != 0) {
3015 safe_title = NStr::HtmlEncode(safe_title);
3017 m_Out << safe_title;
// Writes the Seq-ids and then the title for a bare CBioseq.
// @param bioseq        sequence whose ids and title are written
// @param location      forwarded to x_WriteSeqIds
// @param no_scope      ignored (see the inline note on the parameter)
// @param custom_title  forwarded to x_WriteSeqTitle
3022 void CFastaOstream::WriteTitle(const CBioseq& bioseq,
3023 const CSeq_loc* location,
3024 bool no_scope, // not used
3025 const string& custom_title)
3027 x_WriteSeqIds(bioseq, location);
// x_WriteSeqTitle needs a CBioseq_Handle, so the bioseq is added to a
// locally constructed scope on the CObjectManager instance to obtain one.
3028 CScope scope(*CObjectManager::GetInstance());
3029 CBioseq_Handle bioseq_handle = scope.AddBioseq(bioseq);
3030 x_WriteSeqTitle(bioseq_handle, custom_title);
// Writes the Seq-ids and then the title for a sequence given by handle.
// @param bioseq_handle  handle of the sequence to describe
// @param location       forwarded to x_WriteSeqIds
// @param custom_title   forwarded to x_WriteSeqTitle
3033 void CFastaOstream::WriteTitle(const CBioseq_Handle& bioseq_handle,
3034 const CSeq_loc* location,
3035 const string& custom_title)
// x_WriteSeqIds takes a CBioseq, so the core object is pulled from the
// handle first.
3037 const CBioseq& bioseq = *bioseq_handle.GetBioseqCore();
3038 x_WriteSeqIds(bioseq, location);
3039 x_WriteSeqTitle(bioseq_handle, custom_title);
// Maps a mask location through `mapper`, optionally first merging in copies
// of the mask mapped down and/or up, as selected by fMapMasksDown and
// fMapMasksUp in m_Flags.
// NOTE(review): the remaining parameter(s) (a `scope` is used below), the
// try-block opener, the catch handler body and the return statement are not
// visible in this excerpt.
3043 CConstRef<CSeq_loc> CFastaOstream::x_MapMask(CSeq_loc_Mapper& mapper,
3044 const CSeq_loc& mask,
3045 const CSeq_id* base_seq_id,
// Start from the unmodified input mask.
3048 CConstRef<CSeq_loc> mapped_mask(&mask);
3050 // Mapping down requires the higher-level ID as a reference, even
3051 // when given a scope, and as such should precede mapping up to
3052 // keep sequence::GetId from bombing out.
3053 if ((m_Flags & fMapMasksDown) != 0 && scope) {
// The down-mapper is built on the bioseq identified by the mask's own id;
// its result is added to the current mask with full sort-and-merge.
3055 CSeq_loc_Mapper mapper_down
3056 (scope->GetBioseqHandle(sequence::GetId(*mapped_mask, scope)),
3057 CSeq_loc_Mapper::eSeqMap_Down);
3058 mapped_mask = mapped_mask->Add(*mapper_down.Map(*mapped_mask),
3059 CSeq_loc::fSortAndMerge_All, 0);
3060 } catch (CObjmgrUtilException&) {
// Up-mapping additionally requires base_seq_id, which names the bioseq the
// up-mapper is built on.
3063 if ((m_Flags & fMapMasksUp) != 0 && scope && base_seq_id) {
3064 CSeq_loc_Mapper mapper_up(scope->GetBioseqHandle(*base_seq_id),
3065 CSeq_loc_Mapper::eSeqMap_Up);
3066 mapped_mask = mapped_mask->Add(*mapper_up.Map(*mapped_mask),
3067 CSeq_loc::fSortAndMerge_All, 0);
// Finally run the (possibly augmented) mask through the caller's mapper.
3069 mapped_mask = mapper.Map(*mapped_mask);
// Fills `masking_state` with position -> mask-state entries derived from
// m_SoftMask and/or m_HardMask: each masked range writes its state at the
// range start and 0 at the open end, so entries act as state transitions.
// NOTE(review): several lines of this function (remaining parameters, some
// declarations, branch openers/closers and the tail after the last visible
// line) are missing from this excerpt; comments below cover only what is
// visible.
3074 void CFastaOstream::x_GetMaskingStates(TMSMap& masking_state,
3075 const CSeq_id* base_seq_id,
3076 const CSeq_loc* location,
3079 CRef<CSeq_loc_Mapper> mapper;
// Masks can only be applied relative to a known base sequence id.
3082 if (m_SoftMask.NotEmpty() || m_HardMask.NotEmpty()) {
3083 _ASSERT(base_seq_id);
// Target of the mapping: the interval [0, length-1] on base_seq_id, where
// length is the length of *location; if that computation throws, the whole
// base sequence is used instead.
3087 TSeqPos length = sequence::GetLength(*location, scope);
3088 loc2.SetInt().SetId().Assign(*base_seq_id);
3089 loc2.SetInt().SetFrom(0);
3090 loc2.SetInt().SetTo(length - 1);
3091 } catch (exception&) {
3092 loc2.SetWhole().Assign(*base_seq_id);
3094 mapper.Reset(new CSeq_loc_Mapper(*location, loc2, scope));
3096 // still useful for filtering out locations on other sequences
3098 whole.SetWhole().Assign(*base_seq_id);
3099 mapper.Reset(new CSeq_loc_Mapper(whole, whole, scope));
// Either mapper is configured to merge all ranges and truncate ranges that
// do not map.
3101 mapper->SetMergeAll();
3102 mapper->TruncateNonmappingRanges();
// A bioseq handle is fetched only when up/down mask mapping is enabled and
// a scope is available.
3104 if (scope && (m_Flags & (fMapMasksUp | fMapMasksDown))) {
3105 bsh = scope->GetBioseqHandle(*base_seq_id);
// First pass: the soft mask if present, otherwise the hard mask.
3108 const CSeq_loc& mask = m_SoftMask ? *m_SoftMask : *m_HardMask;
3109 int type = m_SoftMask ? eSoftMask : eHardMask;
3110 CConstRef<CSeq_loc> mapped_mask = x_MapMask(*mapper, mask, base_seq_id,
// Position 0 starts unmasked; each mapped range turns `type` on at its
// start and back to 0 at its open end.
3113 masking_state[0] = 0;
3114 for (CSeq_loc_CI it(*mapped_mask); it; ++it) {
3115 CSeq_loc_CI::TRange loc_range = it.GetRange();
3116 masking_state[loc_range.GetFrom()] = type;
3117 masking_state[loc_range.GetToOpen()] = 0;
// Second pass, only when both masks are set: the first pass recorded the
// soft mask, so hard-mask ranges are now merged into the existing entries.
3121 if (m_SoftMask.NotEmpty() && m_HardMask.NotEmpty()) {
3122 CConstRef<CSeq_loc> mapped_mask = x_MapMask(*mapper, *m_HardMask,
3123 base_seq_id, scope);
3124 for (CSeq_loc_CI it(*mapped_mask); it; ++it) {
3125 CSeq_loc_CI::TRange loc_range = it.GetRange();
3126 TSeqPos from = loc_range.GetFrom();
3127 TSeqPos to = loc_range.GetToOpen();
// First existing entry whose key is >= from.
3128 TMSMap::iterator ms_it = masking_state.lower_bound(from);
// No entry at or after `from`: record the hard-masked range directly.
3131 if (ms_it == masking_state.end()) {
3132 masking_state[loc_range.GetFrom()] = eHardMask;
3133 masking_state[loc_range.GetToOpen()] = 0;
// An entry already exists exactly at `from`: remember its state and OR the
// hard-mask bit into it.
3135 } else if (ms_it->first == from) {
3136 prev_state = ms_it->second;
3137 ms_it->second |= eHardMask;
3139 // NB: lower_bound's name is misleading, as it actually
// The found entry lies past `from`; the state of the preceding entry is
// read (entry 0 always exists, hence the assertion) before a new entry is
// inserted at the hinted position.
// NOTE(review): construction of `value` is not visible in this excerpt.
3141 _ASSERT(ms_it != masking_state.begin());
3144 prev_state = prev_it->second;
3150 ms_it = masking_state.insert(ms_it,
value);
// Every existing transition strictly inside the range gets the hard-mask
// bit; prev_state tracks the pre-existing state of the last entry visited.
3152 while (++ms_it != masking_state.end() && ms_it->first < to) {
3153 prev_state = ms_it->second;
3154 ms_it->second |= eHardMask;
// No transition exists exactly at `to`.
// NOTE(review): the guarded statement is not visible here -- presumably it
// inserts an entry at `to` restoring prev_state; confirm.
3156 if (ms_it == masking_state.end() || ms_it->first != to) {
3165 const TMSMap& masking_state)
3173 int current_state = 0;
3183 alt_gap_str = uc_hard_mask_str;
3193 if (rem_state == 0) {
3195 current_state = ms_it->second;
3196 if (++ms_it == masking_state.
end()) {
3199 rem_state = ms_it->first - it.
GetPos();
3225 m_Out <<
">?unk" << gap_size;
3227 m_Out <<
">?unk100";
3230 m_Out <<
">?" << gap_size;
3241 pGapLiteral->GetSeq_data().GetGap();
3247 const string sGapModText =
3249 if( ! sGapModText.empty() ) {
3250 m_Out <<
' ' << sGapModText;
3258 while (rem_gap >= rem_line) {
3261 rem_gap -= rem_line;
3266 rem_line -= rem_gap;
3270 if (rem_state >= gap_size) {
3271 rem_state -= gap_size;
3273 while (++ms_it != masking_state.
end()
3274 && ms_it->first < it.
GetPos()) {
3275 current_state = ms_it->second;
3277 if (ms_it == masking_state.
end()) {
3280 rem_state = ms_it->first - it.
GetPos();
3292 : uc_hard_mask_str.
data();
3295 lc_buffer.assign(ptr, count);
3297 ptr = lc_buffer.data();
3299 while ( count >= rem_line ) {
3327 vector<CTSE_Handle> used_tses;
3345 "CFastaOstream: location out of range: " +
label);
3372 switch (entry.
Which()) {
3390 bool no_scope,
const string& custom_title )
3472 out << sPrefix <<
"[gap-type=" <<
gap_type <<
']';
3488 string & gap_type = out_gap_info.
gap_type;
3489 vector<string> & gap_linkage_evidences =
3494 gap_linkage_evidences.clear();
3499 bool need_evidence =
false;
3516 gap_type =
"unknown";
3517 need_evidence = is_linkage;
3520 gap_type =
"within scaffold";
3521 need_evidence =
true;
3524 gap_type = ( is_linkage ?
"within scaffold" :
"between scaffolds" );
3525 need_evidence = is_linkage;
3528 gap_type =
"short arm";
3531 gap_type =
"heterochromatin";
3534 gap_type =
"centromere";
3537 gap_type =
"telomere";
3540 gap_type = ( is_linkage ?
3541 "repeat within scaffold" :
3542 "repeat between scaffolds" );
3543 need_evidence = is_linkage;
3546 gap_type =
"between scaffolds";
3549 gap_type =
"within scaffold";
3550 need_evidence = is_linkage;
3553 gap_type =
"contamination";
3554 need_evidence = is_linkage;
3560 gap_type =
"(ERROR: UNRECOGNIZED_GAP_TYPE:" +
3574 switch( evidence.
GetType() ) {
3576 gap_linkage_evidences.push_back(
"paired-ends");
3579 gap_linkage_evidences.push_back(
"align genus");
3582 gap_linkage_evidences.push_back(
"align xgenus");
3585 gap_linkage_evidences.push_back(
"align trnscpt");
3588 gap_linkage_evidences.push_back(
"within clone");
3591 gap_linkage_evidences.push_back(
"clone contig");
3594 gap_linkage_evidences.push_back(
"map");
3597 gap_linkage_evidences.push_back(
"strobe");
3600 gap_linkage_evidences.push_back(
"unspecified");
3603 gap_linkage_evidences.push_back(
"pcr");
3606 gap_linkage_evidences.push_back(
"proximity ligation");
3609 gap_linkage_evidences.push_back(
"other");
3612 gap_linkage_evidences.push_back(
"(UNRECOGNIZED LINKAGE EVIDENCE:" +
3620 if( need_evidence && gap_linkage_evidences.empty() ) {
3621 gap_linkage_evidences.push_back(
"unspecified");
3622 }
else if( ! need_evidence && ! gap_linkage_evidences.empty() ) {
3625 gap_linkage_evidences.clear();
3635 template <
class Container>
3640 bool is_5prime_complete,
3641 bool is_3prime_complete,
3643 bool remove_trailing_X,
3647 const size_t usable_size = seq.size() > frame ? seq.size() - frame : 0;
3648 const size_t mod = usable_size % 3;
3650 prot.reserve((usable_size + 2) / 3);
3659 int start_state = 0;
3662 typename Container::const_iterator start = seq.begin();
3664 for (
int i = 0;
i < frame; ++
i) {
3671 size_t length = usable_size / 3;
3672 bool check_start = (is_5prime_complete && frame == 0);
3673 bool first_time =
true;
3675 for (
i = 0;
i < length; ++
i) {
3678 for (k = 0; k < 3; ++k, ++start) {
3683 start_state =
state;
3687 if (first_time && check_start) {
3699 for (k = 0; k <
mod; ++k, ++start) {
3703 for (; k < 3; ++k) {
3708 start_state =
state;
3713 if (first_time && check_start) {
3716 }
else if (c !=
'X') {
3726 if ( aa !=
'*' && include_stop && (!
mod) &&
prot.size() > 0 && is_3prime_complete ) {
3735 if (alt_start && is_5prime_complete) {
3743 if ( !include_stop ) {
3745 if (sz != string::npos) {
3750 if (remove_trailing_X) {
3752 for (sz =
prot.size(); sz > 0 &&
prot[sz - 1] ==
'X'; --sz) {
3774 if (
prot->SetInst().SetExt().SetDelta().Set().empty()
3775 ||
prot->GetInst().GetExt().GetDelta().Get().back()->GetLiteral().GetSeq_data().IsGap()) {
3778 seg->SetLiteral().SetLength(0);
3779 prot->SetInst().SetExt().SetDelta().Set().push_back(seg);
3784 if (residue ==
'*' || residue ==
'-') {
3786 if (
last->IsLiteral() &&
last->GetLiteral().IsSetSeq_data() &&
last->GetLiteral().GetSeq_data().IsIupacaa()) {
3788 string current =
last->GetLiteral().GetSeq_data().GetIupacaa().Get();
3789 last->SetLiteral().SetSeq_data().SetNcbieaa().Set(current);
3792 last->SetLiteral().SetSeq_data().SetNcbieaa().Set().append(1, residue);
3793 }
else if (
last->IsLiteral() &&
last->GetLiteral().IsSetSeq_data() &&
last->GetLiteral().GetSeq_data().IsNcbieaa()) {
3795 last->SetLiteral().SetSeq_data().SetNcbieaa().Set().append(1, residue);
3798 last->SetLiteral().SetSeq_data().SetIupacaa().Set().append(1, residue);
3802 last->SetLiteral().SetLength(
len + 1);
3808 if (
prot->SetInst().SetExt().SetDelta().Set().empty()) {
3812 new_seg->SetLiteral().SetLength(add_len);
3813 if (unknown_length) {
3816 prot->SetInst().SetExt().SetDelta().Set().push_back(new_seg);
3819 if (
last->SetLiteral().GetSeq_data().IsGap()
3820 && ((unknown_length &&
last->SetLiteral().IsSetFuzz())
3821 || (!unknown_length && !
last->SetLiteral().IsSetFuzz()))) {
3823 TSeqPos len =
prot->GetInst().GetExt().GetDelta().Get().back()->GetLiteral().GetLength();
3824 prot->SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetLength(
len + add_len);
3829 new_seg->SetLiteral().SetLength(add_len);
3830 if (unknown_length) {
3833 prot->SetInst().SetExt().SetDelta().Set().push_back(new_seg);
3872 prot->SetInst().SetLength(0);
3886 for (
int i = 0;
i < frame; ++
i) {
3893 TSeqPos length = usable_size / 3;
3894 bool check_start = (is_5prime_complete && frame == 0);
3895 bool first_time =
true;
3897 for (
i = 0;
i < length; ++
i) {
3899 bool unknown_length =
false;
3907 for (k = 0; k < 3; ++k, ++start) {
3910 if (is_gap && !unknown_length) {
3914 unknown_length =
true;
3926 if (first_time && check_start) {
3939 bool unknown_length =
false;
3940 TSeqPos pos = (length * 3) + frame;
3941 for (k = 0; k <
mod; ++k, ++start) {
3944 if (is_gap && !unknown_length) {
3948 unknown_length =
true;
3960 for (; k < 3; ++k) {
3967 if (first_time && check_start) {
3981 prot_len += (*seg_it)->GetLiteral().GetLength();
3996 string::size_type j = seq_pos / 3;
4000 CDelta_ext::Tdata::iterator seg_it =
prot->SetInst().SetExt().SetDelta().Set().begin();
4001 string::size_type
offset = 0;
4002 while (seg_it !=
prot->SetInst().SetExt().SetDelta().Set().end()
4003 &&
offset + (*seg_it)->GetLiteral().GetLength() < j) {
4004 offset += (*seg_it)->GetLiteral().GetLength();
4007 if (seg_it !=
prot->SetInst().SetExt().SetDelta().Set().end()
4008 && !(*seg_it)->GetLiteral().GetSeq_data().IsGap()) {
4009 if ((*seg_it)->GetLiteral().GetSeq_data().IsIupacaa()) {
4010 (*seg_it)->SetLiteral().SetSeq_data().SetIupacaa().Set()[j -
offset] = c_aa.
GetNcbieaa();
4012 (*seg_it)->SetLiteral().SetSeq_data().SetNcbieaa().Set()[j -
offset] = c_aa.
GetNcbieaa();
4016 }
else if (j == prot_len) {
4028 if (!
prot->SetInst().SetExt().SetDelta().Set().empty())
4030 end =
prot->SetInst().SetExt().SetDelta().Set().back();
4033 if (end && end->IsLiteral() && end->GetLiteral().IsSetSeq_data()) {
4034 if (end->GetLiteral().GetSeq_data().IsIupacaa()) {
4035 string& last_seg = end->SetLiteral().SetSeq_data().SetIupacaa().Set();
4037 last_seg = last_seg.substr(0, last_seg.length() - 1);
4038 end->SetLiteral().SetLength(
TSeqPos(last_seg.length()));
4040 }
else if (end->GetLiteral().GetSeq_data().IsNcbieaa()) {
4041 string& last_seg = end->SetLiteral().SetSeq_data().SetNcbieaa().Set();
4043 last_seg = last_seg.substr(0, last_seg.length() - 1);
4044 end->SetLiteral().SetLength(
TSeqPos(last_seg.length()));
4052 prot_len += (*seg_it)->GetLiteral().GetLength();
4053 if ((*seg_it)->GetLiteral().IsSetSeq_data()
4054 && (*seg_it)->GetLiteral().GetSeq_data().IsNcbieaa()) {
4055 string current = (*seg_it)->GetLiteral().GetSeq_data().GetNcbieaa();
4057 (*seg_it)->SetLiteral().SetSeq_data().SetIupacaa().Set(current);
4061 prot->SetInst().SetLength(prot_len);
4063 if (
prot->GetInst().GetLength() == 0) {
4065 }
else if (
prot->SetInst().SetExt().SetDelta().Set().size() == 1
4066 &&
prot->SetInst().SetExt().SetDelta().Set().front()->IsLiteral()
4067 &&
prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().IsSetSeq_data()) {
4069 if (
prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().IsIupacaa()) {
4070 string data =
prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().GetIupacaa().Get();
4071 prot->SetInst().ResetExt();
4072 prot->SetInst().SetSeq_data().SetIupacaa().Set(data);
4074 }
else if (
prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().IsNcbieaa()) {
4075 string data =
prot->SetInst().SetExt().SetDelta().Set().front()->GetLiteral().GetSeq_data().GetNcbieaa().Get();
4076 prot->SetInst().ResetExt();
4077 prot->SetInst().SetSeq_data().SetNcbieaa().Set(data);
4088 if (!protein || !protein->
IsAa() || !protein->
IsSetInst()) {
4091 return protein->
SetInst().ConvertDeltaToRaw();
4098 bool remove_trailing_X,
4100 bool is_5prime_complete,
4101 bool is_3prime_complete)
4104 is_5prime_complete, is_3prime_complete, include_stop, remove_trailing_X, alt_start);
4115 !(
flags & fIs5PrimePartial),
4116 !(
flags & fIs3PrimePartial),
4118 flags & fRemoveTrailingX,
4126 bool remove_trailing_X,
4128 bool is_5prime_complete,
4129 bool is_3prime_complete)
4132 is_5prime_complete, is_3prime_complete, include_stop, remove_trailing_X, alt_start);
4142 !(
flags & fIs5PrimePartial),
4143 !(
flags & fIs3PrimePartial),
4145 flags & fRemoveTrailingX,
4155 bool remove_trailing_X,
4162 include_stop, remove_trailing_X, alt_start);
4172 bool remove_trailing_X,
4179 include_stop, remove_trailing_X, alt_start);
4187 bool remove_trailing_X,
4211 bool code_break_include_stop = include_stop;
4214 code_break_include_stop =
true;
4221 code_break_include_stop, remove_trailing_X, alt_start);
4228 string::size_type protlen =
prot.size();
4237 string::size_type
i = seq_pos / 3;
4243 }
else if (
i == protlen) {
4252 if ( !include_stop ) {
4254 if (sz != string::npos) {
4297 for (
auto it = frame_map.
begin(); it != frame_map.
end(); it++) {
4298 tmp_cds->
SetData().SetCdregion().SetFrame(it->first);
4302 it->second.len =
prot.length();
4304 if ((pos ==
prot.length() - 1) && (leftover == it->second.frame_offset)) {
4305 it->second.has_final_stop =
true;
4306 }
else if (pos !=
NPOS) {
4307 it->second.has_internal_stop =
true;
4311 it->second.has_start_m =
true;
4317 if (frame_map[orig_frame].has_final_stop) {
4321 if (is_3complete && !is_5complete) {
4323 for (
auto it = frame_map.
begin(); it != frame_map.
end(); it++) {
4324 if (it->second.has_final_stop) {
4330 if (is_5complete && !is_3complete) {
4346 for (
auto it = frame_map.
begin(); it != frame_map.
end(); it++) {
4347 if (it->second.has_final_stop) {
4354 if (!frame_map[orig_frame].has_internal_stop) {
4359 for (
auto it = frame_map.
begin(); it != frame_map.
end(); it++) {
4360 if (!it->second.has_internal_stop) {
4362 best_frame = it->first;
4378 bool ambiguous =
false;
4380 return FindBestFrame(cds, scope, ambiguous);
4389 bool remove_trailing_X,
4397 include_stop, remove_trailing_X, alt_start);
4406 bool remove_trailing_X,
4417 include_stop, remove_trailing_X, alt_start);
4423 : m_ParentLoc(&parent)
4427 const CSeq_id& cseqid = cit.GetSeq_id();
4428 TRange0 crange = cit.GetRange();
4429 if (crange.IsWholeTo() && scope) {
4437 TRange0 prange = pit.GetRange();
4438 if (prange.IsWholeTo() && scope) {
4443 pos += prange.GetLength();
4449 if (crange.GetFrom() >= prange.GetFrom()) {
4450 abs_from = crange.GetFrom();
4451 fuzz_from = cit.GetFuzzFrom();
4452 if (abs_from == prange.GetFrom()) {
4454 const CInt_fuzz* pfuzz = pit.GetFuzzFrom();
4458 f->Assign(*fuzz_from);
4459 f->Subtract(*pfuzz, abs_from, abs_from);
4460 if (
f->IsP_m() && !
f->GetP_m() ) {
4466 fuzz_from = pfuzz->
Negative(abs_from);
4471 abs_from = prange.GetFrom();
4477 if (crange.GetTo() <= prange.GetTo()) {
4478 abs_to = crange.GetTo();
4479 fuzz_to = cit.GetFuzzTo();
4480 if (abs_to == prange.GetTo()) {
4482 const CInt_fuzz* pfuzz = pit.GetFuzzTo();
4486 f->Assign(*fuzz_to);
4487 f->Subtract(*pfuzz, abs_to, abs_to);
4488 if (
f->IsP_m() && !
f->GetP_m() ) {
4499 abs_to = prange.GetTo();
4505 if (abs_from <= abs_to) {
4507 TSeqPos sigma = pos + prange.GetTo();
4508 intersection->SetFrom(sigma - abs_to);
4509 intersection->SetTo (sigma - abs_from);
4511 intersection->SetFuzz_to().AssignTranslated
4512 (*fuzz_from, intersection->GetTo(), abs_from);
4513 intersection->SetFuzz_to().Negate
4514 (intersection->GetTo());
4517 intersection->SetFuzz_from().AssignTranslated
4518 (*fuzz_to, intersection->GetFrom(), abs_to);
4519 intersection->SetFuzz_from().Negate
4520 (intersection->GetFrom());
4523 intersection->SetStrand(pstrand);
4525 intersection->SetStrand(
Reverse(cstrand));
4529 intersection->SetFrom(abs_from +
delta);
4530 intersection->SetTo (abs_to +
delta);
4532 intersection->SetFuzz_from().AssignTranslated
4533 (*fuzz_from, intersection->GetFrom(), abs_from);
4536 intersection->SetFuzz_to().AssignTranslated
4537 (*fuzz_to, intersection->GetTo(), abs_to);
4540 intersection->SetStrand(pstrand);
4542 intersection->SetStrand(cstrand);
4550 if (
m_Ranges.back()->GetTo() == intersection->GetFrom() - 1
4551 && !
IsReverse(intersection->GetStrand()) ) {
4552 m_Ranges.back()->SetTo(intersection->GetTo());
4553 if (intersection->IsSetFuzz_to()) {
4555 (intersection->SetFuzz_to());
4559 }
else if (
m_Ranges.back()->GetFrom()
4560 == intersection->GetTo() + 1
4561 &&
IsReverse(intersection->GetStrand())) {
4562 m_Ranges.back()->SetFrom(intersection->GetFrom());
4563 if (intersection->IsSetFuzz_from()) {
4565 (intersection->SetFuzz_from());
4576 pos += prange.GetLength();
4591 _ASSERT((*it)->GetFrom() <= (*it)->GetTo());
4592 TSeqPos pos = 0, start = (*it)->GetFrom();
4593 bool keep_going =
true;
4595 TRange0 prange = pit.GetRange();
4596 if (prange.IsWholeTo() && scope) {
4600 TSeqPos length = prange.GetLength();
4601 if (start >= pos && start < pos + length) {
4606 TSeqPos sigma = pos + prange.GetTo();
4607 from = sigma - (*it)->GetTo();
4609 if (from < prange.GetFrom() || from > sigma) {
4610 from = prange.GetFrom();
4615 if ( !(*it)->IsSetStrand()
4617 strand = pit.GetStrand();
4619 strand =
Reverse((*it)->GetStrand());
4621 if (from == prange.GetFrom()) {
4622 fuzz_from = pit.GetFuzzFrom();
4624 if ( !keep_going && (*it)->IsSetFuzz_to() ) {
4627 f->Assign(*fuzz_from);
4631 f->Subtract((*it)->GetFuzz_to(), from, (*it)->GetTo(),
4633 if (
f->IsP_m() && !
f->GetP_m() ) {
4639 if (to == prange.GetTo()) {
4640 fuzz_to = pit.GetFuzzTo();
4642 if (start == (*it)->GetFrom()
4643 && (*it)->IsSetFuzz_from()) {
4646 f->Assign(*fuzz_to);
4650 f->Subtract((*it)->GetFuzz_from(), to,
4652 if (
f->IsP_m() && !
f->GetP_m() ) {
4660 from = start +
delta;
4661 to = (*it)->GetTo() +
delta;
4662 if (to > prange.GetTo()) {
4663 to = prange.GetTo();
4668 if ( !(*it)->IsSetStrand()
4670 strand = pit.GetStrand();
4672 strand = (*it)->GetStrand();
4674 if (from == prange.GetFrom()) {
4675 fuzz_from = pit.GetFuzzFrom();
4677 if (start == (*it)->GetFrom()
4678 && (*it)->IsSetFuzz_from()) {
4681 f->Assign(*fuzz_from);
4682 f->Add((*it)->GetFuzz_from(), from,
4685 f->AssignTranslated((*it)->GetFuzz_from(), from,
4688 if (
f->IsP_m() && !
f->GetP_m() ) {
4694 if (to == prange.GetTo()) {
4695 fuzz_to = pit.GetFuzzTo();
4697 if ( !keep_going && (*it)->IsSetFuzz_to() ) {
4700 f->Assign(*fuzz_to);
4701 f->Add((*it)->GetFuzz_to(), to, (*it)->GetTo());
4703 f->AssignTranslated((*it)->GetFuzz_to(), to,
4706 if (
f->IsP_m() && !
f->GetP_m() ) {
4714 && (fuzz_from == fuzz_to
4716 && fuzz_from->
Equals(*fuzz_to)))) {
4725 point.
SetFuzz().Assign(*fuzz_from);
4727 point.
SetId().Assign(pit.GetSeq_id());
4728 mix.
Set().push_back(loc);
4743 ival.
SetId().Assign(pit.GetSeq_id());
4744 mix.
Set().push_back(loc);
4747 start = pos + length;
4761 << start <<
" exceeds length (" << total_length
4762 <<
") of parent location " <<
label);
4766 <<
" exceeds length (?\?\?) of parent location "
4772 switch (mix.
Get().size()) {
4853 return (comp_it != sc_Complement.end()) ? comp_it->second :
'\0';
4860 revcomp.reserve(sequence.length());
4861 string::const_reverse_iterator rend = sequence.rend();
4863 for (string::const_reverse_iterator rit = sequence.rbegin(); rit != rend; ++rit) {
4872 (
const string& name,
4873 const string& sequence,
4882 string pattern = sequence;
4887 bool symmetric = (pattern ==
revcomp);
4914 if (!
m_Fsa.IsPrimed()) {
4918 int next_state =
m_Fsa.GetNextState(current_state, ch);
4921 if (
m_Fsa.IsMatchFound(next_state)) {
4922 ITERATE(vector<TPatternInfo>, it,
m_Fsa.GetMatches(next_state)) {
4923 int start = position -
int(it->GetSequence().length()) + 1;
4926 if (start < length) {
5060 '\0',
'A',
'C',
'M',
'G',
'R',
'S',
'V',
'T',
'W',
'Y',
'H',
'K',
'D',
'B',
'N'
5065 (
const string& name,
5083 buffer.reserve(pattern.length());
5099 if (pos < sequence.length()) {
5102 for (
int i = 0;
i < 4; ++
i) {
5103 if ((
code & expansion[
i]) != 0) {
5138 m_Fsa.AddWord(sequence, pat_info);
5151 "Sequence of this type cannot be reverse-complemented.");
5154 inst.
SetExt().SetDelta().Set().reverse();
5157 switch ((*it)->Which()) {
5159 if ((*it)->GetLiteral().IsSetSeq_data()) {
5169 (*it)->SetLoc(*flip);
5180 "Sequence of this type cannot be reverse-complemented.");
User-defined methods of the data storage class.
User-defined methods of the data storage class.
LargeInt< 1 > revcomp(const LargeInt< 1 > &x, size_t sizeKmer)
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
ENa_strand Reverse(ENa_strand s)
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
bool SameOrientation(ENa_strand a, ENa_strand b)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
CTime AsCTime(CTime::ETimeZone tz=CTime::eLocal) const
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
static const CTrans_table & GetTransTable(int id)
void GetLabel(string *label) const
bool IsSuppressed(void) const
CRef< CInt_fuzz > Negative(TSeqPos n) const
@ eAmplify
go for the largest possible range