98 const string& message,
103 e->SetMessage(message);
128 }
else if(
id.IsGeneral() ||
id.IsLocal()) {
162 bool found_in_starts = exon_biostarts.
find(exon_anchor_pos)
163 != exon_biostarts.
end();
165 bool found_in_stops = exon_biostops.
find(exon_anchor_pos)
166 != exon_biostops.
end();
168 return (offset_pos < 0 && found_in_starts)
169 || (offset_pos > 0 && found_in_stops)
170 || (offset_pos == 0 && (found_in_starts || found_in_stops));
176 return !fuzz.
IsLim() ? 0
192 const int start_offset_sign =
203 const int stop_offset_sign =
216 const bool start_ok =
222 start_offset_sign * sign);
230 stop_offset_sign * sign);
232 return start_ok && stop_ok;
251 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
309 CInt_fuzz& fuzz = (minus_strand == is_start) ? loc.
SetInt().SetFuzz_to()
310 : loc.
SetInt().SetFuzz_from();
360 TSeqPos& bio_start_ref = minus_strand ? loc->
SetInt().SetTo()
361 : loc->
SetInt().SetFrom();
418 "Expected genomic_id in the variation to be the same as in spliced-seg");
424 long closest_start = ss.
GetExons().front()->GetGenomic_start();
427 long closest_stop = ss.
GetExons().front()->GetGenomic_start();
435 closest_start = start;
461 if(start != closest_start || stop != closest_stop) {
462 int_loc->
SetInt().SetFrom(closest_start);
463 int_loc->
SetInt().SetTo(closest_stop);
472 if(start != closest_start) {
473 long offset = (start - closest_start);
481 if(stop != closest_stop) {
482 long offset = (stop - closest_stop);
512 for(
int i = 0;
i < 2;
i++) {
518 if(target_row == -1) {
521 "The alignment has no row for seq-id "
536 bool source_loc_is_projected =
548 "HGVS exon-boundary position not found in alignment of "
572 "Mismatches in mapping",
578 static const long thr = 5000;
581 bool far_start = start_offset +
thr < 0
585 bool far_stop = stop_offset >
thr
589 if(far_start || far_stop) {
591 "Source location overhangs the alignment by at least 5kb ",
596 if(check_placements) {
618 se->SetGenomic_start(ci.GetRange().GetFrom());
619 se->SetGenomic_end(ci.GetRange().GetTo());
620 se->SetProduct_start().SetNucpos(product_pos);
621 se->SetProduct_end().SetNucpos(product_pos + ci.GetRange().GetLength() - 1);
622 product_pos += ci.GetRange().GetLength();
638 result->SetLoc().SetNull();
654 if(
result->GetLoc().IsNull()) {
712 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
729 aln = SerialClone<CSeq_align>(current_aln);
742 if(!
result->GetLoc().IsNull()) {
752 if(!
result->GetLoc().IsNull()) {
755 result->Assign(*mapped_placement);
761 result->SetLoc().Assign(*loc);
778 if(c !=
'A' && c !=
'C' && c !=
'G' && c !=
'T') {
788 bool had_ambiguities =
false;
792 had_ambiguities =
true;
804 return had_ambiguities;
809 bool invalid_location =
false;
810 bool out_of_order =
false;
813 invalid_location =
true;
827 invalid_location =
true;
833 if(invalid_location) {
835 out_of_order ?
"Invalid location - start and stop are out of order"
836 :
"Invalid location",
850 "Bioseq is suppressed or withdrawn",
855 return invalid_location;
876 "Cannot use Mapper-based method to remap intronic cases;"
877 "must remap via spliced-seg alignment instead.");
889 "Mismatches in mapping",
927 const bool equal_offsets = (
937 const bool merge_single_range =
939 && mapped_loc->
IsPnt()
942 if(mapped_loc->
IsInt()
948 mapped_loc->
SetInt().ResetFuzz_to();
966 loc1->
SetInt().SetTo() += 500;
969 if(tmp_mapped_loc->
GetId()) {
989 if(mapped_len == 0) {
992 }
else if(mapped_len < orig_len) {
995 }
else if(!orig_is_compound && mapped_is_compound) {
1000 exception->SetMessage(
"");
1024 "Source location overhangs the alignment by at least 5kb",
1039 p.
SetSeq().SetLength(length);
1044 "Can't get sequence for an offset-based location",
1047 }
else if(length > max_len) {
1049 "Sequence is longer than the cutoff threshold",
1059 "Ambiguous residues in reference",
1067 "Cannot fetch sequence at location",
1078 bool had_exceptions =
false;
1091 if(!
v2.GetData().IsInstance() || (
v2.GetConsequenceParent() && &v != &
v2)) {
1097 && inst.
GetDelta().front()->IsSetSeq()
1098 && inst.
GetDelta().front()->GetSeq().IsLiteral()) {
1100 if(!asserted_literal
1104 }
else if(!variant_literal
1107 && (!inst.
GetDelta().front()->IsSetMultiplier() && !inst.
GetDelta().front()->IsSetMultiplier_fuzz())
1115 if(variant_literal) {
1119 LOG_POST(
"Did not find variant-literal");
1135 "Asserted sequence is inconsistent with reference",
1137 had_exceptions =
true;
1142 && variant_literal->Equals(p.
GetSeq())) {
1144 "Reference sequence is the same as variant",
1146 had_exceptions =
true;
1153 v.
SetData().SetSet().SetVariations())
1156 had_exceptions = had_exceptions ||
AttachSeq(
v2, max_len);
1159 return !had_exceptions;
1278 first.SetInt().SetTo(start - 1);
1281 if(stop == max_pos) {
1284 second.
SetInt().SetFrom(stop + 1);
1299 if(prot_str.size() != 1) {
1303 static const char* alphabet =
"ACGT";
1304 string codon =
"AAA";
1305 for(
size_t i0 = 0; i0 < 4; i0++) {
1306 codon[0] = alphabet[i0];
1307 for(
size_t i1 = 0; i1 < 4; i1++) {
1308 codon[1] = alphabet[i1];
1309 for(
size_t i2 = 0; i2 < 4; i2++) {
1310 codon[2] = alphabet[i2];
1316 if(
prot == prot_str) {
1327 for(
size_t i = 0;
i <
min(
a.size(),
b.size());
i++) {
1336 const string& codon_from,
1337 const string& prot_to,
1338 vector<string>& codons_to)
1340 vector<string> candidates1;
1341 size_t max_matches(0);
1344 bool have_silent =
false;
1346 ITERATE(vector<string>, it1, candidates1)
1358 if(matches >= max_matches) {
1359 if(matches > max_matches) {
1362 codons_to.push_back(*it1);
1363 max_matches = matches;
1368 if(codons_to.empty() && have_silent) {
1369 codons_to.push_back(codon_from);
1375 string collapsed_seq;
1379 typedef const vector<string> TConstStrs;
1382 const string& seq = *it;
1383 if(seq.size() > bits.size()) {
1384 bits.resize(seq.size());
1387 for(
size_t i = 0;
i < seq.size();
i++) {
1389 int m = (nt ==
'T' ? 1
1392 : nt ==
'A' ? 8 : 0);
1401 static const char* iupac_nuc_ambiguity_codes =
"NTGKCYSBAWRDMHVN";
1402 collapsed_seq.resize(bits.size());
1403 for(
size_t i = 0;
i < collapsed_seq.size();
i++) {
1404 collapsed_seq[
i] = iupac_nuc_ambiguity_codes[bits[
i]];
1406 return collapsed_seq;
1414 v.
SetData().SetSet().SetVariations().clear();
1420 if(
v2->GetData().IsInstance()
1421 &&
v2->GetData().GetInstance().IsSetObservation()
1427 v.
SetData().SetSet().SetVariations().push_back(
v2);
1440 if(!placements || placements->size() == 0) {
1467 if(!prot2precursor_mapper) {
1487 v2->SetPlacements().push_back(p);
1493 if(!nuc_loc->
IsInt()
1497 || !
delta->IsSetSeq()
1498 || !
delta->GetSeq().IsLiteral()
1499 ||
delta->GetSeq().GetLiteral().GetLength() != 1)
1505 v2->SetData().SetUnknown();
1506 v2->SetPlacements().push_back(p);
1513 string original_allele_codon;
1516 string variant_codon;
1520 delta->GetSeq().GetLiteral().GetSeq_data(),
1524 vector<string> variant_codons;
1533 original_allele_codon,
1542 && variant_codon != original_allele_codon) {
1543 while(variant_codon.length() > 0
1544 && original_allele_codon.length() > 0
1545 && variant_codon.at(0) == original_allele_codon.at(0)) {
1546 variant_codon = variant_codon.substr(1);
1547 original_allele_codon = original_allele_codon.substr(1);
1549 nuc_loc->
SetInt().SetTo()--;
1551 nuc_loc->
SetInt().SetFrom()++;
1555 while(variant_codon.length() > 0
1556 && original_allele_codon.length() > 0
1557 && variant_codon.at(variant_codon.length() - 1)
1558 == original_allele_codon.at(original_allele_codon.length() - 1)) {
1559 variant_codon.resize(variant_codon.length() - 1);
1560 original_allele_codon.resize(original_allele_codon.length() - 1);
1564 nuc_loc->
SetInt().SetFrom()++;
1566 nuc_loc->
SetInt().SetTo()--;
1572 delta2->SetSeq().SetLiteral().SetLength(variant_codon.length());
1573 delta2->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(variant_codon);
1583 v2->SetPlacements().push_back(p2);
1587 v2->SetData().SetUnknown();
1591 inst2.
SetDelta().push_back(delta2);
1648 if(cached_literal) {
1669 literal->SetSeq_data().SetNcbieaa().Set().push_back(
1677 literal->SetLength(seq.size());
1679 literal->SetSeq_data().SetNcbieaa().Set(seq);
1681 literal->SetSeq_data().SetIupacna().Set(seq);
1699 if(
b.GetLength() == 0) {
1701 }
else if(
a.GetLength() == 0) {
1706 if(
a.IsSetFuzz() ||
b.IsSetFuzz()) {
1710 if(
a.IsSetSeq_data() &&
b.IsSetSeq_data()) {
1712 a.GetSeq_data(), 0,
a.GetLength(),
1713 b.GetSeq_data(), 0,
b.GetLength());
1787 v.
SetData().SetSet().SetVariations())
1801 "Could not find literal for 'this' location in placements");
1805 di->SetSeq().SetLiteral().Assign(*this_literal);
1815 di->SetSeq().SetLiteral().SetLength(0);
1816 di->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(
"");
1818 }
else if(inst.
GetDelta().size() > 1) {
1826 di.
SetSeq().SetLiteral().SetLength(0);
1827 di.
SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(
"");
1840 di.
SetSeq().SetLiteral().Assign(*this_literal);
1851 if(!
literal.IsSetSeq_data() || !
literal.GetSeq_data().IsIupacna()) {
1854 string str_kernel =
literal.GetSeq_data().GetIupacna().Get();
1855 literal.SetSeq_data().SetIupacna().Set(
"");
1857 literal.SetSeq_data().SetIupacna().Set() += str_kernel;
1881 }
else if(this_literal->GetLength() == 0) {
1889 this_literal->GetLength() - 1);
1902 bool ignore_genomic)
1909 v.
SetData().SetSet().SetVariations())
1923 if(!placements || placements->size() == 0) {
1964 consequence->SetVariation(*prot_variation);
1971 static string Translate(
const string& nuc_str,
bool is_mito)
1976 code.SetId(is_mito ? 2 : 1);
1984 if(prot_str.size() * 3 < nuc_str.size()) {
1985 prot_str.push_back(
'X');
1989 size_t stop_pos = prot_str.find(
'*');
1990 if(stop_pos !=
NPOS) {
1991 prot_str.resize(stop_pos + 1);
1998 const string& prot_ref_str,
1999 const string& prot_delta_str)
2013 const string& prot_ref_str,
2014 const string& prot_delta_str)
2017 for(
size_t i = 0;
i < prot_ref_str.size() &&
i < prot_delta_str.size();
i++) {
2018 if(prot_ref_str[
i] == prot_delta_str[
i]) {
2020 }
else if(prot_ref_str[
i] ==
'*') {
2022 }
else if(prot_delta_str[
i] ==
'*') {
2033 const string& prot_ref_str,
2034 const string& prot_variant_str)
2038 bool stop_gain =
false;
2039 bool stop_loss =
false;
2040 for(
size_t i = 0;
i <
max(prot_ref_str.size(), prot_variant_str.size());
i++) {
2041 char r =
i >= prot_ref_str.size() ?
'-' : prot_ref_str[
i];
2042 char v =
i >= prot_variant_str.size() ?
'-' : prot_variant_str[
i];
2044 if(
r ==
'*' && v !=
'*') {
2048 if(
r !=
'*' && v ==
'*') {
2059 if(nuc_delta_len == 0) {
2060 if(!stop_gain && !stop_loss) {
2064 }
else if(nuc_delta_len % 3 == 0) {
2076 vp.
SetLoc().FlipStrand();
2079 vp.
SetSeq().SetSeq_data(),
2080 &vp.
SetSeq().SetSeq_data(),
2088 if(
tmp->IsSetStart_offset()) {
2094 if(
tmp->IsSetStop_offset()) {
2100 if(
tmp->IsSetStart_offset_fuzz()) {
2106 if(
tmp->IsSetStop_offset_fuzz()) {
2151 v.
SetData().SetSet().SetVariations())
2172 di.
SetSeq().SetLoc().FlipStrand();
2175 di.
SetSeq().GetLiteral().GetSeq_data(),
2176 &di.
SetSeq().SetLiteral().SetSeq_data(),
2214 sub_loc->
Assign(*range_loc);
2217 if(!suffix_loc->
Which()) {
2221 sub_loc->
Assign(*range_loc);
2224 if(!prefix_loc->
Which()) {
2229 swap(prefix_loc, suffix_loc);
2247 if(!
delta.IsSetSeq() || !
delta.GetSeq().IsLiteral()) {
2253 delta.SetSeq().SetLiteral(*tmp_literal2);
2277 p->
SetLoc().SetWhole().Assign(
id);
2280 v->SetData().SetUnknown();
2281 v->SetPlacements().push_back(p);
2289 bool is_frameshifting,
2306 prot_loc = nuc2prot_mapper->
Map(nuc_p.
GetLoc());
2307 codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2311 if(codons_loc->
IsNull()) {
2327 v->SetData().SetUnknown();
2330 prot_p->
SetLoc(*prot_loc);
2333 "Cannot infer consequence; projecting location only",
2335 v->SetPlacements().push_back(prot_p);
2338 codons_p->
SetLoc(*codons_loc);
2340 v->SetPlacements().push_back(codons_p);
2347 if(is_frameshifting) {
2358 while(
i <
a.size() &&
i <
b.size() &&
a[
i] ==
b[
i]) {
2367 while(
i <
a.size() &&
i <
b.size() &&
a[
a.size() - 1 -
i] ==
b[
b.size() - 1 -
i]) {
2406 v->SetData().SetInstance().Assign(nuc_inst);
2407 v->ResetPlacements();
2412 v->SetPlacements().push_back(p);
2428 const CDelta_item& nuc_delta = *v->GetData().GetInstance().GetDelta().front();
2461 prot_loc = nuc2prot_mapper->
Map(p->
GetLoc());
2462 codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2479 int frameshift_phase = nuc_delta_len % 3;
2480 if(frameshift_phase < 0) {
2481 frameshift_phase += 3;
2488 frameshift_phase != 0,
2493 string downstream_cds_suffix_seq_str;
2509 downstream_cds_loc = ext_cds_loc->
Intersect(
2517 if(
literal->GetLength() > 0) {
2518 downstream_cds_suffix_seq_str =
literal->GetSeq_data().GetIupacna().Get();
2536 if(!v->GetPlacements().front()->GetSeq().IsSetSeq_data()) {
2540 frameshift_phase != 0,
2546 string nuc_ref_prefix = v->GetPlacements().front()->GetSeq().GetSeq_data().GetIupacna().Get();
2548 const CSeq_literal& nuc_var_literal = v->GetData().GetInstance().GetDelta().front()->GetSeq().GetLiteral();
2551 string nuc_ref_str = nuc_ref_prefix + downstream_cds_suffix_seq_str;
2552 string nuc_var_str = nuc_var_prefix + downstream_cds_suffix_seq_str;
2556 int num_ref_codons = (nuc_ref_prefix.size() + 2) / 3;
2557 int num_var_codons = (nuc_var_prefix.size() + 2) / 3;
2561 <<
"nuc_var_str: " << nuc_var_str <<
"\n";
2565 <<
"prot_var_str: " << prot_var_str <<
"\n";
2568 int common_prot_prefix_len(0);
2571 if(prot_ref_str == prot_var_str) {
2574 prot_ref_str.resize(
min(
static_cast<int>(prot_ref_str.size()), num_ref_codons));
2575 prot_var_str.resize(prot_ref_str.size());
2577 if(prot_ref_str.size() > 0 && *prot_ref_str.rbegin() ==
'*') {
2579 frameshift_phase = 0;
2593 if(common_prot_prefix_len > 0
2594 && common_prot_prefix_len ==
static_cast<int>(prot_ref_str.size())) {
2595 common_prot_prefix_len -= 1;
2599 prot_ref_str = prot_ref_str.substr(common_prot_prefix_len);
2600 prot_var_str = prot_var_str.substr(common_prot_prefix_len);
2602 if(
verbose)
NcbiCerr <<
"prot_ref_str: " << prot_ref_str <<
":" << prot_ref_str.size() <<
"\n"
2603 <<
"prot_var_str: " << prot_var_str <<
":" << prot_var_str.size() <<
"\n";
2605 if(frameshift_phase == 0) {
2608 size_t min_len =
min(prot_ref_str.size(), prot_var_str.size());
2609 size_t ref_stop_pos = prot_ref_str.find(
'*');
2610 size_t var_stop_pos = prot_var_str.find(
'*');
2611 size_t min_stop_pos =
min(ref_stop_pos, var_stop_pos);
2614 bool truncate_at_stop = min_stop_pos < min_len
2615 && ref_stop_pos != var_stop_pos
2616 && nuc_delta_len == 0;
2618 if(truncate_at_stop) {
2619 prot_ref_str.resize(min_stop_pos + 1);
2620 prot_var_str.resize(min_stop_pos + 1);
2622 prot_ref_str.resize(prot_ref_str.size() - suffix_len);
2623 prot_var_str.resize(prot_var_str.size() - suffix_len);
2628 prot_ref_str.resize(
min(
static_cast<size_t>(1), prot_ref_str.size()));
2629 prot_var_str.resize(
min(
static_cast<size_t>(1), prot_var_str.size()));
2634 if(prot_ref_str.size() == 0) {
2637 prot_loc->
SetInt().SetFrom() += common_prot_prefix_len - 1;
2638 prot_loc->
SetInt().SetTo(prot_loc->
SetInt().SetFrom() + 1);
2641 prot_loc->
SetInt().SetFrom() += common_prot_prefix_len;
2642 prot_loc->
SetInt().SetTo(prot_loc->
SetInt().SetFrom() + prot_ref_str.size() - 1);
2646 codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2648 if(codons_loc->
IsNull()) {
2653 frameshift_phase != 0,
2660 if(
verbose)
NcbiCerr <<
"prot_ref_str: " << prot_ref_str <<
":" << prot_ref_str.size() <<
"\n"
2661 <<
"prot_var_str: " << prot_var_str <<
":" << prot_var_str.size() <<
"\n";
2666 <<
"; variant codons: " << num_var_codons
2667 <<
"; common prefix: " << common_prot_prefix_len <<
"\n";
2683 prot_p->
SetSeq().SetLength(prot_ref_str.size());
2684 prot_p->
SetSeq().SetSeq_data().SetNcbieaa().Set(prot_ref_str);
2686 prot_p->
SetLoc(*prot_loc);
2690 prot_v->SetPlacements().push_back(prot_p);
2694 codons_p->
SetLoc(*codons_loc);
2698 prot_v->SetPlacements().push_back(codons_p);
2703 if(frameshift_phase == 0 && prot_ref_str.size() == prot_var_str.size()) {
2707 prot_v->SetVariant_prop().SetEffect(prop);
2714 copy(so_terms.begin(), so_terms.end(), back_inserter(prot_v->SetSo_terms()));
2718 prot_v->SetData().SetInstance().SetType(
CalcInstTypeForAA(prot_ref_str, prot_var_str));
2722 prot_v->SetData().SetInstance().SetDelta().push_back(di);
2724 if(prot_var_str.size() > 0) {
2732 if(
false && common_prot_prefix_len == 0) {
2733 di->SetSeq().Assign(v->GetData().GetInstance().GetDelta().front()->GetSeq());
2737 <<
"inst-type: " << prot_v->GetData().GetInstance().GetType()
2738 <<
"; nuc_var_len: " << nuc_var_str.size()
2739 <<
"; nuc_var_str: " << nuc_var_str
2740 <<
"; prefix_len: " << common_prot_prefix_len * 3
2741 <<
"; var_codons:" << prot_var_str.size() * 3 <<
"\n";
2745 string adjusted_codons_str = nuc_var_str.substr(
2746 min<int>(nuc_var_str.size(), common_prot_prefix_len * 3),
2747 prot_var_str.size() * 3);
2749 if(adjusted_codons_str.size() > 0) {
2750 di->SetSeq().SetLiteral().SetLength(adjusted_codons_str.size());
2751 di->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set() = adjusted_codons_str;
2753 di->SetSeq().SetThis();
2758 if(prot_ref_str.size() == 0) {
2762 di->SetSeq().SetThis();
2767 if(frameshift_phase != 0) {
2770 prot_v->SetVariant_prop().SetEffect(
2772 | (prot_v->IsSetVariant_prop()
2773 && prot_v->GetVariant_prop().IsSetEffect()
2774 ? prot_v->GetVariant_prop().GetEffect() : 0));
2776 prot_v->SetFrameshift().SetPhase(frameshift_phase);
2902 if(parent ==
NULL) {
2929 if(p1.size() != p2.size()) {
2932 CVariation::TPlacements::const_iterator it1 = p1.begin();
2933 CVariation::TPlacements::const_iterator it2 = p2.begin();
2935 for(; it1 != p1.end() && it2 != p2.end(); ++it1, ++it2) {
2953 v.
SetData().SetSet().SetVariations())
2963 v.
SetData().SetSet().SetVariations())
2966 if(!
v2.IsSetPlacements()) {
2970 p1 = &
v2.SetPlacements();
2973 if(!
Equals(*p1,
v2.GetPlacements())) {
2989 v.
SetData().SetSet().SetVariations())
2992 v2.ResetPlacements();
3006 const CVariation::TConsequence::value_type::TObjectType& cons = **it;
3007 if(cons.IsVariation()
3008 && cons.GetVariation().IsSetPlacements()) {
3015 cons_v.
Reset(&cons.GetVariation());
3043 const CDbtag& dbtag = **it;
3044 if(dbtag.
GetDb() ==
"GeneID"
3052 dbtag->
SetDb(
"GeneID");
3053 dbtag->
SetTag().SetId(gene_id);
3099 bool is_completely_intronic =
false;
3109 && (is_start_offset || is_stop_offset);
3118 is_completely_intronic = is_case1 || is_case2;
3127 for(
size_t i = 0;
i < 3;
i++) {
3140 int gene_id = it->first;
3143 if(loc_prop &
flags[
i]) {
3151 if(!is_completely_intronic) {
3172 genomic_query_loc = mapper->
Map(query_loc);
3174 genomic_query_loc.
Reset(&query_loc);
3207 TIdRangeMap loc_map;
3212 loc_map[ci.GetSeq_id_Handle()][ci.GetRange()] = term;
3219 if(!rna_loc && !cds_loc) {
3223 const CSeq_loc& main_loc = rna_loc ? *rna_loc : *cds_loc;
3243 *ci.GetRangeAsSeq_loc());
3268 const SPropsMap::TRangeMap& rm = props_map.loc_map[ci.GetSeq_id_Handle()];
3269 for(SPropsMap::TRangeMap::const_iterator it2 = rm.begin(ci.GetRange()); it2.Valid(); ++it2) {
3270 terms_set.
insert(it2->second);
3273 copy(terms_set.
begin(), terms_set.
end(), back_inserter(terms));
3294 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
3303 return last_exon_pos ? last_exon_pos + 1
3304 : last_polyA_pos ? last_polyA_pos + 1
3341 if(offset < 0 && offset >= -2) {
3362 if(!
v2.IsSetPlacements()) {
3367 if(!
v2.SetVariant_prop().IsSetGene_location()) {
3368 v2.SetVariant_prop().SetGene_location(0);
3376 if(
v2.GetConsequenceParent()) {
3423 int gene_id = gene_id_and_prop.first;
3426 if(m.find(gene_id) == m.end()) {
3429 m[gene_id] |= properties;
3447 m_loc2prop[ci.GetSeq_id_Handle()][ci.GetRange()].push_back(
TGeneIDAndProp(gene_id, prop));
3463 m_rangemap[ci.GetSeq_id_Handle()][ci.GetRange()] = ci.GetRangeAsSeq_loc();
3476 if(it2 == m_rangemap.end()) {
3505 if(std::find(k.begin(), k.end(),
"RefSeqGene") != k.end()) {
3535 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
3544 if(transcript_seq_ids.
find(product_id) == transcript_seq_ids.
end()) {
3553 const CDbtag& dbtag = **it;
3554 if(dbtag.
GetDb() ==
"GeneID"
3555 || dbtag.
GetDb() ==
"LocusID") {
3580 feature::CFeatTree ft(ci);
3582 for(ci.
Rewind(); ci; ++ci) {
3590 return s_GetGeneID(mf, ft);
3608 feature::CFeatTree ft(ci);
3610 m_loc2prop[idh].size();
3625 for(ci.
Rewind(); ci; ++ci) {
3631 const int gene_id = s_GetGeneID(mf, ft);
3632 const bool is_focus_locus = focus_loci.
empty()
3633 || focus_loci.count(gene_id);
3635 (is_focus_locus ? focus_gene_ranges
3636 : non_focus_gene_ranges)
3637 ->SetMix().Set().push_back(
3649 bool found_some_gene_ids =
false;
3651 for(ci.
Rewind(); ci; ++ci) {
3659 const int gene_id = s_GetGeneID(mf, ft);
3660 if(!focus_loci.
empty()
3661 && focus_loci.
find(gene_id) == focus_loci.
end()) {
3665 if(!parent_mf && gene_id) {
3673 found_some_gene_ids =
true;
3695 p.first->ResetStrand();
3696 p.second->ResetStrand();
3717 subtract_gene_ranges_from(*p.first);
3718 subtract_gene_ranges_from(*p.second);
3724 all_gene_neighborhoods->
SetMix().Set().push_back(p.first);
3725 all_gene_neighborhoods->
SetMix().Set().push_back(p.second);
3747 x_Add(*ci.GetRangeAsSeq_loc(),
3792 if(ft.GetChildren(mf).size() == 0) {
3812 genes_and_neighborhoods_loc =
3814 *genes_and_neighborhoods_loc,
3815 *non_focus_gene_ranges,
3823 *genes_and_neighborhoods_loc,
3827 x_Add(*intergenic_loc,
3833 && !found_some_gene_ids) {
3843 int gene_id = s_GetGeneIdForProduct(bsh);
3845 x_Add(*whole_range_loc, gene_id, 0);
3852 feature::CFeatTree& ft)
3861 const CDbtag& dbtag = **it;
3862 if(dbtag.
GetDb() ==
"GeneID"
3863 || dbtag.
GetDb() ==
"LocusID") {
3876 const CDbtag& dbtag = **it;
3877 if(dbtag.
GetDb() ==
"GeneID"
3878 || dbtag.
GetDb() ==
"LocusID") {
3886 return parent ? s_GetGeneID(parent, ft) : gene_id;
3899 p.second->Assign(*p.first);
3900 p.first->SetInt().SetTo(p.first->GetInt().GetFrom() + 2);
3901 p.second->SetInt().SetFrom(p.second->GetInt().GetTo() - 2);
3904 swap(p.first, p.second);
3911 p.second->SetNull();
3923 sub_loc2->
Assign(*sub_loc1);
3932 swap(p.first, p.second);
3940 TSeqPos flank1_len(2000), flank2_len(500);
3942 swap(flank1_len, flank2_len);
3948 p.second->Assign(*p.first);
3950 if(p.first->GetTotalRange().GetFrom() == 0) {
3953 p.first->SetInt().SetTo(p.first->GetTotalRange().GetFrom() - 1);
3954 p.first->SetInt().SetFrom(p.first->GetTotalRange().GetFrom() < flank1_len ? 0 : p.first->GetTotalRange().GetFrom() - flank1_len);
3957 if(p.second->GetTotalRange().GetTo() == max_pos) {
3958 p.second->SetNull();
3960 p.second->SetInt().SetFrom(p.second->GetTotalRange().GetTo() + 1);
3961 p.second->SetInt().SetTo(p.second->GetTotalRange().GetTo() > max_pos ? max_pos : p.second->GetTotalRange().GetTo() + flank2_len);
3965 swap(p.first, p.second);
3978 introns_loc_without_splice_sites->
Assign(*introns_loc_with_splice_sites);
3985 seqint.
SetTo() -= 2;
3990 p.first = introns_loc_without_splice_sites;
3992 *introns_loc_without_splice_sites,
4013 m_seq_data_map[idh].mapper.
Reset();
4017 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
4031 m_data[ci.GetSeq_id_Handle()][ci.GetRange()].push_back(s);
4046 x_CacheSeqData(*all_rna_loc, idh);
4053 SSeqData& d = m_seq_data_map[idh2];
4082 target_loc->
SetInt().SetId().SetLocal().SetStr(
"all_cds");
4083 target_loc->
SetInt().SetFrom(0);
4093 literal->SetSeq_data().SetIupacna().Set(
"");
4101 if(m_seq_data_map.find(ci.GetSeq_id_Handle()) == m_seq_data_map.end()) {
4104 const SSeqData& d = m_seq_data_map.find(ci.GetSeq_id_Handle())->second;
4113 if((!mapped_loc->
IsInt() && !mapped_loc->
IsPnt())
4121 literal->SetSeq_data().SetIupacna().Set() += seq_chunk;
4137 if(m_data.find(idh) == m_data.end()) {
4143 if(it == m_data.end()) {
4157 cdregions.push_back(*it);
4175 if(!v->IsSetId() && parent.
IsSetId()) {
4176 v->SetId().Assign(parent.
GetId());
4210 feat->
SetData().SetVariation(*vr);
4211 feats.push_back(feat);
4239 out_feats.insert(out_feats.end(), feats.begin(), feats.end());
4247 vr->SetId().Assign(v.
GetId());
4282 vr->SetPhenotype().push_back(p);
4297 new CVariation_ref::TConsequence::value_type::TObjectType);
4298 vr->SetConsequence().push_back(fr_cons);
4299 fr_cons->SetFrameshift();
4309 vr->SetData().SetComplex();
4316 vr->SetData().SetUniparental_disomy();
4318 vr->SetData().SetUnknown();
4335 vr->SetConsequence();
4338 const CVariation::TConsequence::value_type::TObjectType& v_cons = **it;
4340 new CVariation_ref::TConsequence::value_type::TObjectType);
4341 vr->SetConsequence().push_back(vr_cons);
4342 vr_cons->SetUnknown();
4344 if(v_cons.IsSplicing()) {
4345 vr_cons->SetSplicing();
4346 }
else if(v_cons.IsNote()) {
4347 vr_cons->SetNote(v_cons.GetNote());
4348 }
else if(v_cons.IsVariation()) {
4350 vr_cons->SetVariation(*cons_variation);
4351 }
else if(v_cons.IsLoss_of_heterozygosity()) {
4352 vr_cons->SetLoss_of_heterozygosity();
4353 if(v_cons.GetLoss_of_heterozygosity().IsSetReference()) {
4354 vr_cons->SetLoss_of_heterozygosity().SetReference(
4355 v_cons.GetLoss_of_heterozygosity().GetReference());
4357 if(v_cons.GetLoss_of_heterozygosity().IsSetTest()) {
4358 vr_cons->SetLoss_of_heterozygosity().SetTest(
4359 v_cons.GetLoss_of_heterozygosity().GetTest());
4366 vr->SetSomatic_origin();
4369 const CVariation::TSomatic_origin::value_type::TObjectType& v_so = **it;
4372 new CVariation_ref::TSomatic_origin::value_type::TObjectType);
4374 if(v_so.IsSetSource()) {
4375 vr_so->SetSource().Assign(v_so.GetSource());
4378 if(v_so.IsSetCondition()) {
4379 vr_so->SetCondition();
4380 if(v_so.GetCondition().IsSetDescription()) {
4381 vr_so->SetCondition().SetDescription(
4382 v_so.GetCondition().GetDescription());
4384 if(v_so.GetCondition().IsSetObject_id()) {
4385 vr_so->SetCondition().SetObject_id();
4386 ITERATE(CVariation::TSomatic_origin::value_type::TObjectType::TCondition::TObject_id,
4388 v_so.GetCondition().GetObject_id())
4392 vr_so->SetCondition().SetObject_id().push_back(dbtag);
4397 vr->SetSomatic_origin().push_back(vr_so);
4411 delta->SetMultiplier(-1);
4414 delta->SetSeq().SetLiteral().SetFuzz().Assign(*fuzz);
4451 v->SetPlacements().push_back(p);
4455 v->SetPub().Assign(variation_feat.
GetCit());
4461 v->SetExt().push_back(uo);
4469 v->SetExt().push_back(uo);
4481 v->SetId().Assign(vr.
GetId());
4516 v->SetPhenotype().push_back(p);
4521 v->SetMethod().SetMethod() = vr.
GetMethod();
4529 v->SetData().SetComplex();
4535 v->SetData().SetUniparental_disomy();
4537 v->SetData().SetUnknown();
4554 v->SetConsequence();
4557 const CVariation_ref::TConsequence::value_type::TObjectType& vr_cons = **it;
4559 if(vr_cons.IsFrameshift()) {
4564 if(vr_cons.GetFrameshift().IsSetPhase()) {
4565 cons_variation.
SetFrameshift().SetPhase(vr_cons.GetFrameshift().GetPhase());
4567 if(vr_cons.GetFrameshift().IsSetX_length()) {
4568 cons_variation.
SetFrameshift().SetX_length(vr_cons.GetFrameshift().GetX_length());
4575 if(vr_cons.IsUnknown()) {
4576 v_cons->SetUnknown();
4577 }
else if(vr_cons.IsSplicing()) {
4578 v_cons->SetSplicing();
4579 }
else if(vr_cons.IsNote()) {
4580 v_cons->SetNote(vr_cons.GetNote());
4581 }
else if(vr_cons.IsVariation()) {
4583 v_cons->SetVariation(*cons_variation);
4584 }
else if(vr_cons.IsLoss_of_heterozygosity()) {
4585 v_cons->SetLoss_of_heterozygosity();
4586 if(vr_cons.GetLoss_of_heterozygosity().IsSetReference()) {
4587 v_cons->SetLoss_of_heterozygosity().SetReference(vr_cons.GetLoss_of_heterozygosity().GetReference());
4589 if(vr_cons.GetLoss_of_heterozygosity().IsSetTest()) {
4590 v_cons->SetLoss_of_heterozygosity().SetTest(vr_cons.GetLoss_of_heterozygosity().GetTest());
4594 v->SetConsequence().push_back(v_cons);
4596 if(v->GetConsequence().empty()) {
4597 v->ResetConsequence();
4602 v->SetSomatic_origin();
4605 const CVariation_ref::TSomatic_origin::value_type::TObjectType& vr_so = **it;
4608 if(vr_so.IsSetSource()) {
4609 v_so->SetSource().Assign(vr_so.GetSource());
4612 if(vr_so.IsSetCondition()) {
4613 v_so->SetCondition();
4614 if(vr_so.GetCondition().IsSetDescription()) {
4615 v_so->SetCondition().SetDescription(vr_so.GetCondition().GetDescription());
4617 if(vr_so.GetCondition().IsSetObject_id()) {
4618 v_so->SetCondition().SetObject_id();
4619 ITERATE(CVariation_ref::TSomatic_origin::value_type::TObjectType::TCondition::TObject_id,
4621 vr_so.GetCondition().GetObject_id())
4625 v_so->SetCondition().SetObject_id().push_back(dbtag);
4630 v->SetSomatic_origin().push_back(v_so);
4640 return delta.GetSeq().GetLiteral().GetLength()
4641 * (
delta.IsSetMultiplier() ?
delta.GetMultiplier() : 1);
4646 return delta.GetSeq().GetLiteral().IsSetFuzz() ?
4647 &
delta.GetSeq().GetLiteral().GetFuzz() :
NULL;
4655 v.
SetData().SetSet().SetVariations())
4663 if(delta_first->IsSetAction()
4671 v.
SetData().SetInstance().SetDelta().pop_front();
4674 if(delta_last != delta_first
4675 && delta_last->IsSetAction()
4683 v.
SetData().SetInstance().SetDelta().pop_back();
4693 string* asserted_out,
4706 s_PropagateLocsInPlace(vr);
4709 bool have_asserted_seq =
false;
4717 string asserted_seq;
4719 if(
literal.GetSeq_data().IsIupacna()) {
4720 asserted_seq =
literal.GetSeq_data().GetIupacna();
4721 have_asserted_seq =
true;
4722 }
else if(
literal.GetSeq_data().IsNcbieaa()) {
4723 asserted_seq =
literal.GetSeq_data().GetNcbieaa();
4724 have_asserted_seq =
true;
4729 string prefix, suffix;
4735 v.GetSeqData(v.begin(), v.end(), actual_seq);
4742 *asserted_out = asserted_seq;
4745 *actual_out = actual_seq;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
bool SameOrientation(ENa_strand a, ENa_strand b)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
ESubtype GetSubtype(void) const
TSeqPos GetSeqStop(TDim row) const
CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
namespace ncbi::objects::
TSeqPos GetLength(void) const
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static TSeqPos Keep(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos Append(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2)
Template class for iteration on objects of class C (non-medifiable version)
Template class for iteration on objects of class C.
Set of related Variations.
Set of related Variations.
const TLocation & GetLocation(void) const
void SetLocation(TLocation &value)