98 const string& message,
103 e->SetMessage(message);
128 }
else if(
id.IsGeneral() ||
id.IsLocal()) {
162 bool found_in_starts = exon_biostarts.
find(exon_anchor_pos)
163 != exon_biostarts.
end();
165 bool found_in_stops = exon_biostops.
find(exon_anchor_pos)
166 != exon_biostops.
end();
168 return (offset_pos < 0 && found_in_starts)
169 || (offset_pos > 0 && found_in_stops)
170 || (offset_pos == 0 && (found_in_starts || found_in_stops));
176 return !fuzz.
IsLim() ? 0
192 const int start_offset_sign =
203 const int stop_offset_sign =
216 const bool start_ok =
222 start_offset_sign * sign);
230 stop_offset_sign * sign);
232 return start_ok && stop_ok;
251 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
309 CInt_fuzz& fuzz = (minus_strand == is_start) ? loc.
SetInt().SetFuzz_to()
310 : loc.
SetInt().SetFuzz_from();
360 TSeqPos& bio_start_ref = minus_strand ? loc->
SetInt().SetTo()
361 : loc->
SetInt().SetFrom();
418 "Expected genomic_id in the variation to be the same as in spliced-seg");
424 long closest_start = ss.
GetExons().front()->GetGenomic_start();
427 long closest_stop = ss.
GetExons().front()->GetGenomic_start();
435 closest_start = start;
461 if(start != closest_start || stop != closest_stop) {
462 int_loc->
SetInt().SetFrom(closest_start);
463 int_loc->
SetInt().SetTo(closest_stop);
472 if(start != closest_start) {
473 long offset = (start - closest_start);
481 if(stop != closest_stop) {
482 long offset = (stop - closest_stop);
512 for(
int i = 0;
i < 2;
i++) {
518 if(target_row == -1) {
521 "The alignment has no row for seq-id "
536 bool source_loc_is_projected =
548 "HGVS exon-boundary position not found in alignment of "
572 "Mismatches in mapping",
578 static const long thr = 5000;
581 bool far_start = start_offset +
thr < 0
585 bool far_stop = stop_offset >
thr
589 if(far_start || far_stop) {
591 "Source location overhangs the alignment by at least 5kb ",
596 if(check_placements) {
618 se->SetGenomic_start(ci.GetRange().GetFrom());
619 se->SetGenomic_end(ci.GetRange().GetTo());
620 se->SetProduct_start().SetNucpos(product_pos);
621 se->SetProduct_end().SetNucpos(product_pos + ci.GetRange().GetLength() - 1);
622 product_pos += ci.GetRange().GetLength();
638 result->SetLoc().SetNull();
654 if(
result->GetLoc().IsNull()) {
712 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
729 aln = SerialClone<CSeq_align>(current_aln);
742 if(!
result->GetLoc().IsNull()) {
752 if(!
result->GetLoc().IsNull()) {
755 result->Assign(*mapped_placement);
761 result->SetLoc().Assign(*loc);
778 if(c !=
'A' && c !=
'C' && c !=
'G' && c !=
'T') {
788 bool had_ambiguities =
false;
792 had_ambiguities =
true;
804 return had_ambiguities;
809 bool invalid_location =
false;
810 bool out_of_order =
false;
813 invalid_location =
true;
827 invalid_location =
true;
833 if(invalid_location) {
835 out_of_order ?
"Invalid location - start and stop are out of order"
836 :
"Invalid location",
850 "Bioseq is suppressed or withdrawn",
855 return invalid_location;
876 "Cannot use Mapper-based method to remap intronic cases;"
877 "must remap via spliced-seg alignment instead.");
889 "Mismatches in mapping",
927 const bool equal_offsets = (
937 const bool merge_single_range =
939 && mapped_loc->
IsPnt()
942 if(mapped_loc->
IsInt()
948 mapped_loc->
SetInt().ResetFuzz_to();
966 loc1->
SetInt().SetTo() += 500;
969 if(tmp_mapped_loc->
GetId()) {
989 if(mapped_len == 0) {
992 }
else if(mapped_len < orig_len) {
995 }
else if(!orig_is_compound && mapped_is_compound) {
1000 exception->SetMessage(
"");
1024 "Source location overhangs the alignment by at least 5kb",
1039 p.
SetSeq().SetLength(length);
1044 "Can't get sequence for an offset-based location",
1047 }
else if(length > max_len) {
1049 "Sequence is longer than the cutoff threshold",
1059 "Ambiguous residues in reference",
1067 "Cannot fetch sequence at location",
1078 bool had_exceptions =
false;
1091 if(!
v2.GetData().IsInstance() || (
v2.GetConsequenceParent() && &v != &
v2)) {
1097 && inst.
GetDelta().front()->IsSetSeq()
1098 && inst.
GetDelta().front()->GetSeq().IsLiteral()) {
1100 if(!asserted_literal
1104 }
else if(!variant_literal
1107 && (!inst.
GetDelta().front()->IsSetMultiplier() && !inst.
GetDelta().front()->IsSetMultiplier_fuzz())
1115 if(variant_literal) {
1119 LOG_POST(
"Did not find variant-literal");
1135 "Asserted sequence is inconsistent with reference",
1137 had_exceptions =
true;
1142 && variant_literal->Equals(p.
GetSeq())) {
1144 "Reference sequence is the same as variant",
1146 had_exceptions =
true;
1153 v.
SetData().SetSet().SetVariations())
1156 had_exceptions = had_exceptions ||
AttachSeq(
v2, max_len);
1159 return !had_exceptions;
1278 first.SetInt().SetTo(start - 1);
1281 if(stop == max_pos) {
1284 second.
SetInt().SetFrom(stop + 1);
1299 if(prot_str.size() != 1) {
1303 static const char* alphabet =
"ACGT";
1304 string codon =
"AAA";
1305 for(
size_t i0 = 0; i0 < 4; i0++) {
1306 codon[0] = alphabet[i0];
1307 for(
size_t i1 = 0; i1 < 4; i1++) {
1308 codon[1] = alphabet[i1];
1309 for(
size_t i2 = 0; i2 < 4; i2++) {
1310 codon[2] = alphabet[i2];
1316 if(
prot == prot_str) {
1327 for(
size_t i = 0;
i <
min(
a.size(),
b.size());
i++) {
1336 const string& codon_from,
1337 const string& prot_to,
1338 vector<string>& codons_to)
1340 vector<string> candidates1;
1341 size_t max_matches(0);
1344 bool have_silent =
false;
1346 ITERATE(vector<string>, it1, candidates1)
1358 if(matches >= max_matches) {
1359 if(matches > max_matches) {
1362 codons_to.push_back(*it1);
1363 max_matches = matches;
1368 if(codons_to.empty() && have_silent) {
1369 codons_to.push_back(codon_from);
1375 string collapsed_seq;
1379 typedef const vector<string> TConstStrs;
1382 const string& seq = *it;
1383 if(seq.size() > bits.size()) {
1384 bits.resize(seq.size());
1387 for(
size_t i = 0;
i < seq.size();
i++) {
1389 int m = (nt ==
'T' ? 1
1392 : nt ==
'A' ? 8 : 0);
1401 static const char* iupac_nuc_ambiguity_codes =
"NTGKCYSBAWRDMHVN";
1402 collapsed_seq.resize(bits.size());
1403 for(
size_t i = 0;
i < collapsed_seq.size();
i++) {
1404 collapsed_seq[
i] = iupac_nuc_ambiguity_codes[bits[
i]];
1406 return collapsed_seq;
1414 v.
SetData().SetSet().SetVariations().clear();
1420 if(
v2->GetData().IsInstance()
1421 &&
v2->GetData().GetInstance().IsSetObservation()
1427 v.
SetData().SetSet().SetVariations().push_back(
v2);
1440 if(!placements || placements->size() == 0) {
1467 if(!prot2precursor_mapper) {
1487 v2->SetPlacements().push_back(p);
1493 if(!nuc_loc->
IsInt()
1497 || !
delta->IsSetSeq()
1498 || !
delta->GetSeq().IsLiteral()
1499 ||
delta->GetSeq().GetLiteral().GetLength() != 1)
1505 v2->SetData().SetUnknown();
1506 v2->SetPlacements().push_back(p);
1513 string original_allele_codon;
1516 string variant_codon;
1520 delta->GetSeq().GetLiteral().GetSeq_data(),
1524 vector<string> variant_codons;
1533 original_allele_codon,
1542 && variant_codon != original_allele_codon) {
1543 while(variant_codon.length() > 0
1544 && original_allele_codon.length() > 0
1545 && variant_codon.at(0) == original_allele_codon.at(0)) {
1546 variant_codon = variant_codon.substr(1);
1547 original_allele_codon = original_allele_codon.substr(1);
1549 nuc_loc->
SetInt().SetTo()--;
1551 nuc_loc->
SetInt().SetFrom()++;
1555 while(variant_codon.length() > 0
1556 && original_allele_codon.length() > 0
1557 && variant_codon.at(variant_codon.length() - 1)
1558 == original_allele_codon.at(original_allele_codon.length() - 1)) {
1559 variant_codon.resize(variant_codon.length() - 1);
1560 original_allele_codon.resize(original_allele_codon.length() - 1);
1564 nuc_loc->
SetInt().SetFrom()++;
1566 nuc_loc->
SetInt().SetTo()--;
1572 delta2->SetSeq().SetLiteral().SetLength(variant_codon.length());
1573 delta2->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(variant_codon);
1583 v2->SetPlacements().push_back(p2);
1587 v2->SetData().SetUnknown();
1591 inst2.
SetDelta().push_back(delta2);
1648 if(cached_literal) {
1669 literal->SetSeq_data().SetNcbieaa().Set().push_back(
1677 literal->SetLength(seq.size());
1679 literal->SetSeq_data().SetNcbieaa().Set(seq);
1681 literal->SetSeq_data().SetIupacna().Set(seq);
1699 if(
b.GetLength() == 0) {
1701 }
else if(
a.GetLength() == 0) {
1706 if(
a.IsSetFuzz() ||
b.IsSetFuzz()) {
1710 if(
a.IsSetSeq_data() &&
b.IsSetSeq_data()) {
1712 a.GetSeq_data(), 0,
a.GetLength(),
1713 b.GetSeq_data(), 0,
b.GetLength());
1787 v.
SetData().SetSet().SetVariations())
1801 "Could not find literal for 'this' location in placements");
1805 di->SetSeq().SetLiteral().Assign(*this_literal);
1815 di->SetSeq().SetLiteral().SetLength(0);
1816 di->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(
"");
1818 }
else if(inst.
GetDelta().size() > 1) {
1826 di.
SetSeq().SetLiteral().SetLength(0);
1827 di.
SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(
"");
1840 di.
SetSeq().SetLiteral().Assign(*this_literal);
1851 if(!
literal.IsSetSeq_data() || !
literal.GetSeq_data().IsIupacna()) {
1854 string str_kernel =
literal.GetSeq_data().GetIupacna().Get();
1855 literal.SetSeq_data().SetIupacna().Set(
"");
1857 literal.SetSeq_data().SetIupacna().Set() += str_kernel;
1881 }
else if(this_literal->GetLength() == 0) {
1889 this_literal->GetLength() - 1);
1902 bool ignore_genomic)
1909 v.
SetData().SetSet().SetVariations())
1923 if(!placements || placements->size() == 0) {
1964 consequence->SetVariation(*prot_variation);
1971 static string Translate(
const string& nuc_str,
bool is_mito)
1976 code.SetId(is_mito ? 2 : 1);
1984 if(prot_str.size() * 3 < nuc_str.size()) {
1985 prot_str.push_back(
'X');
1989 size_t stop_pos = prot_str.find(
'*');
1990 if(stop_pos !=
NPOS) {
1991 prot_str.resize(stop_pos + 1);
1998 const string& prot_ref_str,
1999 const string& prot_delta_str)
2013 const string& prot_ref_str,
2014 const string& prot_delta_str)
2017 for(
size_t i = 0;
i < prot_ref_str.size() &&
i < prot_delta_str.size();
i++) {
2018 if(prot_ref_str[
i] == prot_delta_str[
i]) {
2020 }
else if(prot_ref_str[
i] ==
'*') {
2022 }
else if(prot_delta_str[
i] ==
'*') {
2033 const string& prot_ref_str,
2034 const string& prot_variant_str)
2038 bool stop_gain =
false;
2039 bool stop_loss =
false;
2040 for(
size_t i = 0;
i <
max(prot_ref_str.size(), prot_variant_str.size());
i++) {
2041 char r =
i >= prot_ref_str.size() ?
'-' : prot_ref_str[
i];
2042 char v =
i >= prot_variant_str.size() ?
'-' : prot_variant_str[
i];
2044 if(
r ==
'*' && v !=
'*') {
2048 if(
r !=
'*' && v ==
'*') {
2059 if(nuc_delta_len == 0) {
2060 if(!stop_gain && !stop_loss) {
2064 }
else if(nuc_delta_len % 3 == 0) {
2076 vp.
SetLoc().FlipStrand();
2079 vp.
SetSeq().SetSeq_data(),
2080 &vp.
SetSeq().SetSeq_data(),
2088 if(
tmp->IsSetStart_offset()) {
2094 if(
tmp->IsSetStop_offset()) {
2100 if(
tmp->IsSetStart_offset_fuzz()) {
2106 if(
tmp->IsSetStop_offset_fuzz()) {
2151 v.
SetData().SetSet().SetVariations())
2172 di.
SetSeq().SetLoc().FlipStrand();
2175 di.
SetSeq().GetLiteral().GetSeq_data(),
2176 &di.
SetSeq().SetLiteral().SetSeq_data(),
2214 sub_loc->
Assign(*range_loc);
2217 if(!suffix_loc->
Which()) {
2221 sub_loc->
Assign(*range_loc);
2224 if(!prefix_loc->
Which()) {
2229 swap(prefix_loc, suffix_loc);
2247 if(!
delta.IsSetSeq() || !
delta.GetSeq().IsLiteral()) {
2253 delta.SetSeq().SetLiteral(*tmp_literal2);
2277 p->
SetLoc().SetWhole().Assign(
id);
2280 v->SetData().SetUnknown();
2281 v->SetPlacements().push_back(p);
2289 bool is_frameshifting,
2306 prot_loc = nuc2prot_mapper->
Map(nuc_p.
GetLoc());
2307 codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2311 if(codons_loc->
IsNull()) {
2327 v->SetData().SetUnknown();
2330 prot_p->
SetLoc(*prot_loc);
2333 "Cannot infer consequence; projecting location only",
2335 v->SetPlacements().push_back(prot_p);
2338 codons_p->
SetLoc(*codons_loc);
2340 v->SetPlacements().push_back(codons_p);
2347 if(is_frameshifting) {
2358 while(
i <
a.size() &&
i <
b.size() &&
a[
i] ==
b[
i]) {
2367 while(
i <
a.size() &&
i <
b.size() &&
a[
a.size() - 1 -
i] ==
b[
b.size() - 1 -
i]) {
2406 v->SetData().SetInstance().Assign(nuc_inst);
2407 v->ResetPlacements();
2412 v->SetPlacements().push_back(p);
2428 const CDelta_item& nuc_delta = *v->GetData().GetInstance().GetDelta().front();
2461 prot_loc = nuc2prot_mapper->
Map(p->
GetLoc());
2462 codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2479 int frameshift_phase = nuc_delta_len % 3;
2480 if(frameshift_phase < 0) {
2481 frameshift_phase += 3;
2488 frameshift_phase != 0,
2493 string downstream_cds_suffix_seq_str;
2509 downstream_cds_loc = ext_cds_loc->
Intersect(
2517 if(
literal->GetLength() > 0) {
2518 downstream_cds_suffix_seq_str =
literal->GetSeq_data().GetIupacna().Get();
2536 if(!v->GetPlacements().front()->GetSeq().IsSetSeq_data()) {
2540 frameshift_phase != 0,
2546 string nuc_ref_prefix = v->GetPlacements().front()->GetSeq().GetSeq_data().GetIupacna().Get();
2548 const CSeq_literal& nuc_var_literal = v->GetData().GetInstance().GetDelta().front()->GetSeq().GetLiteral();
2551 string nuc_ref_str = nuc_ref_prefix + downstream_cds_suffix_seq_str;
2552 string nuc_var_str = nuc_var_prefix + downstream_cds_suffix_seq_str;
2556 int num_ref_codons = (nuc_ref_prefix.size() + 2) / 3;
2557 int num_var_codons = (nuc_var_prefix.size() + 2) / 3;
2561 <<
"nuc_var_str: " << nuc_var_str <<
"\n";
2565 <<
"prot_var_str: " << prot_var_str <<
"\n";
2568 int common_prot_prefix_len(0);
2571 if(prot_ref_str == prot_var_str) {
2574 prot_ref_str.resize(
min(
static_cast<int>(prot_ref_str.size()), num_ref_codons));
2575 prot_var_str.resize(prot_ref_str.size());
2577 if(prot_ref_str.size() > 0 && *prot_ref_str.rbegin() ==
'*') {
2579 frameshift_phase = 0;
2593 if(common_prot_prefix_len > 0
2594 && common_prot_prefix_len ==
static_cast<int>(prot_ref_str.size())) {
2595 common_prot_prefix_len -= 1;
2599 prot_ref_str = prot_ref_str.substr(common_prot_prefix_len);
2600 prot_var_str = prot_var_str.substr(common_prot_prefix_len);
2602 if(
verbose)
NcbiCerr <<
"prot_ref_str: " << prot_ref_str <<
":" << prot_ref_str.size() <<
"\n"
2603 <<
"prot_var_str: " << prot_var_str <<
":" << prot_var_str.size() <<
"\n";
2605 if(frameshift_phase == 0) {
2608 size_t min_len =
min(prot_ref_str.size(), prot_var_str.size());
2609 size_t ref_stop_pos = prot_ref_str.find(
'*');
2610 size_t var_stop_pos = prot_var_str.find(
'*');
2611 size_t min_stop_pos =
min(ref_stop_pos, var_stop_pos);
2614 bool truncate_at_stop = min_stop_pos < min_len
2615 && ref_stop_pos != var_stop_pos
2616 && nuc_delta_len == 0;
2618 if(truncate_at_stop) {
2619 prot_ref_str.resize(min_stop_pos + 1);
2620 prot_var_str.resize(min_stop_pos + 1);
2622 prot_ref_str.resize(prot_ref_str.size() - suffix_len);
2623 prot_var_str.resize(prot_var_str.size() - suffix_len);
2628 prot_ref_str.resize(
min(
static_cast<size_t>(1), prot_ref_str.size()));
2629 prot_var_str.resize(
min(
static_cast<size_t>(1), prot_var_str.size()));
2634 if(prot_ref_str.size() == 0) {
2637 prot_loc->
SetInt().SetFrom() += common_prot_prefix_len - 1;
2638 prot_loc->
SetInt().SetTo(prot_loc->
SetInt().SetFrom() + 1);
2641 prot_loc->
SetInt().SetFrom() += common_prot_prefix_len;
2642 prot_loc->
SetInt().SetTo(prot_loc->
SetInt().SetFrom() + prot_ref_str.size() - 1);
2646 codons_loc = prot2nuc_mapper->
Map(*prot_loc);
2648 if(codons_loc->
IsNull()) {
2653 frameshift_phase != 0,
2660 if(
verbose)
NcbiCerr <<
"prot_ref_str: " << prot_ref_str <<
":" << prot_ref_str.size() <<
"\n"
2661 <<
"prot_var_str: " << prot_var_str <<
":" << prot_var_str.size() <<
"\n";
2666 <<
"; variant codons: " << num_var_codons
2667 <<
"; common prefix: " << common_prot_prefix_len <<
"\n";
2683 prot_p->
SetSeq().SetLength(prot_ref_str.size());
2684 prot_p->
SetSeq().SetSeq_data().SetNcbieaa().Set(prot_ref_str);
2686 prot_p->
SetLoc(*prot_loc);
2690 prot_v->SetPlacements().push_back(prot_p);
2694 codons_p->
SetLoc(*codons_loc);
2698 prot_v->SetPlacements().push_back(codons_p);
2703 if(frameshift_phase == 0 && prot_ref_str.size() == prot_var_str.size()) {
2707 prot_v->SetVariant_prop().SetEffect(prop);
2714 copy(so_terms.begin(), so_terms.end(), back_inserter(prot_v->SetSo_terms()));
2718 prot_v->SetData().SetInstance().SetType(
CalcInstTypeForAA(prot_ref_str, prot_var_str));
2722 prot_v->SetData().SetInstance().SetDelta().push_back(di);
2724 if(prot_var_str.size() > 0) {
2732 if(
false && common_prot_prefix_len == 0) {
2733 di->SetSeq().Assign(v->GetData().GetInstance().GetDelta().front()->GetSeq());
2737 <<
"inst-type: " << prot_v->GetData().GetInstance().GetType()
2738 <<
"; nuc_var_len: " << nuc_var_str.size()
2739 <<
"; nuc_var_str: " << nuc_var_str
2740 <<
"; prefix_len: " << common_prot_prefix_len * 3
2741 <<
"; var_codons:" << prot_var_str.size() * 3 <<
"\n";
2745 string adjusted_codons_str = nuc_var_str.substr(
2746 min<int>(nuc_var_str.size(), common_prot_prefix_len * 3),
2747 prot_var_str.size() * 3);
2749 if(adjusted_codons_str.size() > 0) {
2750 di->SetSeq().SetLiteral().SetLength(adjusted_codons_str.size());
2751 di->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set() = adjusted_codons_str;
2753 di->SetSeq().SetThis();
2758 if(prot_ref_str.size() == 0) {
2762 di->SetSeq().SetThis();
2767 if(frameshift_phase != 0) {
2770 prot_v->SetVariant_prop().SetEffect(
2772 | (prot_v->IsSetVariant_prop()
2773 && prot_v->GetVariant_prop().IsSetEffect()
2774 ? prot_v->GetVariant_prop().GetEffect() : 0));
2776 prot_v->SetFrameshift().SetPhase(frameshift_phase);
2902 if(parent ==
NULL) {
2929 if(p1.size() != p2.size()) {
2932 CVariation::TPlacements::const_iterator it1 = p1.begin();
2933 CVariation::TPlacements::const_iterator it2 = p2.begin();
2935 for(; it1 != p1.end() && it2 != p2.end(); ++it1, ++it2) {
2953 v.
SetData().SetSet().SetVariations())
2963 v.
SetData().SetSet().SetVariations())
2966 if(!
v2.IsSetPlacements()) {
2970 p1 = &
v2.SetPlacements();
2973 if(!
Equals(*p1,
v2.GetPlacements())) {
2989 v.
SetData().SetSet().SetVariations())
2992 v2.ResetPlacements();
3006 const CVariation::TConsequence::value_type::TObjectType& cons = **it;
3007 if(cons.IsVariation()
3008 && cons.GetVariation().IsSetPlacements()) {
3015 cons_v.
Reset(&cons.GetVariation());
3043 const CDbtag& dbtag = **it;
3044 if(dbtag.
GetDb() ==
"GeneID"
3052 dbtag->
SetDb(
"GeneID");
3053 dbtag->
SetTag().SetId(gene_id);
3099 bool is_completely_intronic =
false;
3109 && (is_start_offset || is_stop_offset);
3118 is_completely_intronic = is_case1 || is_case2;
3127 for(
size_t i = 0;
i < 3;
i++) {
3140 int gene_id = it->first;
3143 if(loc_prop &
flags[
i]) {
3151 if(!is_completely_intronic) {
3172 genomic_query_loc = mapper->
Map(query_loc);
3174 genomic_query_loc.
Reset(&query_loc);
3207 TIdRangeMap loc_map;
3212 loc_map[ci.GetSeq_id_Handle()][ci.GetRange()] = term;
3219 if(!rna_loc && !cds_loc) {
3223 const CSeq_loc& main_loc = rna_loc ? *rna_loc : *cds_loc;
3243 *ci.GetRangeAsSeq_loc());
3268 const SPropsMap::TRangeMap& rm = props_map.loc_map[ci.GetSeq_id_Handle()];
3269 for(SPropsMap::TRangeMap::const_iterator it2 = rm.begin(ci.GetRange()); it2.Valid(); ++it2) {
3270 terms_set.
insert(it2->second);
3273 copy(terms_set.
begin(), terms_set.
end(), back_inserter(terms));
3294 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
3303 return last_exon_pos ? last_exon_pos + 1
3304 : last_polyA_pos ? last_polyA_pos + 1
3341 if(offset < 0 && offset >= -2) {
3362 if(!
v2.IsSetPlacements()) {
3367 if(!
v2.SetVariant_prop().IsSetGene_location()) {
3368 v2.SetVariant_prop().SetGene_location(0);
3376 if(
v2.GetConsequenceParent()) {
3423 int gene_id = gene_id_and_prop.first;
3426 if(m.find(gene_id) == m.end()) {
3429 m[gene_id] |= properties;
3447 m_loc2prop[ci.GetSeq_id_Handle()][ci.GetRange()].push_back(
TGeneIDAndProp(gene_id, prop));
3463 m_rangemap[ci.GetSeq_id_Handle()][ci.GetRange()] = ci.GetRangeAsSeq_loc();
3476 if(it2 == m_rangemap.end()) {
3505 if(std::find(k.begin(), k.end(),
"RefSeqGene") != k.end()) {
3535 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
3544 if(transcript_seq_ids.
find(product_id) == transcript_seq_ids.
end()) {
3553 const CDbtag& dbtag = **it;
3554 if(dbtag.
GetDb() ==
"GeneID"
3555 || dbtag.
GetDb() ==
"LocusID") {
3580 feature::CFeatTree ft(ci);
3582 for(ci.
Rewind(); ci; ++ci) {
3590 return s_GetGeneID(mf, ft);
3608 feature::CFeatTree ft(ci);
3610 m_loc2prop[idh].size();
3625 for(ci.
Rewind(); ci; ++ci) {
3631 const int gene_id = s_GetGeneID(mf, ft);
3632 const bool is_focus_locus = focus_loci.
empty()
3633 || focus_loci.count(gene_id);
3635 (is_focus_locus ? focus_gene_ranges
3636 : non_focus_gene_ranges)
3637 ->SetMix().Set().push_back(
3649 bool found_some_gene_ids =
false;
3651 for(ci.
Rewind(); ci; ++ci) {
3659 const int gene_id = s_GetGeneID(mf, ft);
3660 if(!focus_loci.
empty()
3661 && focus_loci.
find(gene_id) == focus_loci.
end()) {
3665 if(!parent_mf && gene_id) {
3673 found_some_gene_ids =
true;
3695 p.first->ResetStrand();
3696 p.second->ResetStrand();
3717 subtract_gene_ranges_from(*p.first);
3718 subtract_gene_ranges_from(*p.second);
3724 all_gene_neighborhoods->
SetMix().Set().push_back(p.first);
3725 all_gene_neighborhoods->
SetMix().Set().push_back(p.second);
3747 x_Add(*ci.GetRangeAsSeq_loc(),
3792 if(ft.GetChildren(mf).size() == 0) {
3812 genes_and_neighborhoods_loc =
3814 *genes_and_neighborhoods_loc,
3815 *non_focus_gene_ranges,
3823 *genes_and_neighborhoods_loc,
3827 x_Add(*intergenic_loc,
3833 && !found_some_gene_ids) {
3843 int gene_id = s_GetGeneIdForProduct(bsh);
3845 x_Add(*whole_range_loc, gene_id, 0);
3852 feature::CFeatTree& ft)
3861 const CDbtag& dbtag = **it;
3862 if(dbtag.
GetDb() ==
"GeneID"
3863 || dbtag.
GetDb() ==
"LocusID") {
3876 const CDbtag& dbtag = **it;
3877 if(dbtag.
GetDb() ==
"GeneID"
3878 || dbtag.
GetDb() ==
"LocusID") {
3886 return parent ? s_GetGeneID(parent, ft) : gene_id;
3899 p.second->Assign(*p.first);
3900 p.first->SetInt().SetTo(p.first->GetInt().GetFrom() + 2);
3901 p.second->SetInt().SetFrom(p.second->GetInt().GetTo() - 2);
3904 swap(p.first, p.second);
3911 p.second->SetNull();
3923 sub_loc2->
Assign(*sub_loc1);
3932 swap(p.first, p.second);
3940 TSeqPos flank1_len(2000), flank2_len(500);
3942 swap(flank1_len, flank2_len);
3948 p.second->Assign(*p.first);
3950 if(p.first->GetTotalRange().GetFrom() == 0) {
3953 p.first->SetInt().SetTo(p.first->GetTotalRange().GetFrom() - 1);
3954 p.first->SetInt().SetFrom(p.first->GetTotalRange().GetFrom() < flank1_len ? 0 : p.first->GetTotalRange().GetFrom() - flank1_len);
3957 if(p.second->GetTotalRange().GetTo() == max_pos) {
3958 p.second->SetNull();
3960 p.second->SetInt().SetFrom(p.second->GetTotalRange().GetTo() + 1);
3961 p.second->SetInt().SetTo(p.second->GetTotalRange().GetTo() > max_pos ? max_pos : p.second->GetTotalRange().GetTo() + flank2_len);
3965 swap(p.first, p.second);
3978 introns_loc_without_splice_sites->
Assign(*introns_loc_with_splice_sites);
3985 seqint.
SetTo() -= 2;
3990 p.first = introns_loc_without_splice_sites;
3992 *introns_loc_without_splice_sites,
4013 m_seq_data_map[idh].mapper.
Reset();
4017 for(
CFeat_CI ci(bsh, sel); ci; ++ci) {
4031 m_data[ci.GetSeq_id_Handle()][ci.GetRange()].push_back(s);
4046 x_CacheSeqData(*all_rna_loc, idh);
4053 SSeqData& d = m_seq_data_map[idh2];
4082 target_loc->
SetInt().SetId().SetLocal().SetStr(
"all_cds");
4083 target_loc->
SetInt().SetFrom(0);
4093 literal->SetSeq_data().SetIupacna().Set(
"");
4101 if(m_seq_data_map.find(ci.GetSeq_id_Handle()) == m_seq_data_map.end()) {
4104 const SSeqData& d = m_seq_data_map.find(ci.GetSeq_id_Handle())->second;
4113 if((!mapped_loc->
IsInt() && !mapped_loc->
IsPnt())
4121 literal->SetSeq_data().SetIupacna().Set() += seq_chunk;
4137 if(m_data.find(idh) == m_data.end()) {
4143 if(it == m_data.end()) {
4157 cdregions.push_back(*it);
4175 if(!v->IsSetId() && parent.
IsSetId()) {
4176 v->SetId().Assign(parent.
GetId());
4210 feat->
SetData().SetVariation(*vr);
4211 feats.push_back(feat);
4239 out_feats.insert(out_feats.end(), feats.begin(), feats.end());
4247 vr->SetId().Assign(v.
GetId());
4282 vr->SetPhenotype().push_back(p);
4297 new CVariation_ref::TConsequence::value_type::TObjectType);
4298 vr->SetConsequence().push_back(fr_cons);
4299 fr_cons->SetFrameshift();
4309 vr->SetData().SetComplex();
4316 vr->SetData().SetUniparental_disomy();
4318 vr->SetData().SetUnknown();
4335 vr->SetConsequence();
4338 const CVariation::TConsequence::value_type::TObjectType& v_cons = **it;
4340 new CVariation_ref::TConsequence::value_type::TObjectType);
4341 vr->SetConsequence().push_back(vr_cons);
4342 vr_cons->SetUnknown();
4344 if(v_cons.IsSplicing()) {
4345 vr_cons->SetSplicing();
4346 }
else if(v_cons.IsNote()) {
4347 vr_cons->SetNote(v_cons.GetNote());
4348 }
else if(v_cons.IsVariation()) {
4350 vr_cons->SetVariation(*cons_variation);
4351 }
else if(v_cons.IsLoss_of_heterozygosity()) {
4352 vr_cons->SetLoss_of_heterozygosity();
4353 if(v_cons.GetLoss_of_heterozygosity().IsSetReference()) {
4354 vr_cons->SetLoss_of_heterozygosity().SetReference(
4355 v_cons.GetLoss_of_heterozygosity().GetReference());
4357 if(v_cons.GetLoss_of_heterozygosity().IsSetTest()) {
4358 vr_cons->SetLoss_of_heterozygosity().SetTest(
4359 v_cons.GetLoss_of_heterozygosity().GetTest());
4366 vr->SetSomatic_origin();
4369 const CVariation::TSomatic_origin::value_type::TObjectType& v_so = **it;
4372 new CVariation_ref::TSomatic_origin::value_type::TObjectType);
4374 if(v_so.IsSetSource()) {
4375 vr_so->SetSource().Assign(v_so.GetSource());
4378 if(v_so.IsSetCondition()) {
4379 vr_so->SetCondition();
4380 if(v_so.GetCondition().IsSetDescription()) {
4381 vr_so->SetCondition().SetDescription(
4382 v_so.GetCondition().GetDescription());
4384 if(v_so.GetCondition().IsSetObject_id()) {
4385 vr_so->SetCondition().SetObject_id();
4386 ITERATE(CVariation::TSomatic_origin::value_type::TObjectType::TCondition::TObject_id,
4388 v_so.GetCondition().GetObject_id())
4392 vr_so->SetCondition().SetObject_id().push_back(dbtag);
4397 vr->SetSomatic_origin().push_back(vr_so);
4411 delta->SetMultiplier(-1);
4414 delta->SetSeq().SetLiteral().SetFuzz().Assign(*fuzz);
4451 v->SetPlacements().push_back(p);
4455 v->SetPub().Assign(variation_feat.
GetCit());
4461 v->SetExt().push_back(uo);
4469 v->SetExt().push_back(uo);
4481 v->SetId().Assign(vr.
GetId());
4516 v->SetPhenotype().push_back(p);
4521 v->SetMethod().SetMethod() = vr.
GetMethod();
4529 v->SetData().SetComplex();
4535 v->SetData().SetUniparental_disomy();
4537 v->SetData().SetUnknown();
4554 v->SetConsequence();
4557 const CVariation_ref::TConsequence::value_type::TObjectType& vr_cons = **it;
4559 if(vr_cons.IsFrameshift()) {
4564 if(vr_cons.GetFrameshift().IsSetPhase()) {
4565 cons_variation.
SetFrameshift().SetPhase(vr_cons.GetFrameshift().GetPhase());
4567 if(vr_cons.GetFrameshift().IsSetX_length()) {
4568 cons_variation.
SetFrameshift().SetX_length(vr_cons.GetFrameshift().GetX_length());
4575 if(vr_cons.IsUnknown()) {
4576 v_cons->SetUnknown();
4577 }
else if(vr_cons.IsSplicing()) {
4578 v_cons->SetSplicing();
4579 }
else if(vr_cons.IsNote()) {
4580 v_cons->SetNote(vr_cons.GetNote());
4581 }
else if(vr_cons.IsVariation()) {
4583 v_cons->SetVariation(*cons_variation);
4584 }
else if(vr_cons.IsLoss_of_heterozygosity()) {
4585 v_cons->SetLoss_of_heterozygosity();
4586 if(vr_cons.GetLoss_of_heterozygosity().IsSetReference()) {
4587 v_cons->SetLoss_of_heterozygosity().SetReference(vr_cons.GetLoss_of_heterozygosity().GetReference());
4589 if(vr_cons.GetLoss_of_heterozygosity().IsSetTest()) {
4590 v_cons->SetLoss_of_heterozygosity().SetTest(vr_cons.GetLoss_of_heterozygosity().GetTest());
4594 v->SetConsequence().push_back(v_cons);
4596 if(v->GetConsequence().empty()) {
4597 v->ResetConsequence();
4602 v->SetSomatic_origin();
4605 const CVariation_ref::TSomatic_origin::value_type::TObjectType& vr_so = **it;
4608 if(vr_so.IsSetSource()) {
4609 v_so->SetSource().Assign(vr_so.GetSource());
4612 if(vr_so.IsSetCondition()) {
4613 v_so->SetCondition();
4614 if(vr_so.GetCondition().IsSetDescription()) {
4615 v_so->SetCondition().SetDescription(vr_so.GetCondition().GetDescription());
4617 if(vr_so.GetCondition().IsSetObject_id()) {
4618 v_so->SetCondition().SetObject_id();
4619 ITERATE(CVariation_ref::TSomatic_origin::value_type::TObjectType::TCondition::TObject_id,
4621 vr_so.GetCondition().GetObject_id())
4625 v_so->SetCondition().SetObject_id().push_back(dbtag);
4630 v->SetSomatic_origin().push_back(v_so);
4640 return delta.GetSeq().GetLiteral().GetLength()
4641 * (
delta.IsSetMultiplier() ?
delta.GetMultiplier() : 1);
4646 return delta.GetSeq().GetLiteral().IsSetFuzz() ?
4647 &
delta.GetSeq().GetLiteral().GetFuzz() :
NULL;
4655 v.
SetData().SetSet().SetVariations())
4663 if(delta_first->IsSetAction()
4671 v.
SetData().SetInstance().SetDelta().pop_front();
4674 if(delta_last != delta_first
4675 && delta_last->IsSetAction()
4683 v.
SetData().SetInstance().SetDelta().pop_back();
4693 string* asserted_out,
4706 s_PropagateLocsInPlace(vr);
4709 bool have_asserted_seq =
false;
4717 string asserted_seq;
4719 if(
literal.GetSeq_data().IsIupacna()) {
4720 asserted_seq =
literal.GetSeq_data().GetIupacna();
4721 have_asserted_seq =
true;
4722 }
else if(
literal.GetSeq_data().IsNcbieaa()) {
4723 asserted_seq =
literal.GetSeq_data().GetNcbieaa();
4724 have_asserted_seq =
true;
4735 v.GetSeqData(v.begin(), v.end(), actual_seq);
4742 *asserted_out = asserted_seq;
4745 *actual_out = actual_seq;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
bool SameOrientation(ENa_strand a, ENa_strand b)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
ESubtype GetSubtype(void) const
TSeqPos GetSeqStop(TDim row) const
CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
namespace ncbi::objects::
TSeqPos GetLength(void) const
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static TSeqPos Keep(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos Append(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2)
Template class for iteration on objects of class C (non-modifiable version)
Template class for iteration on objects of class C.
Set of related Variations.
Set of related Variations.
const TLocation & GetLocation(void) const
void SetLocation(TLocation &value)
bool IsSetLocation(void) const
NOTE: THESE ARE GOING AWAY SOON!!
const CVariation * GetParent() const
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
void Get(const CSeq_loc &loc, TCdregions &cdregions)
vector< SCdregion > TCdregions
void x_Index(const CSeq_id_Handle &idh)
CRef< CSeq_literal > GetCachedLiteralAtLoc(const CSeq_loc &loc)
void x_CacheSeqData(const CSeq_loc &loc, const CSeq_id_Handle &idh)
static TLocsPair s_GetIntronsAndSpliceSiteLocs(const CSeq_loc &rna_loc)
pair< CRef< CSeq_loc >, CRef< CSeq_loc > > TLocsPair
static int s_GetGeneIdForProduct(CBioseq_Handle bsh)
void GetLocationProperties(const CSeq_loc &loc, TGeneIDAndPropVector &v)
pair< int, CVariantProperties::TGene_location > TGeneIDAndProp
static TLocsPair s_GetNeighborhoodLocs(const CSeq_loc &gene_loc, TSeqPos max_pos)
static TLocsPair s_GetUTRLocs(const CSeq_loc &cds_loc, const CSeq_loc &parent_loc)
static TLocsPair s_GetStartAndStopCodonsLocs(const CSeq_loc &cds_loc)
static int s_GetGeneID(const CMappedFeat &mf, feature::CFeatTree &ft)
void x_Add(const CSeq_loc &loc, int gene_id, CVariantProperties::TGene_location prop)
void x_Index(const CSeq_id_Handle &idh)
vector< TGeneIDAndProp > TGeneIDAndPropVector
CRef< CVariantPlacement > RemapToAnnotatedTarget(const CVariation &v, const CSeq_id &target)
Remap variation from product coordinates onto a nucleotide sequence on which this product is annotate...
static void s_AddInstOffsetsFromPlacementOffsets(CVariation_inst &vi, const CVariantPlacement &p)
static const CVariation::TPlacements * s_GetPlacements(const CVariation &v)
@ fAA2NA_truncate_common_prefix_and_suffix
void FindLocationProperties(const CSeq_align &transcript_aln, const CSeq_loc &query_loc, TSOTerms &terms)
Find location properties based on alignment.
CRef< CSeq_literal > GetLiteralAtLoc(const CSeq_loc &loc)
CRef< CVariation > x_AsVariation(const CVariation_ref &vr)
SFlankLocs CreateFlankLocs(const CSeq_loc &loc, TSeqPos len)
vector< ESOTerm > TSOTerms
CRef< CVariation > x_CreateUnknownVariation(const CSeq_id &id, CVariantPlacement::TMol mol)
bool CheckAmbiguitiesInLiterals(CVariation &v)
if variation.data contains a seq-literal with non-ACGT residues, attach VariationException to the fir...
static size_t s_CountMatches(const string &a, const string &b)
void x_AdjustDelinsToInterval(CVariation &v, const CSeq_loc &loc)
ETestStatus CheckExonBoundary(const CVariantPlacement &p, const CSeq_align &aln)
static void s_FactorOutPlacements(CVariation &v)
If at any level in variation-set all variations have all same placements, move them to the parent lev...
static CConstRef< CVariation > s_FindConsequenceForPlacement(const CVariation &v, const CVariantPlacement &p)
Find attached consequence variation in v that corresponds to p (has same seq-id).
CRef< CVariation > TranslateNAtoAA(const CVariation_inst &nuc_inst, const CVariantPlacement &p, const CSeq_feat &cds_feat)
Evaluate protein effect of a single-inst @ single-placement.
static string AsString(ESOTerm term)
static CRef< CSeq_literal > s_SpliceLiterals(const CSeq_literal &payload, const CSeq_literal &ref, TSeqPos pos)
insert seq-literal payload into ref before pos (pos=0 -> prepend; pos=ref.len -> append)
void x_SetVariantPropertiesForIntronic(CVariantPlacement &p, int offset, const CSeq_loc &loc, CBioseq_Handle &bsh)
bool AttachSeq(CVariantPlacement &p, TSeqPos max_len=kMaxAttachSeqLen)
If have offsets (intronic) or too long, return false; else set seq field on the placement and return ...
CRef< CVariation > AsVariation(const CSeq_feat &variation_ref)
@ fOpt_cache_exon_sequence
Use when there will be many calls to calculate protein consequence per sequence.
bool CheckPlacement(CVariantPlacement &p)
if placement is invalid SeqLocCheck fails, or offsets out of order, attach VariationException and ret...
static void s_AddIntronicOffsets(CVariantPlacement &p, const CSpliced_seg &ss, CScope *scope)
CVariantPlacement::TMol GetMolType(const CSeq_id &id)
void x_InferNAfromAA(CVariation &v, TAA2NAFlags flags)
static string s_CollapseAmbiguities(const vector< string > &seqs)
static void s_FindLocationProperties(CConstRef< CSeq_loc > rna_loc, CConstRef< CSeq_loc > cds_loc, const CSeq_loc &query_loc, TSOTerms &terms)
void ChangeToDelins(CVariation &v)
static void s_ResolveIntronicOffsets(CVariantPlacement &p)
static void s_UntranslateProt(const string &prot_str, vector< string > &codons)
void AsSOTerms(const CVariantProperties &p, TSOTerms &terms)
static void s_AttachGeneIDdbxref(CVariantPlacement &p, int gene_id)
static const CConstRef< CSeq_literal > s_FindFirstLiteral(const CVariation &v)
CRef< CSeq_literal > x_GetLiteralAtLoc(const CSeq_loc &loc)
CRef< CVariation > InferNAfromAA(const CVariation &prot_variation, TAA2NAFlags flags=fAA2NA_default)
void FlipStrand(CVariation &v) const
Other utility methods:
void SetPlacementProperties(CVariantPlacement &placement)
Methods to compute properties.
CVariantPropertiesIndex m_variant_properties_index
CRef< CVariation_ref > x_AsVariation_ref(const CVariation &v, const CVariantPlacement &p)
CRef< CVariantPlacement > Remap(const CVariantPlacement &p, const CSeq_align &aln, bool check_placements=true)
Methods to remap a VariantPlacement.
static bool s_IsInstStrandFlippable(const CVariation &v, const CVariation_inst &inst)
CCdregionIndex m_cdregion_index
static TSeqPos s_GetLength(const CVariantPlacement &p, CScope *scope)
ESOTerm
Supported SO-terms.
@ eSO_splice_acceptor_variant
@ eSO_nc_transcript_variant
@ eSO_initiator_codon_variant
@ eSO_coding_sequence_variant
@ eSO_2KB_upstream_variant
@ eSO_splice_donor_variant
@ eSO_3_prime_UTR_variant
@ eSO_5_prime_UTR_variant
@ eSO_500B_downstream_variant
@ eSO_terminator_codon_variant
TSeqPos GetEffectiveTranscriptLength(const CBioseq_Handle &bsh)
Length up to last position of the last exon (i.e.
void AttachProteinConsequences(CVariation &nuc_variation, const CSeq_id *=NULL, bool ignore_genomic=false)
Find the CDSes for the first placement; Compute prot consequence using TranslateNAtoAA for each and a...
CConstRef< CSeq_literal > x_FindOrCreateLiteral(const CVariation &v)
static void s_ConvertInstOffsetsToPlacementOffsets(CVariation &v, CVariantPlacement &p)
static const CConstRef< CSeq_literal > s_FindAssertedLiteral(const CVariation &v)
void AsVariation_feats(const CVariation &v, CSeq_annot::TData::TFtable &feats)
void SetVariantProperties(CVariation &v)
static CRef< CSeq_literal > s_CatLiterals(const CSeq_literal &a, const CSeq_literal &b)
join two seq-literals
int TAA2NAFlags
Methods to convert between nucleotide and protein.
void s_CalcPrecursorVariationCodon(const string &codon_from, const string &prot_to, vector< string > &codons_to)
CRef< CVariantPlacement > x_Remap(const CVariantPlacement &p, CSeq_loc_Mapper &mapper)
Note: this is strand-agnostic.
CRangeMap< CConstRef< CSeq_loc >, TSeqPos > TRangeMap
map< CSeq_id_Handle, TRangeMap > TIdRangeMap
SFastLocSubtract(const CSeq_loc &loc)
void operator()(CSeq_loc &container_loc) const
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
const TResidue codons[4][4]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...
#define NCBI_USER_THROW_FMT(message)
Throw a "user exception" with message processed as output to ostream.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define NCBI_RETHROW_SAME(prev_exception, message)
Generic macro to re-throw the same exception.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
C * SerialClone(const C &src)
Create on heap a clone of the source object.
#define MSerial_AsnText
I/O stream manipulators –.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
const string AsFastaString(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
CSeq_id::EAccessionInfo IdentifyAccession(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
@ eAcc_refseq_contig_ncbo
@ eAcc_refseq_mrna_predicted
@ eAcc_refseq_prot_predicted
@ eAcc_refseq_ncrna_predicted
@ eAcc_refseq_wgs_intermed
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsTruncatedStop(ESeqLocExtremes ext) const
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
bool IsTruncatedStart(ESeqLocExtremes ext) const
check if parts of the seq-loc are missing
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void ResetStrand(void)
Reset the strand on this location.
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
void SetNull(void)
Override all setters to incorporate cache invalidation.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
@ eOrder_Biological
Iterate sub-locations in positional order.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
ESeqLocCheck SeqLocCheck(const CSeq_loc &loc, CScope *scope)
Checks that a CSeq_loc is all on one strand on one CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
const CMolInfo * GetMolInfo(const CBioseq &bioseq)
Retrieve the MolInfo object for a given bioseq handle.
const CBioseq * GetNucleotideParent(const CBioseq &product, CScope *scope)
Get the encoding nucleotide sequence of a protein.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ fIs5PrimePartial
= 0x4 Translate first codon even if not start codon (because sequence is 5' partial)
@ eGetId_ForceAcc
return only an accession based seq-id
@ eGetId_ForceGi
return only a gi-based seq-id
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_loc_Mapper_Base & SetMergeAll(void)
Merge any abutting or overlapping intervals.
@ eSeqMap_Up
map from segments to the top level bioseq
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
const TDescr & GetDescr(void) const
TBioseqStateFlags GetState(void) const
Get state of the bioseq.
bool IsSetDbxref(void) const
const CSeqFeatData & GetData(void) const
TInst_Mol GetInst_Mol(void) const
bool IsSetProduct(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
const TDescr & GetDescr(void) const
TInst_Length GetInst_Length(void) const
void Reset(void)
Reset handle and make it not to point to any bioseq.
bool IsSetDescr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
const CSeq_feat::TDbxref & GetDbxref(void) const
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
TMol GetBioseqMolType(void) const
Get some values from core:
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool CanGetDescr(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
const CSeq_loc & GetLocation(void) const
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
bool IsProtein(void) const
const_iterator begin(void) const
bool IsNucleotide(void) const
const_iterator end(void) const
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
position_type GetLength(void) const
const_iterator begin(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
size_type size(void) const
Return the length of the represented array.
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetDb(void) const
ids in other dbases Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
vector< CRef< CDbtag > > TDb
bool IsLim(void) const
Check if variant Lim is selected.
const TTag & GetTag(void) const
Get the Tag member data.
void SetTag(TTag &value)
Assign a value to Tag data member.
bool IsId(void) const
Check if variant Id is selected.
const TDb & GetDb(void) const
Get the Db member data.
TLim GetLim(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
TLim & SetLim(void)
Select the variant.
bool IsRange(void) const
Check if variant Range is selected.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId GetId(void) const
Get the variant data.
@ eLim_tl
space to left of position
@ eLim_tr
space to right of position
const TProtpos & GetProtpos(void) const
Get the variant data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
TExons & SetExons(void)
Assign a value to Exons data member.
void SetProduct_strand(TProduct_strand value)
Assign a value to Product_strand data member.
TAmin GetAmin(void) const
Get the Amin member data.
void SetType(TType value)
Assign a value to Type data member.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
void SetGenomic_strand(TGenomic_strand value)
Assign a value to Genomic_strand data member.
bool IsSpliced(void) const
Check if variant Spliced is selected.
bool IsNucpos(void) const
Check if variant Nucpos is selected.
TNucpos GetNucpos(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ eProduct_type_transcript
vector< CRef< CDbtag > > TDbxref
const TExts & GetExts(void) const
Get the Exts member data.
bool IsSetExt(void) const
user defined structure extension Check if a value has been assigned to Ext data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
list< CRef< CUser_object > > TExts
const TCit & GetCit(void) const
Get the Cit member data.
void SetCit(TCit &value)
Assign a value to Cit data member.
TExts & SetExts(void)
Assign a value to Exts data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
void SetData(TData &value)
Assign a value to Data data member.
const TProduct & GetProduct(void) const
Get the Product member data.
bool IsSetExts(void) const
set of extensions; will replace 'ext' field Check if a value has been assigned to Exts data member.
bool IsSetCit(void) const
citations for this feature Check if a value has been assigned to Cit data member.
bool IsVariation(void) const
Check if variant Variation is selected.
const TGene & GetGene(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
const TExt & GetExt(void) const
Get the Ext member data.
const TVariation & GetVariation(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
void SetTo(TTo value)
Assign a value to To data member.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
TFrom GetFrom(void) const
Get the From member data.
E_Choice Which(void) const
Which variant is currently selected.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
void SetLength(TLength value)
Assign a value to Length data member.
bool IsGenbank(void) const
Check if variant Genbank is selected.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
list< CRef< CSeqdesc > > Tdata
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
const TIupacna & GetIupacna(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSource(void) const
Check if variant Source is selected.
const Tdata & Get(void) const
Get the member data.
TLength GetLength(void) const
Get the Length member data.
const TGenbank & GetGenbank(void) const
Get the variant data.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
list< CRef< CSeq_feat > > TFtable
bool IsSetSeq_data(void) const
may have the data Check if a value has been assigned to Seq_data data member.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
bool IsIupacna(void) const
Check if variant Iupacna is selected.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
E_Choice Which(void) const
Which variant is currently selected.
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_genomic_mRNA
reported a mix of genomic and cdna sequence
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ eBiomol_other_genetic
other genetic material
void SetStop_offset_fuzz(TStop_offset_fuzz &value)
Assign a value to Stop_offset_fuzz data member.
void SetGene_location(TGene_location value)
Assign a value to Gene_location data member.
TMol GetMol(void) const
Get the Mol member data.
bool IsSetName(void) const
names and synonyms some variants have well-known canonical names and possible accepted synonyms Check...
bool IsSetStop_offset_fuzz(void) const
Check if a value has been assigned to Stop_offset_fuzz data member.
TMethod & SetMethod(void)
Assign a value to Method data member.
void SetFrame(TFrame value)
Assign a value to Frame data member.
void SetFrameshift(TFrameshift &value)
Assign a value to Frameshift data member.
TPlacement_method GetPlacement_method(void) const
Get the Placement_method member data.
TDbxrefs & SetDbxrefs(void)
Assign a value to Dbxrefs data member.
bool IsSetOther_ids(void) const
Check if a value has been assigned to Other_ids data member.
bool IsComplex(void) const
Check if variant Complex is selected.
list< CRef< CVariantPlacement > > TPlacements
bool IsSetPhenotype(void) const
phenotype Check if a value has been assigned to Phenotype data member.
bool IsSetFrameshift(void) const
Check if a value has been assigned to Frameshift data member.
void SetPlacement_method(TPlacement_method value)
Assign a value to Placement_method data member.
void SetStop_offset(TStop_offset value)
Assign a value to Stop_offset data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
list< CRef< CVariation > > TVariations
const TStop_offset_fuzz & GetStop_offset_fuzz(void) const
Get the Stop_offset_fuzz member data.
bool IsSetPlacements(void) const
where this beast is seen note that this is a set of locations, and there are no restrictions to the c...
bool IsSetGene_location(void) const
Same semantics as VariantProperties.gene-location, except placement-specific Check if a value has bee...
void SetName(const TName &value)
Assign a value to Name data member.
const TName & GetName(void) const
Get the Name member data.
bool IsUnknown(void) const
Check if variant Unknown is selected.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetPub(void) const
publication support; same type as in seq-feat Check if a value has been assigned to Pub data member.
TX_length GetX_length(void) const
Get the X_length member data.
void ResetStop_offset(void)
Reset Stop_offset data member.
bool IsInstance(void) const
Check if variant Instance is selected.
void ResetStart_offset_fuzz(void)
Reset Start_offset_fuzz data member.
const TSample_id & GetSample_id(void) const
Get the Sample_id member data.
const TExt & GetExt(void) const
Get the Ext member data.
list< CRef< CPhenotype > > TPhenotype
const TPhenotype & GetPhenotype(void) const
Get the Phenotype member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSetSeq(void) const
for situations in which a raw location isn't sufficient Check if a value has been assigned to Seq dat...
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
const TInstance & GetInstance(void) const
Get the variant data.
const TOther_ids & GetOther_ids(void) const
Get the Other_ids member data.
bool IsSetParent_id(void) const
Check if a value has been assigned to Parent_id data member.
const TDescription & GetDescription(void) const
Get the Description member data.
bool IsSetStart_offset_fuzz(void) const
Check if a value has been assigned to Start_offset_fuzz data member.
const TSomatic_origin & GetSomatic_origin(void) const
Get the Somatic_origin member data.
list< CRef< CObject_id > > TSample_id
TExceptions & SetExceptions(void)
Assign a value to Exceptions data member.
const TNote & GetNote(void) const
Get the variant data.
const TPub & GetPub(void) const
Get the Pub member data.
const TVariations & GetVariations(void) const
Get the Variations member data.
TStop_offset GetStop_offset(void) const
Get the Stop_offset member data.
void SetSeq(TSeq &value)
Assign a value to Seq data member.
const TFrameshift & GetFrameshift(void) const
Get the Frameshift member data.
const TName & GetName(void) const
Get the Name member data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
void ResetStop_offset_fuzz(void)
Reset Stop_offset_fuzz data member.
bool IsNote(void) const
Check if variant Note is selected.
const TMethod & GetMethod(void) const
Get the Method member data.
const TPlacements & GetPlacements(void) const
Get the Placements member data.
bool IsSetVariant_prop(void) const
variant properties bit fields Check if a value has been assigned to Variant_prop data member.
bool IsSetSample_id(void) const
Check if a value has been assigned to Sample_id data member.
list< CRef< CUser_object > > TExt
TGene_location GetGene_location(void) const
Get the Gene_location member data.
void ResetStart_offset(void)
Reset Start_offset data member.
void SetType(TType value)
Assign a value to Type data member.
const TSynonyms & GetSynonyms(void) const
Get the Synonyms member data.
list< CRef< CDbtag > > TDbxrefs
const TSeq & GetSeq(void) const
Get the Seq member data.
TPlacements & SetPlacements(void)
Assign a value to Placements data member.
void SetStart_offset(TStart_offset value)
Assign a value to Start_offset data member.
void ResetPlacements(void)
Reset Placements data member.
void ResetSeq(void)
Reset Seq data member.
bool IsSetPhase(void) const
Check if a value has been assigned to Phase data member.
const TData & GetData(void) const
Get the Data member data.
void SetMol(TMol value)
Assign a value to Mol data member.
bool IsSetSynonyms(void) const
Check if a value has been assigned to Synonyms data member.
bool IsSetExt(void) const
Additional undescribed extensions Check if a value has been assigned to Ext data member.
bool IsSetMethod(void) const
sequencing / acquisition method. Check if a value has been assigned to Method data member.
const TParent_id & GetParent_id(void) const
Get the Parent_id member data.
bool IsSetX_length(void) const
Check if a value has been assigned to X_length data member.
bool IsSetStop_offset(void) const
Check if a value has been assigned to Stop_offset data member.
list< CRef< CDbtag > > TOther_ids
const TStart_offset_fuzz & GetStart_offset_fuzz(void) const
Get the Start_offset_fuzz member data.
TVariations & SetVariations(void)
Assign a value to Variations data member.
bool IsSetSomatic_origin(void) const
Check if a value has been assigned to Somatic_origin data member.
const TId & GetId(void) const
Get the Id member data.
list< CRef< C_E_Somatic_origin > > TSomatic_origin
const TMethod & GetMethod(void) const
Get the Method member data.
const TVariant_prop & GetVariant_prop(void) const
Get the Variant_prop member data.
bool IsSetStart_offset(void) const
location refinements, describing offsets into introns from product coordinates.
const TLoc & GetLoc(void) const
Get the Loc member data.
bool IsSet(void) const
Check if variant Set is selected.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
const TConsequence & GetConsequence(void) const
Get the Consequence member data.
bool IsUniparental_disomy(void) const
Check if variant Uniparental_disomy is selected.
TType GetType(void) const
Get the Type member data.
void SetStart_offset_fuzz(TStart_offset_fuzz &value)
Assign a value to Start_offset_fuzz data member.
TStart_offset GetStart_offset(void) const
Get the Start_offset member data.
bool IsSetDescription(void) const
Tag for comment and descriptions. Check if a value has been assigned to Description data member.
TConsequence & SetConsequence(void)
Assign a value to Consequence data member.
void ResetGene_location(void)
Reset Gene_location data member.
bool IsSetPlacement_method(void) const
Check if a value has been assigned to Placement_method data member.
list< CRef< C_E_Consequence > > TConsequence
bool IsSetConsequence(void) const
Check if a value has been assigned to Consequence data member.
TPhase GetPhase(void) const
Get the Phase member data.
@ eMol_cdna
"c." coordinates in HGVS
@ eMol_mitochondrion
"mt." coordinates in HGVS
@ eMol_rna
"n." coordinates in HGVS
@ eMol_protein
"p." coordinates in HGVS
@ eMol_genomic
"g." coordinates in HGVS
@ eCode_ambiguous_sequence
@ eCode_inconsistent_consequence
consequence protein variation attached to precursor variation's consequence could not be derived from...
@ eCode_seqfetch_intronic
can't fetch sequence for an intronic (anchor+offset)-based location
@ eCode_ref_same_as_variant
reference sequence at the location is same as variant sequence in the variation
@ eCode_source_location_overhang
The source location overhangs the alignment by at least 5kb (VAR-1307)
@ eCode_hgvs_exon_boundary
anchor position in an intronic HGVS expression is not at an exon boundary
@ eCode_split_mapping
a source interval maps to multiple non-abutting intervals.
@ eCode_hgvs_exon_boundary_induced
Similar to (2), except induced by 5'/3'-terminal or an exon extension (VAR-1309)
@ eCode_inconsistent_asserted_allele
asserted allele is inconsistent with the reference
@ eCode_mismatches_in_mapping
the source sequence differs from sequence at mapped loc
@ eCode_seqfetch_too_long
can't fetch sequence because location is longer than specified threshold
@ eCode_partial_mapping
mapped location is shorter than the query
@ eCode_seqfetch_invalid
can't fetch sequence because location is invalid (e.g. extends past the end)
@ eCode_hgvs_parsing
invalid hgvs expression
@ eCode_no_mapping
could not remap
@ ePlacement_method_projected
@ eMethod_E_computational
TType GetType(void) const
Get the Type member data.
const TInstance & GetInstance(void) const
Get the variant data.
const TVariant_prop & GetVariant_prop(void) const
Get the Variant_prop member data.
TAction GetAction(void) const
Get the Action member data.
bool IsSetSomatic_origin(void) const
Check if a value has been assigned to Somatic_origin data member.
list< CRef< CVariation_ref > > TVariations
TObservation GetObservation(void) const
Get the Observation member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSetDelta(void) const
Sequence that replaces the location, in biological order.
bool IsSetSample_id(void) const
Check if a value has been assigned to Sample_id data member.
bool IsSetVariant_prop(void) const
Variant properties bit fields. Check if a value has been assigned to Variant_prop data member.
list< CRef< CDbtag > > TOther_ids
TType GetType(void) const
Get the Type member data.
const TSample_id & GetSample_id(void) const
Get the Sample_id member data.
void SetType(TType value)
Assign a value to Type data member.
bool IsSetOther_ids(void) const
Check if a value has been assigned to Other_ids data member.
bool IsSetSeq(void) const
Check if a value has been assigned to Seq data member.
bool IsSetAction(void) const
Check if a value has been assigned to Action data member.
list< CRef< CPhenotype > > TPhenotype
const TNote & GetNote(void) const
Get the variant data.
const TLoc & GetLoc(void) const
Get the variant data.
TEffect GetEffect(void) const
Get the Effect member data.
void SetObservation(TObservation value)
Assign a value to Observation data member.
const TId & GetId(void) const
Get the Id member data.
const TDelta & GetDelta(void) const
Get the Delta member data.
list< CRef< C_E_Somatic_origin > > TSomatic_origin
void SetMultiplier_fuzz(TMultiplier_fuzz &value)
Assign a value to Multiplier_fuzz data member.
bool IsSetConsequence(void) const
Check if a value has been assigned to Consequence data member.
const TData & GetData(void) const
Get the Data member data.
const TPhenotype & GetPhenotype(void) const
Get the Phenotype member data.
bool IsSetPhenotype(void) const
Phenotype. Check if a value has been assigned to Phenotype data member.
const TSeq & GetSeq(void) const
Get the Seq member data.
const TDescription & GetDescription(void) const
Get the Description member data.
bool IsInstance(void) const
Check if variant Instance is selected.
const TName & GetName(void) const
Get the Name member data.
list< CRef< C_E_Consequence > > TConsequence
const TLiteral & GetLiteral(void) const
Get the variant data.
const TSomatic_origin & GetSomatic_origin(void) const
Get the Somatic_origin member data.
const TConsequence & GetConsequence(void) const
Get the Consequence member data.
void ResetAction(void)
Reset Action data member.
bool IsSetDescription(void) const
Tag for comment and descriptions. Check if a value has been assigned to Description data member.
bool IsSetSynonyms(void) const
Check if a value has been assigned to Synonyms data member.
bool IsUniparental_disomy(void) const
Check if variant Uniparental_disomy is selected.
bool IsSet(void) const
Check if variant Set is selected.
bool IsSetMethod(void) const
Check if a value has been assigned to Method data member.
bool IsSetParent_id(void) const
Check if a value has been assigned to Parent_id data member.
void SetName(const TName &value)
Assign a value to Name data member.
void SetType(TType value)
Assign a value to Type data member.
void SetSeq(TSeq &value)
Assign a value to Seq data member.
TMultiplier GetMultiplier(void) const
Get the Multiplier member data.
bool IsSetMultiplier_fuzz(void) const
Check if a value has been assigned to Multiplier_fuzz data member.
const TParent_id & GetParent_id(void) const
Get the Parent_id member data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
list< CRef< CDelta_item > > TDelta
bool IsUnknown(void) const
Check if variant Unknown is selected.
TVariations & SetVariations(void)
Assign a value to Variations data member.
bool IsLiteral(void) const
Check if variant Literal is selected.
const TName & GetName(void) const
Get the Name member data.
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
bool IsSetMultiplier(void) const
Multiplier allows representing a tandem, e.g.
void ResetMultiplier(void)
Reset Multiplier data member.
bool IsSetObservation(void) const
Check if a value has been assigned to Observation data member.
const TOther_ids & GetOther_ids(void) const
Get the Other_ids member data.
bool IsSetName(void) const
Names and synonyms: some variants have well-known canonical names and possible accepted synonyms. Check...
TDelta & SetDelta(void)
Assign a value to Delta data member.
const TMethod & GetMethod(void) const
Get the Method member data.
bool IsNote(void) const
Check if variant Note is selected.
bool IsThis(void) const
Check if variant This is selected.
TGene_location GetGene_location(void) const
Get the Gene_location member data.
bool IsLoc(void) const
Check if variant Loc is selected.
const TVariations & GetVariations(void) const
Get the Variations member data.
const TSynonyms & GetSynonyms(void) const
Get the Synonyms member data.
bool IsComplex(void) const
Check if variant Complex is selected.
@ eType_snv
delta=[morph of length 1] NOTE: this is snV not snP; the latter requires frequency-based validation t...
@ eType_inv
delta=[del, ins.seq= RevComp(variation-location)]
@ eType_mnp
delta=[morph of length >1]
@ eType_delins
delta=[del, ins]
@ eType_prot_nonsense
delta=[del]; variation-location is the tail of the protein being truncated
@ eType_prot_other
delta=any
@ eType_prot_silent
delta=[morph of length 1, same AA as at variation-location]
@ eType_prot_missense
delta=[morph of length 1]
@ eEffect_stop_gain
reference codon is not stop codon, but the snp variant allele changes the codon to a terminating codo...
@ eEffect_missense
one allele in the set changes protein peptide (0x4)
@ eEffect_nonsense
one allele in the set changes to STOP codon (TER). (0x2)
@ eEffect_stop_loss
reverse of STOP-GAIN: reference codon is a stop codon, but a snp variant allele changes the codon to ...
@ eEffect_synonymous
one allele in the set does not change the encoded amino acid (0x1)
@ eEffect_frameshift
one allele in the set changes all downstream amino acids (0x8)
@ eGene_location_in_start_codon
the variant is observed in a start codon (0x100)
@ eGene_location_acceptor
In acceptor splice-site (0x20)
@ eGene_location_near_gene_5
Within 2kb of the 5' end of a gene feature.
@ eGene_location_near_gene_3
Within 0.5kb of the 3' end of a gene feature.
@ eGene_location_utr_3
In 3' UTR (0x80)
@ eGene_location_in_gene
Sequence intervals covered by a gene ID but not having an aligned transcript (0x01)
@ eGene_location_utr_5
In 5' UTR (0x40)
@ eGene_location_intron
In Intron (0x08)
@ eGene_location_intergenic
variant located between genes (0x400)
@ eGene_location_donor
In donor splice-site (0x10)
@ eGene_location_in_stop_codon
the variant is observed in a stop codon (0x200)
@ eGene_location_conserved_noncoding
variant is located in a conserved non-coding region (0x800)
@ eAction_offset
go downstream by distance specified by multiplier (upstream if < 0), in genomic context.
@ eAction_morph
replace len(seq) positions starting with location.start with seq
@ eAction_del_at
excise sequence at location if multiplier is specified, delete len(location)*multiplier positions dow...
@ eAction_ins_before
insert seq before the location.start
@ eObservation_variant
inst represent the observed variant at a given position
@ eObservation_asserted
inst represents the asserted base at a position
unsigned int
A callback function used to compare two keys in a database.
double value_type
The numeric datatype used by the parser.
static void ChangeIdsInPlace(T &container, sequence::EGetIdType id_type, CScope &scope)
static set< int > GetFocusLocusIDs(const CBioseq_Handle &bsh)
static CVariantProperties::TEffect CalcEffectForProt(const string &prot_ref_str, const string &prot_delta_str)
static bool IsRightPartial(CBioseq_Handle bsh)
static bool HasProblematicExceptions(const CSeq_feat &cds_feat)
static CRef< CDelta_item > CreateDeltaForOffset(int offset, const CInt_fuzz *fuzz)
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
static void ApplyOffsetFuzz(CSeq_loc &loc, const CInt_fuzz &offset_fuzz, bool is_start)
static bool IsRefSeqGene(const CBioseq_Handle &bsh)
static CRef< CVariationException > CreateException(const string &message, CVariationException::ECode code=static_cast< CVariationException::ECode >(0))
static string Translate(const string &nuc_str, bool is_mito)
static int GetFuzzSign(const CInt_fuzz &fuzz, int loc_sign)
static void SwapLtGtFuzz(CInt_fuzz &fuzz)
CVariation_inst::EType CalcInstTypeForAA(const string &prot_ref_str, const string &prot_delta_str)
CRef< CVariation > InheritParentAttributes(const CVariation &child, const CVariation &parent)
static CRef< CVariation > CreateUnknownProtConsequenceVariation(const CVariantPlacement &nuc_p, const CSeq_feat &cds_feat, bool is_frameshifting, CScope &scope)
static CVariationUtil::TSOTerms CalcSOTermsForProt(TSignedSeqPos nuc_delta_len, const string &prot_ref_str, const string &prot_variant_str)
static CRef< CSeq_align > CreateSplicedSeqAlignFromFeat(const CSeq_feat &rna_feat)
static bool ValidExonTerminal(const set< TSeqPos > &exon_biostarts, const set< TSeqPos > &exon_biostops, TSeqPos exon_anchor_pos, int offset_pos)
static int GetSignedOffset(const CDelta_item &delta)
static bool ContainsIupacNaAmbiguities(const T &obj)
static bool Contains(const CSeq_loc &a, const CSeq_loc &b, CScope *scope)
static bool ValidExonTerminals(const set< TSeqPos > &exon_biostarts, const set< TSeqPos > &exon_biostops, const CVariantPlacement &p)
static size_t GetCommonSuffixLen(const string &a, const string &b)
static const CInt_fuzz * GetFuzz(const CDelta_item &delta)
static size_t GetCommonPrefixLen(const string &a, const string &b)
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
static const char * suffix[]
static const char * prefix[]
CConstRef< CSeq_feat > cdregion_feat
CRef< CSeq_loc_Mapper > mapper
Calculate upstream (first) and downstream (second) flanks for loc.
CRef< CSeq_loc > upstream
CRef< CSeq_loc > downstream
CRef< CTestThread > thr[k_NumThreadsMax]