46 #define NCBI_USE_ERRCODE_X Objects_SeqLocMap
55 switch ( GetErrCode() ) {
56 case eBadLocation:
return "eBadLocation";
57 case eUnknownLength:
return "eUnknownLength";
58 case eBadAlignment:
return "eBadAlignment";
59 case eBadFeature:
return "eBadFeature";
60 case eCanNotMap:
return "eCanNotMap";
61 case eOtherError:
return "eOtherError";
69 CSeq_loc_Mapper_Base basic approaches.
73 The mapper parses input data (two seq-locs, seq-alignment) and stores
74 mappings in a collection of CMappingRange objects. Each mapping range
75 contains source (id, start, stop, strand) and destination (id, start,
78 All coordinates are converted to genomic with one exception: if
79 source and destination locations have the same length and the mapper
80 can not obtain real sequence types, it assumes that both sequences
81 are nucleotides even if they are proteins. See x_AdjustSeqTypesToProt()
82 for more info on this special case.
84 The mapper uses several methods to check sequence types: by comparing
85 source and destination lengths, by calling GetSeqType() which is
86 overridden in CSeq_loc_Mapper to provide the correct information, using
87 some information from alignments (e.g. spliced-segs contain explicit
88 sequence types). If all these methods fail, the mapper may still
89 successfully do its job. E.g. if mapping is between two whole seq-locs,
90 it may be done with the assumption that both sequences have the same
93 The order of mapping ranges is not preserved, they are sorted by
94 source seq-id and start position.
96 When parsing input locations the mapper also tries to create equivalent
97 mappings for all synonyms of the source sequence id. The base class
98 does not provide synonyms, but CSeq_loc_Mapper does override
99 CollectSynonyms() method to implement this.
101 In some situations (like mapping between a bioseq and its segments),
102 the mapper also creates dummy mappings from destination to itself,
103 so that during the mapping any ranges already on the destination
104 sequence are not truncated. See x_PreserveDestinationLocs().
109 Mapping of seq-locs is done range-by-range, the original seq-loc
110 is not parsed completely before mapping. Each original interval is
111 mapped through all matching mapping ranges, some parts may be mapped
114 The mapped ranges are first stored in a container of SMappedRange
115 structures. This is done to simplify merging ranges. If no merge
116 flag is set or the new range can not be merged with the collected
117 set, all ranges from the container are moved (pushed) to the
118 destination seq-loc and the new range starts the new collection.
119 This is done by x_PushMappedRange method (adding a new range) and
120 x_PushRangesToDstMix (pushing the collected mapped ranges to the
121 destination seq-loc).
123 The pushing also occurs in the following situations:
124 - When a source range is discarded (not just clipped) - see
126 - When a non-mapping range is copied to the destination mix (in fact,
127 in this case pushing is usually done by the truncation described
129 - When a new complex seq-loc is started (e.g. a new mix or equiv)
130 to preserve the structure of the source location.
132 Since merging is done only among the temporary collection, any
133 of the above conditions breaks merging. Examples:
134 - The original seq-loc is a mix, containing two other mixes A and B,
135 which contain overlapping ranges. These ranges will not be merged,
136 since they originate from different complex locations.
137 - If the original seq-loc contains three ranges A, B and C, which are
138 mapped so that A' and C' overlap or abut, but B is discarded, the
139 A' and C' will not be merged. Depending on the flags, B may be
140 also included in the mapped location between A' and C' (see
141 KeepNonmappingRanges).
143 TODO: Is the above behavior expected or should it be changed so that
144 merging can be done at least in some of the described cases?
146 After mapping the destination seq-loc may be a simple interval or
147 a mix of sub-locations. This mix can be optimized when the mapping
148 is finished: null locations are removed (if no GapPreserve is set),
149 as well as empty mixes etc. Mixes with a single element are replaced
150 with this element. Mixes which contain only intervals are converted
198 : m_Src_id_Handle(src_id),
199 m_Src_from(src_from),
200 m_Src_to(src_from + src_length - 1),
201 m_Src_strand(src_strand),
202 m_Dst_id_Handle(dst_id),
203 m_Dst_from(dst_from),
204 m_Dst_strand(dst_strand),
208 m_Src_bioseq_len(src_bioseq_len),
260 const bool partial_from = fuzz && fuzz->first && fuzz->first->IsLim() &&
262 const bool partial_to = fuzz && fuzz->second && fuzz->second->IsLim() &&
272 if( (frame_shift > 0) && partial_from && (from == 0) && (
m_Src_from == 0) ) {
284 if( ((
int)new_dst_end - (
int)src_to_dst_end) >= 0 && (new_dst_end - src_to_dst_end) <= 2 ) {
285 ret.
SetTo( new_dst_end );
297 if ( (frame_shift > 0) && partial_from && (from == 0) && (
m_Src_from == 0) ) {
298 ret.
SetTo( new_dst_end + frame_shift );
361 switch ( fuzz->
Which() ) {
384 if ( !alt.empty() ) {
440 : m_ReverseSrc(
false),
449 TRange(cvt->m_Src_from, cvt->m_Src_to), cvt));
468 src_id, src_from, src_length, src_strand,
469 dst_id, dst_from, dst_strand,
470 ext_to, frame, src_bioseq_len, dst_len ));
486 return ranges->second.begin(
TRange(from, to));
649 #define STRAND_TO_INDEX(is_set, strand) \
650 ((is_set) ? size_t((strand) + 1) : 0)
652 #define INDEX_TO_STRAND(idx) \
657 : m_MergeFlag(eMergeNone),
658 m_GapFlag(eGapPreserve),
659 m_MiscFlags(fTrimSplicedSegs),
661 m_LastTruncated(
false),
665 m_MapOptions(options)
672 : m_MergeFlag(eMergeNone),
673 m_GapFlag(eGapPreserve),
674 m_MiscFlags(fTrimSplicedSegs),
676 m_LastTruncated(
false),
677 m_Mappings(mapping_ranges),
680 m_MapOptions(options)
688 : m_MergeFlag(eMergeNone),
689 m_GapFlag(eGapPreserve),
690 m_MiscFlags(fTrimSplicedSegs),
692 m_LastTruncated(
false),
696 m_MapOptions(options)
705 : m_MergeFlag(eMergeNone),
706 m_GapFlag(eGapPreserve),
707 m_MiscFlags(fTrimSplicedSegs),
709 m_LastTruncated(
false),
713 m_MapOptions(options)
722 : m_MergeFlag(eMergeNone),
723 m_GapFlag(eGapPreserve),
724 m_MiscFlags(fTrimSplicedSegs),
726 m_LastTruncated(
false),
730 m_MapOptions(options)
740 : m_MergeFlag(eMergeNone),
741 m_GapFlag(eGapPreserve),
742 m_MiscFlags(fTrimSplicedSegs),
744 m_LastTruncated(
false),
748 m_MapOptions(options)
758 : m_MergeFlag(eMergeNone),
759 m_GapFlag(eGapPreserve),
760 m_MiscFlags(fTrimSplicedSegs),
762 m_LastTruncated(
false),
775 : m_MergeFlag(eMergeNone),
776 m_GapFlag(eGapPreserve),
777 m_MiscFlags(fTrimSplicedSegs),
779 m_LastTruncated(
false),
783 m_MapOptions(options)
793 : m_MergeFlag(eMergeNone),
794 m_GapFlag(eGapPreserve),
795 m_MiscFlags(fTrimSplicedSegs),
797 m_LastTruncated(
false),
801 m_MapOptions(options)
811 : m_MergeFlag(eMergeNone),
812 m_GapFlag(eGapPreserve),
813 m_MiscFlags(fTrimSplicedSegs),
815 m_LastTruncated(
false),
819 m_MapOptions(seq_info, opts)
933 bool known_dst_types =
x_CheckSeqTypes(target, dst_type, dst_total_len);
942 "Frame can not be specified for a protein source location.");
957 "Frame can not be specified for a protein target location.");
968 bool known_types = known_src_types && known_dst_types;
969 if ( !known_types ) {
991 "Undefined location length -- "
992 "unable to detect sequence type");
994 if (src_total_len == dst_total_len) {
1002 src_type = dst_type;
1017 else if (src_total_len/3 == dst_total_len || src_total_len == (dst_total_len + 1)*3) {
1028 "Sequence types (nuc to prot) are inconsistent with "
1029 "location lengths");
1032 else if (dst_total_len/3 == src_total_len || dst_total_len == (src_total_len + 1)*3) {
1043 "Sequence types (prot to nuc) are inconsistent with "
1044 "location lengths");
1051 "Wrong location length -- "
1052 "unable to detect sequence type");
1060 bool multiseq_src = !
source.GetId();
1061 bool multiseq_dst = !target.
GetId();
1063 if ( src_frame ) src_total_len -= src_frame - 1;
1064 if ( dst_frame ) dst_total_len -= dst_frame - 1;
1067 if (src_total_len == (dst_total_len + 1)*3 && !multiseq_dst) {
1072 else if (src_total_len/3 == dst_total_len && src_total_len % 3 != 0) {
1074 "Source and destination lengths do not match, "
1075 "dropping " << src_total_len % 3 <<
1076 " overhanging bases on source location");
1079 if (dst_total_len*3 >= src_total_len + 3 ||
1080 dst_total_len*3 + 3 <= src_total_len) {
1082 "Source and destination lengths do not match.");
1087 if (dst_total_len == (src_total_len + 1)*3 && !multiseq_src) {
1091 else if (dst_total_len/3 == src_total_len && dst_total_len % 3 != 0) {
1093 "Source and destination lengths do not match, "
1094 "dropping " << dst_total_len % 3 <<
1095 " overhanging bases on destination location");
1098 if (src_total_len*3 >= dst_total_len + 3 ||
1099 src_total_len*3 + 3 <= dst_total_len) {
1101 "Source and destination lengths do not match.");
1105 else if (src_total_len != dst_total_len) {
1107 "Source and destination lengths do not match.");
1113 int src_width = (src_type ==
eSeq_prot) ? 3 : 1;
1114 int dst_width = (dst_type ==
eSeq_prot) ? 3 : 1;
1142 else if ( !rg.
Empty() ) {
1164 else if ( !rg.
Empty() ) {
1170 static_cast<TSeqPos>(src_frame) <= src_len ) {
1171 if( !
source.IsReverseStrand() ) {
1172 src_start += src_frame - 1;
1174 src_len -= src_frame - 1;
1177 static_cast<TSeqPos>(dst_frame) <= dst_len ) {
1179 dst_start += dst_frame - 1;
1181 dst_len -= dst_frame - 1;
1187 src_bioseq_len = src_width*src_bioseq_len;
1189 TSeqPos last_src_start = 0, last_src_len = 0;
1190 TSeqPos last_dst_start = 0, last_dst_len = 0;
1191 bool last_src_reverse =
false, last_dst_reverse =
false;
1196 while (src_it && dst_it) {
1208 if ( !last_src_reverse ) {
1209 if (last_src_start + last_src_len != src_start) {
1214 if (src_start + src_len != last_src_start) {
1222 if ( !last_dst_reverse ) {
1223 if (last_dst_start + last_dst_len != dst_start) {
1228 if (dst_start + dst_len != last_dst_start) {
1233 last_src_start = src_start;
1234 last_src_len = src_len;
1235 last_dst_start = dst_start;
1236 last_dst_len = dst_len;
1241 src_frame ? src_frame : dst_frame,
1254 if (src_len == 0 && ++src_it) {
1260 else if (
r.IsWhole() ) {
1283 if (dst_len == 0 && ++dst_it) {
1289 else if (
r.IsWhole() ) {
1323 if (idh == *it)
return true;
1334 unique_ptr<IMapper_Sequence_Info::TSynonyms> from_syn;
1360 for (
size_t i = 0;
i < (*diag_it)->GetIds().
size(); ++
i) {
1365 if (from_ids &&
x_IsSynonym(*(*diag_it)->GetIds()[
i], *from_ids)) {
1372 "Target ID not found in the alignment");
1376 "Source ID not found in the alignment");
1401 "Target ID not found in the alignment");
1405 "Source ID not found in the alignment");
1415 if ((*std_seg)->IsSetIds() && !(*std_seg)->GetIds().empty()) {
1416 for (
size_t i = 0;
i < (*std_seg)->GetIds().
size(); ++
i) {
1426 for (
size_t i = 0;
i < (*std_seg)->GetLoc().
size(); ++
i) {
1427 const CSeq_id* row_id = (*std_seg)->GetLoc()[
i]->GetId();
1436 "Target ID not found in the alignment");
1461 "Target ID not found in the alignment");
1465 "Source ID not found in the alignment");
1493 if (
x_IsSynonym((*it)->GetFirst_id(), to_ids) ) {
1496 else if (
x_IsSynonym((*it)->GetSecond_id(), to_ids) ) {
1505 "Unsupported alignment type");
1562 if (to_row == 0 || to_row == 1) {
1568 "Invalid row number in spliced-seg alignment");
1579 "Unsupported alignment type");
1590 size_t dim = diag.
GetDim();
1592 if (dim != diag.
GetIds().size()) {
1601 if (have_strands && dim != diag.
GetStrands().size()) {
1610 int dst_width = (dst_type ==
eSeq_prot) ? 3 : 1;
1616 for (
size_t row = 0;
row < dim; ++
row) {
1622 for (
size_t row = 0;
row < dim; ++
row) {
1623 if (
row == to_row) {
1631 int src_width = (src_type ==
eSeq_prot) ? 3 : 1;
1640 dst_id, dst_start, dst_len, dst_strand, 0, 0);
1643 _ASSERT(!src_len && !dst_len);
1654 size_t dim = denseg.
GetDim();
1659 if (numseg != denseg.
GetLens().size()) {
1661 numseg =
min(numseg, denseg.
GetLens().size());
1663 if (dim != denseg.
GetIds().size()) {
1667 if (dim*numseg != denseg.
GetStarts().size()) {
1669 dim =
min(dim*numseg, denseg.
GetStarts().size()) / numseg;
1672 if (have_strands && dim*numseg != denseg.
GetStrands().size()) {
1681 for (
size_t row = 0;
row < dim; ++
row) {
1690 int dst_width = (dst_type ==
eSeq_prot) ? 3 : 1;
1691 for (
size_t row = 0;
row < dim; ++
row) {
1692 if (
row == to_row) {
1701 int src_width = (src_type ==
eSeq_prot) ? 3 : 1;
1725 if (src_len != dst_len) {
1727 "Genomic vs product length mismatch in dense-seg");
1730 src_id, src_start, src_len, src_strand,
1731 dst_id, dst_start, dst_len, dst_strand,
1735 if (src_len != 0 || dst_len != 0) {
1737 "Different lengths of source and destination rows "
1742 for (
size_t seg = 0; seg < numseg; ++seg) {
1744 int i_dst_start = denseg.
GetStarts()[seg*dim + to_row];
1745 if (i_src_start < 0 || i_dst_start < 0) {
1760 dst_id, dst_start, dst_len, dst_strand, 0, 0);
1763 _ASSERT(!src_len && !dst_len);
1774 size_t dim = sseg.
GetDim();
1775 if (dim != sseg.
GetLoc().size()) {
1780 && dim != sseg.
GetIds().size()) {
1786 for (
size_t row = 0;
row < dim; ++
row ) {
1787 if (
row == to_row) {
1807 size_t dim = pseg.
GetDim();
1810 if (numseg != pseg.
GetLens().size()) {
1814 if (dim != pseg.
GetIds().size()) {
1818 if (dim*numseg != pseg.
GetStarts().size()) {
1820 dim =
min(dim*numseg, pseg.
GetStarts().size()) / numseg;
1823 if (have_strands && dim*numseg != pseg.
GetStrands().size()) {
1832 for (
size_t row = 0;
row < dim; ++
row) {
1841 int dst_width = (dst_type ==
eSeq_prot) ? 3 : 1;
1843 for (
size_t row = 0;
row < dim; ++
row) {
1844 if (
row == to_row) {
1852 int src_width = (src_type ==
eSeq_prot) ? 3 : 1;
1853 for (
size_t seg = 0; seg < numseg; ++seg) {
1870 src_id, src_start, src_len, src_strand,
1871 dst_id, dst_start, dst_len, dst_strand,
1875 _ASSERT(!src_len && !dst_len);
1916 switch ( part.
Which() ) {
1947 if (mapping_len == 0)
return;
1950 TSeqPos pgen_len = mapping_len;
1951 TSeqPos pprod_len = mapping_len;
1953 TSeqPos pgen_start = rev_gen ?
1954 gen_start + gen_len - mapping_len : gen_start;
1955 TSeqPos pprod_start = rev_prod ?
1956 prod_start + prod_len - mapping_len : prod_start;
1960 gen_id, pgen_start, pgen_len, gen_strand,
1961 prod_id, pprod_start, pprod_len, prod_strand,
1966 prod_id, pprod_start, pprod_len, prod_strand,
1967 gen_id, pgen_start, pgen_len, gen_strand,
1972 _ASSERT(pgen_len == 0 && pprod_len == 0);
1974 gen_start += mapping_len;
1976 gen_len -= mapping_len;
1978 prod_start += mapping_len;
1980 prod_len -= mapping_len;
2009 mapping_len += plen;
2015 gen_id, gen_start, gen_len, gen_strand,
2016 prod_id, prod_start, prod_len, prod_strand);
2034 gen_id, gen_start, gen_len, gen_strand,
2035 prod_id, prod_start, prod_len, prod_strand);
2056 bool prod_is_prot =
false;
2060 prod_is_prot =
true;
2078 if (!ex_gen_id || !ex_prod_id) {
2093 "Wrong product-start type in spliced-exon, "
2094 "does not match product-type");
2098 "Wrong product-end type in spliced-exon, "
2099 "does not match product-type");
2104 TSeqPos gen_len = gen_to - gen_from + 1;
2105 TSeqPos prod_len = prod_to - prod_from + 1;
2112 *ex_gen_id, gen_from, gen_len, ex_gen_strand,
2113 *ex_prod_id, prod_from, prod_len, ex_prod_strand);
2119 *ex_gen_id, gen_from, gen_len, ex_gen_strand,
2120 *ex_prod_id, prod_from, prod_len, ex_prod_strand,
2125 *ex_prod_id, prod_from, prod_len, ex_prod_strand,
2126 *ex_gen_id, gen_from, gen_len, ex_gen_strand,
2131 if (gen_len || prod_len) {
2133 "Genomic vs product length mismatch in spliced-exon");
2150 size_t numseg =
row.GetNumseg();
2152 if (numseg !=
row.GetFirst_starts().size()) {
2154 "Invalid 'first-starts' size in sparse-align");
2155 numseg =
min(numseg,
row.GetFirst_starts().size());
2157 if (numseg !=
row.GetSecond_starts().size()) {
2159 "Invalid 'second-starts' size in sparse-align");
2160 numseg =
min(numseg,
row.GetSecond_starts().size());
2162 if (numseg !=
row.GetLens().size()) {
2164 numseg =
min(numseg,
row.GetLens().size());
2166 bool have_strands =
row.IsSetSecond_strands();
2167 if (have_strands && numseg !=
row.GetSecond_strands().size()) {
2169 "Invalid 'second-strands' size in sparse-align");
2170 numseg =
min(numseg,
row.GetSecond_strands().size());
2174 const CSeq_id& second_id =
row.GetSecond_id();
2178 int first_width = (first_type ==
eSeq_prot) ? 3 : 1;
2179 int second_width = (second_type ==
eSeq_prot) ? 3 : 1;
2191 for (
size_t i = 0;
i < numseg;
i++) {
2192 TSeqPos first_start = first_starts[
i]*first_width;
2193 TSeqPos second_start = second_starts[
i]*second_width;
2194 TSeqPos first_len = lens[
i]*len_width;
2195 TSeqPos second_len = first_len;
2200 second_id, second_start, second_len, strand,
2205 second_id, second_start, second_len, strand,
2210 _ASSERT(!first_len && !second_len);
2225 return primary_it !=
m_SynonymMap.
end() ? primary_it->second : synonym;
2236 return found->second;
2252 if ( synonyms.
empty() ) {
2263 return primary_it->second;
2306 if (it->second != seqtype) {
2308 "Attempt to modify a known sequence type.");
2323 bool found_type =
false;
2327 if ( !idh )
continue;
2335 else if (seqtype != it_type) {
2343 if ( it.GetRange().IsWhole() ) {
2354 len += it.GetRange().GetLength();
2371 if ( !idh )
continue;
2379 else if (ret != st->second) {
2383 "Unable to detect sequence types in the locations.");
2409 bool have_id =
false;
2410 bool have_known =
false;
2415 if (id_it->first == primary_id) {
2423 if ( !have_id )
return;
2427 "Can not adjust sequence types to protein.");
2457 if ( rg_it->IsWhole() ) {
2461 else if ( !rg_it->Empty() ) {
2462 from = rg_it->GetFrom()*3;
2463 to = rg_it->GetToOpen()*3;
2465 rg_it->SetOpen(from, to);
2501 TSeqPos cvt_src_start = src_start;
2502 TSeqPos cvt_dst_start = dst_start;
2505 const TSeqPos original_dst_len = dst_len;
2507 if (src_len == dst_len) {
2512 src_len -= src_start;
2516 dst_len -= dst_start;
2526 cvt_length = src_len;
2530 else if (src_len > dst_len) {
2537 cvt_src_start += src_len - dst_len;
2540 src_start += dst_len;
2542 cvt_length = dst_len;
2545 src_len -= cvt_length;
2556 cvt_dst_start += dst_len - src_len;
2559 dst_start += src_len;
2561 cvt_length = src_len;
2564 dst_len -= cvt_length;
2572 bool ext_to =
false;
2576 if (
IsReverse(dst_strand) && fuzz_from ) {
2577 ext_to = fuzz_from &&
2578 fuzz_from->
IsLim() &&
2581 else if ( !
IsReverse(dst_strand) && fuzz_to ) {
2589 dst_id, cvt_dst_start, dst_strand, cvt_length, ext_to, frame,
2590 src_bioseq_len, original_dst_len);
2621 if (length > src_seq_len - src_start) {
2622 TSeqPos trim = length - src_seq_len + src_start;
2635 if (length > dst_seq_len - dst_start) {
2636 TSeqPos trim = length - dst_seq_len + dst_start;
2642 dst_len = dst_len > trim ? dst_len - trim : 0;
2647 if (length == 0)
return;
2649 main_id, src_start, length, src_strand,
2650 dst_idh, dst_start, dst_strand,
2657 .push_back(
TRange(dst_start, dst_start + length - 1));
2668 for (
size_t str_idx = 0; str_idx <
m_DstRanges.size(); str_idx++) {
2672 id_it->second.sort();
2676 int dst_width = (dst_type ==
eSeq_prot) ? 3 : 1;
2681 if ( rg_it->IsWhole() ) {
2685 else if ( !rg_it->Empty() ) {
2686 rg_start = rg_it->GetFrom()*dst_width;
2687 rg_stop = rg_it->GetTo()*dst_width;
2692 dst_start = rg_start;
2698 dst_stop =
max(dst_stop, rg_stop);
2707 id_it->first, dst_start,
ENa_strand(str_idx));
2711 dst_start = dst_stop;
2715 dst_start = rg_start;
2719 if (dst_start < dst_stop) {
2725 id_it->first, dst_start,
ENa_strand(str_idx));
2742 bool is_first =
true;
2746 for( ; loc_iter; ++loc_iter ) {
2756 new_loc_piece->
Assign( *loc_piece );
2765 new_loc->
Add( *new_loc_piece );
2767 new_loc->
Add( *loc_piece );
2781 switch( loc_piece->
Which() ) {
2786 const bool from_fuzz_is_bad =
2791 const bool to_fuzz_is_bad =
2797 if( from_fuzz_is_bad || to_fuzz_is_bad ) {
2799 new_loc->
Assign( *loc_piece );
2801 if( from_fuzz_is_bad ) {
2802 new_loc->
SetInt().ResetFuzz_from();
2805 if( to_fuzz_is_bad ) {
2806 new_loc->
SetInt().ResetFuzz_to();
2817 const bool is_fuzz_range =
2819 if( is_fuzz_range ) {
2821 new_loc->
Assign( *loc_piece );
2823 new_loc->
SetPnt().ResetFuzz();
2846 switch (loc->
Which()) {
2864 bool have_null =
false;
2865 while (mix_locs.size() > 1 &&
2866 mix_locs.back()->IsNull())
2869 mix_locs.pop_back();
2873 mix_locs.size() > 0 && !mix_locs.back()->IsNull()) {
2876 mix_locs.push_back(null_loc);
2901 if ( !(*it)->IsInt() ) {
2909 push_back(
Ref(&(*it)->SetInt()));
2921 "Unsupported location type");
2947 bool partial_left =
false;
2948 bool partial_right =
false;
2957 TRange trimmed_left, trimmed_right;
2966 (left != *last_src_to + 1);
2970 partial_left = (cvt_idx == mappings.size() - 1) ||
2971 (mappings[cvt_idx + 1]->m_Src_to + 1 != left);
2980 partial_right = (cvt_idx == mappings.size() - 1) ||
2981 (mappings[cvt_idx + 1]->m_Src_from != right + 1);
2986 (right + 1 != *last_src_to);
2991 if ( partial_left ) {
2995 if ( partial_right ) {
2996 if ( partial_left ) err_msg +=
",";
3008 *last_src_to = reverse ? left : right;
3014 if ( partial_left ) {
3020 if ( (!reverse && cvt_idx == 0) ||
3021 (reverse && cvt_idx == mappings.size() - 1) ) {
3023 fuzz.first = src_fuzz.first;
3026 if ( partial_right ) {
3032 if ( (reverse && cvt_idx == 0) ||
3033 (!reverse && cvt_idx == mappings.size() - 1) ) {
3035 fuzz.second = src_fuzz.second;
3048 if (reverse && !fuzz.second) {
3052 else if (!reverse && !fuzz.first) {
3081 bool is_set_dst_strand = cvt.
Map_Strand(is_set_strand,
3082 src_strand, &dst_strand);
3150 "Unknown sequence type in the source location, "
3151 "mapped graph data may be incorrect.");
3158 for ( ; rg_it; ++rg_it) {
3159 mappings.push_back(rg_it->second);
3174 if( r_it && r_it->second ) {
3180 const int shift = ( mappings[0]->m_Frame - 1 );
3197 for (
size_t idx = 0; idx < mappings.size(); ++idx) {
3199 is_set_strand, src_strand,
3222 "Unknown sequence length in the source whole location, "
3223 "mapped graph data may be incorrect.");
3234 if (
si.IsSetFuzz_from() ) {
3236 fuzz.first->Assign(
si.GetFuzz_from());
3238 if (
si.IsSetFuzz_to() ) {
3240 fuzz.second->Assign(
si.GetFuzz_to());
3258 rg, fuzz,
false, 0);
3276 fuzz.first->Assign(pp.
GetFuzz());
3297 rg, fuzz,
false, 0);
3311 switch ( src_loc.
Which() ) {
3340 for ( ; mit; ++mit) {
3453 fuzz.second->Assign(pnt.
GetFuzz());
3456 fuzz.first->Assign(pnt.
GetFuzz());
3578 if ( src_bond.
IsSetB() ) {
3607 if ( src_bond.
IsSetB() ) {
3617 dst_mix.
Set().push_back(pntA);
3623 dst_mix.
Set().push_back(null_loc);
3624 dst_mix.
Set().push_back(pntB);
3634 "Unsupported location type");
3696 return Map(*total_range);
3710 aln_mapper->Convert(*
row);
3713 aln_mapper->Convert();
3715 return aln_mapper->GetDstAlign();
3743 if (from == to && (!rg_fuzz.first && !rg_fuzz.second) &&
3748 loc->
SetPnt().SetPoint(from);
3749 if (strand_idx > 0) {
3753 if ( rg_fuzz.first ) {
3754 loc->
SetPnt().SetFuzz(*rg_fuzz.first);
3756 else if ( rg_fuzz.second ) {
3757 loc->
SetPnt().SetFuzz(*rg_fuzz.second);
3769 loc->
SetInt().SetFrom(from);
3771 if (strand_idx > 0) {
3774 if ( rg_fuzz.first ) {
3775 loc->
SetInt().SetFuzz_from(*rg_fuzz.first);
3777 if ( rg_fuzz.second ) {
3778 loc->
SetInt().SetFuzz_to(*rg_fuzz.second);
3787 size_t strand_idx)
const
3792 if (str_vec.size() <= strand_idx) {
3793 str_vec.resize(strand_idx + 1);
3795 return str_vec[strand_idx];
3814 "Merging ranges is incompatible with "
3815 "including source locations.");
3817 bool reverse = (strand_idx > 0) &&
3826 if ( push_reverse ) {
3841 if ( push_reverse ) {
3860 no_merge = no_merge ||
3861 (it->second.size() <= strand_idx) || it->second.empty();
3867 const SMappedRange& mrg = it->second[strand_idx].front();
3869 no_merge = no_merge ||
3872 no_merge = no_merge || (mrg.
group != group);
3876 const SMappedRange& mrg = it->second[strand_idx].back();
3878 no_merge = no_merge ||
3881 no_merge = no_merge || (mrg.
group != group);
3888 if ( push_reverse ) {
3900 SMappedRange& last_rg = it->second[strand_idx].front();
3902 last_rg.
fuzz.first = fuzz.first;
3907 last_rg.
fuzz.second = fuzz.second;
3930 if (
range.Empty() ) {
3933 else if (
range.IsWhole() ) {
3939 int seq_width = (seq_type ==
eSeq_prot) ? 3 : 1;
3940 loc->
SetInt().SetId(*
id);
3943 if (src_strand > 0) {
3951 push_reverse = !push_reverse;
3953 if ( push_reverse ) {
4001 if ( mix.size() > 0 && (*mix.rbegin())->IsNull() ) {
4041 if ( !id_it->first ) {
4045 dst_mix.push_back(null_loc);
4050 for (
int str = 0;
str < (
int)id_it->second.size(); ++
str) {
4051 if (id_it->second[
str].size() == 0) {
4062 id_it->second[
str].sort();
4066 if ( rg_it->range.Empty() ) {
4069 loc->
SetEmpty().Assign(*id_it->first.GetSeqId());
4071 dst_mix.push_front(loc);
4074 dst_mix.push_back(loc);
4081 from = rg_it->range.GetFrom();
4082 to = rg_it->range.GetTo();
4084 group = rg_it->group;
4091 if (rg_it->range.GetFrom() == to + 1) {
4092 to = rg_it->range.GetTo();
4093 fuzz.second = rg_it->fuzz.second;
4101 if (rg_it->range.GetTo() <= to) {
4106 if (rg_it->range.GetFrom() == from) {
4107 to = rg_it->range.GetTo();
4108 fuzz.second = rg_it->fuzz.second;
4114 if (rg_it->range.GetFrom() <= to + 1) {
4115 if (rg_it->range.GetTo() > to) {
4116 to = rg_it->range.GetTo();
4117 fuzz.second = rg_it->fuzz.second;
4136 from = rg_it->range.GetFrom();
4137 to = rg_it->range.GetTo();
4139 group = rg_it->group;
4168 _ASSERT(from < src.size() && to <= src.size());
4169 dst.insert(dst.end(), src.begin() + from, src.begin() + to);
4179 if ( !mapped_loc ) {
4185 ret->
SetLoc(*mapped_loc);
4189 bool src_type_set =
false;
4196 if ( !src_type_set ) {
4198 src_type_set =
true;
4200 else if (src_type != it_type) {
4202 "Source graph location contains different sequence "
4203 "types -- can not map graph data.");
4207 bool dst_type_set =
false;
4214 if ( !dst_type_set ) {
4216 dst_type_set =
true;
4218 else if (dst_type != it_type) {
4220 "Mapped graph location contains different sequence "
4221 "types -- can not map graph data.");
4238 if (src_type != dst_type &&
4246 else if (comp % 3 == 0) {
4252 "Can not map seq-graph data between "
4253 "different sequence types.");
4262 switch ( src_data.
Which() ) {
4269 ITERATE(TGraphRanges, it, ranges) {
4270 TSeqPos from = it->GetFrom()/comp_div;
4271 TSeqPos to = it->GetTo()/comp_div + 1;
4275 numval += to - from;
4283 ITERATE(TGraphRanges, it, ranges) {
4284 TSeqPos from = it->GetFrom()/comp_div;
4285 TSeqPos to = it->GetTo()/comp_div + 1;
4289 numval += to - from;
4297 ITERATE(TGraphRanges, it, ranges) {
4298 TSeqPos from = it->GetFrom()/comp_div;
4299 TSeqPos to = it->GetTo()/comp_div + 1;
4303 numval += to - from;
4320 size_t mapped_count = 0;
4321 size_t non_mapped_count = 0;
4327 bool mapped =
false;
4337 if ( loc && !loc->
IsNull() ) {
4344 if ( loc && !loc->
IsNull() ) {
4360 error.empty() ?
"Failed to map seq-feat" :
error,
4371 error.empty() ?
string(
"Failed to map seq-feat.") :
error);
4397 error.empty() ?
"Failed to map seq-align" :
error,
4408 error.empty() ?
string(
"Failed to map seq-align") :
error);
4434 error.empty() ?
"Failed to map seq-graph" :
error,
4445 error.empty() ?
string(
"Failed to map seq-graph") :
error);
4455 "Can not map seq-annot - unsupported type.");
4461 if ( mapped_count ) {
4481 MAPPER_NONMAPPING_AS_NULL);
4487 return TNonMappingAsNullParam::GetDefault();
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
ENa_strand Reverse(ENa_strand s)
ESeqLocExtremes
Used to determine the meaning of a location's Start/Stop positions.
@ eExtreme_Biological
5' and 3'
bool SameOrientation(ENa_strand a, ENa_strand b)
Seq-loc and seq-align mapper exceptions.
virtual void CollectSynonyms(const CSeq_id_Handle &id, TSynonyms &synonyms)
Collect all synonyms for the id including the id itself.
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &)
Get sequence length or kInvalidSeqPos.
virtual TSeqType GetSequenceType(const CSeq_id_Handle &)
Get information about sequence type (nuc or prot).
CRange< TSeqPos > GetSeqRange(TDim row) const
Helper class for mapping graphs.
CMappingRange - describes a single interval to interval mapping.
Storage for multiple mapping ranges.
Default IMessage implementation: text and severity only.
Class used to map seq-alignments.
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
CSeq_loc_Mapper_Options –.
virtual CSeq_id_Handle GetBestSynonym(const CSeq_id &id)
CTotalRangeSynonymMapper(const TSynonymMap &syn_map)
virtual ~CTotalRangeSynonymMapper(void)
CSeq_loc_Mapper_Base::TSynonymMap TSynonymMap
const TSynonymMap & m_Map
Interface for mapping IDs to the best synonym.
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
static const char si[8][64]
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Error
Error message.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
void Warning(CExceptionArgs_Base &args)
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
void Info(CExceptionArgs_Base &args)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
static bool HaveListeners(void)
Check if there are any listeners installed in the current thread.
static EPostResult Post(const IMessage &message)
Post the message to listener(s), if any.
virtual void Write(CNcbiOstream &out) const
Print the message and any additional information to the stream.
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
void GetMatchingIds(TSeqIdHandles &matches) const
Collect partially matching seq-ids: no-version, no-name etc.
void SetPacked_int(TPacked_int &v)
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
bool IsReverseStrand(void) const
Return true if all ranges have reverse strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
const_iterator end(void) const
CSeq_id_Handle GetSeq_id_Handle(void) const
bool IsWhole(void) const
True if the current location is a whole sequence.
const CInt_fuzz * GetFuzzFrom(void) const
const CInt_fuzz * GetFuzzTo(void) const
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
TRange GetRange(void) const
Get the range.
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
ENa_strand GetStrand(void) const
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
void SetPartialStop(bool val, ESeqLocExtremes ext)
void SetNull(void)
Override all setters to incorporate cache invalidation.
bool IsPartialStop(ESeqLocExtremes ext) const
void SetTruncatedStop(bool val, ESeqLocExtremes ext)
@ eOrder_Biological
Iterate sub-locations in biological order.
@ eEmpty_Allow
treat empty locations as regular locations (do not skip them)
void x_InitializeAlign(const CSeq_align &map_align, const CSeq_id &to_id, const CSeq_id *from_id=nullptr)
CRef< IMapper_Sequence_Info > m_SeqInfo
TSeqPos x_GetRangeLength(const CSeq_loc_CI &it)
TRange Map_Range(TSeqPos from, TSeqPos to, const TRangeFuzz *fuzz=0) const
Map an interval, set fuzz when the mapping truncates the original range.
pair< TFuzz, TFuzz > TRangeFuzz
TDstStrandMap m_DstRanges
bool x_CheckSeqTypes(const CSeq_loc &loc, ESeqType &seqtype, TSeqPos &len)
CRef< CSeq_loc > x_RangeToSeq_loc(const CSeq_id_Handle &idh, TSeqPos from, TSeqPos to, size_t strand_idx, TRangeFuzz rg_fuzz)
void x_SetMiscFlag(EMiscFlags flag, bool value)
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
TSeqPos GetSequenceLength(const CSeq_id &id)
virtual void CollectSynonyms(const CSeq_id_Handle &id, TSynonyms &synonyms)=0
Collect all synonyms for the id including the id itself.
CSeq_loc_Mapper_Base(CMappingRanges *mapping_ranges, CSeq_loc_Mapper_Options options=CSeq_loc_Mapper_Options())
Mapping through a pre-filled CMappingRanges.
ESeqType GetSeqTypeById(const CSeq_id_Handle &idh) const
Methods for getting sequence types, use cached types (m_SeqTypes) if possible.
TRangeMap::const_iterator TRangeIterator
void x_AddExonPartsMapping(TSeqPos &mapping_len, ESplicedRow to_row, const CSeq_id &gen_id, TSeqPos &gen_start, TSeqPos &gen_len, ENa_strand gen_strand, const CSeq_id &prod_id, TSeqPos &prod_start, TSeqPos &prod_len, ENa_strand prod_strand)
CRef< CGraphRanges > m_GraphRanges
void x_PushMappedRange(const CSeq_id_Handle &id, size_t strand_idx, const TRange &range, const TRangeFuzz &fuzz, bool push_reverse, int group)
CMappingRange(CSeq_id_Handle src_id, TSeqPos src_from, TSeqPos src_length, ENa_strand src_strand, CSeq_id_Handle dst_id, TSeqPos dst_from, ENa_strand dst_strand, bool ext_to=false, int frame=0, TSeqPos src_bioseq_len=kInvalidSeqPos, TSeqPos dst_len=kInvalidSeqPos)
EMapResult
Result of seq-annot mapping.
CSeq_id_Handle m_Dst_id_Handle
void x_Map_Fuzz(TFuzz &fuzz) const
void AddRange(const TRange &rg)
const CSeq_id_Handle & GetDstIdHandle(void) const
CSeq_align::C_Segs::TDendiag TDendiag
list< SMappedRange > TMappedRanges
const TIdMap & GetIdMap() const
CSeq_loc_Mapper_Message(const string &msg, EDiagSev sev, int err_code=0, int sub_code=0)
ESplicedRow
Spliced-seg row indexing constants.
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)=0
Get sequence length or kInvalidSeqPos.
const CSeq_graph * GetGraph(void) const
Get seq-graph object or null.
bool GetTrimMappedLocation(void) const
Mapped location trimming at sequence end.
void x_InitializeFeat(const CSeq_feat &map_feat, EFeatMapDirection dir)
void SetSeqTypeById(const CSeq_id_Handle &idh, ESeqType seqtype) const
Methods for setting sequence types.
IMapper_Sequence_Info & GetSeqInfo(void) const
CSeq_loc_Mapper_Options –.
EFeatMapDirection
Mapping direction used when initializing the mapper with a feature.
void x_StripExtraneousFuzz(CRef< CSeq_loc > &loc) const
vector< TMappedRanges > TRangesByStrand
CSeq_loc_Mapper_Options m_MapOptions
const CSeq_id_Handle & CollectSynonyms(const CSeq_id_Handle &id) const
void ResetObject(void)
Set the stored object to null.
CRef< CSeq_loc > MapTotalRange(const CSeq_loc &seq_loc)
Take the total range from the location and run it through the mapper.
const CSeq_loc * GetLoc(void) const
Get seq-loc object or null.
bool CanMap(TSeqPos from, TSeqPos to, bool is_set_strand, ENa_strand strand) const
Check if the interval can be mapped through this mapping range.
void x_InitSparse(const CSparse_seg &sparse, size_t to_row)
void SetReverseSrc(bool value=true)
CSeq_loc_Mapper_Options & SetAlign_Sparse_ToFirst(bool value=true)
CSeq_align::C_Segs::TStd TStd
bool GoodSrcId(const CSeq_id &id) const
Check if the id is on the source sequence.
void AddConversion(CRef< CMappingRange > cvt)
Add new mapping range to the proper place.
void x_Map_PackedPnt_Element(const CPacked_seqpnt &pp, TSeqPos p)
void x_NextMappingRange(const CSeq_id &src_id, TSeqPos &src_start, TSeqPos &src_len, ENa_strand src_strand, const CSeq_id &dst_id, TSeqPos &dst_start, TSeqPos &dst_len, ENa_strand dst_strand, const CInt_fuzz *fuzz_from=0, const CInt_fuzz *fuzz_to=0, int frame=0, TSeqPos src_bioseq_len=kInvalidSeqPos)
const CSeq_align * GetAlign(void) const
Get seq-align object or null.
bool x_ReverseRangeOrder(int str) const
TRangeFuzz Map_Fuzz(const TRangeFuzz &fuzz) const
Map fuzz if one is set in the original location.
ESeqType x_ForceSeqTypes(const CSeq_loc &loc) const
void SetFeat(const CSeq_feat &feat)
Set seq-feat object (copy into the message).
void SetReverseDst(bool value=true)
bool Map_Strand(bool is_set_strand, ENa_strand src, ENa_strand *dst) const
Map the strand, return true if the destination strand should be set (even if it's eNa_strand_unknown ...
void x_PushSourceRange(const CSeq_id_Handle &idh, size_t src_strand, size_t dst_strand, const TRange &range, bool push_reverse)
void x_IterateExonParts(const CSpliced_exon::TParts &parts, ESplicedRow to_row, const CSeq_id &gen_id, TSeqPos &gen_start, TSeqPos &gen_len, ENa_strand gen_strand, const CSeq_id &prod_id, TSeqPos &prod_start, TSeqPos &prod_len, ENa_strand prod_strand)
void SetAlign(const CSeq_align &align)
Set seq-align object (copy into the message).
EObjectType Which(void) const
Check type of the object stored in the message.
void x_InitSpliced(const CSpliced_seg &spliced, const TSynonyms &to_ids)
list< TRange > TDstRanges
virtual void Write(CNcbiOstream &out) const
Print the message and any additional information to the stream.
bool GetAlign_Dense_seg_TotalRange(void) const
Dense-seg mapping option.
TSeqPos Map_Pos(TSeqPos pos) const
Map a single point.
void x_InitAlign(const CDense_diag &diag, size_t to_row, size_t from_row)
CMappingRange::TRange TRange
CRef< CSeq_loc > x_GetMappedSeq_loc(void)
TSeqPos GetOffset(void) const
void x_SetLastTruncated(void)
void x_PushLocToDstMix(CRef< CSeq_loc > loc)
void x_PushRangesToDstMix(void)
bool GetReverseSrc(void) const
CRef< CSeq_loc > m_Dst_loc
void IncOffset(TSeqPos inc)
const CSeq_feat * GetFeat(void) const
Get seq-feat object or null.
void x_InitializeLocs(const CSeq_loc &source, const CSeq_loc &target, int src_frame=0, int dst_frame=0)
bool x_IsSetMiscFlag(EMiscFlags flag) const
static TSeqPos sx_GetExonPartLength(const CSpliced_exon_chunk &part)
virtual CSeq_loc_Mapper_Message * Clone(void) const
Create a copy of the message.
void x_Map_PackedInt_Element(const CSeq_interval &si)
bool GetReverseDst(void) const
void SetLoc(const CSeq_loc &loc)
Set seq-loc object (copy into the message).
void x_MapSeq_loc(const CSeq_loc &src_loc)
CRef< CSeq_loc > m_SrcLocs
void x_PreserveDestinationLocs(void)
CMappingRange::TRange TRange
vector< TRange > TGraphRanges
CRef< CSeq_align > x_MapSeq_align(const CSeq_align &src_align, size_t *row)
virtual TSeqType GetSequenceType(const CSeq_id_Handle &idh)=0
Get information about sequence type (nuc or prot).
bool x_IsSynonym(const CSeq_id &id, const TSynonyms &synonyms) const
CRef< CMappingRanges > m_Mappings
void x_OptimizeSeq_loc(CRef< CSeq_loc > &loc) const
const CSeq_id_Handle & x_GetPrimaryId(const CSeq_id_Handle &synonym) const
TMappedRanges & x_GetMappedRanges(const CSeq_id_Handle &id, size_t strand_idx) const
const TGraphRanges & GetRanges(void) const
CInt_fuzz::ELim x_ReverseFuzzLim(CInt_fuzz::ELim lim) const
ESeqType GetSeqType(const CSeq_id_Handle &idh) const
TRangeIterator BeginMappingRanges(CSeq_id_Handle id, TSeqPos from, TSeqPos to) const
Get mapping ranges iterator for the given seq-id and range.
CMappingRanges::TSortedMappings TSortedMappings
CSeq_id_Handle m_Src_id_Handle
CConstRef< CSeq_loc > x_FixNonsenseFuzz(CConstRef< CSeq_loc > loc_piece) const
bool x_MapNextRange(const TRange &src_rg, bool is_set_strand, ENa_strand src_strand, const TRangeFuzz &src_fuzz, TSortedMappings &mappings, size_t cvt_idx, TSeqPos *last_src_to)
void SetGraph(const CSeq_graph &graph)
Set seq-graph object (copy into the message).
friend class CSeq_align_Mapper_Base
bool GetAlign_Sparse_ToSecond(void) const
pair< TFuzz, TFuzz > TRangeFuzz
void SetFuzzOption(TFuzzOption newOption)
virtual ~CSeq_loc_Mapper_Message(void)
void x_AddConversion(const CSeq_id &src_id, TSeqPos src_start, ENa_strand src_strand, const CSeq_id &dst_id, TSeqPos dst_start, ENa_strand dst_strand, TSeqPos length, bool ext_right, int frame, TSeqPos src_bioseq_len, TSeqPos dst_length)
static bool GetNonMappingAsNull(void)
void x_AdjustSeqTypesToProt(const CSeq_id_Handle &idh)
virtual CSeq_align_Mapper_Base * InitAlignMapper(const CSeq_align &src_align)
vector< TDstIdMap > TDstStrandMap
bool x_MapInterval(const CSeq_id &src_id, TRange src_rg, bool is_set_strand, ENa_strand src_strand, TRangeFuzz orig_fuzz)
~CSeq_loc_Mapper_Base(void)
CSeq_loc_Mapper_Options & SetAlign_Sparse_ToSecond(bool value=true)
void SetOffset(TSeqPos offset)
@ eMapped_All
All annotations were mapped, none was removed.
@ eMapped_None
No annotation was mapped, the input seq-annot is unchanged.
@ eMapped_Some
Some (not all) annotations were mapped.
@ fAnnotMap_RemoveNonMapping
Remove annotations which can not be mapped with this mapper.
@ fAnnotMap_ThrowOnFailure
Throw exception if an annotation can not be mapped.
@ fAnnotMap_Location
Map seq-feat locations.
@ fAnnotMap_Product
Map seq-feat products.
@ fFuzzOption_RemoveLimTlOrTr
@ eLocationToProduct
Map from the feature's location to product.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
@ eParam_NoThread
Do not use per-thread values.
position_type GetLength(void) const
TParent::value_type value_type
position_type GetTo(void) const
position_type GetToOpen(void) const
position_type GetFrom(void) const
static TThisType GetEmpty(void)
TThisType & SetLength(position_type length)
TThisType & Set(position_type from, position_type to)
TThisType & SetOpen(position_type from, position_type toOpen)
static TThisType GetWhole(void)
static position_type GetWholeTo(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
bool IsLim(void) const
Check if variant Lim is selected.
TRange & SetRange(void)
Select the variant.
TAlt & SetAlt(void)
Select the variant.
TLim GetLim(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
TLim & SetLim(void)
Select the variant.
const TAlt & GetAlt(void) const
Get the variant data.
bool IsRange(void) const
Check if variant Range is selected.
const TRange & GetRange(void) const
Get the variant data.
@ eLim_tl
space to left of position
@ eLim_tr
space to right of position
@ e_Alt
set of alternatives for the integer
bool IsSetProduct_strand(void) const
Should be 'plus' or 'minus'. Check if a value has been assigned to Product_strand data member.
const TDenseg & GetDenseg(void) const
Get the variant data.
bool IsProtpos(void) const
Check if variant Protpos is selected.
vector< CRef< CSparse_align > > TRows
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetParts(void) const
Basic segments are always in biological order. Check if a value has been assigned to Parts data member.
const TLoc & GetLoc(void) const
Get the Loc member data.
TMatch GetMatch(void) const
Get the variant data.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
bool IsSetProduct_strand(void) const
Should be 'plus' or 'minus'. Check if a value has been assigned to Product_strand data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
const TStarts & GetStarts(void) const
Get the Starts member data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
const TLens & GetLens(void) const
Get the Lens member data.
const TIds & GetIds(void) const
Get the Ids member data.
TDiag GetDiag(void) const
Get the variant data.
TNumseg GetNumseg(void) const
Get the Numseg member data.
TLen GetLen(void) const
Get the Len member data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
bool IsSetGenomic_strand(void) const
genomic-strand represents the strand of translation Check if a value has been assigned to Genomic_str...
bool IsSetProduct_id(void) const
product is either protein or transcript (cDNA) Check if a value has been assigned to Product_id data ...
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
TDim GetDim(void) const
Get the Dim member data.
const TIds & GetIds(void) const
Get the Ids member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
bool IsSetProduct_id(void) const
product is either protein or transcript (cDNA) Check if a value has been assigned to Product_id data ...
const TSpliced & GetSpliced(void) const
Get the variant data.
bool IsSetIds(void) const
Check if a value has been assigned to Ids data member.
TDim GetDim(void) const
Get the Dim member data.
const TLens & GetLens(void) const
Get the Lens member data.
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
const TPacked & GetPacked(void) const
Get the variant data.
bool IsMatch(void) const
Check if variant Match is selected.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TStd & GetStd(void) const
Get the variant data.
const TIds & GetIds(void) const
Get the Ids member data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
vector< int > TSecond_starts
TDim GetDim(void) const
Get the Dim member data.
const TDendiag & GetDendiag(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
const TPresent & GetPresent(void) const
Get the Present member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
bool IsDiag(void) const
Check if variant Diag is selected.
const TStrands & GetStrands(void) const
Get the Strands member data.
const TIds & GetIds(void) const
Get the Ids member data.
const TStrands & GetStrands(void) const
Get the Strands member data.
list< CRef< CSpliced_exon_chunk > > TParts
vector< int > TFirst_starts
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
const TStarts & GetStarts(void) const
Get the Starts member data.
TNumseg GetNumseg(void) const
Get the Numseg member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
list< CRef< CSeq_align > > Tdata
const TSparse & GetSparse(void) const
Get the variant data.
bool IsSetGenomic_id(void) const
Check if a value has been assigned to Genomic_id data member.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
TDim GetDim(void) const
Get the Dim member data.
const TRows & GetRows(void) const
Get the Rows member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
vector< ENa_strand > TSecond_strands
const TDisc & GetDisc(void) const
Get the variant data.
const TStrands & GetStrands(void) const
Get the Strands member data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetGenomic_id(void) const
Check if a value has been assigned to Genomic_id data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eProduct_type_transcript
E_Choice Which(void) const
Which variant is currently selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TLocation & GetLocation(void) const
Get the Location member data.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
const TCdregion & GetCdregion(void) const
Get the variant data.
const TProduct & GetProduct(void) const
Get the Product member data.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
const TFuzz_from & GetFuzz_from(void) const
Get the Fuzz_from member data.
void SetA(TA &value)
Assign a value to A data member.
bool IsMix(void) const
Check if variant Mix is selected.
const TB & GetB(void) const
Get the B member data.
bool IsEmpty(void) const
Check if variant Empty is selected.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
const Tdata & Get(void) const
Get the member data.
TStrand GetStrand(void) const
Get the Strand member data.
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
TPoint GetPoint(void) const
Get the Point member data.
const TFuzz_to & GetFuzz_to(void) const
Get the Fuzz_to member data.
const TWhole & GetWhole(void) const
Get the variant data.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
list< CRef< CSeq_loc > > Tdata
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
E_Choice Which(void) const
Which variant is currently selected.
const TId & GetId(void) const
Get the Id member data.
const TId & GetId(void) const
Get the Id member data.
TStrand GetStrand(void) const
Get the Strand member data.
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TEquiv & GetEquiv(void) const
Get the variant data.
list< CRef< CSeq_loc > > Tdata
vector< TSeqPos > TPoints
const TA & GetA(void) const
Get the A member data.
const TEmpty & GetEmpty(void) const
Get the variant data.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Tdata & Set(void)
Assign a value to data member.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
const TPoints & GetPoints(void) const
Get the Points member data.
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
void SetB(TB &value)
Assign a value to B data member.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
const TMix & GetMix(void) const
Get the variant data.
bool IsPnt(void) const
Check if variant Pnt is selected.
bool IsSetB(void) const
Other end may not be available. Check if a value has been assigned to B data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
const TBond & GetBond(void) const
Get the variant data.
@ e_not_set
No variant selected.
@ e_Equiv
equivalent sets of locations
@ e_Empty
to NULL one Seq-id in a collection
@ e_Feat
indirect, through a Seq-feat
bool IsSetComp(void) const
Compression (residues/value). Check if a value has been assigned to Comp data member.
void SetMin(TMin value)
Assign a value to Min data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
TByte & SetByte(void)
Select the variant.
TMin GetMin(void) const
Get the Min member data.
const TInt & GetInt(void) const
Get the variant data.
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
TAxis GetAxis(void) const
Get the Axis member data.
TReal & SetReal(void)
Select the variant.
TInt & SetInt(void)
Select the variant.
const TGraph & GetGraph(void) const
Get the Graph member data.
TValues & SetValues(void)
Assign a value to Values data member.
const TValues & GetValues(void) const
Get the Values member data.
TMin GetMin(void) const
Get the Min member data.
TMax GetMax(void) const
Get the Max member data.
const TByte & GetByte(void) const
Get the variant data.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetMax(TMax value)
Assign a value to Max data member.
const TReal & GetReal(void) const
Get the variant data.
void SetMax(TMax value)
Assign a value to Max data member.
TAxis GetAxis(void) const
Get the Axis member data.
TMin GetMin(void) const
Get the Min member data.
TMax GetMax(void) const
Get the Max member data.
void SetMax(TMax value)
Assign a value to Max data member.
const TValues & GetValues(void) const
Get the Values member data.
E_Choice Which(void) const
Which variant is currently selected.
const TValues & GetValues(void) const
Get the Values member data.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
const TLoc & GetLoc(void) const
Get the Loc member data.
void SetAxis(TAxis value)
Assign a value to Axis data member.
void Reset(void)
Reset the whole object.
void SetMin(TMin value)
Assign a value to Min data member.
TComp GetComp(void) const
Get the Comp member data.
TMax GetMax(void) const
Get the Max member data.
void SetMin(TMin value)
Assign a value to Min data member.
TValues & SetValues(void)
Assign a value to Values data member.
TAxis GetAxis(void) const
Get the Axis member data.
void SetData(TData &value)
Assign a value to Data data member.
list< CRef< CSeq_graph > > TGraph
list< CRef< CSeq_align > > TAlign
list< CRef< CSeq_feat > > TFtable
const TData & GetData(void) const
Get the Data member data.
E_Choice Which(void) const
Which variant is currently selected.
unsigned int
A callback function used to compare two keys in a database.
Definition of all error codes used in objects libraries.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
NCBI_PARAM_DECL(bool, Mapper, NonMapping_As_Null)
#define INDEX_TO_STRAND(idx)
#define STRAND_TO_INDEX(is_set, strand)
const CMappingRange::TFuzz kEmptyFuzz(0)
typedef NCBI_PARAM_TYPE(Mapper, NonMapping_As_Null) TNonMappingAsNullParam
ENa_strand s_IndexToStrand(size_t idx)
void CopyGraphData(const TData &src, TData &dst, TSeqPos from, TSeqPos to)
NCBI_PARAM_DEF_EX(bool, Mapper, NonMapping_As_Null, false, eParam_NoThread, MAPPER_NONMAPPING_AS_NULL)
#define row(bind, expected)