77 # define ASSERT_CONSISTENCY() \
79 TNumseg numseg = GetNumseg(); \
80 TDim dim = GetDim(); \
81 _ASSERT(numseg >= 0); \
83 _ASSERT(size_t(numseg) == GetLens().size()); \
84 _ASSERT(size_t(numseg) * dim == GetStarts().size()); \
85 _ASSERT(!IsSetStrands() || GetStarts().size() == GetStrands().size()); \
86 _ASSERT(size_t(dim) == GetIds().size()); \
89 # define ASSERT_CONSISTENCY() NCBI_EAT_SEMICOLON()
97 "CDense_seg::CheckNumRows(): "
102 "CDense_seg::CheckNumRows(): "
103 "ids.size is inconsistent with dim");
118 const size_t num =
static_cast<size_t>(numrows) * numsegs;
122 "CDense_seg::CheckNumSegs(): "
127 "CDense_seg::CheckNumSegs(): "
130 if (starts.size() != num) {
132 "CDense_seg::CheckNumSegs(): "
133 "starts.size is inconsistent with dim * numseg");
135 if (lens.size() !=
size_t(numsegs)) {
137 "CDense_seg::CheckNumSegs(): "
138 "lens.size is inconsistent with numseg");
140 if (strands.size() && strands.size() != num) {
142 "CDense_seg::CheckNumSegs(): "
143 "strands.size is inconsistent with dim * numseg");
145 if (widths.size() && widths.size() !=
size_t(numrows)) {
147 "CDense_seg::CheckNumSegs(): "
148 "widths.size is inconsistent with dim");
160 "CDense_seg::GetSeq_id(): "
161 "can not get seq-id for the row requested.");
171 if (row < 0 || row >= dim) {
173 "CDense_seg::GetSeqStart(): "
174 "Invalid row number");
180 int pos = (seg - 1) * dim +
row;
182 if ((start = starts[pos]) >= 0) {
190 while (++seg < numseg) {
191 if ((start = starts[pos]) >= 0) {
198 "CDense_seg::GetSeqStart(): "
209 if (row < 0 || row >= dim) {
211 "CDense_seg::GetSeqStop(): "
212 "Invalid row number");
219 while (++seg < numseg) {
220 if ((start = starts[pos]) >= 0) {
221 return start +
GetLens()[seg] - 1;
227 int pos = (seg - 1) * dim +
row;
229 if ((start = starts[pos]) >= 0) {
230 return start +
GetLens()[seg] - 1;
236 "CDense_seg::GetSeqStop(): "
243 const size_t& strands_size =
GetStrands().size();
245 if ( !strands_size ) {
250 if (strands_size < (
size_t) dim) {
259 "CDense_seg::GetSeqStrand(): "
260 "Invalid strands size");
263 if (row < 0 || row >= dim) {
265 "CDense_seg::GetSeqStrand(): "
266 "Invalid row number");
289 const size_t max = numrows * (numsegs -1);
291 bool strands_exist = !strands.empty();
293 size_t numseg = 0, numrow = 0,
offset = 0;
294 for (numrow = 0; numrow < numrows; numrow++) {
296 bool plus = strands_exist ?
306 for (numseg = 0; numseg < numsegs; numseg++) {
307 start = starts[
offset + numrow];
309 if (start < min_start) {
310 string errstr =
string(
"CDense_seg::Validate():")
311 +
" Starts are not consistent!"
314 numsegs - 1 - numseg) +
322 lens[
plus ? numseg : numsegs - 1 - numseg] *
323 (widths.size() == numrows ?
332 if (min_start == -1) {
333 string errstr =
string(
"CDense_seg::Validate():")
348 list<TSignedSeqRange> delete_ranges;
355 for (j = 0; j <
GetDim(); ++j) {
357 if (this_start == -1) {
362 if (
GetDim() - count_gaps > 1) {
379 for (j = 0; j <
GetDim(); ++j) {
381 if (this_start == -1) {
386 if (
GetDim() - count_gaps > 1) {
396 list<TSignedSeqRange>::reverse_iterator iter = delete_ranges.rbegin();
397 list<TSignedSeqRange>::reverse_iterator end = delete_ranges.rend();
398 for ( ; iter != end; ++iter) {
400 if (
r.GetLength() == 0) {
438 vector<bool> can_merge(
GetNumseg() - 1,
true);
439 unsigned int merge_count = 0;
442 for (j = 0; j <
GetDim(); ++j) {
447 if ( (this_start == -1 && next_start != -1)
448 || (this_start != -1 && next_start == -1) ) {
449 can_merge[
i] =
false;
455 if (this_start != -1 && next_start != -1) {
463 if (this_start + seg_len != next_start) {
464 can_merge[
i] =
false;
475 can_merge[
i] =
false;
485 if (merge_count == 0) {
494 new_lens.reserve(
GetNumseg() - merge_count);
499 if (
i > 0 && can_merge[
i - 1]) {
503 for (j = 0; j <
GetDim(); ++j) {
505 new_starts[new_starts.size() -
GetDim() + j] =
513 for (j = 0; j <
GetDim(); ++j) {
536 #define IDX(_x,_y) (((_x)*GetDim())+(_y))
537 bool swaps_made =
false;
542 bool curr_gap =
false, next_gap =
false;
543 int curr_seq_row =
GetDim()+1, next_seq_row =
GetDim()+1;
544 for (
int j=0; j <
GetDim(); ++j) {
548 curr_seq_row =
min(curr_seq_row, j);
552 next_seq_row =
min(next_seq_row, j);
554 if(!(curr_gap & next_gap))
560 if(next_seq_row < curr_seq_row) {
561 for(
int j=0; j <
GetDim(); ++j) {
584 vector<bool>
remove(numseg,
true);
585 unsigned int remove_count = 0;
586 for (
i = 0;
i < numseg; ++
i) {
588 for (j = 0; j < dim; ++j) {
600 if (remove_count == 0) {
608 new_starts.reserve((numseg - remove_count) * dim);
609 new_lens.reserve(numseg - remove_count);
611 new_strands.reserve((numseg - remove_count) * dim);
613 for (
i = 0;
i < numseg; ++
i) {
617 for (j = 0; j < dim; ++j) {
618 new_starts.push_back(
GetStarts()[
i * dim + j]);
663 CDense_seg::TLens::iterator
f =
SetLens().begin();
664 CDense_seg::TLens::iterator
r =
SetLens().end();
688 if (row1 >=
GetDim() || row1 < 0 ||
689 row2 >=
GetDim() || row2 < 0) {
691 "Row numbers supplied to CDense_seg::SwapRows must be "
692 "in the range [0, dim)");
701 for(
int i = 0;
i < idxStop;
i +=
GetDim()) {
707 for(
int i = 0;
i < idxStop;
i +=
GetDim()) {
723 for (seg = 0; seg <
GetNumseg() && !found; ++seg) {
727 if (pos >= start && pos < start +
len) {
734 "CDense_seg::x_FindSegment(): "
735 "Can't find a segment containing position " +
749 if (row < 0 || row >=
GetDim()) {
751 "CDense_seg::ExtractSlice():"
752 " Invalid row number ("
761 "CDense_seg::ExtractSlice(): "
763 ") off end of alignment");
767 "CDense_seg::ExtractSlice(): "
769 ") off end of alignment");
790 swap(startOffset, stopOffset);
791 swap(startSeg, stopSeg);
801 start += startOffset;
813 if (seg == startSeg) {
816 if (seg == stopSeg) {
844 new_ds->
SetDim(
static_cast<TDim>(rows.size()));
848 new_ds->
SetIds().reserve(rows.size());
857 if (*row < 0 || *row >= dim) {
859 "CDense_seg::ExtractRows():"
860 " Invalid row number ("
866 new_ds->
SetIds().push_back(id_copy);
868 for (
TNumseg segnum = 0; segnum < numseg; ++segnum) {
871 int idx = segnum * dim + *
row;
904 "Negative offset greater than seq position");
936 }
while (++seq_loc_i);
940 if (ttl_loc_len < row_stop + 1) {
941 string errstr(
"CDense_seg::RemapToLoc():"
942 " Seq-loc is not long enough to"
943 " cover the alignment!"
944 " Maximum row seq pos is ");
946 errstr +=
". The total seq-loc len is only ";
948 errstr +=
", it should be at least ";
950 errstr +=
" (= max seq pos + 1).";
972 size_t idx = loc_plus ?
row : (numsegs - 1) * numrows +
row;
973 TNumseg seg = loc_plus ? 0 : numsegs - 1;
974 while (loc_plus ? seg <
GetNumseg() : seg >= 0) {
975 if (starts[idx] == -1) {
978 idx += numrows; seg++;
980 idx -= numrows; seg--;
986 if ((loc_plus == row_plus ?
987 starts[idx] : ttl_loc_len - starts[idx] - lens[seg])
988 > len_so_far + loc_len) {
996 "CDense_seg::RemapToLoc():"
1003 "CDense_seg::RemapToLoc():"
1004 " The strand should be the same accross"
1005 " the input seq-loc");
1010 if (loc_plus == row_plus) {
1014 start - len_so_far + ttl_loc_len - starts[idx] - lens[seg];
1017 if (lens[seg] >
len) {
1026 "CDense_seg::RemapToLoc():"
1032 (loc_plus ? seg : seg + 1),
1034 SetLens()[loc_plus ? seg + 1 : seg] = len_diff;
1037 TStarts temp_starts(numrows, -1);
1038 for (
int row_i = 0, tmp_idx = seg * numrows;
1039 row_i < numrows; ++row_i, ++tmp_idx) {
1041 if (this_start != -1) {
1042 temp_starts[row_i] = this_start;
1045 temp_starts[row_i] = start;
1047 temp_starts[row_i] +=
len;
1050 this_start += len_diff;
1055 len_so_far += loc_len;
1059 (loc_plus ? seg + 1 : seg) * numrows,
1060 temp_starts.begin(), temp_starts.end());
1062 if (strands.size()) {
1065 strands.begin(), strands.begin() + numrows);
1070 if ((len_diff = lens[seg] -
len) > 0) {
1072 idx += numrows; seg++;
1074 idx -= numrows; seg--;
1085 idx += numrows; seg++;
1087 idx -= numrows; seg--;
1092 if ( !ignore_strand ) {
1093 if (loc_plus != row_plus) {
1094 if (!strands.size()) {
1098 for (seg = 0, idx =
row;
1099 seg <
GetNumseg(); seg++, idx += numrows) {
1119 bool strands_exist = !strands.empty();
1130 vector<TNumseg> extra_segs;
1143 new_numsegs = numsegs;
1146 new_ids.resize(numrows);
1156 vector<TSignedSeqPos> expected_positions;
1157 expected_positions.resize(numrows, -1);
1160 plus.resize(numrows,
true);
1161 if (strands_exist) {
1170 TNumseg idx = 0, new_idx = 0, extra_idx = 0;
1173 for (seg = 0; seg < numsegs; seg++) {
1183 if (expected_pos >= 0) {
1187 extra_len = start - expected_pos;
1189 extra_len = expected_pos - start -
len;
1192 if (extra_len < 0) {
1193 string errstr(
"CDense_seg::AddUnalignedSegments():"
1194 " Illegal overlap at Row ");
1196 errstr +=
" Segment ";
1201 }
else if (extra_len > 0) {
1203 extra_segs.push_back(seg);
1204 extra_lens.push_back(extra_len);
1205 extra_starts.resize(extra_idx + numrows, -1);
1206 extra_starts[extra_idx +
row] =
1209 extra_idx += numrows;
1216 expected_pos = start +
len;
1218 expected_pos = start;
1225 new_numsegs = numsegs + extra_numsegs;
1226 new_lens.resize(numsegs + extra_numsegs);
1227 new_starts.resize(numrows * new_numsegs);
1228 for (seg = 0, new_seg = 0, extra_seg = 0,
1229 idx = 0, new_idx = 0, extra_idx = 0;
1234 if (extra_numsegs) {
1235 for ( ; size_t(extra_seg) < extra_segs.size() && extra_segs[extra_seg] == seg; ++new_seg, ++extra_seg) {
1236 new_lens[new_seg] = extra_lens[extra_seg];
1237 for (
row = 0;
row < numrows; ++
row, ++new_idx, ++extra_idx) {
1238 new_starts[new_idx] = extra_starts[extra_idx];
1244 new_lens[new_seg] = lens[seg];
1246 new_starts[new_idx++] = starts[idx++];
1253 if (strands_exist) {
1255 for (new_seg = 0; new_seg < new_numsegs; new_seg++) {
1256 for (
row = 0;
row < numrows;
row++, new_idx++) {
1275 const string& transcript )
1278 bool query_strand_specific =
1280 bool subj_strand_specific =
1283 if(!query_strand_specific || !subj_strand_specific) {
1303 string::const_iterator ib = transcript.begin();
1304 string::const_iterator ie = transcript.end();
1305 string::const_iterator ii = ib;
1306 unsigned char seg_type;
1307 static const char* badsymerr =
"Unknown or unsupported transcript symbol";
1308 char c = ii != ie ? *ii++ : 0;
1309 if(c ==
'M' || c ==
'R') {
1314 else if (c ==
'I') {
1318 else if (c ==
'D') {
1325 "Empty transcript");
1335 if(
isalpha((
unsigned char) c)) {
1337 if(seg_type == 0 && (c ==
'M' || c ==
'R')) {
1341 else if(seg_type == 1 && c ==
'I') {
1344 else if(seg_type == 2 && c ==
'D') {
1352 starts.push_back(seg_type == 1? (
TSeqPos)-1: query_start + query_close);
1353 strands.push_back(query_strand);
1357 starts.push_back(seg_type == 2? (
TSeqPos)-1: subj_start + subj_close);
1358 strands.push_back(subj_strand);
1361 case 0: seg_len = pos1 - start1;
break;
1362 case 1: seg_len = pos2 - start2;
break;
1363 case 2: seg_len = pos1 - start1;
break;
1365 lens.push_back(seg_len);
1372 if(c ==
'M' || c ==
'R'){
1377 else if (c ==
'I') {
1381 else if (c ==
'D') {
1395 if(!
isdigit((
unsigned char) c)) {
1398 "Alignment transcript corrupt");
1402 while(ii < ie &&
isdigit((
unsigned char)(*ii))) {
1403 len = 10*
len + *ii -
'0';
1408 case 0: pos1 +=
len; pos2 +=
len;
break;
1409 case 1: pos2 +=
len;
break;
1410 case 2: pos1 +=
len;
break;
1416 starts.push_back(seg_type == 1? (
TSeqPos)-1: query_start + query_close);
1417 strands.push_back(query_strand);
1420 starts.push_back(seg_type == 2? (
TSeqPos)-1: subj_start + subj_close);
1421 strands.push_back(subj_strand);
1425 case 0: seg_len = pos1 - start1;
break;
1426 case 1: seg_len = pos2 - start2;
break;
1427 case 2: seg_len = pos1 - start1;
break;
1429 lens.push_back(seg_len);
1440 "Invalid row number in CreateRowSeq_interval(): " +
1449 for (
int seg = 0; seg <
GetNumseg(); seg++) {
1459 if (start +
len > to) {
1471 "Can not convert row to seq-interval - invalid from/to value");
1477 if (plus_len >= minus_len*2) {
1481 else if (plus_len*2 < minus_len) {
1522 catch ( bad_alloc& ) {
1558 catch ( bad_alloc& ) {
1561 DefaultRead(
in, member);
1568 return type.FindMember(
"starts");
1575 x_GetMember().SetLocalReadHook(
in, hook);
1582 x_GetMember().SetGlobalReadHook(hook);
1595 catch ( bad_alloc& ) {
1598 DefaultRead(
in, member);
1605 return type.FindMember(
"lens");
1612 x_GetMember().SetLocalReadHook(
in, hook);
1619 x_GetMember().SetGlobalReadHook(hook);
1632 catch ( bad_alloc& ) {
1635 DefaultRead(
in, member);
1642 return type.FindMember(
"strands");
1649 x_GetMember().SetLocalReadHook(
in, hook);
1656 x_GetMember().SetGlobalReadHook(hook);
#define ASSERT_CONSISTENCY()
NCBI_PARAM_DECL(bool, OBJECTS, DENSE_SEG_RESERVE)
NCBI_PARAM_DEF_EX(bool, OBJECTS, DENSE_SEG_RESERVE, true, eParam_NoThread, OBJECTS_DENSE_SEG_RESERVE)
bool IsReverse(ENa_strand s)
static char * s_Reserve(size_t size, CSimpleBufferT< char > &buffer)
static void SetHook(CObjectIStream &in)
static void SetGlobalHook(void)
static CObjectTypeInfoMI x_GetMember(void)
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read.
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read.
static void SetHook(CObjectIStream &in)
static void SetGlobalHook(void)
static CObjectTypeInfoMI x_GetMember(void)
static void SetHook(CObjectIStream &in)
void ReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
This method will be called at approriate time when the object of requested type is to be read.
static CObjectTypeInfoMI x_GetMember(void)
static void SetGlobalHook(void)
virtual void PreReadClassMember(CObjectIStream &in, const CObjectInfoMI &member)
Return true to invoke default reading method afterwards.
void TrimEndGaps()
Trim leading/training gaps if possible.
CRef< CDense_seg > ExtractRows(const vector< TDim > &rows) const
Extract specified rows of the alignment, in specified order.
const CSeq_id & GetSeq_id(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
CRef< CSeq_interval > CreateRowSeq_interval(TDim row) const
TSeqPos GetSeqStop(TDim row) const
void Reverse(void)
Reverse the segments' orientation.
static void SetGlobalReserveHooks(void)
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
void RemovePureGapSegs()
Remove any segments in which every row has a gap (these can arise when ExtractRows is used)
TNumseg CheckNumSegs(void) const
void SwapRows(TDim row1, TDim row2)
Swap two rows (changing *order*, not content)
TSeqPos GetSeqStart(TDim row) const
void OrderAdjacentGaps()
Order adjacent gaps, so that the side with sequence is in row-decending order.
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
TDim CheckNumRows(void) const
TNumseg x_FindSegment(TDim row, TSignedSeqPos pos) const
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any,...
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
void Validate(bool full_test=false) const
void RemapToLoc(TDim row, const CSeq_loc &loc, bool ignore_strand=false)
static void SetReserveHooks(CObjectIStream &in)
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
const TWidths & GetWidths(void) const
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Base class for all serializable objects.
static const char si[8][64]
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
ESerialRecursionMode
How to assign and compare child sub-objects of serial objects.
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
TRange GetRange(void) const
Get the range.
ENa_strand GetStrand(void) const
static C * Get(const CTypesIterator &it)
TMemberIndex GetMemberIndex(void) const
Get index of the member in the class.
const CObjectInfo & GetClassObject(void) const
Get containing class data.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
position_type GetLength(void) const
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetLens(void) const
lengths in ids order within segs Check if a value has been assigned to Lens data member.
TLens & SetLens(void)
Assign a value to Lens data member.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
vector< ENa_strand > TStrands
TDim & SetDim(void)
Assign a value to Dim data member.
const TLens & GetLens(void) const
Get the Lens member data.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
vector< CRef< CSeq_id > > TIds
TDim GetDim(void) const
Get the Dim member data.
TNumseg & SetNumseg(void)
Assign a value to Numseg data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
bool IsSetStarts(void) const
start OFFSETS in ids order within segs Check if a value has been assigned to Starts data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
const TIds & GetIds(void) const
Get the Ids member data.
bool CanGetStrands(void) const
Check if it is safe to call GetStrands method.
TNumseg GetNumseg(void) const
Get the Numseg member data.
TIds & SetIds(void)
Assign a value to Ids data member.
const TStrands & GetStrands(void) const
Get the Strands member data.
bool IsSetIds(void) const
sequences in order Check if a value has been assigned to Ids data member.
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsWhole(void) const
Check if variant Whole is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
@ eNa_strand_both
in forward orientation
const struct ncbi::grid::netcache::search::fields::SIZE size
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
#define row(bind, expected)