NCBI C++ ToolKit
alnmix.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: alnmix.cpp 77169 2017-03-30 17:34:38Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kamen Todorov, NCBI
27 *
28 * File Description:
29 * Alignment mix
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
39 
41 #include <objects/seq/Bioseq.hpp>
43 
44 #include <serial/iterator.hpp>
45 
46 #include <algorithm>
47 
49 BEGIN_objects_SCOPE // namespace ncbi::objects::
50 
51 
53  : x_CalculateScore(0)
54 {
55  x_Init();
56 }
57 
58 
60  TCalcScoreMethod calc_score)
61  : m_Scope(&scope),
62  x_CalculateScore(calc_score)
63 {
64  if ( !x_CalculateScore ) {
66  }
67  x_Init();
68 }
69 
70 
72 {
73 }
74 
75 
76 void
78 {
80  new CAlnMixSequences() :
84 }
85 
86 
87 void
89 {
91 }
92 
93 
94 void
96 {
97  if (m_InputAlnsMap.find((void *)&aln) == m_InputAlnsMap.end()) {
98  // add only if not already added
99  m_InputAlnsMap[(void *)&aln] = &aln;
100  m_InputAlns.push_back(CConstRef<CSeq_align>(&aln));
101 
102  if (aln.GetSegs().IsDenseg()) {
103  Add(aln.GetSegs().GetDenseg(), flags);
104  } else if (aln.GetSegs().IsStd()) {
106  (m_Scope ? this : 0);
107  Add(*sa, flags);
108  } else if (aln.GetSegs().IsDisc()) {
110  aln_it,
111  aln.GetSegs().GetDisc().Get()) {
112  Add(**aln_it, flags);
113  }
114  }
115  }
116 }
117 
118 
119 void
121 {
122  const CDense_seg* dsp = &ds;
123 
124  if (m_InputDSsMap.find((void *)dsp) != m_InputDSsMap.end()) {
125  return; // it has already been added
126  }
127  x_Reset();
128 #if _DEBUG
129  dsp->Validate(true);
130 #endif
131 
132  // translate (extend with widths) the dense-seg if necessary
133  if (flags & fForceTranslation && !dsp->IsSetWidths()) {
134  if ( !m_Scope ) {
135  string errstr = string("CAlnMix::Add(): ")
136  + "Cannot force translation for Dense_seg "
137  + NStr::NumericToString(m_InputDSs.size() + 1) + ". "
138  + "Neither CDense_seg::m_Widths are supplied, "
139  + "nor OM is used to identify molecule type.";
140  NCBI_THROW(CAlnException, eMergeFailure, errstr);
141  } else {
142  m_InputDSs.push_back(x_ExtendDSWithWidths(*dsp));
143  dsp = m_InputDSs.back();
144  }
145  } else {
146  m_InputDSs.push_back(CConstRef<CDense_seg>(dsp));
147  }
148 
149  if (flags & fCalcScore) {
150  if ( !x_CalculateScore ) {
151  // provide the default calc method
153  }
154  }
155  if ( !m_Scope && x_CalculateScore) {
156  NCBI_THROW(CAlnException, eMergeFailure, "CAlnMix::Add(): "
157  "Score calculation requested without providing "
158  "a scope in the CAlnMix constructor.");
159  }
160  m_AddFlags = flags;
161 
162  m_InputDSsMap[(void *)dsp] = dsp;
163 
164  m_AlnMixSequences->Add(*dsp, flags);
165 
166  m_AlnMixMatches->Add(*dsp, flags);
167 }
168 
169 
172 {
173  if (ds.IsSetWidths()) {
174  NCBI_THROW(CAlnException, eMergeFailure,
175  "CAlnMix::x_ExtendDSWithWidths(): "
176  "Widths already exist for the input alignment");
177  }
178 
179  bool contains_AA = false, contains_NA = false;
180  CRef<CAlnMixSeq> aln_seq;
181  for (CDense_seg::TDim numrow = 0; numrow < ds.GetDim(); numrow++) {
182  m_AlnMixSequences->x_IdentifyAlnMixSeq(aln_seq, *ds.GetIds()[numrow]);
183  if (aln_seq->m_IsAA) {
184  contains_AA = true;
185  } else {
186  contains_NA = true;
187  }
188  }
189  if (contains_AA && contains_NA) {
190  NCBI_THROW(CAlnException, eMergeFailure,
191  "CAlnMix::x_ExtendDSWithWidths(): "
192  "Incorrect input Dense-seg: Contains both AAs and NAs but "
193  "widths do not exist!");
194  }
195 
196  CRef<CDense_seg> new_ds(new CDense_seg());
197 
198  // copy from the original
199  new_ds->Assign(ds);
200 
201  if (contains_NA) {
202  // fix the lengths
203  const CDense_seg::TLens& lens = ds.GetLens();
204  CDense_seg::TLens& new_lens = new_ds->SetLens();
205  for (CDense_seg::TNumseg numseg = 0; numseg < ds.GetNumseg(); numseg++) {
206  if (lens[numseg] % 3) {
207  string errstr =
208  string("CAlnMix::x_ExtendDSWithWidths(): ") +
209  "Length of segment " + NStr::IntToString(numseg) +
210  " is not divisible by 3.";
211  NCBI_THROW(CAlnException, eMergeFailure, errstr);
212  } else {
213  new_lens[numseg] = lens[numseg] / 3;
214  }
215  }
216  }
217 
218  // add the widths
219  CDense_seg::TWidths& new_widths = new_ds->SetWidths();
220  new_widths.resize(ds.GetDim(), contains_NA ? 3 : 1);
221 #if _DEBUG
222  new_ds->Validate(true);
223 #endif
224  return new_ds;
225 }
226 
227 
228 void
230 {
231  CRef<CAlnMixSeq> aln_seq1, aln_seq2;
232  m_AlnMixSequences->x_IdentifyAlnMixSeq(aln_seq1, id1);
233  m_AlnMixSequences->x_IdentifyAlnMixSeq(aln_seq2, id2);
234  if (aln_seq1->m_BioseqHandle != aln_seq2->m_BioseqHandle) {
235  string errstr =
236  string("CAlnMix::ChooseSeqId(CSeq_id& id1, const CSeq_id& id2):")
237  + " Seq-ids: " + id1.AsFastaString()
238  + " and " + id2.AsFastaString()
239  + " do not resolve to the same bioseq handle,"
240  " but are used on the same 'row' in different segments."
241  " This is legally allowed in a Std-seg, but conversion to"
242  " Dense-seg cannot be performed.";
243  NCBI_THROW(CAlnException, eInvalidSeqId, errstr);
244  }
245  CRef<CSeq_id> id1cref(&id1);
246  CRef<CSeq_id> id2cref(&(const_cast<CSeq_id&>(id2)));
247  if (CSeq_id::BestRank(id1cref) > CSeq_id::BestRank(id2cref)) {
248 #ifdef _DEBUG
249  if (id1.IsGi()) {
250  const CTextseq_id* txt_id = id2.GetTextseq_Id();
251  if (txt_id && !txt_id->IsSetVersion()) {
252  ERR_POST("Using version-less accession " << txt_id->GetAccession()
253  << " instead of GI " << id1.GetGi());
254  }
255  }
256 #endif
257  id1.Reset();
258  SerialAssign<CSeq_id>(id1, id2);
259  }
260 #ifdef _DEBUG
261  else if (id2.IsGi()) {
262  const CTextseq_id* txt_id = id1.GetTextseq_Id();
263  if (txt_id && !txt_id->IsSetVersion()) {
264  ERR_POST("Using version-less accession " << txt_id->GetAccession()
265  << " instead of GI " << id2.GetGi());
266  }
267  }
268 #endif
269 }
270 
271 
272 void
274 {
275  x_SetTaskName("Sorting");
276  if (flags & fSortSeqsByScore) {
277  if (flags & fSortInputByScore) {
279  } else {
281  }
282  }
283  if (flags & fSortInputByScore) {
285  } else {
287  }
288  x_SetTaskName("Merging");
291 }
292 
293 
294 const CDense_seg&
296 {
297  return m_AlnMixMerger->GetDenseg();
298 }
299 
300 
301 const CSeq_align&
303 {
304  return m_AlnMixMerger->GetSeqAlign();
305 }
306 
307 
308 END_objects_SCOPE // namespace ncbi::objects::
static CRef< CScope > m_Scope
void Add(const CDense_seg &ds, TAddFlags flags=0)
"Add" a Dense-seg to the existing matches.
Definition: alnmatch.cpp:97
void SortByChainScore()
Definition: alnmatch.cpp:90
void SortByScore()
Modifying algorithms.
Definition: alnmatch.cpp:83
void Merge(TMergeFlags flags=0)
Definition: alnmerger.cpp:85
const CSeq_align & GetSeqAlign(void) const
Definition: alnmerger.hpp:171
const CDense_seg & GetDenseg(void) const
Definition: alnmerger.hpp:159
void Reset()
Definition: alnmerger.cpp:65
bool m_IsAA
Definition: alnseq.hpp:153
const CBioseq_Handle * m_BioseqHandle
Definition: alnseq.hpp:148
void Add(const CDense_seg &ds, TAddFlags flags=0)
Definition: alnseq.cpp:105
void SortByScore()
Definition: alnseq.cpp:91
void SortByChainScore()
Definition: alnseq.cpp:98
void x_IdentifyAlnMixSeq(CRef< CAlnMixSeq > &aln_seq, const CSeq_id &seq_id)
Definition: alnseq.cpp:214
int TMergeFlags
Definition: alnmix.hpp:114
@ fCalcScore
Definition: alnmix.hpp:72
@ fForceTranslation
Definition: alnmix.hpp:77
void Add(const CDense_seg &ds, TAddFlags flags=0)
Definition: alnmix.cpp:120
TConstAlns m_InputAlns
Definition: alnmix.hpp:149
CRef< CAlnMixSequences > m_AlnMixSequences
Definition: alnmix.hpp:155
CRef< CAlnMixMerger > m_AlnMixMerger
Definition: alnmix.hpp:157
TCalcScoreMethod x_CalculateScore
Definition: alnmix.hpp:147
@ fSortSeqsByScore
Definition: alnmix.hpp:106
@ fSortInputByScore
Definition: alnmix.hpp:107
CAlnMixMatches::TCalcScoreMethod TCalcScoreMethod
Definition: alnmix.hpp:58
CRef< CAlnMixMatches > m_AlnMixMatches
Definition: alnmix.hpp:156
TAddFlags m_AddFlags
Definition: alnmix.hpp:153
CAlnMix(void)
Definition: alnmix.cpp:52
int TAddFlags
Definition: alnmix.hpp:82
virtual void ChooseSeqId(CSeq_id &id1, const CSeq_id &id2)
Definition: alnmix.cpp:229
TConstDSs m_InputDSs
Definition: alnmix.hpp:148
void Merge(TMergeFlags flags=0)
Definition: alnmix.cpp:273
TConstDSsMap m_InputDSsMap
Definition: alnmix.hpp:150
const CSeq_align & GetSeqAlign(void) const
Definition: alnmix.cpp:302
const CDense_seg & GetDenseg(void) const
Definition: alnmix.cpp:295
CRef< CDense_seg > x_ExtendDSWithWidths(const CDense_seg &ds)
Definition: alnmix.cpp:171
void x_Reset(void)
Definition: alnmix.cpp:88
void x_Init(void)
Definition: alnmix.cpp:77
~CAlnMix(void)
Definition: alnmix.cpp:71
CRef< CScope > m_Scope
Definition: alnmix.hpp:146
TConstAlnsMap m_InputAlnsMap
Definition: alnmix.hpp:151
int CalculateScore(TNumrow row1, TNumrow row2) const
Definition: alnvec.cpp:926
TWidths & SetWidths(void)
Definition: Dense_seg.hpp:217
bool IsSetWidths(void) const
Definition: Dense_seg.hpp:196
vector< int > TWidths
Definition: Dense_seg.hpp:73
void Validate(bool full_test=false) const
Definition: Dense_seg.cpp:274
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
Definition: Dense_seg.cpp:62
CScope –.
Definition: scope.hpp:92
CRef< CSeq_align > CreateDensegFromStdseg(SSeqIdChooser *SeqIdChooser=0) const
---------------------------------------------------------------------------- PRE : the Seq-align has ...
Definition: Seq_align.cpp:728
void x_SetTaskName(const string &name)
Methods for reporting task progress.
ITaskProgressCallback * x_GetTaskProgressCallback() const
Callback accessor.
void SetTaskProgressCallback(ITaskProgressCallback *callback)
Hook a callback to a task.
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
static uch flags
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
string
Definition: cgiapp.hpp:687
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
virtual void Reset(void)
Reset the whole object.
Definition: Seq_id_.cpp:56
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
Modified on Wed Apr 24 14:15:28 2024 by modify_doxy.py rev. 669887