NCBI C++ ToolKit
table_data_aln_summary.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: table_data_aln_summary.cpp 47464 2023-04-20 00:19:10Z evgeniev $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Liangshou Wu
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <gui/objutils/label.hpp>
36 
39 #include <objmgr/util/sequence.hpp>
40 
43 
44 //#include <objmgr/util/sequence.hpp>
45 
48 
50 {
    // Collects every alignment reachable from `objects` into m_Aligns and
    // records cumulative row totals in m_RowsUpTo.
    // An empty object list is a programming error (debug assertion); the scope
    // of the first object is kept as the scope for the whole table.
51  _ASSERT( !objects.empty() );
52  m_Scope = objects.front().scope;
    // NOTE(review): the loop header (listing line 53) is not visible in this
    // listing; `iter` presumably walks `objects` - confirm against the file.
54  const CObject& obj = *iter->object;
55  const type_info& type = typeid(obj);
56 
    // Dispatch on the exact dynamic type (typeid equality) of the object.
57  if (typeid(CSeq_align) == type) {
58  const CSeq_align& align = dynamic_cast<const CSeq_align&>(obj);
59  m_Aligns.push_back(CConstRef<CSeq_align>(&align));
60 
61  } else if (typeid(CSeq_align_set) == type) {
    // An alignment set contributes all of its member alignments.
62  const CSeq_align_set& align_set = dynamic_cast<const CSeq_align_set&>(obj);
63  std::copy(align_set.Get().begin(), align_set.Get().end(), std::back_inserter(m_Aligns));
64 
65  } else if (typeid(CSeq_annot) == type) {
    // An annotation contributes only when its data is of the alignment kind.
66  const CSeq_annot& annot = dynamic_cast<const CSeq_annot&>(obj);
67  if (annot.IsAlign()) {
68  const CSeq_annot::TData::TAlign& aligns = annot.GetData().GetAlign();
69  std::copy(aligns.begin(), aligns.end(), std::back_inserter(m_Aligns));
70  }
71  }
72  }
73 
    // m_RowsUpTo[i] is the running total of table rows through alignment i;
    // each alignment adds CheckNumRows() - 1 rows.
74  int prev_total = 0;
75  ITERATE( vector< CConstRef<CSeq_align> >, aix, m_Aligns ){
76  int num_rows = (*aix)->CheckNumRows();
77  int total = prev_total + num_rows -1;
78  prev_total = total;
79 
80  m_RowsUpTo.push_back( total );
81  }
82 }
83 
85 {
86  // data is already loaded in the ctor, nothing else to do here
87 }
88 
    // Value type of each table column; the trailing comment on every entry
    // names the column it describes.
90  ITableData::kString,// query accession
91  ITableData::kString,// subject accession
92  ITableData::kInt, // query start
93  ITableData::kInt, // query stop
94  ITableData::kString,// query strand
95  ITableData::kInt, // subject start
96  ITableData::kInt, // subject stop
97  ITableData::kString,// subject strand
98  ITableData::kReal, // percent identity
99  ITableData::kReal, // percent coverage
100  ITableData::kInt, // number of mismatches
101  ITableData::kInt, // number of gaps
102  ITableData::kInt, // number of gapped bases
103  ITableData::kInt, // alignment score
104  ITableData::kReal, // blast-style 'e_value'
105  ITableData::kString,// query defline
106  ITableData::kString // subject defline
107 };
108 
109 
    // Column header labels, one string per table column.
    // NOTE(review): the declaration line (listing line 110) is not visible in
    // this listing; presumably this initializes s_ColNames - confirm.
111  "Query",
112  "Subject",
113  "Query Start",
114  "Query Stop",
115  "Query Strand",
116  "Subject Start",
117  "Subject Stop",
118  "Subject Strand",
119  "Identity",
120  "Coverage",
121  "Mismatches",
122  "Gaps",
123  "Gap Bases",
124  "Score",
125  "E-Value",
126  "Query Defline",
127  "Subject Defline"
128 };
129 
130 
132 {
    // Looks up the value type of column `col` in s_ColTypes.
    // NOTE(review): listing line 133 is not visible here; presumably it is a
    // bounds check on `col` guarding this return - confirm against the file.
134  return s_ColTypes[col];
135 
    // Fallback type when the lookup above is not taken.
136  return ITableData::kNone;
137 }
138 
139 
140 string CTableDataAlnSummary::GetColumnLabel(size_t col) const
141 {
143  return s_ColNames[col];
144 
145  return kEmptyStr;
146 }
147 
148 
150 {
    // The row count is the last entry of m_RowsUpTo, or 0 when it is empty.
151  return m_RowsUpTo.empty() ? 0 : m_RowsUpTo[ m_RowsUpTo.size() -1 ];
152 }
153 
154 
156 {
    // The column count is fixed: eMaxColNum converted to size_t.
157  return (size_t)CTableDataAlnSummary::eMaxColNum;
158 }
159 
160 
// Writes the string form of cell (row, col) into `value`.
// `value` is emptied first and stays empty when `row` is out of range.
161 void CTableDataAlnSummary::GetStringValue(size_t row, size_t col, string& value) const
162 {
163  value.resize(0);
164  if( row >= GetRowsCount() ) return;
165 
    // The summary for the row is fetched once; each string-valued column
    // copies or formats one of its fields.
166  const CAlnSummary& summary = x_GetAlnSummary(row);
167  EColType type = (EColType)(col);
168  switch (type) {
169  case eQuery:
170  value = summary.m_Query;
171  break;
172  case eSubject:
173  value = summary.m_Subject;
174  break;
175  case eQStrand:
    // Strand columns are rendered through x_StrandToStr.
176  value = x_StrandToStr(summary.m_QStrand);
177  break;
178  case eSStrand:
179  value = x_StrandToStr(summary.m_SStrand);
180  break;
181  case eQDefline:
182  value = summary.m_QDefline;
183  break;
184  case eSDefline:
185  value = summary.m_SDefline;
186  break;
187  default:
    // Remaining (non-string) columns.
    // NOTE(review): listing lines 189, 191 and 193 are not visible in this
    // listing; presumably they obtain the column's value type and convert the
    // integer / real cell value to text - confirm against the file.
188  {{
190  if (type == ITableData::kInt)
192  else if (type == ITableData::kReal)
194  }}
195  break;
196  }
197 
198 }
199 
200 long CTableDataAlnSummary::GetIntValue(size_t row, size_t col) const
201 {
202  long value = 0;
203  if( row >= GetRowsCount() ) return value;
204 
205  const CAlnSummary& summary = x_GetAlnSummary(row);
206  EColType type = (EColType)(col);
207  switch (type) {
208  case eQStart:
209  value = (long)summary.m_QStart;
210  break;
211  case eQStop:
212  value = (long)summary.m_QStop;
213  break;
214  case eSStart:
215  value = (long)summary.m_SStart;
216  break;
217  case eSStop:
218  value = (long)summary.m_SStop;
219  break;
220  case eMismatches:
221  value = (long)summary.m_Mismatches;
222  break;
223  case eGaps:
224  value = (long)summary.m_Gaps;
225  break;
226  case eGappedBases:
227  value = (long)summary.m_GappedBases;
228  break;
229  case eScore:
230  value = (long)summary.m_Score;
231  break;
232  default:
233  break;
234  }
235 
236  return value;
237 }
238 
239 
240 double CTableDataAlnSummary::GetRealValue(size_t row, size_t col) const
241 {
242  double value = 0.0;
243  if( row >= GetRowsCount() ) return value;
244 
245  const CAlnSummary& summary = x_GetAlnSummary(row);
246  EColType type = (EColType)(col);
247  switch (type) {
248  case ePctIdentity:
249  value = summary.m_PctIdentity;
250  break;
251  case ePctCoverage:
252  value = summary.m_PctCoverage;
253  break;
254  case eEValue:
255  value = summary.m_EValue;
256  break;
257  default:
258  break;
259  }
260 
261  return value;
262 }
263 
264 
266 {
268  return value;
269 }
270 
271 
273 {
    // Returns the alignment backing table row `row`, paired with m_Scope.
    // NOTE(review): the declaration of `value` (listing line 274) is not
    // visible in this listing; out-of-range rows return it as declared.
275  if( row >= GetRowsCount() ) return value;
276 
277  value.scope = m_Scope;
278 
    // Find the first alignment whose cumulative row total exceeds `row`.
    // NOTE(review): `alrow` is a signed int compared against
    // m_RowsUpTo.size(), i.e. a signed/unsigned comparison.
279  int alrow = 0;
280  while( alrow < m_RowsUpTo.size() ){
281  if( row < m_RowsUpTo[alrow] ){
282  break;
283  }
284  alrow++;
285  }
286 
287  value.object = m_Aligns[alrow];
288 
289  return value;
290 }
291 
292 
295 {
    // Returns the summary for table row `row`, building and caching it in
    // m_CachedSummary on first use.
    // NOTE(review): the cache lookup that defines `iter` (listing line 296) is
    // not visible in this listing.
297  if (iter != m_CachedSummary.end()) {
298  return *iter->second;
299  }
300 
301  CScope& scope = const_cast<CScope&>(*m_Scope);
302  CRef<CAlnSummary> sum( new CAlnSummary() );
    // NOTE(review): the summary is cached before it is filled in. Only the
    // score computations further down sit inside a try block, so if any other
    // call below throws, a partially populated summary stays in the cache.
303  m_CachedSummary[row] = sum;
304 
    // Find the first alignment whose cumulative row total exceeds `row`.
    // NOTE(review): `row` is not range-checked in the visible code; if it is
    // not below the last total, `alrow` ends at m_RowsUpTo.size() and is then
    // used to index m_Aligns - callers are presumably expected to check.
305  int alrow = 0;
306  while( alrow < m_RowsUpTo.size() ){
307  if( row < m_RowsUpTo[alrow] ){
308  break;
309  }
310  alrow++;
311  }
312 
    // Offset of `row` within its alignment: row minus the total through the
    // previous alignment. Row 0 of the alignment is used as the query below and
    // row seqrow+1 as the subject.
313  CSeq_align::TDim seqrow = static_cast<CSeq_align::TDim>(row - (alrow == 0 ? 0 : m_RowsUpTo[alrow-1]));
314 
315  // update cached summary
316  const CSeq_align& align = *m_Aligns[alrow];
317 
318  // retrieve alignment label
319  //CLabel::GetLabel(align, &sum->m_Label, CLabel::eContent);
320 
321  // retrieve sequence accession
    // NOTE(review): the statement defining `idh` for the query (listing line
    // 322) is not visible in this listing. When `idh` is valid its id is
    // labelled, otherwise the id stored in the alignment is labelled directly.
323  if (idh) {
324  CLabel::GetLabel(*idh.GetSeqId(), &sum->m_Query, CLabel::eContent, &scope);
325  } else {
326  CLabel::GetLabel(align.GetSeq_id(0), &sum->m_Query, CLabel::eContent, &scope);
327  }
328  idh = sequence::GetId(align.GetSeq_id(seqrow+1), scope, sequence::eGetId_Best);
329  if (idh) {
330  CLabel::GetLabel(*idh.GetSeqId(), &sum->m_Subject, CLabel::eContent, &scope);
331  } else {
332  CLabel::GetLabel(align.GetSeq_id(seqrow+1), &sum->m_Subject, CLabel::eContent, &scope);
333  }
334 
335  // get start and stop
    // Each position is stored as the alignment's value plus one (presumably
    // converting 0-based coordinates to 1-based for display - confirm).
336  sum->m_QStart = align.GetSeqStart(0) + 1;
337  sum->m_QStop = align.GetSeqStop(0) + 1;
338  sum->m_SStart = align.GetSeqStart(seqrow+1) + 1;
339  sum->m_SStop = align.GetSeqStop(seqrow+1) + 1;
340 
341  // get strands
342  sum->m_QStrand = align.GetSeqStrand(0);
343  sum->m_SStrand = align.GetSeqStrand(seqrow+1);
344 
345  CScoreBuilder builder;
346  TSeqPos align_length = builder.GetAlignLength(align);
347 
348  // get percent coverage
    // Defaults used when a value is neither stored nor computable.
349  double coverage = 0.0;
350  int mismatches = 0;
351  double identity = 0.0;
352  size_t gaps = 0;
353  size_t gap_bases = 0;
354 
    // For each metric a score already stored on the alignment is preferred;
    // CScoreBuilder is used only when the named score is absent.
355  try {
356  if ( !align.GetNamedScore(CSeq_align::eScore_PercentCoverage, coverage) ) {
357  coverage = builder.GetPercentCoverage(scope, align);
358  }
359 
360  // get percent identity and number of mismatches
    // Skipped for alignments of 1,000,000 or more positions (presumably to
    // bound the cost of the computation - confirm); identity and mismatches
    // then keep their zero defaults.
361  if (align_length < 1000000) {
362  int identities = 0;
363  if ( !align.GetNamedScore(CSeq_align::eScore_IdentityCount, identities) ||
364  !align.GetNamedScore(CSeq_align::eScore_MismatchCount, mismatches) ) {
365  builder.GetMismatchCount(scope, align, identities, mismatches);
366  }
367 
368  if ( !align.GetNamedScore(CSeq_align::eScore_PercentIdentity, identity) ) {
    // NOTE(review): no check that align_length is non-zero before dividing.
369  identity = identities * 100.0 / align_length;
370  }
371  }
372 
373  // get number of gaps
374  gaps = builder.GetGapCount(align);
375 
376  // get number of gapped bases
377  gap_bases = builder.GetGapBaseCount(align);
378 
379  } catch (CException&) {
380  // ignore: whatever was computed before the failure (or the defaults) is kept
381  }
382 
383  sum->m_PctCoverage = coverage;
384  sum->m_PctIdentity = identity;
385  sum->m_Mismatches = mismatches;
386  sum->m_Gaps = gaps;
387  sum->m_GappedBases = gap_bases;
388 
389  // score
    // NOTE(review): listing line 392 is not visible here; presumably it reads
    // the alignment's stored score into `score`, which otherwise stays 0.
390  {
391  int score = 0;
393  sum->m_Score = score;
394  }
395 
396  // evalue
    // NOTE(review): listing line 399 is not visible here; presumably it reads
    // the alignment's stored e-value into `e_value`, which otherwise stays 0.0.
397  {
398  double e_value = 0.0;
400  sum->m_EValue = e_value;
401  }
402 
403  // deflines
    // A generated defline is used when the sequence resolves in the scope;
    // otherwise the accession label computed above is reused.
404  {
405  sequence::CDeflineGenerator gen;
406  CBioseq_Handle bsh = scope.GetBioseqHandle(align.GetSeq_id(0));
407  if (bsh) {
408  sum->m_QDefline = gen.GenerateDefline(bsh);
409  } else {
410  sum->m_QDefline = sum->m_Query;
411  }
    // NOTE(review): the subject defline is taken from alignment row 1, while
    // every other subject field above uses row seqrow+1. For alignments with
    // more than two rows this looks like it reports the wrong sequence's
    // defline - confirm whether GetSeq_id(seqrow+1) was intended.
412  bsh = scope.GetBioseqHandle(align.GetSeq_id(1));
413  if (bsh) {
414  sum->m_SDefline = gen.GenerateDefline(bsh);
415  } else {
416  sum->m_SDefline = sum->m_Subject;
417  }
418 
419  }
420 
421  return *sum;
422 }
423 
424 
426 {
    // Maps a strand enum value to its display text. eNa_strand_unknown and
    // any value not listed explicitly both yield "Unknown".
427  switch (strand) {
428  case eNa_strand_plus:
429  return "+";
430  case eNa_strand_minus:
431  return "-";
432  case eNa_strand_both:
433  return "Both";
434  case eNa_strand_both_rev:
435  return "Both reverse";
436  case eNa_strand_other:
437  return "Other";
438  case eNa_strand_unknown:
439  default:
440  return "Unknown";
441  }
442 }
443 
444 
446 
CBioseq_Handle –.
CObject –.
Definition: ncbiobj.hpp:180
CScope –.
Definition: scope.hpp:92
TSeqPos GetAlignLength(const CSeq_align &align, bool ungapped=false)
Compute the length of the alignment (= length of all segments, gaps + aligned)
int GetGapCount(const CSeq_align &align)
Compute the number of gaps in the alignment.
double GetPercentCoverage(CScope &scope, const CSeq_align &align, unsigned query=0)
Compute percent coverage of the query (sequence 0) (range 0-100)
int GetMismatchCount(CScope &scope, const CSeq_align &align)
Compute the number of mismatches in the alignment.
int GetGapBaseCount(const CSeq_align &align)
Compute the number of gap bases in the alignment (= length of all gap segments)
@ eScore_PercentCoverage
Definition: Seq_align.hpp:168
@ eScore_IdentityCount
Definition: Seq_align.hpp:145
@ eScore_PercentIdentity
Definition: Seq_align.hpp:189
@ eScore_MismatchCount
Definition: Seq_align.hpp:154
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
bool IsAlign(void) const
Definition: Seq_annot.cpp:182
virtual long GetIntValue(size_t row, size_t col) const
virtual ColumnType GetColumnType(size_t col) const
virtual string GetColumnLabel(size_t col) const
virtual double GetRealValue(size_t row, size_t col) const
virtual SConstScopedObject GetObjectValue(size_t row, size_t col) const
virtual size_t GetRowsCount() const
CRef< objects::CScope > m_Scope
CTableDataAlnSummary(TConstScopedObjects &objects)
vector< CConstRef< objects::CSeq_align > > m_Aligns
virtual SConstScopedObject GetRowObject(size_t row) const
const CAlnSummary & x_GetAlnSummary(size_t row) const
virtual void GetStringValue(size_t row, size_t col, string &value) const
virtual size_t GetColsCount() const
string x_StrandToStr(objects::ENa_strand strand) const
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static int type
Definition: getdata.c:31
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
vector< SConstScopedObject > TConstScopedObjects
Definition: objects.hpp:65
@ eContent
Definition: label.hpp:62
CConstRef< CSeq_id > GetSeqId(void) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5181
#define kEmptyStr
Definition: ncbistr.hpp:123
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
const Tdata & Get(void) const
Get the member data.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_other
Definition: Na_strand_.hpp:70
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both_rev
in reverse orientation
Definition: Na_strand_.hpp:69
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
list< CRef< CSeq_align > > TAlign
Definition: Seq_annot_.hpp:194
const TAlign & GetAlign(void) const
Get the variant data.
Definition: Seq_annot_.hpp:641
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
#define row(bind, expected)
Definition: string_bind.c:73
Definition: type.c:6
USING_SCOPE(objects)
static const string s_ColNames[CTableDataAlnSummary::eMaxColNum]
static ITableData::ColumnType s_ColTypes[CTableDataAlnSummary::eMaxColNum]
#define _ASSERT
Modified on Fri Sep 20 14:57:31 2024 by modify_doxy.py rev. 669887