NCBI C++ ToolKit
dense_ds.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: dense_ds.cpp 46043 2021-01-21 17:33:26Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <corelib/ncbistd.hpp>
35 
40 
41 #include <gui/objutils/label.hpp>
43 
48 
49 #include <algorithm>
50 
53 
54 
55 ///////////////////////////////////////////////////////////////////////////////
56 /// CHitSeqId
57 
58 bool CHitSeqId::Equals(const IHitSeqId& id) const
59 {
60  const CHitSeqId* hit_id = dynamic_cast<const CHitSeqId*>(&id);
61  if(hit_id) {
62  CConstRef<CSeq_id> s_id = hit_id->m_SeqId;
63  return m_SeqId->Equals(*s_id);
64  }
65  return false;
66 }
67 
68 
70 {
71  return new CHitSeqId(*m_SeqId);
72 }
73 
74 ///////////////////////////////////////////////////////////////////////////////
75 /// CHitSeqRowId
76 
77 CHitSeqRowId::CHitSeqRowId(int row, const objects::CSeq_id& id)
78 : m_Row(row),
79  m_SeqId(&id)
80 {
81 }
82 
83 
85 : m_Row(id.m_Row),
86  m_SeqId(id.m_SeqId)
87 {
88 }
89 
90 
91 bool CHitSeqRowId::Equals(const IHitSeqId& id) const
92 {
93  const CHitSeqRowId* r_id = dynamic_cast<const CHitSeqRowId*>(&id);
94  if(r_id) {
95  return m_Row == r_id->m_Row && m_SeqId->Equals(*r_id->m_SeqId);
96  }
97  return false;
98 }
99 
100 
102 {
103  return new CHitSeqRowId(m_Row, *m_SeqId);
104 }
105 
106 
108 {
109  return m_SeqId;
110 }
111 
112 
113 ///////////////////////////////////////////////////////////////////////////////
114 /// CHitMatrixDataSource
116 {
118 }
119 
120 
122 {
123  x_Clear();
124 }
125 
126 
128 {
129  CBioseq_Handle h_null;
130  m_QueryHandle = h_null;
131  m_SubjectHandle = h_null;
132 
135 
136  ITERATE(THitAdapterCont, it_a, m_Hits) {
137  delete(*it_a);
138  }
139  m_Hits.clear();
140 
141  m_SubjectID.reset();
142  m_QueryID.reset();
143 }
144 
145 
147 {
148  x_ClearHits();
149 
150  m_ScoreMap.clear();
151 
152  ITERATE(TIdVector, it_s, m_SeqIds) {
153  delete(*it_s);
154  }
155 
157  m_SeqIds.clear();
158  m_CanCreateRowBased = false;
159 
160  m_AlnIdMap.reset();
161  m_AlnStats.Reset();
162 
163  m_Scope.Reset();
164  m_Aligns.clear();
165 }
166 
167 
168 template<typename TIDRefCont>
169  typename TIDRefCont::const_iterator
170  find_id(const TIDRefCont& cont, const IHitSeqId& id)
171 {
172  ITERATE (typename TIDRefCont, it, cont) {
173  if((*it)->Equals(id))
174  return it;
175  }
176  return cont.end();
177 }
178 
179 
181 {
182  x_Clear();
183 
184  x_TestAndSelectAligns(aligns);
185 
186  m_Scope.Reset(&scope);
187 
189 
191  if(! m_CanCreateRowBased) {
192  m_Params.m_RowBased = false;
193  }
194 
196 
197  /**
198  int delta = aligns.size() - m_Aligns.size();
199  if(delta > 0) {
200  // TODO remove error reporting, Data Source should be clean of GUI
201  string msg = "Some of the Seq-aligns are invalid or incompatible with Dot Matrix View.\n\
202  See diagnostic output for details.";
203  NcbiErrorBox(msg, "Dot Matrix View - Invalid Data");
204  }
205  **/
206 }
207 
208 
209 bool FIdEqual(const CConstRef<CSeq_id>& id1, const CConstRef<CSeq_id>& id2)
210 {
211  return id1->Equals(*id2);
212 }
213 
214 
216 {
217  typedef CSeq_align::C_Segs TSegs;
218  bool check_std = false;
219 
220  TSegs::E_Choice seg_type = align.GetSegs().Which();
221  switch(seg_type) {
222  case TSegs::e_Std:
223  check_std = true;
224 
225  case TSegs::e_Denseg:
226  case TSegs::e_Dendiag:
227  case TSegs::e_Sparse:
228  case TSegs::e_Spliced:
229  try {
230  bool good = true;
231  if(check_std) {
232  good = x_GoodSeg(align.GetSegs().GetStd());
233  }
234  if(good) {
235  m_AlnIdMap->push_back(align); // this tests the Seq-align
236  m_Aligns.push_back(CConstRef<CSeq_align>(&align));
237  }
238  } catch (CAlnException e) {
239  ERR_POST("CHitMatrixDataSource skipping Seq-align - " << e.what());
240  }
241  break;
242 
243  case TSegs::e_Disc:
245  x_TestAndSelectAlign(**it);
246  }
247  break;
248 
249 
250  default:
251  ERR_POST("CHitMatrixDataSource::x_TestAndSelectAligns() - unsupported type of Seq-align");
252  break;
253  }
254 }
255 
257 {
258 
259  m_AlnIdMap.reset(new TAlnIdMap(TIdExtract(), aligns.size()));
260 
261  ITERATE(TAlignVector, it, aligns) {
262  const CSeq_align& align = **it;
263  x_TestAndSelectAlign(align);
264  }
265 
266  m_AlnStats.Reset(new TAlnStats(*m_AlnIdMap));
267 }
268 
269 
271 {
272  ITERATE(TStd, it, std_list) {
273  const CStd_seg& stdseg = **it;
274 
275  CStd_seg::TLoc locs = stdseg.GetLoc();
276  ITERATE(CStd_seg::TLoc, it_loc, locs) {
277  const CSeq_loc& loc = **it_loc;
278  ENa_strand strand = loc.GetStrand();
279  if(strand != eNa_strand_plus && strand != eNa_strand_minus
280  && strand != eNa_strand_unknown) {
281  LOG_POST(Info << "Seq-locs in a Std-seg have invalid strand information");
282  return false;
283  }
284  }
285  }
286  return true;
287 }
288 
289 
291 {
292  // TODO this can be optimized with the new API
293  const TAlnIdMap::TIdVec* ids = NULL;
294  for( size_t i = 0; i < m_AlnIdMap->size(); i++ ) {
295  const TAlnIdMap::TIdVec& ids_2 = (*m_AlnIdMap)[i];
296  if(ids == NULL) {
297  ids = &ids_2;
298  } else {
299  if(*ids != ids_2) {
300  return false;
301  }
302  }
303  }
304  return true;
305 }
306 
307 
308 void CHitMatrixDataSource::SetParams(const SParams& params, bool create_hits)
309 {
310  bool based_ch = (m_Params.m_RowBased != params.m_RowBased);
311  bool orient_ch = (m_Params.m_Direction != params.m_Direction);
312 
313  if(based_ch || orient_ch) {
314  /// save IDs for later use
315  unique_ptr<IHitSeqId> s_id(m_SubjectID.release());
316  unique_ptr<IHitSeqId> q_id(m_QueryID.release());
317 
318  // delete Hits and clear IDs
319  x_ClearHits();
320 
321  if(based_ch) {
323 
324  s_id.reset();
325  q_id.reset();
326 
327  m_Params.m_RowBased = params.m_RowBased;
328  m_SeqIds.clear();
329 
331  }
332 
334 
335  // recreate Hits if needed (and if possible)
336  if(create_hits && s_id.get() && q_id.get()) {
337  SelectIds(*s_id, *q_id);
338  }
339  }
340 }
341 
342 
344 {
345  return m_CanCreateRowBased;
346 }
347 
348 
350 {
351  params = m_Params;
352 }
353 
354 
355 /// builds a map of all scores found in the alignments
357 {
358  for(size_t i = 0; i < m_Aligns.size(); i++ ) {
359  const CSeq_align& align = *m_Aligns[i];
360 
361  if(align.CanGetSegs()) {
362  const CSeq_align::TScore& scores = align.GetScore();
363  ITERATE(CSeq_align::TScore , itSc, scores) {
364  const CScore& score = **itSc;
365  if(score.CanGetId()) {
366  const CObject_id& id = score.GetId();
367 
368  // get score value
369  double value = -1;
370  const CScore::C_Value& val = score.GetValue();
371  switch(val.Which()) {
372  case CScore::C_Value::e_Real: value = val.GetReal(); break;
373  case CScore::C_Value::e_Int: value = val.GetInt(); break;
374  default: _ASSERT(false);
375  }
376 
377  TScoreMap::iterator it_score = m_ScoreMap.find(id.GetStr());
378  if(it_score == m_ScoreMap.end()) {
379  // new Score found - insert it
380  m_ScoreMap[id.GetStr()] = make_pair(value, value);
381  } else {
382  // update value range
383  TValueRange& range = it_score->second;
384  range.first = min(range.first, value);
385  range.second = max(range.second, value);
386  }
387  }
388  }
389  }
390  }
391 }
392 
393 
395 {
396 public:
398  : m_Id(&id) {}
399 
401  : m_Id(id) {}
402 
404  : m_Id(id) {}
405 
406  bool operator()(const CSeq_id& id) const
407  {
408  return m_Id->Equals(id);
409  }
411  {
412  return m_Id->Equals(*id);
413  }
414  bool operator()(IHitSeqId* id) const
415  {
416  return id->GetSeqId()->Equals(*m_Id);
417  }
418 protected:
420 };
421 
422 
424  const IHitSeqId& id,
425  TSeqIdRefVector& ids) const
426 {
427  _ASSERT(align.CanGetSegs());
428  typedef CSeq_align::C_Segs TSegs;
429 
430  switch(align.GetSegs().Which()) {
431  case TSegs::e_Denseg:
432  case TSegs::e_Dendiag:
433  case TSegs::e_Sparse:
434  case TSegs::e_Spliced:
435  case TSegs::e_Std: {
436  const CSeq_id& seq_id = *id.GetSeqId();
437  const TAlnIdMap::TIdVec& id_vec = (*m_AlnIdMap)[align];
438  int self_count = 0;
439 
440  ITERATE(TAlnIdMap::TIdVec, it, id_vec) {
441  const IAlnSeqId& aln_id = **it;
442  const CSeq_id& id = aln_id.GetSeqId();
443  CSeqIdEquals Eq(id);
444  bool add = true;
445 
446  if(Eq(seq_id)) {
447  self_count++;
448  add = (self_count == 2);
449  }
450  if(add) {
451  if(ids.end() == std::find_if(ids.begin(), ids.end(), Eq)) {
452  ids.push_back(CConstRef<CSeq_id>(&id));
453  }
454  }
455  }
456 
457  if(self_count == 0) {
458  ids.clear();
459  }
460  break;
461  }
462  default:
463  break;
464  }
465 }
466 
467 
469  TSeqIdRefVector& ids, bool unique) const
470 {
471  _ASSERT(align.CanGetSegs());
472  typedef CSeq_align::C_Segs TSegs;
473 
474  switch(align.GetSegs().Which()) {
475  case TSegs::e_Denseg:
476  case TSegs::e_Dendiag:
477  case TSegs::e_Sparse:
478  case TSegs::e_Spliced:
479  case TSegs::e_Std: {
480  const TAlnIdMap::TIdVec& id_vec = (*m_AlnIdMap)[align];
481  ITERATE(TAlnIdMap::TIdVec, it, id_vec) {
482  const IAlnSeqId& aln_id = **it;
483  const CSeq_id& id = aln_id.GetSeqId();
484 
485  if(unique) {
486  CSeqIdEquals Eq(id);
487  if(ids.end() == std::find_if(ids.begin(), ids.end(), Eq)) {
488  ids.push_back(CConstRef<CSeq_id>(&id));
489  }
490  } else {
491  ids.push_back(CConstRef<CSeq_id>(&id));
492  }
493  }
494  break;
495  }
496  default:
497  break;
498  }
499 }
500 
501 
502 // iterates on Seq-aligns and builds a unique set of Seq-ids
504  const SParams& params) const
505 {
506  if(params.m_RowBased) {
507  /// create CHitSeqRowId based on the first Seq-align (all others must be
508  /// identical)
509  if(m_Aligns.size()) {
510  const CSeq_align& align = *m_Aligns[0];
511  // alignment must be of type Dense-seg
512 
513  TSeqIdRefVector aln_ids;
514  x_GetSeqAlignIds(align, aln_ids, false);
515 
516  for( size_t row = 0; row < aln_ids.size(); row++ ) {
517  const CSeq_id& seq_id = *aln_ids[row];
518  seq_ids.push_back(new CHitSeqRowId((int)row, seq_id));
519  }
520  }
521  } else {
522  // ID-based alignment
523  for(size_t i = 0; i < m_Aligns.size(); i++ ) {
524  const CSeq_align& align = *m_Aligns[i];
525 
526  TSeqIdRefVector aln_ids;
527  x_GetSeqAlignIds(align, aln_ids, true);
528 
529  ITERATE(TSeqIdRefVector, itID, aln_ids) {
530  CConstRef<CSeq_id> seq_id = *itID;
531  // find this ID in the vector
532  CSeqIdEquals Eq(seq_id);
533  if(seq_ids.end() == std::find_if(seq_ids.begin(), seq_ids.end(), Eq)) {
534  seq_ids.push_back(new CHitSeqId(*seq_id));
535  }
536  }
537  }
538  }
539 
540  //TODO prefetching bioseq handles
541  CScope& scope = const_cast<CScope&>(m_Scope.GetObject());
542  ITERATE( TIdVector, it, m_SeqIds ){
543  IHitSeqId& id = **it;
544  m_HIdToHandleMap.insert(scope.GetBioseqHandle(*id.GetSeqId()));
545  }
546 }
547 
548 
550 {
551  return m_SeqIds;
552 }
553 
554 
556  const SParams& params) const
557 {
558  if(params == m_Params) {
559  // params are the same as those currently set in the Data Source
560  // make a copy of objects in m_SeqIds
561  ids = m_SeqIds;
562  for( size_t i = 0; i < ids.size(); i++ ) {
563  ids[i] = ids[i]->Clone();
564  }
565  } else {
566  x_CreateIds(ids, params);
567  }
568 }
569 
570 
572 {
574 }
575 
576 
577 // returns ids for all sequences aligned with the given sequence
579  const TIdVector& ids,
580  TIdVector& aligned,
581  const SParams& params)
582 {
583  //CStopWatch sw; sw.Start();
584  aligned.clear();
585 
586  if(params.m_RowBased) {
587  ITERATE(TIdVector, it, ids) {
588  if(! (*it)->Equals(id)) {
589  aligned.push_back(*it);
590  }
591  }
592  } else {
593  // Seq-id based identification
594  TIdVector::const_iterator it_id = find_id(ids, id);
595 
596  if(it_id != ids.end()) {
597  // TODO get a list of Seq-aligns from m_AlnStats
598 
599 
600  CIRef<IAlnSeqId> aln_id(new CAlnSeqId(*id.GetSeqId()));
601  const TAlnStats::TIdVec& aligned_ids = m_AlnStats->GetAlignedIds(aln_id);
602  ITERATE(TAlnStats::TIdVec, it_a, aligned_ids) {
603  const IAlnSeqId& aa = **it_a;
604  aligned.push_back(new CHitSeqId(aa.GetSeqId()));
605  }
606 
607  /* old code
608  // there are alignments with this Id - iterate by all seq_aligns
609  ITERATE(TAlignVector, it, m_Aligns) {
610  const CSeq_align& align = **it;
611 
612  // get the list of Seq-ids in this Seq-align
613  TSeqIdRefVector aln_ids;
614  x_GetAlignedSeqIds(align, id, aln_ids);
615 
616  for( size_t index = 0; index < aln_ids.size(); index++ ) {
617  CHitSeqId s_id(*aln_ids[index]);
618 
619  TIdVector::const_iterator it = find_id(aligned, s_id);
620  if(it == aligned.end()) { // not in our vector yet
621  aligned.push_back(s_id.Clone());
622  }
623  }
624  }
625  */
626  } else {
627  _ASSERT(false);
628  }
629  }
630  //double t = sw.Elapsed();
631  //LOG_POST(Info << "CHitMatrixDataSource::GetAlignedIdsForParams() - " << (1000.0 * t) << " ms");
632 }
633 
634 
636 {
637  if(! m_SeqIds.empty()) {
638  size_t index_2 = min<size_t>(1, m_SeqIds.size() - 1);
639  return SelectIds(*m_SeqIds[0], *m_SeqIds[index_2]);
640  }
641  return false;
642 }
643 
644 
645 /// Creates Hits for all pairwise alignments between selected by Query and Subject IDs
647 {
648  //LOG_POST("CHitMatrixDataSource::SelectIds subject " << GetLabel(s_id)
649  // << ", query " << GetLabel(q_id));
650  x_ClearHits();
651 
652  // check that both ids exist in data
653  TIdVector::const_iterator itQuery = find_id(m_SeqIds, q_id);
654  TIdVector::const_iterator itSubject = find_id(m_SeqIds, s_id);
655 
656  bool b_ok = itQuery != m_SeqIds.end() && itSubject != m_SeqIds.end();
657 
658  if(b_ok) {
659  // iterate though Seq_aligns and create Hit Adapters
660  m_SubjectID.reset(s_id.Clone());
661  m_QueryID.reset(q_id.Clone());
662 
663  m_QueryHandle = m_Scope->GetBioseqHandle(*m_QueryID->GetSeqId());
664  m_SubjectHandle = m_Scope->GetBioseqHandle(*m_SubjectID->GetSeqId());
665 
666  // iterate by all seq_aligns
668  const CSeq_align& align = **it;
669  x_CreateHits(align);
670  }
671 
673  }
674  return b_ok;
675 }
676 
677 
679 {
680  _ASSERT(align.IsSetSegs());
681 
682  if(m_Params.m_RowBased) {
683  /// Create a hit for exactly one pair of rows
684  const CHitSeqRowId* q_id =
685  dynamic_cast<const CHitSeqRowId*>(m_QueryID.get());
686  const CHitSeqRowId* s_id =
687  dynamic_cast<const CHitSeqRowId*>(m_SubjectID.get());
688  _ASSERT(q_id && s_id);
689 
690  int q_index = q_id->GetRow();
691  int s_index = s_id->GetRow();
692 
693  x_CreateHit(align, q_index, s_index);
694  } else {
695  /// create hits for all possible unique combinations or rows
696  /// corresponding to the given Seq-ids
697  TSeqIdRefVector aln_ids;
698  x_GetSeqAlignIds(align, aln_ids, false);
699 
700  // find all rows in the Seq-align where subject ID occurs
701  vector<size_t> s_rows;
702  size_t n_rows = aln_ids.size();
703  for( size_t s_index = 0; s_index < n_rows; s_index++ ) {
704  const CSeq_id& id = *aln_ids[s_index];
705  if(m_SubjectID->GetSeqId()->Equals(id)) {
706  s_rows.push_back(s_index);
707  }
708  }
709 
710  // for every row corresponding to the Query Seq-id
711  for( size_t q_index = 0; q_index < n_rows; q_index++ ) {
712  const CSeq_id& id = *aln_ids[q_index];
713 
714  if(m_QueryID->GetSeqId()->Equals(id)) {
715  // create Hits for all rows corresponding to the Subject Seq-id
716  for( size_t i = 0; i < s_rows.size(); i++ ) {
717  size_t s_index = s_rows[i];
718  x_CreateHit(align, q_index, s_index);
719  }
720  }
721  }
722  }
723 }
724 
725 
726 // creates a Hits from the two specified rows of the alignment
728  size_t q_index, size_t s_index)
729 {
730  if(align.IsSetSegs()) {
731  typedef CSeq_align::C_Segs TSegs;
732 
733  switch(align.GetSegs().Which()) {
734  case TSegs::e_Std:
735  x_CreateHit_Std(align, q_index, s_index);
736  break;
737 
738  case TSegs::e_Denseg:
739  case TSegs::e_Dendiag:
740  case TSegs::e_Sparse:
741  case TSegs::e_Spliced:
742  x_CreateHits_Diagonal(align, q_index, s_index);
743  break;
744 
745  default:
746  LOG_POST(Info << "CHitMatrixDataSource() - this type of Seq-align"
747  << " is not supported");
748  break;
749  }
750  }
751 }
752 
753 
754 /// creates a single hit from 2 rows of a Seq-align
756  size_t q_index, size_t s_index)
757 {
758  _ASSERT(align.GetSegs().IsDenseg());
759 
760  const CDense_seg& denseg = align.GetSegs().GetDenseg();
761  bool aln = CDenseSegHit::HasAlignment(denseg, q_index, s_index, m_Params.m_Direction);
762 
763  if(aln) {
764  m_Hits.push_back(new CDenseSegHit(align, (int)q_index, (int)s_index));
765  }
766 }
767 
768 
770 {
772 
773  ITERATE(list< CRef< CStd_seg > >, it, std_list) {
774  const CStd_seg& stdseg = **it;
775  TSeqRange r(stdseg.GetSeqStart((int)row), stdseg.GetSeqStop((int)row));
776 
777  if(range.Empty()) {
778  range = r;
779  } else {
780  range.CombineWith(r);
781  }
782  }
783  return range;
784 }
785 
786 
788  size_t q_index, size_t s_index)
789 {
790  _ASSERT(align.GetSegs().IsStd());
791 
792  typedef list< CRef< CStd_seg > > TStd;
793  const TStd& std_list = align.GetSegs().GetStd();
794 
795  bool aln = CStdSegHit::HasAlignment(std_list, q_index, s_index, m_Params.m_Direction);
796 
797  if(aln) {
798  m_Hits.push_back(new CStdSegHit(align, (int)q_index, (int)s_index));
799  }
800 }
801 
802 
804  size_t q_index, size_t s_index)
805 {
806  TAlnIdMap::TIdVec ids = (*m_AlnIdMap)[align];
807  TAlnSeqIdIRef& q_id = ids[q_index];
808  TAlnSeqIdIRef& s_id = ids[s_index];
809 
810  CRef<CPairwiseAln> pairwise(new CPairwiseAln(q_id, s_id));
811  ConvertSeqAlignToPairwiseAln(*pairwise, align, (int)q_index, (int)s_index, m_Params.m_Direction);
812  if( ! pairwise->empty()) {
813  m_Hits.push_back(new CDiagHit(align, *pairwise));
814  }
815 }
816 
817 
818 // Determines the smallest rectangle in {query, subject} space that contains
819 // all the hits
821 {
822  for( size_t i = 0; i < m_Hits.size(); i++ ) {
823  IHit& hit = *m_Hits[i];
824 
825  size_t n_elem = hit.GetElemsCount();
826  for( size_t j = 0; j < n_elem; j++ ) {
827  const IHitElement& elem = hit.GetElem(j);
828 
829  TSeqPos q_from = elem.GetQueryStart();
830  TSeqPos q_to = q_from + (TSeqPos) elem.GetQueryLength() - 1;
831 
832  TSeqPos s_from = elem.GetSubjectStart();
833  TSeqPos s_to = s_from + (TSeqPos) elem.GetSubjectLength() - 1;
834 
835  if(m_QueryHitsRange.Empty()) {
836  m_QueryHitsRange.Set(q_from, q_to);
837  } else {
838  m_QueryHitsRange += TSeqRange(q_from, q_to);
839  }
840  if(m_SubjectHitsRange.Empty()) {
841  m_SubjectHitsRange.Set(s_from, s_to);
842  } else {
843  m_SubjectHitsRange += TSeqRange(s_from, s_to);
844  }
845  }
846  }
847 }
848 
849 
851 {
852  return m_Scope.GetObject();
853 }
854 
855 
857 {
858  return *m_QueryID;
859 }
860 
861 
863 {
864  return *m_SubjectID;
865 }
866 
867 
869 {
870  return m_QueryHandle;
871 }
872 
873 
875 {
876  return m_SubjectHandle;
877 }
878 
879 
881 {
882  _ASSERT(m_Scope);
883 
884  CConstRef<CSeq_id> seq_id = id.GetSeqId();
885  if(seq_id) {
886  return m_Scope->GetBioseqHandle(*seq_id);
887  }
888  return CBioseq_Handle(); // empty
889 }
890 
891 
893 {
894  _ASSERT(m_Scope);
895 
896  string s;
897  const CHitSeqRowId* hit_id = dynamic_cast<const CHitSeqRowId*>(&id);
898  if(hit_id) {
899  s = "Row " + NStr::IntToString(hit_id->GetRow()) + ", ";
900  }
902  return s;
903 }
904 
905 
907 {
908  return m_Hits;
909 }
910 
911 
912 void CHitMatrixDataSource::GetScoreNames(vector<string>& names) const
913 {
915  names.push_back(it->first);
916  }
917 }
918 
919 
921  CHitMatrixDataSource::GetScoreRange(const string& name) const
922 {
924  _ASSERT(it != m_ScoreMap.end());
925  return it->second;
926 }
927 
928 
930 {
931  return m_QueryHitsRange;
932 }
933 
934 
936 {
937  return m_SubjectHitsRange;
938 }
939 
940 
void ConvertSeqAlignToPairwiseAln(CPairwiseAln &pairwise_aln, const objects::CSeq_align &sa, objects::CSeq_align::TDim row_1, objects::CSeq_align::TDim row_2, CAlnUserOptions::EDirection direction=CAlnUserOptions::eBothDirections, const TAlnSeqIdVec *ids=0)
Build pairwise alignment from the selected rows of a seq-align.
TAlnSeqIdExtract::TIdVec TIdVec
Container (vector) of seq-ids.
Definition: aln_tests.hpp:62
Default IAlnSeqId implementation based on CSeq_id_Handle.
Definition: aln_seqid.hpp:116
_TAlnIdVec::TIdVec TIdVec
Vector of ids used in all alignments.
Definition: aln_stats.hpp:70
@ eBothDirections
No filtering: use both direct and reverse sequences.
CBioseq_Handle –.
Wraps a CSeq_align containing CDense_seg and provides a simple API for interpreting it as a pairwise ...
Definition: dense_hit.hpp:93
static bool HasAlignment(const objects::CDense_seg &denseg, size_t q_index, size_t s_index, TDirection dir)
CDenseSegHit.
Definition: dense_hit.cpp:46
objects::CBioseq_Handle m_QueryHandle
Definition: dense_ds.hpp:219
void x_CreateHit_Denseg(const objects::CSeq_align &align, size_t q_index, size_t s_index)
creates a single hit from 2 rows of a Seq-align
Definition: dense_ds.cpp:755
void x_GetSeqAlignIds(const objects::CSeq_align &align, TSeqIdRefVector &ids, bool unique) const
Definition: dense_ds.cpp:468
bool x_GoodSeg(const TStd &std_list)
Definition: dense_ds.cpp:270
TSeqRange GetStdSegRange(const TStd &std_list, size_t row)
Definition: dense_ds.cpp:769
TAlignVector m_Aligns
Definition: dense_ds.hpp:201
vector< CConstRef< objects::CSeq_id > > TSeqIdRefVector
Definition: dense_ds.hpp:153
virtual bool SelectDefaultIds()
selects default query and subject
Definition: dense_ds.cpp:635
TSeqRange m_SubjectHitsRange
Definition: dense_ds.hpp:223
objects::CBioseq_Handle m_SubjectHandle
Definition: dense_ds.hpp:218
virtual void GetAlignedIdsForParams(const IHitSeqId &id, const TIdVector &ids, TIdVector &aligned, const SParams &params)
returns a list of ids that will be aligned with the specified id if the given params are applied.
Definition: dense_ds.cpp:578
void x_CreateHits_Diagonal(const objects::CSeq_align &align, size_t q_index, size_t s_index)
Definition: dense_ds.cpp:803
virtual objects::CBioseq_Handle GetSubjectHandle()
Definition: dense_ds.cpp:874
virtual void SetParams(const SParams &params, bool create_hits)
Definition: dense_ds.cpp:308
virtual void GetParams(SParams &params) const
Definition: dense_ds.cpp:349
THitAdapterCont m_Hits
Definition: dense_ds.hpp:221
TValueRange GetScoreRange(const string &name) const
Definition: dense_ds.cpp:921
const THitAdapterCont & GetHits() const
Definition: dense_ds.cpp:906
virtual objects::CScope & GetScope()
Definition: dense_ds.cpp:850
bool x_CanCreateRowBased()
Definition: dense_ds.cpp:290
THIdToHandleMap m_HIdToHandleMap
list of Ids for all sequences
Definition: dense_ds.hpp:212
unique_ptr< IHitSeqId > m_QueryID
Definition: dense_ds.hpp:216
void x_CalculateHitsRange()
Definition: dense_ds.cpp:820
void Init(objects::CScope &scope, TAlignVector &aligns)
Definition: dense_ds.cpp:180
void x_CreateHit_Std(const objects::CSeq_align &align, size_t q_index, size_t s_index)
Definition: dense_ds.cpp:787
TScoreMap m_ScoreMap
Definition: dense_ds.hpp:213
virtual string GetLabel(const IHitSeqId &id)
Definition: dense_ds.cpp:892
CAlnIdMap< TAlnPtrVector, TIdExtract > TAlnIdMap
Definition: dense_ds.hpp:158
TSeqRange GetQueryHitsRange()
Definition: dense_ds.cpp:929
void x_CreateIds(TIdVector &seq_ids, const SParams &params) const
Definition: dense_ds.cpp:503
void x_GetAlignedSeqIds(const CSeq_align &align, const IHitSeqId &id, TSeqIdRefVector &ids) const
Definition: dense_ds.cpp:423
virtual IHitSeqId & GetSubjectId() const
Definition: dense_ds.cpp:862
void x_CreateHits(const objects::CSeq_align &align)
Definition: dense_ds.cpp:678
virtual IHitSeqId & GetQueryId() const
Definition: dense_ds.cpp:856
objects::CSeq_align::TSegs::TStd TStd
Definition: dense_ds.hpp:154
virtual void GetAlignedIds(const IHitSeqId &id, TIdVector &aligned)
Definition: dense_ds.cpp:571
unique_ptr< IHitSeqId > m_SubjectID
Definition: dense_ds.hpp:215
CAlnStats< TAlnIdMap > TAlnStats
Definition: dense_ds.hpp:198
virtual objects::CBioseq_Handle GetBioseqHandle(const IHitSeqId &id)
Definition: dense_ds.cpp:880
void x_TestAndSelectAligns(TAlignVector &aligns)
Definition: dense_ds.cpp:256
TSeqRange GetSubjectHitsRange()
Definition: dense_ds.cpp:935
unique_ptr< TAlnIdMap > m_AlnIdMap
Definition: dense_ds.hpp:204
CAlnSeqIdsExtract< CAlnSeqId > TIdExtract
Definition: dense_ds.hpp:157
virtual objects::CBioseq_Handle GetQueryHandle()
Definition: dense_ds.cpp:868
virtual void GetHitSeqIdsForParams(TIdVector &ids, const SParams &params) const
returns a list of ids that Data Source will create if the given params are applied.
Definition: dense_ds.cpp:555
void x_TestAndSelectAlign(const objects::CSeq_align &align)
Definition: dense_ds.cpp:215
virtual bool SelectIds(const IHitSeqId &q_id, const IHitSeqId &s_id)
Creates Hits for all pairwise alignments between selected by Query and Subject IDs.
Definition: dense_ds.cpp:646
void x_UpdateScoreMap()
builds a map of all scores found in the alignments
Definition: dense_ds.cpp:356
virtual ~CHitMatrixDataSource()
Definition: dense_ds.cpp:121
CRef< objects::CScope > m_Scope
Definition: dense_ds.hpp:202
CHitMatrixDataSource()
CHitMatrixDataSource.
Definition: dense_ds.cpp:115
TSeqRange m_QueryHitsRange
Definition: dense_ds.hpp:224
void x_CreateHit(const objects::CSeq_align &align, size_t q_index, size_t s_index)
Definition: dense_ds.cpp:727
virtual bool CanCreateRowBased() const
Definition: dense_ds.cpp:343
CRef< TAlnStats > m_AlnStats
Definition: dense_ds.hpp:205
virtual void GetScoreNames(vector< string > &names) const
Definition: dense_ds.cpp:912
virtual const TIdVector & GetHitSeqIds()
returns list of all seq ids in the Data Source
Definition: dense_ds.cpp:549
CHitSeqId.
Definition: dense_ds.hpp:66
CConstRef< objects::CSeq_id > m_SeqId
Definition: dense_ds.hpp:77
virtual bool Equals(const IHitSeqId &id) const
CHitSeqId.
Definition: dense_ds.cpp:58
CHitSeqId(const objects::CSeq_id &id)
Definition: dense_ds.hpp:68
virtual IHitSeqId * Clone() const
Definition: dense_ds.cpp:69
CHitSeqRowId.
Definition: dense_ds.hpp:84
virtual bool Equals(const IHitSeqId &id) const
Definition: dense_ds.cpp:91
virtual CConstRef< objects::CSeq_id > GetSeqId() const
returns CSeq_id associated with IHitSeqId
Definition: dense_ds.cpp:107
CHitSeqRowId(int row, const objects::CSeq_id &id)
CHitSeqRowId.
Definition: dense_ds.cpp:77
virtual IHitSeqId * Clone() const
Definition: dense_ds.cpp:101
CConstRef< objects::CSeq_id > m_SeqId
Definition: dense_ds.hpp:97
int GetRow() const
Definition: dense_ds.hpp:94
A pairwise aln is a collection of ranges for a pair of rows.
CScope –.
Definition: scope.hpp:92
C_Value –.
Definition: Score_.hpp:91
Definition: Score.hpp:57
bool operator()(IHitSeqId *id) const
Definition: dense_ds.cpp:414
CSeqIdEquals(const CRef< CSeq_id > &id)
Definition: dense_ds.cpp:400
CConstRef< CSeq_id > m_Id
Definition: dense_ds.cpp:419
CSeqIdEquals(const CConstRef< CSeq_id > &id)
Definition: dense_ds.cpp:403
CSeqIdEquals(const CSeq_id &id)
Definition: dense_ds.cpp:397
bool operator()(const CSeq_id &id) const
Definition: dense_ds.cpp:406
bool operator()(CConstRef< CSeq_id > &id) const
Definition: dense_ds.cpp:410
Wraps a CSeq_align containing CStd_seg and provides a simple API for interpreting it as a pairwise al...
Definition: std_seg_hit.hpp:96
static bool HasAlignment(const TStd &std_list, size_t q_index, size_t s_index, TDirection dir)
Definition: std_seg_hit.cpp:56
TSignedSeqPos GetSeqStop(TDim row) const
Definition: Std_seg.cpp:99
TSignedSeqPos GetSeqStart(TDim row) const
Definition: Std_seg.cpp:76
Wrapper interface for seq-ids used in alignments.
Definition: aln_seqid.hpp:54
virtual const CSeq_id & GetSeqId(void) const =0
Get CSeq_id.
IHitElement.
Definition: hit.hpp:50
virtual TSeqPos GetQueryLength() const =0
virtual TSignedSeqPos GetQueryStart() const =0
virtual TSeqPos GetSubjectLength() const =0
virtual TSignedSeqPos GetSubjectStart() const =0
vector< CConstRef< objects::CSeq_align > > TAlignVector
vector< IHitSeqId * > TIdVector
vector< IHit * > THitAdapterCont
pair< double, double > TValueRange
IHitSeqId - abstract identifier of a sequence in an alignment.
virtual IHitSeqId * Clone() const =0
IHit.
Definition: hit.hpp:69
virtual TDim GetElemsCount() const =0
virtual const IHitElement & GetElem(TDim elem_index) const =0
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
void clear()
Definition: set.hpp:153
Include a standard set of the NCBI C++ Toolkit most basic headers.
bool FIdEqual(const CConstRef< CSeq_id > &id1, const CConstRef< CSeq_id > &id2)
Definition: dense_ds.cpp:209
USING_SCOPE(ncbi::objects)
TIDRefCont::const_iterator find_id(const TIDRefCont &cont, const IHitSeqId &id)
Definition: dense_ds.cpp:170
static const struct name_t names[]
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
@ eDefault
Definition: label.hpp:73
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
static TThisType GetEmpty(void)
Definition: range.hpp:306
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
bool Empty(void) const
Definition: range.hpp:148
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5086
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
vector< CRef< CSeq_loc > > TLoc
Definition: Std_seg_.hpp:93
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Std_seg_.hpp:357
bool IsSetSegs(void) const
Check if a value has been assigned to Segs data member.
Definition: Seq_align_.hpp:909
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
Definition: Seq_align_.hpp:915
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:752
const TValue & GetValue(void) const
Get the Value member data.
Definition: Score_.hpp:465
bool CanGetId(void) const
Check if it is safe to call GetId method.
Definition: Score_.hpp:438
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
list< CRef< CSeq_align > > Tdata
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const TId & GetId(void) const
Get the Id member data.
Definition: Score_.hpp:444
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
int i
range(_Ty, _Ty) -> range< _Ty >
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
T max(T x_, T y_)
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define row(bind, expected)
Definition: string_bind.c:73
#define _ASSERT
Modified on Tue Jul 09 14:25:40 2024 by modify_doxy.py rev. 669887