NCBI C++ ToolKit
alignment_job.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: alignment_job.cpp 47784 2024-08-30 18:24:47Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Liangshou Wu
27  *
28  * File Description:
29  *
30  */
31 #include <ncbi_pch.hpp>
32 
33 
49 
50 #include <gui/objutils/utils.hpp>
52 
57 #include <objmgr/align_ci.hpp>
58 #include <objmgr/util/sequence.hpp>
59 #include <objmgr/util/feature.hpp>
61 #include <objmgr/graph_ci.hpp>
62 #include <objmgr/impl/synonyms.hpp>
63 
66 
74 
75 #include <corelib/ncbiutil.hpp>
77 
78 #include <math.h>
79 
81 #include <future>
82 #include <corelib/rwstream.hpp>
83 #include <util/checksum.hpp>
85 #include <corelib/ncbiapp.hpp>
86 
89 
90 /// Always show individual alignments when the zoom level is at or below this threshold.
91 /// Unit: bases per screen pixel (BPP)
92 static const double kSequenceZoomLevel = 1.0/8.;
93 
94 /// Scale at which alignment details are made visible.
95 /// Alignment details: unaligned tails, mate pairs
96 static const double kDetailsZoomLevel = 2;
97 
98 /// Maximal number of alignments allowed to be loaded, to avoid potential memory
99 /// and performance problems.
100 /// Unit: number of alignments
101 static const int kMaxAlignmentLoaded = 250000;
102 
103 
104 
105 ///////////////////////////////////////////////////////////////////////////////
106 /// CSGAlignmentJob
107 ///////////////////////////////////////////////////////////////////////////////
108 
110  objects::CBioseq_Handle handle,
111  const objects::SAnnotSelector& sel,
112  const TSeqRange& range, TModelUnit window,
113  TSignedSeqPos &maxStartTail, TSignedSeqPos &maxEndTail)
114  : CSGAnnotJob(desc, handle, sel, range)
115  , m_Window(window)
116  , m_AlignLimit(-1)
117  , m_SmearOverLimit(false)
118  , m_LinkPair(false)
119  , m_LoadCoverageGraph(true)
120  , m_HideSra(CAlignmentConfig::eHide_None)
121  , m_UnalignedTailsMode(CAlignmentConfig::eTails_ShowGlyph)
122  , m_ShowSecondPass(true)
123  , m_MaxStartTail(maxStartTail)
124  , m_MaxEndTail(maxEndTail)
125 {
126 }
127 
128 
130  objects::CBioseq_Handle handle,
131  const objects::SAnnotSelector& sel,
132  const TSeqRange& range, TModelUnit window,
133  int align_limit, bool smear_if_overlimit,
134  bool link_pair,
135  TSignedSeqPos &maxStartTail, TSignedSeqPos &maxEndTail)
136  : CSGAnnotJob(desc, handle, sel, range)
137  , m_Window(window)
138  , m_AlignLimit(align_limit)
139  , m_SmearOverLimit(smear_if_overlimit)
140  , m_LinkPair(link_pair)
141  , m_LoadCoverageGraph(false)
142  , m_HideSra(CAlignmentConfig::eHide_None)
143  , m_UnalignedTailsMode(CAlignmentConfig::eTails_ShowGlyph)
144  , m_ShowSecondPass(true)
145  , m_MaxStartTail(maxStartTail)
146  , m_MaxEndTail(maxEndTail)
147 {}
148 
149 
151  objects::CBioseq_Handle handle,
152  const objects::SAnnotSelector& sel,
153  const TSeqRange& range,
154  TModelUnit window,
155  vector< CRef<CAlignGlyph> > aligns,
156  TSignedSeqPos &maxStartTail, TSignedSeqPos &maxEndTail)
157  : CSGAnnotJob(desc, handle, sel, range)
158  , m_Aligns(aligns)
159  , m_Window(window)
160  , m_AlignLimit(-1)
161  , m_SmearOverLimit(false)
162  , m_LinkPair(false)
163  , m_LoadCoverageGraph(false)
164  , m_HideSra(CAlignmentConfig::eHide_None)
165  , m_UnalignedTailsMode(CAlignmentConfig::eTails_ShowGlyph)
166  , m_ShowSecondPass(true)
167  , m_MaxStartTail(maxStartTail)
168  , m_MaxEndTail(maxEndTail)
169 {}
170 
171 
/// Collect the names of the alignment annotations found for a sequence range.
///
/// Switches the caller's selector into name-collecting mode (sel is taken by
/// non-const reference and is modified), creates an alignment iterator over
/// the given range and walks the annotation names it reports.
///
/// @param handle  sequence whose alignment annotations are inspected
/// @param range   sequence range passed to the alignment iterator
/// @param sel     annotation selector; SetCollectNames() is called on it
///
/// NOTE(review): the listing this file was taken from dropped the line that
/// declares the last parameter (`names`, the map that receives the results)
/// and the body of the `else` branch below (listing lines 175, 185-186).
172 void CSGAlignmentJob::GetAnnotNames(const objects::CBioseq_Handle& handle,
173  const TSeqRange& range,
174  objects::SAnnotSelector& sel,
176 {
177  sel.SetCollectNames();
178  CAlign_CI aln_iter(handle, range, sel);
179  ITERATE (CAlign_CI::TAnnotNames, iter, aln_iter.GetAnnotNames()) {
180  if (iter->IsNamed()) {
// Named annotations whose name contains "@@" are skipped; all other
// names are recorded with an empty title.
181  if (iter->GetName().find("@@") == string::npos) {
182  names.insert(TAnnotNameTitleMap::value_type(iter->GetName(), ""));
183  }
184  } else {
// Handling of unnamed annotations is not visible in this listing.
187  }
188  }
189 }
190 
191 
192 void CSGAlignmentJob::SetSortBy(const string& sort_by)
193 {
194  if (sort_by.empty()) return;
195 
196  size_t found = sort_by.find_first_of('|');
197  string sorter_type = NStr::TruncateSpaces(sort_by.substr(0, found));
198  transform(sorter_type.begin(), sorter_type.end(), sorter_type.begin(), ::tolower);
199  string sort_str = kEmptyStr;
200  if (found != string::npos) {
201  sort_str = sort_by.substr(found + 1);
202  }
203 
204  m_Sorter.Reset(CAlignSorterFactory::CreateAlignSorter(sorter_type, sort_str));
205 }
206 
/// Determine the alignment type by inspecting a single alignment of a sequence.
///
/// The selector is limited to one result, the first alignment over the whole
/// sequence is fetched, and the molecule type of each of its rows is examined
/// to derive the returned type. In the same pass the rows are probed (last row
/// first) for a graph annotation; when one is found, has_quality_map is set.
///
/// @param handle           sequence whose alignments are inspected
/// @param sel              annotation selector; modified (max size is set to 1
///                         and name collection is switched off)
/// @param has_quality_map  [out] set to true when a graph is found on a row;
///                         the visible code never sets it to false, so the
///                         caller is presumably expected to initialize it
/// @param isFastConfig     when true, the alignment is not inspected at all
///
/// NOTE(review): this listing dropped several lines of the function: the
/// return-type line, the declarations of `type`, `this_type` and `g_sel`, the
/// body of the fast-config branch and the statement executed when row types
/// differ (listing lines 207, 217, 221, 242, 271, 282).
208 CSGAlignmentJob::GetAlignType(const objects::CBioseq_Handle& handle,
209  objects::SAnnotSelector& sel,
210  bool& has_quality_map,
211  bool isFastConfig)
212 {
213  // LOG_POST("<<<<");
214  sel.SetMaxSize(1);
215  sel.SetCollectNames(false);
216 
218 
219  //!! tmp measure! TMS will hopefully serve us alignment types later
220  if(isFastConfig) {
222  } else {
223  // LOG_POST("Creating alignment iterator");
224  CAlign_CI aln_iter(handle, TSeqRange::GetWhole(), sel);
225 
226  // LOG_POST("Creating alignment iterator done");
227  if (aln_iter) {
228  const CSeq_align& align = *aln_iter;
229  int num_row = 0;
// A failure in CheckNumRows() is deliberately swallowed: num_row stays 0
// and the function returns through the "fewer than two rows" exit below.
230  try {
231  num_row = align.CheckNumRows();
232  } catch (CException&) {
233  }
234 
235  if (num_row < 2) return type;
236 
237  CScope& scope = handle.GetScope();
238 
239  // check align type
240  for (int row = 0; row < num_row; ++row) {
241  // LOG_POST("Getting type from row " << row);
243  CBioseq_Handle row_handle = scope.GetBioseqHandle(align.GetSeq_id(row));
244  // LOG_POST("done");
// Rows whose sequence cannot be resolved do not contribute to the type.
245  if ( !row_handle ) continue;
246 
247  switch (row_handle.GetBioseqCore()->GetInst().GetMol())
248  {
249  case objects::CSeq_inst::eMol_dna:
250  case objects::CSeq_inst::eMol_rna:
251  case objects::CSeq_inst::eMol_na:
252  this_type = IAlnExplorer::fDNA;
253  break;
254 
255  case objects::CSeq_inst::eMol_aa:
256  this_type = IAlnExplorer::fProtein;
257  break;
258 
259  default:
260  break;
261  }
262 
263  if (this_type == IAlnExplorer::fMixed) {
264  type = this_type;
265  break;
266  }
267 
// The first row establishes the type; a later row of a different type
// ends the scan (the statement executed in that case is not visible here).
268  if (row == 0) {
269  type = this_type;
270  } else if (this_type != type) {
272  break;
273  }
274  }
275  // LOG_POST("Processing rows done");
276 
277  // check if there is quality graph
// This code is already inside the else-branch of `if(isFastConfig)`, so the
// condition below is always true at this point.
278  if(!isFastConfig) {
279  for (int row = num_row - 1; row >= 0; --row) {
280  // LOG_POST("Checking for quality graphs in row " << row);
281  // LOG_POST("Creating graph selector");
283  // LOG_POST("Creating graph selector done");
284  g_sel.AddUnnamedAnnots();
285  g_sel.SetCollectNames();
// NOTE(review): g_sel is configured above, but the two statements that
// follow use `sel` (the alignment selector) both for SetMaxSize and for
// the graph iterator. Confirm whether g_sel was intended here.
286  sel.SetMaxSize(1);
287  CBioseq_Handle row_handle = scope.GetBioseqHandle(align.GetSeq_id(row));
288  if (row_handle) {
289  // LOG_POST("Creating graph iterator");
290  CGraph_CI graph_iter(row_handle, align.GetSeqRange(row), sel);
291  // LOG_POST("Creating graph iterator done");
// The first row that yields exactly one graph ends the search.
292  if (graph_iter && graph_iter.GetSize() == 1) {
293  has_quality_map = true;
294  break;
295  }
296  }
297  }
298  // LOG_POST("Checking for quality graphs done");
299  }
300  }
301  }
302  // LOG_POST(">>>>");
303 
304  return type;
305 }
306 
308 {
309  // if (!mapped_align.IsSetExt() || (CAlignmentConfig::eHide_None == m_HideSra))
310  // return false;
312  return false;
313 
314  if (mapped_align.IsSetExt()) {
315  const CSeq_align_Base::TExt& exts = mapped_align.GetExt();
316  ITERATE (CSeq_align_Base::TExt, iter, exts) {
317  if (!(*iter)->GetType().IsStr())
318  continue;
319 
320  if ((CAlignmentConfig::eHide_Duplicates & m_HideSra) && ((*iter)->GetType().GetStr() == "PCR duplicate"))
321  return true;
322 
323  if ((CAlignmentConfig::eHide_BadReads & m_HideSra) && ((*iter)->GetType().GetStr() == "Poor sequence quality"))
324  return true;
325  }
326  }
327  /*
328  if (CAlignmentConfig::eHide_BadReads & m_HideSra) {
329  auto align_ratio = mapped_align.AlignLengthRatio();
330  if (align_ratio < 0.01)
331  return true;
332  }
333  */
334  return false;
335 }
336 
337 static void s_ObjectIdToStr(const CObject_id& id, string& id_str)
338 {
339  if (id.IsStr()) {
340  id_str = id.GetStr();
341  } else {
342  id_str = NStr::NumericToString(id.GetId());
343  }
344 }
345 
346 
347 static bool
348 s_CheckTraceMateAlign(bool match_id,
349  bool is_bam_align,
350  const CSeq_align& align,
351  string& ti,
352  string& mate_ti,
353  const string& expect_mate_ti = "")
354 {
355  //
356  // specific requirements for mate pair alignments:
357  //
358 
359  // must be a pairwise dense-seg
360  if ( !align.GetSegs().IsDenseg() ||
361  align.GetSegs().GetDenseg().GetIds().size() != 2) {
362  return false;
363  }
364  ti = "";
365  mate_ti = "";
366 
367  // there are two possible cases:
368  // 1. trace assemblies that store mate pair info in seq-align::score
369  // 2. cSRA/Bam short reads that store mate pair info in seq-align::ext
370 
371  // first, let's check short reads case
372  if (is_bam_align) {
373  if (align.IsSetExt()) {
374  // exclude secondary alignments
375  ITERATE (CSeq_align::TExt, iter, align.GetExt()) {
376  if ((*iter)->GetType().IsStr() &&
377  (*iter)->GetType().GetStr() == "Secondary") {
378  return false;
379  }
380  }
381  }
382 
383  int i = 1;
384  // assumming the second row is the short read
385  // try second row first.
386  while (ti.empty() && i >=0) {
387  const CSeq_id& seq_id = align.GetSeq_id(i);
388  if (seq_id.IsLocal()) {
389  s_ObjectIdToStr(seq_id.GetLocal(), ti);
390  } else if (seq_id.IsGeneral()) {
391  s_ObjectIdToStr(seq_id.GetGeneral().GetTag(), ti);
392  }
393  --i;
394  }
395 
396  if (ti.empty()) {
397  return false;
398  }
399  }
400 
401  if (match_id) {
402  size_t len = ti.size();
403  mate_ti = ti.substr(0, len - 1) + (ti[len-1] == '1' ? "2" : "1");
404  } else if (align.IsSetExt() && !ti.empty()) {
405  ITERATE (CSeq_align::TExt, iter, align.GetExt()) {
406  if ((*iter)->GetType().IsStr() &&
407  (*iter)->GetType().GetStr() == "Secondary") {
408  return false;
409  }
410  if ((*iter)->GetType().IsStr() &&
411  (*iter)->GetType().GetStr() == "Mate read") {
412  CConstRef<CUser_field> user_field = (*iter)->GetFieldRef("lcl|");
413  if (user_field) {
414  if (user_field->GetData().IsStr()) {
415  mate_ti = user_field->GetData().GetStr();
416  } else if (user_field->GetData().IsInt()) {
417  mate_ti = NStr::NumericToString(user_field->GetData().GetInt());
418  }
419  }
420  }
421  }
422  }
423 
424  if (is_bam_align) {
425  if (mate_ti.empty()) {
426  // check if it is possible to generate mate_ti based on ti
427  // The ti naming pattern should be: xxxxx.[12]
428  size_t len = ti.size();
429  const char last_second_c = ti[len-2];
430  const char last_c = ti[len-1];
431  if (last_second_c == '.' && (last_c == '1' || last_c == '2')) {
432  mate_ti = ti.substr(0, len - 1) + (last_c == '1' ? "2" : "1");
433  }
434  }
435 
436  if (mate_ti.empty()) {
437  return false;
438  } else {
439  return true;
440  }
441  }
442 
443  // then, check the IDs for a trace ID
444  ITERATE (CDense_seg::TIds, iter,
445  align.GetSegs().GetDenseg().GetIds()) {
446  const CSeq_id& id = **iter;
447  if (id.IsGeneral() &&
448  (id.GetGeneral().GetDb() == "ti" ||
449  id.GetGeneral().GetDb() == "TRACE")) {
450  if (id.GetGeneral().GetTag().IsId()) {
451  ti = NStr::IntToString(id.GetGeneral().GetTag().GetId());
452  } else {
453  ti = id.GetGeneral().GetTag().GetStr();
454  }
455  break;
456  }
457  }
458 
459  if (ti.empty()) {
460  return false;
461  }
462 
463  // must have a score field named 'matepair_ti'
464  int mate_ti_int = 0;
465  if ( !align.GetNamedScore("matepair ti", mate_ti_int) &&
466  !align.GetNamedScore("bad matepair ti", mate_ti_int)) {
467  return false;
468  }
469 
470  mate_ti = NStr::SizetToString((unsigned int)mate_ti_int);
471  if (!expect_mate_ti.empty() && mate_ti != expect_mate_ti) {
472  return false;
473  }
474 
475  return true;
476 }
477 
478 
480 {
481  try {
482  if ( !m_Aligns.empty() ) {
485  result->m_Token = m_Token;
486  NON_CONST_ITERATE (vector< CRef<CAlignGlyph> >, iter, m_Aligns) {
487  if (IsCanceled()) {
488  return IAppJob::eCanceled;
489  }
490  CRef<CSGJobResult> single_res = x_LoadAlignmentFeats(**iter);
491  if (single_res) {
492  result->m_Results.push_back(single_res);
493  }
494  }
495 
496  return eCompleted;
497  } else if (m_LoadCoverageGraph) {
498  return x_GetCoverageGraph();
499  }
500 
501  return x_LoadAlignments();
502  } catch (CException& ex) {
503  m_Error.Reset(new CAppJobError(ex.GetMsg()));
504  return eFailed;
505  } catch (std::exception& ex) {
506  m_Error.Reset(new CAppJobError(ex.what()));
507  return eFailed;
508  }
509 
510  return eCompleted;
511 }
512 
514 {
516  return;
517  TSignedSeqPos start_tail(0);
518  TSignedSeqPos end_tail(0);
519  aln_datasource.GetUnalignedTails(start_tail, end_tail);
520  if (start_tail > m_MaxStartTail)
521  m_MaxStartTail = start_tail;
522  if (end_tail > m_MaxEndTail)
523  m_MaxEndTail = end_tail;
524 }
525 
527 {
530  result->m_Token = m_Token;
532 
533  // maximal number alignment allowed to load
536  }
537 
538  // upper limit used to control maximal number of alignments to load
539  int upper_limit = m_AlignLimit;
540 
541  if (m_AlignLimit > -1 && m_Window <= kSequenceZoomLevel) {
542  // If alignments won't be smeared even the number of alignments
543  // is over the limit (!m_SmearOverLimit), and it is not in the
544  // smear mode (m_AlignLimit != -1), and, and the zoom level
545  // is at sequence level (m_Window <= kSequenceZoomLevel), we
546  // shall load all alignments and show them.
547  upper_limit = kMaxAlignmentLoaded;
548 
549  // increase the limit to accept any number of alignment
550  // available in the given range.
551  m_AlignLimit = upper_limit + 1;
552  } else if (m_SmearOverLimit) {
553  // In this case, we need to smear alignments if total number
554  // is over the limit. So we have to load all alignment. This
555  // is for regular alignments only.
556  upper_limit = kMaxAlignmentLoaded;
557  }
558 
559  if (upper_limit <= 0) {
560  return eCompleted;
561  }
562 
563  CSeqGlyph::TObjects& objs = result->m_ObjectList;
564  m_Sel.SetMaxSize(kMaxAlignmentLoaded);//upper_limit + 1);
565  //CStopWatch sw(CStopWatch::eStart);
566 
567  CAlign_CI align_iter(m_Handle, m_Range, m_Sel);
568  int obj_size = (int)align_iter.GetSize();
569  if (obj_size)
570  result->m_DataHandle = align_iter.GetAnnot().GetTSE_Handle();
571  //ERR_POST(Error << "align_iter init " << obj_size << " out of " << (upper_limit + 1) << " alignments in " << sw.AsSmartString(CTimeSpan::eSSP_Millisecond) << "\n");
572 
573  SetTaskTotal(obj_size);
574  SetTaskCompleted(0);
575 
576  if (obj_size > m_AlignLimit) {
577  if (m_SmearOverLimit) {
578  // smear bar is requested when over limit
579  return x_GetAlignSmear(objs, align_iter);
580  }
581  // see if x_GetAlignemnts can eliminate the number of objects
582  // by limiting the number of rows in icicles
583  if (obj_size <= m_AlignLimit * 3) {
584  x_GetAlignments(objs, align_iter, true);
585  if ((int)objs.size() > m_AlignLimit) {
586  objs.clear();
587  }
588  }
589  // Otherwise, do thing. Maybe, pileup display is required in
590  // this case which will be requested and done in other place
591  // ELSE { RETURN NOTHING }
592  } else {
593  TSeqRange wholeRange(0, m_Handle.GetBioseqLength() - 1);
594  bool account_for_tails = (m_UnalignedTailsMode == CAlignmentConfig::eTails_ShowSequence) && (wholeRange != m_Range)
595  && ((m_MaxStartTail > 0) || (m_MaxEndTail > 0));
596  if (account_for_tails) {
597  // Extend the search range to accomodate the tails
598  std::unique_ptr<CAlign_CI> extended_align_iter;
599  TSignedSeqPos ext_from = m_Range.GetFrom() - m_MaxEndTail - 1;
600  TSignedSeqPos ext_to = m_Range.GetTo() + m_MaxStartTail + 1;
601  TSignedSeqPos bs_len = m_Handle.GetBioseqLength() - 1;
602  TSeqRange extendedRange(ext_from > 0 ? ext_from : 0, ext_to < bs_len ? ext_to : bs_len);
603  extended_align_iter.reset(new CAlign_CI(m_Handle, extendedRange, m_Sel));
604  obj_size = (int)extended_align_iter->GetSize();
605  status = x_GetAlignments(objs, *(extended_align_iter.get()), true, true);
606  } else {
607  // return all alignments, this includes the case when it
608  // is at sequence level (m_AlignLimit has been adjusted)
609  status = x_GetAlignments(objs, align_iter, true);
610  }
611  }
612  SetTaskCompleted(obj_size);
613  return status;
614 }
615 
618 {
621 
622  const IAlnGraphicDataSource& aln_mgr = align.GetAlignMgr();
623  // aligned seq-id
624  const CSeq_id& aligned_seq =
625  aln_mgr.GetSeqId(aln_mgr.GetAnchor() == 0 ? 1 : 0);
626  // anchored seq-loc
627  const CSeq_loc& loc = align.GetLocation();
628 
629  try {
630  // map visible range to product feature
631  CSeq_loc_Mapper seq_range_mapper(align.GetAlignment(),
632  (size_t)(1 - align.GetOrigAnchor()), &GetScope());
633  CRef<CSeq_loc> tmp_loc(new CSeq_loc());
634  tmp_loc->SetInt().SetFrom(m_Range.GetFrom());
635  tmp_loc->SetInt().SetTo (m_Range.GetTo());
636  tmp_loc->SetId(*loc.GetId());
637 
638  // our new location for feature iterator
639  CRef<CSeq_loc> seq_loc(new CSeq_loc());
640  CRef<CSeq_loc> mapped_loc = seq_range_mapper.Map(tmp_loc.GetObject());
641 
642  CSeq_loc::TRange mapped_range = mapped_loc->GetTotalRange();
643  seq_loc->SetInt().SetFrom(mapped_range.GetFrom());
644  seq_loc->SetInt().SetTo(mapped_range.GetTo());
645  seq_loc->SetId(aligned_seq);
646 
647  // use newly created location to get the features
648  CFeat_CI feat_iter(GetScope(), seq_loc.GetObject(), m_Sel);
649 
650  if (feat_iter.GetSize() > 0) {
651  //::wxWakeUpIdle();
652  CSeq_loc_Mapper mapper(
653  align.GetAlignment(), (size_t)align.GetOrigAnchor(), &GetScope());
654 
655  CLinkedFeature::TLinkedFeats main_features;
656  CSeqGlyph::TObjects other_features;
657  for ( ; feat_iter; ++feat_iter) {
658  if (IsCanceled()) {
659  return result;
660  }
661  const CMappedFeat& feat = *feat_iter;
662  int subtype = feat.GetFeatSubtype();
663  int type = feat.GetFeatType();
665  CRef<CLinkedFeature> fref( new CLinkedFeature(feat) );
666  main_features.push_back(fref);
667  } else {
668  CRef<CSeqGlyph> g_glyph = x_CreateFeatGlyph(mapper, feat, align);
669  if (g_glyph) {
670  other_features.push_back(g_glyph);
671  }
672  }
673  }
674 
675  // linking the features
676  if ( !CSeqUtils::LinkFeatures(main_features,
677  feature::CFeatTree::eFeatId_by_type, this) ||
678  !x_CreateGeneModels(mapper, main_features, tmp, align)) {
679  return result;
680  }
681 
682  std::copy(other_features.begin(), other_features.end(),
683  back_inserter(tmp));
684  }
685  }
686  catch (CAnnotMapperException&) {
687  /// ignore errors from location mapping
688  return result;
689  }
690 
691  if ( !tmp.empty() ) {
692  result.Reset(new CSGJobResult());
693  result->m_ObjectList.swap(tmp);
694  result->m_Token = m_Token;
695  result->m_Owner = CRef<CSeqGlyph>(&align);
696  }
697  return result;
698 }
699 
700 
703  const CMappedFeat& mapped_feat,
704  CAlignGlyph& align) const
705 {
706  CRef<CSeqGlyph> glyph;
707  // mapped_feat.GetLocation() -- location of the feature on the aligned sequence
708  // mapped_loc -- location of the feature on the main sequence
709  CConstRef<CSeq_loc> mapped_loc = mapper.Map(mapped_feat.GetLocation());
710  if (!mapped_loc->IsNull() && !mapped_loc->IsEmpty()) {
711  CFeatGlyph* feat;
712  bool cds = false;
713  if (mapped_feat.GetData().IsCdregion()) {
714  feat = new CCdsGlyph(mapped_feat, *mapped_loc);
715  cds = true;
716  } else {
717  feat = new CFeatGlyph(mapped_feat, *mapped_loc);
718  }
719 
720  if (mapped_feat.GetOriginalSeq_feat()->IsSetProduct()) {
721 
722  const IAlnGraphicDataSource& aln_mgr = align.GetAlignMgr();
723 
724  int anchor = aln_mgr.GetAnchor();
725  int aligned_seq = anchor == 0 ? 1 : 0;
726 
727  CProjectedMappingInfo projected_info;
728  projected_info.SetAlignmentDataSource(aln_mgr);
729 
730  const CSeq_align& orig_aln = align.GetAlignment();
731  CSeq_loc_Mapper aln_mapper_up(orig_aln, aln_mgr.GetSeqId(anchor), &GetScope());
732  CSeq_loc_Mapper aln_mapper_down(orig_aln, aln_mgr.GetSeqId(aligned_seq), &GetScope());
734  AutoPtr<CSeq_loc_Mapper> prod_mapper_up(0);
735 
736  try {
737  auto id = Ref(new CSeq_id);
738  id->Assign(aln_mgr.GetSeqId(aligned_seq));
739 
740  // aligned_loc - location of the feature on the aligned sequence mapped through the alignments
741  CRef<CSeq_loc> aligned_loc = Ref(new CSeq_loc);
742  for (CSeq_loc_CI lit(mapped_feat.GetLocation()); lit; ++lit) {
743  auto anchor_loc = aln_mapper_up.Map(*lit.GetRangeAsSeq_loc());
744  if (anchor_loc->IsNull())
745  continue;
746  auto mapped_loc = aln_mapper_down.Map(*anchor_loc);
747  if (mapped_loc->IsNull())
748  continue;
749  aligned_loc->Add(*mapped_loc);
750  }
752 
753  for (CSeq_loc_CI lit(*aligned_loc); lit; ++lit) {
754 
755  auto aligned_segment = lit.GetRangeAsSeq_loc();
756  auto anchor_loc = aln_mapper_up.Map(*aligned_segment);
757  if (anchor_loc->IsNull()) {
758  _ASSERT(true);
759  continue;
760  }
761  auto prod_loc = prod_mapper.Map(*aligned_segment);
762  if (prod_loc->IsNull()) {
763  _ASSERT(true);
764  continue;
765  }
766 
767  auto prod_range = prod_loc->GetTotalRange();
768  auto prod_from = prod_range.GetFrom();
769  auto prod_to = prod_range.GetTo();
770 
771  if (cds) {
772  if (aln_mgr.GetBaseWidth(anchor) == 3) {
773  TSeqPos offset_from = 0;
774  TSeqPos offset_to = 0;
775  auto gen_loc = aln_mapper_down.Map(*anchor_loc);
776  // calculate protein range in genomic coordinates
777  // adjust coordinates to account for ending codons
778  // that borrows a base from other segments
779  // the borrowed bases are caclulated via mapping
780  CSeq_loc_CI g_l(*gen_loc);
781  int i = 0;
782  while (g_l) {
783  auto len = g_l.GetRange().GetLength();
784  if (len < 3) {
785  if (i == 0)
786  offset_from = len;
787  else
788  offset_to = len;
789  }
790  ++i;
791  ++g_l;
792  }
793  prod_from *= 3;
794  prod_to *= 3;
795  bool neg = prod_loc->GetStrand() == eNa_strand_minus;
796 
797  if (offset_from > 0) {
798  // offset == 2 means that one base was borrowed by another segemnt
799  // therefor product coordiante should be adjusted
800 
801  if (offset_from == 2) {
802  if (neg) {
803  prod_to -= 1;
804  } else {
805  prod_from += 1;
806  }
807  }
808  } else {
809  if (neg)
810  prod_from -= 2;
811  }
812  if (offset_to > 0) {
813  // offset == 1 means that this segment borrowed one base from another segemnt
814  // therefore product coordiante should be adjusted
815  if (offset_to == 1) {
816  if (neg) {
817  prod_from -= 1;
818  } else {
819  prod_to += 1;
820  }
821  }
822 
823  } else {
824  // if there no offset
825  // prod_to is the last base of the codon i.e. codon start + 2
826  if (!neg)
827  prod_to += 2;
828  }
829  } else {
830  if (!prod_mapper_up)
832  auto prod_mapped = prod_mapper_up->Map(*prod_loc);
833  if (prod_mapped->IsNull()) {
834  _ASSERT(true);
835  continue;
836  }
837  TSeqPos offset_from = 0;
838  TSeqPos offset_to = 0;
839  auto diff_loc = prod_mapped->Subtract(*aligned_segment, 0, nullptr, nullptr);
840  CSeq_loc_CI g_l(*diff_loc);
841  while (g_l) {
842  auto len = g_l.GetRange().GetLength();
843  if (aligned_segment->GetStrand() == eNa_strand_minus) {
844  if (g_l.GetRange().GetFrom() > aligned_segment->GetTotalRange().GetTo())
845  offset_from = len;
846  else if (g_l.GetRange().GetTo() < aligned_segment->GetTotalRange().GetFrom())
847  offset_to = len;
848  } else {
849  if (g_l.GetRange().GetTo() < aligned_segment->GetTotalRange().GetFrom())
850  offset_from = len;
851  else if (g_l.GetRange().GetFrom() > aligned_segment->GetTotalRange().GetTo())
852  offset_to = len;
853  }
854  ++g_l;
855  }
856 
857  bool neg = prod_loc->GetStrand() == eNa_strand_minus;
858  prod_from *= 3;
859  prod_to *= 3;
860 
861  if (offset_from > 0) {
862  // offset == 2 means that one base was borrowed by another segemnt
863  // therefor product coordiante should be adjusted
864 
865  if (offset_from == 2) {
866  if (neg) {
867  prod_to -= offset_from;
868  prod_range.SetTo(prod_range.GetTo() - 1);
869  } else {
870  prod_from += offset_from;
871  prod_range.SetFrom(prod_range.GetFrom() + 1);
872  }
873  } else {
874  if (neg) {
875  prod_to -= offset_from;
876  } else {
877  prod_from += offset_from;
878  }
879 
880  }
881  } else {
882  if (neg)
883  prod_from -= 2;
884  }
885  if (offset_to > 0) {
886  if (neg) {
887  prod_from -= offset_to;
888  } else {
889  prod_to += offset_to;
890  }
891  } else {
892  // if there no offset
893  // prod_to is the last base of the codon i.e. codon start + 2
894  if (!neg)
895  prod_to += 2;
896  }
897 /*
898  prod_from *= 3;
899  prod_to *= 3;
900  auto aligned_r = aligned_segment->GetTotalRange();
901  auto mapped_r = prod_mapped->GetTotalRange();
902  int off_from = aligned_r.GetFrom() - mapped_r.GetFrom();
903  if (off_from > 0 && off_from < 3) {
904  if (off_from > 1)
905  prod_range.SetFrom(prod_range.GetFrom() + 1);
906  prod_from += off_from;
907  prod_to += off_from;
908  }
909  int off_to = mapped_r.GetTo() - aligned_r.GetTo();
910  if (off_to > 1) {
911  prod_range.SetTo(prod_range.GetTo() + 1);
912  }
913  prod_to += off_to;
914  // need to adjust to max to include the stop codon
915  prod_to = max<int>(prod_to, prod_from + aligned_r.GetLength() - 1);
916 */
917  }
918  }
919  CRef<CSeq_interval> prod_int(new CSeq_interval);
920  prod_int->SetFrom(prod_range.GetFrom());
921  prod_int->SetTo(prod_range.GetTo());
922  prod_int->SetStrand(prod_loc->GetStrand());
923  prod_int->SetId().Assign(*prod_loc->GetId());
924  prod_int->SetPartialStart(prod_loc->IsPartialStart(eExtreme_Biological), eExtreme_Biological);
925  prod_int->SetPartialStop(prod_loc->IsPartialStop(eExtreme_Biological), eExtreme_Biological);
926 
927  CRef<CSeq_interval> gen_int(new CSeq_interval);
928  gen_int->SetFrom(aligned_segment->GetTotalRange().GetFrom());
929  gen_int->SetTo(aligned_segment->GetTotalRange().GetTo());
930  gen_int->SetStrand(aligned_segment->GetStrand());
931  gen_int->SetPartialStart(aligned_segment->IsPartialStart(eExtreme_Biological), eExtreme_Biological);
932  gen_int->SetPartialStop(aligned_segment->IsPartialStop(eExtreme_Biological), eExtreme_Biological);
933  gen_int->SetId(*id);
934 
935  projected_info.push_back(CProjectedMappingInfo::value_type(prod_int, gen_int,
936  anchor_loc->GetTotalRange(), TSeqRange(prod_from, prod_to)));
937  }
938  } catch (const CException&) {
939 
940  }
941  feat->SetProjectedMappingInfo(projected_info);
942  }
943  glyph.Reset(feat);
944  CFeatGlyph::TIntervals& intervals = feat->SetIntervals();
945  const CSeq_loc& loc = feat->GetLocation();
946  CSeq_loc_CI iter(loc);
947  for ( ; iter; ++iter) {
948  intervals.push_back(iter.GetRange());
949  }
950  }
951  return glyph;
952 }
953 
954 
/// Turn a set of linked features into glyphs and group them for layout.
///
/// For every top-level feature a glyph is created through x_CreateFeatGlyph();
/// features for which no glyph can be created are skipped.
///   - A feature without children is added to objs directly, unless it is an
///     exon, in which case it is collected in one shared ("global") exon group.
///   - A feature with children gets its own CGeneGroup in objs, holding the
///     feature's glyph, its non-exon children and their children; exon
///     children are collected in a per-gene exon group.
///
/// @param objs   [out] receives the created glyphs and groups
/// @param align  alignment glyph passed through to x_CreateFeatGlyph()
/// @return false when the job was canceled while iterating, true otherwise
///
/// NOTE(review): this listing dropped the line(s) carrying the function name
/// and its first parameters (`mapper` and the feature list) as well as the
/// header of the outer loop that defines `iter` (listing lines 956-957, 965).
955 bool
958  CSeqGlyph::TObjects& objs,
959  CAlignGlyph& align) const
960 {
961  // Create glyph for each feature, and put the linked features
962  // into the right group
963 
964  CRef<CLayoutGroup> global_exon_group;
966  if (IsCanceled()) return false;
967 
968  const CMappedFeat& mapped_feat = (*iter)->GetMappedFeature();
969  CRef<CSeqGlyph> glyph = x_CreateFeatGlyph(mapper, mapped_feat, align);
970  if ( !glyph ) continue;
971 
972  CSeqFeatData::ESubtype subtype =
973  (*iter)->GetFeature().GetData().GetSubtype();
// Childless feature: exons go to the shared exon group (created on first
// use), everything else becomes a stand-alone object.
974  if ((*iter)->GetChildren().empty()) {
975  if (subtype == CSeqFeatData::eSubtype_exon) {
976  if ( !global_exon_group ) {
977  global_exon_group.Reset(new CLayoutGroup);
978  }
979  global_exon_group->PushBack(glyph);
980  } else {
981  objs.push_back(glyph);
982  }
983  continue;
984  }
985 
// Feature with children: build a gene group whose first member is the
// parent glyph.
986  CRef<CLayoutGroup> exon_group;
987  CRef<CLayoutGroup> gene_group;
988  gene_group.Reset(new CGeneGroup());
989  objs.push_back(CRef<CSeqGlyph>(gene_group.GetPointer()));
990  gene_group->PushBack(glyph);
991  NON_CONST_ITERATE (CLinkedFeature::TLinkedFeats, c_iter, (*iter)->GetChildren()) {
992  const CMappedFeat& c_mapped_feat = (*c_iter)->GetMappedFeature();
993  CRef<CSeqGlyph> c_glyph = x_CreateFeatGlyph(mapper, c_mapped_feat, align);
994  if ( !c_glyph ) continue;
995 
996  subtype = (*c_iter)->GetFeature().GetData().GetSubtype();
997  if (subtype == CSeqFeatData::eSubtype_exon) {
998  if ( !exon_group ) {
999  exon_group.Reset(new CLayoutGroup);
1000  }
1001  exon_group->PushBack(c_glyph);
1002  } else {
// Non-exon child: added to the gene group together with its own
// children (only one further level is visited).
1003  gene_group->PushBack(c_glyph);
1004  NON_CONST_ITERATE (CLinkedFeature::TLinkedFeats, g_iter, (*c_iter)->GetChildren()) {
1005  const CMappedFeat& g_mapped_feat = (*g_iter)->GetMappedFeature();
1006  CRef<CSeqGlyph> g_glyph = x_CreateFeatGlyph(mapper, g_mapped_feat, align);
1007  if (g_glyph) {
1008  gene_group->PushBack(g_glyph);
1009  }
1010  }
1011  }
1012  }
// A group holding a single exon is flattened into the gene group instead
// of being nested as a group of its own.
1013  if (exon_group) {
1014  if (exon_group->GetChildren().size() == 1) {
1015  gene_group->Append(exon_group->SetChildren());
1016  } else {
1017  gene_group->PushBack(exon_group.GetPointer());
1018  }
1019  }
1020  }
// Place the shared exon group: when objs consists of exactly one gene
// group the exons are attached to it, otherwise they are added to objs.
// In both cases a single exon is added directly rather than as a group.
1021  if (global_exon_group) {
1022  if (objs.size() == 1 && dynamic_cast<CGeneGroup*>(objs.front().GetPointer())) {
1023  CGeneGroup* group = dynamic_cast<CGeneGroup*>(objs.front().GetPointer());
1024  if (global_exon_group->GetChildren().size() == 1) {
1025  group->Append(global_exon_group->SetChildren());
1026  } else {
1027  group->PushBack(global_exon_group.GetPointer());
1028  }
1029  } else {
1030  if (global_exon_group->GetChildren().size() == 1) {
1031  objs.push_back(global_exon_group->SetChildren().front());
1032  } else {
1033  objs.push_back(CRef<CSeqGlyph>(global_exon_group.GetPointer()));
1034  }
1035  }
1036  }
1037 
1038  return true;
1039 }
1040 
1041 
1044  CAlign_CI& align_iter)
1045 {
     // Builds alignment smear glyph(s) from the alignments in align_iter and
     // appends them to objs.
     //
     // Returns eCompleted when align_iter is empty or when all smears were
     // filled, and eCanceled as soon as CAlignmentSmear::AddAlignments()
     // returns false.
     //
     // NOTE(review): the start of the signature, the initializer of
     // seq_annot_first and the trailing CAlignSmearGlyph constructor arguments
     // are not visible in this excerpt, so they are not described here.
1046  if (align_iter.GetSize() == 0) return eCompleted;
1047 
1048  CRef<CAlignSmearGlyph> smear_glyph;
1049 
1050  const CSeq_annot& seq_annot_first =
1052  if (CAlignmentSmear::SeparateStrands(seq_annot_first)) {
     // Strand-separated mode: two smear glyphs are created, one per strand,
     // and both are fed from the same align_iter.
1053  // positive strand
1054  smear_glyph.Reset(new CAlignSmearGlyph(m_Handle, m_Range.GetFrom(),
1056  if (smear_glyph) {
1057  CAlignmentSmear& smear = smear_glyph->GetAlignSmear();
     // `this` is handed to AddAlignments(); a false result is reported
     // to the caller as a canceled job.
1058  if ( !smear.AddAlignments(align_iter, this) ) {
1059  return eCanceled;
1060  }
1061  objs.emplace_back(smear_glyph.GetPointer());
1062  }
1063 
1064  // negative strand
1065  smear_glyph.Reset(new CAlignSmearGlyph(m_Handle, m_Range.GetFrom(),
1067  if (smear_glyph) {
1068  CAlignmentSmear& smear = smear_glyph->GetAlignSmear();
     // NOTE(review): align_iter was already passed to AddAlignments() for
     // the positive strand; confirm that AddAlignments() does not leave
     // the iterator exhausted for this second pass.
1069  if ( !smear.AddAlignments(align_iter, this)) {
1070  return eCanceled;
1071  }
1072  objs.push_back(CRef<CSeqGlyph>(smear_glyph.GetPointer()));
1073  }
1074  } else {
     // Single smear glyph covering the alignments without strand separation.
1075  smear_glyph.Reset(new CAlignSmearGlyph(m_Handle, m_Range.GetFrom(),
1077  if (smear_glyph) {
1078  CAlignmentSmear& smear = smear_glyph->GetAlignSmear();
1079  if ( !smear.AddAlignments(align_iter, this) ) {
1080  return eCanceled;
1081  }
1082  objs.emplace_back(smear_glyph.GetPointer());
1083  }
1084  }
1085 
1086  return eCompleted;
1087 }
1088 
1089 static bool s_IsMatchById(CConstRef<CSeq_annot> aln_annot)
1090 {
1091  bool match_id = false;
1092 
1093  if (aln_annot && aln_annot->CanGetDesc()) {
1094  ITERATE(objects::CAnnot_descr::Tdata, descrIter, aln_annot->GetDesc().Get())
1095  if ((*descrIter)->IsUser() &&
1096  (*descrIter)->GetUser().GetType().IsStr() &&
1097  (*descrIter)->GetUser().GetType().GetStr() == "Mate read") {
1098  CConstRef<CUser_field> user_field =
1099  (*descrIter)->GetUser().GetFieldRef("Match by local Seq-id");
1100  if (user_field && user_field->GetData().IsBool() &&
1101  user_field->GetData().GetBool()) {
1102  match_id = true;
1103  break;
1104  }
1105  }
1106  }
1107  return match_id;
1108 }
1109 
1110 
// Returns true when the alignment carries a named score "reciprocity" and
// that score is different from 3; returns false when the score is absent
// or equals 3.
// NOTE(review): the declarator line (return type, name, parameter) is not
// visible in this excerpt; the meaning of the value 3 is not shown here.
1111 static
1113 {
1114  int reciprocity = 0;
1115  return align.GetNamedScore("reciprocity", reciprocity) && reciprocity != 3;
1116 }
1117 
/// Creates alignment glyphs for every alignment delivered by align_iter and
/// appends them to objs, then post-processes the list in three steps:
/// mate pairs are linked into CMatePairGlyph objects, the glyphs are grouped
/// through m_Sorter (only when no mate pairs were recorded), and
/// m_LayoutPolicy is applied when it is set.
///
/// @param objs           receives the created glyphs
/// @param align_iter     alignments to process; nothing is done when empty
/// @param sparseAln      forwarded to the align data source builders
/// @param filterResults  forwarded to the glyph creation helpers
/// @return eCompleted on success, eCanceled as soon as IsCanceled() is true
///
/// NOTE(review): several lines of this function are missing from this excerpt
/// (among them the declarations of `flag`, `aln_annot`, `als`, `bound` and the
/// cross-origin visibility test), so those parts are not documented here.
1118 IAppJob::EJobState CSGAlignmentJob::x_GetAlignments(CSeqGlyph::TObjects& objs, CAlign_CI& align_iter, bool sparseAln, bool filterResults)
1119 {
1120  if (align_iter.GetSize() == 0) return eCompleted;
1121 
1122  SetTaskName("Creating alignment glyphs...");
1123 
     // Mate bookkeeping filled by the glyph creation helpers and consumed by
     // the "final pass" below.
1124  TMatedAlignments mated_aligns;
1125  bool match_id = false;
1126 
1128 
1132  match_id = s_IsMatchById(aln_annot);
1133  }
1134 
1135  auto synonyms = m_Handle.GetSynonyms();
1136 
     // First pass: one iteration per alignment; each iteration reports one
     // completed task unit, whether the alignment is kept or skipped.
1137  for (; align_iter; ++align_iter) {
1138  if (IsCanceled())
1139  return eCanceled;
1140 
1141  const CSeq_align_Handle& align_handle = align_iter.GetSeq_align_Handle();
1142  const CSeq_align& align = *align_iter;
1143 
1144  if (FilterDuplicatesAndBadReads(align)) {
1145  AddTaskCompleted(1);
1146  continue;
1147  }
1148  if (m_ShowSecondPass == false && s_IsSecondPassAlignment(align)) {
1149  AddTaskCompleted(1);
1150  continue;
1151  }
1152 
1153  TAlnDataSources aln_datasources;
1154 
     // NOTE(review): `anchors` is computed here but not used by any line
     // visible in this loop; x_BuildAlignDataSource() computes its own.
1155  vector<CSeq_align::TDim> anchors;
1156  GetAnchors(m_Handle, synonyms.GetPointer(), align, anchors);
1157 
1158  bool crossOrigin = CSeqUtils::IsCrossOrigin(align);
1159  if (crossOrigin) {
1160  auto genomic_loc = align.CreateRowSeq_loc(1);
1161  CRef<CSeq_loc> range_loc(m_Handle.GetRangeSeq_loc(m_Range.GetFrom(), m_Range.GetTo()));
1163  // skip the spliced_seg cross-origin alignments that are not in the visible area
1164  AddTaskCompleted(1);
1165  continue;
1166  }
1167  else {
1168  // need to split the alignment into two new alignments
1169  // keep only the one that is in the visible area
1170  x_CreateAlignGlyphsForCrossOriginAlign(objs, align, align_handle, sparseAln, mated_aligns, match_id, flag, filterResults);
1171  }
1172  }
1173  else {
1174  x_BuildAlignDataSource(align, aln_datasources, sparseAln);
1175  x_CreateCAlignGlyph(objs, align, align_handle, aln_datasources, mated_aligns, match_id, flag, filterResults);
1176  }
1177  AddTaskCompleted(1);
1178  }
     // Glyphs that end up inside a mate pair are collected here so that they
     // can be removed from objs and replaced by the pair glyphs.
1179  set<CSeqGlyph*> objs_to_remove;
1180  CSeqGlyph::TObjects mated_objs;
1181  //
1182  // final pass - connect our mate pair alignments, if we can
1183  //
1184  NON_CONST_ITERATE(TMatedAlignments, iter, mated_aligns)
1185  {
1186  if (IsCanceled()) {
1187  return eCanceled;
1188  }
1189 
     // An empty glyph reference marks an entry that was already consumed
     // as the second mate of an earlier entry (see the Reset() below).
1190  if (!iter->second.first) {
1191  continue;
1192  }
1193 
1194  CAlignGlyph& first_mate = *iter->second.first;
1195  const CSeq_align& align = first_mate.GetAlignment();
1196  const CSeq_align_Handle& orig_align = first_mate.GetOrigAlignment();
1197  const string& ti = iter->first;
1198  string mate_ti = iter->second.second;
1199 
1200  //
1201  // verify that we have a mated alignment for this trace alignment
1202  //
1203  CRef<CAlignGlyph> second_mate;
1204  TMatedAlignments::iterator mate_iter = mated_aligns.find(mate_ti);
1205  if (mate_iter == mated_aligns.end()) {
1206  //
1207  // NOT FOUND
1208  // try iterating. We do this only for trace assembly, not for
1209  // other cases like cSRA/BAM short reads.
1210  //
1211  CSeq_id id("gnl|ti|" + mate_ti);
1212  CBioseq_Handle h = m_Handle.GetScope().GetBioseqHandle(id);
1213  if (h) {
1214  SAnnotSelector sel =
1216  CAlign_CI second_mate_iter(h, sel);
1217  for (; second_mate_iter; ++second_mate_iter) {
1218  string temp = "";
1219  if (!s_CheckTraceMateAlign(false, false, *second_mate_iter,
1220  temp, mate_ti, ti)) {
1221  continue;
1222  }
1223 
1224  try {
     // NOTE(review): the data source and the glyph below are built
     // from `align` / `orig_align` (the first mate), not from
     // *second_mate_iter; confirm this is intended.
1225  // create CAlnGraphicDataSource
1226  TAlnDataSources aln_datasources;
1227  vector<CSeq_align::TDim> anchors;
1228  GetAnchors(m_Handle, synonyms.GetPointer(), align, anchors);
1229 
1230  BuildAlignDataSource(m_Handle, anchors, align, sparseAln, m_Window, aln_datasources);
1231  if (!aln_datasources.empty()) {
1232  x_UpdateMaxTails(*(aln_datasources.front().first));
1233  second_mate.Reset(new CAlignGlyph(
1234  aln_datasources.front().first, orig_align, align,
1235  first_mate.GetOrigAnchor()));
1236  }
1237  } catch (CException& e) {
1238  LOG_POST(Warning << "error in GetAlignments(): " << e.GetMsg());
1239  }
     // Only the first alignment accepted by s_CheckTraceMateAlign()
     // is considered, whether or not a glyph could be built from it.
1240  break;
1241  }
1242  }
1243 
1244  if (!second_mate) {
1245  //LOG_POST(Warning << "failed to find mate pair for ti = " << ti);
1246  // no mate available, so treat it as a normal alignment
1247  // objs.emplace_back(&first_mate);
1248  continue;
1249  }
1250  } else if (!mate_iter->second.first) {
1251  // already processed
1252  continue;
1253  } else {
     // Take the mate's glyph and clear its entry so that the mate is
     // not paired a second time when the iteration reaches it.
1254  second_mate = mate_iter->second.first;
1255  mate_iter->second.first.Reset();
1256  }
1257 
1259  als.emplace_back(&first_mate);
1260  als.emplace_back(second_mate.GetPointer());
1261  objs_to_remove.insert({ &first_mate, second_mate.GetPointer() });
1262  mated_objs.emplace_back(new CMatePairGlyph(als));
1263  }
     // Replace the individual mate glyphs in objs by the pair glyphs.
1264  if (!objs_to_remove.empty()) {
1265  auto remove_begin = remove_if(objs.begin(), objs.end(), [&objs_to_remove](CRef<CSeqGlyph>& glyph) {
1266  return objs_to_remove.count(glyph.GetPointer()) > 0;
1267  });
1268  objs.erase(remove_begin, objs.end());
1269  move(mated_objs.begin(), mated_objs.end(), back_inserter(objs));
1270  }
1271 
1272  if (m_Sorter && mated_aligns.empty()) {
1273  // we don't sort alignments if they are paired and linked together
1274  typedef map<int, CSeqGlyph::TObjects> TAlignGroupMap;
1275  TAlignGroupMap sorted_aligns;
1277  {
     // NOTE(review): the cast result is only checked by _ASSERT before it
     // is dereferenced; confirm every entry of objs is a CAlignGlyph here.
1278  CAlignGlyph* align_glyph = dynamic_cast<CAlignGlyph*>(iter->GetPointer());
1279  _ASSERT(align_glyph);
1280  int idx = m_Sorter->GetGroupIdx(align_glyph->GetAlignment());
     // Glyphs with a negative group index are not put into any group.
1281  if (idx > -1) {
1282  sorted_aligns[idx].push_back(*iter);
1283  }
1284  }
     // objs is rebuilt from the groups only when there is more than one
     // group; in that case glyphs that were not assigned to a group are
     // no longer part of objs.
1285  if (sorted_aligns.size() > 1) {
1286  objs.clear();
     // Re-key the groups by name so that they are emitted in name order.
     // Two group indices that map to the same name collapse into one
     // entry (the later index wins).
1287  map<string, int> sorted_groups;
1288  for (auto const& group : sorted_aligns) {
1289  sorted_groups[m_Sorter->GroupIdxToName(group.first)] = group.first;
1290  }
1291  for (auto const& group : sorted_groups) {
1292  CRef<CNamedGroup> named_group(new CNamedGroup(group.first));
1293  named_group->Set(sorted_aligns[group.second]);
1294  objs.emplace_back(named_group.GetPointer());
1295  }
1296  }
1297  }
1298 
1299  if (m_LayoutPolicy != 0) {
     // Give every glyph the geometry of its sequence range and a height of
     // 1 before the layout policy is run.
1300  for (auto& o : objs) {
1301  TSeqRange range = o->GetRange();
1302  o->SetWidth(range.GetLength());
1303  o->SetLeft(range.GetFrom());
1304  o->SetHeight(1);
1305  }
     // The glyphs are moved into a temporary layout group for BuildLayout()
     // and moved back afterwards.
1306  CLayoutGroup group;
1307  group.SetChildren().swap(objs);
1308  group.SetTearline(eMaxRowLimit);
1310  m_LayoutPolicy->BuildLayout(group, bound);
1311  group.SetChildren().swap(objs);
     // Drop the glyphs that are not visible after the layout.
1312  auto remove_begin = remove_if(objs.begin(), objs.end(), [](CRef<CSeqGlyph>& glyph) {
1313  return !glyph->GetVisible();
1314  });
1315  objs.erase(remove_begin, objs.end());
1316  }
1317  SetTaskCompleted(static_cast<int>(objs.size()));
1318  return eCompleted;
1319 }
1320 
1321 void CSGAlignmentJob::x_BuildAlignDataSource(const CSeq_align& align, TAlnDataSources& align_ds, bool sparseAln)
1322 {
1323  vector<CSeq_align::TDim> anchors;
1324  auto synonyms = m_Handle.GetSynonyms();
1325  GetAnchors(m_Handle, synonyms.GetPointer(), align, anchors);
1326 
1327  bool retry = false;
1328  try {
1329  // try using both direction
1330  BuildAlignDataSource(m_Handle, anchors, align, sparseAln, m_Window, align_ds, &m_Range);
1331  }
1332  catch (CAlignRangeCollException& e) {
1333  if (sparseAln) {
1334  // try to build without sparseAln flag
1335  sparseAln = false;
1336  retry = true;
1337  }
1338  else {
1339  LOG_POST(Warning << "error in GetAlignments(): " << e.GetMsg());
1340  }
1341  }
1342  catch (CException& e) {
1343  // log errors
1344  LOG_POST(Warning << "error in GetAlignments(): " << e.GetMsg());
1345  }
1346  if (retry) {
1347  try {
1348  // try using both direction
1349  BuildAlignDataSource(m_Handle, anchors, align, false, m_Window, align_ds, &m_Range);
1350  }
1351  catch (CException& e) {
1352  // log errors
1353  LOG_POST(Warning << "error in GetAlignments(): " << e.GetMsg());
1354  }
1355  }
1356 }
1357 
1358 // keep the original align_handle
1360  const CSeq_align& align,
1361  const CSeq_align_Handle& align_handle,
1362  bool sparseAln,
1363  TMatedAlignments& mated_aligns,
1364  bool match_id,
1366  bool filterResults)
1367 {
     // Creates glyphs for a spliced alignment that crosses the sequence
     // origin: for each exon that passes the filter below, the alignment is
     // cloned, reduced to that single exon, validated, and turned into
     // glyph(s) that still refer to the original align_handle.
     //
     // NOTE(review): the first signature line, the `flag` parameter line and
     // the condition that guards the first `continue` in the second loop are
     // not visible in this excerpt.
1368  _ASSERT(align.GetSegs().IsSpliced());
1369 
1370  // split the alignment into two new alignments
1371  // keep the one(s) that is(are) in the visible area
1372  // generate DS for that(them)
1373 
     // NOTE(review): align_ds is declared once for all exons and is passed to
     // x_BuildAlignDataSource() on every iteration without being cleared
     // here; confirm whether data sources of earlier exons can accumulate.
1374  TAlnDataSources align_ds;
1375  CRef<CSeq_loc> range_loc(m_Handle.GetRangeSeq_loc(m_Range.GetFrom(), m_Range.GetTo()));
1376 
     // NOTE(review): anchors[0] is used below without checking that
     // GetAnchors() produced at least one anchor.
1377  vector<CSeq_align::TDim> anchors;
1378  auto synonyms = m_Handle.GetSynonyms();
1379  GetAnchors(m_Handle, synonyms.GetPointer(), align, anchors);
1380 
1381  // for cross-origin features, it suffices to split the alignment at the exon boundary corresponding to the origin.
1382  auto& const_exons = align.GetSegs().GetSpliced().GetExons();
1383 
1384  // create label for tooltip
     // The label lists every exon range as "(from..to)" with 1-based
     // positions, separated by " , ".
1385  string ranges_str;
1386  for (const auto& exon_it : const_exons) {
1387  TSeqRange exon_rng = exon_it->GetRowSeq_range(anchors[0], true);
1388 
1389  if (!ranges_str.empty()) {
1390  ranges_str += " , ";
1391  }
1392  ranges_str += "(";
1393  ranges_str += NStr::IntToString(exon_rng.GetFrom() + 1, NStr::fWithCommas);
1394  ranges_str += "..";
1395  ranges_str += NStr::IntToString(exon_rng.GetTo() + 1, NStr::fWithCommas);
1396  ranges_str += ")";
1397  }
1398 
1399  for (const auto& exon_it : const_exons) {
1400  TSeqRange exon_rng = exon_it->GetRowSeq_range(anchors[0], true);
1401 
1402  CRef<CSeq_loc> exon_loc(m_Handle.GetRangeSeq_loc(exon_rng.GetFrom(), exon_rng.GetTo()));
1404  continue;
1405  }
1406 
     // Work on a deep copy so that the caller's alignment stays untouched,
     // and remove every exon whose genomic range differs from exon_rng.
1407  CRef<CSeq_align> new_align(SerialClone(align));
1408  auto& exons = new_align->SetSegs().SetSpliced().SetExons();
1409  ERASE_ITERATE(CSpliced_seg::TExons, iter, exons) {
1410  auto& exon = **iter;
1411  TSeqRange exon_range(exon.GetGenomic_start(), exon.GetGenomic_end());
1412  if (exon_range != exon_rng) {
1413  exons.erase(iter);
1414  }
1415  }
1416 
     // A reduced alignment that fails validation is logged (message plus
     // ASN.1 text dump) and skipped.
1417  try {
1418  new_align->Validate();
1419  }
1420  catch (const CException& e) {
1421  LOG_POST(Error << "Seq_align failed validation: " << e.GetMsg());
1422  LOG_POST(Error << MSerial_AsnText << new_align.GetObject());
1423  continue;
1424  }
1425 
1426  x_BuildAlignDataSource(*new_align, align_ds, sparseAln);
     // Mark every data source as cross-origin and attach the full list of
     // exon ranges built above.
1427  for (auto& it : align_ds) {
1428  it.first->SetCrossOrigin(true);
1429  it.first->SetCrossOriginRange(ranges_str);
1430  }
1431 
1432  x_CreateCAlignGlyph(objs, *new_align, align_handle, align_ds, mated_aligns, match_id, flag, filterResults);
1433 
1434  }
1435 }
1436 
1438  const TAlnDataSources& aln_datasources, TMatedAlignments& mated_aligns, bool match_id,
1439  CSGAlignmentJob::EAlignFlags flag, bool filterResults)
1440 {
     // Creates one CAlignGlyph per entry of aln_datasources, appends it to
     // objs and, when mate pair linking is requested through `flag`, records
     // it in mated_aligns for later pairing.
     //
     // NOTE(review): the first signature line (name and leading parameters,
     // including objs, align and align_handle) is not visible in this excerpt.
1441  for (size_t i = 0; i < aln_datasources.size(); ++i) {
     // Each entry is a pair: the data source and its anchor.
1442  int anchor = aln_datasources[i].second;
1443  auto& aln_datasource = aln_datasources[i].first;
1444 
1445  if (filterResults) { // Filter results in case extended range is used
     // Skip data sources whose range, tails included, does not
     // intersect m_Range.
1446  TSeqRange aln_range = aln_datasource->GetAlnRangeWithTails();
1447  if (!aln_range.IntersectingWith(m_Range)) {
1448  continue;
1449  }
1450  }
1451  x_UpdateMaxTails(*aln_datasource);
1452  //
1453  // check to see if this is a mate pair alignment
1454  //
     // The glyph is always added to objs, whether or not it turns out to be
     // part of a mate pair.
1455  auto align_glyph = Ref(new CAlignGlyph(aln_datasource, align_handle, align, anchor));
1456  objs.emplace_back(align_glyph.GetPointer());
1457  if (flag & fAlign_LinkMatePairs) {
1458  string ti;
1459  string mate_ti;
1460  if (s_CheckTraceMateAlign(match_id, match_id, align, ti, mate_ti)) {
1461  // we will process mate pair alignments in a separate pass
1462  // after we complete all other alignments
1463  //TMatedAlign mp(objs.pwal, mate_ti);
     // emplace() keeps an existing entry: if `ti` is already present,
     // this glyph is not recorded in mated_aligns.
1464  mated_aligns.emplace(ti, make_pair(align_glyph, mate_ti));
1465  }
1466  }
1467  }
1468 }
1469 
1470 
1473 {
     // Loads a coverage graph through x_CreateHistFromGraph() and stores the
     // resulting glyphs in the job result.
     //
     // Returns the state reported by x_CreateHistFromGraph(); the glyph list
     // is handed over to the result only when that state is eCompleted, while
     // the token is always copied.
     //
     // NOTE(review): the function signature and one statement after the
     // creation of `result` are not visible in this excerpt, so how `result`
     // is owned after this function returns cannot be stated here.
1474  CSGJobResult* result = new CSGJobResult();
1476  SetTaskName("Loading coverate graph...");
1477 
1478  CSeqGlyph::TObjects glyphs;
1479  // Limit to just one coverage graph GB-7336
1480  // in case both generated by object manager are added by the user
1481  // are in the scope
     // Work on a copy so that the size limit does not modify m_Sel.
1482  auto sel = m_Sel;
1483  sel.SetMaxSize(1);
1484  EJobState state = x_CreateHistFromGraph(glyphs, sel, m_Window, true, TAxisLimits());
1485  if (state == eCompleted) {
1486  result->m_ObjectList.swap(glyphs);
1487  }
1488 
1489  result->m_Token = m_Token;
1490 
1491  return state;
1492 }
1493 
1494 ///////////////////////////////////////////////////////////////////////////////
1495 /// CSGAlignScoringJob
1496 ///////////////////////////////////////////////////////////////////////////////
1498  const TAlnScoreMap& scores)
1499  : CSeqGraphicJob(desc)
1500  , m_AlnScoreMap(scores)
1501 {
     // Constructor: forwards `desc` to CSeqGraphicJob, stores the score map
     // in m_AlnScoreMap and sets the initial task name.
     // NOTE(review): the first signature line is not visible in this excerpt.
1502  SetTaskName("Calculating alignment scores...");
1503 }
1504 
1505 
1507 {
     // Walks the score entries and calculates the scores of every entry that
     // does not have them yet, reporting one completed task unit per entry.
     //
     // Returns IAppJob::eCanceled as soon as IsCanceled() is true, otherwise
     // IAppJob::eCompleted after copying m_Token into the result.
     //
     // NOTE(review): the signature, the creation of `result`, the task-total
     // setup and the loop header are not visible in this excerpt.
1510 
1512  SetTaskCompleted(0);
1513 
1515  if (IsCanceled()) {
1516  return IAppJob::eCanceled;
1517  }
1518  CIRef<ISGAlnScore> score = iter->second;
     // Scores that are already available are not recalculated.
1519  if ( !score->HasScores() ) {
1520  score->CalculateScores();
1521  }
1522  AddTaskCompleted(1);
1523  }
1524  result->m_Token = m_Token;
1525  return IAppJob::eCompleted;
1526 }
1527 
1528 
1529 ///////////////////////////////////////////////////////////////////////////////
1530 /// CSGAlignStatJob
1532  objects::CBioseq_Handle handle,
1533  const objects::SAnnotSelector& aln_sel,
1534  const objects::SAnnotSelector& graph_sel,
1535  const TSeqRange& range,
1536  const TAlnMgrVec& aligns,
1537  TModelUnit scale,
1538  const string& cache_key)
1539  : CSGAnnotJob(desc, handle, aln_sel, range)
1540  , m_GraphSel(graph_sel)
1541  , m_Aligns(aligns)
1542  , m_Window(scale)
1543  , m_CacheKey(cache_key)
1544 {
     // Constructor: desc, handle, aln_sel and range are forwarded to
     // CSGAnnotJob; graph_sel, aligns, scale and cache_key are stored in
     // m_GraphSel, m_Aligns, m_Window and m_CacheKey respectively.
     // NOTE(review): the first signature line is not visible in this excerpt.
1545  SetTaskName("Loading alignment pileup ...");
1546 }
1547 
1548 
1549 /// graph title to alignment statistics type (CAlnStatConfig::eStat_*)
// Table pairing each graph title string with a CAlnStatConfig::eStat_* value.
// NOTE(review): the TGraphTitle typedef and the map built from this table are
// not visible in this excerpt; if that map is a sorted static map, the
// entries presumably have to stay ordered by title - confirm before editing.
1551 static const TGraphTitle s_GraphTitles[] = {
1552  {"Number of A bases", CAlnStatConfig::eStat_A},
1553  {"Number of C bases", CAlnStatConfig::eStat_C},
1554  {"Number of G bases", CAlnStatConfig::eStat_G},
1555  {"Number of T bases", CAlnStatConfig::eStat_T},
1556  {"Number of inserts", CAlnStatConfig::eStat_Gap},
1557  {"Number of introns", CAlnStatConfig::eStat_Intron },
1558  {"Number of matches", CAlnStatConfig::eStat_Match}
1559 };
1560 
1563 
1564 size_t s_GraphTitleToIndex(const string& title)
1565 {
1566  TGraphTitleMap::const_iterator iter = sm_GraphTitleMap.find(title);
1567  if (iter != sm_GraphTitleMap.end()) {
1568  return (size_t)iter->second;
1569  }
1570  return -1;
1571 }
1572 
1573 
1574 const string& s_GraphIndexToTitle(size_t idx)
1575 {
     // Reverse lookup in sm_GraphTitleMap: scans all entries and returns the
     // title of the first one whose value equals idx. When no entry matches,
     // the debug assertion fires and kEmptyStr is returned.
     // NOTE(review): the declaration of `iter` is not visible in this excerpt.
1577  for (iter = sm_GraphTitleMap.begin(); iter != sm_GraphTitleMap.end(); ++iter) {
1578  if ((size_t)iter->second == idx) {
1579  return iter->first;
1580  }
1581  }
1582  _ASSERT(false); // shouldn't get to here
1583  return kEmptyStr;
1584 }
1585 
1586 
1588 {
     // Computes per-base pileup statistics over m_Range from the alignments in
     // m_Aligns and returns them as one graph per statistic in graphs_out.
     // `range` is set to the covered interval [start, stop - 1].
     //
     // graphs_out is cleared first; it stays empty when m_Aligns is empty or
     // when the job is canceled while the alignments are processed.
     //
     // NOTE(review): the signature and a few interior lines (creation of the
     // sequence vector, creation of each CSeq_graph, the reverse-complement
     // call) are not visible in this excerpt.
1589  graphs_out.clear();
1590  if (m_Aligns.empty())
1591  return;
1592  SetTaskName("Creating pileup graphs ...");
1593  SetTaskTotal((int)m_Aligns.size());
1594  SetTaskCompleted(0);
1595 
1596  // initialize the top sequence to generate statistics for every base
1597  vector<unsigned char> ref_bases;
1598  size_t ref_len = 0;
1599  TSeqPos start = m_Range.GetFrom();
1600  TSeqPos stop = m_Range.GetToOpen();
1601  // graphs holder created from the loaded seq-aligns.
1602  {
1603  string seq_str;
1604  CSeqVector seq_vector =
1606  seq_vector.GetSeqData(start, stop, seq_str);
1607  ref_len = seq_str.length();
     // Fewer bases than requested were returned: shrink the interval to
     // what is actually available.
1608  if (ref_len < stop - start) {
1609  stop = start + static_cast<TSeqPos>(ref_len);
1610  }
     // Each reference base is stored as its statistics index so that it can
     // be compared with the index of the aligned base later on.
1611  ref_bases.resize(ref_len);
1612  for (size_t i = 0; i < ref_len; ++i) {
1613  ref_bases[i] = (unsigned char)CAlnStatConfig::GetRefSeqIdex(seq_str[i]);
1614  }
1615  }
1616 
     // NOTE(review): when no sequence data was returned (ref_len == 0), stop
     // equals start and `stop - 1` is evaluated below; confirm this case
     // cannot occur or is harmless for the TSeqPos type.
1617  CRef<CSeq_loc> loc(new CSeq_loc());
1618  loc->SetInt().SetFrom(start);
1619  loc->SetInt().SetTo(stop - 1);
1620  range.Set(start, stop - 1);
1621  CConstRef<CSeq_id> seq_id = m_Handle.GetSeqId();
1622  loc->SetId(*seq_id);
1623 
     // One integer graph per statistic, indexed by the CAlnStatConfig
     // statistic value up to and including eStat_Match, all zero-initialized
     // with one value per reference base.
1624  vector<CRef<CSeq_graph>> graphs;
1625  graphs.resize(CAlnStatConfig::eStat_Match + 1);
1626 
1627  for (int i = 0; i <= CAlnStatConfig::eStat_Match; ++i) {
1629  g->SetTitle(s_GraphIndexToTitle((size_t)i));
1630  g->SetLoc(*loc);
1631  g->SetGraph().SetInt().SetValues().resize(ref_bases.size(), 0);
1632  graphs[i] = g;
1633  }
1634 
1635  string aln_seq; // variable used in nested loops down here, brought up decl. for better heap alloc.performance
1636  ITERATE(TAlnMgrVec, iter, m_Aligns)
1637  {
1638  if (IsCanceled())
1639  return;
1640  AddTaskCompleted(1);
1641 
1642  const IAlnGraphicDataSource& align = **iter;
1643 
1644  if (align.GetAlignType() != IAlnExplorer::fDNA) {
1645  // If it is not a DNA-to-DNA alignment, skip it
1646  continue;
1647  }
1648 
1649  auto row_n = align.GetNumRows();
1650  auto anchor = align.GetAnchor();
1651  TSeqPos anchor_start = align.GetSeqStart(anchor);
1652  TSeqPos anchor_stop = align.GetSeqStop(anchor);
1653  auto base_width_anchor = align.GetBaseWidth(anchor);
     // Clip the anchor interval to [start, stop - 1].
1654  anchor_start = max(anchor_start, start);
1655  anchor_stop = min(anchor_stop, stop - 1);
1656  if ((*iter)->IsSimplified()) {
1657  // No need to look at match/mismatch.
1658  // Only count overall coverage.
1659  vector<int>& match_vals =
1660  graphs[CAlnStatConfig::eStat_Match]->SetGraph().SetInt().SetValues();
1661 
     // These two checks repeat the clipping done just above and have no
     // further effect on already clipped values.
1662  if (anchor_start < start) anchor_start = start;
1663  if (anchor_stop > stop) anchor_stop = stop;
     // Every covered position counts both as a match and as an
     // occurrence of the reference base at that position.
1664  while (anchor_start <= anchor_stop) {
1665  size_t curr_pos = anchor_start - start;
1666  vector<int>& ref_vals =
1667  graphs[ref_bases[curr_pos]]->SetGraph().SetInt().SetValues();
1668  ref_vals[curr_pos]++;
1669  match_vals[curr_pos]++;
1670  anchor_start++;
1671  }
1672  continue;
1673  }
     // Convert the clipped anchor interval into alignment coordinates;
     // the two ends are swapped when they come out in reverse order.
1674  TSignedSeqPos aln_r_s = align.GetAlnPosFromSeqPos(anchor, anchor_start * base_width_anchor, IAlnExplorer::eRight);
1675  TSignedSeqPos aln_r_e = align.GetAlnPosFromSeqPos(anchor, anchor_stop * base_width_anchor, IAlnExplorer::eLeft);
1676  if (aln_r_e < aln_r_s)
1677  swap(aln_r_s, aln_r_e);
1678  TSignedSeqRange aln_range(aln_r_s, aln_r_e);
1679  //TSignedSeqRange aln_range(align.GetAlnPosFromSeqPos(anchor, anchor_start, IAlnExplorer::eRight),
1680  // align.GetAlnPosFromSeqPos(anchor, anchor_stop, IAlnExplorer::eLeft));
1681 
     // Every non-anchor row contributes to the statistics.
1682  for (TNumrow r = 0; r < row_n; r++) {
1683  if (r == anchor)
1684  continue;
1685 
1686  auto base_width_row = align.GetBaseWidth(r);
1687 
1688  unique_ptr<IAlnSegmentIterator> p_it(
1689  align.CreateSegmentIterator(r,
1690  aln_range, IAlnSegmentIterator::eAllSegments));
1691 
1692  for (IAlnSegmentIterator& it = *p_it; it; ++it) {
1693  const IAlnSegment& seg = *it;
1694  if ((seg.GetType() & IAlnSegment::fIndel &&
1695  !seg.GetRange().Empty()) ||
1696  seg.GetType() & IAlnSegment::fUnaligned ||
1697  seg.GetType() & IAlnSegment::fGap) {
1698  // ignore the inserts
1699  continue;
1700  }
1701  const IAlnSegment::TSignedRange& curr_aln_r = seg.GetAlnRange();
1702  if (curr_aln_r.Empty())
1703  continue;
1704 // TSignedSeqPos curr_aln_start = curr_aln_r.GetFrom();
1705  TSeqPos seg_len = (TSeqPos)curr_aln_r.GetLength();
1706  TModelUnit curr_seq_from =
1707  align.GetSeqPosFromAlnPos(anchor, curr_aln_r.GetFrom()) / base_width_anchor;
1708  TModelUnit curr_seq_to =
1709  align.GetSeqPosFromAlnPos(anchor, curr_aln_r.GetTo()) / base_width_anchor;
1710  if (curr_seq_from > curr_seq_to)
1711  swap(curr_seq_from, curr_seq_to);
1712 // TSeqPos curr_seq_from =
1713  // (TSeqPos)align.GetSeqPosFromAlnPos(anchor, curr_aln_start, IAlnExplorer::eRight);
     // `pos` indexes into the segment, `off`/`curr_pos` into the
     // graphs: a segment that starts left of `start` skips its leading
     // part, otherwise the graph position is shifted right.
1714  TSeqPos off = 0;
1715  size_t pos = 0;
1716  if (curr_seq_from < start) {
1717  pos = start - curr_seq_from;
1718  } else {
1719  off = curr_seq_from - start;
1720  }
1721  size_t curr_pos = off;
1722  if (seg.GetType() & IAlnSegment::fAligned) {
     // Fetch the row's sequence for this segment; aln_seq is only
     // refreshed for aligned segments.
1723  aln_seq.clear();
1724 
1725  TModelUnit r_seq_from =
1726  align.GetSeqPosFromAlnPos(r, curr_aln_r.GetFrom()) / base_width_row;
1727  TModelUnit r_seq_to =
1728  align.GetSeqPosFromAlnPos(r, curr_aln_r.GetTo()) / base_width_row;
1729 
1730  align.GetSeqString(aln_seq, r, IAlnSegment::TSignedRange(r_seq_from, r_seq_to), curr_aln_r, false);
1731 
1732  bool reverse = align.IsPositiveStrand(anchor) != align.IsPositiveStrand(r);
1733  if (reverse) {
1734  string tmp_seq;
1736  0, static_cast<TSeqPos>(aln_seq.length()), tmp_seq);
1737  swap(tmp_seq, aln_seq);
1738  }
1739  }
1740  while (pos < seg_len && curr_pos < ref_len) {
1741  if (IsCanceled())
1742  return;
1743 
1744  if (seg.GetType() & IAlnSegment::fAligned) {
1745  if (pos < aln_seq.size()) {
     // Count the aligned base; it additionally counts
     // as a match when it is a real base (neither gap
     // nor intron) equal to the reference base.
1746  int base_type = CAlnStatConfig::GetRefSeqIdex(aln_seq[pos]);
1747  vector<int>& vals =
1748  graphs[base_type]->SetGraph().SetInt().SetValues();
1749  vals[curr_pos] += 1;
1750  if ((CAlnStatConfig::eStat_Gap != base_type) &&
1751  (CAlnStatConfig::eStat_Intron != base_type) &&
1752  (base_type == ref_bases[curr_pos])) {
1753  vector<int>& m_vals =
1754  graphs[CAlnStatConfig::eStat_Match]->SetGraph().SetInt().SetValues();
1755  m_vals[curr_pos] += 1;
1756  }
1757  } else {
1758  // It shouldn't get to here
1759  _ASSERT(false);
1760  }
1761  } else {
     // NOTE(review): for segments that are not fAligned,
     // aln_seq still holds whatever the last aligned
     // segment left in it; confirm that reading it here
     // is intended.
1762  if (pos < aln_seq.size()) {
1763  int base_type = CAlnStatConfig::GetRefSeqIdex(aln_seq[pos]);
1764  if (CAlnStatConfig::eStat_Gap == base_type) {
1765  // gap
1766  vector<int>& vals = graphs[CAlnStatConfig::eStat_Gap]->SetGraph().SetInt().SetValues();
1767  vals[curr_pos] += 1;
1768  }
1769  else if (CAlnStatConfig::eStat_Intron == base_type) {
1770  // intron
1771  vector<int>& vals = graphs[CAlnStatConfig::eStat_Intron]->SetGraph().SetInt().SetValues();
1772  vals[curr_pos] += 1;
1773  }
1774  }
1775  }
1776  ++pos;
1777  ++curr_pos;
1778  }
1779  }
1780  }
1781  }
     // Hand the finished graphs over to the caller.
1782  for (const auto& gr : graphs) {
1783  graphs_out.emplace_back(gr.GetPointer());
1784  }
1785 }
1786 
1788 {
     // Statistics collector that buffers per-position match, mismatch, gap
     // and intron counts in four vectors and pushes them into m_Data through
     // CPileUpGraph-style Update() calls.
     // NOTE(review): the class header, the constructor signature, the
     // declaration of m_Data and most of AddStat() are not visible in this
     // excerpt.
1789 public:
1791  : CGraphStatCollector(window)
1792  , m_Data(data)
1793  {
1794  }
     // Starts a new batch: empties the four buffers and reserves room for
     // batch_size entries in each of them.
1795  virtual void NewGroup(int batch_size) override
1796  {
1797  matches.clear();
1798  matches.reserve(batch_size);
1799  mismatches.clear();
1800  mismatches.reserve(batch_size);
1801  gaps.clear();
1802  gaps.reserve(batch_size);
1803  introns.clear();
1804  introns.reserve(batch_size);
1805  }
     // Appends the counts of one position to the buffers. Only the gap
     // push_back is visible here; `index` is not used by any visible line.
1806  virtual void AddStat(size_t index, CAlnStatGlyph::SStatStruct& stat) override
1807  {
1808  auto& data = stat.m_Data;
1809  int agtc = data[CAlnStatConfig::eStat_A]
1816  gaps.push_back(data[CAlnStatConfig::eStat_Gap]);
1818  }
     // Writes the buffered values into m_Data under the keys "m", "mm", "g"
     // and "n" for the range [offset, offset + matches.size() - 1].
     // NOTE(review): with an empty `matches` buffer the upper bound is
     // computed as offset - 1; confirm that m_Data.Update() copes with that.
1819  virtual void UpdateGroup(size_t offset) override {
1820  CPileUpGraph::TUpdateMap update = {
1821  { "m", &matches },
1822  { "mm", &mismatches },
1823  { "g", &gaps },
1824  { "n", &introns }
1825  };
1826  m_Data.Update(TSeqRange(static_cast<TSeqPos>(offset), static_cast<TSeqPos>(offset + matches.size() - 1)), update);
1827  }
     // Zeroes the given range: the buffers are resized to the range length
     // with all values 0 and then written through UpdateGroup().
1828  virtual void ClearRange(const TSeqRange& range) override {
1829  matches.clear();
1830  matches.resize(range.GetLength(), 0);
1831  mismatches.clear();
1832  mismatches.resize(range.GetLength(), 0);
1833  gaps.clear();
1834  gaps.resize(range.GetLength(), 0);
1835  introns.clear();
1836  introns.resize(range.GetLength(), 0);
1837  UpdateGroup(range.GetFrom());
1838  }
     // This collector never asks for the pileup to be computed on the fly
     // from alignments.
1839  virtual bool ComputePileUp() override { return false; }
1840 
1841 private:
     // Per-position buffers for the current batch.
1843  vector<CPileUpGraph::TValType> matches;
1844  vector<CPileUpGraph::TValType> mismatches;
1845  vector<CPileUpGraph::TValType> gaps;
1846  vector<CPileUpGraph::TValType> introns;
1847 };
1848 
1850 {
     // Statistics collector that writes directly into a statistics vector
     // (m_Stats), keeping for every index the candidate with the largest
     // "interest" value seen so far.
     // NOTE(review): the class header, the constructor signature, the
     // declaration of m_Stats and parts of AddStat() are not visible in this
     // excerpt.
1851 public:
1853  : CGraphStatCollector(window)
1854  , m_Stats(stats)
1855 
1856  {
1857  }
     // Offers `stat` as a candidate for m_Stats[index]. The running maximum
     // is restarted whenever `index` differs from the previous call; a
     // candidate replaces the stored entry only when its value is strictly
     // larger than the current maximum, so the first of equal candidates is
     // kept.
1858  virtual void AddStat(size_t index, CAlnStatGlyph::SStatStruct& stat) override
1859  {
1860  if (m_LastIndex != (int)index) {
1861  m_LastIndex = static_cast<int>(index);
1862  m_MaxCount = -1;
1863  }
1864  // stat.m_Data[CAlnStatConfig::eStat_Gap] = 0; //.. if we want to exclude gaps
1865  int agtc = stat.m_Data[CAlnStatConfig::eStat_A] +
1870  agtc - stat.m_Data[CAlnStatConfig::eStat_Match];
1876 
1877  int curr_max = max(stat.m_Data[CAlnStatConfig::eStat_Gap],
1879  if (curr_max < stat.m_Data[CAlnStatConfig::eStat_Intron]) {
1880  curr_max = stat.m_Data[CAlnStatConfig::eStat_Intron];
1881  }
1882  if (m_MaxCount < curr_max) {
1883  m_Stats[index] = stat;
1884  m_MaxCount = curr_max;
1885  }
1886  }
1887 
1888 private:
     // Largest candidate value seen for m_LastIndex; -1 means none yet.
1890  int m_MaxCount = -1;
     // Index passed to the most recent AddStat() call; -1 before the first.
1891  int m_LastIndex = -1;
1892 };
1893 
1895 {
     // Collects pileup statistics for main_range and feeds them, position by
     // position, into stat_collector.
     //
     // Existing pileup graphs selected by m_GraphSel are used when present.
     // Otherwise, and only when stat_collector.ComputePileUp() allows it, the
     // pileup is computed from the alignments. tse_handle receives the TSE
     // handle of the graph annotation (or is passed on to x_LoadAlignments()).
     //
     // Returns eCompleted on success or when there is nothing to collect,
     // eCanceled when IsCanceled() is true, or the status reported by
     // x_LoadAlignments() when that is not eCompleted.
     //
     // NOTE(review): the signature is not visible in this excerpt.
1896  TSeqPos start = main_range.GetFrom();
1897  TSeqPos stop = main_range.GetToOpen();
1898  size_t val_num = (size_t)(stop - start);
1899  bool match_graph = false;
1900  // try to see if there is pileup graphs available
1901  SetTaskName("Loading pileup graphs ...");
1902 
     // Graphs are grouped by the total range of their location; each group
     // holds one slot per statistic.
1903  map<TSeqRange, TGraphs> groups;
1904  //CStopWatch sw(CStopWatch::eStart);
1905  CGraph_CI graph_iter(m_Handle, main_range, m_GraphSel);
1906  //ERR_POST(Error << "graph_iter init in " << sw.AsSmartString(CTimeSpan::eSSP_Millisecond) << "\n");
1907  if (graph_iter.GetSize()) {
1908  tse_handle = graph_iter.GetAnnot().GetTSE_Handle();
1909  // The graphs are stored in the order of CAlnStatConfig::EStatType
1910  // which is A, G, T, C, Gap, and Match
1911  //TGraphVec graphs;
1912 
1913  for (; graph_iter; ++graph_iter) {
     // Graphs whose title is not a known statistics title are ignored.
1914  size_t g_idx = s_GraphTitleToIndex(graph_iter->GetTitle());
1915  if (g_idx == (size_t)-1)
1916  continue;
1917  const auto& gr = graph_iter->GetMappedGraph();
1918  const auto& range = gr.GetLoc().GetTotalRange();
1919  auto it = groups.find(range);
1920  if (it == groups.end())
1921  it = groups.emplace(range, TGraphs(CAlnStatConfig::eStat_Match + 1)).first;
1922  it->second[g_idx] = &gr;
1923  if (CAlnStatConfig::eStat_Match == g_idx)
1924  match_graph = true;
1925  }
1926  }
     // The target range is reset in the collector before any values are
     // added, whether or not graphs were found.
1927  stat_collector.ClearRange(main_range);
1928  if (groups.empty()) {
1929  if (!stat_collector.ComputePileUp())
1930  return eCompleted;
1931 
1932  // no pileup graph is available, try computing pileup on the fly
1933  if (m_Aligns.empty()) {
1934  // make sure the alignments are loaded
1935  SetTaskName("Loading alignments ...");
1936  EJobState status = x_LoadAlignments(tse_handle);
1937  if ( status != eCompleted)
1938  return status;
1939  }
1940  if (m_Aligns.empty())
1941  return eCompleted;
1942  TSeqRange range;
1943  TGraphs graphs;
1944  x_CreatePileUpGraphs(range, graphs);
1945  if (!graphs.empty())
1946  groups.emplace(range, graphs);
1947  match_graph = true;
1948  }
1949  if (groups.empty())
1950  return eCompleted;
1951  if (IsCanceled())
1952  return eCanceled;
1953  // We don't store more than one set of statistics for each screen pixel.
1954  // If the zoom scale is larger than one nucleotide per pixel, then we
1955  // need to squeeze several sets of statistics (one per base) into one (pixel)
1956 
1957  double window = stat_collector.GetWindow();
1958  // resize the stats to the actual size needed
1959  if (window < 1.0) window = 1.0;
1960  size_t pix_num = (size_t)ceil(val_num / window);
     // Reads one value from a graph that stores either int or byte values;
     // byte values are read as unsigned.
1961  auto l_GetVal = [](const CSeq_graph::TGraph& graph, int curr_idx) {
1962  int val = 0;
1963  if (graph.IsInt()) {
1964  const vector<int>& vals = graph.GetInt().GetValues();
1965  val = vals[curr_idx];
1966  } else { // must be byte
1967  const CByte_graph::TValues& vals = graph.GetByte().GetValues();
1968  val = (int)(unsigned char)(vals[curr_idx]);
1969  }
1970  return val;
1971  };
1972  CSeqVector seq_vector = m_Handle.GetSeqVector(CBioseq_Handle::eCoding_Iupac);
1973 
1974  size_t curr_start, curr_idx, num_vals, ref_idx, stat_idx, end_idx;
1975  int graph_off;
1976  size_t offset;
1977  double curr_pos;
1978  string ref_seq_str;
1979  for (const auto& group : groups) {
1980  const auto& range = group.first;
1981  const auto& graphs = group.second;
     // curr_start: first position covered by both main_range and this
     // group; curr_idx: index of that position inside the group's graphs.
1982  curr_start = max<int>(main_range.GetFrom(), range.GetFrom());
1983  curr_idx = curr_start - range.GetFrom();
1984  num_vals = range.GetLength();
1985  stat_collector.NewGroup(static_cast<int>(num_vals));
1986  // reference sequence
1987  seq_vector.GetSeqData(static_cast<TSeqPos>(curr_start), static_cast<TSeqPos>(curr_start + num_vals), ref_seq_str);
1988  ref_idx = 0;
1989  graph_off = (int)range.GetFrom() - (int)main_range.GetFrom();
1990  curr_pos = (double)(curr_start - main_range.GetFrom());
1991  stat_idx = (size_t)floor(curr_pos / window);
1992  offset = main_range.GetFrom() + stat_idx;
1993 
     // One iteration per output slot (pixel); each slot consumes the graph
     // values in [curr_idx, end_idx).
1994  for (; stat_idx < pix_num && curr_idx < num_vals; ++stat_idx) {
1995 
1996  if (IsCanceled())
1997  return eCanceled;
1998  curr_pos += window;
1999  end_idx = min<size_t>(floor(curr_pos + 0.5) - graph_off, num_vals);
2000 
2001  // choose one set of stats from [start_idx, end_idx) to
2002  // represent current pixel (i). The criteria are
2003  // like this: max(max(gap(n), mismatch(n))), where n belongs
2004  // to one of [start_idx, end_idx). In case all of them have
2005  // the same max(gap, mismatch), then the first one is used.
2006 
2007  for (; curr_idx < end_idx; ++curr_idx, ++ref_idx) {
2008  // collect pileup at base start_idx
2009  CAlnStatGlyph::SStatStruct tmp_stat;
2010  for (size_t g_idx = 0; g_idx <= (size_t)CAlnStatConfig::eStat_Match; ++g_idx) {
2011  if (g_idx == (size_t)CAlnStatConfig::eStat_Match) {
     // The match count and the count of the reference base
     // are reconciled: when one of the two is zero it is
     // taken from the other.
2012  auto matches = (match_graph && graphs[g_idx]) ?
2013  l_GetVal(graphs[g_idx]->GetGraph(), static_cast<int>(curr_idx)) : 0;
2014  if (ref_idx < ref_seq_str.size()) {
2015  int base_type = CAlnStatConfig::GetRefSeqIdex(ref_seq_str[ref_idx]);
2016  if ((CAlnStatConfig::eStat_Gap != base_type) && (CAlnStatConfig::eStat_Intron != base_type)) {
2017  if (matches == 0)
2018  matches = tmp_stat.m_Data[base_type];
2019  else if (tmp_stat.m_Data[base_type] == 0) {
2020  tmp_stat.m_Data[base_type] = matches;
2021  }
2022  }
2023  }
2024  tmp_stat.m_Data[CAlnStatConfig::eStat_Match] = matches;
2025  } else if (graphs[g_idx]) {
2026  tmp_stat.m_Data[g_idx] = l_GetVal(graphs[g_idx]->GetGraph(), static_cast<int>(curr_idx));
2027  }
2028  }
2029  stat_collector.AddStat(stat_idx, tmp_stat);
2030  }
2031  }
     // Flush the values collected for this group.
2032  stat_collector.UpdateGroup(offset);
2033  }
2034 
2035  return eCompleted;
2036 }
2037 
2038 
2040 {
     // Produces the alignment statistics glyph for m_Range.
     //
     // With a non-empty m_CacheKey the pileup is kept in a cached data object
     // (`data`): missing regions are collected first, then the glyph is
     // filled from that object. Without a cache key the statistics are
     // collected directly into the glyph's statistics vector.
     //
     // Returns eCompleted on success and eFailed when a std::exception is
     // caught on the cached path (the message is stored in m_Error).
     //
     // NOTE(review): the signature, the creation of `result`, `data` and
     // `stat_glyph`, and a few other lines are not visible in this excerpt.
2041  // prepare the job result object
2044  result->m_Token = m_Token;
2045 
2046  if (!m_CacheKey.empty()) {
2047  // BAM/cSRA alignment collect pileup into CAlignmentGraph and stored in ICache
2048  try {
2050  vector<TSeqRange> missing_ranges;
2051  data->GetMissingRegions(m_Range, missing_ranges);
2052  bool update_data = !missing_ranges.empty();
2053  if (update_data) {
2054  if (!data->IsCacheGood()) {
2056  /// Restart Worker Node since remote file was updated
2057  /// and BAM/cSRA objmgr cached data got invalidated
     // NOTE(review): a Fatal-severity post normally terminates
     // the process in the NCBI diagnostics framework; if so, the
     // history cleanup below is never reached - confirm.
2058  ERR_POST(Fatal << "Restart due to expired cache data");
2059 
2060  /// Clearing object manager cache for the remote file
2061  CGraph_CI graph_iter(m_Handle, m_Range, m_GraphSel);
2062  if (graph_iter) {
2063  m_Handle.GetScope().RemoveFromHistory(graph_iter.GetAnnot().GetTSE_Handle());
2064  }
2065  }
     // One asynchronous task per missing range. The lambdas capture by
     // reference; all futures are waited for below, before the captured
     // locals go out of scope. data_handle_lock guards the write to
     // result->m_DataHandle only.
2066  vector<future<void>> results;
2067  mutex data_handle_lock;
2068  for (auto& range : missing_ranges) {
2069  results.emplace_back(async(launch::async, [&](){
2070  CGraphStatCollector_Graph stat_collector(1., *data);
2071  CTSE_Handle tse_handle;
2072  x_CollectAlignStats(range, tse_handle, stat_collector);
2073  if (tse_handle) {
2074  lock_guard<mutex> guard(data_handle_lock);
2075  result->m_DataHandle = tse_handle;
2076  }
2077  }));
2078  }
2079 
     // get() also rethrows an exception raised inside a task.
2080  for (auto& f : results)
2081  f.get();
2082  } else {
     // Nothing is missing in the cache. When the cache reports zero
     // reads for the range, the statistics are collected again and a
     // non-zero result afterwards is logged as an integrity failure.
2083  int num_reads = data->GetNumberOfReads(m_Range);
2084  if (num_reads == 0) {
2085  CGraphStatCollector_Graph stat_collector(1., *data);
2086  CTSE_Handle tse_handle;
2087  x_CollectAlignStats(m_Range, tse_handle, stat_collector);
2088  if (tse_handle)
2089  result->m_DataHandle = tse_handle;
2090  num_reads = data->GetNumberOfReads(m_Range);
2091  if (num_reads != 0) {
2092  auto diag = GetDiagContext().Extra();
2093  string err_msg = "failure at "
2095  + ".."
2097  diag.Print("graph_cache_integrity", err_msg);
2098  ERR_POST(Error << "GraphCache integrity check: " << err_msg);
2099  }
2100  }
2101 
2102  }
2104  CAlnStatGlyph::TStatVec& stats = stat_glyph->GetStatVec();
2105  data->UpdateAlignStats(m_Range, m_Window, stats);
2106  stat_glyph->ShowIntrons();
2107  result->m_ObjectList.emplace_back(stat_glyph.GetPointer());
2108  if (update_data)
2110  return eCompleted;
2111  } catch (exception& e) {
2112  m_Error.Reset(new CAppJobError(string(e.what())));
2113  }
2114  return eFailed;
2115  }
2116 
2117  // regular alignments
2118  // collect pileup directly into CAlnStatGlyph glyph
2120  CAlnStatGlyph::TStatVec& stats = stat_glyph->GetStatVec();
     // One statistics slot per pixel; a window below 1 is treated as 1.
2121  size_t pix_num = (size_t)ceil(m_Range.GetLength() / max(1.,m_Window));
2122  stats.resize(pix_num);
2123  CGraphStatCollector_Glyph stat_collector(m_Window, stats);
2124  CTSE_Handle tse_handle;
     // NOTE(review): the state returned by x_CollectAlignStats() (which
     // can be eCanceled) is not inspected here; eCompleted is returned
     // regardless - confirm this is intended.
2125  x_CollectAlignStats(m_Range, tse_handle, stat_collector);
2126  stat_glyph->ShowIntrons();
2127  if (tse_handle)
2128  result->m_DataHandle = tse_handle;
2129  result->m_ObjectList.emplace_back(stat_glyph.GetPointer());
2130  return eCompleted;
2131 }
2132 
2133 
2135 {
      // Loads the alignments found on m_Handle within m_Range (selector m_Sel,
      // capped at kMaxAlignmentLoaded), builds a graphic data source for each
      // of them and appends the data sources to m_Aligns.
      // `tse_handle` is set to the TSE of the alignment annotation only when at
      // least one alignment was found.
      // Returns eCompleted (also when there is nothing to load) or eCanceled
      // when the job is canceled while iterating.
      // NOTE(review): the signature line is missing from this extract.
2136  SetTaskName("Load alignments ...");
2137  m_Sel.SetMaxSize(kMaxAlignmentLoaded);
2138  CAlign_CI align_iter(m_Handle, m_Range, m_Sel);
2139  if (align_iter.GetSize() == 0)
2140  return eCompleted;
2141  tse_handle = align_iter.GetAnnot().GetTSE_Handle();
2142 
2143  SetTaskName("Create alignment data source ...");
2144  SetTaskTotal((int)align_iter.GetSize());
2145  SetTaskCompleted(0);
      // Fetched once and reused for every alignment in the loop below.
2146  auto synonyms = m_Handle.GetSynonyms();
2147  for ( ; align_iter; ++align_iter) {
2148  if (IsCanceled()) {
2149  return eCanceled;
2150  }
      // NOTE(review): GetAnchors() throws a CException for an alignment with
      // zero rows and is called outside the try block below, so that
      // exception would leave this function -- confirm this is intended.
2151  vector<CSeq_align::TDim> anchors;
2152  GetAnchors(m_Handle, synonyms.GetPointer(), *align_iter, anchors);
2153 
2154  TAlnDataSources aln_datasources;
2155  try {
2156  // try using both direction
      // window == 0.0 disables the "small alignment" shortcut inside
      // BuildAlignDataSource (it is guarded by `window > 0.0`).
2157  BuildAlignDataSource(m_Handle, anchors, *align_iter, true, 0.0, aln_datasources);
2158  } catch (CException& e) {
2159  // log errors
      // The failure is only logged; whatever data sources were created before
      // the exception are still appended below.
2160  LOG_POST(Warning << "error in GetAlignments(): " << e.GetMsg());
2161  }
2162 
      // Keep only the data source of each (data source, anchor row) pair.
2163  for (size_t i = 0; i < aln_datasources.size(); ++i) {
2164  auto& aln_datasource = aln_datasources[i].first;
2165  m_Aligns.push_back(aln_datasource);
2166  }
2167  AddTaskCompleted(1);
2168  }
2169 
2170  SetTaskCompleted((int)align_iter.GetSize());
2171  return eCompleted;
2172 }
2173 
/// Find the rows of an alignment that refer to the sequence behind `handle`.
///
/// First pass: a row is an anchor when its seq-id matches one of the ids
/// returned by handle.GetId(). If that finds nothing and `synonyms` is
/// non-null, a second pass accepts rows whose id is contained in `handle` as a
/// segment, trying levels 0..5 and stopping at the first level that yields at
/// least one anchor.
///
/// @param handle    the reference sequence
/// @param synonyms  when null, the second (segment-based) pass is skipped
/// @param align     alignment whose rows are inspected
/// @param anchors   [out] matching row indexes are appended (not cleared first)
/// @throws CException when the alignment reports zero rows
2174 void GetAnchors(CBioseq_Handle& handle, const CSynonymsSet* synonyms, const CSeq_align& align, vector<CSeq_align::TDim>& anchors)
2175 {
2176  CSeq_align::TDim num_row = align.CheckNumRows();
2177  if (num_row == 0)
2178  NCBI_THROW(CException, eUnknown, "Get empty alignment!");
2179 
2180  auto& ids = handle.GetId();
2181  CSeq_align::TDim row = 0;
2182  for (row = 0; row < num_row; ++row) {
2183  const auto& row_id = align.GetSeq_id(row);
      // NOTE(review): there is no break after a match, so a row would be
      // pushed more than once if several ids of the handle match it --
      // confirm whether that can happen.
2184  for (auto& id : ids) {
2185  if (id.GetSeqId()->Match(row_id)) {
2186  anchors.push_back(row);
2187  }
2188  }
2189  }
2190  if (anchors.empty() && synonyms) {
2191  CScope& scope = handle.GetScope();
2192  /// try a more aggressive matching approach
      // The level loop ends as soon as one level has produced an anchor.
2193  for (size_t level = 0; level <= 5 && anchors.empty(); ++level) {
2194  for (row = 0; row < num_row; ++row) {
2195  const auto& row_id = align.GetSeq_id(row);
      // NOTE(review): the line that declares and first assigns `idh`
      // (original line 2196) is missing from this extract; the visible code
      // only shows the fallback to the canonical id when `idh` is empty.
2197  if (!idh)
2198  idh = sequence::GetId(row_id, scope, sequence::eGetId_Canonical);
2199  if (handle.ContainsSegment(idh, level)) {
2200  anchors.push_back(row);
2201  }
2202  }
2203  }
2204  }
2205 }
2206 
2207 static CRef<CSeq_align> s_CompressDiscToDenseq(list<CRef<CSeq_align>>& aln_set, int anchor_row)
2208 {
2209  auto new_sa = CRef<CSeq_align>();
2210 // if (!aln.GetSegs().IsDisc() || aln.GetSegs().GetDisc().Get().empty())
2211 // return new_sa;
2212 // auto& aln_set = aln.GetSegs().GetDisc().Get();
2213  if (aln_set.empty())
2214  return new_sa;
2215  if (!aln_set.front()->GetSegs().IsDenseg())
2216  return new_sa;
2217  new_sa.Reset(new CSeq_align);
2218  new_sa->SetType(CSeq_align::eType_not_set);
2219  CDense_seg& new_ds = new_sa->SetSegs().SetDenseg();
2220  auto dim = aln_set.front()->GetDim();
2221  new_ds.SetDim(dim);
2222  new_ds.SetNumseg(static_cast<CDense_seg::TNumseg>(aln_set.size()));
2223  new_ds.SetIds() = aln_set.front()->GetSegs().GetDenseg().GetIds();
2224  for (size_t j = 0; j < aln_set.size(); ++j) {
2225  for (auto i = 0; i < dim; ++i) {
2226  new_ds.SetStrands().push_back(aln_set.front()->GetSeqStrand(i));
2227  }
2228  }
2229  int curr_seg = 0;
2230  vector<int> prev_start(dim, -1);
2231  for (auto it : aln_set) {
2232  auto len = it->GetSeqRange(anchor_row).GetLength();
2233  new_ds.SetLens().push_back(len);
2234  for (auto i = 0; i < dim; ++i) {
2235  auto r = it->GetSeqRange(i);
2236  auto from = max<int>(prev_start[i], r.GetFrom());
2237  new_ds.SetStarts().push_back(from);
2238  prev_start[i] = from + len;
2239  }
2240  }
2241 #ifdef _DEBUG
2242  try {
2243  new_sa->Validate(true);
2244  } catch (exception& e) {
2245  ERR_POST(Error << e.what());
2246  new_sa.Reset(nullptr);
2247  }
2248 #endif
2249  return new_sa;
2250 }
2251 
2253  vector<CSeq_align::TDim>& anchors,
2254  const CSeq_align& align,
2255  bool sparseAln,
2256  TModelUnit window,
2257  TAlnDataSources& data_sources,
2259 {
      // Builds the graphic data source(s) for one alignment, one per anchor row,
      // and returns them in `data_sources` as (data source, anchor row) pairs.
      //
      // Stage 1 (pairwise alignments only): try a light-weight data source
      //   (CSimpleGraphicDataSource / CDensegGraphicDataSource). For
      //   discontinuous alignments with a non-null `range`, the sub-alignments
      //   outside `range` are trimmed first.
      // Stage 2 (only when stage 1 produced nothing): build a CSparseAln based
      //   data source when `sparseAln` is true, or a CAlnVec based one otherwise.
      //
      // `data_sources` is cleared on entry. `anchors` may be modified: for an
      // exact self-alignment it is reduced to row 0.
      // Throws CException when `anchors` is empty or the alignment has no rows.
      // NOTE(review): the first and last lines of the signature (declaring the
      // `handle` and `range` parameters used below) are missing from this extract.
2260  data_sources.clear();
2261  // get the anchor rows which are the referent sequence
      // Debug builds assert first; release builds report the problem by throwing.
2262  _ASSERT(!anchors.empty());
2263  if (anchors.empty())
2264  NCBI_THROW(CException, eUnknown, "Cannot find the anchor sequence in the alignment!");
2265 
2266  CSeq_align::TDim num_row = align.CheckNumRows();
2267  if (num_row == 0)
2268  NCBI_THROW(CException, eUnknown, "Get empty alignment!"); // empty alignment
2269 
2270  bool self_alignment = false;
2271  if (num_row == (int)anchors.size()) {
2272  // This is a self-aligned alignment (same sequence for all rows).
2273  // Request no merge (ePreserveRows) when creating CAnchoredAln
2274  self_alignment = true;
2275 
2276  // check if all aligned rows match with the exactly same range
2277  bool exact_self_aligned = true;
2278  CRange<TSeqPos> aligned_r = align.GetSeqRange(0);
2279  for (CSeq_align::TDim row = 1; row < num_row; ++row) {
2280  if (aligned_r != align.GetSeqRange(row)) {
2281  exact_self_aligned = false;
2282  break;
2283  }
2284  }
2285 
2286  // Use the first row only to avoid duplication if it is an exact
2287  // self-aligned alignment (JIRA GB-1289). Otherwise show the alignment
2288  // multiple times, once for each row as the anchor sequence range
2289  // (JIRA GB-2349)
2290  if (exact_self_aligned) {
2291  anchors.clear();
2292  anchors.push_back(0);
2293  }
2294  }
      // Partial-end flags accumulated while trimming a discontinuous alignment.
2295  int alignment_partiality = IAlnGraphicDataSource::ePartial_None;
      // Alignment used by stage 2: the input alignment, unless the trimming
      // code below replaces it with a reduced discontinuous alignment.
2296  CConstRef<CSeq_align> align_ref(&align);
2297 
2298  if (align.CheckNumRows() == 2 && (anchors.size() == 1 || self_alignment)) {
2299  //if (align.GetSeqStrand(0) != align.GetSeqStrand(1))
2300  //cout << MSerial_AsnText << align << endl;
2301 
2302  ITERATE(vector<CSeq_align::TDim>, row_iter, anchors) {
2303  CSeq_align::TDim anchor_row = *row_iter;
2304  try {
      // Alignments spanning fewer than 5 windows on the anchor row get the
      // simple data source (never taken when window <= 0).
2305  if (window > 0.0 && align.GetSeqRange(anchor_row).GetLength() / window < 5.0) {
2306  data_sources.emplace_back(new CSimpleGraphicDataSource(align, handle.GetScope(), anchor_row), anchor_row);
2307  }
2308  else if (align.CheckNumRows() == 2) {
2309  if (align.GetSegs().IsDenseg() &&
2310  align.GetSegs().GetDenseg().CheckNumSegs() > 0 &&
2311  !align.GetSegs().GetDenseg().IsSetWidths()) { // we don't deal with mixed denseg
2312  data_sources.emplace_back(new CDensegGraphicDataSource(align, handle.GetScope(), anchor_row), anchor_row);
2313  }
2314  else if (align.GetSegs().IsStd() && align.GetSegs().GetStd().size() == 1) {
2315  const vector< CRef< CSeq_loc > >& locs =
2316  align.GetSegs().GetStd().front()->GetLoc();
      // NOTE(review): the size of `locs` is only checked by a debug assertion
      // before locs[0] and locs[1] are accessed.
2317  _ASSERT(locs.size() == 2);
2318  if (locs[0]->IsInt() && locs[1]->IsInt()) {
2319  TSeqPos len1 = locs[0]->GetTotalRange().GetLength();
2320  TSeqPos len2 = locs[1]->GetTotalRange().GetLength();
2321  if (len1 != len2 && len1 != 3 * len2 && len2 != 3 * len1) {
2322  // For std-seg with two uneven aligned segments, if it is not
2323  // a protein-to-genomic alignment, use a simplified alignment
2324  // manager to handle this special case.
2325  CSimpleGraphicDataSource* simple_ds;
2326  data_sources.emplace_back(simple_ds = new CSimpleGraphicDataSource(align, handle.GetScope(), anchor_row), anchor_row);
2327  simple_ds->SetRegularity(false);
2328 
2329  }
2330  }
2331  }
2332  else if (range != nullptr && align.GetSegs().IsDisc() && !align.GetSegs().GetDisc().Get().empty()) {
2333 
2334  // remove all alignments from seq_align_set that don't fit in range
2335  // except one set on the left and one set on the right
2336  // so that the lines connecting alignments would continue to the edge of the screen
2337  // mark resulting datasource as having partial 5prime and/or partial 3prime alignment
2338  const auto& orig_aln_list = align.GetSegs().GetDisc().Get();
2339 
2340  auto new_aln_set = Ref(new CSeq_align_set);
2341  auto& new_aln_list = new_aln_set->Set();
2342 
2343  // assumption is that the rest of the aligns in align_set have the same strand value
2344  bool is_minus = orig_aln_list.front()->GetSeqStrand(anchor_row) == eNa_strand_minus;
      // Minus-strand lists are walked in reverse (see below) and prepended,
      // so the kept sub-alignments stay in their original list order.
2345  function<void(CSeq_align*)> add_to_front = [&new_aln_list](CSeq_align* aln) { new_aln_list.emplace_front(aln); };
2346  function<void(CSeq_align*)> add_to_back = [&new_aln_list](CSeq_align* aln) { new_aln_list.emplace_back(aln); };
2347  auto add_to_list = is_minus ? add_to_front : add_to_back;
2348 
      // Last sub-alignment seen that ends before `range`; it is kept only if a
      // later sub-alignment intersects `range` or lies beyond it.
2349  CSeq_align* prev = nullptr;
      // Visitor for one sub-alignment; returning false stops the traversal.
2350  auto splice_alignment = [&](const CRef<CSeq_align>& aln)->bool {
2351  auto r = aln->GetSeqRange(anchor_row);
2352  auto seq_align = const_cast<CSeq_align*>(aln.GetPointer());
2353  if (r.GetTo() < range->GetFrom()) {
2354  alignment_partiality |= IAlnGraphicDataSource::ePartial_5prime;
2355  prev = seq_align;
2356  }
2357  else if (range->IntersectingWith(r)) {
2358  if (prev) {
2359  add_to_list(prev);
2360  prev = nullptr;
2361  }
2362  add_to_list(seq_align);
2363  }
2364  else if (r.GetFrom() > range->GetTo()) {
2365  if (prev) {
2366  add_to_list(prev);
2367  prev = nullptr;
2368  }
2369  add_to_list(seq_align);
2370  alignment_partiality |= IAlnGraphicDataSource::ePartial_3prime;
2371  return false; // All alignments within the range are processed
2372  }
2373  return true;
2374  };
      // all_of is used as a for-each with early exit; its result is ignored.
2375  if (is_minus) {
2376  all_of(orig_aln_list.rbegin(), orig_aln_list.rend(), splice_alignment);
2377  }
2378  else {
2379  all_of(orig_aln_list.begin(), orig_aln_list.end(), splice_alignment);
2380  }
2381  auto prev_size = new_aln_list.size();
2382  /// if the discontinuous alignments contains more than 300 densegs
2383  /// convert it to one denseg alignment
2384  /// where each segment would represent the span of each denseg in disc densegs
2385  /// turn off coloring as it will be incorrect
2386  /// this is done for rendering optimization
2387  /// as drawing more than 300 denseg affects performance
2388  if (prev_size > 300) {
2389  CRef<CSeq_align> compressed_aln = s_CompressDiscToDenseq(new_aln_list, anchor_row);
2390  if (compressed_aln) {
      // NOTE(review): the declaration of `ds` (original line 2391) and the
      // statement on original line 2394 are missing from this extract.
2392  data_sources.emplace_back(ds = new CDensegGraphicDataSource(*compressed_aln, handle.GetScope(), anchor_row), anchor_row);
2393  ds->SetColoringAvailable(false);
      // Done with this anchor row; go on with the next one.
2395  continue;
2396  }
2397  }
2398 
      // Use the trimmed list only when something was actually removed and
      // something is left; otherwise keep the original alignment and drop
      // the partial flags.
2399  if (!new_aln_list.empty() && new_aln_list.size() < orig_aln_list.size()) {
2400  auto new_align = Ref(new CSeq_align);
2401  new_align->SetType(CSeq_align::eType_disc);
2402  swap(new_align->SetSegs().SetDisc().Set(), new_aln_set->Set());
2403  align_ref.Reset(&*new_align);
2404  }
2405  else {
2406  alignment_partiality = IAlnGraphicDataSource::ePartial_None;
2407  }
2408  }
2409  }
2410  }
2411  catch (const CException&) {
2412  // ignore
      // A failure here only means no light-weight data source for this anchor
      // row; the general code below is used instead.
2413  }
2414  }
2415  }
2416 
      // Stage 1 succeeded: nothing else to do.
      // NOTE(review): this returns before the partial flags are applied at the
      // end of the function -- presumably the compressed-denseg branch sets
      // them itself on one of the lines missing from this extract; confirm.
2417  if ( !data_sources.empty() )
2418  return;
2419 
2420  bool report_overlaps = false;
2421 
2422  //CStopWatch sw(CStopWatch::eStart);
2423  ITERATE (vector<CSeq_align::TDim>, row_iter, anchors) {
2424  CSeq_align::TDim anchor = *row_iter;
2425  // generate an alignment manager for this alignment (either CAlnVec or CSparseAln)
2426  if (sparseAln) {
      // NOTE(review): original lines 2428-2429 (after this typedef) are
      // missing from this extract.
2427  typedef vector<const objects::CSeq_align*> TAlnVector;
2430 
2431  CAlnContainer aln_container;
2432  aln_container.insert(*align_ref);
2433  CAlnSeqIdsExtract<CAlnSeqId> id_extract;
2434  TAlnIdMap aln_id_map(id_extract, aln_container.size());
2435  int num_align = 0;
2436  ITERATE(CAlnContainer, aln_it, aln_container) {
2437  try {
2438  // Create a vector of seq-ids for the seq-align
2439  aln_id_map.push_back(**aln_it);
2440  ++num_align;
      // NOTE(review): the exception is caught by value; catching by const
      // reference would avoid the copy.
2441  } catch (CAlnException e) {
2442  // Skipping this alignment
2443  }
2444  }
2445  // Create align statistics object
2446  TAlnStats aln_stats(aln_id_map);
2447  // Create user options
2448  CAlnUserOptions aln_user_options;
2449  aln_user_options.m_Direction = CAlnUserOptions::eBothDirections;
2450  TAnchoredAlnVec anchored_aln_vec;
2451 
2452  // Explicitly specify anchor sequence seq_id
2453  //aln_user_options.SetAnchorId(aln_id_map[0][anchor]);
2454  //CreateAnchoredAlnVec(aln_stats, anchored_aln_vec, aln_user_options);
2455 
2456  // Use anchor sequence row number stored in seq-align
2457  for (size_t aln_idx = 0; aln_idx < aln_stats.GetAlnCount(); ++aln_idx) {
2458  CRef<CAnchoredAln> anchored_aln =
2459  CreateAnchoredAlnFromAln(aln_stats, aln_idx, aln_user_options, anchor);
2460  if ( anchored_aln ) {
2461  anchored_aln_vec.push_back(anchored_aln);
2462  // Calc scores
      // Score = sum of the lengths of all aligned ranges over all rows,
      // divided by the number of rows.
2463  for (CSeq_align::TDim row = 0; row < anchored_aln->GetDim(); ++row) {
2464  ITERATE(CPairwiseAln, rng_it, *anchored_aln->GetPairwiseAlns()[row]) {
2465  anchored_aln->SetScore() += rng_it->GetLength();
2466  }
2467  }
2468  anchored_aln->SetScore() /= anchored_aln->GetDim();
2469  }
2470  }
2471 
2472  if (self_alignment) {
2473  aln_user_options.m_MergeAlgo = CAlnUserOptions::ePreserveRows;
2474  } else {
      // NOTE(review): the statement of this branch (original line 2475) is
      // missing from this extract.
2476  }
      // Merge the anchored alignments into one and continue with that single
      // alignment.
2477  CRef<CAnchoredAln> out_aln(new CAnchoredAln);
2478  BuildAln(anchored_aln_vec, *out_aln, aln_user_options);
2479  anchored_aln_vec.clear();
2480  anchored_aln_vec.push_back(out_aln);
2481 
2482  NON_CONST_ITERATE (TAnchoredAlnVec, a_iter, anchored_aln_vec) {
2483  bool has_overlaps = false;
2484  auto& anchored_aln = **a_iter;
2485 
      // NOTE(review): ds_ref is not used in the visible code.
2486  IAlnGraphicDataSource* ds_ref = nullptr;
2487 
      // Stop at the first row whose pairwise alignment has the overlap flag.
2488  for (CAnchoredAln::TDim row = 0; has_overlaps == false && row < anchored_aln.GetDim(); ++row) {
2489  const CPairwiseAln& pw = *anchored_aln.GetPairwiseAlns()[row];
2490  has_overlaps = pw.IsSet(CPairwiseAln::fOverlap);
2491  }
2492  _ASSERT(anchored_aln.GetDim() > 1);
2493  if (anchored_aln.GetDim() < 2) {
2494  // something is wrong, skip this one
2495  LOG_POST(Warning << "CSGAlignmentJob::BuildAlignDataSource(): "
2496  << "Something is wrong with the alignment. It has only one row.");
2497  continue;
2498  }
2499  if (has_overlaps)
2500  report_overlaps = true;
2501 
2502  if (has_overlaps || anchored_aln.GetPairwiseAlns()[0]->GetSecondBaseWidth() == -1) {
2503  // use a simplified alignment manager to handle this special case.
      // NOTE(review): this uses the original `align`, not the possibly
      // trimmed `*align_ref` -- confirm this is intended.
2504  CSimpleGraphicDataSource* simple_ds;
2505  data_sources.emplace_back(simple_ds = new CSimpleGraphicDataSource(align, handle.GetScope(), anchor), anchor);
2506  simple_ds->SetRegularity(false);
2507  } else {
2508  // Build a sparse align
2509  CConstRef<CSparseAln> sparse_aln(new CSparseAln(**a_iter, handle.GetScope()));
2510  _ASSERT(sparse_aln->GetDim() > 1);
2511  if (sparse_aln->GetDim() < 2) {
2512  // something is wrong, skip this one
2513  LOG_POST(Warning << "CSGAlignmentJob::BuildAlignDataSource(): "
2514  << "Something is wrong with the alignment. Create CSparseAln has only one row.");
2515  continue;
2516  }
2517  data_sources.emplace_back(new CSparseAlnGraphicDataSource(sparse_aln), anchor);
2518  }
2519  }
2520  } else {
      // CAlnVec path: a dense-seg is used directly, any other alignment type
      // is first converted to a dense-seg through CAlnMix.
2521  CRef<CAlnVec> aln_mgr;
2522  if (align_ref->GetSegs().IsDenseg()) {
2523  aln_mgr.Reset(new CAlnVec(align_ref->GetSegs().GetDenseg(), handle.GetScope()));
2524  } else {
2525  CAlnMix mix(handle.GetScope());
2526  mix.Add(*align_ref);
2527  mix.Merge(CAlnMix::fGapJoin);
2528 
2529  aln_mgr.Reset(new CAlnVec(mix.GetDenseg(), handle.GetScope()));
2530  }
2531  // anchor the alignment on the referent sequence
2532  aln_mgr->SetAnchor(anchor);
2533  data_sources.emplace_back(new CAlnVecGraphicDataSource(*aln_mgr), anchor);
2534  }
2535  }
      // Logged once per call, no matter how many anchors had overlaps.
2536  if (report_overlaps) {
2537  LOG_POST(Warning << "Coloring is not supported for alignment with overlapping segments");
2538  }
      // Propagate the partial-end flags of a trimmed discontinuous alignment
      // to every data source built above.
2539  if (alignment_partiality != IAlnGraphicDataSource::ePartial_None) {
2540  for_each(data_sources.begin(), data_sources.end(), [&alignment_partiality](TAlnDS_Anchor& dsa){
2541  dsa.first->SetPartialFlags(alignment_partiality);
2542  });
2543  }
2544  //ERR_POST(Error << "Build datasource done in " << sw.AsSmartString(CTimeSpan::eSSP_Millisecond) << "\n");
2545 
2546 
2547  //// preload aligned sequences
2548  //try {
2549  // for (int row = 0; row < aln_datasource->GetNumRows(); ++row) {
2550  // aln_datasource->GetBioseqHandle(row);
2551  // }
2552  //} catch (CException& e) {
2553  // // ignore the error on resolving the seq-id
2554  // LOG_POST(Error << e.GetMsg());
2555  //}
2556 
2557  return;
2558 }
2559 
2560 
2561 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
USING_SCOPE(objects)
static void s_ObjectIdToStr(const CObject_id &id, string &id_str)
static const double kDetailsZoomLevel
Scale at which align details are made visible Align details: unaligned tails, mate pairs.
static const TGraphTitle s_GraphTitles[]
SStaticPair< const char *, CAlnStatConfig::EStatType > TGraphTitle
layout style to layout display name
static const int kMaxAlignmentLoaded
maximal number of alignments allowed to load, to avoid potential memory and performance problems.
void GetAnchors(CBioseq_Handle &handle, const CSynonymsSet *synonyms, const CSeq_align &align, vector< CSeq_align::TDim > &anchors)
const string & s_GraphIndexToTitle(size_t idx)
static CRef< CSeq_align > s_CompressDiscToDenseq(list< CRef< CSeq_align >> &aln_set, int anchor_row)
static bool s_IsMatchById(CConstRef< CSeq_annot > aln_annot)
size_t s_GraphTitleToIndex(const string &title)
static bool s_IsSecondPassAlignment(const CSeq_align &align)
DEFINE_STATIC_ARRAY_MAP(TGraphTitleMap, sm_GraphTitleMap, s_GraphTitles)
CStaticArrayMap< string, CAlnStatConfig::EStatType > TGraphTitleMap
void BuildAlignDataSource(CBioseq_Handle &handle, vector< CSeq_align::TDim > &anchors, const CSeq_align &align, bool sparseAln, TModelUnit window, TAlnDataSources &data_sources, CRange< TSeqPos > *range)
static bool s_CheckTraceMateAlign(bool match_id, bool is_bam_align, const CSeq_align &align, string &ti, string &mate_ti, const string &expect_mate_ti="")
static const double kSequenceZoomLevel
always show individual alignments when zoom level is below this threshold.
pair< CRef< IAlnGraphicDataSource >, int > TAlnDS_Anchor
CSGAlignmentJob.
vector< TAlnDS_Anchor > TAlnDataSources
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
CRef< CAnchoredAln > CreateAnchoredAlnFromAln(const _TAlnStats &aln_stats, size_t aln_idx, const CAlnUserOptions &options, objects::CSeq_align::TDim explicit_anchor_row=-1)
Create an anchored alignment from Seq-align using hints.
CAlnStats< TAlnIdMap > TAlnStats
Default implementations for alignment stats.
Definition: aln_stats.hpp:398
vector< const CSeq_align * > TAlnVector
CAlnIdMap< vector< const CSeq_align * >, TIdExtract > TAlnIdMap
Default implementations of CAlnIdMap.
Definition: aln_tests.hpp:161
void remove_if(Container &c, Predicate *__pred)
Definition: chainer.hpp:69
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
Checksum and hash calculation classes.
AutoPtr –.
Definition: ncbimisc.hpp:401
bool IsSet(int flags) const
static IAlignSorter * CreateAlignSorter(std::string const &sort_by, std::string const &sort_str)
CAlign_CI –.
Definition: align_ci.hpp:63
File Description:
CSeq_align container.
const_iterator insert(const CSeq_align &seq_align)
Insert new CSeq_align into the list.
size_type size(void) const
Container mapping seq-aligns to vectors of participating seq-ids.
Definition: aln_tests.hpp:56
void push_back(const CSeq_align &aln)
Adding an alignment.
Definition: aln_tests.hpp:87
void SetAnchor(TNumrow anchor)
Definition: alnmap.cpp:79
void Add(const CDense_seg &ds, TAddFlags flags=0)
Definition: alnmix.cpp:120
@ fGapJoin
Definition: alnmix.hpp:103
void Merge(TMergeFlags flags=0)
Definition: alnmix.cpp:273
const CDense_seg & GetDenseg(void) const
Definition: alnmix.cpp:295
IAlnSeqId extracting functor.
static int GetRefSeqIdex(char base)
@ eStat_Intron
intron (for mRNA-to-genome alignments)
@ eStat_Mismatch
mismatches (A+G+T+C - matches)
@ eStat_Total
total alignment count at this base (A+G+T+C+Gap)
vector< SStatStruct > TStatVec
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
Definition: aln_stats.hpp:57
size_t GetAlnCount(void) const
How many alignments do we have?
Definition: aln_stats.hpp:146
Options for different alignment manager operations.
EMergeAlgo m_MergeAlgo
EDirection m_Direction
@ ePreserveRows
Preserve all rows as they were in the input (e.g.
@ eBothDirections
No filtering: use both direct and reverse sequences.
CAlnVecGraphicDataSource - implementation of CAlnGraphicDataSource for CAlnVec-based alignments.
Query-anchored alignment can be 2 or multi-dimensional.
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
TDim GetDim(void) const
How many rows.
void SetScore(int score)
Set the total score.
Seq-loc and seq-align mapper exceptions.
const TAnnotNames & GetAnnotNames(void) const
size_t GetSize(void) const
CSeq_annot_Handle GetAnnot(void) const
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
CBatchJobResult – the data structure holding the seqgraphic job results for a batch job processing.
CBioseq_Handle –.
TNumseg CheckNumSegs(void) const
Definition: Dense_seg.cpp:109
bool IsSetWidths(void) const
Definition: Dense_seg.hpp:196
This class is intended to be used for pairwise dense-seg seq-align to replace CSparseAlnGraphicDataS...
void SetColoringAvailable(bool v=true)
TIntervals & SetIntervals()
CFeatGlyph inline implementations.
virtual const objects::CSeq_loc & GetLocation(void) const
access the position of this object.
void SetProjectedMappingInfo(const CProjectedMappingInfo &info)
CFeat_CI –.
Definition: feat_ci.hpp:64
CGeneGroup is a subclass of CLayoutGroup contains gene, RNA, cds, and Exon features.
void SaveData(CRef< TData > data)
Clones TData, puts it into Save Queue for asynchronous storage operation.
void RemoveData(const string &data_key)
Removes Data from in-memory cache.
CRef< TData > GetData(const string &data_key)
Retrieves TData from in-memory cache or if not found calls TData::Init to initialize new TData instan...
static CGraphCache & GetInstance()
Definition: graph_cache.hpp:97
CAlnStatGlyph::TStatVec & m_Stats
CGraphStatCollector_Glyph(double window, CAlnStatGlyph::TStatVec &stats)
virtual void AddStat(size_t index, CAlnStatGlyph::SStatStruct &stat) override
virtual void UpdateGroup(size_t offset) override
virtual bool ComputePileUp() override
virtual void NewGroup(int batch_size) override
vector< CPileUpGraph::TValType > matches
vector< CPileUpGraph::TValType > mismatches
virtual void AddStat(size_t index, CAlnStatGlyph::SStatStruct &stat) override
CGraphStatCollector_Graph(double window, CPileUpGraph &data)
vector< CPileUpGraph::TValType > introns
vector< CPileUpGraph::TValType > gaps
virtual void ClearRange(const TSeqRange &range) override
CGraphStatCollector Helper class to use with CSGAlignStatJob::x_CollectAlignStats x_CollectAlignStats...
virtual bool ComputePileUp()
double GetWindow() const
virtual void ClearRange(const TSeqRange &range)
virtual void AddStat(size_t index, CAlnStatGlyph::SStatStruct &stat)=0
virtual void UpdateGroup(size_t offset)
virtual void NewGroup(int batch_size)
CGraph_CI –.
Definition: graph_ci.hpp:234
CJobResultBase – the basic job result class holding a token.
CLayoutGroup is a container of CSeqGlyphs (layout objects).
void PushBack(CSeqGlyph *obj)
Append a layout object to the end.
TObjectList & SetChildren()
const TObjectList & GetChildren() const
CLayoutGroup inline methods.
void Append(TObjectList &objs)
CMappedFeat –.
Definition: mapped_feat.hpp:59
vector< CRef< CAlignGlyph > > TAlignList
CTitleGroup is a layout group with a title.
Definition: named_group.hpp:45
A pairwise aln is a collection of ranges for a pair of rows.
CPileUpGraph.
void SetAlignmentDataSource(const IAlnGraphicDataSource &align_ds)
CProjectedMappingInfo inline methods.
CRef –.
Definition: ncbiobj.hpp:618
CSGAlignScoringJob(const string &desc, const TAlnScoreMap &scores)
CSGAlignScoringJob.
virtual EJobState x_Execute()
method truly doing the job.
TAlnScoreMap m_AlnScoreMap
TAlnMgrVec m_Aligns
EJobState x_LoadAlignments(objects::CTSE_Handle &tse_handle)
EJobState x_CollectAlignStats(const TSeqRange &main_range, objects::CTSE_Handle &tse_handle, CGraphStatCollector &stat_collector)
vector< CConstRef< IAlnGraphicDataSource > > TAlnMgrVec
objects::SAnnotSelector m_GraphSel
pileup graph selector
string m_CacheKey
Graph cache key calculated by alignment ds.
TModelUnit m_Window
current zoom scale
CSGAlignStatJob(const string &desc, objects::CBioseq_Handle handle, const objects::SAnnotSelector &aln_sel, const objects::SAnnotSelector &graph_sel, const TSeqRange &range, const TAlnMgrVec &aligns, TModelUnit scale, const string &cache_key)
CSGAlignStatJob.
IAlnGraphicDataSource::TNumrow TNumrow
vector< CConstRef< objects::CSeq_graph > > TGraphs
virtual EJobState x_Execute()
method truly doing the job.
void x_CreatePileUpGraphs(TSeqRange &range, TGraphs &graphs)
void x_UpdateMaxTails(const IAlnGraphicDataSource &aln_datasource)
CIRef< ILayoutPolicy > m_LayoutPolicy
CAlignmentConfig::EUnalignedTailsMode m_UnalignedTailsMode
Flag, indicating whether and how to display unaligned tails.
void x_CreateCAlignGlyph(CSeqGlyph::TObjects &objs, const objects::CSeq_align &align, const objects::CSeq_align_Handle &align_handle, const TAlnDataSources &aln_datasources, TMatedAlignments &mated_aligns, bool match_id, CSGAlignmentJob::EAlignFlags flag, bool filterResults)
EJobState x_LoadAlignments()
CRef< CSGJobResult > x_LoadAlignmentFeats(CAlignGlyph &align)
CAlignmentConfig::EHideSraAlignments m_HideSra
Flag, indicating whether to hide duplicates and/or bad reads.
static void GetAnnotNames(const objects::CBioseq_Handle &handle, const TSeqRange &range, objects::SAnnotSelector &sel, TAnnotNameTitleMap &names)
int m_AlignLimit
max number of alignments to load.
bool x_CreateGeneModels(objects::CSeq_loc_Mapper &mapper, CLinkedFeature::TLinkedFeats &feats, CSeqGlyph::TObjects &objs, CAlignGlyph &align) const
IAppJob::EJobState x_GetAlignments(CSeqGlyph::TObjects &objs, objects::CAlign_CI &align_iter, bool sparseAln, bool filterResults=false)
EAlignFlags
Request a list of alignments from a bioseq.
static IAlnExplorer::EAlignType GetAlignType(const objects::CBioseq_Handle &handle, objects::SAnnotSelector &sel, bool &has_quality_map, bool isFastConfig=false)
when isFastConfig set to true, it indicates that that the call is used by seqconfig this means that s...
IAppJob::EJobState x_GetCoverageGraph()
IAppJob::EJobState x_GetAlignSmear(CSeqGlyph::TObjects &objs, objects::CAlign_CI &align_iter)
void SetSortBy(const string &sort_by)
TModelUnit m_Window
current window for smear bars
CSGAlignmentJob(const string &desc, objects::CBioseq_Handle handle, const objects::SAnnotSelector &sel, const TSeqRange &range, TModelUnit window, TSignedSeqPos &maxStartTail, TSignedSeqPos &maxEndTail)
constructor for loading coverage graph.
void x_BuildAlignDataSource(const objects::CSeq_align &align, TAlnDataSources &align_ds, bool sparseAln)
TSignedSeqPos & m_MaxEndTail
Max length of unaligned tail at the end of the sequence.
vector< CRef< CAlignGlyph > > m_Aligns
alignments feature retrieval
bool FilterDuplicatesAndBadReads(const objects::CSeq_align &mapped_align)
virtual EJobState x_Execute()
method truly doing the job.
void x_CreateAlignGlyphsForCrossOriginAlign(CSeqGlyph::TObjects &objs, const objects::CSeq_align &align, const objects::CSeq_align_Handle &align_handle, bool sparseAln, TMatedAlignments &mated_aligns, bool match_id, CSGAlignmentJob::EAlignFlags flag, bool filterResults)
CRef< CSeqGlyph > x_CreateFeatGlyph(objects::CSeq_loc_Mapper &mapper, const objects::CMappedFeat &mapped_feat, CAlignGlyph &align) const
CIRef< IAlignSorter > m_Sorter
bool m_SmearOverLimit
flag indicating if smear is needed
bool m_LinkPair
link mate pair aligns
TSignedSeqPos & m_MaxStartTail
Flag to show second-pass alignments (relevant to assembly to assembly alignments)
objects::SAnnotSelector m_Sel
our annotation selector
objects::CScope & GetScope(void) const
CSGAnnotJob inline methods.
TSeqRange m_Range
target range
IAppJob::EJobState x_CreateHistFromGraph(CSeqGlyph::TObjects &glyphs, const objects::SAnnotSelector &sel, TModelUnit window, bool fixed_scale, const TAxisLimits &y_limits)
objects::CBioseq_Handle m_Handle
target sequence
CSGJobResult – the data structure holding the seqgraphic job results.
CScope –.
Definition: scope.hpp:92
void SetTearline(size_t Tearline)
tearline factor – used to limit a list of glyphs
Definition: seq_glyph.hpp:730
list< CRef< CSeqGlyph > > TObjects
Definition: seq_glyph.hpp:85
CSeqGraphicJob – the base class of seqgraphic job for handling the job status such as reporting the p...
CRef< CObject > m_Result
virtual void SetTaskTotal(int total)
virtual void SetTaskName(const string &name)
virtual void SetTaskCompleted(int completed)
set total finished task number.
virtual void AddTaskCompleted(int delta)
set to add newly finished task number.
TJobToken m_Token
Job token recognizable by job listener.
CRef< CAppJobError > m_Error
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
@ e_Iupacna
Definition: sequtil.hpp:47
CSeqVector –.
Definition: seq_vector.hpp:65
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const
Definition: Seq_align.cpp:2028
TDim CheckNumRows(void) const
Validators.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
void SetPartialStart(bool val, ESeqLocExtremes ext)
void SetPartialStop(bool val, ESeqLocExtremes ext)
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
This class is intended to be used for optimized version of pairwise seq-align to replace CSparseAlnGr...
Sparse alignment.
Definition: sparse_aln.hpp:51
TDim GetDim(void) const
Alignment dimension (number of sequence rows in the alignment)
Definition: sparse_aln.cpp:69
void Update(const TSeqRange &range, const TUpdateMap &update)
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
TBase::const_iterator const_iterator
Definition: static_map.hpp:179
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Return a field reference representing the tokenized key, or a NULL reference if the key doesn't exist...
Definition: User_field.cpp:226
@ eRight
Towards higher aln coord (always to the right)
@ eLeft
Towards lower aln coord (always to the left)
IAlnGraphicDataSource - interface to a data source representing an abstract alignment used in graphic...
virtual void GetUnalignedTails(TSignedSeqPos &start_tail, TSignedSeqPos &end_tail, bool clipped=true) const
virtual IAlnExplorer::EAlignType GetAlignType() const
virtual TSeqPos GetSeqStart(TNumrow row) const =0
virtual const objects::CSeq_id & GetSeqId(TNumrow row) const =0
virtual string & GetSeqString(string &buffer, TNumrow row, const TSignedRange &seq_rng, const TSignedRange &aln_rng, bool anchor_direct) const
Get sequence string for a given row in sequence range.
virtual bool IsPositiveStrand(TNumrow row) const =0
virtual TSeqPos GetSeqStop(TNumrow row) const =0
virtual TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, IAlnExplorer::ESearchDirection dir=IAlnExplorer::eNone, bool try_reverse_dir=true) const =0
Alignment segment iterator interface.
@ eAllSegments
Iterate all segments.
Alignment segment interface.
virtual const TSignedRange & GetRange(void) const =0
Get the selected row range.
@ fAligned
Aligned segment.
@ fIndel
Either anchor or the selected row is not present in the segment.
@ fUnaligned
The range on the selected sequence does not participate in the alignment (the alignment range of the ...
@ fGap
Both anchor row and the selected row are not included in the segment (some other row is present and t...
virtual TSegTypeFlags GetType(void) const =0
Get current segment type.
virtual const TSignedRange & GetAlnRange(void) const =0
Get alignment range for the segment.
vector< TSeqRange > TIntervals
virtual TSeqPos GetBaseWidth(IAlnExplorer::TNumrow) const =0
virtual IAlnExplorer::TNumrow GetNumRows() const =0
virtual IAlnExplorer::TNumrow GetAnchor() const =0
virtual IAlnSegmentIterator * CreateSegmentIterator(IAlnExplorer::TNumrow, const IAlnExplorer::TSignedRange &, IAlnSegmentIterator::EFlags) const =0
virtual TSignedSeqPos GetSeqPosFromAlnPos(IAlnExplorer::TNumrow for_row, TSeqPos aln_pos, IAlnExplorer::ESearchDirection dir=IAlnExplorer::eNone, bool try_reverse_dir=true) const =0
size_type size() const
Definition: map.hpp:148
container_type::iterator iterator
Definition: map.hpp:54
const_iterator end() const
Definition: map.hpp:152
bool empty() const
Definition: map.hpp:149
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
bool empty() const
Definition: set.hpp:133
static const struct name_t names[]
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static int type
Definition: getdata.c:31
static char tmp[3200]
Definition: utf8.c:42
int offset
Definition: replacements.h:160
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
NetSchedule worker node application.
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:480
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
Definition: ncbimisc.hpp:843
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
CDiagContext & GetDiagContext(void)
Get diag context instance.
Definition: logging.cpp:818
CDiagContext_Extra Extra(void) const
Create a temporary CDiagContext_Extra object.
Definition: ncbidiag.hpp:2095
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Fatal(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1209
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
static bool LinkFeatures(CLinkedFeature::TLinkedFeats &feats, TFeatLinkingMode mode=1, ISeqTaskProgressCallback *p_cb=NULL)
Link features into a hierarchical list.
Definition: utils.cpp:453
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:168
list< CRef< CLinkedFeature > > TLinkedFeats
Definition: utils.hpp:80
static bool IsCrossOrigin(const objects::CSeq_loc &loc)
static bool SeparateStrands(const objects::CSeq_annot &seq_annot)
int GetOrigAnchor(void) const
const IAlnGraphicDataSource & GetAlignMgr(void) const
Inline methods.
static const string & GetUnnamedAnnot()
Get the commonly used symbol representing an unnamed annotation.
Definition: utils.hpp:534
static bool IsGeneModelFeature(int type, int subtype)
Does feature belong to gene model track Gene, RNA, cdregion, exon, C_region, and VDJ segments.
Definition: na_utils.cpp:190
const objects::CSeq_align_Handle & GetOrigAlignment(void) const
virtual const objects::CSeq_loc & GetLocation(void) const
access the position of this object.
void AddAlignments(const objects::SAnnotSelector &sel, ISeqTaskProgressCallback *p_cb=NULL)
Smear all the alignments matched by this selector on my bioseq.
const objects::CSeq_align & GetAlignment(void) const
@ eMaxRowLimit
GLdouble TModelUnit
Definition: gltypes.hpp:48
virtual bool IsCanceled() const override
EJobState
Job states (describe FSM)
Definition: app_job.hpp:86
@ eUnknown
Definition: app_popup.hpp:72
@ eCanceled
Definition: app_job.hpp:91
@ eCompleted
Definition: app_job.hpp:89
@ eFailed
Definition: app_job.hpp:90
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
Definition: Seq_loc.cpp:5037
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1056
@ fMerge_OverlappingOnly
Definition: Seq_loc.hpp:329
@ fMerge_Contained
Definition: Seq_loc.hpp:326
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ fCompareAll
@ fComparePositional
Use positional coordinates (ignore strands) when looking for abutting locations.
@ eNoOverlap
CSeq_locs do not overlap or abut.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_Canonical
Definition: sequence.hpp:114
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
const CTSE_Handle & GetTSE_Handle(void) const
const CSeqFeatData & GetData(void) const
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
bool ContainsSegment(const CSeq_id &id, size_t resolve_depth=kMax_Int, EFindSegment limit_flag=eFindSegment_NoLimit) const
Check if the seq-id describes a segment of the bioseq.
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to the seq-annot.
const TId & GetId(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const string & GetTitle(void) const
Definition: graph_ci.hpp:112
const CSeq_loc & GetLocation(void) const
const CSeq_graph & GetMappedGraph(void) const
Graph mapped to the master sequence.
Definition: graph_ci.hpp:100
SAnnotSelector & SetCollectNames(bool value=true)
Collect available annot names rather than annots.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & SetMaxSize(TMaxSize max_size)
Set maximum number of annotations to find.
CSeq_align_Handle GetSeq_align_Handle(void) const
Get original alignment handle.
Definition: align_ci.cpp:233
SAnnotSelector & AddUnnamedAnnots(void)
Add unnamed annots to set of annots names to look for.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
position_type GetLength(void) const
Definition: range.hpp:158
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
position_type GetToOpen(void) const
Definition: range.hpp:138
bool Empty(void) const
Definition: range.hpp:148
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2742
#define kEmptyStr
Definition: ncbistr.hpp:123
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
const TStr & GetStr(void) const
Get the variant data.
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TData & GetData(void) const
Get the Data member data.
TBool GetBool(void) const
Get the variant data.
bool IsInt(void) const
Check if variant Int is selected.
bool IsStr(void) const
Check if variant Str is selected.
TInt GetInt(void) const
Get the variant data.
bool IsBool(void) const
Check if variant Bool is selected.
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
bool IsSetExt(void) const
extra info Check if a value has been assigned to Ext data member.
Definition: Seq_align_.hpp:989
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
list< CRef< CUser_object > > TExt
Definition: Seq_align_.hpp:402
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:752
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
const TExt & GetExt(void) const
Get the Ext member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
@ eType_disc
discontinuous alignment
Definition: Seq_align_.hpp:104
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
void SetTo(TTo value)
Assign a value to To data member.
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
void SetId(TId &value)
Assign a value to Id data member.
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
void SetFrom(TFrom value)
Assign a value to From data member.
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
void SetStrand(TStrand value)
Assign a value to Strand data member.
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
vector< char > TValues
Definition: Byte_graph_.hpp:89
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_graph_.cpp:131
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_graph_.hpp:751
const TValues & GetValues(void) const
Get the Values member data.
Definition: Int_graph_.hpp:425
const TByte & GetByte(void) const
Get the variant data.
Definition: Seq_graph_.cpp:153
const TValues & GetValues(void) const
Get the Values member data.
const Tdata & Get(void) const
Get the member data.
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Seq_annot_.hpp:852
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
Definition: Seq_annot_.hpp:846
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
range(_Ty, _Ty) -> range< _Ty >
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
int tolower(Uchar c)
Definition: ncbictype.hpp:72
Useful/utility classes and methods.
T max(T x_, T y_)
T bound(T x_, T xlo_, T xhi_)
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
struct named_group named_group
static int * results[]
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
Reader-writer based streams.
#define row(bind, expected)
Definition: string_bind.c:73
int m_Data[CAlnStatConfig::eStat_Total+1]
SAnnotSelector –.
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
Definition: type.c:6
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:57:49 2024 by modify_doxy.py rev. 669887