NCBI C++ ToolKit
alignment_scorer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: alignment_scorer.cpp 64926 2014-10-23 16:04:54Z boukn $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Nathan Bouk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiexpt.hpp>
34 #include <corelib/ncbi_system.hpp>
35 #include <math.h>
36 
39 #include <objmgr/scope.hpp>
41 #include <objmgr/annot_ci.hpp>
51 #include <objects/seq/Seq_ext.hpp>
55 #include <objmgr/seq_vector.hpp>
57 
59 #include <objects/seq/Seqdesc.hpp>
62 #include <objects/seq/Seq_hist.hpp>
63 #include <objmgr/util/sequence.hpp>
64 
67 
68 
69 
71 
74 USING_SCOPE(blast);
75 
77 
79  CScope& Scope, CScoreBuilder &Scorer,
80  bool skip_unsupported)
81 {
82  ITERATE(CSeq_align_set::Tdata, Iter, alignments.Get()) {
83  CRef<CSeq_align> Curr(*Iter);
84 
85  double DummyScore;
86 
87  if ( Curr->GetSegs().IsDenseg() || ! skip_unsupported) {
88  if(!Curr->GetNamedScore(CSeq_align::eScore_Blast, DummyScore)) {
89  Scorer.AddScore(Scope, *Curr, CSeq_align::eScore_Blast);
90  }
91 
92  if(!Curr->GetNamedScore(CSeq_align::eScore_BitScore, DummyScore)) {
93  Scorer.AddScore(Scope, *Curr, CSeq_align::eScore_BitScore);
94  }
95 
96  if(!Curr->GetNamedScore(CSeq_align::eScore_EValue, DummyScore)) {
97  Scorer.AddScore(Scope, *Curr, CSeq_align::eScore_EValue);
98  }
99  }
100 
101  if(!Curr->GetNamedScore(CSeq_align::eScore_IdentityCount, DummyScore)) {
102  Scorer.AddScore(Scope, *Curr, CSeq_align::eScore_IdentityCount);
103  }
104 
105  if (Curr->GetSegs().Which() == CSeq_align::C_Segs::e_Disc) {
106  /// Compute score for the components of a Disc-seg alignment
107  s_ScoreAlignments(Curr->GetSegs().GetDisc(), Scope, Scorer,
108  skip_unsupported);
109  }
110  }
111 }
112 
113 
115 {
116  bool skip_unsupported = m_Options & eSkipUnsupportedAlignments;
119 
120  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
121  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
122  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
123  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
124  s_ScoreAlignments(*SubjectIter->second, Scope, Scorer,
125  skip_unsupported);
126  }
127  }
128  }
129 }
130 
131 
132 
133 
135 {
136  //CScoreBuilder Scorer(*Options);
137  //Scorer.SetEffectiveSearchSpace(Options->GetEffectiveSearchSpace());
139 
140  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
141  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
142  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
143  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
144 
145  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
146 
147  CRef<CSeq_align> Curr(*Iter);
148 
149  //Scorer.AddScore(Scope, *Curr, CScoreBuilder::eScore_PercentIdentity);
153  }
154  }
155  }
156  }
157 }
158 
159 
160 
161 
162 
164 {
165  //CScoreBuilder Scorer(*Options);
166  //Scorer.SetEffectiveSearchSpace(Options->GetEffectiveSearchSpace());
168 
169  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
170  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
171  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
172  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
173 
174  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
175 
176  CRef<CSeq_align> Curr(*Iter);
177 
178  Scorer.AddScore(Scope, *Curr, CSeq_align::eScore_PercentCoverage);
179  }
180  }
181  }
182  }
183 }
184 
185 
186 
188 {
189  CScoreBuilder Scorer;
190 
191  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
192  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
193  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
194  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
195 
196  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
197 
198  CRef<CSeq_align> Curr(*Iter);
199 
200  TSeqPos AlignLen, AlignedLen;
201  AlignedLen = Scorer.GetAlignLength(*Curr, true);
202  AlignLen = Curr->GetSeqRange(1).GetLength();
203 
204  Curr->SetNamedScore("expansion", double(AlignLen)/double(AlignedLen) );
205  }
206  }
207  }
208  }
209 }
210 
211 
212 
214 {
215  CScoreBuilder Scorer;
216 
217  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
218  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
219  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
220  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
221 
222  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
223 
224  CRef<CSeq_align> Curr(*Iter);
225 
226  CSeq_id QueryId;
227  QueryId.Assign(Curr->GetSeq_id(0));
228  CBioseq_Handle QueryHandle = Scope.GetBioseqHandle(QueryId);
229 
230  double WeightedIdent;
231  double NumIdent, Matches, QueryLen;
232 
234  Matches = Curr->GetAlignLength(false);
235  QueryLen = QueryHandle.GetInst_Length();
236 
237  WeightedIdent = (NumIdent + (Matches * m_K)) / (Matches + (QueryLen * m_K));
238  Curr->SetNamedScore("weighted_identity", WeightedIdent);
239  }
240  }
241  }
242  }
243 }
244 
245 
247 {
249 
250  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
251  CSeq_id QueryId;
252  QueryId.Set(QueryIter->first);
253  CBioseq_Handle Handle = Scope.GetBioseqHandle(QueryId);
254  if(!Handle)
255  continue;
256  TSeqPos QueryLen = Handle.GetInst_Length();
257 
258  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
259  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
260  CSeq_id TargetId;
261  TargetId.Set(SubjectIter->first);
262  CBioseq_Handle Handle = Scope.GetBioseqHandle(TargetId);
263  if (!Handle || !Handle.CanGetInst_Ext() ||
264  !Handle.GetInst_Ext().IsDelta()) {
265  continue;
266  }
267  TSeqPos TargetLen = Handle.GetInst_Length();
268  const CDelta_ext& DeltaExt = Handle.GetInst_Ext().GetDelta();
269  // Get Gaps on Target
270  list<TSeqRange> Gaps;
271  TSeqPos CurrStart = 0;
272  ITERATE(list<CRef<CDelta_seq> >, DeltaIter, DeltaExt.Get()) {
273  if( (*DeltaIter)->IsLiteral() ) {
274  TSeqPos Len = (*DeltaIter)->GetLiteral().GetLength();
275  TSeqRange Gap(CurrStart, CurrStart+Len-1);
276  Gaps.push_back(Gap);
277  CurrStart += Len;
278  } else if( (*DeltaIter)->IsLoc() ) {
279  TSeqPos Len = (*DeltaIter)->GetLoc().GetInt().GetLength();
280  TSeqPos CurrStop = CurrStart + Len - 1;
281  CurrStart = CurrStop + 1;
282  }
283  }
284 
285  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
286 
287  CRef<CSeq_align> Curr(*Iter);
288 
289  TSeqRange TargetRange = Curr->GetSeqRange(1);
290  if(TargetRange.GetLength() == QueryLen) {
291  Curr->SetNamedScore("hangs", 0);
292  continue;
293  }
294 
295  // the ranges will normally be next to gaps, not intersect gaps
296  // so expand the aligned range by 1 in both directions
297  if(TargetRange.GetFrom() > 0)
298  TargetRange.SetFrom(TargetRange.GetFrom()-1);
299  TargetRange.SetTo(TargetRange.GetTo()+1);
300 
301  int Hangs = 0;
302  // check internal gaps
303  ITERATE(list<TSeqRange>, GapIter, Gaps) {
304  if(GapIter->IntersectingWith(TargetRange)) {
305  Hangs = 1;
306  break;
307  }
308  }
309  // check the edges
310  if(TargetRange.GetFrom() == 0)
311  Hangs = 1;
312  if(TargetRange.GetTo() >= (TargetLen-1))
313  Hangs = 1;
314  Curr->SetNamedScore("hangs", Hangs);
315  }
316  }
317  }
318  }
319 }
320 
321 
322 
323 
325 {
326  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
327  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
328  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
329  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
330 
331  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
332 
333  CRef<CSeq_align> Curr(*Iter);
334 
335  if(!Curr->GetSegs().IsDenseg())
336  continue;
337 
338  if(Curr->GetSegs().GetDenseg().GetDim() != 2)
339  continue;
340 
341  vector<pair<TSeqPos, TSeqPos> > Tails;
342 
343  for(int Row = 0; Row < Curr->GetSegs().GetDenseg().GetDim(); ++Row) {
344  TSeqPos Start = Curr->GetSeqStart(Row);
345  TSeqPos Stop = Curr->GetSeqStop(Row);
346 
347  TSeqPos SeqLen = Scope.GetBioseqHandle(Curr->GetSeq_id(Row)).GetInst_Length();
348 
349  pair<TSeqPos, TSeqPos> Pair;
350  if(Curr->GetSeqStrand(Row) == eNa_strand_plus) {
351  Pair.first = Start;
352  Pair.second = SeqLen - Stop - 1;
353  } else {
354  Pair.second = Start;
355  Pair.first = SeqLen - Stop - 1;
356  }
357  Tails.push_back(Pair);
358  }
359 
360  bool Full, Half;
361  int Contained;
362  TSeqPos TailLen;
363 
364  Full = (Tails[0].second <= m_Slop && Tails[1].first <= m_Slop)
365  || (Tails[0].first <= m_Slop && Tails[1].second <= m_Slop);
366 
367  if(Tails[0].first <= m_Slop && Tails[0].second <= m_Slop)
368  Contained = 0;
369  else if(Tails[1].first <= m_Slop && Tails[1].second <= m_Slop)
370  Contained = 1;
371  else
372  Contained = -1;
373 
374  bool Forewards;
375  Forewards = (Tails[0].first > Tails[0].second)
376  && (Tails[1].first < Tails[1].second);
377 
378  if(Forewards) {
379  Half = Tails[0].second <= m_Slop || Tails[1].first <= m_Slop;
380  TailLen = max(Tails[0].second, Tails[1].first);
381  }
382  else {
383  Half = Tails[0].first <= m_Slop || Tails[1].second <= m_Slop;
384  TailLen = max(Tails[0].first, Tails[1].second);
385  }
386 
387  Curr->SetNamedScore("full_dovetail", (int)Full);
388  Curr->SetNamedScore("half_dovetail", (int)Half);
389  Curr->SetNamedScore("contained", Contained);
390  Curr->SetNamedScore("tail_length", (int)TailLen);
391  //Curr->SetNamedScore("align_length_ungap", (int)Curr->GetAlignLength(false));
392  }
393  }
394  }
395  }
396 }
397 
398 
399 
401 {
402  CScoreBuilder Scorer;
403 
404  NON_CONST_ITERATE(CAlignResultsSet::TQueryToSubjectSet, QueryIter, AlignSet->Get()) {
405  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
406  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
407  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
408 
409  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
410 
411  CRef<CSeq_align> Curr(*Iter);
412 
413  if(!Curr->GetSegs().IsDenseg())
414  continue;
415 
416  CSeq_id EndId;
417  EndId.Assign(Curr->GetSeq_id(0));
418 
419  CBioseq_Handle EndHandle = Scope.GetBioseqHandle(EndId);
420 
421  // Extract the Seq-annot.locs, for the clip region.
422  CSeq_loc ClipLoc;
423  ClipLoc.SetEmpty();
424 
426  CAnnot_CI AnnotIter(EndHandle, Sel);
427 
428  while(AnnotIter) {
429  if (AnnotIter->IsFtable() &&
430  AnnotIter->GetName() == "NCBI_GPIPE") {
431  CConstRef<CSeq_annot> Annot = AnnotIter->GetCompleteSeq_annot();
432 
434  Annot->GetData().GetFtable()) {
435  CConstRef<CSeq_feat> Feat = *FeatIter;
436  if (Feat->CanGetLocation() &&
437  Feat->CanGetData() &&
438  Feat->GetData().IsRegion() &&
439  (Feat->GetData().GetRegion() == "high_quality" ||
440  Feat->GetData().GetRegion() == "hight_quality") ) {
441 
442  ClipLoc.Assign(Feat->GetLocation());
443  }
444  }
445  }
446  ++AnnotIter;
447  }
448 
449 
450  if(ClipLoc.IsEmpty() && EndHandle.HasAnnots() ) {
451  CConstRef<CBioseq> EndBioseq = EndHandle.GetCompleteBioseq();
452  ITERATE(CBioseq::TAnnot, AnnotIter, EndBioseq->GetAnnot()) {
453  if( (*AnnotIter)->GetData().IsLocs() ) {
455  LocIter, (*AnnotIter)->GetData().GetLocs()) {
456  if( (*LocIter)->IsInt() &&
457  (*LocIter)->GetInt().GetId().Equals(EndId))
458  ClipLoc.Assign(**LocIter);
459  }
460  }
461  }
462  }
463 
464  if(ClipLoc.IsEmpty())
465  continue;
466 
467 
468  CRef<CSeq_id> ClipId(new CSeq_id);
469  ClipId->SetLocal().SetStr("CLIPPED_SCORER_ID_"+EndId.GetSeqIdString(true) );
470 
471  {{
472  CBioseq_Handle ClipHandle;
473 
474  try {
475  ClipHandle = Scope.GetBioseqHandle(*ClipId);
476  } catch(...) {
477  ;
478  }
479  // if this sequence hasn't been added to the scope yet, add it
480  if(!ClipHandle) {
481  CRef<CBioseq> ClipBioseq(new CBioseq);
482  ClipBioseq->SetId().push_back(ClipId);
483  CRef<CDelta_seq> ClipDelta(new CDelta_seq);
484  ClipDelta->SetLoc().Assign(ClipLoc);
485  ClipBioseq->SetInst().SetExt().SetDelta().Set().push_back(ClipDelta);
486  ClipBioseq->SetInst().SetRepr() = CSeq_inst::eRepr_virtual;
487  ClipBioseq->SetInst().SetMol() = CSeq_inst::eMol_dna;
488  ClipBioseq->SetInst().SetLength() = ClipLoc.GetInt().GetLength();
489  ClipHandle = Scope.AddBioseq(*ClipBioseq);
490  }
491  }}
492 
493 
494  // Fudge the New alignment.
495  CRef<CSeq_align> ClipAlign(new CSeq_align);
496  try {
497  ClipAlign->Assign(*Curr);
498  CRef<CDense_seg> ClipDenseg;
499  CRange<TSeqPos> ClipRange(ClipLoc.GetInt().GetFrom(), ClipLoc.GetInt().GetTo());
500  CRange<TSeqPos> AlignRange = ClipAlign->GetSeqRange(0);
501  CRange<TSeqPos> Intersect = ClipRange.IntersectionWith(AlignRange);
502  if(Intersect.Empty())
503  continue;
504 
505  ClipDenseg = ClipAlign->GetSegs().GetDenseg().ExtractSlice(0,
506  Intersect.GetFrom(), Intersect.GetTo() );
507  ClipDenseg->SetIds().front()->Assign(*ClipId);
508  ClipDenseg->OffsetRow(0, -(int)ClipLoc.GetInt().GetFrom());
509  ClipAlign->SetSegs().SetDenseg().Assign(*ClipDenseg);
510  ClipAlign->SetScore().clear();
511  } catch(CException& e) {
512  ERR_POST(Error << "Make Clip Align exception: " << e.ReportAll());
513  ERR_POST("Probably means the clip region is invalid.");
514  throw e;
515  }
516 
517 
518  double Temp;
519 
520  Scorer.AddScore(Scope, *ClipAlign, CSeq_align::eScore_IdentityCount);
522  Curr->SetNamedScore("num_ident_clip", Temp);
523 
524  Scorer.AddScore(Scope, *ClipAlign, CSeq_align::eScore_PercentIdentity_Gapped);
525  Scorer.AddScore(Scope, *ClipAlign, CSeq_align::eScore_PercentIdentity_Ungapped);
528  Curr->SetNamedScore("pct_identity_gapopen_only_clip", Temp);
529 
530  Scorer.AddScore(Scope, *ClipAlign, CSeq_align::eScore_PercentCoverage);
532  Curr->SetNamedScore("pct_coverage_clip", Temp);
533 
534  TSeqPos AlignLen = 0;
535  TSeqPos AlignedLen = 0;
536  AlignedLen = Scorer.GetAlignLength(*ClipAlign, true);
537  try {
538  // Sometimes the clipped region ends up being entirely gap, on the subject side.
539  AlignLen = ClipAlign->GetSeqRange(1).GetLength();
540  ClipAlign->SetNamedScore("expansion", double(AlignLen)/double(AlignedLen) );
541  ClipAlign->GetNamedScore("expansion", Temp);
542  Curr->SetNamedScore("expansion_clip", Temp);
543  } catch(CException& e) {
544  ERR_POST(Error << "Expansion exception, unable to get alignment length: " << e.ReportAll());
545  throw e;
546  }
547 
548  } // end alignment loop
549  }
550  }
551  }
552 }
553 
554 
555 
556 
557 
559  CScope& Scope)
560 {
561 
563  AlignSet->Get()) {
564  NON_CONST_ITERATE(CQuerySet::TAssemblyToSubjectSet, AssemIter, QueryIter->second->Get()) {
565  NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, AssemIter->second) {
566  //NON_CONST_ITERATE(CQuerySet::TSubjectToAlignSet, SubjectIter, QueryIter->second->Get()) {
567 
568  ITERATE(CSeq_align_set::Tdata, Iter, SubjectIter->second->Get()) {
569 
570  CRef<CSeq_align> Curr(*Iter);
571 
572  list<CRef<CSeq_id> > QueryIds, SubjectIds;
573 
574  x_GetCompList(Curr->GetSeq_id(0),
575  Curr->GetSeqStart(0),
576  Curr->GetSeqStop(0),
577  QueryIds, Scope);
578  x_GetCompList(Curr->GetSeq_id(1),
579  Curr->GetSeqStart(1),
580  Curr->GetSeqStop(1),
581  SubjectIds, Scope);
582 
583  bool IsCommon = x_CompareCompLists(QueryIds, SubjectIds);
584 
585  Curr->SetNamedScore("common_component", (int)IsCommon);
586  }
587  }
588  }
589  }
590 }
591 
592 
594  list<CRef<CSeq_id> >& CompIds)
595 {
596  if(!Handle.CanGetDescr())
597  return;
598 
599 /*
600 user {
601  type
602  str "RefGeneTracking" ,
603  data {
604  {
605  label
606  str "Status" ,
607  data
608  str "Inferred" } ,
609  {
610  label
611  str "Assembly" ,
612  data
613  fields {
614  {
615  label
616  id 0 ,
617  data
618  fields {
619  {
620  label
621  str "accession" ,
622  data
623  str "AC092447.5" } } } ,
624  {
625  label
626  id 0 ,
627  data
628  fields {
629  {
630  label
631  str "gi" ,
632  data
633  int 123456 } } } } } } } ,
634 */
635 
636  const CSeq_descr& Descr = Handle.GetDescr();
637  ITERATE(CSeq_descr::Tdata, DescIter, Descr.Get()) {
638  const CSeqdesc& Desc = **DescIter;
639  if(!Desc.IsUser())
640  continue;
641 
642  const CUser_object& User = Desc.GetUser();
643  string TypeString;
644  if(User.CanGetType() && User.GetType().IsStr())
645  TypeString = User.GetType().GetStr();
646  if(TypeString == "RefGeneTracking" && User.HasField("Assembly")) {
647 
648  const CUser_field& TopField = User.GetField("Assembly");
649  ITERATE(CUser_field::C_Data::TFields, FieldIter, TopField.GetData().GetFields()) {
650  const CUser_field& MiddleField = **FieldIter;
651  ITERATE(CUser_field::C_Data::TFields, FieldIter, MiddleField.GetData().GetFields()) {
652 
653  const CUser_field& LastField = **FieldIter;
654  string IdStr;
655  if(LastField.GetData().IsStr())
656  IdStr = LastField.GetData().GetStr();
657  else if(LastField.GetData().IsInt())
658  IdStr = NStr::IntToString(LastField.GetData().GetInt());
659  else
660  continue;
661 
662  CRef<CSeq_id> Id;
663  try {
664  Id.Reset(new CSeq_id(IdStr));
665  } catch(...) {
666  continue; // Non-Seq-id entry, just skip it.
667  }
668 
669  CSeq_id_Handle CanonicalId;
670  CanonicalId = sequence::GetId(*Id, Handle.GetScope(),
672  Id->Assign(*CanonicalId.GetSeqId());
673  CompIds.push_back(Id);
674  }
675  }
676  }
677  }
678 
679 }
680 
681 
683  TSeqPos Start, TSeqPos Stop,
684  list<CRef<CSeq_id> >& CompIds)
685 {
686  if(!Handle.CanGetInst_Ext())
687  return;
688 
689  const CSeq_ext& Ext = Handle.GetInst_Ext();
690 
691  if(!Ext.IsDelta())
692  return;
693 
694  const CDelta_ext& DeltaExt = Ext.GetDelta();
695 
696  TSeqPos SegStart = 0;
697  ITERATE(CDelta_ext::Tdata, SegIter, DeltaExt.Get()) {
698  CRef<CDelta_seq> Seg(*SegIter);
699 
700  if(Seg->IsLiteral()) {
701  SegStart += Seg->GetLiteral().GetLength();
702  continue;
703  }
704 
705  const CSeq_loc& Loc = Seg->GetLoc();
706 
707  if(!Loc.IsInt()) {
708  // ??
709  continue;
710  }
711 
712  const CSeq_interval& Int = Loc.GetInt();
713 
714  if(Start <= (SegStart+Int.GetLength()) && Stop >= SegStart) {
715  CSeq_id_Handle CanonicalId;
716  CanonicalId = sequence::GetId(Int.GetId(), Handle.GetScope(),
718 
719  CRef<CSeq_id> Id(new CSeq_id);
720  Id->Assign(*CanonicalId.GetSeqId());
721  CompIds.push_back(Id);
722  }
723 
724  SegStart += Int.GetLength();
725  }
726 }
727 
728 
730  TSeqPos Start, TSeqPos Stop,
731  list<CRef<CSeq_id> >& CompIds)
732 {
733  if(!Handle.CanGetInst_Hist())
734  return;
735 
736  const CSeq_hist& Hist = Handle.GetInst_Hist();
737 
738  if(!Hist.CanGetAssembly())
739  return;
740 
741  const CSeq_hist::TAssembly& Assembly = Hist.GetAssembly();
742 
743  CSeq_id_Handle HandleId = sequence::GetId(Handle.GetSeq_id_Handle(),
744  Handle.GetScope(), sequence::eGetId_Canonical);
745 
746  ITERATE(CSeq_hist::TAssembly, AlignIter, Assembly) {
747  const CSeq_align& Align = **AlignIter;
748 
749  // the Seq-aligns are expected to be in this row order
750  // but just in case, there is a check and swap later if needed
751  int query_row = 0;
752  int component_row = 1;
753 
754  CSeq_id_Handle AlignIdHandle =
755  CSeq_id_Handle::GetHandle(Align.GetSeq_id(query_row));
756  AlignIdHandle = sequence::GetId(AlignIdHandle, Handle.GetScope(),
758  if (AlignIdHandle != HandleId) {
759  std::swap(query_row, component_row);
760  }
761 
762  list< CConstRef<CSeq_align> > SplitAligns;
763  if (Align.GetSegs().IsDisc()) {
765  Align.GetSegs().GetDisc().Get()) {
766  SplitAligns.push_back(*it);
767  }
768  } else {
769  SplitAligns.push_back(CConstRef<CSeq_align>(&Align));
770  }
771 
772  ITERATE(list<CConstRef<CSeq_align> >, SplitIter, SplitAligns) {
773  CSeq_id_Handle CompId =
775  ((*SplitIter)->GetSeq_id(component_row));
776  CompId = sequence::GetId(CompId, Handle.GetScope(),
778 
779  if(Start <= (*SplitIter)->GetSeqStop(query_row) &&
780  Stop >= (*SplitIter)->GetSeqStart(query_row)) {
781  CRef<CSeq_id> Id(new CSeq_id);
782  Id->Assign(*CompId.GetSeqId());
783  CompIds.push_back(Id);
784  }
785  }
786  }
787 }
788 
789 
791  TSeqPos Start, TSeqPos Stop,
792  list<CRef<CSeq_id> >& CompIds,
793  CScope& Scope)
794 {
796 
797  x_GetDeltaExtCompList(Handle, Start, Stop, CompIds);
798  if(CompIds.empty())
799  x_GetUserCompList(Handle, CompIds);
800  if(CompIds.empty())
801  x_GetSeqHistCompList(Handle, Start, Stop, CompIds);
802  if(CompIds.empty()) {
803  CRef<CSeq_id> SelfId(new CSeq_id);
804  SelfId->Assign(Id);
805  CompIds.push_back(SelfId);
806  }
807 }
808 
809 
811  list<CRef<CSeq_id> >& SubjectIds)
812 {
813  list<CRef<CSeq_id> >::iterator QueryIter, SubjectIter;
814  for(QueryIter = QueryIds.begin(); QueryIter != QueryIds.end(); ++QueryIter) {
815  for(SubjectIter = SubjectIds.begin(); SubjectIter != SubjectIds.end(); ++SubjectIter) {
816  if( (*QueryIter)->Equals(**SubjectIter) )
817  return true;
818  }
819  }
820 
821  return false;
822 }
823 
824 
825 
826 
827 
829 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
USING_SCOPE(objects)
static void s_ScoreAlignments(const CSeq_align_set &alignments, CScope &Scope, CScoreBuilder &Scorer, bool skip_unsupported)
static const Int8 kEffectiveSearchSpace
Declares the CBl2Seq (BLAST 2 Sequences) class.
Declares the CBlastNucleotideOptionsHandle class.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Definitions of special type used in BLAST.
@ eMegablast
Nucl-Nucl (traditional megablast)
Definition: blast_types.hpp:65
Main argument class for BLASTN application.
CAnnot_CI –.
Definition: annot_ci.hpp:59
CBioseq_Handle –.
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
void x_GetCompList(const objects::CSeq_id &Id, TSeqPos Start, TSeqPos Stop, list< CRef< objects::CSeq_id > > &CompIds, objects::CScope &Scope)
void x_GetSeqHistCompList(CBioseq_Handle Handle, TSeqPos Start, TSeqPos Stop, list< CRef< objects::CSeq_id > > &CompIds)
void x_GetDeltaExtCompList(CBioseq_Handle Handle, TSeqPos Start, TSeqPos Stop, list< CRef< objects::CSeq_id > > &CompIds)
void x_GetUserCompList(CBioseq_Handle Handle, list< CRef< objects::CSeq_id > > &CompIds)
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
bool x_CompareCompLists(list< CRef< objects::CSeq_id > > &QueryIds, list< CRef< objects::CSeq_id > > &SubjectIds)
CDelta_seq –.
Definition: Delta_seq.hpp:66
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Definition: Dense_seg.cpp:890
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
Definition: Dense_seg.cpp:747
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
CScope –.
Definition: scope.hpp:92
TSeqPos GetAlignLength(const CSeq_align &align, bool ungapped=false)
Compute the length of the alignment (= length of all segments, gaps + aligned)
void AddScore(CScope &scope, CSeq_align &align, EScoreType score)
deprecated: use CSeq_align::EScoreType directly
void SetEffectiveSearchSpace(Int8 searchsp)
@ eScore_PercentIdentity_GapOpeningOnly
Definition: Seq_align.hpp:165
@ eScore_PercentIdentity_Gapped
Definition: Seq_align.hpp:163
@ eScore_PercentIdentity_Ungapped
Definition: Seq_align.hpp:164
@ eScore_PercentCoverage
Definition: Seq_align.hpp:168
@ eScore_IdentityCount
Definition: Seq_align.hpp:145
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
Definition: Seq_align.cpp:1993
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_ext –.
Definition: Seq_ext.hpp:66
CSeq_hist –.
Definition: Seq_hist.hpp:66
TSeqPos GetLength(void) const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
void ScoreAlignments(TAlignResultsRef Alignments, objects::CScope &Scope)
#define Len
Declares the CDiscNucleotideOptionsHandle class.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define Handle
Definition: ncbistd.hpp:119
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
CConstRef< CSeq_id > GetSeqId(void) const
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
Definition: Seq_id.cpp:2457
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetEmpty(TEmpty &v)
Definition: Seq_loc.hpp:981
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Canonical
Definition: sequence.hpp:114
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsFtable(void) const
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
bool HasAnnots(void) const
TInst_Length GetInst_Length(void) const
const string & GetName(void) const
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
position_type GetLength(void) const
Definition: range.hpp:158
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
bool Empty(void) const
Definition: range.hpp:148
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool CanGetType(void) const
Check if it is safe to call GetType method.
const TData & GetData(void) const
Get the Data member data.
const TFields & GetFields(void) const
Get the variant data.
vector< CRef< CUser_field > > TFields
bool IsInt(void) const
Check if variant Int is selected.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TInt GetInt(void) const
Get the variant data.
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
const TType & GetType(void) const
Get the Type member data.
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
list< CRef< CSeq_align > > Tdata
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
const TRegion & GetRegion(void) const
Get the variant data.
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool CanGetData(void) const
Check if it is safe to call GetData method.
Definition: Seq_feat_.hpp:919
bool CanGetLocation(void) const
Check if it is safe to call GetLocation method.
Definition: Seq_feat_.hpp:1111
bool IsRegion(void) const
Check if variant Region is selected.
@ e_Region
named region (globin locus)
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
const TId & GetId(void) const
Get the Id member data.
TFrom GetFrom(void) const
Get the From member data.
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
list< CRef< CSeq_align > > TAssembly
Definition: Seq_hist_.hpp:248
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
const TAssembly & GetAssembly(void) const
Get the Assembly member data.
Definition: Seq_hist_.hpp:512
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
list< CRef< CSeq_loc > > TLocs
Definition: Seq_annot_.hpp:197
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
bool IsUser(void) const
Check if variant User is selected.
Definition: Seqdesc_.hpp:1122
bool CanGetAssembly(void) const
Check if it is safe to call GetAssembly method.
Definition: Seq_hist_.hpp:506
@ eRepr_virtual
no seq data
Definition: Seq_inst_.hpp:93
Main class to perform a BLAST search on the local machine.
Magic spell ;-) needed for some weird compilers... very empiric.
#define NCBI_CONST_INT8(v)
64-bit integers
Definition: ncbi_std.h:195
Defines NCBI C++ exception handling.
T max(T x_, T y_)
bool IsCommon(const COrg_ref &org, const string &val)
Definition: utilities.cpp:2129
SAnnotSelector –.
#define const
Definition: zconf.h:232
Modified on Tue Apr 23 07:40:25 2024 by modify_doxy.py rev. 669887