NCBI C++ ToolKit
merge_aligner.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: merge_aligner.cpp 95346 2021-11-08 14:35:53Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Nathan Bouk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiexpt.hpp>
34 #include <corelib/ncbi_system.hpp>
35 #include <math.h>
36 
38 
41 #include <objmgr/scope.hpp>
51 #include <objmgr/seq_vector.hpp>
52 
55 
56 
59 
62 
63 
/// Build a fresh result set holding merged alignments for the queries in
/// AccumResults that still need merging.
///
/// A query is merged when its best rank is greater than m_Threshold or is -1.
/// The output of x_MergeAlignments() is wrapped in a new CQuerySet and inserted
/// into the returned set only when it contains at least one alignment.
///
/// @param Scope         Handed to m_TreeAlignMerger and to x_MergeAlignments().
/// @param QuerySet      Not referenced in the visible body.
/// @param SubjectSet    Not referenced in the visible body.
/// @param AccumResults  Results accumulated so far; iterated via Get().
/// @return A newly allocated result set; it stays empty if nothing was merged.
64 TAlignResultsRef CMergeAligner::GenerateAlignments(objects::CScope& Scope,
65  ISequenceSet* QuerySet,
66  ISequenceSet* SubjectSet,
67  TAlignResultsRef AccumResults)
68 {
69  TAlignResultsRef NewResults(new CAlignResultsSet);
70  m_TreeAlignMerger.SetScope(&Scope);
71
// NOTE(review): the opening line of this loop macro is absent from this
// listing; QueryIter walks the container returned by AccumResults->Get().
73  QueryIter, AccumResults->Get()) {
74
75  int BestRank = QueryIter->second->GetBestRank();
// presumably -1 means "no rank available" -- confirm against GetBestRank().
76  if(BestRank > m_Threshold || BestRank == -1) {
77  _TRACE("Determined ID: "
78  << QueryIter->second->GetQueryId()->AsFastaString()
79  << " needs Merging.");
80
81  CRef<CSeq_align_set> Results;
82  Results = x_MergeAlignments(*QueryIter->second, Scope);
83
// Only non-empty merge output is recorded in the new result set.
84  if(!Results->Get().empty()) {
85  ERR_POST(Info << "Merge created " << Results->Get().size() << " aligns");
86  NewResults->Insert(CRef<CQuerySet>(new CQuerySet(*Results)));
87  }
88  }
89  }
90
91  return NewResults;
92 }
93 
94 
97 {
// NOTE(review): this appears to be the body of CMergeAligner::x_MergeAlignments
// (CQuerySet& QueryAligns, objects::CScope& Scope); the signature line, the
// declaration of Merged, the two enclosing loop-macro openings and a few other
// statements are absent from this listing -- confirm against the real source.
//
// Visible behaviour: for every subject alignment set reached through
// QueryAligns.Get(), the alignments are split into compartments, each
// compartment is sorted, and the alignments of `out` are appended to Merged,
// which is returned.
99
101  QueryAligns.Get()) {
103  AssemIter->second) {
104
105  CRef<CSeq_align_set> Set = SubjectIter->second;
106
// Active implementation: compartment-based merging.
107 #if 1
108  int options[2] = { fCompart_AllowIntersections,
// The loop bound is 1, so only options[0] is ever used.
110  for(int i = 0; i < 1; i++) {
111  list< CRef<CSeq_align_set> > compartments;
112  FindCompartments(Set->Get(), compartments,
113  options[i]);
114
115  ITERATE (list< CRef<CSeq_align_set> >, cit, compartments) {
116  CRef<CSeq_align_set> sas = *cit;
117  x_SortAlignSet(*sas, options[i]);
// NOTE(review): `out` is declared on a line missing from this listing.
// Its alignments are kept only when it is non-null and non-empty.
119  if( out && !out->Set().empty() ) {
120  ITERATE(CSeq_align_set::Tdata, AlignIter, out->Set()) {
121  Merged->Set().push_back(*AlignIter);
122  }
123  }
124  }
125  }
126 #endif
127
// Disabled alternative: partition by the strand of row 0, then sort and merge
// the plus and minus groups separately before appending both to Merged.
128 #if 0
130  Minuses(new CSeq_align_set);
131
132  ITERATE(CSeq_align_set::Tdata, AlignIter, Set->Get()) {
133  if( (*AlignIter)->GetSeqStrand(0) == eNa_strand_plus)
134  Pluses->Set().push_back(*AlignIter);
135  else if( (*AlignIter)->GetSeqStrand(0) == eNa_strand_minus)
136  Minuses->Set().push_back(*AlignIter);
137  }
138
139  CRef<CSeq_align_set> PlusOut, MinusOut;
140
141  if(!Pluses->Set().empty()) {
142  x_SortAlignSet(*Pluses);
143  PlusOut = x_MergeSeqAlignSet(*Pluses, Scope);
144  }
145  if(!Minuses->Set().empty()) {
146  x_SortAlignSet(*Minuses);
147  MinusOut = x_MergeSeqAlignSet(*Minuses, Scope);
148  }
149
150  if(!PlusOut.IsNull())
151  ITERATE(CSeq_align_set::Tdata, AlignIter, PlusOut->Set()) {
152  Merged->Set().push_back(*AlignIter);
153  }
154  if(!MinusOut.IsNull())
155  ITERATE(CSeq_align_set::Tdata, AlignIter, MinusOut->Set()) {
156  Merged->Set().push_back(*AlignIter);
157  }
158 #endif
159  }
160  }
161
162  return Merged;
163 }
164 
165 
167 CMergeAligner::x_MergeSeqAlignSet(CSeq_align_set& InAligns, objects::CScope& Scope)
168 {
169  list<CRef<CSeq_align> > In;
170  ITERATE(CSeq_align_set::Tdata, AlignIter, InAligns.Get()) {
171  CRef<CSeq_align> Align(*AlignIter);
172  In.push_back(Align);
173  }
174 
176 
177  try {
178  switch(m_Mode) {
179  case eAlignCleanup:
180  {{
181  CAlignCleanup Cleaner(Scope);
182  Cleaner.FillUnaligned(true);
183  Cleaner.Cleanup(In, Out->Set());
184  break;
185  }}
186  case eTreeAlignMerger:
187  {{
188  m_TreeAlignMerger.Merge(In, Out->Set());
189  break;
190  }}
191  }
192  } catch(CException& e) {
193  ERR_POST(Error << "Merge Error: " << e.ReportAll());
194  throw e;
195  }
196 
197  NON_CONST_ITERATE(CSeq_align_set::Tdata, AlignIter, Out->Set()) {
198  CRef<CSeq_align> Align(*AlignIter);
199  CDense_seg& Denseg = Align->SetSegs().SetDenseg();
200 
201  if(!Denseg.CanGetStrands() || Denseg.GetStrands().empty()) {
202  Denseg.SetStrands().resize(Denseg.GetDim()*Denseg.GetNumseg(), eNa_strand_plus);
203  }
204 
205  if(Denseg.GetSeqStrand(1) != eNa_strand_plus) {
206  Denseg.Reverse();
207  }
208 
209  CRef<CDense_seg> Filled = Denseg.FillUnaligned();
210  Denseg.Assign(*Filled);
211 
212  Align->SetNamedScore(GetName(), 1);
213  }
214 
215  if(Out->Set().empty())
216  return CRef<CSeq_align_set>();
217  return Out;
218 }
219 
220 
221 
223  const CRef<objects::CSeq_align>& B)
224 {
225  CScoreBuilder Scorer;
226  TSeqPos Lengths[2];
227  Lengths[0] = Scorer.GetAlignLength(*A);
228  Lengths[1] = Scorer.GetAlignLength(*B);
229  return (Lengths[0] > Lengths[1]);
230 }
231 
232 
234  const CRef<objects::CSeq_align>& B)
235 {
236  int Scores[2] = {0, 0};
237  A->GetNamedScore(CSeq_align::eScore_Score, Scores[0]);
238  B->GetNamedScore(CSeq_align::eScore_Score, Scores[1]);
239  return (Scores[0] > Scores[1]);
240 }
241 
242 
243 void CMergeAligner::x_SortAlignSet(CSeq_align_set& AlignSet, int CompartFlags)
244 {
245  vector<CRef<CSeq_align> > TempVec;
246  TempVec.reserve(AlignSet.Set().size());
247  copy(AlignSet.Set().begin(), AlignSet.Set().end(),
248  insert_iterator<vector<CRef<CSeq_align> > >(TempVec, TempVec.end()));
249 
250  if(CompartFlags & fCompart_SortByScore)
251  sort(TempVec.begin(), TempVec.end(), s_SortByScore);
252  else
253  sort(TempVec.begin(), TempVec.end(), s_SortByAlignedLength);
254 
255  AlignSet.Set().clear();
256  copy(TempVec.begin(), TempVec.end(),
257  insert_iterator<CSeq_align_set::Tdata>(AlignSet.Set(), AlignSet.Set().end()));
258 }
259 
260 
262 //end
Declares the CBl2Seq (BLAST 2 Sequences) class.
Declares the CBlastNucleotideOptionsHandle class.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Definitions of special type used in BLAST.
Main argument class for BLASTN application.
class CAlignCleanup implements an alignment cleanup utility based on the C++ alignment manager.
void FillUnaligned(bool b)
Fill any unaligned regions with explicit gaps.
void Cleanup(const TAligns &aligns_in, TAligns &aligns_out, EMode mode=eDefault)
ENa_strand GetSeqStrand(TDim row) const
Definition: Dense_seg.cpp:241
void Reverse(void)
Reverse the segments' orientation.
Definition: Dense_seg.cpp:644
CRef< CDense_seg > FillUnaligned() const
Create a new dense-seg with added all unaligned pieces (implicit inserts), if any,...
Definition: Dense_seg.cpp:1108
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
Definition: Dense_seg.cpp:62
@ eAlignCleanup
Use the older (CAlignCleanup) merge algorithm.
@ eTreeAlignMerger
Use the new (CTreeAlignMerger) merge algorithm.
string GetName() const
void x_SortAlignSet(objects::CSeq_align_set &AlignSet, int CompartFlags=0)
CRef< objects::CSeq_align_set > x_MergeAlignments(CQuerySet &QueryAligns, objects::CScope &Scope)
CTreeAlignMerger m_TreeAlignMerger
CRef< objects::CSeq_align_set > x_MergeSeqAlignSet(objects::CSeq_align_set &InAligns, objects::CScope &Scope)
TAssemblyToSubjectSet & Get()
Definition: result_set.hpp:82
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
TSeqPos GetAlignLength(const CSeq_align &align, bool ungapped=false)
Compute the length of the alignment (= length of all segments, gaps + aligned)
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
void Merge(const list< CRef< objects::CSeq_align > > &Input, list< CRef< objects::CSeq_align > > &Output)
Definition: merge_tree.cpp:300
Declares the CDiscNucleotideOptionsHandle class.
#define A(i)
Definition: ecp_curves.c:936
std::ofstream out("events_result.xml")
main entry point for tests
void FindCompartments(const list< CRef< CSeq_align > > &aligns, list< CRef< CSeq_align_set > > &align_sets, TCompartOptions options=fCompart_Defaults, float diff_len_filter=3.0f)
@ fCompart_SortByScore
@ fCompart_AllowIntersections
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
bool CanGetStrands(void) const
Check if it is safe to call GetStrands method.
Definition: Dense_seg_.hpp:574
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
list< CRef< CSeq_align > > Tdata
const TStrands & GetStrands(void) const
Get the Strands member data.
Definition: Dense_seg_.hpp:580
const Tdata & Get(void) const
Get the member data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
int i
Main class to perform a BLAST search on the local machine.
USING_SCOPE(objects)
static bool s_SortByScore(const CRef< objects::CSeq_align > &A, const CRef< objects::CSeq_align > &B)
static bool s_SortByAlignedLength(const CRef< objects::CSeq_align > &A, const CRef< objects::CSeq_align > &B)
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
Defines NCBI C++ exception handling.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
void Out(T t, int w, CNcbiOstream &to=cout)
Definition: parse.cpp:467
Modified on Wed Apr 17 13:10:07 2024 by modify_doxy.py rev. 669887