NCBI C++ ToolKit
cuConsensusMaker.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuConsensusMaker.cpp 38459 2008-07-07 18:34:41Z lanczyck $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  */
24 
25 #include <ncbi_pch.hpp>
29 
31 BEGIN_SCOPE(cd_utils)
32 /*
33 ConsensusMaker::ConsensusMaker(CRef<CSeq_align_set> seqAlign, CCdCore* cd) :
34  m_cd(cd), m_seqAligns(&(seqAlign->Set())), m_consensus(), m_rp()
35 {
36  addRows();
37  makeConsensus();
38 }*/
39 
// NOTE(review): the signature line (listing line 40) is missing from this
// extract; the initializer list below reads two parameters, `cd` and `incl`.
// Constructor: initializes m_seqAligns from cd->GetSeqAligns(), starts with
// m_made == false, loads the rows through addRows(), and builds a consensus
// only when the id fetched for alignment index 0 is not already a consensus.
41  m_consensus(), m_cd(cd), m_seqAligns(cd->GetSeqAligns()), m_made(false), m_inclusionRule(incl), m_rp()
42 {
43  addRows();
44  CRef< CSeq_id > seqId;
    // Ask the CD for the seq-id at alignment index 0 (filled into seqId).
45  cd->GetSeqIDFromAlignment(0, seqId);
    // Skip consensus construction when IsConsensus() reports that this id is
    // itself a consensus; m_made records whether makeConsensus() was run.
46  if (!IsConsensus(seqId))
47  {
48  makeConsensus();
49  m_made =true;
50  }
51 }
52 
// Empty body: nothing is done here.
// NOTE(review): the signature (listing line 53) is missing from this extract;
// presumably this is the ConsensusMaker destructor -- TODO confirm.
54 {
55 }
56 
// Feeds every alignment in m_seqAligns into the residue profile m_rp.
// NOTE(review): the signature (listing line 57) is missing from this extract;
// presumably this is addRows(), which the constructor calls -- TODO confirm.
// On the first alignment it also records the master seq-id, picks the master
// sequence string, and creates the local "consensus" seq-id.
58 {
59  list<CRef< CSeq_align > >::iterator lit = m_seqAligns.begin();
    // Nothing to add when there are no alignments.
60  if (lit == m_seqAligns.end())
61  return;
62  //build Residue profiles indexed by master
63  string mSeq, sSeq;
64  int seqinx = 0;
65  // add slaves
66  for(; lit != m_seqAligns.end(); lit++)
67  {
68  BlockModelPair bmPair(*(lit));
    // One-time setup, done only on the first alignment.
69  if (lit == m_seqAligns.begin())
70  {
71  m_masterSeqId = bmPair.getMaster().getSeqId();
    // NOTE(review): listing line 72 is missing here; the `else` at listing
    // line 79 implies it is an `if` condition -- presumably a check that the
    // CD uses a consensus sequence as master. TODO confirm.
73  {
    // Take the master sequence from the first consensus entry found in the
    // CD's sequence list; mSeq stays empty when none is found.
74  vector<int> seqIndice;
75  m_cd->FindConsensusInSequenceList(&seqIndice);
76  if (seqIndice.size() > 0)
77  mSeq = m_cd->GetSequenceStringByIndex(seqIndice[0]);
78  }
79  else
80  {
    // Otherwise look the master sequence up by its seq-id.
81  seqinx = m_cd->GetSeqIndex(m_masterSeqId);
82  mSeq = m_cd->GetSequenceStringByIndex(seqinx);
83  }
    // Local seq-id used to label the consensus sequence.
84  m_conSeqId = new CSeq_id(CSeq_id::e_Local, "consensus", "");
85  }
    // Fetch this row's slave sequence string and add the master/slave pair
    // to the profile. sSeq is cleared and then immediately reassigned.
86  sSeq.erase();
87  seqinx = m_cd->GetSeqIndex(bmPair.getSlave().getSeqId());
88  sSeq = m_cd->GetSequenceStringByIndex(seqinx);
89  m_rp.addOneRow(bmPair, mSeq, sSeq);
90  }
91 }
// Labels the guide alignment held by m_rp: the master side gets
// m_masterSeqId and the slave side gets m_conSeqId.
// NOTE(review): the signature (listing line 92) and listing lines 94-96 are
// missing from this extract, so any steps that run before the labelling are
// not visible here. Presumably this is makeConsensus() -- TODO confirm.
93 {
97  BlockModelPair& guideAlignment = m_rp.getGuideAlignment();
98  guideAlignment.getMaster().setSeqId(m_masterSeqId);
99  guideAlignment.getSlave().setSeqId(m_conSeqId);
100 }
101 
// Returns the stored consensus string m_consensus.
// NOTE(review): the signature (listing line 102) is missing from this
// extract; presumably this is getConsensus() -- TODO confirm.
103 {
104  return m_consensus;
105 }
106 
// Fills a Bioseq from the consensus: id m_conSeqId, raw representation,
// amino-acid molecule type, length m_consensus.size(), and the consensus
// string stored as Ncbieaa sequence data. Returns `result`.
// NOTE(review): the signature (listing line 107) and listing line 109, which
// must declare `result`, are missing from this extract. Presumably this is
// getConsensusSeqEntry() -- TODO confirm.
108 {
110  CBioseq& bioseq = result->SetSeq();
111  list< CRef< CSeq_id > >& idList = bioseq.SetId();
112  idList.push_back(m_conSeqId);
113  CSeq_inst& seqInst = bioseq.SetInst();
114  seqInst.SetRepr(CSeq_inst::eRepr_raw);
115  seqInst.SetMol(CSeq_inst::eMol_aa);
116  seqInst.SetLength(m_consensus.size());
117  CSeq_data& seqData = seqInst.SetSeq_data();
    // NOTE(review): the object created with `new` has no visible owner.
    // Confirm whether SetNcbieaa() takes ownership or copies its argument;
    // if it copies, this allocation is leaked.
118  seqData.SetNcbieaa(*(new CSeq_data::TNcbieaa(m_consensus)));
119  //*(new CSeq_data(m_consensus, CSeq_data::e_Ncbieaa)));
120  return result;
121 }
122 
// Returns the guide alignment held by the residue profile m_rp.
// NOTE(review): the signature (listing line 123) is missing from this
// extract. An identical body follows directly after; presumably the two are
// the const and non-const overloads of getGuideAlignment() -- TODO confirm.
124 {
125  return m_rp.getGuideAlignment();
126 }
127 
// Returns the guide alignment held by the residue profile m_rp.
// NOTE(review): the signature (listing line 128) is missing from this
// extract. An identical body directly precedes this one; presumably the two
// are the const and non-const overloads of getGuideAlignment() -- TODO confirm.
129 {
130  return m_rp.getGuideAlignment();
131 }
132 
// Converts m_rp's guide alignment into a seq-align via toSeqAlign() and
// returns it.
// NOTE(review): the signature (listing line 133) is missing from this
// extract; presumably this is getGuideSeqAlign() -- TODO confirm.
134 {
135  const BlockModelPair& guideAlignment = m_rp.getGuideAlignment();
136  return guideAlignment.toSeqAlign();
137 }
138 
// Passes a locally constructed UnalignedSegReader and `threshold` to
// m_rp.skipUnalignedSeg().
// NOTE(review): the signature (listing line 139) and listing lines 142 and
// 144-145 are missing from this extract, so any work done before or after
// the call is not visible. Presumably this is skipUnalignedSeg(int threshold)
// -- TODO confirm.
140 {
141  UnalignedSegReader ucr;
143  m_rp.skipUnalignedSeg(ucr, threshold);
146 }
147 
// Builds a list of seq-aligns remastered by the guide alignment and returns
// `result`. The first entry is derived from the first alignment's master
// paired with itself; the remaining entries are the alignments in
// m_seqAligns, in order. m_seqAligns itself is only read.
// NOTE(review): the signature (listing line 148) and listing line 150, which
// must declare `result`, are missing from this extract. Presumably this is
// the const remasterWithConsensus() overload -- TODO confirm.
149 {
151  list<CRef< CSeq_align > >& resultList = result->Set();
152  const BlockModelPair& guideAlignment = m_rp.getGuideAlignment();
153 
154  list<CRef< CSeq_align > >::const_iterator lit = m_seqAligns.begin();
155  //add the consensus to master
    // NOTE(review): *lit is dereferenced with no visible check that
    // m_seqAligns is non-empty.
156  BlockModelPair m2m(*lit);
    // Turn the pair into master-vs-master before remastering it.
157  m2m.getSlave() = m2m.getMaster();
158  m2m.remaster(guideAlignment);
159  resultList.push_back(m2m.toSeqAlign());
160 
    // Remaster every alignment, starting again from the first one.
161  for (; lit != m_seqAligns.end(); lit++)
162  {
163  BlockModelPair bmp(*lit);
164  bmp.remaster(guideAlignment);
165  resultList.push_back(bmp.toSeqAlign());
166  }
167  return result;
168 }
169 
// Remasters the CD's alignment list by the guide alignment and prepends a
// pair derived from the original first alignment's master. When `extended`
// is true the work is done on a degapped copy, which then replaces the CD's
// alignment list.
// NOTE(review): the signature (listing line 170) and listing lines 172 and
// 200 are missing from this extract. Presumably this is
// remasterWithConsensus(bool extended) -- TODO confirm.
171 {
    // NOTE(review): listing line 172 is missing directly above this return;
    // presumably it holds a guard condition, otherwise the rest of the body
    // would be unreachable. TODO confirm against the real source.
173  return;
    // Work on a copy of the guide alignment so it can be degapped locally.
174  BlockModelPair guideAlignment(m_rp.getGuideAlignment());
175  list< CRef< CSeq_align > >& cdAlignList = (*(m_cd->SetSeqannot().begin()))->SetData().SetAlign();
    // By default the CD's own list is edited in place.
176  list<CRef< CSeq_align > >* seqAlignLp = &cdAlignList;
177  list<CRef< CSeq_align > > extendedSeqAlignList;
178  if (extended)
179  {
    // Extended mode: switch to a degapped copy of the alignments and degap
    // the guide as well.
180  degapAlignment(m_cd, extendedSeqAlignList);
181  guideAlignment.degap();
182  seqAlignLp = &extendedSeqAlignList;
183  }
184  list<CRef< CSeq_align > >& seqAlignList = *seqAlignLp;
185  list<CRef< CSeq_align > >::iterator lit = seqAlignList.begin();
186  //add consensus::old_master pair
    // NOTE(review): *lit is dereferenced with no visible check that the
    // list is non-empty.
187  BlockModelPair m2m(*lit);
188  m2m.getSlave() = m2m.getMaster();
189  m2m.remaster(guideAlignment);
    // Replace each alignment with its remastered form.
190  for (; lit != seqAlignList.end(); lit++)
191  {
192  BlockModelPair bmp(*lit);
193  bmp.remaster(guideAlignment);
194  (*lit) = bmp.toSeqAlign();
195  }
    // Prepended only after the loop, so the loop above does not process it.
196  seqAlignList.push_front(m2m.toSeqAlign());
197 
    // In extended mode seqAlignList is the local copy, so write it back.
198  if (extended)
199  cdAlignList.assign(seqAlignList.begin(), seqAlignList.end());
201 }
202 
// Allocates a new CSeq_align_set, fills its list through the
// degapAlignment(cd, list) overload, and returns it.
// NOTE(review): the signature (listing line 203) is missing from this
// extract; the body reads a parameter named `cd`.
204 {
205  CRef< CSeq_align_set > seqAligns (new CSeq_align_set());
206  list< CRef< CSeq_align > >& seqAlignList = seqAligns->Set();
207  degapAlignment(cd, seqAlignList);
208  return seqAligns;
209 }
210 
// Appends to seqAlignList a degapped seq-align for each row of `cd` from
// index 1 up to GetNumRows() - 1.
// NOTE(review): the signature (listing line 211) is missing from this
// extract; the body reads `cd` and `seqAlignList`, presumably its parameters.
212 {
213  int num = cd->GetNumRows();
214  //skip i=0, it is the master not a real seqAlign row
215  for (int i = 1; i < num; i++)
216  {
    // Build a block-model pair from row i, degap it, and store the result.
217  BlockModelPair bp(cd->GetSeqAlign(i));
218  bp.degap();
219  seqAlignList.push_back(bp.toSeqAlign());
220  }
221 }
222 
// Replaces the alignment list of the CD's first seq-annot with the degapped
// alignments produced by degapAlignment(cd, list).
// NOTE(review): the signature (listing line 223) is missing from this
// extract; presumably this is degapCdAlignment(CCdCore* cd) -- TODO confirm.
224 {
225  list< CRef< CSeq_align > > seqAlignList;
226  degapAlignment(cd, seqAlignList);
227  list< CRef< CSeq_align > >& cdAlignList = (*(cd->SetSeqannot().begin()))->SetData().SetAlign();
228  cdAlignList.assign(seqAlignList.begin(), seqAlignList.end());
229 }
230 
// Degaps the CD's alignments via degapAlignment(cd, list) and passes each
// result to cd->AddPendingSeqAlign(). The CD's own alignment list is not
// assigned to here.
// NOTE(review): the signature (listing line 231) is missing from this
// extract; presumably this is degapCdAlignmentToPending(CCdCore* cd) --
// TODO confirm.
232 {
233  //CRef<CSeq_align_set> seqAlignSet = degapAlignment(cd);
234  //list< CRef< CSeq_align > >& seqAlignList = seqAlignSet->Set();
235  list< CRef< CSeq_align > > seqAlignList;
236  degapAlignment(cd, seqAlignList);
237  list< CRef< CSeq_align > >::iterator lit = seqAlignList.begin();
238  for (; lit != seqAlignList.end(); lit++)
239  {
240  cd->AddPendingSeqAlign(*lit);
241  }
242 }
243 
244 END_SCOPE(cd_utils)
void degap()
Definition: cuBlock.cpp:945
BlockModel & getMaster()
Definition: cuBlock.cpp:925
BlockModel & getSlave()
Definition: cuBlock.cpp:935
CRef< CSeq_align > toSeqAlign() const
Definition: cuBlock.cpp:983
int remaster(const BlockModelPair &guide)
Definition: cuBlock.cpp:1013
CRef< CSeq_id > getSeqId() const
Definition: cuBlock.hpp:102
void setSeqId(CRef< CSeq_id > seqId)
Definition: cuBlock.hpp:103
int GetNumRows() const
Definition: cuCdCore.cpp:215
bool AddSequence(CRef< CSeq_entry > seqAntry)
Definition: cuCdCore.cpp:1153
bool GetSeqAlign(int Row, CRef< CSeq_align > &seqAlign)
Definition: cuCdCore.cpp:1419
bool AddPendingSeqAlign(CRef< CSeq_align > seqAlign)
Definition: cuCdCore.cpp:1112
bool FindConsensusInSequenceList(vector< int > *indices=NULL) const
Definition: cuCdCore.cpp:1536
string GetSequenceStringByIndex(int SeqIndex)
Definition: cuCdCore.cpp:724
int GetSeqIndex(const CRef< CSeq_id > &SeqID) const
Definition: cuCdCore.cpp:337
bool UsesConsensusSequenceAsMaster() const
Definition: cuCdCore.cpp:1558
CNCBIeaa –.
Definition: NCBIeaa.hpp:66
Definition: Seq_entry.hpp:56
static void degapCdAlignment(CCdCore *cd)
const string & getConsensus()
CRef< CSeq_entry > getConsensusSeqEntry()
ResidueProfiles m_rp
CRef< CSeq_id > m_conSeqId
CRef< CSeq_id > m_masterSeqId
static void degapCdAlignmentToPending(CCdCore *cd)
CRef< CSeq_align_set > remasterWithConsensus() const
list< CRef< CSeq_align > > m_seqAligns
static CRef< CSeq_align_set > degapAlignment(CCdCore *cd)
CRef< CSeq_align > getGuideSeqAlign()
const BlockModelPair & getGuideAlignment() const
void skipUnalignedSeg(int threshold)
void setInclusionThreshold(double th)
const string & makeConsensus()
const string getConsensus(bool inNcbieaa=true)
void countUnalignedConsensus(UnalignedSegReader &ucr)
const BlockModelPair & getGuideAlignment() const
void addOneRow(BlockModelPair &bmp, const string &mSeq, const string &sSeq)
bool skipUnalignedSeg(UnalignedSegReader &ucr, int len)
bool IsConsensus(const CRef< CSeq_id > &seqId)
Definition: cuSequence.cpp:405
thread_local unique_ptr< FtaMsgPost > bmp
Definition: ftaerr.cpp:120
#define false
Definition: bool.h:36
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
TSeqannot & SetSeqannot(void)
Assign a value to Seqannot data member.
Definition: Cdd_.hpp:1228
Tdata & Set(void)
Assign a value to data member.
@ e_Local
local use
Definition: Seq_id_.hpp:95
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
TNcbieaa & SetNcbieaa(void)
Select the variant.
Definition: Seq_data_.hpp:657
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
int i
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:57:10 2024 by modify_doxy.py rev. 669887