NCBI C++ ToolKit
seqalign_set_convert.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqalign_set_convert.cpp 83266 2018-08-07 18:29:39Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file seqalign_set_convert.cpp
31  * Converts a Seq-align-set into a neutral seqalign for use with the
32  * CSeqAlignCmp class
33  */
34 
35 #include <ncbi_pch.hpp>
36 #include "seqalign_set_convert.hpp"
37 
38 // Object includes
45 
48 BEGIN_SCOPE(blast)
49 BEGIN_SCOPE(qa)
50 
/// Debugging aid: write a serializable object to a file as text ASN.1.
///
/// The body is compiled only when VERBOSE_DEBUG is defined; otherwise the
/// function does nothing.
///
/// @param fname  Name of the file to create [in]
/// @param obj    Object streamed to the file through MSerial_AsnText [in]
/// @throws std::runtime_error if the output file cannot be opened
///         (VERBOSE_DEBUG builds only)
template <class T>
void s_PrintTextAsnObject(const std::string& fname, const T& obj)
{
#if defined(VERBOSE_DEBUG)
    std::ofstream out(fname.c_str());
    if (!out) {
        // Trailing space keeps the file name separated from the message
        throw std::runtime_error("Failed to open " + fname);
    }
    out << MSerial_AsnText << obj;
#endif
}
62 
63 void SetScores(const CSeq_align::TScore& scores, SeqAlign& retval)
64 {
65  ITERATE(CSeq_align::TScore, s, scores) {
66  if ( !(*s)->CanGetId() ) {
67  continue;
68  }
69 
70  _ASSERT((*s)->GetId().IsStr());
71  const string score_type = (*s)->GetId().GetStr();
72  if (score_type == "score") {
73  retval.score = (*s)->GetValue().GetInt();
74  } else if (score_type == "e_value") {
75  retval.evalue = (*s)->GetValue().GetReal();
76  } else if (score_type == "bit_score") {
77  retval.bit_score = (*s)->GetValue().GetReal();
78  } else if (score_type == "num_ident") {
79  retval.num_ident = (*s)->GetValue().GetInt();
80  }
81  }
82 }
83 
84 void DensegConvert(const objects::CDense_seg& denseg, SeqAlign& retval)
85 {
86  string fname("densegconvert.asn");
87  s_PrintTextAsnObject(fname, denseg);
88 
89  if (denseg.CanGetDim() && denseg.GetDim() != SeqAlign::kNumDimensions) {
90  throw runtime_error("Invalid number of dimensions");
91  }
92 
93  retval.query_strand = denseg.GetSeqStrand(0);
94  retval.subject_strand = denseg.GetSeqStrand(1);
95 
96  copy(denseg.GetStarts().begin(),
97  denseg.GetStarts().end(),
98  back_inserter(retval.starts));
99 
100  copy(denseg.GetLens().begin(),
101  denseg.GetLens().end(),
102  back_inserter(retval.lengths));
103  _ASSERT(retval.lengths.size() == (size_t)denseg.GetNumseg());
104 
105  _ASSERT(denseg.CanGetIds());
106  const CDense_seg::TIds& ids = denseg.GetIds();
107  _ASSERT(ids.size() == (size_t)denseg.GetDim());
108 
109  CRef<CSeq_id> query_id = ids.front();
110  if (query_id->IsGi()) {
111  retval.sequence_gis.SetQuery(query_id->GetGi());
112  }
113 
114  CRef<CSeq_id> subj_id = ids.back();
115  if (subj_id->IsGi()) {
116  retval.sequence_gis.SetSubject(subj_id->GetGi());
117  }
118 
119 }
120 
121 void SeqAlignConvert(const objects::CSeq_align& sa, SeqAlign& retval)
122 {
123  string fname("seqalignconvert.asn");
124  s_PrintTextAsnObject(fname, sa);
125 
126  if (sa.GetType() != CSeq_align::eType_partial) {
127  throw runtime_error("Seq-align is not of partial type");
128  }
129  if (sa.CanGetDim() && sa.GetDim() != SeqAlign::kNumDimensions) {
130  throw runtime_error("Invalid number of dimensions");
131  }
132 
133  SetScores(sa.GetScore(), retval);
134 
135  const CSeq_align::C_Segs& segs = sa.GetSegs();
136 
137  switch (segs.Which()) {
139  _ASSERT(segs.IsDenseg());
140  DensegConvert(segs.GetDenseg(), retval);
141  break;
142 
144  _ASSERT(segs.IsStd());
145  //StdsegConvert(segs.GetStd(), retval);
146  throw runtime_error("Std-seg support is not implemented");
147  break;
148 
149  default:
150  throw runtime_error("Unsupported alignment data type");
151  }
152 }
153 
154 void BlastSeqAlignSetConvert(const objects::CSeq_align& sa,
155  std::vector<SeqAlign>& retval)
156 {
157  string fname("blastseqalignconvert.asn");
158  s_PrintTextAsnObject(fname, sa);
159 
160  SeqAlign neutral_seqalign;
161  SeqAlignConvert(sa, neutral_seqalign);
162  retval.push_back(neutral_seqalign);
163 }
164 
165 void SeqAlignSetConvert(const objects::CSeq_align_set& ss,
166  std::vector<SeqAlign>& retval)
167 {
168  if ( !ss.CanGet() ) {
169  throw runtime_error("Empty Seq-align-set");
170  }
171 
172  retval.clear();
173  ITERATE(CSeq_align_set::Tdata, seqalign_set, ss.Get()) {
174  BlastSeqAlignSetConvert(**seqalign_set, retval);
175  }
176 }
177 
178 END_SCOPE(qa)
179 END_SCOPE(blast)
User-defined methods of the data storage class.
void SetQuery(TGi gi)
void SetSubject(TGi gi)
#define T(s)
Definition: common.h:230
std::ofstream out("events_result.xml")
main entry point for tests
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define MSerial_AsnText
I/O stream manipulators.
Definition: serialbase.hpp:696
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
list< CRef< CSeq_align > > Tdata
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
Magic spell ;-) needed for some weird compilers... very empiric.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
USING_SCOPE(objects)
void DensegConvert(const objects::CDense_seg &denseg, SeqAlign &retval)
void s_PrintTextAsnObject(const string fname, const T &obj)
void SetScores(const CSeq_align::TScore &scores, SeqAlign &retval)
void SeqAlignConvert(const objects::CSeq_align &sa, SeqAlign &retval)
void BlastSeqAlignSetConvert(const objects::CSeq_align &sa, std::vector< SeqAlign > &retval)
void SeqAlignSetConvert(const objects::CSeq_align_set &ss, std::vector< SeqAlign > &retval)
Converts a Seq-align-set into a neutral seqalign for use with the CSeqAlignCmp class.
Neutral sequence alignment (for representing an HSP in BLAST)
std::vector< TSeqPos > lengths
Lengths of aligned segments.
double bit_score
HSP bit score.
int num_ident
Number of identical residues.
double evalue
HSP evalue.
CAlignedGis sequence_gis
Gis of the aligned sequences.
std::vector< int > starts
Query/Subject starting offsets.
int score
HSP score.
int query_strand
Strand of the query sequence.
int subject_strand
Strand of the subject sequence.
#define _ASSERT
#define const
Definition: zconf.h:230
Modified on Thu Dec 07 10:08:48 2023 by modify_doxy.py rev. 669887