NCBI C++ ToolKit
match.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Azat Badretdin
27 *
28 * File Description:
29 * Matching of input tRNA/rRNA features against externally supplied RNA entries (match_na overloads).
30 * ===========================================================================
31 */
32 #include <ncbi_pch.hpp>
33 #include "read_blast_result.hpp"
34 
35 
36 
// match_na(f1, type1) -- two-argument overload.
//
// NOTE(review): this listing is missing its line 37 (the signature line); the
// cross-reference at the end of the page gives it as
// `bool match_na(const CSeq_feat &f1, const string &type1)`. Listing lines 64,
// 123, 143, 165 and 181 are missing as well (see the notes at each gap).
//
// Purpose: compares the feature f1, whose RNA type is type1, with every entry
// visited through `ext_rna`, and appends one problem record to
// m_diag[diag_name].problems when no satisfactory counterpart is found.
//
// Parameters:
//   f1    - input feature; only f1.GetLocation() is read here.
//   type1 - type string; "16S", "23S" and "5S" are treated as rRNA, anything
//           else is reported as tRNA.
//
// Returns: the local flag `absent` (see the NOTE(review) on its reset inside
// the loop).
//
// Problems recorded (the eTRNA* codes are used for rRNA as well):
//   eTRNAMissing     - `absent` is still true after the loop
//   eTRNABadStrand   - an overlap was seen but no strand-matching candidate
//   eTRNAComMismatch - best candidate has zero overlap (see note below)
//   eTRNAMismatch    - best candidate's ends differ from the input ends
38  (
39  const CSeq_feat& f1,
40  const string& type1
41  )
42 {
43  bool is_rrna = type1 == "16S" || type1 == "23S" || type1 == "5S";
44  string diag_name_rna = is_rrna ? "rRNA" : "tRNA";
45  string diag_name = diagName(diag_name_rna, type1);
46  if(PrintDetails()) NcbiCerr << "match_na[f1," << type1 << ",] starts: "
47  << diag_name << NcbiEndl;
// State of the best candidate found so far; the g* values stay 0 until a
// candidate is accepted in step 3 below.
48  bool absent = true;
49  int gleft=0, gright=0;
50  int goverlap=0;
51  int gabs_left=0;
52  bool match_ext=false;
53  int input_left, input_right;
54  {
55  TSeqPos from, to;
56  ENa_strand strand;
57  getFromTo( f1.GetLocation(), from, to, strand);
58  input_left = from;
59  input_right = to;
60  }
// Textual range used in all messages: both ends are printed incremented by one.
61  CNcbiStrstream input_range_stream;
62  input_range_stream << (input_left + 1) << "..." << (input_right + 1) << '\0';
63  string input_range = input_range_stream.str();
// NOTE(review): listing line 64 is missing here; judging by the closing comment
// at listing line 120 it is presumably
// `NON_CONST_ITERATE(TSimpleSeqs, ext_rna, m_extRNAtable2)` -- confirm.
65  {
// rRNA entries are compared by `type`, everything else by `type3`.
66  string type2 = is_rrna ? ext_rna->type : ext_rna->type3;
67  if(PrintDetails()) {
68  NcbiCerr << "match_na[f1,"
69  << type1 << ","
70  << input_range
71  << "] with "
72  << type2 << NcbiEndl;
// NOTE(review): exons[0] is read without a visible emptiness check -- assumes
// every entry has at least one exon; confirm.
73  NcbiCerr << "ext_rna: \n"
74  <<"\t key="<<ext_rna->key<<"\n"
75  <<"\t locus_tag"<<ext_rna->locus_tag<<"\n"
76  <<"\t name"<<ext_rna->name<<"\n"
77  <<"\t description"<<ext_rna->description<<"\n"
78  <<"\t type"<<ext_rna->type<<"\n"
79  <<"\t type3"<<ext_rna->type3<<"\n"
80  <<"\t exons[0].from" << ext_rna->exons[0].from<<"\n"
81  <<"\t exons[0].to" << ext_rna->exons[0].to<<"\n"
82  <<"\t exons[0].strand" << ext_rna->exons[0].strand<<"\n"
83  ;
84  }
85  if(type1 != type2) {
86  if(PrintDetails()) {
87  cerr << " type mismatch, skipping\n";
88  }
89  continue;
90  }
// NOTE(review): `absent` is reset to true for every type-matching entry, so its
// final value reflects only the last such entry examined, even if an earlier
// entry overlapped. Looks unintended -- confirm before relying on the result.
91  absent = true;
92  int left, right;
93  bool strand_match;
94  int abs_left;
95  int overlap =match_na(f1, *ext_rna, left, right, strand_match, abs_left);
96 // 1. entries with no overlap are ignored (and leave `absent` true)
97  if(overlap==0) {
98  if(PrintDetails()) {
99  cerr << " overlap 0, skipping\n";
100  }
101  continue;
102  }
103  absent = false;
104 // 2. overlapping entries on a different strand are not candidates
105  if(!strand_match) {
106  if(PrintDetails()) {
107  cerr << " strand mismatch, skipping\n";
108  }
109  continue;
110  }
111 // 3. keep the candidate with the smallest |left|+|right| end offset
112  if(!match_ext || abs(left)+abs(right)<abs(gleft)+abs(gright) )
113  {
114  match_ext = true;
115  gleft = left;
116  gright = right;
117  goverlap = overlap;
118  gabs_left = abs_left;
119  }
120  } // NON_CONST_ITERATE(TSimpleSeqs, ext_rna, m_extRNAtable2)
// Case A: nothing overlapping (per the last type-matching entry) -> eTRNAMissing.
121  if(absent)
122  {
// NOTE(review): listing line 123 is missing here; `buffer` is used below without
// a visible declaration, so it is presumably declared on that line -- confirm.
124  if ( ! is_rrna ) {
125  buffer << "no external tRNA for this aminoacid: "
126  <<type1 << "[" << input_range << "]" << NcbiEndl;
127  } else {
128  buffer << "no external rRNA for this rRNA type: "
129  <<type1 << "[" << input_range << "]" << NcbiEndl;
130  }
131  buffer << "start bp: " << gabs_left << NcbiEndl;
132  buffer << '\0';
133  if(PrintDetails()) NcbiCerr << "match_na[f1,type1]: " << buffer.str() << NcbiEndl;
134  CNcbiStrstream misc_feat;
135  misc_feat << buffer.str() << '\0';
136  problemStr problem = {eTRNAMissing,
137  buffer.str(), misc_feat.str(), "", "", -1, -1, eNa_strand_unknown };
138  m_diag[diag_name].problems.push_back(problem);
139  return absent;
140  }
// Case B: something overlapped but no candidate passed the strand check
// -> eTRNABadStrand.
141  if(!match_ext && !absent)
142  {
// NOTE(review): listing line 143 is missing here (presumably the `buffer`
// declaration for this scope -- confirm).
144  if ( ! is_rrna) {
145  buffer << "tRNA does not match strand this aminoacid: "
146  <<type1 << "[" << input_range << "]" << NcbiEndl;
147  } else {
148  buffer << "rRNA does not match strand this rRNA type: "
149  <<type1 << "[" << input_range << "]" << NcbiEndl;
150  }
151  buffer << "start bp: " << gabs_left << NcbiEndl;
152  buffer << '\0';
153  if(PrintDetails()) NcbiCerr << "match_na[f1,type1]: " << buffer.str() << NcbiEndl;
154  CNcbiStrstream misc_feat;
155  misc_feat << buffer.str() << '\0';
156  problemStr problem = {eTRNABadStrand,
157  buffer.str(), misc_feat.str(), "", "", -1, -1, eNa_strand_unknown };
158  m_diag[diag_name].problems.push_back(problem);
159  return absent;
160  }
// Case C: a candidate exists but at least one end offset is non-zero.
161  if(gright || gleft)
162  {
// NOTE(review): this branch appears unreachable -- goverlap is only assigned in
// step 3, after entries with overlap==0 have been skipped, and the two early
// returns above leave only the match_ext==true case. Confirm.
163  if(!goverlap)
164  {
// NOTE(review): listing line 165 is missing here (presumably the `buffer`
// declaration -- confirm). Also, the message below has no space between
// diag_name_rna and "for (", so it reads e.g. "closest tRNAfor (".
166  buffer << "closest " << diag_name_rna
167  << "for (" <<type1 << "[" << input_range << "]"
168  << ") does not even overlap" << NcbiEndl;
169  buffer << "start bp: " << gabs_left << NcbiEndl;
170  buffer << '\0';
171  CNcbiStrstream misc_feat;
172  misc_feat << buffer.str() << '\0';
173  if(PrintDetails()) NcbiCerr << "match_na[f1,type1]: " << buffer.str()
174  << NcbiEndl;
175  problemStr problem = {eTRNAComMismatch, buffer.str(),
176  misc_feat.str(), "", "", -1, -1, eNa_strand_unknown };
177  m_diag[diag_name].problems.push_back(problem);
178  }
179  else
180  {
// NOTE(review): listing line 181 is missing here (presumably the `buffer`
// declaration -- confirm). Unlike every other branch, this one does not write
// `buffer << '\0'` before calling buffer.str(); check whether that is intended.
182  if ( ! is_rrna ) {
183  buffer << "closest tRNA for this aminoacid: "
184  <<type1 << "[" << input_range << "]"
185  << " have mismatched ends:" << NcbiEndl;
186  } else {
187  buffer << "closest rRNA for this rRNA type: "
188  <<type1 << "[" << input_range << "]"
189  << " have mismatched ends:" << NcbiEndl;
190  }
191  buffer << "start bp: " << gabs_left << NcbiEndl;
192  buffer << "left: " << gleft << ", right: " << gright << " bp shifted relative to the calculated ends" << NcbiEndl;
193  buffer << "overlap: " << goverlap << NcbiEndl;
194  CNcbiStrstream misc_feat;
195  misc_feat << buffer.str() << '\0';
196  if(PrintDetails()) NcbiCerr << "match_na[f1,type1]: " << buffer.str() << NcbiEndl;
197  problemStr problem = {eTRNAMismatch, buffer.str(), misc_feat.str(), "", "", -1, -1, eNa_strand_unknown };
198  m_diag[diag_name].problems.push_back(problem);
199  }
200  if(PrintDetails()) NcbiCerr << "match_na[f1," << type1 << "[" << input_range << "]" << "] ends with mismatch" << NcbiEndl;
201  return absent;
202  }
203 
// Case D: a candidate with both end offsets equal to zero -- no problem recorded.
204  if(PrintDetails()) NcbiCerr << "match_na[f1," << type1 << "[" << input_range << "]" << "] ends" << NcbiEndl;
205  return absent;
206 }
207 
// match_na(f1, ext_rna, left, right, strand_match, abs_left) -- six-argument
// overload.
//
// NOTE(review): listing line 208 (the signature line with the return type and
// name) is missing from this rendering; the function returns the local
// `int result`.
//
// Purpose: compares the location of f1 with the first exon of ext_rna.
//
// Parameters:
//   f1           - input feature; only f1.GetLocation() is read.
//   ext_rna      - external entry; only exons[0] (from, to, strand) is read.
//                  NOTE(review): no emptiness check on exons is visible here.
//   left         - [out] input_left  - calc_left
//   right        - [out] input_right - calc_right
//   strand_match - [out] whether the two strands are equal when result > 0;
//                  set to true when result is not positive
//   abs_left     - [out] input_left
//
// Returns: `result`, the overlap value (see the note on overlaps() below).
209  (
210  const CSeq_feat& f1,
211  const TSimpleSeq& ext_rna,
212  int& left,
213  int& right,
214  bool& strand_match,
215  int& abs_left
216  )
217 {
218  int result = 0;
219  if(PrintDetails()) NcbiCerr << "match_na[f1,ext_rna,...] starts" << NcbiEndl;
220 
221  ENa_strand input_strand;
222  TSeqPos from, to;
223  getFromTo( f1.GetLocation(), from, to, input_strand);
224 
225  ENa_strand calc_strand = ext_rna.exons[0].strand;
226 
// The TSeqPos (unsigned int) ends are copied into plain ints for the signed
// arithmetic below.
227  int input_left, input_right, calc_left, calc_right;
228  input_left = from;
229  input_right = to;
230  abs_left = input_left;
231 
232  calc_left = ext_rna.exons[0].from;
233  calc_right= ext_rna.exons[0].to;
234 
235 // output: signed end offsets of the input relative to the external entry
236  left = input_left - calc_left;
237  right = input_right - calc_right;
238 
239  int max_left = max(input_left, calc_left);
240  int min_right = min(input_right, calc_right);
241 // output: length of the intersection of the two intervals, 0 if they do not intersect
242  result = min_right>max_left ? min_right-max_left : 0;
// NOTE(review): `result` is handed to overlaps() right after being computed.
// The overload called here is not shown on this page (the cross-reference
// lists a different one that takes `int &overlap`), so this call presumably
// can overwrite `result` -- confirm.
243  overlaps(f1.GetLocation(), calc_left, calc_right, result);
244  if(PrintDetails()) NcbiCerr << "match_na[f1,ext_rna,...] result: "
245  << input_left << ","
246  << input_right << ","
247  << calc_left << ","
248  << calc_right << ","
249  << left << ","
250  << right << ","
251  << result << ","
252  << NcbiEndl;
253 
254 // output: strands are compared only when there is a positive overlap
255  if(result>0)
256  {
257  strand_match = input_strand == calc_strand;
258  if(PrintDetails()) NcbiCerr << "match_na[f1,ext_rna,...] strands: "
259  << int(input_strand) << ","
260  << int(calc_strand) << ","
261  << strand_match << ","
262  << NcbiEndl;
263  }
264  else
265  {
// No positive overlap: the strand comparison is skipped and reported as a match.
266  strand_match = true;
267  }
268 
269  if(!strand_match)
270  {
271  if(PrintDetails()) NcbiCerr << "match_na[f1,ext_rna,...] no strand match" << NcbiEndl;
272  }
273 
274  if(PrintDetails()) NcbiCerr << "match_na[f1,ext_rna,...] ends" << NcbiEndl;
275  return result;
276 }
277 
static bool PrintDetails(int current_verbosity=m_current_verbosity)
int overlaps(const TSimpleSeqs::iterator &seq1, const TSimpleSeqs::iterator &seq2, int &overlap)
Definition: overlaps.cpp:146
TSimpleSeqs m_extRNAtable2
static void getFromTo(const CSeq_loc &loc, TSeqPos &from, TSeqPos &to, ENa_strand &strand)
Definition: locations.cpp:34
bool match_na(const CSeq_feat &f1, const string &type1)
Definition: match.cpp:38
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCerr
Definition: ncbistre.hpp:544
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
#define abs(a)
Definition: ncbi_heapmgr.c:130
T max(T x_, T y_)
T min(T x_, T y_)
static pcre_uint8 * buffer
Definition: pcretest.c:1051
string diagName(const string &type, const string &value)
Definition: problems.cpp:839
@ eTRNAComMismatch
@ eTRNABadStrand
@ eTRNAMismatch
@ eTRNAMissing
list< TSimpleSeq > TSimpleSeqs
TSimplePairs exons
else result
Definition: token2.c:20
Modified on Fri Apr 12 17:20:46 2024 by modify_doxy.py rev. 669887