NCBI C++ ToolKit
read_rrna.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: read_rrna.cpp 91324 2020-10-09 14:48:11Z gouriano $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Azat Badretdin
27 *
28 * File Description:
29 * Reading of rRNA prediction tables into CReadBlastApp::m_extRNAtable2.
30 * ===========================================================================
31 */
32 #include <ncbi_pch.hpp>
33 #include "read_blast_result.hpp"
34 
35 int CReadBlastApp::ReadRRNA2(const string& file)
36 {
37  if(PrintDetails()) NcbiCerr << "ReadRRNA2(" << file << "): start" << NcbiEndl;
38  int n=0;
39  ifstream is(file.c_str());
40  if(!is.good())
41  {
42  NcbiCerr << "CReadBlastApp::ReadRRNA2(" << file << "): ERROR: cannot open " << NcbiEndl;
43  }
44  map <string, TSimpleSeq> tmp_rrna; // with hash
45  while(is.good())
46  {
47  char line[0x1000];
48  is.getline(line, 0xFFF);
49  if(PrintDetails()) NcbiCerr << "ReadRRNA2(" << file << "): line: " << line << NcbiEndl;
50  if(!is.good()) break;
51 // <TAB>223771 225324 7EB2BCB7 + 16S ideal
52  char *token = strtok(line, " \t");
53  int icol=1;
54  int from=0, to=0;
55  string type3 = "";
56  string codon="";
57  string genome_name="";
58  map <string, int> last_for_type;
59  string hash;
60  string strand1;
61  string method;
62  // int length=0;
63  // double probability=0.0;
64  while(token != 0)
65  {
66  if(PrintDetails()) NcbiCerr << "ReadRRNA2(" << file << "): token[" << icol << "]: " << token << NcbiEndl;
67  switch(icol)
68  {
69  case 1: from = atoi(token); break;
70  case 2: to = atoi(token); break;
71  case 3: hash = token; break;
72  case 4: strand1 = token; break;
73  case 5: type3 = token; break;
74  case 6: method = token; break;
75  // case 7: length = atoi(token); break;
76  // case 8: probability = atof(token); break;
77  default: break;
78  }
79  token = strtok(0, " \t");
80  icol++;
81  }
82  // if(method != "ideal" && type3 == "5S") continue;
83  bool new_rrna = tmp_rrna.find(hash) == tmp_rrna.end();
84  ENa_strand strand = strand1 == "-" ? eNa_strand_minus : eNa_strand_plus;
85 // last for type
86  // int ilast_for_type = 1;
87  // if(last_for_type.find(type3) != last_for_type.end()) ilast_for_type = last_for_type[type3];
88  last_for_type[type3]++;
89 //
90  CNcbiStrstream descstr;
91  descstr << type3 << " ribosomal RNA predicted by NCBI method with "
92  << (method=="ideal" ? "high":"low" )
93  << " score";
94 /*
95  if(probability > 0)
96  descstr << " with probability " << probability;
97 */
98  descstr << '\0';
99  TSimpleSeq new_ext_rna;
100  TSimpleSeq& ext_rna = new_rrna ? new_ext_rna : tmp_rrna[hash];
101 // lot of tRNAs seems to be having left position just one bp off, assuming that it is a difference in naming
102  TSimplePair pair;
103  pair.from = from;
104  pair.to = to ;
105  pair.strand = strand;
106  ext_rna.exons.push_back(pair);
107  ext_rna.type = type3;
108  ext_rna.key = ext_rna.exons[0].from;
109  ext_rna.name = hash;
110  ext_rna.description = descstr.str();
111  if(new_rrna) tmp_rrna[hash] = new_ext_rna;
112  }
113  for(map<string,TSimpleSeq>::const_iterator seq = tmp_rrna.begin(); seq!=tmp_rrna.end(); seq++)
114  {
115  m_extRNAtable2.push_back(seq->second);
116  string ext_rna_range = printed_range(seq->second);
117  if(PrintDetails()) NcbiCerr << "ReadRRNA2(" << file << "): adding "
118  << ext_rna_range << NcbiEndl;
119  n++;
120  }
121 
122  if(PrintDetails()) NcbiCerr << "ReadRRNA2(" << file << "): end" << NcbiEndl;
123  return n;
124 }
125 
126 
static bool PrintDetails(int current_verbosity=m_current_verbosity)
int ReadRRNA2(const string &file)
Definition: read_rrna.cpp:35
TSimpleSeqs m_extRNAtable2
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCerr
Definition: ncbistre.hpp:544
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
FILE * file
yy_size_t n
string printed_range(const TSeqPos from2, const TSeqPos to2)
Definition: shortcuts.cpp:320
TSimplePairs exons
Definition: _hash_fun.h:40
Modified on Wed Dec 06 07:15:28 2023 by modify_doxy.py rev. 669887