NCBI C++ ToolKit
read_blast_result.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef READ_BLAST_RESULT__HPP
2 #define READ_BLAST_RESULT__HPP
3 
4 /* $Id: read_blast_result.hpp 91324 2020-10-09 14:48:11Z gouriano $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Azat Badretdin
30 *
31 * File Description:
32 * major header
33 *
34 * ===========================================================================
35 */
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <string.h>
39 #include <stdlib.h>
40 
41 
42 #include <corelib/ncbiapp.hpp>
43 #include <corelib/ncbiargs.hpp>
44 #include <corelib/ncbienv.hpp>
45 #include <corelib/ncbifile.hpp>
46 
47 #include <serial/iterator.hpp>
48 #include <serial/objistr.hpp>
49 #include <serial/objostr.hpp>
50 #include <serial/serial.hpp>
51 
52 #include <objmgr/util/sequence.hpp>
54 #include <objmgr/bioseq_handle.hpp>
55 
58 #include <objects/general/Date.hpp>
59 
62 
65 
66 #include <objects/seq/Bioseq.hpp>
67 #include <objects/seq/Seq_inst.hpp>
72 #include <objects/seq/Seqdesc.hpp>
73 
79 
83 
87 
88 #include <string>
89 #include <algorithm>
90 
93 
94 #include "tbl.hpp"
95 
96 typedef struct
97  {
99  string locus_tag;
100  int sort_key;
101  } TGenfo;
102 //typedef map<CSeq_id::EAccessionInfo, CRef<CSeq_loc> > TranStr3;
105 
106 
107 
108 typedef struct { int q_left_left, q_left_middle, q_left_right, space, q_right_left, q_right_middle, q_right_right;
109  int s_left_left, s_left_middle, s_left_right;
110  int s_right_left, s_right_middle, s_right_right;
111  string q_id_left, q_id_right;
112  long s_id;
113  string q_name_left, q_name_right;
114  string s_name;
117  ENa_strand left_strand, right_strand;
118  int left_frame, right_frame;
119  int diff_left, diff_right;
120  int diff_edge_left, diff_edge_right;
128  {
132  eTbl
133  };
134 
136  {
137  eOverlap = (1<<0),
139  ePartial = (1<<2),
140  eFrameShift = (1<<3),
142  eRnaOverlap = (1<<5),
143  eTRNAMissing = (1<<6),
144  eTRNABadStrand = (1<<7),
146  eTRNAMismatch = (1<<9),
147  eTRNAAbsent = (1<<10),
148  eRemoveOverlap = (1<<11),
149  eTRNAUndefStrand = (1<<12),
150  eShortProtein = (1<<13),
155  | eTRNAAbsent
157  | eTRNAMismatch
158  ,
160  | eRnaOverlap
162  | ePartial
163  | eFrameShift
164  | eTRNAProblems
165  ,
166  };
167 
169  {
178  };
179 
180 typedef struct
181  {
183  string message;
185 //
186  string id1;
187  string id2;
188  int i1, i2;
190  } problemStr;
191 
192 typedef struct // Wrapper around a list of problemStr records; the Doxygen index lists diagMap as map< string, diagStr >.
193  {
194  list<problemStr> problems; // problem records held by this entry
195  } diagStr; // argument to seq
196 
197 typedef struct
198  {
200  int count, rnacount, genecount;
201  string name;
202  } TProblem_loc;
203 
205 
206 
208 typedef struct // One hit record; element type of blastStr::hits. NOTE(review): field meanings below are inferred from names only — confirm against ReadBlast().
209  {
210  list < long > sbjGIs; // presumably GI identifiers of the subject sequence(s) — TODO confirm
211  int sbjLen; // presumably subject sequence length — TODO confirm
212  string sbjName; // presumably subject name/defline — TODO confirm
213  double bitscore; // presumably BLAST bit score — TODO confirm
214  double eval; // presumably BLAST e-value — TODO confirm
215  int nident; // presumably number of identical positions — TODO confirm
216  int alilen; // presumably alignment length — TODO confirm
217  double pident; // presumably percent identity — TODO confirm
218  int npos; // presumably number of positive-scoring positions — TODO confirm
219  int ppos; // presumably percentage of positives (note: int, unlike pident) — TODO confirm
220  string alignment; // presumably the alignment text — TODO confirm
221  int sbjstart, sbjend, q_start, q_end; // presumably alignment bounds on subject and query — TODO confirm coordinate base
222  }
223  hitStr;
224 typedef struct // Per-query record; mapped type of the map<string, blastStr> taken by ReadBlast(), StoreBlast() and ProcessCDD().
225  {
226  int qLen; // presumably query length — TODO confirm
227  string qName; // presumably query name — TODO confirm
228  vector < hitStr > hits; // hit records stored for this query
229  }
230  blastStr;
231 
232 typedef struct // Element type of the vector<perfectHitStr> passed by reference to collectPerfectHits() and check_alignment().
233  {
234  string s_name; // presumably the subject name of the hit — TODO confirm
235  } perfectHitStr;
236 
238 
240 
241 typedef struct
242  {
243  string type3;
244  int from;
245  int to;
247  } TExtRNA;
248 
249 typedef struct
250  {
251  int from, to;
253  bool fuzzy_to;
255  } TSimplePair;
256 
257 typedef vector<TSimplePair> TSimplePairs;
258 
259 typedef struct
260  {
261  int key;
262  string locus_tag;
263  string name;
264  string description;
265  string type;
266  string type3;
269  }
270 TSimpleSeq;
271 
272 typedef list<TSimpleSeq> TSimpleSeqs;
273 
274 typedef vector<TExtRNA> TExtRNAtable;
275 
277 {
278  virtual void Init(void); // Initialize the application.
279  virtual int Run(void); // Run the application.
280 public:
281  static string getLocusTag(const CBioseq& seq);
282  static const CSeq_loc& getGenomicLocation(const CBioseq& seq);
283 
284 private:
285 
286 // Main functions
287  int ReadBlast(const char *file, map<string, blastStr>& blastMap );
288  int ReadTRNA2(const string& file);
289  int ReadRRNA2(const string& file);
290  int StoreBlast(map<string, blastStr>& blastMap );
291  // int ReadParents(CNcbiIstream& in, const string& nacc);
292  int ReadParents(CNcbiIstream& in, const list<long>& nacc);
293  bool ReadPreviousAcc(const string& file, list<long>& input_acc);
294  int ProcessCDD(map<string, blastStr>& blastMap);
295  int ReadTagMap(const char *file);
296 
297  int SortSeqs(void);
298  int CollectSimpleSeqs(TSimpleSeqs& seqs);
299  int SortSeqs(CBioseq_set::TSeq_set& seqs);
300  int AnalyzeSeqs(void);
302 
303  int SetParents(CSeq_entry* parent, CBioseq_set::TSeq_set& where);
304  int AnalyzeSeqsViaBioseqs(bool in_pool_prot, bool against_prot);
305  int AnalyzeSeqsViaBioseqs( CBioseq_set::TSeq_set& in_pool_seqs, CBioseq_set::TSeq_set& against_seqs,
306  bool in_pool_prot, bool against_prot);
307  int AnalyzeSeqsViaBioseqs(CBioseq& left, CBioseq_set::TSeq_set& against_seqs,
308  bool against_prot);
309  int AnalyzeSeqsViaBioseqs(CBioseq& left, CBioseq& right);
310  int AnalyzeSeqsViaBioseqs1(CBioseq& left); // unary analysis
311 
312 
313  int simple_overlaps(void);
314  int short_proteins(void);
315 // this is for optimization, do not laugh
316  void ugly_simple_overlaps_call(int& n_user_neighbors, int& n_ext_neighbors,
317  TSimpleSeqs::iterator& ext_rna,
318  TSimpleSeqs::iterator& first_user_in_range, TSimpleSeqs::iterator& first_user_non_in_range,
319  TSimpleSeqs& seqs, int max_distance,
320  TSimpleSeqs::iterator& first_ext_in_range, TSimpleSeqs::iterator& first_ext_non_in_range,
321  string& bufferstr);
322 
323  void addLoctoSimpleSeq(TSimpleSeq& seq, const CSeq_loc& loc);
324 
325 
326 
327 
328  int CollectFrameshiftedSeqs(map<string,string>& problem_names);
329  int CollectRNAFeatures(TProblem_locs& problem_locs);
330  int RemoveProblems(map<string, string>& problem_seqs, LocMap& loc_map);
331  int FixStrands(void);
332  int RemoveProblems(CSeq_entry& entry, map<string, string>& problem_seqs, LocMap& loc_map);
333  int RemoveProblems(CBioseq_set& setseq, map<string, string>& problem_seqs, LocMap& loc_map);
334  int RemoveProblems(CBioseq& seq, map<string, string>& problem_seqs, LocMap& loc_map);
335  int RemoveProblems(CBioseq_set::TSeq_set& seqs, map<string, string>& problem_seqs, LocMap& loc_map);
336  int RemoveProblems(CBioseq::TAnnot& annots, map<string, string>& problem_seqs, LocMap& loc_map);
338 
339 // reshuffles seq entry, when it has only one sequence
340  void NormalizeSeqentry(CSeq_entry& entry);
341 
342 
343  int RemoveInterim(void);
344  int RemoveInterim(CBioseq::TAnnot& annots); // proteins
345  int RemoveInterim2(CBioseq::TAnnot& annots); // nucleotides
346 
347 
348  void processFeature ( CSeq_annot::C_Data::TFtable::iterator& feat, TranStrMap3& tranStrMap );
349  template <typename t> void processAnnot ( CBioseq::TAnnot::iterator& annot, t& tranStrMap);
350  void addLocation(string& prot_id, CBioseq& seq, CRef<CSeq_loc>& loc, const string& locus_tag);
351 
352 
353  int CopyInfoFromGenesToProteins(void);
354  void dump_fasta_for_pretty_blast ( diagMap& diag);
355  void append_misc_feature(CBioseq_set::TSeq_set& seqs, const string& name, EProblem problem_type);
356  const CBioseq& get_nucleotide_seq(const CBioseq& seq);
357 
358 // tools
359  static char *next_w(char *w);
360  static char *skip_space(char *w);
361  static bool is_prot_entry(const CBioseq& seq);
362  static bool has_blast_hits(const CBioseq& seq);
363  static bool skip_to_valid_seq_cand(
364  CBioseq_set::TSeq_set::const_iterator& seq,
365  const CBioseq_set::TSeq_set& seqs);
366  static bool skip_to_valid_seq_cand(
367  CBioseq_set::TSeq_set::iterator& seq,
368  CBioseq_set::TSeq_set& seqs);
369  static int skip_toprot(CTypeIterator<CBioseq>& seq);
370  static int skip_toprot(CTypeConstIterator<CBioseq>& seq);
371  static bool skip_toprot(CBioseq_set::TSeq_set::const_iterator& seq,
372  const CBioseq_set::TSeq_set& seqs);
373  static bool skip_toprot(CBioseq_set::TSeq_set::iterator& seq,
374  CBioseq_set::TSeq_set& seqs);
375  static bool hasGenomicLocation(const CBioseq& seq);
376 
377  static const CSeq_interval& getGenomicInterval(const CBioseq& seq);
378  static bool hasGenomicInterval(const CBioseq& seq);
379 
380  static string GetProtName(const CBioseq& seq);
381 
382  static string getAnnotName(CBioseq::TAnnot::const_iterator& annot);
383  static string getAnnotComment(CBioseq::TAnnot::const_iterator& annot);
384  static int getQueryLen(const CBioseq& seq);
385  static vector<long> getGIs(CBioseq::TAnnot::const_iterator& annot);
386  static int getLenScore( CBioseq::TAnnot::const_iterator& annot);
387  static void getBounds
388  (
389  CBioseq::TAnnot::const_iterator& annot,
390  int* qFrom, int* qTo, int* sFrom, int* sTo
391  );
392  static bool giMatch(const vector<long>& left, const vector<long>& right);
393  static int collectPerfectHits(vector<perfectHitStr>& perfect, const CBioseq& seq);
394  static void check_alignment
395  (
396  CBioseq::TAnnot::const_iterator& annot,
397  const CBioseq& seq,
398  vector<perfectHitStr>& results
399  );
400 public:
401  static bool less_pair(const pair<int,int>& first,
402  const pair<int,int>& second);
403  static bool less_seq(const CRef<CSeq_entry>& first,
404  const CRef<CSeq_entry>& second);
405  static bool less_simple_seq(const TSimpleSeq& first,
406  const TSimpleSeq& second);
407  static void getFromTo(const CSeq_loc& loc, TSeqPos& from, TSeqPos& to, ENa_strand& strand);
408  static void getFromTo(const CSeq_loc_mix& mix, TSeqPos& from, TSeqPos& to, ENa_strand& strand);
409  static void getFromTo(const CPacked_seqint& mix, TSeqPos& from, TSeqPos& to, ENa_strand& strand);
410  static void getFromTo(const CSeq_interval& inter, TSeqPos& from, TSeqPos& to, ENa_strand& strand);
411  static int get_neighboring_sequences(
412  const TSimpleSeqs::iterator& ext_rna,
413  TSimpleSeqs::iterator& first_user_in_range, TSimpleSeqs::iterator& first_user_non_in_range,
414  TSimpleSeqs& seqs, const int max_distance);
415  static int sequence_proximity(const int target_from, const int target_to,
416  const int from, const int to, const int key);
417  static int sequence_proximity(const int target_from, const int target_to,
418  const int from, const int to, const int key, const int max_distance);
419  static void addSimpleTab(CNcbiStrstream& buffer, const string tag, const TSimpleSeqs::iterator& ext_rna,
420  const int max_distance);
421 
422 
423 private:
424 
425 // polymorphic wrappers around core data
426  CBeginInfo Begin(void);
428 // input tools
429 
430  static ECoreDataType getCoreDataType(istream& in);
431  bool IsSubmit();
432  bool IsEntry ();
433  bool IsTbl ();
434 
435 // output tools
436 
437  static void printReport( distanceReportStr *report, ostream& out=NcbiCout);
438  static void printOverlapReport( distanceReportStr *report, ostream& out=NcbiCout);
439  static void printPerfectHit ( const perfectHitStr& hit, ostream& out=NcbiCout);
440 
441  void printGeneralInfo(ostream& out=NcbiCerr);
442 
443  static void dumpAlignment( const string& alignment, const string& file);
444 
445 // more streamlined output tools
446  static bool hasProblems(const CBioseq& seq, diagMap& diag, const EProblem type);
447  static bool hasProblems(const string& qname, diagMap& diag, const EProblem type);
448  void reportProblems(const bool report_and_forget, diagMap& diag, ostream& out,
449  const CBioseq::TAnnot& annots, const EProblem type);
450  void reportProblems(const bool report_and_forget, diagMap& diag, ostream& out,
451  const CSeq_annot::C_Data::TFtable& feats, const EProblem type);
452  void reportProblems(const string& qname, diagMap& diag, ostream& out, const EProblem type);
453  void reportProblems(const bool report_and_forget, diagMap& diag, ostream& out=NcbiCout, const EProblem type=eAllProblems);
454  void reportProblemMessage(const string& message, ostream& out=NcbiCout);
455  static string ProblemType(const EProblem type);
456 
457  void reportProblemType(const EProblem type, ostream& out=NcbiCout);
458  void reportProblemSequenceName(const string& name, ostream& out=NcbiCout);
459 
460  void erase_problems(const string& qname, diagMap& diag, const EProblem type);
461 
462 // verbosity output tools
463  static void PushVerbosity(void) { m_saved_verbosity.push( m_current_verbosity); } // Saves the current verbosity level by pushing m_current_verbosity onto the m_saved_verbosity stack.
465 public:
466  static bool PrintDetails(int current_verbosity = m_current_verbosity) // Returns true when current_verbosity is below m_verbosity_threshold, after writing a "<level>(<threshold>): " prefix to NcbiCerr; returns false and writes nothing otherwise.
467  {
468  if(current_verbosity >= m_verbosity_threshold) // at or above the threshold: stay silent
469  return false;
470  NcbiCerr << current_verbosity << "(" << m_verbosity_threshold << "): "; // prefix only; presumably the caller appends the detail text — confirm at call sites
471  return true;
472  }
473  static void IncreaseVerbosity(void) { m_current_verbosity++; } // Increments m_current_verbosity by one; PrintDetails() reports only while the level is below m_verbosity_threshold.
474  static void DecreaseVerbosity(void) { m_current_verbosity--; } // Decrements m_current_verbosity by one.
475 private:
476 
477  void GetGenomeLen();
478  void CheckUniqLocusTag();
479 // algorithms
480  void GetRNAfeats
481  (
482  const LocMap& loc_map,
483  CSeq_annot::C_Data::TFtable& rna_feats,
484  const CSeq_annot::C_Data::TFtable& feats
485  );
486  void GetLocMap
487  (
488  LocMap& loc_map,
489  const CSeq_annot::C_Data::TFtable& feats
490  );
491  bool CheckMissingRibosomalRNA( const CBioseq::TAnnot& annots);
493 
494  int find_overlap(TSimpleSeqs::iterator& seq, const TSimpleSeqs::iterator& ext_rna,
495  TSimpleSeqs& seqs, int& overlap);
496  int find_overlap(TSimpleSeqs::iterator& seq, const TSimpleSeqs::iterator& ext_rna,
497  TSimpleSeqs& seqs, TSimpleSeqs& best_seq);
498 
499  bool overlaps_na ( const CBioseq::TAnnot& annots);
500  bool overlaps_na ( const CSeq_annot::C_Data::TFtable& feats);
501  bool overlaps_na ( const CSeq_feat& f1, const CSeq_feat& f2, int& overlap);
502 
503  bool overlaps_prot_na ( CBioseq& seq, const CBioseq::TAnnot& annots);
504  bool overlaps_prot_na ( CBioseq& seq, const CSeq_annot::C_Data::TFtable& feats );
505  bool overlaps_prot_na ( const string& n1, const CSeq_interval& i1, const CSeq_feat& f2, int& overlap );
506 
507 
508  bool match_na ( const CSeq_feat& f1, const string& type1);
509  int match_na ( const CSeq_feat& f1, const TSimpleSeq& ext_rna,
510  int& left, int& right, bool& strand_match, int& abs_left );
511  int overlaps(const TSimpleSeqs::iterator& seq1, const TSimpleSeqs::iterator& seq2, int& overlap);
512 
513  template <typename t1, typename t2> bool overlaps ( const t1& l1, const t2& l2, int& overlap);
514  // bool overlaps ( const CSeq_loc& l1, const CSeq_loc& l2, int& overlap);
515  bool overlaps ( const CSeq_loc& l1, int from, int to, int& overlap);
516  bool complete_overlap ( const CSeq_loc& l1, const CSeq_loc& l2);
517  // template <typename t1, typename t2> bool overlaps ( const t1& l1, const t2& l2, int& overlap);
518  bool overlaps
519  (
520  const CBioseq& left,
521  const CBioseq& right
522  );
523  bool fit_blast
524  (
525  const CBioseq& left,
526  const CBioseq& right,
527  string& common_subject
528  );
529  static bool fit_blast
530  (
531  const CBioseq& left,
532  const CBioseq& right,
533  CBioseq::TAnnot::const_iterator& left_annot,
534  CBioseq::TAnnot::const_iterator& right_annot,
535  int left_qLen,
536  int right_qLen,
537  int space,
538  distanceReportStr* report
539  );
540 
541 // member vars
542  // Member variable to help illustrate our naming conventions
545  int m_length;
549  bool m_usemap;
550  string m_align_dir;
553  // string m_previous_genome;
554  list<long> m_previous_genome;
555 
556 // TExtRNAtable m_extRNAtable;
557  TSimpleSeqs m_extRNAtable2; // external rna sequences
558  TSimpleSeqs m_simple_seqs; // internal rna sequences
559 
560 // algorithm control
561  static double m_small_tails_threshold;
562  static int m_n_best_hit;
563  static double m_eThreshold;
564  static double m_entireThreshold;
565  static double m_partThreshold;
570 
571 
572 // verbosity
575  static stack < int > m_saved_verbosity;
576 
577 
578 };
579 
580 // global functions
581 ESerialDataFormat s_GetFormat(const string& name);
582 
583 
585 
586 template <typename interval_type> string GetLocationString ( const interval_type& loc);
587 string GetLocationString ( const CSeq_feat& f); // will return just location
588 // string GetLocationString ( const CSeq_interval& inter );
589 // string GetLocationString ( const CSeq_loc& loc);
590 
591 string GetLocusTag(const CSeq_feat& f, const LocMap& loc_map);
592 
593 int addProblems(list<problemStr>& dest, const list<problemStr>& src);
594 
595 string GetStringDescr(const CBioseq& bioseq);
596 
597 string Get3type(const CRNA_ref& rna);
598 string GetRRNAtype(const CRNA_ref& rna);
599 
600 string printed_range(const TSeqPos from2, const TSeqPos to2); // Mother Of Printed_Ranges
601 string printed_range(const CSeq_feat& feat);
602 string printed_range(const CSeq_loc& seq_interval);
603 string printed_ranges(const CSeq_loc& seq_interval);
604 string printed_range(const CBioseq& seq);
605 string get_title(const CBioseq& seq);
606 string printed_range(const TSimpleSeqs::iterator& ext_rna);
607 string printed_range(const TSimplePair& apair);
608 string printed_range(const TSimpleSeq& ext_rna);
609 string printed_range(const TSimpleSeqs::iterator& ext_rna, const TSimpleSeqs::iterator& end);
610 string printed_range(const TSimpleSeqs::iterator& ext_rna, TSimpleSeqs& seqs);
611 
612 
613 
614 
615 
616 
617 string let1_2_let3(char let1);
618 string diagName(const string& type, const string& value);
619 
621 EMyFeatureType get_my_feat_type(const CSeq_feat& feat, const LocMap& loc_map);
622 string get_trna_string(const CSeq_feat& feat);
623 string GetRNAname(const CSeq_feat& feat);
624 
625 #endif // READ_BLAST_RESULT__HPP
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Class holding information about root of non-modifiable object hierarchy. Do not use it directly.
Definition: iterator.hpp:58
Class holding information about root of non-modifiable object hierarchy. Do not use it directly.
Definition: iterator.hpp:86
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
void printGeneralInfo(ostream &out=NcbiCerr)
int CollectFrameshiftedSeqs(map< string, string > &problem_names)
Definition: problems.cpp:890
int CollectRNAFeatures(TProblem_locs &problem_locs)
Definition: problems.cpp:855
static bool hasProblems(const CBioseq &seq, diagMap &diag, const EProblem type)
Definition: problems.cpp:141
static int m_verbosity_threshold
static bool PrintDetails(int current_verbosity=m_current_verbosity)
map< string, string > m_tagmap
void addLocation(string &prot_id, CBioseq &seq, CRef< CSeq_loc > &loc, const string &locus_tag)
Definition: copy_loc.cpp:135
static string GetProtName(const CBioseq &seq)
Definition: shortcuts.cpp:82
static bool less_pair(const pair< int, int > &first, const pair< int, int > &second)
static bool giMatch(const vector< long > &left, const vector< long > &right)
static int getQueryLen(const CBioseq &seq)
Definition: shortcuts.cpp:96
static vector< long > getGIs(CBioseq::TAnnot::const_iterator &annot)
static int sequence_proximity(const int target_from, const int target_to, const int from, const int to, const int key)
Definition: missing.cpp:348
CConstBeginInfo ConstBegin(void)
void CheckUniqLocusTag()
Definition: shortcuts.cpp:47
static stack< int > m_saved_verbosity
void erase_problems(const string &qname, diagMap &diag, const EProblem type)
Definition: problems.cpp:79
void GetLocMap(LocMap &loc_map, const CSeq_annot::C_Data::TFtable &feats)
Definition: locations.cpp:203
static bool has_blast_hits(const CBioseq &seq)
Definition: fit_blast.cpp:369
int RemoveInterim2(CBioseq::TAnnot &annots)
Definition: problems.cpp:792
static void IncreaseVerbosity(void)
static int m_n_best_hit
static char * next_w(char *w)
list< long > m_previous_genome
static int get_neighboring_sequences(const TSimpleSeqs::iterator &ext_rna, TSimpleSeqs::iterator &first_user_in_range, TSimpleSeqs::iterator &first_user_non_in_range, TSimpleSeqs &seqs, const int max_distance)
Definition: missing.cpp:275
bool overlaps_na(const CBioseq::TAnnot &annots)
Definition: overlaps.cpp:182
int AnalyzeSeqs(void)
Definition: analyze.cpp:151
static int collectPerfectHits(vector< perfectHitStr > &perfect, const CBioseq &seq)
void reportProblemMessage(const string &message, ostream &out=NcbiCout)
Definition: problems.cpp:172
static string getAnnotComment(CBioseq::TAnnot::const_iterator &annot)
Definition: shortcuts.cpp:471
static void printOverlapReport(distanceReportStr *report, ostream &out=NcbiCout)
void processFeature(CSeq_annot::C_Data::TFtable::iterator &feat, TranStrMap3 &tranStrMap)
Definition: copy_loc.cpp:166
TSimpleSeqs m_simple_seqs
int find_overlap(TSimpleSeqs::iterator &seq, const TSimpleSeqs::iterator &ext_rna, TSimpleSeqs &seqs, int &overlap)
Definition: overlaps.cpp:92
static bool hasGenomicInterval(const CBioseq &seq)
Definition: locations.cpp:160
static string getAnnotName(CBioseq::TAnnot::const_iterator &annot)
Definition: shortcuts.cpp:459
void reportProblemType(const EProblem type, ostream &out=NcbiCout)
Definition: problems.cpp:217
static string ProblemType(const EProblem type)
Definition: problems.cpp:177
static void addSimpleTab(CNcbiStrstream &buffer, const string tag, const TSimpleSeqs::iterator &ext_rna, const int max_distance)
Definition: missing.cpp:376
static const CSeq_interval & getGenomicInterval(const CBioseq &seq)
Definition: locations.cpp:181
int ProcessCDD(map< string, blastStr > &blastMap)
Definition: read_blast.cpp:545
void processAnnot(CBioseq::TAnnot::iterator &annot, t &tranStrMap)
Definition: copy_loc.cpp:190
int ReadRRNA2(const string &file)
Definition: read_rrna.cpp:35
static double m_trnascan_scoreThreshold
int simple_overlaps(void)
Definition: missing.cpp:149
void addLoctoSimpleSeq(TSimpleSeq &seq, const CSeq_loc &loc)
static bool hasGenomicLocation(const CBioseq &seq)
Definition: locations.cpp:100
int FixStrands(void)
Definition: problems.cpp:250
static int m_current_verbosity
bool ReadPreviousAcc(const string &file, list< long > &input_acc)
void reportProblemSequenceName(const string &name, ostream &out=NcbiCout)
Definition: problems.cpp:238
static int skip_toprot(CTypeIterator< CBioseq > &seq)
int RemoveProblems(map< string, string > &problem_seqs, LocMap &loc_map)
Definition: problems.cpp:319
bool complete_overlap(const CSeq_loc &l1, const CSeq_loc &l2)
Definition: overlaps.cpp:537
virtual void Init(void)
Initialize the application.
int short_proteins(void)
int CollectSimpleSeqs(TSimpleSeqs &seqs)
static void dumpAlignment(const string &alignment, const string &file)
static int m_cds_overlapThreshold
int CopyInfoFromGenesToProteins(void)
===========================================================================
Definition: copy_loc.cpp:33
int ReadBlast(const char *file, map< string, blastStr > &blastMap)
Definition: read_blast.cpp:39
static double m_entireThreshold
static int m_rna_overlapThreshold
void GetRNAfeats(const LocMap &loc_map, CSeq_annot::C_Data::TFtable &rna_feats, const CSeq_annot::C_Data::TFtable &feats)
int SetParents(CSeq_entry *parent, CBioseq_set::TSeq_set &where)
static void check_alignment(CBioseq::TAnnot::const_iterator &annot, const CBioseq &seq, vector< perfectHitStr > &results)
static ECoreDataType getCoreDataType(istream &in)
static bool is_prot_entry(const CBioseq &seq)
const CBioseq & get_nucleotide_seq(const CBioseq &seq)
virtual int Run(void)
Run the application.
int StoreBlast(map< string, blastStr > &blastMap)
Definition: read_blast.cpp:374
static char * skip_space(char *w)
static void DecreaseVerbosity(void)
int AnalyzeSeqsViaBioseqs(bool in_pool_prot, bool against_prot)
Definition: analyze.cpp:35
int overlaps(const TSimpleSeqs::iterator &seq1, const TSimpleSeqs::iterator &seq2, int &overlap)
Definition: overlaps.cpp:146
TSimpleSeqs m_extRNAtable2
int ReadTagMap(const char *file)
int ReadParents(CNcbiIstream &in, const list< long > &nacc)
static bool less_simple_seq(const TSimpleSeq &first, const TSimpleSeq &second)
static void getBounds(CBioseq::TAnnot::const_iterator &annot, int *qFrom, int *qTo, int *sFrom, int *sTo)
Definition: shortcuts.cpp:435
static void printPerfectHit(const perfectHitStr &hit, ostream &out=NcbiCout)
static int m_shortProteinThreshold
static void PopVerbosity(void)
ECoreDataType m_coreDataType
static const CSeq_loc & getGenomicLocation(const CBioseq &seq)
Definition: locations.cpp:120
static void printReport(distanceReportStr *report, ostream &out=NcbiCout)
bool overlaps_prot_na(CBioseq &seq, const CBioseq::TAnnot &annots)
Definition: overlaps.cpp:161
static int getLenScore(CBioseq::TAnnot::const_iterator &annot)
Definition: shortcuts.cpp:420
static bool less_seq(const CRef< CSeq_entry > &first, const CRef< CSeq_entry > &second)
bool CheckMissingRibosomalRNA(const CBioseq::TAnnot &annots)
Definition: missing.cpp:40
static void getFromTo(const CSeq_loc &loc, TSeqPos &from, TSeqPos &to, ENa_strand &strand)
Definition: locations.cpp:34
void ugly_simple_overlaps_call(int &n_user_neighbors, int &n_ext_neighbors, TSimpleSeqs::iterator &ext_rna, TSimpleSeqs::iterator &first_user_in_range, TSimpleSeqs::iterator &first_user_non_in_range, TSimpleSeqs &seqs, int max_distance, TSimpleSeqs::iterator &first_ext_in_range, TSimpleSeqs::iterator &first_ext_non_in_range, string &bufferstr)
Definition: missing.cpp:99
void GetGenomeLen()
Definition: shortcuts.cpp:36
void reportProblems(const bool report_and_forget, diagMap &diag, ostream &out, const CBioseq::TAnnot &annots, const EProblem type)
Definition: problems.cpp:36
bool fit_blast(const CBioseq &left, const CBioseq &right, string &common_subject)
Definition: fit_blast.cpp:37
int RemoveInterim(void)
Definition: problems.cpp:713
static double m_small_tails_threshold
static double m_partThreshold
int AnalyzeSeqsViaBioseqs1(CBioseq &left)
Definition: analyze.cpp:143
static string getLocusTag(const CBioseq &seq)
Definition: locations.cpp:141
bool match_na(const CSeq_feat &f1, const string &type1)
Definition: match.cpp:38
static double m_eThreshold
static bool skip_to_valid_seq_cand(CBioseq_set::TSeq_set::const_iterator &seq, const CBioseq_set::TSeq_set &seqs)
CBeginInfo Begin(void)
void NormalizeSeqentry(CSeq_entry &entry)
Definition: problems.cpp:403
void dump_fasta_for_pretty_blast(diagMap &diag)
void append_misc_feature(CBioseq_set::TSeq_set &seqs, const string &name, EProblem problem_type)
Definition: problems.cpp:921
int ReadTRNA2(const string &file)
Definition: read_trna.cpp:35
static void PushVerbosity(void)
CSeq_submit m_Submit
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Definition: tbl.hpp:111
Definition: map.hpp:338
char value[7]
Definition: config.c:431
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
#define NcbiCout
Definition: ncbistre.hpp:543
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NcbiCerr
Definition: ncbistre.hpp:544
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
list< CRef< CSeq_entry > > TSeq_set
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
FILE * file
const struct ncbi::grid::netcache::search::fields::KEY key
Magic spell ;-) needed for some weird compilers... very empiric.
EIPRangeType t
Definition: ncbi_localip.c:101
const char * tag
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
The Object manager core.
static pcre_uint8 * buffer
Definition: pcretest.c:1051
EMyFeatureType
@ eMyFeatureType_normal_tRNA
@ eMyFeatureType_unknown
@ eMyFeatureType_atypical_tRNA
@ eMyFeatureType_miscRNA
@ eMyFeatureType_pseudo_tRNA
@ eMyFeatureType_rRNA
@ eMyFeatureType_hypo_CDS
@ eMyFeatureType_normal_CDS
ESerialDataFormat s_GetFormat(const string &name)
Definition: bss_info.cpp:337
@ eMayBeNotFrameShift
@ eTRNAComMismatch
@ ePartial
@ eTRNABadStrand
@ eShortProtein
@ eOverlap
@ eTRNAMismatch
@ eAllProblems
@ eTRNAUndefStrand
@ eRemoveOverlap
@ eTRNAProblems
@ eRnaOverlap
@ eTRNAAbsent
@ eFrameShift
@ eCompleteOverlap
@ eTRNAMissing
@ eRelFrameShift
vector< TExtRNA > TExtRNAtable
int addProblems(list< problemStr > &dest, const list< problemStr > &src)
Definition: problems.cpp:844
vector< TSimplePair > TSimplePairs
string get_trna_string(const CSeq_feat &feat)
Definition: shortcuts.cpp:197
string GetStringDescr(const CBioseq &bioseq)
Definition: shortcuts.cpp:309
string Get3type(const CRNA_ref &rna)
Definition: shortcuts.cpp:115
EMyFeatureType get_my_feat_type(const CSeq_feat &feat, const LocMap &loc_map)
Definition: shortcuts.cpp:246
map< string, TProblem_loc > TProblem_locs
string GetLocationString(const interval_type &loc)
Definition: locations.cpp:232
string get_title(const CBioseq &seq)
Definition: shortcuts.cpp:159
map< CSeq_id::EAccessionInfo, TGenfo > TranStr3
string let1_2_let3(char let1)
EMyFeatureType get_my_seq_type(const CBioseq &seq)
Definition: shortcuts.cpp:176
USING_SCOPE(ncbi)
map< string, TranStr3 > TranStrMap3
string diagName(const string &type, const string &value)
Definition: problems.cpp:839
string GetLocusTag(const CSeq_feat &f, const LocMap &loc_map)
map< string, CRef< CSeq_feat > > LocMap
CBioseq_set::TSeq_set * get_parent_seqset(const CBioseq &seq)
string printed_ranges(const CSeq_loc &seq_interval)
Definition: shortcuts.cpp:341
map< string, diagStr > diagMap
map< long, long > parent_map
@ eUndefined
string GetRNAname(const CSeq_feat &feat)
Definition: shortcuts.cpp:229
list< TSimpleSeq > TSimpleSeqs
string GetRRNAtype(const CRNA_ref &rna)
Definition: shortcuts.cpp:101
string printed_range(const TSeqPos from2, const TSeqPos to2)
Definition: shortcuts.cpp:320
ENa_strand strand
CRef< CSeq_loc > seqloc
string locus_tag
CRef< CBioseq > seq
TSimplePairs exons
vector< hitStr > hits
list< problemStr > problems
CRef< const CSeq_loc > loc2
list< long > sbjGIs
string alignment
string misc_feat_message
ENa_strand strand
Definition: type.c:6
else result
Definition: token2.c:20
Modified on Sun Feb 25 03:04:01 2024 by modify_doxy.py rev. 669887