NCBI C++ ToolKit
tabular.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tabular.hpp 100836 2023-09-18 15:48:00Z jianye $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 * ===========================================================================
29 */
30 
31 /// @file tabular.hpp
32 /// Formatting of pairwise sequence alignments in tabular form.
33 
34 #ifndef OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP
35 #define OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP
36 
37 #include <corelib/ncbistre.hpp>
40 #include <objmgr/scope.hpp>
45 
46 #include <algorithm>
47 
49 BEGIN_SCOPE(align_format)
50 
51 
52 /// Class containing information needed for tabular formatting of BLAST
53 /// results.
55 {
56 public:
57  /// In what form should the sequence identifiers be shown?
58  enum ESeqIdType {
59  eFullId = 0, ///< Show full seq-id, with multiple ids concatenated.
60  eAccession, ///< Show only best accession
61  eAccVersion, ///< Show only best accession.version
62  eGi ///< Show only gi
63  };
64 
65  /// What delimiter to use between fields in each row of the tabular output.
67  eTab = 0, ///< Tab
68  eSpace, ///< Space
69  eComma, ///< Comma
70  eCustom ///<Custom
71  };
72 
73  /// Constructor
74  /// @param ostr Stream to write output to [in]
75  /// @param format Output format - what fields to include in the output [in]
76  /// @param delim Delimiter to use between tabular fields [in]
77  /// @note fields that are not recognized will be ignored, if no fields are
78  /// specified (or left after purging those that are not recognized), the
79  /// default format is assumed
81  const string& format = kDfltArgTabularOutputFmt,
82  EFieldDelimiter delim = eTab,
83  bool parse_local_ids = false);
84 
85  /// Destructor
87  /// Set query id from a objects::CSeq_id
88  /// @param id List of Seq-ids to use [in]
90  /// Set query id from a Bioseq handle
91  /// @param bh Bioseq handle to get Seq-ids from
92  void SetQueryId(const objects::CBioseq_Handle& bh);
93  ///Get query seqid list
94  const list<CRef<CSeq_id> >& GetQueryId() const {
95  return m_QueryId;
96  };
97  /// Set subject id from a objects::CSeq_id
98  /// @param id List of Seq-ids to use [in]
100  /// Set subject id from a Bioseq handle
101  /// @param bh Bioseq handle to get Seq-ids from
102  void SetSubjectId(const objects::CBioseq_Handle& bh);
103  /// Set the HSP scores
104  /// @param score Raw score [in]
105  /// @param bit_score Bit score [in]
106  /// @param evalue Expect value [in]
107  void SetScores(int score, double bit_score, double evalue);
108  /// Set the HSP endpoints. Note that if alignment is on opposite strands,
109  /// the subject offsets must be reversed.
110  /// @param q_start Starting offset in query [in]
111  /// @param q_end Ending offset in query [in]
112  /// @param s_start Starting offset in subject [in]
113  /// @param s_end Ending offset in subject [in]
114  void SetEndpoints(int q_start, int q_end, int s_start, int s_end);
115  /// Set various counts/lengths
116  /// @param num_ident Number of identities [in]
117  /// @param length Alignment length [in]
118  /// @param gaps Total number of gaps [in]
119  /// @param gap_opens Number of gap openings [in]
120  /// @param positives Number of positives [in]
121  void SetCounts(int num_ident, int length, int gaps, int gap_opens,
122  int positives =0, int query_frame = 1,
123  int subject_frame = 1);
124  /// Sets the Blast-traceback-operations string.
125  /// @param btop_string string for blast traceback operations [in]
126  void SetBTOP(string btop_string);
127  /// Set all member fields, given a Seq-align
128  /// @param sal Seq-align to get data from [in]
129  /// @param scope Scope for Bioseq retrieval [in]
130  /// @param matrix Matrix to calculate positives; NULL if not applicable. [in]
131  /// @return 0 on success, 1 if query or subject Bioseq is not found.
132  int SetFields(const objects::CSeq_align& sal,
133  objects::CScope& scope,
134  CNcbiMatrix<int>* matrix=0);
135  void SetCustomDelim(string customDelim) {
136  x_SetFieldDelimiter(eCustom,customDelim);
137  }
138  /// Print one line of tabular output
139  virtual void Print(void);
140  /// Print the tabular output header
141  /// @param program Program name to show in the header [in]
142  /// @param bioseq Query Bioseq [in]
143  /// @param dbname Search database name [in]
144  /// @param rid the search RID (if not applicable, it should be the
145  /// empty string) [in]
146  /// @param iteration Iteration number (for PSI-BLAST), use default
147  /// parameter value when not applicable [in]
148  /// @param align_set All alignments for this query [in]
149  void PrintHeader(const string& program,
150  const objects::CBioseq& bioseq,
151  const string& dbname,
152  const string& rid = kEmptyStr,
153  unsigned int iteration =
155  const objects::CSeq_align_set* align_set=0,
156  CConstRef<objects::CBioseq> subj_bioseq
158 
159  /// Prints number of queries processed.
160  /// @param num_queries number of queries processed [in]
161  void PrintNumProcessed(int num_queries);
162 
163  /// Return all field names supported in the format string.
164  list<string> GetAllFieldNames(void);
165 
166  /// Should local IDs be parsed or not?
167  /// @param val value to set [in]
168  /// Returns true if the field was requested in the format specification
169  /// @param field Which field to test [in]
170  void SetParseLocalIds(bool val) { m_ParseLocalIds = val; }
171 
172  /// Should subject deflien be parsed for id or not?
173  /// @param val value to set [in]
174  void SetParseSubjectDefline(bool val) { m_ParseSubjectDefline = val; }
175 
176  /// Avoid fetching sequence (if possible)
177  /// If the sequence is needed (e.g., it will be formatted), it will be fetched.
178  /// @param nofetch Do not fetch if true [in]
179  void SetNoFetch(bool nofetch);
180  /// Avoid fetch of sequence if true returned
181  bool GetNoFetch();
182 
183  // Set Genetic code for translating seqs
184  void SetQueryGeneticCode(int q_gc) {m_QueryGeneticCode = q_gc;}
185  void SetDbGeneticCode(int db_gc) {m_DbGeneticCode = db_gc;}
186 
187  /// Set query range
188  /// @param query range [in]
189  void SetQueryRange(TSeqRange & q_range) { m_QueryRange = q_range;}
190 
191 protected:
192  bool x_IsFieldRequested(ETabularField field);
193  /// Add a field to the list of fields to show, if it is not yet present in
194  /// the list of fields.
195  /// @param field Which field to add? [in]
196  void x_AddFieldToShow(ETabularField field);
197  /// Delete a field from the list of fields to show
198  /// @param field Which field to delete? [in]
199  void x_DeleteFieldToShow(ETabularField field);
200  /// Add a default set of fields to show.
201  void x_AddDefaultFieldsToShow(void);
202  /// Set fields to show, given an output format string
203  /// @param format Output format [in]
204  void x_SetFieldsToShow(const string& format);
205  /// Reset values of all fields.
206  void x_ResetFields(void);
207  /// Set the tabular fields delimiter.
208  /// @param delim Which delimiter to use
209  void x_SetFieldDelimiter(EFieldDelimiter delim, string customDelim = "");
210  /// Print the names of all supported fields
211  void x_PrintFieldNames(void);
212  /// Print the value of a given field
213  /// @param field Which field to show? [in]
214  void x_PrintField(ETabularField field);
215  /// Print query Seq-id
216  void x_PrintQuerySeqId(void) const;
217  /// Print query gi
218  void x_PrintQueryGi(void);
219  /// Print query accession
220  void x_PrintQueryAccession(void);
221  /// Print query accession.version
222  void x_PrintQueryAccessionVersion(void);
223  /// Print query and database names
224  void x_PrintQueryAndDbNames(const string& program,
225  const objects::CBioseq& bioseq,
226  const string& dbname,
227  const string& rid,
228  unsigned int iteration,
229  CConstRef<objects::CBioseq> subj_bioseq);
230  /// Print subject Seq-id
231  void x_PrintSubjectSeqId(void);
232  /// Print all Seq-ids associated with this subject, separated by ';'
233  void x_PrintSubjectAllSeqIds(void);
234  /// Print subject gi
235  void x_PrintSubjectGi(void);
236  /// Print all gis associated with this subject, separated by ';'
237  void x_PrintSubjectAllGis(void);
238  /// Print subject accession
239  void x_PrintSubjectAccession(void);
240  /// Print subject accession.version
241  void x_PrintSubjectAccessionVersion(void);
242  /// Print all accessions associated with this subject, separated by ';'
243  void x_PrintSubjectAllAccessions(void);
244  /// Print aligned part of query sequence
245  void x_PrintQuerySeq(void);
246  /// Print aligned part of subject sequence
247  void x_PrintSubjectSeq(void);
248  /// Print query start
249  void x_PrintQueryStart(void);
250  /// Print query end
251  void x_PrintQueryEnd(void);
252  /// Print subject start
253  void x_PrintSubjectStart(void);
254  /// Print subject end
255  void x_PrintSubjectEnd(void);
256  /// Print e-value
257  void x_PrintEvalue(void);
258  /// Print bit score
259  void x_PrintBitScore(void);
260  /// Print raw score
261  void x_PrintScore(void);
262  /// Print alignment length
263  void x_PrintAlignmentLength(void);
264  /// Print percent of identical matches
265  void x_PrintPercentIdentical(void);
266  /// Print number of identical matches
267  void x_PrintNumIdentical(void);
268  /// Print number of mismatches
269  void x_PrintMismatches(void);
270  /// Print number of positive matches
271  void x_PrintNumPositives(void);
272  /// Print number of gap openings
273  void x_PrintGapOpenings(void);
274  /// Print total number of gaps
275  void x_PrintGaps(void);
276  /// Print percent positives
277  void x_PrintPercentPositives();
278  /// Print frames
279  void x_PrintFrames();
280  void x_PrintQueryFrame();
281  void x_PrintSubjectFrame();
282  void x_PrintBTOP();
283  /// Print the query sequence length
284  void x_PrintQueryLength();
285  /// Print the subject sequence length
286  void x_PrintSubjectLength();
287  /// Print subject tax info
288  void x_PrintSubjectTaxIds();
289  void x_PrintSubjectSciNames();
290  void x_PrintSubjectCommonNames();
291  void x_PrintSubjectBlastNames();
292  void x_PrintSubjectSuperKingdoms();
293  void x_PrintSubjectTaxId();
294  void x_PrintSubjectSciName();
295  void x_PrintSubjectCommonName();
296  void x_PrintSubjectBlastName();
297  void x_PrintSubjectSuperKingdom();
298  void x_PrintSubjectTitle();
299  void x_PrintSubjectAllTitles();
300  void x_PrintSubjectStrand();
301  void x_PrintSeqalignCoverage();
302  void x_PrintSubjectCoverage();
303  void x_PrintUniqSubjectCoverage();
304  void x_SetTaxInfo(const objects::CBioseq_Handle & handle, const CRef<objects::CBlast_def_line_set> & bdlRef);
305  void x_SetTaxInfoAll(const objects::CBioseq_Handle & handle, const CRef<objects::CBlast_def_line_set> & bdlRef);
306  void x_SetSubjectIds(const objects::CBioseq_Handle& bh, const CRef<objects::CBlast_def_line_set> & bdlRef);
307  void x_SetQueryCovSubject(const objects::CSeq_align & align);
308  void x_SetQueryCovUniqSubject(const objects::CSeq_align & align);
309  void x_SetQueryCovSeqalign(const CSeq_align & align, int query_len);
310  void x_CheckTaxDB();
311 
312  CNcbiOstream& m_Ostream; ///< Stream to write output to
313  string m_FieldDelimiter; ///< Delimiter character for fields to print.
314  string m_QuerySeq; ///< Aligned part of the query sequence
315  string m_SubjectSeq; ///< Aligned part of the subject sequence
316  int m_QueryStart; ///< Starting offset in query
317  int m_QueryEnd; ///< Ending offset in query
318  int m_QueryFrame; ///< query frame
319  int m_SubjectStart; ///< Starting offset in subject
320  int m_SubjectEnd; ///< Ending offset in subject
321  int m_SubjectFrame; ///< subject frame
322  bool m_NoFetch; ///< If true, avoid fetching the sequence (see SetNoFetch/GetNoFetch)
323 
324 private:
325 
326  list<CRef<objects::CSeq_id> > m_QueryId; ///< List of query ids for this HSP
327  list<CRef<objects::CSeq_id> > m_SubjectId;
328  /// All subject sequence ids for this HSP
329  vector<list<CRef<objects::CSeq_id> > > m_SubjectIds;
330  TSeqPos m_QueryLength; ///< Length of query sequence
331  TSeqPos m_SubjectLength; ///< Length of subject sequence
332  int m_Score; ///< Raw score of this HSP
333  string m_BitScore; ///< Bit score of this HSP, in appropriate format
334  string m_Evalue; ///< E-value of this HSP, in appropriate format
335  int m_AlignLength; ///< Alignment length of this HSP
336  int m_NumGaps; ///< Total number of gaps in this HSP
337  int m_NumGapOpens; ///< Number of gap openings in this HSP
338  int m_NumIdent; ///< Number of identities in this HSP
339  int m_NumPositives; ///< Number of positives in this HSP
340  /// Map of field enum values to field names.
342  list<ETabularField> m_FieldsToShow; ///< Which fields to show?
343  /// Should the query deflines be parsed for local IDs?
345  /// Parse subject defline?
347  string m_BTOP; ///< Blast-traceback-operations.
348 
349  //TaxInfo
351  vector<string> m_SubjectSciNames;
352  vector<string> m_SubjectCommonNames;
361 
363  pair<string, int> m_QueryCovSubject;
364  pair<string, int> m_QueryCovUniqSubject;
366 
369 
372 };
373 
374 
376 {
378 }
379 
381 {
383 }
384 
386 {
388 }
389 
391 {
393 }
394 
396 {
398 }
399 
401 {
403 }
404 
406 {
407  m_Ostream << m_Evalue;
408 }
409 
411 {
413 }
414 
416 {
417  m_Ostream << m_Score;
418 }
419 
421 {
423 }
424 
426 {
427  double perc_ident =
428  (m_AlignLength > 0 ? ((double)m_NumIdent)/m_AlignLength * 100 : 0);
429  m_Ostream << NStr::DoubleToString(perc_ident, 3);
430 }
431 
433 {
434  double perc_positives =
435  (m_AlignLength > 0 ? ((double)m_NumPositives)/m_AlignLength * 100 : 0);
436  m_Ostream << NStr::DoubleToString(perc_positives, 2);
437 }
438 
440 {
442 }
443 
445 {
447 }
448 
450 {
452 }
453 
455 {
456  m_Ostream << m_BTOP;
457 }
458 
460 {
462 }
463 
465 {
466  int num_mismatches = m_AlignLength - m_NumIdent - m_NumGaps;
467  m_Ostream << num_mismatches;
468 }
469 
471 {
473 }
474 
475 // FIXME; do this via a bit field
477 {
478  return find(m_FieldsToShow.begin(),
479  m_FieldsToShow.end(),
480  field) != m_FieldsToShow.end();
481 }
482 
484 {
486 }
487 
489 {
490  m_Ostream << m_NumGaps;
491 }
493 {
495 }
496 
498 {
500 }
501 
502 inline void CBlastTabularInfo::SetNoFetch(bool nofetch)
503 {
504  m_NoFetch = nofetch;
505 }
506 
508 {
509  return m_NoFetch;
510 }
511 
512 /// Class containing information needed for tabular formatting of IgBLAST
513 /// results.
515 {
516 public:
517 
518  /// struct containing annotated domain information
519  struct SIgDomain {
520  SIgDomain(const string& n, int s, int e, int ss, int se):
521  name(n), start(s), end(e),
522  s_start(ss), s_end(se), length(0),
523  num_match(0), num_mismatch(0), num_gap(0) {};
524  const string name;
525  int start;
526  int end; // actual end + 1
527  int s_start;
528  int s_end; // actual end + 1
529  int length;
532  int num_gap;
533  };
534 
/// struct containing annotated gene information: the gene's sequence id
/// and the start/end positions handed to Set().
struct SIgGene {
    /// Start in the same "unset" state that Reset() produces, so start/end
    /// are never read uninitialized.
    SIgGene() : start(-1), end(-1) {}

    /// Store the gene id and its range.
    /// A leading "lcl|" prefix is removed from the id before it is stored.
    /// @param id Gene sequence id, optionally prefixed with "lcl|" [in]
    /// @param s Start position [in]
    /// @param e End position [in]
    void Set(const string& id, int s, int e) {
        static const char kLocalPrefix[] = "lcl|";
        const string::size_type kPrefixLen = sizeof(kLocalPrefix) - 1;
        // compare() tests the prefix in place; ids shorter than the prefix
        // simply compare unequal.
        if (id.compare(0, kPrefixLen, kLocalPrefix) == 0) {
            sid = id.substr(kPrefixLen);
        } else {
            sid = id;
        }
        start = s;
        end = e;
    }

    /// Clear the id and mark both positions as unset (-1).
    void Reset() {
        sid.clear();
        start = -1;
        end = -1;
    }

    string sid;  ///< Gene id with any "lcl|" prefix removed
    int start;   ///< Start position, -1 when unset
    int end;     ///< End position, -1 when unset
};
555 
556  /// Constructor
557  /// @param ostr Stream to write output to [in]
558  /// @param format Output format - what fields to include in the output [in]
559  /// @param delim Delimiter to use between fields in each row of the tabular output [in]
561  const string& format = kDfltArgTabularOutputFmt,
562  EFieldDelimiter delim = eTab)
563  : CBlastTabularInfo(ostr, format, delim) { };
564 
565  /// Destructor
567  x_ResetIgFields();
568  };
569 
571  const string& program,
572  const objects::CBioseq& bioseq,
573  const string& dbname,
574  const string& domain_sys,
575  const string& rid = kEmptyStr,
576  unsigned int iteration =
578  const objects::CSeq_align_set* align_set=0,
579  CConstRef<objects::CBioseq> subj_bioseq
581 
582  /// Set fields for master alignment
583  int SetMasterFields(const objects::CSeq_align& align,
584  objects::CScope& scope,
585  const string& chain_type,
586  const string& master_chain_type_to_show,
587  CNcbiMatrix<int>* matrix=0);
588 
589  /// Set fields for all other alignments
590  int SetFields(const objects::CSeq_align& align,
591  objects::CScope& scope,
592  const string& chain_type,
593  const string& master_chain_type_to_show,
594  CNcbiMatrix<int>* matrix=0);
595 
596  /// Override the print method
597  virtual void Print(void);
598 
599  /// Print domain information
600  void PrintMasterAlign(const CConstRef<blast::CIgBlastOptions>& ig_opts, const string& header = "# ") const;
601 
602  void SetAirrFormatData(CScope& scope,
603  const CRef<blast::CIgAnnotation> &annot,
604  const CBioseq_Handle& query_handle,
605  CConstRef<CSeq_align_set> align_result,
606  const CConstRef<blast::CIgBlastOptions>& ig_opts);
607 
608  void PrintAirrRearrangement(CScope& scope,
609  const CRef<blast::CIgAnnotation> &annot,
610  const string& program_version,
611  const CBioseq& query_bioseq,
612  const string& dbname,
613  const string& domain_sys,
614  const string& rid,
615  unsigned int iteration,
616  const CSeq_align_set* align_set,
617  CConstRef<CBioseq> subj_bioseq,
618  CNcbiMatrix<int>* matrix,
619  bool print_airr_format_header,
620  const CConstRef<blast::CIgBlastOptions>& ig_opts);
621 
622  /// Print Html style summary
623  void PrintHtmlSummary(const CConstRef<blast::CIgBlastOptions>& ig_opts) const;
624 
625  /// Set out-of-frame information
626  void SetFrame(const string &frame = "N/A") {
627  m_FrameInfo = frame;
628  };
629 
630  /// Set strand information
631  void SetMinusStrand(bool minus = true) {
632  m_IsMinusStrand = minus;
633  };
634 
635  /// Set sequence type
636  void SetSeqType(bool isNucl) {
637  m_IsNucl = isNucl;
638  };
639 
640  /// Set domain info
641  void AddIgDomain(const string &name, int start, int end,
642  int s_start=-1, int s_end=-1) {
643  if (start <0 || end <= start) return;
644  SIgDomain * domain = new SIgDomain(name, start, end, s_start, s_end);
645  x_ComputeIgDomain(*domain);
646  m_IgDomains.push_back(domain);
647  };
648 
649 
650  /// Set gene info
651  void SetVGene(const string &id, int s, int e) {
652  m_VGene.Set(id, s,e);
653  }
654 
655  /// Set gene info
656  void SetDGene(const string &id, int s, int e) {
657  m_DGene.Set(id, s,e);
658  }
659 
660  /// Set gene info
661  void SetJGene(const string &id, int s, int e) {
662  m_JGene.Set(id, s,e);
663  }
664 
665  /// Set gene info
666  void SetCGene(const string &id, int s, int e) {
667  m_CGene.Set(id, s,e);
668  }
669 
670  /// One method to set all annotation information
671  void SetIgAnnotation(const CRef<blast::CIgAnnotation> &annot,
672  const CConstRef<blast::CIgBlastOptions> &ig_opts,
673  CConstRef<CSeq_align_set>& align_result,
674  CScope& scope);
675  /// method to set cdr3 and fwr4 annotation information
676  void SetIgCDR3FWR4Annotation(const CRef<blast::CIgAnnotation> &annot);
677 
678  ///Getter
679  void GetIgInfo (string& v,
680  string& d,
681  string& j,
682  string& c,
683  string& master_chain_to_show,
684  string& cdr3_nuc,
685  string& cdr3_aa,
686  string& productive) const {
687  v = m_VGene.sid;
688  d = m_DGene.sid;
689  j = m_JGene.sid;
690  c = m_CGene.sid;
691  master_chain_to_show = m_MasterChainTypeToShow;
692  cdr3_nuc = m_Cdr3Seq;
693  cdr3_aa = m_Cdr3SeqTrans;
694  productive = m_OtherInfo[4];
695  }
696 
697  ///Get Ig domain
698  const vector<SIgDomain*>& GetIgDomains() const {
699 
700  return m_IgDomains;
701  }
702 
703 
704 protected:
705  void x_ResetIgFields();
706  void x_PrintIgGenes(bool isHtml=false, const string& header="# ") const;
707  void x_ComputeIgDomain(SIgDomain &domain);
708  void x_PrintIgDomain(const SIgDomain &domain) const;
709  void x_PrintIgDomainHtml(const SIgDomain &domain) const;
710  void x_PrintPartialQuery(int start, int end, bool isHtml=false) const;
711 
712 
713 private:
714  string m_Query;
715  bool m_IsNucl;
717  string m_FrameInfo;
718  string m_VFrameShift;
719  string m_ChainType;
725  vector<SIgDomain *> m_IgDomains;
726 
727  //index 0-2, not currently being used
728  // index 4, productive/non-productive
729  // index 3, stop codon or not
730  static const int num_otherinfo = 5;
731  string m_OtherInfo[num_otherinfo];
736 
737  string m_Fwr1Seq;
739  string m_Cdr1Seq;
741  string m_Fwr2Seq;
743  string m_Cdr2Seq;
745  string m_Fwr3Seq;
747  string m_Fwr4Seq;
749  string m_Cdr3Seq;
751 
752  string m_AirrCdr3Seq;
756 
758  string m_VAlign;
766 };
767 
768 END_SCOPE(align_format)
770 
771 #endif /* OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP */
User-defined methods of the data storage class.
void SetScores(objects::CSeq_align &seq_align, objects::CScope &scope, const string &matrix_name="BLOSUM62")
CBioseq_Handle –.
Class containing information needed for tabular formatting of BLAST results.
Definition: tabular.hpp:55
void SetQueryGeneticCode(int q_gc)
Definition: tabular.hpp:184
ESeqIdType
In what form should the sequence identifiers be shown?
Definition: tabular.hpp:58
@ eAccession
Show only best accession.
Definition: tabular.hpp:60
@ eAccVersion
Show only best accession.version.
Definition: tabular.hpp:61
list< CRef< objects::CSeq_id > > m_QueryId
List of query ids for this HSP.
Definition: tabular.hpp:326
string m_BitScore
Bit score of this HSP, in appropriate format.
Definition: tabular.hpp:333
void SetParseSubjectDefline(bool val)
Should subject defline be parsed for id or not?
Definition: tabular.hpp:174
void x_PrintSubjectSeq(void)
Print aligned part of subject sequence.
Definition: tabular.hpp:380
TTaxId m_SubjectTaxId
Definition: tabular.hpp:355
void x_PrintQueryLength()
Print the query sequence length.
Definition: tabular.hpp:492
void x_PrintNumIdentical(void)
Print number of identical matches.
Definition: tabular.hpp:459
string m_SubjectSciName
Definition: tabular.hpp:356
map< string, ETabularField > m_FieldMap
Map of field enum values to field names.
Definition: tabular.hpp:341
void x_PrintSubjectEnd(void)
Print subject end.
Definition: tabular.hpp:400
pair< string, int > m_QueryCovUniqSubject
Definition: tabular.hpp:364
void SetQueryId(const objects::CBioseq_Handle &bh)
Set query id from a Bioseq handle.
void x_PrintGapOpenings(void)
Print number of gap openings.
Definition: tabular.hpp:483
set< string > m_SubjectSuperKingdoms
Definition: tabular.hpp:354
int m_AlignLength
Alignment length of this HSP.
Definition: tabular.hpp:335
set< string > m_SubjectBlastNames
Definition: tabular.hpp:353
bool m_ParseSubjectDefline
Parse subject defline?
Definition: tabular.hpp:346
void x_PrintBTOP()
Definition: tabular.hpp:454
string m_SubjectBlastName
Definition: tabular.hpp:358
EFieldDelimiter
What delimiter to use between fields in each row of the tabular output.
Definition: tabular.hpp:66
@ eComma
Comma.
Definition: tabular.hpp:69
@ eSpace
Space.
Definition: tabular.hpp:68
void SetCustomDelim(string customDelim)
Definition: tabular.hpp:135
int m_SubjectStart
Starting offset in subject.
Definition: tabular.hpp:319
void x_PrintPercentIdentical(void)
Print percent of identical matches.
Definition: tabular.hpp:425
void x_PrintScore(void)
Print raw score.
Definition: tabular.hpp:415
void x_PrintQueryStart(void)
Print query start.
Definition: tabular.hpp:385
void x_PrintBitScore(void)
Print bit score.
Definition: tabular.hpp:410
TSeqPos m_SubjectLength
Length of subject sequence.
Definition: tabular.hpp:331
void x_PrintGaps(void)
Print total number of gaps.
Definition: tabular.hpp:488
void x_PrintNumPositives(void)
Print number of positive matches.
Definition: tabular.hpp:470
void x_PrintQueryFrame()
Definition: tabular.hpp:444
int m_SubjectEnd
Ending offset in subject.
Definition: tabular.hpp:320
string m_SubjectSuperKingdom
Definition: tabular.hpp:359
string m_SubjectSeq
Aligned part of the subject sequence.
Definition: tabular.hpp:315
void SetQueryRange(TSeqRange &q_range)
Set query range.
Definition: tabular.hpp:189
void SetSubjectId(list< CRef< objects::CSeq_id > > &id)
Set subject id from a objects::CSeq_id.
CNcbiOstream & m_Ostream
Stream to write output to.
Definition: tabular.hpp:312
CRef< CBlast_def_line_set > m_SubjectDefline
Definition: tabular.hpp:360
void SetParseLocalIds(bool val)
Should local IDs be parsed or not?
Definition: tabular.hpp:170
void x_PrintSubjectStart(void)
Print subject start.
Definition: tabular.hpp:395
int m_QueryEnd
Ending offset in query.
Definition: tabular.hpp:317
int m_SubjectFrame
subject frame
Definition: tabular.hpp:321
int m_NumGapOpens
Number of gap openings in this HSP.
Definition: tabular.hpp:337
vector< string > m_SubjectCommonNames
Definition: tabular.hpp:352
void SetSubjectId(const objects::CBioseq_Handle &bh)
Set subject id from a Bioseq handle.
pair< string, int > m_QueryCovSubject
Definition: tabular.hpp:363
void x_PrintSubjectLength()
Print the subject sequence length.
Definition: tabular.hpp:497
list< ETabularField > m_FieldsToShow
Which fields to show?
Definition: tabular.hpp:342
virtual void Print(void)
Print one line of tabular output.
Definition: tabular.cpp:1094
int SetFields(const objects::CSeq_align &sal, objects::CScope &scope, CNcbiMatrix< int > *matrix=0)
Set all member fields, given a Seq-align.
Definition: tabular.cpp:743
string m_CustomDelim
Definition: tabular.hpp:371
bool m_ParseLocalIds
Should the query deflines be parsed for local IDs?
Definition: tabular.hpp:344
void PrintHeader(const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &rid=kEmptyStr, unsigned int iteration=numeric_limits< unsigned int >::max(), const objects::CSeq_align_set *align_set=0, CConstRef< objects::CBioseq > subj_bioseq=CConstRef< objects::CBioseq >())
Print the tabular output header.
Definition: tabular.cpp:1225
void x_PrintEvalue(void)
Print e-value.
Definition: tabular.hpp:405
void x_PrintMismatches(void)
Print number of mismatches.
Definition: tabular.hpp:464
void SetNoFetch(bool nofetch)
Avoid fetching sequence (if possible) If the sequence is needed (e.g., will be formatted,...
Definition: tabular.hpp:502
bool m_NoFetch
program as a string
Definition: tabular.hpp:322
void SetDbGeneticCode(int db_gc)
Definition: tabular.hpp:185
void x_PrintPercentPositives()
Print percent positives.
Definition: tabular.hpp:432
vector< list< CRef< objects::CSeq_id > > > m_SubjectIds
All subject sequence ids for this HSP.
Definition: tabular.hpp:329
string m_FieldDelimiter
Delimiter character for fields to print.
Definition: tabular.hpp:313
bool GetNoFetch()
Avoid fetch of sequence if true returned.
Definition: tabular.hpp:507
const list< CRef< CSeq_id > > & GetQueryId() const
Get query seqid list.
Definition: tabular.hpp:94
set< TTaxId > m_SubjectTaxIds
Blast-traceback-operations.
Definition: tabular.hpp:350
TSeqRange m_QueryRange
Definition: tabular.hpp:370
bool x_IsFieldRequested(ETabularField field)
Definition: tabular.hpp:476
int m_NumPositives
Number of positives in this HSP.
Definition: tabular.hpp:339
int m_QueryStart
Starting offset in query.
Definition: tabular.hpp:316
string m_SubjectStrand
Definition: tabular.hpp:362
void SetQueryId(list< CRef< objects::CSeq_id > > &id)
Set query id from a objects::CSeq_id.
void x_PrintQuerySeq(void)
Print aligned part of query sequence.
Definition: tabular.hpp:375
void x_PrintSubjectFrame()
Definition: tabular.hpp:449
void x_PrintFrames()
Print frames.
Definition: tabular.hpp:439
void x_PrintAlignmentLength(void)
Print alignment length.
Definition: tabular.hpp:420
int m_Score
Raw score of this HSP.
Definition: tabular.hpp:332
vector< string > m_SubjectSciNames
Definition: tabular.hpp:351
int m_QueryFrame
query frame
Definition: tabular.hpp:318
list< CRef< objects::CSeq_id > > m_SubjectId
Definition: tabular.hpp:327
string m_SubjectCommonName
Definition: tabular.hpp:357
void x_PrintQueryEnd(void)
Print query end.
Definition: tabular.hpp:390
int m_NumGaps
Total number of gaps in this HSP.
Definition: tabular.hpp:336
int m_NumIdent
Number of identities in this HSP.
Definition: tabular.hpp:338
string m_QuerySeq
Aligned part of the query sequence.
Definition: tabular.hpp:314
string m_Evalue
E-value of this HSP, in appropriate format.
Definition: tabular.hpp:334
TSeqPos m_QueryLength
Length of query sequence.
Definition: tabular.hpp:330
Class containing information needed for tabular formatting of BLAST results.
Definition: tabular.hpp:515
void SetMinusStrand(bool minus=true)
Set strand information.
Definition: tabular.hpp:631
vector< SIgDomain * > m_IgDomains
Definition: tabular.hpp:725
~CIgBlastTabularInfo()
Destructor.
Definition: tabular.hpp:566
map< string, string > m_AirrData
Definition: tabular.hpp:764
CIgBlastTabularInfo(CNcbiOstream &ostr, const string &format=kDfltArgTabularOutputFmt, EFieldDelimiter delim=eTab)
What delimiter to use between fields in each row of the tabular output.
Definition: tabular.hpp:560
CRef< CSeq_align > m_TopAlign_V
Definition: tabular.hpp:754
void SetSeqType(bool isNucl)
Set sequence type.
Definition: tabular.hpp:636
void SetJGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:661
const vector< SIgDomain * > & GetIgDomains() const
Get Ig domain.
Definition: tabular.hpp:698
void SetDGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:656
void AddIgDomain(const string &name, int start, int end, int s_start=-1, int s_end=-1)
Set domain info.
Definition: tabular.hpp:641
void SetFrame(const string &frame="N/A")
Set out-of-frame information.
Definition: tabular.hpp:626
void SetCGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:666
CRef< CSeq_align > m_TopAlign_J
Definition: tabular.hpp:762
CRef< CSeq_align > m_TopAlign_D
Definition: tabular.hpp:755
CRef< CSeq_align > m_TopAlign_C
Definition: tabular.hpp:763
void SetVGene(const string &id, int s, int e)
Set gene info.
Definition: tabular.hpp:651
void GetIgInfo(string &v, string &d, string &j, string &c, string &master_chain_to_show, string &cdr3_nuc, string &cdr3_aa, string &productive) const
Getter.
Definition: tabular.hpp:679
string m_AirrCdr3SeqTrans
Definition: tabular.hpp:753
string m_MasterChainTypeToShow
Definition: tabular.hpp:720
CObject –.
Definition: ncbiobj.hpp:180
CScope –.
Definition: scope.hpp:92
void Print(const CCompactSAMApplication::AlignInfo &ai)
ETabularField
Enumeration for all fields that are supported in the tabular output.
string kDfltArgTabularOutputFmt
Default value for tabular and comma-separated value output formats.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5189
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_ALIGN_FORMAT_EXPORT
Definition: ncbi_export.h:1081
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
Declares CIgBlast, the C++ API for the IG-BLAST engine.
yy_size_t n
const struct ncbi::grid::netcache::search::fields::SIZE size
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
T max(T x_, T y_)
T minus(T x_)
static Format format
Definition: njn_ioutil.cpp:53
Defines BLAST database access classes.
struct containing annotated domain information
Definition: tabular.hpp:519
SIgDomain(const string &n, int s, int e, int ss, int se)
Definition: tabular.hpp:520
struct containing annotated gene information
Definition: tabular.hpp:536
void Set(const string id, int s, int e)
Definition: tabular.hpp:537
@ eGi
GI Index.
Modified on Fri Jul 19 17:08:16 2024 by modify_doxy.py rev. 669887