NCBI C++ ToolKit
format_flags.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: format_flags.cpp 100754 2023-09-07 13:34:28Z madden $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  */
27 
28 #include <ncbi_pch.hpp>
29 
31 #include <sstream>
32 
34 BEGIN_SCOPE(align_format)
35 
// String constant holding the text "outfmt".
// NOTE(review): presumably the name of the output-format command-line
// argument -- confirm against the header that declares it.
36 const string kArgOutputFormat("outfmt");
39  "qaccver saccver pident length mismatch gapopen qstart qend sstart send "
40  "evalue bitscore";
42 
45  SFormatSpec("qseqid",
46  "Query Seq-id",
47  eQuerySeqId),
48  SFormatSpec("qgi",
49  "Query GI",
50  eQueryGi),
51  SFormatSpec("qacc",
52  "Query accession",
54  SFormatSpec("qaccver",
55  "Query accession.version",
57  SFormatSpec("qlen",
58  "Query sequence length",
59  eQueryLength),
60  SFormatSpec("sseqid",
61  "Subject Seq-id",
63  SFormatSpec("sallseqid",
64  "All subject Seq-id(s), separated by a ';'",
66  SFormatSpec("sgi",
67  "Subject GI",
68  eSubjectGi),
69  SFormatSpec("sallgi",
70  "All subject GIs",
72  SFormatSpec("sacc",
73  "Subject accession",
75  SFormatSpec("saccver",
76  "Subject accession.version",
78  SFormatSpec("sallacc",
79  "All subject accessions",
81  SFormatSpec("slen",
82  "Subject sequence length",
84  SFormatSpec("qstart",
85  "Start of alignment in query",
86  eQueryStart),
87  SFormatSpec("qend",
88  "End of alignment in query",
89  eQueryEnd),
90  SFormatSpec("sstart",
91  "Start of alignment in subject",
93  SFormatSpec("send",
94  "End of alignment in subject",
95  eSubjectEnd),
96  SFormatSpec("qseq",
97  "Aligned part of query sequence",
98  eQuerySeq),
99  SFormatSpec("sseq",
100  "Aligned part of subject sequence",
101  eSubjectSeq),
102  SFormatSpec("evalue",
103  "Expect value",
104  eEvalue),
105  SFormatSpec("bitscore",
106  "Bit score",
107  eBitScore),
108  SFormatSpec("score",
109  "Raw score",
110  eScore),
111  SFormatSpec("length",
112  "Alignment length",
114  SFormatSpec("pident",
115  "Percentage of identical matches",
117  SFormatSpec("nident",
118  "Number of identical matches",
119  eNumIdentical),
120  SFormatSpec("mismatch",
121  "Number of mismatches",
122  eMismatches),
123  SFormatSpec("positive",
124  "Number of positive-scoring matches",
125  ePositives),
126  SFormatSpec("gapopen",
127  "Number of gap openings",
128  eGapOpenings),
129  SFormatSpec("gaps",
130  "Total number of gaps",
131  eGaps),
132  SFormatSpec("ppos",
133  "Percentage of positive-scoring matches",
135  SFormatSpec("frames",
136  "Query and subject frames separated by a '/'",
137  eFrames),
138  SFormatSpec("qframe",
139  "Query frame",
140  eQueryFrame),
141  SFormatSpec("sframe",
142  "Subject frame",
143  eSubjFrame),
144  SFormatSpec("btop",
145  "Blast traceback operations (BTOP)",
146  eBTOP),
147  SFormatSpec("staxid",
148  "Subject Taxonomy ID",
149  eSubjectTaxId),
150  SFormatSpec("ssciname",
151  "Subject Scientific Name",
153  SFormatSpec("scomname",
154  "Subject Common Name",
156  SFormatSpec("sblastname",
157  "Subject Blast Name",
159  SFormatSpec("sskingdom",
160  "Subject Super Kingdom",
162  SFormatSpec("staxids",
163  "unique Subject Taxonomy ID(s), separated by a ';'\n\t\t\t (in numerical order)",
165  SFormatSpec("sscinames",
166  "unique Subject Scientific Name(s), separated by a ';'",
168  SFormatSpec("scomnames",
169  "unique Subject Common Name(s), separated by a ';'",
171  SFormatSpec("sblastnames",
172  "unique Subject Blast Name(s), separated by a ';'\n\t\t\t (in alphabetical order)",
174  SFormatSpec("sskingdoms",
175  "unique Subject Super Kingdom(s), separated by a ';'\n\t\t\t (in alphabetical order) ",
177  SFormatSpec("stitle",
178  "Subject Title",
179  eSubjectTitle),
180  SFormatSpec("salltitles",
181  "All Subject Title(s), separated by a '<>'",
183  SFormatSpec("sstrand",
184  "Subject Strand",
186  SFormatSpec("qcovs",
187  "Query Coverage Per Subject",
189  SFormatSpec("qcovhsp",
190  "Query Coverage Per HSP",
192  SFormatSpec("qcovus",
193  "Query Coverage Per Unique Subject (blastn only)",
195 };
196 
// Body of DescribeTabularOutputFormatSpecifiers(bool is_igblast); the
// signature line (listing line 197) is not present in this extract.
//
// Builds and returns a help string: one line per entry of
// sc_FormatSpecifiers ("<tab><name> means <description>"), followed by a
// sentence quoting the default format string and its keyword.
198 {
199  // Igblast needs extra "gaps" column by default
200  if (is_igblast) {
// NOTE(review): listing line 201 is missing from this extract, so the
// statement that consumes the literal below is not visible. Presumably it
// assigns to kDfltArgTabularOutputFmt (which is printed further down) --
// TODO confirm against the upstream file.
202  "qseqid sseqid pident length mismatch gapopen gaps qstart qend sstart send "
203  "evalue bitscore";
204  }
205  ostringstream os;
// One line per specifier; setw(10) pads the name to a minimum width of 10.
206  for (size_t i = 0; i < kNumTabularOutputFormatSpecifiers; i++) {
207  os << "\t" << setw(10) << sc_FormatSpecifiers[i].name << " means ";
208  os << sc_FormatSpecifiers[i].description << "\n";
209  }
// Trailer: quote the current default column list and the keyword tag that
// stands for it. No newline is written after the closing quote.
210  os << "When not provided, the default value is:\n";
211  os << "'" << kDfltArgTabularOutputFmt << "', which is equivalent ";
212  os << "to the keyword '" << kDfltArgTabularOutputFmtTag << "'";
213  return os.str();
214 }
215 
// Formatter option names and their default values.
// NOTE(review): the kArg* strings are presumably command-line argument
// names -- confirm against the declaring header.

// Option name "show_gis".
216 const string kArgShowGIs("show_gis");
// Default value for the "show GIs" formatter option.
217 const bool kDfltArgShowGIs = false;
// Option name "num_descriptions".
218 const string kArgNumDescriptions("num_descriptions");
// Default number of one-line descriptions to display in the traditional
// BLAST report.
219 const size_t kDfltArgNumDescriptions = 500;
// Option name "num_alignments".
220 const string kArgNumAlignments("num_alignments");
// Default number of alignments to display in the traditional BLAST report.
221 const size_t kDfltArgNumAlignments = 250;
// Option name "html".
222 const string kArgProduceHtml("html");
// Default value which specifies whether to create output as HTML or not.
223 const bool kDfltArgProduceHtml = false;
// Default line length (60).
// NOTE(review): units and the consumer of this value are not visible here.
224 const size_t kDfltLineLength = 60;
// Remaining option names; their meaning is not documented in this extract.
225 const string kArgAlignSeqList("alignseqlist");
226 const string kArgMetadata("searchmetadata");
227 const string kArgQueryIndex("queryindex");
228 const string kArgSortHits("sorthits");
229 const string kArgSortHSPs("sorthsps");
230 
233  SSAMFormatSpec("SQ",
234  "Include Sequence Data",
235  eSAM_SeqData),
236  SSAMFormatSpec("SR",
237  "Subject as Reference Seq",
239 };
240 
// Body of DescribeSAMOutputFormatSpecifiers(); the signature line (listing
// line 241) is not present in this extract.
//
// Builds and returns a help string with one line per entry of
// sc_SAMFormatSpecifiers, formatted as "<tab><name> means <description>".
242 {
243  ostringstream os;
// setw(10) pads each specifier name to a minimum width of 10.
244  for (size_t i =0; i < kNumSAMOutputFormatSpecifiers; i++) {
245  os << "\t" << setw(10) << sc_SAMFormatSpecifiers[i].name << " means ";
246  os << sc_SAMFormatSpecifiers[i].description << "\n";
247  }
248 
249  return os.str();
250 }
251 
252 END_SCOPE(align_format)
const string kArgSortHits("sorthits")
string kDfltArgTabularOutputFmt
Default value for tabular and comma-separated value output formats.
const string kArgShowGIs("show_gis")
const string kArgNumAlignments("num_alignments")
const int kDfltArgOutputFormat
Default value for formatted output type.
const bool kDfltArgShowGIs
Default value for the "show GIs" formatter option.
const string kArgOutputFormat("outfmt")
const SFormatSpec sc_FormatSpecifiers[kNumTabularOutputFormatSpecifiers]
Array containing the supported output formats for tabular output.
const string kArgQueryIndex("queryindex")
const string kDfltArgTabularOutputFmtTag("std")
const size_t kNumSAMOutputFormatSpecifiers
Number of elements in the sc_SAMFormatSpecifiers array.
string DescribeTabularOutputFormatSpecifiers(bool is_igblast)
Returns a string documenting the available tabular output format specifiers and the default format.
const bool kDfltArgProduceHtml
Default value which specifies whether to create output as HTML or not.
const string kArgNumDescriptions("num_descriptions")
const size_t kNumTabularOutputFormatSpecifiers
Number of elements in the sc_FormatSpecifiers array.
const size_t kDfltArgNumDescriptions
Default number of one-line descriptions to display in the traditional BLAST report.
string DescribeSAMOutputFormatSpecifiers()
Returns a string documenting the available SAM output format specifiers.
const string kArgProduceHtml("html")
const string kArgMetadata("searchmetadata")
const string kArgAlignSeqList("alignseqlist")
const size_t kDfltArgNumAlignments
Default number of alignments to display in the traditional BLAST report.
const size_t kDfltLineLength
const SSAMFormatSpec sc_SAMFormatSpecifiers[kNumSAMOutputFormatSpecifiers]
Array containing the supported output formats for SAM output.
const string kArgSortHSPs("sorthsps")
@ eAlignmentLength
Alignment length.
@ eSubjectEnd
End of alignment in subject.
@ eSubjectAllAccessions
All subject accessions, separated by ';'.
@ eQueryCovSubject
Query Coverage per Subject.
@ ePositives
Number of positive-scoring matches.
@ eSubjectAllGis
All subject gis.
@ eSubjectSciName
Subject Scientific Name.
@ eSubjectGi
Subject gi.
@ eSubjFrame
Subject frame.
@ eSubjectSeq
Aligned part of subject sequence.
@ eQueryStart
Start of alignment in query.
@ eFrames
Query and subject frames separated by a '/'.
@ eSubjectTaxIds
Subject Tax IDs.
@ eQueryEnd
End of alignment in query.
@ eSubjectCommonNames
Subject Common Names.
@ eQueryAccession
Query accession.
@ eSubjectLength
Subject sequence length.
@ eSubjectSeqId
Subject Seq-id(s).
@ eQueryAccessionVersion
Query accession.version.
@ eGapOpenings
Number of gap openings.
@ eQuerySeqId
Query Seq-id(s).
@ eSubjectAccession
Subject accession.
@ ePercentPositives
Percentage of positive-scoring matches.
@ eSubjectBlastName
Subject Blast Name.
@ eNumIdentical
Number of identical matches.
@ eSubjectAllSeqIds
If multiple redundant sequences, all sets of subject Seq-ids, separated by ';'.
@ eMismatches
Number of mismatches.
@ eSubjectStrand
Subject Strand.
@ eSubjectBlastNames
Subject Blast Names.
@ eQueryCovUniqSubject
Query Coverage per Unique Subject.
@ eBitScore
Bit score.
@ ePercentIdentical
Percentage of identical matches.
@ eSubjectSciNames
Subject Scientific Names.
@ eSubjectTitle
Only the first subject defline.
@ eSubjectSuperKingdoms
Subject Super Kingdoms.
@ eQuerySeq
Aligned part of query sequence.
@ eSubjectTaxId
Subject Tax ID.
@ eSubjectStart
Start of alignment in subject.
@ eQueryGi
Query gi.
@ eSubjAccessionVersion
Subject accession.version.
@ eSubjectSuperKingdom
Subject Super Kingdom.
@ eSubjectAllTitles
All subject deflines.
@ eGaps
Total number of gaps.
@ eScore
Raw score.
@ eBTOP
BLAST traceback operations.
@ eEvalue
Expect value.
@ eQueryFrame
Query frame.
@ eQueryLength
Query sequence length.
@ eQueryCovSeqalign
Query Coverage per Seqalign.
@ eSubjectCommonName
Subject Common Name.
@ eSAM_SubjAsRefSeq
Subject as reference seqs.
@ eSAM_SeqData
Include seq data.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
int i
Structure to store the format specification strings, their description and their corresponding enumer...
string name
Format specification name.
string description
A description of what the above name represents.
string name
Format specification name.
string description
A description of what the above name represents.
#define const
Definition: zconf.h:232
Modified on Wed Jun 19 17:00:25 2024 by modify_doxy.py rev. 669887