NCBI C++ ToolKit
align_format_util.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: align_format_util.hpp 100164 2023-06-28 13:36:01Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  */
27 
28 #ifndef OBJTOOLS_ALIGN_FORMAT___ALIGN_FORMAT_UTIL_HPP
29 #define OBJTOOLS_ALIGN_FORMAT___ALIGN_FORMAT_UTIL_HPP
30 
31 #include <corelib/ncbistre.hpp>
32 #include <corelib/ncbireg.hpp>
36 #include <objects/seq/Bioseq.hpp>
38 #include <objmgr/bioseq_handle.hpp>
42 #include <util/math/matrix.hpp>
46 
47 
48 #ifdef _MSC_VER // MSVC builds only: map the plain names onto the underscore-prefixed ones
49 #define strcasecmp _stricmp
50 #define strdup _strdup
51 # if _MSC_VER < 1900 // snprintf is remapped only for compiler versions below 1900
52 #define snprintf _snprintf
53 # endif
54 #endif
55 
56 /**setting up scope*/
58 
59 class CCgiContext;
60 
61 BEGIN_SCOPE(align_format)
62 
63 
64 ///blast related url
65 
66 /// Class info: the HTML attribute text class="info". NOTE(review): presumably the text substituted for <@cssInf@> in the link templates of this file - confirm at the point of use.
67 static const char kClassInfo[] = "class=\"info\"";
68 
69 static const char kDefaultProtocol[] = "https:"; ///< scheme text including the trailing colon; NOTE(review): presumably the default value for the <@protocol@> placeholder - confirm
70 /// Entrez: opening <a> tag only (no closing </a>) pointing at the report=genbank page of <@acc@> in <@db@>
71 // .ncbirc alias: ENTREZ
72 static const char kEntrezUrl[] = "<a title=\"Show report for <@acc@>\" <@cssInf@>href=\"<@protocol@>//www.ncbi.nlm.nih.gov/<@db@>/<@acc@>?report=genbank&log$=<@log@>&blast_rank=<@blast_rank@>&RID=<@rid@>\" <@target@>>";
73 
74 // .ncbirc alias: ENTREZ_TM -- bare URL with the same path and query string as kEntrezUrl, without the surrounding <a> markup
75 static const char kEntrezTMUrl[] = "<@protocol@>//www.ncbi.nlm.nih.gov/<@db@>/<@acc@>?report=genbank&log$=<@log@>&blast_rank=<@blast_rank@>&RID=<@rid@>";
76 
77 // .ncbirc alias: WGS -- bare nuccore URL for <@wgsacc@> (no <a> markup, no query string)
78 static const char kWGSUrl[] = "<@protocol@>//www.ncbi.nlm.nih.gov/nuccore/<@wgsacc@>";
79 
80 
81 
82 /// Trace db: opening <a> tag only (no closing </a>) for trace.cgi with cmd=retrieve and dopt=fasta for <@val@>
83 // .ncbirc alias: TRACE
84 static const char kTraceUrl[] = "<a title=\"Show report for <@val@>\" <@cssInf@>href=\"<@protocol@>//www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=<@val@>&RID=<@rid@>\">";
85 
86 /// Genome button: HTML table holding a GenomeView.gif image link to map_search.cgi plus a caption.
87 // .ncbirc alias: GENOME_BTN -- uses %-conversions rather than <@...@> tags, in order: taxid=%d, RID=%s, CLIENT=%s, QUERY_NUMBER=%d, then %s inside the caption text. The href starts with "//" and has no <@protocol@> placeholder.
88 static const char kGenomeButton[] = "<table border=0 width=600 cellpadding=8>\
89 <tr valign=\"top\"><td><a href=\
90 \"//www.ncbi.nlm.nih.gov/mapview/map_search.cgi?taxid=%d&RID=%s&CLIENT=\
91 %s&QUERY_NUMBER=%d\"><img border=0 src=\"html/GenomeView.gif\"></a></td>\
92 <td>Show positions of the BLAST hits in the %s genome \
93 using the Entrez Genomes MapViewer</td></tr></table><p>";
94 
95 /// UniGene linkout: a complete <a>...</a> element (link text is <@lnk_displ@>) followed by a hidden <input> whose value is <@label@>
96 // .ncbirc alias: UNIGEN
97 static const char kUnigeneUrl[] = "<a class=\"gene\" term=\"<@uid@>\" href=\"<@protocol@>//www.ncbi.nlm.nih.gov/unigene/?<@termParam@>RID=<@rid@>&log$=unigene<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>\
98 <input type=\"hidden\" value=\"<@label@>\" />";
99 
100 // Image markup that substitutes <@lnk_displ@> in the UniGene link
101 static const char kUnigeneImg[] = "<img border=0 height=16 width=16 src=\"images/U.gif\" alt=\"UniGene info linked to <@label@>\">";
102 // Text-link wrapper: <@lnk@> is substituted by the formatted url
103 static const string kUnigeneDispl = "<div><@lnk@>-<span class=\"rlLink\">clustered expressed sequence tags</span></div>";
104 
105 /// Structure linkouts (icn3d full.html). The two templates differ only in the chain selection: kStructureUrl uses select+chain+<@label@>, kStructureAlphaFoldUrl uses the fixed select+chain+!A.
106 // .ncbirc alias: STRUCTURE_URL -- NOTE(review): the original comment had "https://www.ncbi.nlm.nih.gov/Structure/pdb/1AJ9" fused directly onto the alias name; presumably an example or an earlier target - confirm
107 static const char kStructureUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/Structure/icn3d/full.html?from=blast&blast_rep_id=<@label@>&query_id=<@queryID@>&command=view+annotations;set+annotation+cdd;set+annotation+site;set+view+detailed+view;select+chain+<@label@>;show+selection&log$=<@log@>&blast_rank=<@blast_rank@>&RID=<@rid@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
108 static const char kStructureAlphaFoldUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/Structure/icn3d/full.html?from=blast&blast_rep_id=<@label@>&query_id=<@queryID@>&command=view+annotations;set+annotation+cdd;set+annotation+site;set+view+detailed+view;select+chain+!A;show+selection&log$=<@log@>&blast_rank=<@blast_rank@>&RID=<@rid@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
109 
110 
111 // Image markup that substitutes <@lnk_displ@> in the structure link
112 static const char kStructureImg[] = "<img border=0 height=16 width=16 src=\"<@protocol@>//www.ncbi.nlm.nih.gov/Structure/cblast/str_link.gif\" alt=\"Structure related to <@label@>\">";
113 // Text-link wrapper: <@lnk@> is substituted by the formatted url
114 static const string kStructureDispl = "<div><@lnk@>-<span class=\"rlLink\">3D structure displays</span></div>";
115 
116 /// Structure overview: complete "Related Structures" link to cblast.cgi. Mixes the <@protocol@> tag with %-conversions, in order: blast_RID=%s, blast_rep_gi=%d, hit=%d, a free-form %s, blast_view=%s, taxname=%s.
117 static const char kStructure_Overview[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.\
118 gov/Structure/cblast/cblast.cgi?blast_RID=%s&blast_rep_gi=%d&hit=%d&%s\
119 &blast_view=%s&hsp=0&taxname=%s&client=blast\">Related Structures</a>";
120 
121 
122 /// GEO linkout: complete <a>...</a> element searching geoprofiles for <@label@> as a reporter identifier
123 // .ncbirc alias: GEO
124 static const char kGeoUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/geoprofiles/?term=genbank[Platform+Reporter+Type]+AND+<@label@>[Reporter+Identifier]&RID=<@rid@>&log$=geo<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
125 
126 
127 // Image markup that substitutes <@lnk_displ@> in the GEO link
128 static const char kGeoImg[] = "<img border=0 height=16 width=16 src=\"images/E.gif\" alt=\"GEO profiles info linked to <@label@>\">";
129 // Text-link wrapper: <@lnk@> is substituted by the formatted url
130 static const string kGeoDispl = "<div><@lnk@>-<span class=\"rlLink\">microarray expression data</span></div>";
131 
132 /// Gene linkout: complete <a>...</a> element followed by a hidden <input> whose value is <@label@>. The commented-out definition below is an earlier form of the same template.
133 // .ncbirc alias: GENE
134 //static const char kGeneUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/gene?term=<@label@>[<@uid@>]&RID=<@rid@>&log$=gene<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a><input type=\"hidden\" value=";
135 static const char kGeneUrl[] = "<a class=\"gene\" term=\"<@uid@>\" href=\"<@protocol@>//www.ncbi.nlm.nih.gov/gene?<@termParam@>RID=<@rid@>&log$=gene<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>\
136 <input type=\"hidden\" value=\"<@label@>\" />";
137 static const char kGeneTerm[] = "term=<@label@><@uid@>&"; // ends with '&'; NOTE(review): presumably the text substituted for <@termParam@> - confirm
138 // Image markup that substitutes <@lnk_displ@> in the gene link
139 static const char kGeneImg[] = "<img border=0 height=16 width=16 src=\"images/G.gif\" alt=\"Gene info linked to <@label@>\">";
140 // Text-link wrapper: <@lnk@> is substituted by the formatted url
141 static const string kGeneDispl = "<div><@lnk@>-<span class=\"rlLink\">associated gene details</span></div>";
142 
143 /// BioAssay linkout for proteins: complete <a>...</a> element querying db=pcassay with <@gi@>[PigGI]
144 // .ncbirc alias: BIOASSAY_PROT
145 static const char kBioAssayProtURL[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/entrez?db=pcassay&term=<@gi@>[PigGI]&RID=<@rid@>&log$=pcassay<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
146 // Image markup that substitutes <@lnk_displ@> in the protein BioAssay link
147 static const char kBioAssayProtImg[] = "<img border=0 height=16 width=16 src=\"images/Bioassay.gif\" alt=\"PubChem BioAssay Info linked to <@label@>\">";
148 
149 /// BioAssay linkout for nucleotides: complete <a>...</a> element querying db=pcassay with <@gi@>[RNATargetGI]
150 // .ncbirc alias: BIOASSAY_NUC
151 static const char kBioAssayNucURL[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/entrez?db=pcassay&term=<@gi@>[RNATargetGI]&RID=<@rid@>&log$=pcassay<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
152 static const char kBioAssayNucImg[] = "<img border=0 height=16 width=16 src=\"images/Bioassay.gif\" alt=\"PubChem BioAssay Info linked to <@label@>\">"; // same text as kBioAssayProtImg
153 
154 // Text-link wrapper shared by BioAssay Nuc and Prot: <@lnk@> is substituted by the formatted url
155 static const string kBioAssayDispl = "<div><@lnk@>-<span class=\"rlLink\">bioactivity screening</span></div>";
156 
157 /// Mapviewer linkout: complete <a>...</a> element for map_search.cgi keyed by gbgi=<@gi@>. The identifiers are spelled "Mapviwer" (sic); the spelling is part of the names used by other code.
158 // .ncbirc alias: MAPVIEWER
159 static const char kMapviwerUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/mapview/map_search.cgi?direct=on&gbgi=<@gi@>&THE_BLAST_RID=<@rid@>&log$=map<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
160 // Image markup that substitutes <@lnk_displ@> in the mapviewer link
161 static const char kMapviwerImg[] = "<img border=0 height=16 width=16 src=\"images/M.gif\" alt=\"Genome view with mapviewer linked to <@label@>\">";
162 // Text-link wrapper: <@lnk@> is substituted by the formatted url
163 static const string kMapviwerDispl = "<div><@lnk@>-<span class=\"rlLink\">aligned genomic context</span></div>";
164 
165 /// Mapviewer linkout for BLAST hits: a base URL plus a separate <a> template whose href begins with <@user_url@>
166 // used for NT/NW/NC
167 static const string kMapviewBlastHitUrl = "<@protocol@>//www.ncbi.nlm.nih.gov/mapview/maps.cgi?maps=blast_set";
168 static const string kMapviewBlastHitParams = "<a href=\"<@user_url@>&db=<@db@>&na=<@is_na@>&gnl=<@gnl@>&gi=<@gi@>&term=<@gi@>[gi]&taxid=<@taxid@>&RID=<@rid@>&QUERY_NUMBER=<@query_number@>&log$=nucl<@log@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
169 
170 
171 /// Representative microbial genome linkout: complete <a>...</a> element searching /genome for <@label@>[<@uid@>]
172 // .ncbirc alias: REPR_MICROBIAL_GENOMES
173 static const char kReprMicrobialGenomesUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/genome?term=<@label@>[<@uid@>]&RID=<@rid@>&log$=map<@log@>&blast_rank=<@blast_rank@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
174 // Image markup that substitutes <@lnk_displ@> in the genome link
175 static const char kReprMicrobialGenomesImg[] = "<img border=0 height=16 width=16 src=\"images/L.gif\" alt=\"View genome information for <@label@>\">";
176 // Text-link wrapper: <@lnk@> is substituted by the formatted url
177 static const string kReprMicrobialGenomesDispl = "<div><@lnk@>-<span class=\"rlLink\">Genomic Sequence</span></div>";
178 
179 // .ncbirc alias: GENOME_DATA_VIEWER /genome/gdv/browser/?context=blast&id=NC_000019.10&alignid=<@label@>&rid=N9WGPH30015 -- Genome Data Viewer links; all three templates start with a "New" badge <span>. The Nuc form passes id/alignid/from/to/rid; the Prot and NuclTranscript forms pass only acc=<@label@> with context=Protein and context=nucleotide respectively.
180 static const char kGenomeDataViewerNucUrl[] = "<span class=\"adNew\">New</span><a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/genome/gdv/browser/?context=blast&id=<@label@>&alignid=<@queryID@>&from=<@from@>&to=<@to@>&rid=<@rid@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
181 static const char kGenomeDataViewerProtUrl[] = "<span class=\"adNew\">New</span><a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/genome/gdv/browser/?context=Protein&acc=<@label@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
182 static const char kGenomeDataViewerNuclTranscriptUrl[] = "<span class=\"adNew\">New</span><a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/genome/gdv/browser/?context=nucleotide&acc=<@label@>\"<@lnkTitle@><@lnkTarget@>><@lnk_displ@></a>";
183 // Markup that substitutes <@lnk_displ@> (a <div>, not an <img>, despite the Img suffix)
184 static const char kGenomeDataViewerImg[] = "<div class=\"gdv\">V</div>";
185 // Text-link wrapper: <@lnk@> is substituted by the formatted url
186 static const string kGenomeDataViewerDispl = "<div><@lnk@>-<span class=\"rlLink\">aligned genomic context</span></div>";
187 
188 static const char kIdenticalProteinsUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/ipg/<@label@>\" title=\"View proteins identical to <@label@>\" <@lnkTarget@>><@lnk_displ@></a>"; // complete <a>...</a> element linking to /ipg/<@label@>
189 static const string kIdenticalProteinsDispl = "<div><@lnk@>-<span class=\"rlLink\">Identical proteins to <@label@></span></div>"; // text-link wrapper: <@lnk@> is the formatted url
190 
191 
192 /// dumpgnl: site-relative CGI path, and the <a> template that appends &segs=<@segs@> to <@download_url@>
193 static const char kDownloadUrl[] = "/blast/dumpgnl.cgi";
194 static const char kDownloadLink[] = "<a href=\"<@download_url@>&segs=<@segs@>\"><@lnk_displ@></a>";
195 // Image markup that substitutes <@lnk_displ@> in the download link
196 static const char kDownloadImg[] = "<img border=0 height=16 width=16 src=\"images/D.gif\" alt=\"Download subject sequence <@label@> spanning the HSP\">";
197 
198 static const char kSeqViewerUrl[] = "<@protocol@>//www.ncbi.nlm.nih.gov/<@dbtype@>/<@seqid@>?report=graph&rid=<@rid@>[<@seqid@>]&<@seqViewerParams@>&v=<@from@>:<@to@>&appname=ncbiblast&link_loc=<@link_loc@>"; // bare URL (no <a> markup) for the report=graph view of <@seqid@>, range <@from@>:<@to@>
199 static const string kSeqViewerParams = "tracks=[key:sequence_track,name:Sequence,display_name:Sequence,id:STD1,category:Sequence,annots:Sequence,ShowLabel:true][key:gene_model_track,CDSProductFeats:false][key:alignment_track,name:other alignments,annots:NG Alignments|Refseq Alignments|Gnomon Alignments|Unnamed,shown:false]"; // track configuration text; NOTE(review): presumably the default for <@seqViewerParams@> - confirm
200 
201 static const char kSeqViewerUrlNonGi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/projects/sviewer/?RID=<@rid@>&id=<@firstSeqID@>&<@seqViewerParams@>&v=<@from@>:<@to@>&appname=ncbiblast&link_loc=<@link_loc@>"; // bare URL variant that targets /projects/sviewer/ and identifies the sequence through id=<@firstSeqID@>
202 
203 //to test ranges use:
204 //static const char kSeqViewerUrl[] = "//www.ncbi.nlm.nih.gov/<@dbtype@>/<@gi@>?report=graph&rid=<@rid@>&tracks=[key:gene_model_track],[key:alignment_track]&v=<@from@>:<@to@>,<@fromTest@>:<@toTest@>&flip=<@flip@>";
205 
206 static const char kCustomLinkTemplate[] = "<a href=\"<@custom_url@>\" class=\"<@custom_cls@>\" target=\"<@custom_trg@>\" title=\"<@custom_title@>\"><@custom_lnk_displ@></a>"; // generic <a> element whose href, class, target, title and link text are all placeholders
207 static const char kCustomLinkTitle[]="Show <@custom_report_type@> report for <@seqid@>"; // title text; NOTE(review): presumably what fills <@custom_title@> - confirm
208 
209 static const char kGenericLinkTemplate[] = "<a title=\"Show report for <@seqid@>\" href=\"<@url@>\" ><@seqid@></a>"; // plain link: <@seqid@> is both the title subject and the link text
210 static const char kGenericLinkMouseoverTmpl[] = "<span class=\"jig-ncbipopper\" data-jigconfig=\"destText:'<@defline@>'\"><a onclick=\"window.open(this.href,'<@target@>')\" href=\"<@url@>\" ><@seqid@></a></span>"; // same link wrapped in a span carrying <@defline@>; the onclick opens the href in window <@target@>
211 
212 
213 ///Sub-sequence
214 // .ncbirc alias: ENTREZ_SUBSEQ
215 
216 static const char kEntrezSubseqUrl[] = "<a href=\"<@protocol@>//www.ncbi.nlm.nih.gov/<@db@>/<@gi@>?report=gbwithparts&from=<@from@>&to=<@to@>&RID=<@rid@>\">"; // opening <a> tag only (no closing </a>) for the report=gbwithparts view of <@gi@> restricted to <@from@>..<@to@>
217 
218 // .ncbirc alias: ENTREZ_SUBSEQ_TM -- bare URL with the same path and query string as kEntrezSubseqUrl, without the <a> markup
219 static const char kEntrezSubseqTMUrl[] = "<@protocol@>//www.ncbi.nlm.nih.gov/<@db@>/<@gi@>?report=gbwithparts&from=<@from@>&to=<@to@>&RID=<@rid@>";
220 
221 /// Default linkout order: comma-separated list of single-letter codes. NOTE(review): the meaning of each letter is not defined in this part of the file - confirm against the linkout formatting code.
222 // .ncbirc alias: LINKOUT_ORDER
223 static const char kLinkoutOrderStr[] = "G,U,E,S,B,R,M,V,T";
224 
225 // .ncbirc alias: BL2SEQ -- complete "Get TBLASTX alignments" link. NOTE(review): unlike the other templates in this file the href has neither a <@protocol@> tag nor a leading "//", so a browser would resolve it as a relative reference - confirm whether this is intended before relying on it.
226 //static const char kBl2seqUrl[] = "<a href=\"blast.ncbi.nlm.nih.gov/Blast.cgi?QUERY=<@query@>&SUBJECTS=<@subject@>&EXPECT=10&SHOW_OVERVIEW=on&OLD_BLAST=false&NEW_VIEW=on\">Get TBLASTX alignments</a>";
227 static const char kBl2seqUrl[] = "<a href=\"blast.ncbi.nlm.nih.gov/Blast.cgi?QUERY=<@query@>&SUBJECTS=<@subject@>&PROGRAM=tblastx&EXPECT=10&CMD=request&SHOW_OVERVIEW=on&OLD_BLAST=false&NEW_VIEW=on\">Get TBLASTX alignments</a>";
228 
229 
230 
231 // .ncbirc alias: GETSEQ_SUB_FRM_0 -- "Get selected sequences" form posting to entrez/query.fcgi
232 // .ncbirc alias: GETSEQ_SUB_FRM_1 -- the same button posting to Traces/trace.cgi. Both forms use %-conversions, in order: form name %s%d, then finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d). NOTE(review): the _0 action is protocol-relative ("//") yet pins port :80 - confirm this still works when the page is served over https.
233 static const char k_GetSeqSubmitForm_0[] = "<FORM method=\"post\" \
234 action=\"//www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?SUBMIT=y\" \
235 name=\"%s%d\"><input type=button value=\"Get selected sequences\" \
236 onClick=\"finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)\"><input \
237 type=\"hidden\" name=\"db\" value=\"\"><input type=\"hidden\" name=\"term\" \
238 value=\"\"><input type=\"hidden\" name=\"doptcmdl\" value=\"docsum\"><input \
239 type=\"hidden\" name=\"cmd\" value=\"search\"></form>";
240 static const char k_GetSeqSubmitForm_1[] = "<FORM method=\"POST\" \
241 action=\"//www.ncbi.nlm.nih.gov/Traces/trace.cgi\" \
242 name=\"%s%d\"><input type=button value=\"Get selected sequences\" \
243 onClick=\"finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)\"><input \
244 type=\"hidden\" name=\"val\" value=\"\"><input \
245 type=\"hidden\" name=\"cmd\" value=\"retrieve\"></form>"; // trace.cgi variant: hidden fields are val (empty) and cmd=retrieve; %-conversions in order: form name %s%d, then finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)
246 
247 // .ncbirc alias: GETSEQ_SEL_FRM
248 // 'getSeqAlignment%d', 'getSeqGi')\"></form>";
250 type=\"button\" value=\"Select all\" onClick=\"handleCheckAll('select', \
251 'getSeqAlignment%d', 'getSeqGi')\"></form></td><td><FORM><input \
252 type=\"button\" value=\"Deselect all\" onClick=\"handleCheckAll('deselect', \
253 'getSeqAlignment%d', 'getSeqGi')\"></form>";
254 
255 // .ncbirc alias: TREEVIEW_FRM -- "Distance tree of results" form posting to blast_tree_view.cgi. %-conversions in order: rid=%s, queryID=%s, form name tree%s%d, target trv%s, then extractCheckedSeq('getSeqAlignment%d', 'getSeqGi', 'tree%s%d'). The action is protocol-relative ("//") with no <@protocol@> tag.
256 static const char k_GetTreeViewForm[] = "<FORM method=\"post\" \
257 action=\"//www.ncbi.nlm.nih.gov/blast/treeview/blast_tree_view.cgi?request=page&rid=%s&queryID=%s&distmode=on\" \
258 name=\"tree%s%d\" target=\"trv%s\"> \
259 <input type=button value=\"Distance tree of results\" onClick=\"extractCheckedSeq('getSeqAlignment%d', 'getSeqGi', 'tree%s%d')\"> \
260 <input type=\"hidden\" name=\"sequenceSet\" value=\"\"><input type=\"hidden\" name=\"screenWidth\" value=\"\"></form>";
261 
262 // .ncbirc alias: GENE_INFO
264 "<@protocol@>//www.ncbi.nlm.nih.gov/sites/entrez?db=gene&cmd=search&term=%d&RID=%s&log$=geneexplicit%s&blast_rank=%d";
265 
266 // .ncbirc alias: TREEVIEW_CGI -- this and the following seven constants are bare CGI base URLs (no query string, no markup), each starting with the <@protocol@> tag
267 static const char kGetTreeViewCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/blast/treeview/blast_tree_view.cgi";
268 // .ncbirc alias: ENTREZ_QUERY_CGI
269 static const char kEntrezQueryCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/entrez/query.fcgi";
270 // .ncbirc alias: TRACE_CGI
271 static const char kTraceCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/Traces/trace.cgi";
272 // .ncbirc alias: MAP_SEARCH_CGI
273 static const char kMapSearchCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/mapview/map_search.cgi";
274 // .ncbirc alias: CBLAST_CGI
275 static const char kCBlastCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/Structure/cblast/cblast.cgi";
276 // .ncbirc alias: ENTREZ_VIEWER_CGI
277 static const char kEntrezViewerCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/entrez/viewer.fcgi";
278 // .ncbirc alias: BL2SEQ_WBLAST_CGI
279 static const char kBl2SeqWBlastCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/blast/bl2seq/wblast2.cgi";
280 // .ncbirc alias: ENTREZ_SITES_CGI
281 static const char kEntrezSitesCgi[] = "<@protocol@>//www.ncbi.nlm.nih.gov/sites/entrez";
282 
283 
284 /// Source table for the map of all static URLs: each entry pairs a tag name with one of the template constants defined above.
285 /// This table must stay in alphabetical order of the tag name. The existing entries follow ASCII order (e.g. "MAPVIEWER" precedes "MAP_SEARCH_CGI" because 'V' sorts before '_'); insert new entries accordingly. NOTE(review): the line declaring the array itself (listing line 287) is absent from this view.
286 typedef SStaticPair<const char*, const char*> TTagUrl;
288  { "BIOASSAY_NUC", kBioAssayNucURL },
289  { "BIOASSAY_PROT", kBioAssayProtURL },
290  { "BL2SEQ", kBl2seqUrl },
291  { "BL2SEQ_WBLAST_CGI", kBl2SeqWBlastCgi },
292  { "CBLAST_CGI", kCBlastCgi },
293  { "ENTREZ", kEntrezUrl },
294  { "ENTREZ_QUERY_CGI", kEntrezQueryCgi },
295  { "ENTREZ_SITES_CGI", kEntrezSitesCgi },
296  { "ENTREZ_SUBSEQ", kEntrezSubseqUrl },
297  { "ENTREZ_SUBSEQ_TM", kEntrezSubseqTMUrl },
298  { "ENTREZ_TM", kEntrezTMUrl },
299  { "ENTREZ_VIEWER_CGI", kEntrezViewerCgi },
300  { "GENE", kGeneUrl },
301  { "GENE_INFO", kGeneInfoUrl },
302  { "GENOME_BTN", kGenomeButton },
303  { "GENOME_DATA_VIEWER_NUC", kGenomeDataViewerNucUrl },
304  { "GENOME_DATA_VIEWER_PROT", kGenomeDataViewerProtUrl },
305  { "GENOME_DATA_VIEWER_TRANSCR", kGenomeDataViewerNuclTranscriptUrl },
306  { "GEO", kGeoUrl },
307  { "GETSEQ_SEL_FRM", k_GetSeqSelectForm },
308  { "GETSEQ_SUB_FRM_0", k_GetSeqSubmitForm_0 },
309  { "GETSEQ_SUB_FRM_1", k_GetSeqSubmitForm_1 },
310  { "MAPVIEWER", kMapviwerUrl },
311  { "MAP_SEARCH_CGI", kMapSearchCgi },
312  { "REPR_MICROBIAL_GENOMES", kReprMicrobialGenomesUrl},
313  { "STRUCTURE_OVW", kStructure_Overview },
314  { "STRUCTURE_URL", kStructureUrl },
315  { "TRACE", kTraceUrl },
316  { "TRACE_CGI", kTraceCgi },
317  { "TREEVIEW_CGI", kGetTreeViewCgi },
318  { "TREEVIEW_FRM", k_GetTreeViewForm },
319  { "UNIGEN", kUnigeneUrl },
320  { "WGS", kWGSUrl },
321 };
322 
323 #ifndef NCBI_SWIG
326 #endif
327 
328 
329 #ifndef DIM
330 /// Calculates the number of elements of a static array (total size divided by the size of its first element). Only meaningful for a true array; the argument is not parenthesized in the expansion, so pass a plain array name. Defined only if DIM is not already defined.
331 #define DIM(static_array) (sizeof(static_array)/sizeof(*static_array))
332 #endif
333 
334 /// Protein matrix definition: anonymous enum used as a compile-time integer constant
335 enum {
336  ePMatrixSize = 23 // number of amino acids for the matrix
337 };
338 
339 /// Number of ASCII characters (128) for populating matrix columns
340 const int k_NumAsciiChar = 128;
341 
342 /// Residues: array of residue characters, declared here and defined elsewhere. NOTE(review): its length is not visible in this header - presumably related to ePMatrixSize; confirm at the definition.
344 extern const char k_PSymbol[];
345 
346 /** This class contains misc functions for displaying BLAST results. */
347 
349 {
350 public:
351 
352  /// The string containing the message that no hits were found (declaration only; the text is defined outside this section)
353  static const char kNoHitsFound[];
354 
355  /// Error info structure; element type of the list taken by BlastPrintError. NOTE(review): listing line 357 (presumably another member) is absent from this view.
356  struct SBlastError {
358  string message; ///< error message text
359  };
360 
361  /// Blast database info. NOTE(review): several member declarations and the constructor's own signature line (listing lines 363, 367-368, 371, 373, 376) are absent from this view; is_protein, total_length and number_seqs are assigned below but declared on those missing lines.
362  struct SDbInfo {
364  string name;
365  string definition;
366  string date;
369  bool subset;
370  /// Filtering algorithm ID used in BLAST search
372  /// Filtering algorithm options used in BLAST search
374 
375  /// Default constructor: sets is_protein to true, name/definition/date to "Unknown", total_length and number_seqs to 0, and subset to false
377  is_protein = true;
378  name = definition = date = "Unknown";
379  total_length = 0;
380  number_seqs = 0;
381  subset = false;
382  }
383  };
384 
385  /// Structure that holds the information needed to create a seqID URL in descriptions
386  /// and alignments
387  struct SSeqURLInfo {
388  string user_url; ///< user url TOOL_URL from .ncbirc
389  string blastType; ///< blast type refer to blobj->adm->trace->created_by
390  bool isDbNa; ///< bool indicating if the database is nucleotide or not
391  string database; ///< name of the database
392  string rid; ///< blast RID
393  int queryNumber; ///< the query number
394  TGi gi; ///< gi to use
395  string accession; ///< accession
396  int linkout; ///< linkout flag
397  int blast_rank; ///< index of the current alignment
398  bool isAlignLink; ///< bool indicating if link is in alignment section
399  bool new_win; ///< bool indicating if click of the url will open a new window
400  CRange<TSeqPos> seqRange;///< sequence range
401  bool flip; ///< flip sequence in case of opposite strands
402  TTaxId taxid; ///< taxid
403  bool addCssInfo; ///< bool indicating that css info should be added
404  string segs; ///< string containing align segments in the following format seg1Start-seg1End,seg2Start-seg2End
405  string resourcesUrl; ///< URL(s) to other resources from .ncbirc
406  bool useTemplates; ///< bool indicating that templates should be used when constructing links
407  bool advancedView; ///< bool indicating that advanced view design option should be used when constructing links
408  string seqUrl; ///< sequence URL created
409  string defline; ///< sequence defline
411 
412  /// Constructor: copies each argument into the corresponding member, in declaration order from user_url through advancedView. The first twelve arguments are required; range defaults to CRange<TSeqPos>(0,0), flp to false, txid to INVALID_TAX_ID, and the remaining flags/strings to false/"". seqUrl and defline are not in the initializer list and are therefore left default-constructed.
413  SSeqURLInfo(string usurl,string bt, bool isnuc,string db, string rid_in,int qn,
414  TGi gi_in, string acc, int lnk, int blrk,bool alnLink, bool nw, CRange<TSeqPos> range = CRange<TSeqPos>(0,0),bool flp = false,
415  TTaxId txid = INVALID_TAX_ID,bool addCssInf = false,string seqSegs = "",string resUrl = "",bool useTmpl = false, bool advView = false)
416  : user_url(usurl),blastType(bt), isDbNa(isnuc), database(db),rid(rid_in),
417  queryNumber(qn), gi(gi_in), accession(acc), linkout(lnk),blast_rank(blrk),isAlignLink(alnLink),
418  new_win(nw),seqRange(range),flip(flp),taxid (txid),addCssInfo(addCssInf),segs(seqSegs),
419  resourcesUrl(resUrl),useTemplates(useTmpl),advancedView(advView){}
420 
421  };
422 
423  struct SLinkoutInfo { // Parameters for building linkouts. NOTE(review): several member declarations (e.g. query_number, preComputedResID, linkoutdb, mv_build_name, structure_linkout_as_group, for_alignment) sit on listing lines absent from this view; they are assigned by Init below.
424  string rid;
425  string cdd_rid;
426  string entrez_term;
427  bool is_na;
428  string database;
430  string user_url;
432 
435 
437  int cur_align;
438  string taxName;
439  string gnl;
441 
442  string linkoutOrder;
445 
446  string giList;
447  string labelList;
449 
450  string queryID;
451 
452  // Init (basic form): copies each *_in argument into the member of the same name. It does not assign cur_align, taxName, gnl, giList, labelList, queryID, linkoutdb or mv_build_name.
453  void Init(string rid_in, string cdd_rid_in, string entrez_term_in, bool is_na_in,
454  string database_in, int query_number_in, string user_url_in, string preComputedResID_in,
455  string linkoutOrder_in,
456  bool structure_linkout_as_group_in = false, bool for_alignment_in = true) {
457  rid = rid_in;
458  cdd_rid = cdd_rid_in;
459  entrez_term = entrez_term_in;
460  is_na = is_na_in;
461  database = database_in;
462  query_number = query_number_in;
463  user_url = user_url_in;
464  preComputedResID = preComputedResID_in;
465 
466  linkoutOrder = linkoutOrder_in;
467  //linkoutdb = linkoutdb_in;
468  //mv_build_name = mv_build_name_in;
469 
470 
471  structure_linkout_as_group = structure_linkout_as_group_in;
472  for_alignment = for_alignment_in;
473  }
474  // Init (extended form): delegates to the basic form above, then additionally stores linkoutdb_in and mv_build_name_in.
475  void Init(string rid_in, string cdd_rid_in, string entrez_term_in, bool is_na_in,
476  string database_in, int query_number_in, string user_url_in, string preComputedResID_in,
477  string linkoutOrder_in, ILinkoutDB* linkoutdb_in, string mv_build_name_in,
478  bool structure_linkout_as_group_in = false, bool for_alignment_in = true) {
479 
480  Init(rid_in,cdd_rid_in,entrez_term_in,is_na_in,
481  database_in,query_number_in, user_url_in,preComputedResID_in,
482  linkoutOrder_in,
483  structure_linkout_as_group_in, for_alignment_in);
484 
485  linkoutdb = linkoutdb_in;
486  mv_build_name = mv_build_name_in;
487  }
488  };
489 
490  /// Structure that holds information for all hits of one subject in Seq Align Set. NOTE(review): the line opening the struct (listing line 491) is absent from this view.
492  //values used in descriptions display
493  double evalue; //lowest evalue in Seq Align Set, displayed on the results page as 'Evalue'
494  double bit_score; //Highest bit_score in Seq Align Set, displayed on the results page as 'Max Score'
495  double total_bit_score; //total bit_score for Seq Align Set, displayed on the results page as 'Total Score'
496  int percent_coverage; //percent coverage for Seq Align Set, displayed on the results page as 'Query coverage'
497  //calculated as 100*master_covered_length/queryLength
498  double percent_identity; //highest percent identity in Seq Align Set, displayed on the results page as 'Max ident'
499  //calculated as 100*match/align_length
500 
501  int hspNum; //hsp number, number of hits
502  Int8 totalLen; //total alignment length
503 
504  int raw_score; //raw score, read from the 'score' in first align in Seq Aln Set, not used
505  list<TGi> use_this_gi; //Limit formatting by these GI's, read from the first align in Seq Aln Set
506  list<string> use_this_seq; //Limit formatting by these seqids, read from the first align in Seq Aln Set
507  int sum_n; //sum_n in score block, read from the first align in Seq Aln Set
508 
509  int master_covered_length; //total query length covered by alignment - calculated, used to calculate percent_coverage
510 
511  int match; //number of matches in the alignment with highest percent identity, used to calculate percent_identity
512  int align_length; //length of the alignment with highest percent identity, used to calculate percent_identity
513 
514  CConstRef<objects::CSeq_id> id; //subject seq id
515  CRange<TSeqPos> subjRange; //total subject sequence range - calculated
516  bool flip; //indicates opposite strands in the first seq align
517  };
518 
519  enum DbSortOrder { // database sort order; numbering starts at 1
520  eNonGenomicFirst = 1,
521  eGenomicFirst // implicitly 2
522  };
523 
524  enum HitOrder { // hit ordering criteria; NOTE(review): listing lines 526-528 (presumably further enumerators) are absent from this view, so the numeric value of eQueryCoverage cannot be read off here
525  eEvalue = 0,
529  eQueryCoverage
530  };
531 
532  enum HspOrder { // HSP ordering criteria; NOTE(review): listing lines 534-537 (presumably further enumerators) are absent from this view
533  eHspEvalue = 0,
538  };
539 
541  eLinkTypeDefault = 0, // link-type bit flags: every enumerator below is a distinct single bit (1 << k), so values can be OR-ed together. NOTE(review): the line opening this enum (listing line 540) is absent from this view.
542  eLinkTypeMapViewer = (1 << 0),
543  eLinkTypeSeqViewer = (1 << 1),
544  eDownLoadSeq = (1 << 2),
545  eLinkTypeGenLinks = (1 << 3),
546  eLinkTypeTraceLinks = (1 << 4),
547  eLinkTypeSRALinks = (1 << 5),
548  eLinkTypeSNPLinks = (1 << 6),
549  eLinkTypeGSFastaLinks = (1 << 7)
550  };
551 
552  /// Database type. NOTE(review): listing line 555 (presumably another enumerator between eDbGi and eDbTypeNotSet) is absent from this view.
553  enum DbType {
554  eDbGi = 0,
556  eDbTypeNotSet
557  };
558 
559  // Formatting flags for adding spaces and end-of-line characters; each value is a distinct single bit. NOTE(review): the line opening this enum (listing line 560) is absent from this view.
561  eSpacePosToCenter = (1 << 0), ///place the param in the center of the string
562  eSpacePosAtLineStart = (1 << 1), ///add spaces at the beginning of the string
563  eSpacePosAtLineEnd = (1 << 2), ///add spaces at the end of the string
564  eAddEOLAtLineStart = (1 << 3), ///add EOL at the beginning of the string
565  eAddEOLAtLineEnd = (1 << 4) ///add EOL at the end of the string
566  };
567 
568 
569  ///Output blast errors
570  ///@param error_return: list of errors to report (taken by non-const reference)
571  ///@param error_post: post to stderr or not
572  ///@param out: stream to output to
573  ///
574  static void BlastPrintError(list<SBlastError>& error_return,
575  bool error_post, CNcbiOstream& out);
576 
577  ///Print out misc information separated by "~"
578  ///@param str: input information (taken by value)
579  ///@param line_len: length of each line desired
580  ///@param out: stream to output to
581  ///
582  static void PrintTildeSepLines(string str, size_t line_len,
583  CNcbiOstream& out);
584 
585  /// Fills one BLAST dbinfo structure.
586  /// Intended for use in bl2seq mode with multiple subject sequences.
587  /// The database title is set to "User specified sequence set".
588  /// @param retval return vector [in/out]
589  /// @param is_protein are these databases protein? [in]
590  /// @param numSeqs number of sequences in set [in]
591  /// @param numLetters size of the set [in]
592  /// @param tag Hint to user about subject sequences [in]
593  static void FillScanModeBlastDbInfo(vector<SDbInfo>& retval,
594  bool is_protein, int numSeqs, Int8 numLetters, string& tag);
595 
596  /// Retrieve BLAST database information for presentation in BLAST report
597  /// @param retval vector that receives the database info entries (non-const reference); NOTE(review): whether existing contents are cleared first is not visible in this header
598  /// @param blastdb_names space-separated list of BLAST database names [in]
598  /// @param is_protein are these databases protein? [in]
599  /// @param dbfilt_algorithm BLAST database filtering algorithm ID (if
600  /// applicable), use -1 if not applicable [in]
601  /// @param is_remote is this a remote BLAST search? Defaults to false [in]
602  static void GetBlastDbInfo(vector<SDbInfo>& retval,
603  const string& blastdb_names, bool is_protein,
604  int dbfilt_algorithm,
605  bool is_remote = false);
606 
607  ///Print out blast database information
608  ///@param dbinfo_list: database info list
609  ///@param line_length: length of each line desired
610  ///@param out: stream to output to
611  ///@param top Is this top or bottom part of the BLAST report? Defaults to false.
612  static void PrintDbReport(const vector<SDbInfo>& dbinfo_list,
613  size_t line_length,
614  CNcbiOstream& out,
615  bool top=false);
616 
617  ///Print out kappa, lambda blast parameters
618  ///@param lambda
619  ///@param k
620  ///@param h
621  ///@param line_len length of each line desired
622  ///@param out stream to output to
623  ///@param gapped gapped alignment?
624  ///@param gbp Gumbel parameters (optional; defaults to NULL)
625  static void PrintKAParameters(double lambda, double k, double h,
626  size_t line_len, CNcbiOstream& out,
627  bool gapped, const Blast_GumbelBlk *gbp=NULL);
628 
629  /// Returns a full '|'-delimited Seq-id string for a Bioseq.
630  /// @param cbs Bioseq object [in]
631  /// @param believe_local_id Should local ids be parsed? Defaults to true [in]
632  static string
633  GetSeqIdString(const objects::CBioseq& cbs, bool believe_local_id=true);
634 
635  /// Returns a full '|'-delimited Seq-id string for a list of seq-ids.
636  /// @param ids list of seq-ids [in]
637  /// @param believe_local_id Should local ids be parsed? No default in this overload [in]
638  static string
639  GetSeqIdString(const list<CRef<objects::CSeq_id> > & ids, bool believe_local_id);
640 
641  /// Returns a full description for a Bioseq, concatenating all available
642  /// titles into a single string.
643  /// @param cbs Bioseq object [in]
644  static string GetSeqDescrString(const objects::CBioseq& cbs);
645 
646  ///Print out blast query info
647  /// @param cbs bioseq of interest
648  /// @param line_len length of each line desired
649  /// @param out stream to output to
650  /// @param believe_query use user id or not
651  /// @param html in html format or not [in]
652  /// @param tabular Is this done for tabular formatting? Defaults to false [in]
653  /// @param rid the RID to acknowledge (if not empty); defaults to kEmptyStr [in]
654  ///
655  static void AcknowledgeBlastQuery(const objects::CBioseq& cbs, size_t line_len,
656  CNcbiOstream& out, bool believe_query,
657  bool html, bool tabular=false,
658  const string& rid = kEmptyStr);
659 
660  /// Print out blast subject info
661  /// @param cbs bioseq of interest
662  /// @param line_len length of each line desired
663  /// @param out stream to output to
664  /// @param believe_query use user id or not
665  /// @param html in html format or not [in]
666  /// @param tabular Is this done for tabular formatting? Defaults to false [in]
667  ///
668  static void AcknowledgeBlastSubject(const objects::CBioseq& cbs, size_t line_len,
669  CNcbiOstream& out, bool believe_query,
670  bool html, bool tabular=false);
671 
672  /// Retrieve a scoring matrix for the provided matrix name
673  /// @param matrix_name name of the scoring matrix to look up [in]
674  /// @param retval receives the requested matrix (indexed using ASCII characters), or an empty matrix if matrix_name is invalid or can't be found [out]
675  static void GetAsciiProteinMatrix(const char* matrix_name,
676  CNcbiMatrix<int>& retval);
677 private:
678  static void x_AcknowledgeBlastSequence(const objects::CBioseq& cbs,
679  size_t line_len,
680  CNcbiOstream& out,
681  bool believe_query,
682  bool html,
683  const string& label,
684  bool tabular /* = false */,
685  const string& rid /* = kEmptyStr*/);
686 public:
687 
688  /// Prints out PHI-BLAST info for header (or footer)
689  /// @param num_patterns number of times pattern appears in query [in]
690  /// @param pattern the pattern used [in]
691  /// @param prob probability of pattern [in]
692  /// @param offsets vector of pattern offsets in query [in]
693  /// @param out stream to output [in]
694  static void PrintPhiInfo(int num_patterns, const string& pattern,
695  double prob,
696  vector<int>& offsets,
697  CNcbiOstream& out);
698 
699  ///Extract score info from blast alignment
700  ///@param aln: alignment to extract score info from
701  ///@param score: place to extract the raw score to
702  ///@param bits: place to extract the bit score to
703  ///@param evalue: place to extract the e value to
704  ///@param sum_n: place to extract the sum_n to
705  ///@param num_ident: place to extract the num_ident to
706  ///@param use_this_gi: place to extract use_this_gi to
707  ///
708  static void GetAlnScores(const objects::CSeq_align& aln,
709  int& score,
710  double& bits,
711  double& evalue,
712  int& sum_n,
713  int& num_ident,
714  list<TGi>& use_this_gi);
715 
716  ///Extract score info from blast alignment
717  ///@param aln: alignment to extract score info from
718  ///@param score: place to extract the raw score to
719  ///@param bits: place to extract the bit score to
720  ///@param evalue: place to extract the e value to
721  ///@param sum_n: place to extract the sum_n to
722  ///@param num_ident: place to extract the num_ident to
723  ///@param use_this_seq: place to extract use_this_seq to
724  ///
725  static void GetAlnScores(const objects::CSeq_align& aln,
726  int& score,
727  double& bits,
728  double& evalue,
729  int& sum_n,
730  int& num_ident,
731  list<string>& use_this_seq);
732 
733  ///Extract score info from blast alignment
734  /// Second version that fetches compositional adjustment integer
735  ///@param aln: alignment to extract score info from
736  ///@param score: place to extract the raw score to
737  ///@param bits: place to extract the bit score to
738  ///@param evalue: place to extract the e value to
739  ///@param sum_n: place to extract the sum_n to
740  ///@param num_ident: place to extract the num_ident to
741  ///@param use_this_gi: place to extract use_this_gi to
742  ///@param comp_adj_method: composition based statistics method [out]
743  ///
744  static void GetAlnScores(const objects::CSeq_align& aln,
745  int& score,
746  double& bits,
747  double& evalue,
748  int& sum_n,
749  int& num_ident,
750  list<TGi>& use_this_gi,
751  int& comp_adj_method);
752 
753  ///Extract score info from blast alignment
754  /// Second version that fetches compositional adjustment integer
755  ///@param aln: alignment to extract score info from
756  ///@param score: place to extract the raw score to
757  ///@param bits: place to extract the bit score to
758  ///@param evalue: place to extract the e value to
759  ///@param sum_n: place to extract the sum_n to
760  ///@param num_ident: place to extract the num_ident to
761  ///@param use_this_seq: place to extract use_this_seq to
762  ///@param comp_adj_method: composition based statistics method [out]
763  ///
764  static void GetAlnScores(const objects::CSeq_align& aln,
765  int& score,
766  double& bits,
767  double& evalue,
768  int& sum_n,
769  int& num_ident,
770  list<string>& use_this_seq,
771  int& comp_adj_method);
772 
773  ///Extract use_this_gi info from blast alignment
774  ///@param aln: alignment to extract score info from
775  ///@param use_this_gi: place to extract use_this_gi to
776  static void GetUseThisSequence(const objects::CSeq_align& aln,list<TGi>& use_this_gi);
777 
778  ///Extract use_this_seq info from blast alignment
779  ///@param aln: alignment to extract score info from
780  ///@param use_this_seq: place to extract use_this_seq to
781  static void GetUseThisSequence(const objects::CSeq_align& aln,list<string>& use_this_seq);
782 
783  ///Matches text seqID or gi with the list of seqIds or gis
784  ///@param cur_gi: gi to match
785  ///@param seqID: CSeq_id to extract label info to match
786  ///@param use_this_seq: list<string> containing gi:sssssss or seqid:sssssssss
787  ///@param isGiList: bool= true if use_this_seq contains gi list
788  ///@return: bool=true if the match is found
789  static bool MatchSeqInSeqList(TGi cur_gi, CRef<objects::CSeq_id> &seqID, list<string> &use_this_seq,bool *isGiList = NULL);
790 
791  ///Matches string of seqIDs (gis or text seqID)
792  ///@param alnSeqID: CSeq_id to extract label info to match
793  ///@param use_this_seq: list<string> containing gi:sssssss or seqid:sssssssss
794  ///@param seqList: seqIds to match against, passed as a vector<string>
795  ///@return: bool=true if the match is found
796  static bool MatchSeqInSeqList(CConstRef<objects::CSeq_id> &alnSeqID, list<string> &use_this_seq,vector <string> &seqList);
797 
798  static bool MatchSeqInUseThisSeqList(list<string> &use_this_seq, string textSeqIDToMatch);
799 
800  ///Check if use_this_seq contains gi list
801  ///@param use_this_seq: list<string> containing gi:sssssss or seqid:sssssssss
802  ///@return: bool= true if use_this_seq contains gi list
803  static bool IsGiList(list<string> &use_this_seq);
804 
805  ///Convert string gi list to TGi list
806  ///@param use_this_seq: list<string> containing gi:sssssss
807  ///@return: list<TGi> containing sssssss
808  static list<TGi> StringGiToNumGiList(list<string> &use_this_seq);
809 
810  ///Add the specified white space
811  ///@param out: ostream to add white space
812  ///@param number: the number of white spaces desired
813  ///
814  static void AddSpace(CNcbiOstream& out, int number);
815 
816  ///Return ID for GNL label
817  ///@param dtg: dbtag to build label from
818  static string GetGnlID(const objects::CDbtag& dtg);
819 
820  ///Return a label for an ID
821  /// Tries to recreate behavior of GetLabel before a change that
822  /// prepends "ti|" to trace IDs
823  ///@param id CSeqId: to build label from
824  ///@param with_version bool: include version to the label
825  static string GetLabel(CConstRef<objects::CSeq_id> id, bool with_version = false);
826 
827  ///format evalue and bit_score
828  ///@param evalue: e value
829  ///@param bit_score: bit score
830  ///@param total_bit_score: total bit score
831  ///@param raw_score: raw score (e.g., BLOSUM score)
832  ///@param evalue_str: variable to store the formatted evalue
833  ///@param bit_score_str: variable to store the formatted bit score
834  ///@param total_bit_score_str: variable to store the formatted total bit score
835  ///@param raw_score_str: variable to store the formatted raw score
836  static void GetScoreString(double evalue,
837  double bit_score,
838  double total_bit_score,
839  int raw_score,
840  string& evalue_str,
841  string& bit_score_str,
842  string& total_bit_score_str,
843  string& raw_score_str);
844 
845  ///Fill new alignset containing the specified number of alignments with
846  ///unique slave seqids. Note no new seqaligns are created. It just
847  ///references the original seqalign
848  ///@param source_aln: the original alnset
849  ///@param new_aln: the new alnset
850  ///@param num: the specified number
851  ///(declared void: nothing is returned; the result is delivered through new_aln)
852  static void PruneSeqalign(const objects::CSeq_align_set& source_aln,
853  objects::CSeq_align_set& new_aln,
854  unsigned int num = static_cast<unsigned int>(kDfltArgNumAlignments));
855 
856  ///Calculate number of subject sequences in alignment limited by num
857  ///@param source_aln: the original alnset
858  ///@param num: the specified number of subj sequences to consider
859  ///@return actual number of subject seqs in alignment
860  static unsigned int GetSubjectsNumber(const objects::CSeq_align_set& source_aln,
861  unsigned int num);
862 
863  ///Fill new alignset containing the specified number of alignments
864  ///plus the rest of alignments for the last subject seq
865  ///unique slave seqids. Note no new seqaligns are created. It just
866  ///references the original seqalign
867  ///
868  ///@param source_aln: the original alnset
869  ///@param new_aln: the new alnset
870  ///@param number: the specified number
871  ///
872  static void PruneSeqalignAll(const objects::CSeq_align_set& source_aln,
873  objects::CSeq_align_set& new_aln,
874  unsigned int number);
875 
876  /// Count alignment length, number of gap openings and total number of gaps
877  /// in a single alignment.
878  /// @param salv Object representing one alignment (HSP) [in]
879  /// @param align_length Total length of this alignment [out]
880  /// @param num_gaps Total number of insertions and deletions in this
881  /// alignment [out]
882  /// @param num_gap_opens Number of gap segments in the alignment [out]
883  static void GetAlignLengths(objects::CAlnVec& salv, int& align_length,
884  int& num_gaps, int& num_gap_opens);
885 
886  /// If a Seq-align-set contains Seq-aligns with discontinuous type segments,
887  /// extract the underlying Seq-aligns and put them all in a flat
888  /// Seq-align-set.
889  /// @param source Original Seq-align-set
890  /// @param target Resulting Seq-align-set
891  static void ExtractSeqalignSetFromDiscSegs(objects::CSeq_align_set& target,
892  const objects::CSeq_align_set& source);
893 
894  ///Create denseseg representation for densediag seqalign
895  ///@param aln: the input densediag seqalign
896  ///@return: the new denseseg seqalign
897  static CRef<objects::CSeq_align> CreateDensegFromDendiag(const objects::CSeq_align& aln);
898 
899  ///return the tax id for a seqid
900  ///@param id: seq id
901  ///@param scope: scope to fetch this sequence
902  ///
903  static TTaxId GetTaxidForSeqid(const objects::CSeq_id& id, objects::CScope& scope);
904 
905  ///return the frame for a given strand
906  ///Note that start is zero-based. It returns frame +/-(1-3).
907  ///0 indicates error
908  ///@param start: sequence start position
909  ///@param strand: strand
910  ///@param handle: bioseq handle of the sequence
911  ///@return: the frame
912  ///
913  static int GetFrame (int start, objects::ENa_strand strand, const objects::CBioseq_Handle& handle);
914 
915  ///return the comparison result: 1st >= 2nd => true, false otherwise
916  ///@param info1
917  ///@param info2
918  ///@return: the result
919  ///
920  static bool SortHitByTotalScoreDescending(CRef<objects::CSeq_align_set> const& info1,
921  CRef<objects::CSeq_align_set> const& info2);
922 
923  static bool
924  SortHitByMasterCoverageDescending(CRef<objects::CSeq_align_set> const& info1,
925  CRef<objects::CSeq_align_set> const& info2);
926 
927 
928  ///group hsp's with the same id together
929  ///@param target: the result list
930  ///@param source: the source list
931  ///
932  static void HspListToHitList(list< CRef<objects::CSeq_align_set> >& target,
933  const objects::CSeq_align_set& source);
934 
935  ///extract all nested hsp's into a list
936  ///@param source: the source list
937  ///@return the list of hsp's
938  ///
939  static CRef<objects::CSeq_align_set> HitListToHspList(list< CRef<objects::CSeq_align_set> >& source);
940 
941  ///extract seq_align_set corresponding to seqid list
942  ///@param all_aln_set: CSeq_align_set source/target list
943  ///@param alignSeqList: string of seqIds separated by comma
944  ///
945  static void ExtractSeqAlignForSeqList(CRef<objects::CSeq_align_set> &all_aln_set, string alignSeqList);
946 
947  ///return the custom url (such as mapview)
948  ///@param ids: the id list
949  ///@param taxid
950  ///@param user_url: the custom url
951  ///@param database
952  ///@param db_is_na: is db nucleotide?
953  ///@param rid: blast rid
954  ///@param query_number: the blast query number.
955  ///@param for_alignment: is the URL generated for an alignment or a top defline?
956  ///
957  static string BuildUserUrl(const objects::CBioseq::TId& ids, TTaxId taxid, string user_url,
958  string database, bool db_is_na, string rid,
959  int query_number, bool for_alignment);
960 
961  ///return the SRA (Short Read Archive) URL
962  ///@param ids: the id list
963  ///@param user_url: the URL of SRA cgi
964  ///@return newly constructed SRA URL pointing to the identified spot
965  ///
966  static string BuildSRAUrl(const objects::CBioseq::TId& ids, string user_url);
967 
968 
969  ///calculate the percent identity for a seqalign
970  ///@param aln: the seqalign
971  ///@param scope: scope to fetch sequences
972  ///@param do_translation: is this a translated nuc to nuc alignment?
973  ///@return: the identity
974  ///
975  static double GetPercentIdentity(const objects::CSeq_align& aln, objects::CScope& scope,
976  bool do_translation);
977 
978  ///get the alignment length
979  ///@param aln: the seqalign
980  ///@param do_translation: is this a translated nuc to nuc alignment?
981  ///@return: the alignment length
982  ///
983  static int GetAlignmentLength(const objects::CSeq_align& aln, bool do_translation);
984 
985  ///sort a list of seqalign set by alignment identity
986  ///@param seqalign_hit_list: list to be sorted.
987  ///@param do_translation: is this a translated nuc to nuc alignment?
988  ///
989  static void SortHitByPercentIdentityDescending(list< CRef<objects::CSeq_align_set> >&
990  seqalign_hit_list,
991  bool do_translation);
992 
993  ///sorting function for sorting a list of seqalign set by descending identity
994  ///@param info1: the first element
995  ///@param info2: the second element
996  ///@return: info1 >= info2?
997  ///
998  static bool SortHitByPercentIdentityDescendingEx
999  (const CRef<objects::CSeq_align_set>& info1,
1000  const CRef<objects::CSeq_align_set>& info2);
1001 
1002  ///sorting function for sorting a list of seqalign by descending identity
1003  ///@param info1: the first element
1004  ///@param info2: the second element
1005  ///@return: info1 >= info2?
1006  ///
1007  static bool SortHspByPercentIdentityDescending
1008  (const CRef<objects::CSeq_align>& info1,
1009  const CRef<objects::CSeq_align>& info2);
1010 
1011  ///sorting function for sorting a list of seqalign by ascending master
1012  ///start position
1013  ///@param info1: the first element
1014  ///@param info2: the second element
1015  ///@return: info1 >= info2?
1016  ///
1018  const CRef<objects::CSeq_align>& info2);
1019 
1020  static bool SortHspBySubjectStartAscending(const CRef<objects::CSeq_align>& info1,
1021  const CRef<objects::CSeq_align>& info2);
1022 
1023  static bool SortHitByScoreDescending
1024  (const CRef<objects::CSeq_align_set>& info1,
1025  const CRef<objects::CSeq_align_set>& info2);
1026 
1027 
1028  static bool SortHspByScoreDescending(const CRef<objects::CSeq_align>& info1,
1029  const CRef<objects::CSeq_align>& info2);
1030 
1031  ///sorting function for sorting a list of seqalign set by ascending master
1032  ///start position
1033  ///@param info1: the first element
1034  ///@param info2: the second element
1035  ///@return: info1 >= info2?
1036  ///
1037  static bool SortHitByMasterStartAscending(CRef<objects::CSeq_align_set>& info1,
1039 
1040  ///sort a list of seqalign set by molecular type
1041  ///@param seqalign_hit_list: list to be sorted.
1042  ///@param scope: scope to fetch sequence
1043  ///NOTE(review): linkoutdb and mv_build_name are not documented here; presumably a linkout DB handle and a mapviewer build name - confirm against the implementation
1044  static void
1045  SortHitByMolecularType(list< CRef<objects::CSeq_align_set> >& seqalign_hit_list,
1046  objects::CScope& scope, ILinkoutDB* linkoutdb,
1047  const string& mv_build_name);
1048 
1049  ///actual sorting function for SortHitByMolecularType
1050  ///@param info1: the first element
1051  ///@param info2: the second element
1052  ///@return: info1 >= info2?
1053  ///
1054  //static bool SortHitByMolecularTypeEx (const CRef<objects::CSeq_align_set>& info1,
1055  // const CRef<objects::CSeq_align_set>& info2);
1056 
1057  static void
1058  SortHit(list< CRef<objects::CSeq_align_set> >& seqalign_hit_list,
1059  bool do_translation, objects::CScope& scope, int sort_method,
1060  ILinkoutDB* linkoutdb, const string& mv_build_name);
1061 
1062  static void SplitSeqalignByMolecularType(vector< CRef<objects::CSeq_align_set> >&
1063  target,
1064  int sort_method,
1065  const objects::CSeq_align_set& source,
1066  objects::CScope& scope,
1067  ILinkoutDB* linkoutdb,
1068  const string& mv_build_name);
1071  objects::CScope& scope,
1072  objects::CSeq_align_set& aln_set,
1073  bool nuc_to_nuc_translation,
1074  int db_order,
1075  int hit_order,
1076  int hsp_order,
1077  ILinkoutDB* linkoutdb,
1078  const string& mv_build_name);
1079 
1081  SortSeqalignForSortableFormat(objects::CSeq_align_set& aln_set,
1082  bool nuc_to_nuc_translation,
1083  int hit_order,
1084  int hsp_order);
1085 
1086  static list< CRef<objects::CSeq_align_set> >
1087  SortOneSeqalignForSortableFormat(const objects::CSeq_align_set& source,
1088  bool nuc_to_nuc_translation,
1089  int hit_sort,
1090  int hsp_sort);
1091 
1092  /// function for calculating percent match for an alignment.
1093  ///@param numerator
1094  /// int numerator in percent identity calculation.
1095  ///@param denominator
1096  /// int denominator in percent identity calculation.
1097  static int GetPercentMatch(int numerator, int denominator);
1098  static double GetPercentIdentity(int numerator, int denominator);
1099 
1100 
1101  ///function for Filtering seqalign by expect value
1102  ///@param source_aln
1103  /// CSeq_align_set original seqalign
1104  ///@param evalueLow
1105  /// double min expect value
1106  ///@param evalueHigh
1107  /// double max expect value
1108  ///@return
1109  /// CRef<CSeq_align_set> - filtered seq align
1110  static CRef<objects::CSeq_align_set> FilterSeqalignByEval(objects::CSeq_align_set& source_aln,
1111  double evalueLow,
1112  double evalueHigh);
1113 
1114  ///function for Filtering seqalign by percent identity
1115  ///@param source_aln
1116  /// CSeq_align_set original seqalign
1117  ///@param percentIdentLow
1118  /// double min percent identity
1119  ///@param percentIdentHigh
1120  /// double max percent identity
1121  ///@return
1122  /// CRef<CSeq_align_set> - filtered seq align
1123  static CRef<objects::CSeq_align_set> FilterSeqalignByPercentIdent(objects::CSeq_align_set& source_aln,
1124  double percentIdentLow,
1125  double percentIdentHigh);
1126 
1127  ///function for Filtering seqalign by expect value and percent identity
1128  ///@param source_aln
1129  /// CSeq_align_set original seqalign
1130  ///@param evalueLow
1131  /// double min expect value
1132  ///@param evalueHigh
1133  /// double max expect value
1134  ///@param percentIdentLow
1135  /// double min percent identity
1136  ///@param percentIdentHigh
1137  /// double max percent identity
1138  ///@return
1139  /// CRef<CSeq_align_set> - filtered seq align
1140  static CRef<objects::CSeq_align_set> FilterSeqalignByScoreParams(objects::CSeq_align_set& source_aln,
1141  double evalueLow,
1142  double evalueHigh,
1143  double percentIdentLow,
1144  double percentIdentHigh);
1145 
1146 
1147  static CRef<objects::CSeq_align_set> FilterSeqalignByScoreParams(objects::CSeq_align_set& source_aln,
1148  double evalueLow,
1149  double evalueHigh,
1150  double percentIdentLow,
1151  double percentIdentHigh,
1152  int queryCoverageLow,
1153  int queryCoverageHigh);
1154  ///function for Filtering seqalign by specific subjects
1155  ///@param source_aln
1156  /// CSeq_align_set original seqalign
1157  ///@param seqList
1158  /// vector of strings with seqIDs
1159  ///@return
1160  /// CRef<CSeq_align_set> - filtered seq align
1161  static CRef<objects::CSeq_align_set> FilterSeqalignBySeqList(objects::CSeq_align_set& source_aln,
1162  vector <string> &seqList);
1163 
1164  ///function to remove sequences of accesionType from use_this_seq list
1165  ///@param use_this_seq
1166  /// list <string> of seqIDs
1167  ///@param accesionType
1168  /// CSeq_id::EAccessionInfo accession type to check
1169  ///@return
1170  /// bool true if list changed
1171  static bool RemoveSeqsOfAccessionTypeFromSeqInUse(list<string> &use_this_seq, objects::CSeq_id::EAccessionInfo accesionType);
1172 
1173  ///function for Limiting seqalign by hsps number
1174  ///(by default results are not cut off within the query)
1175  ///@param source_aln
1176  /// CSeq_align_set original seqalign
1177  ///@param maxAligns
1178  /// int max number of alignments (per query)
1179  ///@param maxHsps
1180  /// int max number of Hsps (for all queries)
1181  ///@return
1182  /// CRef<CSeq_align_set> - filtered seq align
1183  static CRef<objects::CSeq_align_set> LimitSeqalignByHsps(objects::CSeq_align_set& source_aln,
1184  int maxAligns,
1185  int maxHsps);
1186 
1187  ///function for extracting seqalign for the query
1188  ///@param source_aln
1189  /// CSeq_align_set original seqalign
1190  ///@param queryNumber
1191  /// int query number ,starts from 1, 0 means return all queries
1192  ///@return
1193  /// CRef<CSeq_align_set> - seq align set for queryNumber, if invalid queryNumber return empty CSeq_align_set
1194  static CRef<objects::CSeq_align_set> ExtractQuerySeqAlign(CRef<objects::CSeq_align_set>& source_aln,
1195  int queryNumber);
1196 
1197  static void BuildFormatQueryString (CCgiContext& ctx,
1198  string& cgi_query);
1199 
1200  static void BuildFormatQueryString (CCgiContext& ctx,
1201  map< string, string>& parameters_to_change,
1202  string& cgi_query);
1203 
1204  static bool IsMixedDatabase(const objects::CSeq_align_set& alnset,
1205  objects::CScope& scope, ILinkoutDB* linkoutdb,
1206  const string& mv_build_name);
1207  static bool IsMixedDatabase(CCgiContext& ctx);
1208 
1209 
1210  ///Get the list of urls for linkouts
1211  ///@param linkout: the membership value
1212  ///@param ids: CBioseq::TId object
1213  ///@param rid: RID
1214  ///@param cdd_rid: CDD RID
1215  ///@param entrez_term: entrez query term
1216  ///@param is_na: is this sequence nucleotide or not
1217  ///@param first_gi: first gi in the list (used to construct structure url)
1218  ///@param structure_linkout_as_group: bool used to construct structure url
1219  ///@param for_alignment: bool indicating if link is located in alignment section
1220  ///@param cur_align: int current alignment/description number
1221  ///@param preComputedResID: NOTE(review): not documented; this line previously described a "textLink" flag that is not part of this signature
1222  ///@return list of string containing all linkout urls for one seq
1223  static list<string> GetLinkoutUrl(int linkout,
1224  const objects::CBioseq::TId& ids,
1225  const string& rid,
1226  const string& cdd_rid,
1227  const string& entrez_term,
1228  bool is_na,
1229  TGi first_gi,
1230  bool structure_linkout_as_group,
1231  bool for_alignment,
1232  int cur_align,
1233  string preComputedResID);
1234 
1235 
1236  ///Create map that holds all linkouts for the list of blast deflines and corresponding seqIDs
1237  ///@param bdl: list of CRef<CBlast_def_line>
1238  ///@param linkout_map: map that holds linkouts and corresponding CBioseq::TId for the whole list of blast deflines
1239  ///
1240  static void GetBdlLinkoutInfo(const list< CRef< objects::CBlast_def_line > > &bdl,
1241  map<int, vector < objects::CBioseq::TId > > &linkout_map,
1242  ILinkoutDB* linkoutdb,
1243  const string& mv_build_name);
1244 
1245  ///Create map that holds all linkouts for one seqID
1246  ///@param cur_id: CBioseq::TId
1247  ///@param linkout_map: map that holds linkouts and corresponding CBioseq::TId for the whole list of blast deflines
1248  ///
1249  static void GetBdlLinkoutInfo(objects::CBioseq::TId& cur_id,
1250  map<int, vector <objects::CBioseq::TId > > &linkout_map,
1251  ILinkoutDB* linkoutdb,
1252  const string& mv_build_name);
1253 
1254  ///Get linkout membership for the list of blast deflines
1255  ///@param bdl: list of CRef<CBlast_def_line>
1256  ///@param rid: blast rid
1257  ///@param cdd_rid: blast cdd_rid
1258  ///@param entrez_term: entrez_term for building url
1259  ///@param is_na: bool indication if query is nucleotide
1260  ///(first_gi, documented here previously, is not a parameter of this overload)
1261  ///@param structure_linkout_as_group: bool used to construct structure url
1262  ///@param for_alignment: bool indicating if link is located in alignment section
1263  ///@param cur_align: int current alignment/description number
1264  ///@param linkoutOrder: string of letters separated by comma specifying linkout order like "G,U,M,E,S,B"
1265  ///@param taxid: int taxid
1266  ///@param database: database name
1267  ///@param query_number: query_number
1268  ///@param user_url: url defined as TOOL_URL for blast_type in .ncbirc
1269  ///@return list of string containing all linkout urls for all of the seqs in the list of blast deflines
1270  ///NOTE(review): preComputedResID, linkoutdb and mv_build_name are not documented here - confirm their meaning against the implementation
1271  static list<string> GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl,
1272  const string& rid,
1273  const string& cdd_rid,
1274  const string& entrez_term,
1275  bool is_na,
1276  bool structure_linkout_as_group,
1277  bool for_alignment,
1278  int cur_align,
1279  string& linkoutOrder,
1280  TTaxId taxid,
1281  string &database,
1282  int query_number,
1283  string &user_url,
1284  string &preComputedResID,
1285  ILinkoutDB* linkoutdb,
1286  const string& mv_build_name);
1287 
1288  ///Get linkout membership for one seqID
1289  ///@param cur_id: CBioseq::TId seqID
1290  ///@param rid: blast rid
1291  ///@param cdd_rid: blast cdd_rid
1292  ///@param entrez_term: entrez_term for building url
1293  ///@param is_na: bool indication if query is nucleotide
1294  ///(first_gi, documented here previously, is not a parameter of this overload)
1295  ///@param structure_linkout_as_group: bool used to construct structure url
1296  ///@param for_alignment: bool indicating if link is located in alignment section
1297  ///@param cur_align: int current alignment/description number
1298  ///@param linkoutOrder: string of letters separated by comma specifying linkout order like "G,U,M,E,S,B"
1299  ///@param taxid: int taxid
1300  ///@param database: database name
1301  ///@param query_number: query_number
1302  ///@param user_url: url defined as TOOL_URL for blast_type in .ncbirc
1303  ///@return list of string containing linkout urls (NOTE(review): the original text says "for all of the seqs in the list of blast deflines", which looks copied from the bdl overload - confirm)
1304  ///NOTE(review): preComputedResID, linkoutdb, mv_build_name and getIdentProteins are not documented here - confirm their meaning against the implementation
1305  static list<string> GetFullLinkoutUrl(objects::CBioseq::TId& cur_id,
1306  const string& rid,
1307  const string& cdd_rid,
1308  const string& entrez_term,
1309  bool is_na,
1310  bool structure_linkout_as_group,
1311  bool for_alignment,
1312  int cur_align,
1313  string& linkoutOrder,
1314  TTaxId taxid,
1315  string &database,
1316  int query_number,
1317  string &user_url,
1318  string &preComputedResID,
1319  ILinkoutDB* linkoutdb,
1320  const string& mv_build_name,
1321  bool getIdentProteins);
1322  static list<string> GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl, SLinkoutInfo &linkoutInfo);
1323  static list<string> GetFullLinkoutUrl(objects::CBioseq::TId& cur_id,SLinkoutInfo &linkoutInfo,bool getIdentProteins);
1324  static int GetSeqLinkoutInfo(objects::CBioseq::TId& cur_id,
1325  ILinkoutDB** linkoutdb,
1326  const string& mv_build_name,
1327  TGi gi = INVALID_GI);
1328  static int GetMasterCoverage(const objects::CSeq_align_set& alnset);
1329  static CRange<TSeqPos> GetSeqAlignCoverageParams(const objects::CSeq_align_set& alnset,int *masterCoverage,bool *flip);
1330 
1331 
1332  ///retrieve URL from .ncbirc file combining host/port and format strings values.
1333  ///consult blastfmtutil.hpp
1334  ///@param url_name: url name to retrieve
1335  ///@param index: name index ( default: -1: )
1336  ///@return: URL format string from .ncbirc file or default as kNAME
1337  ///
1338  static string GetURLFromRegistry( const string url_name, int index = -1);
1339 
1340  ///get default value if there is problem with .ncbirc file or
1341  ///settings are not complete. return corresponding static value
1342  ///@param url_name: constant name to return.
1343  ///@param index: name index ( default: -1: )
1344  ///@return: URL format string defined in blastfmtutil.hpp
1345  static string GetURLDefault( const string url_name, int index = -1);
1346 
1347  ///Replace template tags by real data
1348  ///@param inpString: string containing template data
1349  ///@param tmplParamName:string with template tag name
1350  ///@param templParamVal: int value that replaces template
1351  ///@return:string containing template data replaced by real data
1352  ///
1353  ///<@tmplParamName@> is replaced by templParamVal
1354  static string MapTemplate(string inpString,string tmplParamName,Int8 templParamVal);
1355 
1356  ///Replace template tags by real data
1357  ///@param inpString: string containing template data
1358  ///@param tmplParamName:string with template tag name
1359  ///@param templParamVal: string value that replaces template
1360  ///@return:string containing template data replaced by real data
1361  ///
1362  ///<@tmplParamName@> is replaced by templParamVal
1363  static string MapTemplate(string inpString,string tmplParamName,string templParamVal);
1364 
1365  ///Replace template tags by real data and calculate and add spaces dependent on maxParamLength and spacesFormatFlag
1366  ///@param inpString: string containing template data
1367  ///@param tmplParamName:string with template tag name
1368  ///@param templParamVal: string value that replaces template
1369  ///@param maxParamLength: unsigned int maxParamLength
1370  ///@param spacesFormatFlag: int formatting flag
1371  ///@return:string containing template data replaced by real data
1372  ///
1373  ///<@tmplParamName@> is replaced by templParamVal
1374  static string MapSpaceTemplate(string inpString,string tmplParamName,string templParamVal, unsigned int maxParamLength, int spacesFormatFlag = eSpacePosAtLineEnd);
1375 
1376  ///Calculate the number of spaces and add them to paramVal
1377  ///@param paramVal: input parameter value
1378  ///@param maxParamLength: max length for the string that holds parameter
1379  ///@param spacesFormatFlag: the position of spaces and additional formatting after adding spaces
1380  ///(defaults to eSpacePosToCenter)
1381  ///@return:string containing paramVal and spaces placed appropriately
1382  static string AddSpaces(string paramVal, size_t maxParamLength, int spacesFormatFlag = eSpacePosToCenter);
1383 
1384 
1385  static string GetProtocol(void);
1386 
1387  static void InitConfig();
1388 
1389  static string MapProtocol(string url_link);
1390 
1391  ///Create URL for seqid
1392  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1393  ///@param id: seqid CSeq_id
1394  ///@param scopeRef:scope to fetch sequence
1395  static string GetIDUrl(SSeqURLInfo *seqUrlInfo,
1396  const objects::CSeq_id& id,
1397  objects::CScope &scope);
1398 
1399 
1400  ///Create URL for seqid
1401  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1402  ///@param ids: CBioseq::TId object
1403  static string GetIDUrl(SSeqURLInfo *seqUrlInfo,
1404  const objects::CBioseq::TId* ids);
1405 
1406  static string GetFullIDLink(SSeqURLInfo *seqUrlInfo,
1407  const objects::CBioseq::TId* ids);
1408 
1409  ///Create URL for seqid that goes to entrez or trace
1410  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1411  ///@param id: seqid CSeq_id
1412  ///@param scope: scope to fetch sequence
1413  static string GetIDUrlGen(SSeqURLInfo *seqUrlInfo,
1414  const objects::CSeq_id& id,
1415  objects::CScope &scope);
1416 
1417 
1418  ///Create URL for seqid that goes to entrez or trace
1419  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1420  ///@param ids: CBioseq::TId object
1421  static string GetIDUrlGen(SSeqURLInfo *seqUrlInfo,const objects::CBioseq::TId* ids);
1422 
1423  ///Create info indicating what kind of links to display
1424  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1425  ///@param customLinkTypesInp: original types of links to be included in the list
1426  ///@return: int containing customLinkTypes with the bits set to indicate what kind of links to display for the sequence
1427  ///
1428  ///examples:(Mapviewer,Download,GenBank,FASTA,Seqviewer, Trace, SRA, SNP, GSFASTA)
1429  static int SetCustomLinksTypes(SSeqURLInfo *seqUrlInfo, int customLinkTypesInp);
1430 
1431  ///Create the list of string links for seqid that go
1432  /// - to GenBank,FASTA and Seqviewer for gi > 0
1433  /// - customized links determined by seqUrlInfo->blastType for gi = 0
1434  /// - customized links determined by customLinkTypes
1435  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1436  ///@param id: CSeq_id object
1437  ///@param scope: scope to fetch this sequence
1438  ///@param customLinkTypes: types of links to be included in the list(mapviewer,seqviewer or download etc)
1439  ///@return: list of strings containing links
1440  static list <string> GetCustomLinksList(SSeqURLInfo *seqUrlInfo,
1441  const objects::CSeq_id& id,
1442  objects::CScope &scope,
1443  int customLinkTypes = eLinkTypeDefault);
1444 
1445  static list<string> GetGiLinksList(SSeqURLInfo *seqUrlInfo,bool hspRange = false);
1446  static string GetGraphiscLink(SSeqURLInfo *seqUrlInfo,bool hspRange = false);
1447  static list<string> GetSeqLinksList(SSeqURLInfo *seqUrlInfo,bool hspRange = false);
1448 
1449 
1450 
1451 
1452  ///Create URL to FASTA info
1453  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1454  ///@param id: CSeq_id object
1455  ///@param scope: scope to fetch this sequence
1456  ///@return: string containing URL
1457  ///
1458  static string GetFASTALinkURL(SSeqURLInfo *seqUrlInfo,
1459  const objects::CSeq_id& id,
1460  objects::CScope &scope);
1461 
1462  ///Create URL showing aligned regions info
1463  ///@param seqUrlInfo: struct SSeqURLInfo containing data for URL construction
1464  ///@param id: CSeq_id object
1465  ///@param scope: scope to fetch this sequence
1466  ///@return: string containing URL
1467  ///
1468  static string GetAlignedRegionsURL(SSeqURLInfo *seqUrlInfo,
1469  const objects::CSeq_id& id,
1470  objects::CScope &scope);
1471 
1472  ///Set the database as gi type
1473  ///@param actual_aln_list: the alignment
1474  ///@param scope: scope to fetch sequences
1475  ///
1476  static CAlignFormatUtil::DbType GetDbType(const objects::CSeq_align_set& actual_aln_list,
1477  objects::CScope & scope);
1478 
1479  static CAlignFormatUtil::SSeqAlignSetCalcParams* GetSeqAlignCalcParams(const objects::CSeq_align& aln);
1480 
1481  static CAlignFormatUtil::SSeqAlignSetCalcParams* GetSeqAlignSetCalcParams(const objects::CSeq_align_set& aln,int queryLength,bool do_translation);
1482 
1483  static CAlignFormatUtil::SSeqAlignSetCalcParams* GetSeqAlignSetCalcParamsFromASN(const objects::CSeq_align_set& alnSet);
1484  static double GetSeqAlignSetCalcPercentIdent(const objects::CSeq_align_set& aln, bool do_translation);
1485 
1486  static map < string, CRef<objects::CSeq_align_set> > HspListToHitMap(vector <string> seqIdList, const objects::CSeq_align_set& source);
1487 
1488  ///Scan the list of blast deflines and find seqID to be used in display
1489  ///@param handle: CBioseq_Handle [in]
1490  ///@param aln_id: CSeq_id object for alignment seq [in]
1491  ///@param use_this_gi: list<TGi> list of gis to use [in]
1492  ///@param gi: gi to be used for display if exists or 0
1493  ///@param taxid: taxid to be used for display if exists or 0
1494  ///@return: CSeq_id object to be used for display
1495  static CRef<objects::CSeq_id> GetDisplayIds(const objects::CBioseq_Handle& handle,
1496  const objects::CSeq_id& aln_id,
1497  list<TGi>& use_this_gi,
1498  TGi& gi,
1499  TTaxId& taxid);
1500  ///Scan the list of blast deflines and find seqID to be used in display
1501  ///@param handle: CBioseq_Handle [in]
1502  ///@param aln_id: CSeq_id object for alignment seq [in]
1503  ///@param use_this_gi: list<TGi> list of gis to use [in]
1504  ///@param gi: gi to be used for display if exists or 0
1505  ///@return: CSeq_id object to be used for display
1506  static CRef<objects::CSeq_id> GetDisplayIds(const objects::CBioseq_Handle& handle,
1507  const objects::CSeq_id& aln_id,
1508  list<TGi>& use_this_gi,
1509  TGi& gi);
1510  ///Overload taking the list of blast deflines (bdl) directly; returns a TGi
1511  static TGi GetDisplayIds(const list< CRef< objects::CBlast_def_line > > &bdl,
1512  const objects::CSeq_id& aln_id,
1513  list<TGi>& use_this_gi);
1514  static bool GetTextSeqID(const list<CRef<objects::CSeq_id> > & ids,string *textSeqID = NULL);
1515  static bool GetTextSeqID(CConstRef<objects::CSeq_id> seqID,string *textSeqID = NULL);
1516 
1517 
1518  ///Scan the list of blast deflines and find seqID to be used in display
1519  ///@param handle: CBioseq_Handle [in]
1520  ///@param aln_id: CSeq_id object for alignment seq [in]
1521  ///@param use_this_seq: list<string> list of seqids to use (gi:ssssss or seqid:sssss) [in]
1522  ///@param gi: pointer to gi to be used for display if exists
1523  ///@param taxid: pointer to taxid to be used for display if exists
1524  ///@param textSeqID: pointer to textSeqID to be used for display if exists
1525  ///@return: CSeq_id object to be used for display
1526  static CRef<objects::CSeq_id> GetDisplayIds(const objects::CBioseq_Handle& handle,
1527  const objects::CSeq_id& aln_id,
1528  list<string>& use_this_seq,
1529  TGi *gi = NULL,
1530  TTaxId *taxid = NULL,
1531  string *textSeqID = NULL);
1532 
1533 
1534 
1535  ///Check if accession is WGS
1536  ///@param accession: string accession [in]
1537  ///@param wgsProj: string wgsProj [out]
1538  ///@return: bool indicating if accession is WGS
1539  static bool IsWGSAccession(string &accession, string &wgsProj);
1540 
1541  ///Check if accession is WGS
1542  ///
1543  ///
1544  ///@param wgsAccession: string accession [in]
1545  ///@return: bool indicating if accession is WGS
1546  static bool IsWGSPattern(string &wgsAccession);
1547 
1548  ///Get Gene symbol for gi
1549  ///@param giForGeneLookup: gi
1550  ///@return: string gene symbol
1551  static string GetGeneInfo(TGi giForGeneLookup);
1552 
1553 
1554  static unique_ptr<CNcbiRegistry> m_Reg;
1555  static string m_Protocol;
1556  static bool m_geturl_debug_flag;
1557  static unique_ptr<CGeneInfoFileReader> m_GeneInfoReader;
1558 
1559  /// Calculate the unique subject query coverage range (blastn only)
1560  static int GetUniqSeqCoverage(objects::CSeq_align_set & alnset);
1561  static TGi GetGiForSeqIdList (const list<CRef<objects::CSeq_id> >& ids);
1562 
1563  static string GetTitle(const objects::CBioseq_Handle & bh);
1564 
1565  /// Get sequence id with no database source (bare accession)
1566  static string GetBareId(const objects::CSeq_id& id);
1567 
1568 protected:
1569 
1570  ///Wrap a string to specified length. If break happens to be in
1571  /// a word, it will extend the line length until the end of the word
1572  ///@param str: input string
1573  ///@param line_len: length of each line desired
1574  ///@param out: stream to output
1575  ///@param html: is this HTML output? [in]
1576  static void x_WrapOutputLine(string str, size_t line_len,
1577  CNcbiOstream& out,
1578  bool html = false);
1579 };
1580 
1581 END_SCOPE(align_format)
1583 
1584 #endif /* OBJTOOLS_ALIGN_FORMAT___ALIGN_FORMAT_UTIL_HPP */
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
static const char kDownloadLink[]
static const char kBioAssayProtImg[]
static const char kCBlastCgi[]
static const char k_GetSeqSubmitForm_1[]
static const char kGenomeDataViewerNucUrl[]
static const char kGeneUrl[]
Gene.
static const char kLinkoutOrderStr[]
Default linkout order.
static const char kSeqViewerUrl[]
static const char kStructureImg[]
const int k_NumAsciiChar
Number of ASCII characters for populating matrix columns.
static const char kGetTreeViewCgi[]
static const string kMapviwerDispl
static const char kEntrezQueryCgi[]
static const char kGenericLinkMouseoverTmpl[]
static const string kSeqViewerParams
DEFINE_STATIC_ARRAY_MAP(TTagUrlMap, sm_TagUrlMap, s_TagUrls)
static const char kEntrezTMUrl[]
static const char kMapviwerUrl[]
mapviewer linkout
static const char kCustomLinkTemplate[]
static const string kGenomeDataViewerDispl
static const char kGeoUrl[]
Geo.
static const char kEntrezSubseqUrl[]
Sub-sequence.
static const char kEntrezViewerCgi[]
static const char kStructureAlphaFoldUrl[]
CStaticArrayMap< string, string > TTagUrlMap
static const char kDownloadUrl[]
dumpgnl
static const char kGeneInfoUrl[]
static const string kGeneDispl
static const char k_GetSeqSubmitForm_0[]
static const char kDownloadImg[]
static const TTagUrl s_TagUrls[]
static const char kBl2seqUrl[]
static const string kUnigeneDispl
static const string kGeoDispl
static const char kClassInfo[]
blast related url
@ ePMatrixSize
static const char kUnigeneUrl[]
unigene
static const char kGeoImg[]
static const char kGeneTerm[]
static const char kBioAssayNucURL[]
Bioassay for nucleotides.
static const char kWGSUrl[]
static const char kIdenticalProteinsUrl[]
static const string kIdenticalProteinsDispl
static const char kStructure_Overview[]
structure overview
static const char kGeneImg[]
static const char kMapSearchCgi[]
static const string kReprMicrobialGenomesDispl
static const char kDefaultProtocol[]
static const string kBioAssayDispl
static const char kReprMicrobialGenomesUrl[]
Repr microbial Genome linkout.
static const char kEntrezUrl[]
entrez
static const char kEntrezSubseqTMUrl[]
static const char kReprMicrobialGenomesImg[]
static const char kEntrezSitesCgi[]
static const char kGenomeDataViewerImg[]
static const string kMapviewBlastHitParams
static const string kMapviewBlastHitUrl
mapviewer linkout
static const char kUnigeneImg[]
static const char k_GetSeqSelectForm[]
static const char kStructureUrl[]
structure
static const char kGenomeDataViewerProtUrl[]
const char k_PSymbol[]
Residues.
static const char kBl2SeqWBlastCgi[]
static const char kBioAssayNucImg[]
static const char kGenomeDataViewerNuclTranscriptUrl[]
static const char kGenericLinkTemplate[]
static const char kMapviwerImg[]
static const char kTraceCgi[]
static const char kBioAssayProtURL[]
Bioassay for proteins.
static const char kTraceUrl[]
trace db
static const char k_GetTreeViewForm[]
static const string kStructureDispl
static const char kCustomLinkTitle[]
static const char kGenomeButton[]
genome button
static const char kSeqViewerUrlNonGi[]
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
string GetBareId(const CSeq_id &id)
Definition: seq_writer.cpp:256
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
This class contains misc functions for displaying BLAST results.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi, int &comp_adj_method)
Extract score info from blast alignment. Second version that fetches compositional adjustment integer.
static string GetIDUrl(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid.
static CRef< objects::CSeq_id > GetDisplayIds(const objects::CBioseq_Handle &handle, const objects::CSeq_id &aln_id, list< string > &use_this_seq, TGi *gi=NULL, TTaxId *taxid=NULL, string *textSeqID=NULL)
Scan the list of blast deflines and find seqID to be used in display.
static void GetUseThisSequence(const objects::CSeq_align &aln, list< string > &use_this_seq)
Extract use_this_seq info from blast alignment.
static bool IsMixedDatabase(const objects::CSeq_align_set &alnset, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
static list< string > GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, bool structure_linkout_as_group, bool for_alignment, int cur_align, string &linkoutOrder, TTaxId taxid, string &database, int query_number, string &user_url, string &preComputedResID, ILinkoutDB *linkoutdb, const string &mv_build_name)
Get linkout membership for the list of blast deflines.
static string GetIDUrlGen(SSeqURLInfo *seqUrlInfo, const objects::CBioseq::TId *ids)
Create URL for seqid that goes to entrez or trace.
static string GetIDUrl(SSeqURLInfo *seqUrlInfo, const objects::CBioseq::TId *ids)
Create URL for seqid.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< string > &use_this_seq)
Extract score info from blast alignment.
static list< string > GetFullLinkoutUrl(objects::CBioseq::TId &cur_id, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, bool structure_linkout_as_group, bool for_alignment, int cur_align, string &linkoutOrder, TTaxId taxid, string &database, int query_number, string &user_url, string &preComputedResID, ILinkoutDB *linkoutdb, const string &mv_build_name, bool getIdentProteins)
Get linkout membership for one seqID.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alingment.
static CRef< objects::CSeq_id > GetDisplayIds(const objects::CBioseq_Handle &handle, const objects::CSeq_id &aln_id, list< TGi > &use_this_gi, TGi &gi)
Scan the the list of blast deflines and find seqID to be use in display.
static bool m_geturl_debug_flag
static list< string > GetFullLinkoutUrl(objects::CBioseq::TId &cur_id, SLinkoutInfo &linkoutInfo, bool getIdentProteins)
static string GetIDUrlGen(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid that goes to entrez or trace.
static CRef< objects::CSeq_align_set > FilterSeqalignByScoreParams(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh, int queryCoverageLow, int queryCoverageHigh)
static bool MatchSeqInSeqList(TGi cur_gi, CRef< objects::CSeq_id > &seqID, list< string > &use_this_seq, bool *isGiList=NULL)
Matches text seqID or gi with the list of seqIds or gis.
static CRef< objects::CSeq_align_set > SortSeqalignForSortableFormat(objects::CSeq_align_set &aln_set, bool nuc_to_nuc_translation, int hit_order, int hsp_order)
static CRef< objects::CSeq_id > GetDisplayIds(const objects::CBioseq_Handle &handle, const objects::CSeq_id &aln_id, list< TGi > &use_this_gi, TGi &gi, TTaxId &taxid)
Scan the the list of blast deflines and find seqID to be use in display.
static list< string > GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl, SLinkoutInfo &linkoutInfo)
static bool GetTextSeqID(const list< CRef< objects::CSeq_id > > &ids, string *textSeqID=NULL)
static void GetUseThisSequence(const objects::CSeq_align &aln, list< TGi > &use_this_gi)
Extract use_this_gi info from blast alingment.
static string GetSeqIdString(const objects::CBioseq &cbs, bool believe_local_id=true)
Returns a full '|'-delimited Seq-id string for a Bioseq.
static CRef< objects::CSeq_align_set > SortSeqalignForSortableFormat(CCgiContext &ctx, objects::CScope &scope, objects::CSeq_align_set &aln_set, bool nuc_to_nuc_translation, int db_order, int hit_order, int hsp_order, ILinkoutDB *linkoutdb, const string &mv_build_name)
static double GetPercentIdentity(const objects::CSeq_align &aln, objects::CScope &scope, bool do_translation)
calculate the percent identity for a seqalign
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< string > &use_this_seq, int &comp_adj_method)
Extract score info from blast alignment. Second version that fetches compositional adjustment integer.
static CRef< objects::CSeq_align_set > FilterSeqalignByScoreParams(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh)
function for Filtering seqalign by expect value and percent identity
static TGi GetDisplayIds(const list< CRef< objects::CBlast_def_line > > &bdl, const objects::CSeq_id &aln_id, list< TGi > &use_this_gi)
static unique_ptr< CNcbiRegistry > m_Reg
static void GetBdlLinkoutInfo(const list< CRef< objects::CBlast_def_line > > &bdl, map< int, vector< objects::CBioseq::TId > > &linkout_map, ILinkoutDB *linkoutdb, const string &mv_build_name)
Create map that holds all linkouts for the list of blast deflines and corresponding seqIDs.
static bool GetTextSeqID(CConstRef< objects::CSeq_id > seqID, string *textSeqID=NULL)
static string m_Protocol
static unique_ptr< CGeneInfoFileReader > m_GeneInfoReader
static void GetBdlLinkoutInfo(objects::CBioseq::TId &cur_id, map< int, vector< objects::CBioseq::TId > > &linkout_map, ILinkoutDB *linkoutdb, const string &mv_build_name)
Create map that holds all linkouts for one seqID.
static bool MatchSeqInSeqList(CConstRef< objects::CSeq_id > &alnSeqID, list< string > &use_this_seq, vector< string > &seqList)
Matches string of seqIDs (gis or text seqID)
static string GetSeqIdString(const list< CRef< objects::CSeq_id > > &ids, bool believe_local_id)
Returns a full '|'-delimited Seq-id string for a list of seq-ids.
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
setting up scope
Definition: ilinkoutdb.hpp:43
char value[7]
Definition: config.c:431
static Uint4 border[]
CS_CONTEXT * ctx
Definition: t0006.c:12
static void Init(void)
Definition: cursor6.c:76
#define T(s)
Definition: common.h:230
#define G(x, y, z)
Definition: md4.c:179
std::ofstream out("events_result.xml")
main entry point for tests
static bool InitConfig(const CArgs &args, Parser &config)
Definition: flat2asn.cpp:59
@ eSubjectStart
Start of alignment in subject.
@ eEvalue
Expect value.
const size_t kDfltArgNumAlignments
Default number of alignments to display in the traditional BLAST report.
Defines a class for reading Gene information from files.
#define INVALID_GI
Definition: ncbimisc.hpp:1089
#define INVALID_TAX_ID
Definition: ncbimisc.hpp:1116
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
string GetLabel(const CSeq_id &id)
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
Definition: seqtitle.cpp:106
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_ALIGN_FORMAT_EXPORT
Definition: ncbi_export.h:1081
static const char label[]
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
Declares the ILinkoutDB interface.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n td
use only n Cassandra database for the lookups</td > n</tr > n< tr > n< td > yes</td > n< td > do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
for(len=0;yy_str[len];++len)
static int input()
#define S(x, n)
static MDB_envinfo info
Definition: mdb_load.c:37
static CRef< CSeq_align > CreateDensegFromDendiag(CSeq_align const &aln)
const TYPE & Get(const CNamedParameterList *param)
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::KEY key
Magic spell ;-) needed for some weird compilers... very empiric.
const CharType(& source)[N]
Definition: pointer.h:1149
unsigned int a
Definition: ncbi_localip.c:102
const char * tag
Process information in the NCBI Registry, including working with configuration files.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
const double E
T prob(T x_)
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
static bool IsWGSAccession(const string &acc, const CTextseq_id &id, TAllowSeqType allow_seq_type)
static BOOL number
Definition: pcregrep.c:193
static bool SortHspByMasterStartAscending(const SHspInfo *info1, const SHspInfo *info2)
static const char * str(char *buf, int n)
Definition: stats.c:84
Structure to hold the Gumbel parameters (for FSC).
Definition: blast_stat.h:94
SDbInfo()
Default constructor.
string filt_algorithm_name
Filtering algorithm ID used in BLAST search.
string filt_algorithm_options
Filtering algorithm options used in BLAST search.
void Init(string rid_in, string cdd_rid_in, string entrez_term_in, bool is_na_in, string database_in, int query_number_in, string user_url_in, string preComputedResID_in, string linkoutOrder_in, bool structure_linkout_as_group_in=false, bool for_alignment_in=true)
void Init(string rid_in, string cdd_rid_in, string entrez_term_in, bool is_na_in, string database_in, int query_number_in, string user_url_in, string preComputedResID_in, string linkoutOrder_in, ILinkoutDB *linkoutdb_in, string mv_build_name_in, bool structure_linkout_as_group_in=false, bool for_alignment_in=true)
Structure that holds information for all hits of one subject in Seq Align Set.
Structure that holds information needed for creation seqID URL in descriptions and alignments.
bool flip
flip sequence in case of opposite strands
string user_url
user url TOOL_URL from .ncbirc
SSeqURLInfo(string usurl, string bt, bool isnuc, string db, string rid_in, int qn, TGi gi_in, string acc, int lnk, int blrk, bool alnLink, bool nw, CRange< TSeqPos > range=CRange< TSeqPos >(0, 0), bool flp=false, TTaxId txid=INVALID_TAX_ID, bool addCssInf=false, string seqSegs="", string resUrl="", bool useTmpl=false, bool advView=false)
Constructor.
string blastType
blast type refer to blobj->adm->trace->created_by
CRange< TSeqPos > seqRange
sequence range
string database
name of the database
string defline
sequence defline
bool new_win
bool indicating if click of the url will open a new window
bool addCssInfo
bool indicating that css info should be added
bool useTemplates
bool indicating that templates should be used when contsructing links
string segs
string containing align segments in the the following format seg1Start-seg1End,seg2Start-seg2End
int blast_rank
index of the current alignment
bool advancedView
bool indicating that advanced view design option should be used when contsructing links
string seqUrl
sequence URL created
bool isAlignLink
bool indicating if link is in alignment section
bool isDbNa
bool indicating if the database is nucleotide or not
string resourcesUrl
URL(s) to other resources from .ncbirc.
Template structure SStaticPair is simplified replacement of STL pair<> Main reason of introducing this...
Definition: static_set.hpp:60
static string subject
static string query
Definition: type.c:6
#define const
Definition: zconf.h:230
Modified on Sat Mar 02 10:54:03 2024 by modify_doxy.py rev. 669887