NCBI C++ ToolKit
showalign.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: showalign.cpp 100791 2023-09-13 12:42:42Z zaretska $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  *
28  * File Description:
29  * Sequence alignment display
30  *
31  */
32 #include <ncbi_pch.hpp>
33 
36 
37 #include <corelib/ncbiexpt.hpp>
38 #include <corelib/ncbiutil.hpp>
39 #include <corelib/ncbistre.hpp>
40 #include <corelib/ncbireg.hpp>
41 
42 #include <util/range.hpp>
43 #include <util/md5.hpp>
44 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDB::ExtractBlastDefline
45 
46 #include <objmgr/scope.hpp>
47 #include <objmgr/feat_ci.hpp>
49 
50 #include <objmgr/util/sequence.hpp>
51 #include <objmgr/util/feature.hpp>
52 
57 #include <objects/seq/Seqdesc.hpp>
58 #include <objects/seq/Bioseq.hpp>
59 
61 
64 
69 
72 
73 #include <stdio.h>
76 #include <html/htmlhelper.hpp>
77 #include <cgi/cgictx.hpp>
78 
81 USING_SCOPE(sequence);
82 BEGIN_SCOPE(align_format)
83 
86 static const string k_FrameConversion[k_NumFrame] = {"+1", "+2", "+3", "-1",
87  "-2", "-3"};
88 static const int k_GetSubseqThreshhold = 10000;
89 
90 ///threshold to color mismatch. 98 means 98%
91 static const int k_ColorMismatchIdentity = 0;
92 static const int k_GetDynamicFeatureSeqLength = 200000;
93 static const string k_DumpGnlUrl = "/blast/dumpgnl.cgi";
94 static const int k_FeatureIdLen = 16;
95 const string color[]={"#000000", "#808080", "#FF0000"};
96 const string k_ColorRed = "#FF0000";
97 const string k_ColorPink = "#F805F5";
98 
99 static const char k_IntronChar = '~';
100 static const int k_IdStartMargin = 2;
101 static const int k_SeqStopMargin = 2;
102 static const int k_StartSequenceMargin = 2;
103 static const int k_AlignStatsMargin = 2;
104 static const int k_SequencePropertyLabelMargin = 2;
105 
106 const string k_DefaultAnchorTempl = "<a name=<@id_lbl@>></a>";
107 const string k_DefaultAnchorWithPosTempl = "<a name=#_<@resultPositionIndex@>_<@id_lbl@>></a>";
108 static const string k_DefaultSpaceMaintainerTempl = "<span class=\"smn\"><@chkbox@></span>";
109 static const string k_DefaultCheckboxTempl = "<input type=\"checkbox\" name=\"getSeqGi\" value=\"<@id_lbl@>\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment<@queryNumber@>', 'getSeqGi', this.checked)\">";
110 static const string k_DefaultCheckboxExTempl = "<input type=\"checkbox\" name=\"getSeqGi\" value=\"<@id_lbl@>\" checked=\"checked\" onClick=\"synchAl(this);\">";
111 
112 //highlight the seqid for pairwise-with-identity format
113 const string k_DefaultPairwiseWithIdntTempl = "<font color=\"#FF0000\"><b><@alndata@></b></font>";//k_ColorRed
114 const string k_DefaultFeaturesTempl = "<font color=\"#F805F5\"><b><@alndata@></b></font>";//k_ColorPink
115 const string k_DefaultMaskSeqLocTempl = "<font color=\"<@color@>\"><@alndata@></font>";
116 
117 
118 #ifdef USE_ORG_IMPL
119 static string k_GetSeqSubmitForm[] = {"<FORM method=\"post\" \
120 action=\"//www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?SUBMIT=y\" \
121 name=\"%s%d\"><input type=button value=\"Get selected sequences\" \
122 onClick=\"finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)\"><input \
123 type=\"hidden\" name=\"db\" value=\"\"><input type=\"hidden\" name=\"term\" \
124 value=\"\"><input type=\"hidden\" name=\"doptcmdl\" value=\"docsum\"><input \
125 type=\"hidden\" name=\"cmd\" value=\"search\"></form>",
126 
127  "<FORM method=\"POST\" \
128 action=\"//www.ncbi.nlm.nih.gov/Traces/trace.cgi\" \
129 name=\"%s%d\"><input type=button value=\"Get selected sequences\" \
130 onClick=\"finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)\"><input \
131 type=\"hidden\" name=\"val\" value=\"\"><input \
132 type=\"hidden\" name=\"cmd\" value=\"retrieve\"></form>"
133 };
134 
135 static string k_GetSeqSelectForm = "<FORM><input \
136 type=\"button\" value=\"Select all\" onClick=\"handleCheckAll('select', \
137 'getSeqAlignment%d', 'getSeqGi')\"></form></td><td><FORM><input \
138 type=\"button\" value=\"Deselect all\" onClick=\"handleCheckAll('deselect', \
139 'getSeqAlignment%d', 'getSeqGi')\"></form>";
140 
141 
142 static string k_GetTreeViewForm = "<FORM method=\"post\" \
143 action=\"//www.ncbi.nlm.nih.gov/blast/treeview/blast_tree_view.cgi?request=page&rid=%s&queryID=%s&distmode=on\" \
144 name=\"tree%s%d\" target=\"trv%s\"> \
145 <input type=button value=\"Distance tree of results\" onClick=\"extractCheckedSeq('getSeqAlignment%d', 'getSeqGi', 'tree%s%d')\"> \
146 <input type=\"hidden\" name=\"sequenceSet\" value=\"\"><input type=\"hidden\" name=\"screenWidth\" value=\"\"></form>";
147 #endif
148 
149 
150 static const int k_MaxDeflinesToShow = 8;
151 static const int k_MinDeflinesToShow = 3;
152 
153 
155  CScope& scope,
156  list <CRef<CSeqLocInfo> >* mask_seqloc,
157  list <FeatureInfo*>* external_feature,
158  const char* matrix_name /* = BLAST_DEFAULT_MATRIX */)
159  : m_SeqalignSetRef(&seqalign),
160  m_Seqloc(mask_seqloc),
161  m_QueryFeature(external_feature),
162  m_Scope(scope),
163  m_LinkoutDB(NULL),
164  m_UseLongSeqIds(false)
165 {
166  m_AlignOption = 0;
167  m_SeqLocChar = eX;
168  m_SeqLocColor = eBlack;
169  m_LineLen = align_format::kDfltLineLength;
170  m_IsDbNa = true;
171  m_CanRetrieveSeq = false;
172  m_DbName = NcbiEmptyString;
173  m_NumAlignToShow = 1000000;
174  m_AlignType = eNotSet;
175  m_Rid = "0";
176  m_CddRid = "0";
177  m_EntrezTerm = NcbiEmptyString;
178  m_QueryNumber = 0;
179  m_BlastType = NcbiEmptyString;
180  m_MidLineStyle = eBar;
181  m_ConfigFile = NULL;
182  m_Reg = NULL;
183  m_DynamicFeature = NULL;
184  m_MasterGeneticCode = 1;
185  m_SlaveGeneticCode = 1;
186  m_AlignTemplates = NULL;
187  m_Ctx = NULL;
188  m_Matrix = NULL; //-RMH-
189  m_DomainInfo = NULL;
190  m_SeqPropertyLabel.reset(new vector<string>);
191  m_TranslatedFrameForLocalSeq = eFirst;
192  m_ResultPositionIndex = -1;
193  m_currAlignSeqListIndex = 1;
194  m_QueryAnchoredSetIndex = -1;
197  ? matrix_name
199 
200  // Use default score matrix if one with the provided name was not found.
201  // This may happen for a user's score matrix that was read from a file
202  // (using BLASTMAT environment variable).
203  if (mtx.GetData().empty()) {
205  }
206 
207  // -RMH- --- Need to see if we can retrieve our matrix this way.
208  // for now don't initialize if empty
209  //_ASSERT(!mtx.GetData().empty());
210  if ( !mtx.GetData().empty() )
211  {
212  m_Matrix = new int*[mtx.GetRows()];
213  for(size_t i = 0; i<mtx.GetRows(); ++i) {
214  m_Matrix[i] = new int[mtx.GetCols()];
215  }
216  // copy data from matrix
217  for(size_t i = 0; i<mtx.GetRows(); ++i) {
218  for (size_t j = 0; j < mtx.GetCols(); j++) {
219  m_Matrix[i][j] = mtx(i, j);
220  }
221  }
222  }
223 }
224 
225 
227 {
228  // -RMH- See above
229  if ( m_Matrix )
230  {
231  for(int i = 0; i<k_NumAsciiChar; ++i) {
232  delete [] m_Matrix[i];
233  }
234  delete [] m_Matrix;
235  if (m_ConfigFile) {
236  delete m_ConfigFile;
237  }
238  if (m_Reg) {
239  delete m_Reg;
240  }
241 
242  if(m_DynamicFeature){
243  delete m_DynamicFeature;
244  }
245  }
246 }
247 
248 //8.Display Identities,positives,frames etc
249 string CDisplaySeqalign::x_FormatIdentityInfo(string alignInfo, SAlnInfo* aln_vec_info)
250 {
251  int aln_stop = (int)m_AV->GetAlnStop();
252  int master_strand = m_AV->StrandSign(0);
253  int slave_strand = m_AV->StrandSign(1);
254  int master_frame = aln_vec_info->alnRowInfo->frame[0];
255  int slave_frame = aln_vec_info->alnRowInfo->frame[1];
256  bool aln_is_prot = (m_AlignType & eProt) != 0 ? true : false;
257 
258 
259  string alignParams = alignInfo;//Some already filled in x_DisplayAlignInfo
260 
261 
262  alignParams = CAlignFormatUtil::MapTemplate(alignParams, "aln_match",NStr::IntToString(aln_vec_info->match) + "/"+ NStr::IntToString(aln_stop+1));
263  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_ident",aln_vec_info->identity);
264 
265  if(aln_is_prot){
266  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_pos",NStr::IntToString(aln_vec_info->positive + aln_vec_info->match) + "/" + NStr::IntToString(aln_stop+1));
267  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_pos_prc",NStr::IntToString(((aln_vec_info->positive + aln_vec_info->match)*100)/(aln_stop+1)));
268  }
269  else {
270  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_strand",(master_strand==1 ? "Plus" : "Minus")+ (string)"/"+ (slave_strand==1? "Plus" : "Minus"));
271  }
272 
273  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_gaps",NStr::IntToString(aln_vec_info->gap) + "/" + NStr::IntToString(aln_stop+1));
274  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_gaps_prc",NStr::IntToString((aln_vec_info->gap*100)/(aln_stop+1)));
275 
276  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_params_frame",(master_frame != 0 || slave_frame != 0) ? m_AlignTemplates->alignInfoFrameTmpl: "");
277  if(master_frame != 0 && slave_frame != 0) {
278  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((master_frame > 0) ? "+" : "") + NStr::IntToString(master_frame)
279  + (string)"/"+((slave_frame > 0) ? "+" : "") + NStr::IntToString(slave_frame));
280  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown");
281  } else if (master_frame != 0){
282  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((master_frame > 0) ? "+" : "") + NStr::IntToString(master_frame));
283  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown");
284  } else if (slave_frame != 0){
285  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((slave_frame > 0) ? "+" : "") + NStr::IntToString(slave_frame)) ;
286  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown");
287  }
288  else {
289  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame","");
290  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","");
291  }
292  return alignParams;
293 }
294 
295 
296 ///show blast identity, positive etc.
297 ///@param out: output stream
298 ///@param aln_stop: stop in aln coords
299 ///@param identity: identity
300 ///@param positive: positives
301 ///@param match: match
302 ///@param gap: gap
303 ///@param master_strand: plus strand = 1 and minus strand = -1
304 ///@param slave_strand: plus strand = 1 and minus strand = -1
305 ///@param master_frame: frame for master
306 ///@param slave_frame: frame for slave
307 ///@param aln_is_prot: is protein alignment?
308 ///
309 static void s_DisplayIdentityInfo(CNcbiOstream& out, int aln_stop,
310  int identity, int positive, int match,
311  int gap, int master_strand,
312  int slave_strand, int master_frame,
313  int slave_frame, bool aln_is_prot)
314 {
315  out<<" Identities = "<<match<<"/"<<(aln_stop+1)<<" ("<<identity<<"%"<<")";
316  if(aln_is_prot){
317  out<<", Positives = "<<(positive + match)<<"/"<<(aln_stop+1)
318  <<" ("<<CAlignFormatUtil::GetPercentMatch(positive + match, aln_stop+1)<<"%"<<")";
319  }
320  out<<", Gaps = "<<gap<<"/"<<(aln_stop+1)
321  <<" ("<<CAlignFormatUtil::GetPercentMatch(gap, aln_stop+1)<<"%"<<")"<<"\n";
322  if (!aln_is_prot){
323  out<<" Strand="<<(master_strand==1 ? "Plus" : "Minus")
324  <<"/"<<(slave_strand==1? "Plus" : "Minus")<<"\n";
325  }
326  if(master_frame != 0 && slave_frame != 0) {
327  out <<" Frame = " << ((master_frame > 0) ? "+" : "")
328  << master_frame <<"/"<<((slave_frame > 0) ? "+" : "")
329  << slave_frame<<"\n";
330  } else if (master_frame != 0){
331  out <<" Frame = " << ((master_frame > 0) ? "+" : "")
332  << master_frame << "\n";
333  } else if (slave_frame != 0){
334  out <<" Frame = " << ((slave_frame > 0) ? "+" : "")
335  << slave_frame <<"\n";
336  }
337  out<<"\n";
338 
339 }
340 
341 ///wrap line
342 ///@param out: output stream
343 ///@param str: string to wrap
344 ///
345 static void s_WrapOutputLine(CNcbiOstream& out, const string& str)
346 {
347  const int line_len = 60;
348  bool do_wrap = false;
349  int length = (int) str.size();
350  if (length > line_len) {
351  for (int i = 0; i < length; i ++){
352  if(i > 0 && i % line_len == 0){
353  do_wrap = true;
354  }
355  out << str[i];
356  if(do_wrap && isspace((unsigned char) str[i])){
357  out << "\n";
358  do_wrap = false;
359  }
360  }
361  } else {
362  out << str;
363  }
364 }
365 
366 ///To add style to bases for some conditions
367 ///@param seq: sequence
368 ///@param currIndex: current seq index
369 ///@startStyledOutput: condition for starting output into the string to be styled
370 ///@stopStyledOutput: condition for stopping output into the string to be styled
371 ///@tmpl: template used for output of styled string
372 ///@styledSeqStr: the string to be styled by appling template
373 ///@param out: output stream
374 ///
375 //This function appends seq[currIndex] to styledSeqStr if startStyledOutput==true or !styledSeqStr.empty() && !stopStyledOutput
376 //If stopStyledOutput==true or it is the end of the seq and styledSeqStr has data,
377 //Template like "<font color="#00000"><@alndata@></font>" or <span class="red"><@alndata@></span> is applied to styledSeqStr
378 // and output to CNcbiOstream
379 static bool s_ProcessStyledContent(string& seq, int currIndex, bool startStyledOutput, bool stopStyledOutput, string tmpl,string &styledSeqStr,CNcbiOstream& out)
380 {
381  bool isStyled = false;
382  if(startStyledOutput || (!styledSeqStr.empty() && !stopStyledOutput)){
383  styledSeqStr += seq[currIndex];
384  isStyled = true;
385  }
386  if(!styledSeqStr.empty() && (stopStyledOutput || currIndex == (int)seq.size() - 1) ) {
387  styledSeqStr = CAlignFormatUtil::MapTemplate(tmpl,"alndata",styledSeqStr);
388  out << styledSeqStr;
389  styledSeqStr = "";
390  }
391  return isStyled;
392 }
393 
394 ///To add color to bases other than identityChar
395 ///@param seq: sequence
396 ///@param identity_char: identity character
397 ///@param out: output stream
398 ///
399 static void s_ColorDifferentBases(string& seq, char identity_char,
400  CNcbiOstream& out){
401  std::string colorSeqStr;
402  for(int i = 0; i < (int)seq.size(); i ++){
403  bool isStyled = s_ProcessStyledContent(seq,i,seq[i] != identity_char,seq[i] == identity_char,k_DefaultPairwiseWithIdntTempl,colorSeqStr,out);
404  if(!isStyled) out << seq[i];
405  }
406 }
407 
408 ///return the frame for a given strand
409 ///Note that start is zero bases. It returns frame +/-(1-3). 0 indicates error
410 ///@param start: sequence start position
411 ///@param strand: strand
412 ///@param id: the seqid
413 ///@param scope: the scope
414 ///@return: the frame
415 ///
416 static int s_GetFrame (int start, ENa_strand strand, const CSeq_id& id,
417  CScope& sp)
418 {
419  int frame = 0;
420  if (strand == eNa_strand_plus) {
421  frame = (start % 3) + 1;
422  } else if (strand == eNa_strand_minus) {
423  frame = -(((int)sp.GetBioseqHandle(id).GetBioseqLength() - start - 1)
424  % 3 + 1);
425 
426  }
427  return frame;
428 }
429 
430 ///return the frame for master seq in stdseg
431 ///@param ss: the input stdseg
432 ///@param scope: the scope
433 ///@return: the frame
434 ///
435 static int s_GetStdsegMasterFrame(const CStd_seg& ss, CScope& scope)
436 {
437  const CRef<CSeq_loc> slc = ss.GetLoc().front();
438  ENa_strand strand = GetStrand(*slc);
439  int frame = s_GetFrame(strand == eNa_strand_plus ?
440  GetStart(*slc, &scope) : GetStop(*slc, &scope),
441  strand == eNa_strand_plus ?
443  *(ss.GetIds().front()), scope);
444  return frame;
445 }
446 
447 
448 ///return concatenated exon sequence
449 ///@param feat: the feature containing this cds
450 ///@param feat_strand: the feature strand
451 ///@param range: the range list of seqloc
452 ///@param total_coding_len: the total exon length excluding intron
453 ///@param raw_cdr_product: the raw protein sequence
454 ///@return: the concatenated exon sequences with amino acid aligned to
455 ///to the second base of a codon
456 ///
457 static string s_GetConcatenatedExon(CFeat_CI& feat,
458  ENa_strand feat_strand,
459  list<CRange<TSeqPos> >& range,
460  TSeqPos total_coding_len,
461  string& raw_cdr_product, TSeqPos frame_adj)
462 {
463 
464  string concat_exon(total_coding_len, ' ');
465  TSeqPos frame = 1;
466  const CCdregion& cdr = feat->GetData().GetCdregion();
467  if(cdr.IsSetFrame()){
468  frame = cdr.GetFrame();
469  }
470  TSeqPos num_coding_base;
471  int num_base;
472  TSeqPos coding_start_base;
473  if(feat_strand == eNa_strand_minus){
474  coding_start_base = total_coding_len - 1 - (frame -1) - frame_adj;
475  num_base = total_coding_len - 1;
476  num_coding_base = 0;
477 
478  } else {
479  coding_start_base = 0;
480  coding_start_base += frame - 1 + frame_adj;
481  num_base = 0;
482  num_coding_base = 0;
483  }
484 
485  ITERATE(list<CRange<TSeqPos> >, iter, range){
486  //note that feature on minus strand needs to be
487  //filled backward.
488  if(feat_strand != eNa_strand_minus){
489  for(TSeqPos i = 0; i < iter->GetLength(); i ++){
490  if((TSeqPos)num_base >= coding_start_base){
491  num_coding_base ++;
492  if(num_coding_base % 3 == 2){
493  //a.a to the 2nd base
494  if(num_coding_base / 3 < raw_cdr_product.size()){
495  //make sure the coding region is no
496  //more than the protein seq as there
497  //could errors in ncbi record
498  concat_exon[num_base]
499  = raw_cdr_product[num_coding_base / 3];
500  }
501  }
502  }
503  num_base ++;
504  }
505  } else {
506 
507  for(TSeqPos i = 0; i < iter->GetLength() &&
508  num_base >= 0; i ++){
509  if((TSeqPos)num_base <= coding_start_base){
510  num_coding_base ++;
511  if(num_coding_base % 3 == 2){
512  //a.a to the 2nd base
513  if(num_coding_base / 3 <
514  raw_cdr_product.size() &&
515  coding_start_base >= num_coding_base){
516  //make sure the coding region is no
517  //more than the protein seq as there
518  //could errors in ncbi record
519  concat_exon[num_base]
520  = raw_cdr_product[num_coding_base / 3];
521  }
522  }
523  }
524  num_base --;
525  }
526  }
527  }
528  return concat_exon;
529 }
530 
531 ///map slave feature info to master seq
532 ///@param master_feat_range: master feature seqloc to be filled
533 ///@param feat: the feature in concern
534 ///@param slave_feat_range: feature info for slave
535 ///@param av: the alignment vector for master-slave seqalign
536 ///@param row: the row
537 ///@param frame_adj: frame adjustment
538 ///
539 
540 static void s_MapSlaveFeatureToMaster(list<CRange<TSeqPos> >& master_feat_range,
541  ENa_strand& master_feat_strand, CFeat_CI& feat,
542  list<CSeq_loc_CI::TRange>& slave_feat_range,
543  ENa_strand slave_feat_strand,
544  CAlnVec* av,
545  int row, TSeqPos frame_adj)
546 {
547  TSeqPos trans_frame = 1;
548  const CCdregion& cdr = feat->GetData().GetCdregion();
549  if(cdr.IsSetFrame()){
550  trans_frame = cdr.GetFrame();
551  }
552  trans_frame += frame_adj;
553 
554  TSeqPos prev_exon_len = 0;
555  bool is_first_in_range = true;
556 
557  if ((av->IsPositiveStrand(1) && slave_feat_strand == eNa_strand_plus) ||
558  (av->IsNegativeStrand(1) && slave_feat_strand == eNa_strand_minus)) {
559  master_feat_strand = eNa_strand_plus;
560  } else {
561  master_feat_strand = eNa_strand_minus;
562  }
563 
564  list<CSeq_loc_CI::TRange> acutal_slave_feat_range = slave_feat_range;
565 
566  ITERATE(list<CSeq_loc_CI::TRange>, iter_temp,
567  acutal_slave_feat_range){
568  CRange<TSeqPos> actual_feat_seq_range = av->GetSeqRange(row).
569  IntersectionWith(*iter_temp);
570  if(!actual_feat_seq_range.Empty()){
571  TSeqPos slave_aln_from = 0, slave_aln_to = 0;
572  TSeqPos frame_offset = 0;
573  int curr_exon_leading_len = 0;
574  //adjust frame
575  if (is_first_in_range) {
576  if (slave_feat_strand == eNa_strand_plus) {
577  curr_exon_leading_len
578  = actual_feat_seq_range.GetFrom() - iter_temp->GetFrom();
579 
580  } else {
581  curr_exon_leading_len
582  = iter_temp->GetTo() - actual_feat_seq_range.GetTo();
583  }
584  is_first_in_range = false;
585  frame_offset = (3 - (prev_exon_len + curr_exon_leading_len)%3
586  + (trans_frame - 1)) % 3;
587  }
588 
589  if (av->IsPositiveStrand(1) &&
590  slave_feat_strand == eNa_strand_plus) {
591  slave_aln_from
592  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom() +
593  frame_offset, CAlnMap::eRight );
594 
595  slave_aln_to =
596  av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo(),
598  } else if (av->IsNegativeStrand(1) &&
599  slave_feat_strand == eNa_strand_plus) {
600 
601  slave_aln_from
602  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo(),
604 
605  slave_aln_to =
606  av->GetAlnPosFromSeqPos(row,
607  actual_feat_seq_range.GetFrom() +
608  frame_offset, CAlnMap::eLeft);
609  } else if (av->IsPositiveStrand(1) &&
610  slave_feat_strand == eNa_strand_minus) {
611  slave_aln_from
612  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom(),
614 
615  slave_aln_to =
616  av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo() -
617  frame_offset, CAlnMap::eLeft);
618 
619  } else if (av->IsNegativeStrand(1) &&
620  slave_feat_strand == eNa_strand_minus){
621  slave_aln_from
622  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo() -
623  frame_offset, CAlnMap::eRight );
624 
625  slave_aln_to =
626  av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom(),
628  }
629 
630  TSeqPos master_from =
631  av->GetSeqPosFromAlnPos(0, slave_aln_from, CAlnMap::eRight);
632 
633  TSeqPos master_to =
634  av->GetSeqPosFromAlnPos(0, slave_aln_to, CAlnMap::eLeft);
635 
636  CRange<TSeqPos> master_range(master_from, master_to);
637  master_feat_range.push_back(master_range);
638 
639  }
640  prev_exon_len += iter_temp->GetLength();
641  }
642 }
643 
644 
645 
646 ///return cds coded sequence and fill the id if found
647 ///@param genetic_code: the genetic code
648 ///@param feat: the feature containing this cds
649 ///@param scope: scope to fetch sequence
650 ///@param range: the range list of seqloc
651 ///@param handle: the bioseq handle
652 ///@param feat_strand: the feature strand
653 ///@param feat_id: the feature id to be filled
654 ///@param frame_adj: frame adjustment
655 ///@param mix_loc: is this seqloc mixed with other seqid?
656 ///@return: the encoded protein sequence
657 ///
658 static string s_GetCdsSequence(int genetic_code, CFeat_CI& feat,
659  CScope& scope, list<CRange<TSeqPos> >& range,
660  const CBioseq_Handle& handle,
661  ENa_strand feat_strand, string& feat_id,
662  TSeqPos frame_adj, bool mix_loc)
663 {
664  string raw_cdr_product = NcbiEmptyString;
665  if(feat->IsSetProduct() && feat->GetProduct().IsWhole() && !mix_loc){
666  //show actual aa if there is a cds product
667 
668  const CSeq_id& productId =
669  feat->GetProduct().GetWhole();
670  const CBioseq_Handle& productHandle
671  = scope.GetBioseqHandle(productId );
672  feat_id = "CDS:" +
673  CDeflineGenerator().GenerateDefline(productHandle).substr(0, k_FeatureIdLen);
674  productHandle.
675  GetSeqVector(CBioseq_Handle::eCoding_Iupac).
676  GetSeqData(0, productHandle.
677  GetBioseqLength(), raw_cdr_product);
678  } else {
679  CSeq_loc isolated_loc;
680  ITERATE(list<CRange<TSeqPos> >, iter, range){
681  TSeqPos from = iter->GetFrom();
682  TSeqPos to = iter->GetTo();
683  if(feat_strand == eNa_strand_plus){
684  isolated_loc.
685  Add(*(handle.GetRangeSeq_loc(from + frame_adj,
686  to,
687  feat_strand)));
688  } else {
689  isolated_loc.
690  Add(*(handle.GetRangeSeq_loc(from,
691  to - frame_adj,
692  feat_strand)));
693  }
694  }
695  CGenetic_code gc;
697  ce->Select(CGenetic_code::C_E::e_Id);
698  ce->SetId(genetic_code);
699  gc.Set().push_back(ce);
700  isolated_loc.SetPartialStart(true, eExtreme_Biological);
701  isolated_loc.SetPartialStop (true, eExtreme_Biological);
702  CSeqTranslator::Translate(isolated_loc, handle.GetScope(),
703  raw_cdr_product, &gc);
704 
705  }
706  return raw_cdr_product;
707 }
708 
709 ///fill the cds start positions (1 based)
710 ///@param line: the input cds line
711 ///@param concat_exon: exon only string
712 ///@param length_per_line: alignment length per line
713 ///@param feat_aln_start_totalexon: feature aln pos in concat_exon
714 ///@param strand: the alignment strand
715 ///@param start: start list to be filled
716 ///
717 static void s_FillCdsStartPosition(string& line, string& concat_exon,
718  size_t length_per_line,
719  TSeqPos feat_aln_start_totalexon,
720  ENa_strand seq_strand,
721  ENa_strand feat_strand,
722  list<TSeqPos>& start)
723 {
724  size_t actual_line_len = 0;
725  size_t aln_len = line.size();
726  TSeqPos previous_num_letter = 0;
727 
728  //the number of amino acids preceeding this exon start position
729  for (size_t i = 0; i <= feat_aln_start_totalexon; i ++){
730  if(feat_strand == eNa_strand_minus){
731  //remember the amino acid in this case goes backward
732  //therefore we count backward too
733 
734  int pos = concat_exon.size() -1 - i;
735  if(pos >= 0 && isalpha((unsigned char) concat_exon[pos])){
736  previous_num_letter ++;
737  }
738 
739  } else {
740  if(isalpha((unsigned char) concat_exon[i])){
741  previous_num_letter ++;
742  }
743  }
744  }
745 
746 
747  TSeqPos prev_num = 0;
748  //go through the entire feature line and get the amino acid position
749  //for each line
750  for(size_t i = 0; i < aln_len; i += actual_line_len){
751  //handle the last row which may be shorter
752  if(aln_len - i< length_per_line) {
753  actual_line_len = aln_len - i;
754  } else {
755  actual_line_len = length_per_line;
756  }
757  //the number of amino acids on this row
758  TSeqPos cur_num = 0;
759  bool has_intron = false;
760 
761  //go through each character on a row
762  for(size_t j = i; j < actual_line_len + i; j ++){
763  //don't count gap
764  if(isalpha((unsigned char) line[j])){
765  cur_num ++;
766  } else if(line[j] == k_IntronChar){
767  has_intron = true;
768  }
769  }
770 
771  if(cur_num > 0){
772  if(seq_strand == eNa_strand_plus){
773  if(feat_strand == eNa_strand_minus) {
774  start.push_back(previous_num_letter - prev_num);
775  } else {
776  start.push_back(previous_num_letter + prev_num);
777  }
778  } else {
779  if(feat_strand == eNa_strand_minus) {
780  start.push_back(previous_num_letter + prev_num);
781  } else {
782  start.push_back(previous_num_letter - prev_num);
783  }
784  }
785  } else if (has_intron) {
786  start.push_back(0); //sentinal for no show
787  }
788  prev_num += cur_num;
789  }
790 }
791 
792 ///make a new copy of master seq with feature info and return the scope
793 ///that contains this sequence
794 ///@param feat_range: the feature seqlocs
795 ///@param feat_seq_strand: the stand info
796 ///@param handle: the seq handle for the original master seq
797 ///@return: the scope containing the new master seq
798 ///
799 static CRef<CScope> s_MakeNewMasterSeq(list<list<CRange<TSeqPos> > >& feat_range,
800  list<ENa_strand>& feat_seq_strand,
801  const CBioseq_Handle& handle)
802 {
806  CRef<CScope> scope (new CScope(*obj));
807  scope->AddDefaults();
808  CRef<CBioseq> cbsp(new CBioseq());
809  cbsp->Assign(*(handle.GetCompleteBioseq()));
810 
811  CBioseq::TAnnot& anot_list = cbsp->SetAnnot();
812  CRef<CSeq_annot> anot(new CSeq_annot);
814  data->Select(CSeq_annot::TData::e_Ftable);
815  anot->SetData(*data);
816  CSeq_annot::TData::TFtable& ftable = anot->SetData().SetFtable();
817  int counter = 0;
818  ITERATE(list<list<CRange<TSeqPos> > >, iter, feat_range) {
819  counter ++;
820  CRef<CSeq_feat> seq_feat(new CSeq_feat);
821  CRef<CSeqFeatData> feat_data(new CSeqFeatData);
823  seq_feat->SetData(*feat_data);
824  seq_feat->SetComment("Putative " + NStr::IntToString(counter));
825  CRef<CSeq_loc> seq_loc (new CSeq_loc);
826 
827  ITERATE(list<CRange<TSeqPos> >, iter2, *iter) {
828  seq_loc->Add(*(handle.GetRangeSeq_loc(iter2->GetFrom(),
829  iter2->GetTo(),
830  feat_seq_strand.front())));
831  }
832  seq_feat->SetLocation(*seq_loc);
833  ftable.push_back(seq_feat);
834  feat_seq_strand.pop_front();
835  }
836  anot_list.push_back(anot);
837  CRef<CSeq_entry> entry(new CSeq_entry());
838  entry->SetSeq(*cbsp);
839  scope->AddTopLevelSeqEntry(*entry);
840 
841  return scope;
842 }
843 
844 //output feature lines
845 //@param reference_feat_line: the master feature line to be compared
846 //for coloring
847 //@param feat_line: the slave feature line
848 //@param color_feat_mismatch: color or not
849 //@param start: the alignment pos
850 //@param len: the length per line
851 //@param out: stream for output
852 //
853 static void s_OutputFeature(string& reference_feat_line,
854  string& feat_line,
855  bool color_feat_mismatch,
856  int start,
857  int len,
858  CNcbiOstream& out,
859  bool is_html)
860 {
861  if((int)feat_line.size() > start){
862  string actual_feat = feat_line.substr(start, len);
863  string actual_reference_feat = NcbiEmptyString;
864  if(reference_feat_line != NcbiEmptyString){
865  actual_reference_feat = reference_feat_line.substr(start, len);
866  }
867  if(color_feat_mismatch
868  && actual_reference_feat != NcbiEmptyString &&
869  !NStr::IsBlank(actual_reference_feat)){
870  string styledSequenceStr;
871  for(int i = 0; i < (int)actual_feat.size() &&
872  i < (int)actual_reference_feat.size(); i ++){
873  bool styledOutput = actual_feat[i] != actual_reference_feat[i] &&
874  (actual_feat[i] != ' ' && actual_feat[i] != k_IntronChar && actual_reference_feat[i] != k_IntronChar);
875  bool stopStyledOutput = (actual_feat[i] == actual_reference_feat[i]) && actual_feat[i] != ' ';
876  bool isStyled = s_ProcessStyledContent(actual_feat,i,styledOutput,stopStyledOutput, k_DefaultFeaturesTempl,styledSequenceStr,out);
877  if(!isStyled) out << actual_feat[i];
878  }
879  } else {
880  out << (is_html?CHTMLHelper::HTMLEncode(actual_feat):actual_feat);
881  }
882  }
883 
884 }
885 
886 
888  int row,
889  string& master_feat_str,
890  CNcbiOstream& out)
891 {
892  TSAlnFeatureInfoList& feature = alnRoInfo->bioseqFeature[row];
893  CAlnMap::TSignedRange alignment_range = alnRoInfo->currRange;
894  int aln_start = alnRoInfo->currPrintSegment;
895  int line_length = alnRoInfo->currActualLineLen;
896  int start_length = alnRoInfo->maxStartLen;
897  int id_length = alnRoInfo->maxIdLen;
898  if (alnRoInfo->show_align_stats) {
899  id_length += alnRoInfo->max_align_stats_len + k_AlignStatsMargin;
900  }
901  if (alnRoInfo->show_seq_property_label){
902  id_length += alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin;
903  }
904  NON_CONST_ITERATE(TSAlnFeatureInfoList, iter, feature) {
905  //check blank string for cases where CDS is in range
906  //but since it must align with the 2nd codon and is
907  //actually not in range
908  if (alignment_range.IntersectingWith((*iter)->aln_range) &&
909  !(NStr::IsBlank((*iter)->feature_string.
910  substr(aln_start, line_length)) &&
914  string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox","");
915  out << checkboxBuf;
916  }
917  out<<(*iter)->feature->feature_id;
918  if((*iter)->feature_start.empty()){
920  AddSpace(out, id_length + k_IdStartMargin
921  +start_length + k_StartSequenceMargin
922  -(*iter)->feature->feature_id.size());
923  } else {
924  int feat_start = (*iter)->feature_start.front();
925  if(feat_start > 0){
927  AddSpace(out, id_length + k_IdStartMargin
928  -(*iter)->feature->feature_id.size());
929  out << feat_start;
931  AddSpace(out, start_length -
932  NStr::IntToString(feat_start).size() +
934  } else { //no show start
936  AddSpace(out, id_length + k_IdStartMargin
937  +start_length + k_StartSequenceMargin
938  -(*iter)->feature->feature_id.size());
939  }
940 
941  (*iter)->feature_start.pop_front();
942  }
943  bool color_cds_mismatch = false;
944  if((m_AlignOption & eHtml) &&
945  (m_AlignOption & eShowCdsFeature) && row > 0){
946  //only for slaves, only for cds feature
947  color_cds_mismatch = true;
948  } else if((m_AlignOption & eHtml) &&
951  //mostly for igblast
952  //only for slave
953  color_cds_mismatch = true;
954  }
955  s_OutputFeature(master_feat_str,
956  (*iter)->feature_string,
957  color_cds_mismatch, aln_start,
958  line_length, out, (m_AlignOption & eHtml));
959  if(row == 0){//set master feature as reference
960  master_feat_str = (*iter)->feature_string;
961  }
962  out<<"\n";
963  }
964  }
965 
966 }
967 
968 string CDisplaySeqalign::x_HTMLSeqIDLink(SAlnRowInfo *alnRoInfo, int row,TGi giToUse)
969 {
970  const CBioseq_Handle& bsp_handle = m_AV->GetBioseqHandle(row);
971  string urlLink = NcbiEmptyString;
972  const list<CRef<CSeq_id> >& ids = bsp_handle.GetBioseqCore()->GetId();
973  CAlignFormatUtil::SSeqURLInfo *seqUrlInfo = x_InitSeqUrl(giToUse,alnRoInfo->seqidArray[row],alnRoInfo->taxid[row],ids);
975  seqUrlInfo->defline = sequence::CDeflineGenerator().GenerateDefline(bsp_handle);
976  }
977  seqUrlInfo->useTemplates = true;
978  urlLink = CAlignFormatUtil::GetFullIDLink(seqUrlInfo,&ids);
979  delete seqUrlInfo;
980  return urlLink;
981 }
982 
984  TTaxId taxid,const list<CRef<CSeq_id> >& ids)
985 {
986  string idString = m_AV->GetSeqId(1).GetSeqIdString();
987  CRange<TSeqPos> range = (m_AlnLinksParams.count(idString) > 0 && m_AlnLinksParams[idString].subjRange) ?
988  CRange<TSeqPos>(m_AlnLinksParams[idString].subjRange->GetFrom() + 1,m_AlnLinksParams[idString].subjRange->GetTo() + 1) :
989  CRange<TSeqPos>(0,0);
990  bool flip = (m_AlnLinksParams.count(idString) > 0) ? m_AlnLinksParams[idString].flip : false;
991  string user_url= (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : "";
992  giToUse = (giToUse == ZERO_GI) ? CAlignFormatUtil::GetGiForSeqIdList(ids):giToUse;
995  giToUse,
996  accession,
997  0,// linkout not used any more in seqUrl
998  m_cur_align,
999  true,
1000  (m_AlignOption & eNewTargetWindow) ? true : false,
1001  range,
1002  flip,
1003  taxid,
1004  (m_AlignOption & eShowInfoOnMouseOverSeqid) ? true : false);
1005  seqUrlInfo->resourcesUrl = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "RESOURCE_URL") : "";
1006  seqUrlInfo->advancedView = seqUrlInfo->useTemplates = m_AlignTemplates != NULL;
1007  return seqUrlInfo;
1008 }
1009 
1011  const list< CRef< CBlast_def_line > > &bdl_list,
1012  int lnkDispParams)
1013 {
1014  CAlignFormatUtil::SSeqURLInfo *seqUrlInfo = alnDispParams->seqUrlInfo;
1015  seqUrlInfo->hasTextSeqID = alnDispParams->hasTextSeqID;
1016  CRef<CSeq_id> seqID = alnDispParams->seqID;
1017  if(lnkDispParams & eDisplayResourcesLinks) {
1018  seqUrlInfo->segs = (lnkDispParams & eDisplayDownloadLink) ? x_GetSegs(1) : "";
1021  *seqID,
1022  m_Scope,
1023  customLinkTypes);
1024 
1026 
1027  //URL to FASTA representation, includes genbank, trace and SNP
1029 
1030  //URL to FASTA for all regions
1032 
1033 
1034  if(m_AlignOption&eLinkout && (seqUrlInfo->hasTextSeqID)){
1036  m_LinkoutInfo.taxid = seqUrlInfo->taxid;
1037  m_LinkoutInfo.subjRange = seqUrlInfo->seqRange;
1038  if(bdl_list.size() > 0) {
1040  }
1041  else {
1043  }
1044 
1045  }
1046  }
1047 }
1048 
1049 
1050 void
1052 {
1053  ITERATE(TSeqLocInfoVector, sequence_masks, masks) {
1054  const CSeq_id& id = sequence_masks->front()->GetSeqId();
1055  m_SubjectMasks[id] = *sequence_masks;
1056  }
1057 }
1058 
1059 //align translation to 2nd base
1060 static string s_GetFinalTranslatedString(const CSeq_loc& loc, CScope& scope,
1061  int first_encoding_base, int align_length,
1062  const string& translation, const string& sequence,
1063  char gap_char){
1064 
1065  string feat(align_length, ' ');
1066  int num_base = 0;
1067  int j = 0;
1068 
1069  for (int i = first_encoding_base; i < (int) feat.size() &&
1070  j < (int)translation.size(); i ++) {
1071  if (sequence[i] != gap_char) {
1072  num_base ++;
1073 
1074  //aa residue to 2nd nuc position
1075  if (num_base%3 == 2) {
1076  feat[i] = translation[j];
1077  j ++;
1078  }
1079  }
1080  }
1081  return feat;
1082 }
1083 
1084 void CDisplaySeqalign::x_AddTranslationForLocalSeq(vector<TSAlnFeatureInfoList>& retval,
1085  vector<string>& sequence) const {
1086  if (m_AV->IsPositiveStrand(0) && m_AV->IsPositiveStrand(1)) {
1087 
1088  //find the first aln pos that both seq has no gaps for 3 consecutive pos.
1089  int non_gap_aln_pos = 0;
1090  CAlnVec::TResidue gap_char = m_AV->GetGapChar(0);
1091  int num_consecutive = 0;
1092  for (int i =0; i < (int) sequence[0].size(); i ++) {
1093  if (sequence[0][i] != gap_char &&
1094  sequence[1][i] != gap_char) {
1095 
1096  num_consecutive ++;
1097  if (num_consecutive >=3) {
1098  non_gap_aln_pos = i - 2;
1099  break;
1100  }
1101  } else {
1102  num_consecutive = 0;
1103  }
1104  }
1105 
1106 
1107  //master
1108  int master_frame_extra = m_AV->GetSeqPosFromAlnPos(0, non_gap_aln_pos)%3;
1109  int master_frame_start;
1110  //= m_AV->GetSeqPosFromSeqPos(0, 1, subject_frame_start);
1111  master_frame_start = m_AV->GetSeqPosFromAlnPos(0, non_gap_aln_pos) +
1112  (3 - (master_frame_extra - m_TranslatedFrameForLocalSeq))%3;
1113 
1114  CRef<CSeq_loc> master_loc(new CSeq_loc((CSeq_loc::TId &) m_AV->GetSeqId(0),
1115  master_frame_start,
1116  m_AV->GetSeqStop(0)));
1117  master_loc->SetPartialStart(true, eExtreme_Biological);
1118  master_loc->SetPartialStop (true, eExtreme_Biological);
1119  string master_translation;
1120  CSeqTranslator::Translate(*master_loc,
1121  m_Scope,
1122  master_translation);
1123  int master_first_encoding_base = m_AV->GetAlnPosFromSeqPos(0, master_frame_start);
1124  string master_feat = s_GetFinalTranslatedString(*master_loc, m_Scope,
1125  master_first_encoding_base,
1126  m_AV->GetAlnStop() + 1,
1127  master_translation,
1128  sequence[0], gap_char);
1129 
1130  CRef<SAlnFeatureInfo> master_featInfo(new SAlnFeatureInfo);
1131 
1132  x_SetFeatureInfo(master_featInfo, *master_loc, 0, m_AV->GetAlnStop(),
1133  m_AV->GetAlnStop(), ' ',
1134  " ", master_feat);
1135 
1136  retval[0].push_back(master_featInfo);
1137 
1138  //subject
1139  int subject_frame_start = m_AV->GetSeqPosFromSeqPos(1, 0, master_frame_start);
1140 
1141  CRef<CSeq_loc> subject_loc(new CSeq_loc((CSeq_loc::TId &) m_AV->GetSeqId(1),
1142  (CSeq_loc::TPoint) subject_frame_start,
1143  (CSeq_loc::TPoint) m_AV->GetSeqStop(1)));
1144  subject_loc->SetPartialStart(true, eExtreme_Biological);
1145  subject_loc->SetPartialStop (true, eExtreme_Biological);
1146  string subject_translation;
1147  CSeqTranslator::Translate(*subject_loc,
1148  m_Scope,
1149  subject_translation);
1150  int subject_first_encoding_base = m_AV->GetAlnPosFromSeqPos(1, subject_frame_start);
1151  string subject_feat = s_GetFinalTranslatedString(*subject_loc, m_Scope,
1152  subject_first_encoding_base,
1153  m_AV->GetAlnStop() + 1,
1154  subject_translation,
1155  sequence[1], gap_char);
1156 
1157  CRef<SAlnFeatureInfo> subject_featInfo(new SAlnFeatureInfo);
1158 
1159  x_SetFeatureInfo(subject_featInfo, *subject_loc, 0, m_AV->GetAlnStop(),
1160  m_AV->GetAlnStop(), ' ',
1161  " ", subject_feat);
1162 
1163  retval[1].push_back(subject_featInfo);
1164 
1165  }
1166 }
1167 
//Calculate the identity between the master row and a given row of a multiple
//alignment, excluding the leading and trailing gaps of the given row.
//
//@param sequence_standard  aligned master row
//@param sequence           aligned row being compared to the master
//@param gap_char           character marking a gap
//@param match              [out] number of compared columns where both rows
//                          hold the same character
//@param align_length       [out] number of compared columns; columns where
//                          both rows have a gap are not counted
//
//If the row holds no residue at all, the default bounds are kept and the
//whole row is compared.
void s_CalculateIdentity(const string& sequence_standard,
                         const string& sequence , char gap_char,
                         int& match, int& align_length){
    match = 0;
    align_length = 0;

    const int seq_len = (int)sequence.size();
    int start = 0;
    int end = seq_len - 1;

    //first non-gap column
    for(int i = 0; i < seq_len; i++){
        if (sequence[i] != gap_char){
            start = i;
            break;
        }
    }

    //last non-gap column. Column 0 must be examined too: with "i > 0" a row
    //whose only residue is in column 0 kept all of its trailing gaps.
    for(int i = seq_len - 1; i >= 0; i--){
        if (sequence[i] != gap_char){
            end = i;
            break;
        }
    }

    //do not read past the end of either row
    const int std_len = (int)sequence_standard.size();
    const int limit = seq_len < std_len ? seq_len : std_len;

    for(int i = start; i <= end && i < limit; i++){
        if(sequence[i] == gap_char && sequence_standard[i] == gap_char) {
            continue; //column is a gap in both rows
        }
        if (sequence_standard[i] == sequence[i]){
            match ++;
        }
        align_length ++;
    }
}
1203 
1205 {
1206  size_t maxIdLen=0, maxStartLen=0;
1207  //, startLen=0, actualLineLen=0;
1208  //size_t aln_stop=m_AV->GetAlnStop();
1209  const int rowNum=m_AV->GetNumRows();
1211  m_AV->SetAnchor(0);
1212  }
1213  m_AV->SetGapChar('-');
1214 
1215  if (m_AlignOption & eShowEndGaps) {
1216  m_AV->SetEndChar('-');
1217  }
1218  else {
1219  m_AV->SetEndChar(' ');
1220  }
1221  vector<string> sequence(rowNum);
1222  vector<CAlnMap::TSeqPosList> seqStarts(rowNum);
1223  vector<CAlnMap::TSeqPosList> seqStops(rowNum);
1224  vector<CAlnMap::TSeqPosList> insertStart(rowNum);
1225  vector<CAlnMap::TSeqPosList> insertAlnStart(rowNum);
1226  vector<CAlnMap::TSeqPosList> insertLength(rowNum);
1227  vector<string> seqidArray(rowNum);
1228  string middleLine;
1229  vector<CAlnMap::TSignedRange> rowRng(rowNum);
1230  vector<int> frame(rowNum);
1231  vector<TTaxId> taxid(rowNum);
1232  int max_feature_num = 0;
1233  vector<int> match(rowNum-1);
1234  vector<double> percent_ident(rowNum-1);
1235  vector<int> align_length(rowNum-1);
1236  vector<string> align_stats(rowNum-1);
1237  vector<string> seq_property_label(rowNum-1);
1238  int max_align_stats = 0;
1239  int max_seq_property_label = 0;
1240 
1241  //Add external query feature info such as phi blast pattern
1242  vector<TSAlnFeatureInfoList> bioseqFeature;
1243  x_GetQueryFeatureList(rowNum, (int)m_AV->GetAlnStop(), bioseqFeature);
1244  if(m_DomainInfo && !m_DomainInfo->empty()){
1245  x_GetDomainInfo(rowNum, (int)m_AV->GetAlnStop(), bioseqFeature);
1246  }
1247  _ASSERT((int)bioseqFeature.size() == rowNum);
1248  // Mask locations for queries (first elem) and subjects (all other rows)
1249  vector<TSAlnSeqlocInfoList> masked_regions(rowNum);
1250  x_FillLocList(masked_regions[0], m_Seqloc);
1251 
1252  for (int row = 1; row < rowNum; row++) {
1253  const CSeq_id& id = m_AV->GetSeqId(row);
1254  x_FillLocList(masked_regions[row], &m_SubjectMasks[id]);
1255  }
1256 
1257  //prepare data for each row
1258  list<list<CRange<TSeqPos> > > feat_seq_range;
1259  list<ENa_strand> feat_seq_strand;
1260 
1261  for (int row=0; row<rowNum; row++) {
1262 
1263  string type_temp = m_BlastType;
1264  type_temp = NStr::TruncateSpaces(NStr::ToLower(type_temp));
1265  if((m_AlignTemplates == NULL && (type_temp == "mapview" || type_temp == "mapview_prev")) ||
1266  type_temp == "gsfasta" || type_temp == "gsfasta_prev"){
1267  taxid[row] = CAlignFormatUtil::GetTaxidForSeqid(m_AV->GetSeqId(row),
1268  m_Scope);
1269  } else if ((m_AlignOption & eHtml) && m_AV->GetSeqId(row).Which() == CSeq_id::e_Local && row > 0){
1270  //this is for adding url for local seqid, for example igblast db.
1271  taxid[row] = CAlignFormatUtil::GetTaxidForSeqid(m_AV->GetSeqId(row),
1272  m_Scope);
1273  } else {
1274  taxid[row] = ZERO_TAX_ID;
1275  }
1276  rowRng[row] = m_AV->GetSeqAlnRange(row);
1277  frame[row] = (m_AV->GetWidth(row) == 3 ?
1278  s_GetFrame(m_AV->IsPositiveStrand(row) ?
1279  m_AV->GetSeqStart(row) :
1280  m_AV->GetSeqStop(row),
1281  m_AV->IsPositiveStrand(row) ?
1283  m_AV->GetSeqId(row), m_Scope) : 0);
1284  //make sequence
1285  m_AV->GetWholeAlnSeqString(row, sequence[row], &insertAlnStart[row],
1286  &insertStart[row], &insertLength[row],
1287  (int)m_LineLen, &seqStarts[row], &seqStops[row]);
1289  m_AlignOption&eMergeAlign && m_AV->GetWidth(row) != 3) {
1290 
1291  s_CalculateIdentity(sequence[0], sequence[row], m_AV->GetGapChar(row),
1292  match[row-1], align_length[row-1]);
1293 
1294  if (align_length[row-1] > 0 ){
1295  percent_ident[row-1] = ((double)match[row-1])/align_length[row-1]*100;
1296  align_stats[row-1] = NStr::DoubleToString(percent_ident[row-1], 1, 0) +
1297  "% (" + NStr::IntToString(match[row-1]) + "/" +
1298  NStr::IntToString(align_length[row-1]) + ")" ;
1299  } else {//something is wrong
1300  percent_ident[row - 1] = 0;
1301  align_stats[row-1] = "0";
1302  }
1303 
1304  max_align_stats = max(max_align_stats,
1305  (int)align_stats[row-1].size());
1306  }
1307 
1308  //seq property label
1309  if(row > 0 &&
1311  m_AlignOption&eMergeAlign && m_AV->GetWidth(row) != 3) {
1312 
1313  if((int)m_SeqPropertyLabel->size() >= row -1){
1314  seq_property_label[row-1] = (*m_SeqPropertyLabel)[row]; //skip the first one which is for query
1315  } else {//something is wrong
1316  seq_property_label[row-1] = NcbiEmptyString;
1317  }
1318 
1319  max_seq_property_label = max(max_seq_property_label,
1320  (int)seq_property_label[row-1].size());
1321  }
1322 
1323  if (row == 1 && eShowTranslationForLocalSeq & m_AlignOption
1324  && m_AV->GetWidth(row) != 3
1325  && !(m_AlignType & eProt)) {
1326  x_AddTranslationForLocalSeq(bioseqFeature, sequence);
1327  }
1328  //make feature. Only for pairwise and untranslated for subject nuc seq
1329  if(!(m_AlignOption & eMasterAnchored) &&
1330  !(m_AlignOption & eMergeAlign) && m_AV->GetWidth(row) != 3 &&
1331  !(m_AlignType & eProt)){
1333  TGi master_gi = FindGi(m_AV->GetBioseqHandle(0).
1334  GetBioseqCore()->GetId());
1335  x_GetFeatureInfo(bioseqFeature[row], *m_featScope,
1336  CSeqFeatData::e_Cdregion, row, sequence[row],
1337  feat_seq_range, feat_seq_strand,
1338  row == 1 && !(master_gi > ZERO_GI) ? true : false);
1339 
1340  if(!(feat_seq_range.empty()) && row == 1) {
1341  //make a new copy of master bioseq and add the feature from
1342  //slave to make putative cds feature
1343  CRef<CScope> master_scope_with_feat =
1344  s_MakeNewMasterSeq(feat_seq_range, feat_seq_strand,
1345  m_AV->GetBioseqHandle(0));
1346  //make feature string for master bioseq
1347  list<list<CRange<TSeqPos> > > temp_holder;
1348  x_GetFeatureInfo(bioseqFeature[0], *master_scope_with_feat,
1349  CSeqFeatData::e_Cdregion, 0, sequence[0],
1350  temp_holder, feat_seq_strand, false);
1351  }
1352  }
1354  x_GetFeatureInfo(bioseqFeature[row], *m_featScope,
1355  CSeqFeatData::e_Gene, row, sequence[row],
1356  feat_seq_range, feat_seq_strand, false);
1357  }
1358  }
1359  //make id
1360  x_FillSeqid(seqidArray[row], row);
1361  maxIdLen=max<size_t>(seqidArray[row].size(), maxIdLen);
1362  size_t maxCood=max<size_t>(m_AV->GetSeqStart(row), m_AV->GetSeqStop(row));
1363  maxStartLen = max<size_t>(NStr::SizetToString(maxCood).size(), maxStartLen);
1364  }
1365  for(int i = 0; i < rowNum; i ++){//adjust max id length for feature id
1366  int num_feature = 0;
1367  ITERATE(TSAlnFeatureInfoList, iter, bioseqFeature[i]) {
1368  maxIdLen=max<size_t>((*iter)->feature->feature_id.size(), maxIdLen);
1369  num_feature ++;
1370  if(num_feature > max_feature_num){
1371  max_feature_num = num_feature;
1372  }
1373  }
1374  } //end of preparing row data
1375  SAlnRowInfo *alnRoInfo = new SAlnRowInfo();
1376  alnRoInfo->sequence = sequence;
1377  alnRoInfo->seqStarts = seqStarts;
1378  alnRoInfo->seqStops = seqStops;
1379  alnRoInfo->insertStart = insertStart;
1380  alnRoInfo->insertAlnStart = insertAlnStart;
1381  alnRoInfo->insertLength = insertLength;
1382  alnRoInfo->seqidArray = seqidArray;
1383  alnRoInfo->middleLine = middleLine;
1384  alnRoInfo->rowRng = rowRng;
1385  alnRoInfo->frame = frame;
1386  alnRoInfo->taxid = taxid;
1387  alnRoInfo->bioseqFeature = bioseqFeature;
1388  alnRoInfo->masked_regions = masked_regions;
1389  alnRoInfo->seqidArray = seqidArray;
1390  alnRoInfo->maxIdLen = maxIdLen;
1391  alnRoInfo->maxStartLen = maxStartLen;
1392  alnRoInfo->max_feature_num = max_feature_num;
1393  alnRoInfo->colorMismatch = false;
1394  alnRoInfo->rowNum = rowNum;
1395  alnRoInfo->match = match;
1396  alnRoInfo->percent_ident = percent_ident;
1397  alnRoInfo->align_length = align_length;
1398  alnRoInfo->align_stats = align_stats;
1399  alnRoInfo->max_align_stats_len=max_align_stats;
1400  alnRoInfo->seq_property_label = seq_property_label;
1401  alnRoInfo->max_seq_property_label = max_seq_property_label;
1402  return alnRoInfo;
1403 }
1404 //uses m_AV m_LineLen m_AlignOption m_QueryNumber
1406 {
1407  size_t aln_stop=m_AV->GetAlnStop();
1408  int rowNum = alnRoInfo->rowNum;
1409  vector<int> prev_stop(rowNum);
1411 
1412  //only for untranslated alignment
1415  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1416 
1417  //only for untranslated alignment
1420  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1421 
1422  //output rows
1423  string formattedString;
1424  for(int j=0; j<=(int)aln_stop; j+=(int)m_LineLen){
1425  string rowdata = x_DisplayRowDataSet(alnRoInfo,j, prev_stop);
1426  formattedString += rowdata;
1427  }//end of displaying rows
1428  return formattedString;
1429 }
1430 
1432 {
1433  size_t aln_stop=m_AV->GetAlnStop();
1434  int rowNum = alnRoInfo->rowNum;
1435  vector<int> prev_stop(rowNum);
1436 
1437  //only for untranslated alignment
1440  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1441 
1442  //only for untranslated alignment
1445  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1446  int rowSetsCount = 1;
1447  //output rows
1448  for(int j=0; j<=(int)aln_stop; j+=(int)m_LineLen){
1449  //Used for download query range specified by m_QueryAnchoredSetIndex
1450  //Until m_QueryAnchoredSetIndex==rowSetsCount do not display alignment just calculate prev_stop, etc.
1451  if(m_QueryAnchoredSetIndex != -1 && m_QueryAnchoredSetIndex != rowSetsCount) {
1452  x_ProcessRowDataSet(alnRoInfo,j, prev_stop);
1453  }
1454  else {
1455  string rowdata = x_DisplayRowDataSet(alnRoInfo,j, prev_stop);
1456  if(m_AlignTemplates && !m_AlignTemplates->alignQueryAnchTempl.empty()) {//Templates will be used for query anchored display
1458  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"currQueryAnchSet",NStr::IntToString(rowSetsCount));
1459  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"nextQueryAnchSet",NStr::IntToString(rowSetsCount + 1));
1460  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"prevQueryAnchSet",NStr::IntToString(rowSetsCount - 1));
1461  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"fromQueryRange",NStr::IntToString(j + 1));
1462  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"toQueryRange",NStr::IntToString(j + alnRoInfo->currActualLineLen));
1463  }
1464  out << rowdata;
1465  }
1466  rowSetsCount++;
1467  }//end of displaying rows
1468 }
1469 
1470 
1471 
1472 string CDisplaySeqalign::x_DisplayRowDataSet(SAlnRowInfo *alnRoInfo,int aln_start, vector<int> &prev_stop)
1473 {
1474  size_t actualLineLen=0;
1475  string master_feat_str = NcbiEmptyString;
1476  size_t aln_stop=m_AV->GetAlnStop();
1477 
1478  int rowNum = alnRoInfo->rowNum;
1480 
1481 
1482  //output according to aln coordinates
1483  if(aln_stop-aln_start+1<m_LineLen) {
1484  actualLineLen=aln_stop-aln_start+1;
1485  } else {
1486  actualLineLen=m_LineLen;
1487  }
1488  CAlnMap::TSignedRange curRange(aln_start, aln_start+(int)actualLineLen-1);
1489  alnRoInfo->currPrintSegment = aln_start;
1490  alnRoInfo->currActualLineLen = actualLineLen;
1491  alnRoInfo->currRange = curRange;
1492  //here is each row
1493  for (int row=0; row<rowNum; row++) {
1494  bool hasSequence = true;
1495  if (!(m_AlignOption & eShowGapOnlyLines)) {
1496  hasSequence = curRange.IntersectingWith(alnRoInfo->rowRng[row]);
1497  }
1498  //only output rows that have sequence
1499  if (hasSequence){
1500  int end = alnRoInfo->seqStops[row].front() + 1;
1501  bool has_mismatch = false;
1502  //change the alignment line to identity style
1503  if (row>0 && m_AlignOption & eShowIdentity){//check usage - pairwise - only
1504  for (int index = aln_start; index < aln_start + (int)actualLineLen &&
1505  index < (int)alnRoInfo->sequence[row].size(); index ++){
1506  if (alnRoInfo->sequence[row][index] == alnRoInfo->sequence[0][index] &&
1507  isalpha((unsigned char) alnRoInfo->sequence[row][index])) {
1508  alnRoInfo->sequence[row][index] = k_IdentityChar;
1509  } else if (!has_mismatch) {
1510  has_mismatch = true;
1511  }
1512  }
1513  }
1514  //feature for query
1515  if(row == 0){
1516  x_PrintFeatures(alnRoInfo, row, master_feat_str, out);
1517  }
1520  }
1521  else {
1522  x_DisplaySequenceIDForPairwise(alnRoInfo,row,has_mismatch,out);
1523  }
1524  //print out sequence line
1525  x_DisplaySequenceLine(alnRoInfo, row, prev_stop[row], out);
1527  //inserts for anchored view
1528  x_DisplayInsertsForQueryAnchored(alnRoInfo,row,out);
1529  }
1530  //display subject sequence feature.
1531  if(row > 0){
1532  x_PrintFeatures(alnRoInfo, row, master_feat_str, out);
1533  }
1534  //display middle line for pairwise
1535  if (row == 0 && ((m_AlignOption & eShowMiddleLine)) && !(m_AlignOption&eMergeAlign)) {
1536  x_DisplayMiddLine(alnRoInfo, row,out);
1537  }
1538  prev_stop[row] = end;
1539  }
1540  if(!alnRoInfo->seqStarts[row].empty()){ //shouldn't need this check
1541  alnRoInfo->seqStarts[row].pop_front();
1542  }
1543  if(!alnRoInfo->seqStops[row].empty()){
1544  alnRoInfo->seqStops[row].pop_front();
1545  }
1546  }//end of displaying rows
1547  out<<"\n";
1548  string formattedString = CNcbiOstrstreamToString(out);
1549  return formattedString;
1550 }
1551 
1552 void CDisplaySeqalign::x_ProcessRowDataSet(SAlnRowInfo *alnRoInfo,int aln_start, vector<int> &prev_stop)
1553 {
1554  size_t actualLineLen=0;
1555  string master_feat_str = NcbiEmptyString;
1556  size_t aln_stop=m_AV->GetAlnStop();
1557 
1558  int rowNum = alnRoInfo->rowNum;
1559 
1560 
1561  //output according to aln coordinates
1562  if(aln_stop-aln_start+1<m_LineLen) {
1563  actualLineLen=aln_stop-aln_start+1;
1564  } else {
1565  actualLineLen=m_LineLen;
1566  }
1567  CAlnMap::TSignedRange curRange(aln_start, aln_start+(int)actualLineLen-1);
1568  alnRoInfo->currPrintSegment = aln_start;
1569  alnRoInfo->currActualLineLen = actualLineLen;
1570  alnRoInfo->currRange = curRange;
1571  //here is each row
1572  for (int row=0; row<rowNum; row++) {
1573  bool hasSequence = true;
1574  if (!(m_AlignOption & eShowGapOnlyLines)) {
1575  hasSequence = curRange.IntersectingWith(alnRoInfo->rowRng[row]);
1576  }
1577  //only output rows that have sequence
1578  if (hasSequence){
1579  int end = alnRoInfo->seqStops[row].front() + 1;
1580  prev_stop[row] = end;
1581  }
1582  if(!alnRoInfo->seqStarts[row].empty()){ //shouldn't need this check
1583  alnRoInfo->seqStarts[row].pop_front();
1584  }
1585  if(!alnRoInfo->seqStops[row].empty()){
1586  alnRoInfo->seqStops[row].pop_front();
1587  }
1588  }//end of displaying rows
1589 }
1590 
1592 {
1593  size_t startLen=0;
1594  int start = alnRoInfo->seqStarts[row].front() + 1; //+1 for 1 based
1595  int end = alnRoInfo->seqStops[row].front() + 1;
1596  int j = alnRoInfo->currPrintSegment;
1597  int actualLineLen = alnRoInfo->currActualLineLen;
1598  //print out sequence line
1599  //adjust space between id and start
1600  CAlignFormatUtil::AddSpace(out, alnRoInfo->maxIdLen-alnRoInfo->seqidArray[row].size() + k_IdStartMargin);
1601  //not to display start and stop number for empty row
1602  if ((j > 0 && end == prev_stop)
1603  || (j == 0 && start == 1 && end == 1)) {
1604  startLen = 0;
1605  } else {
1606  out << start;
1607  startLen=NStr::IntToString(start).size();
1608  }
1609 
1611  x_OutputSeq(alnRoInfo->sequence[row], m_AV->GetSeqId(row), j,
1612  (int)actualLineLen, alnRoInfo->frame[row], row,
1613  (row > 0 && alnRoInfo->colorMismatch)?true:false,
1614  alnRoInfo->masked_regions[row], out);
1616 
1617  //not to display stop number for empty row in the middle
1618  if (!(j > 0 && end == prev_stop)
1619  && !(j == 0 && start == 1 && end == 1)) {
1620  out << end;
1621  }
1622  out<<"\n";
1623 }
1624 
1626 {
1627  list<string> inserts;
1628  string insertPosString; //the one with "\" to indicate insert
1629  TSInsertInformationList insertList;
1630  int j = alnRoInfo->currPrintSegment;
1631  CAlnMap::TSignedRange curRange = alnRoInfo->currRange;
1632  x_GetInserts(insertList, alnRoInfo->insertAlnStart[row],
1633  alnRoInfo->insertStart[row], alnRoInfo->insertLength[row],
1634  j + (int)m_LineLen);
1635  x_FillInserts(row, curRange, j, inserts, insertPosString, insertList);
1636  bool insertAlready = false;
1637  for(list<string>::iterator iter = inserts.begin();
1638  iter != inserts.end(); iter ++){
1639  if(!insertAlready){
1642  string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox","");
1643  out << checkboxBuf;
1644  }
1645 
1646  int base_margin = alnRoInfo->maxIdLen + k_IdStartMargin + alnRoInfo->maxStartLen + k_StartSequenceMargin;
1647 
1648  if (alnRoInfo->show_align_stats) {
1649  base_margin += alnRoInfo->max_align_stats_len + k_AlignStatsMargin;
1650  }
1651  if (alnRoInfo->show_seq_property_label){
1652  base_margin += alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin;
1653  }
1654  CAlignFormatUtil::AddSpace(out, base_margin);
1655  out << insertPosString<<"\n";
1656  }
1658  string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox","");
1659  out << checkboxBuf;
1660  }
1661  int base_margin = alnRoInfo->maxIdLen + k_IdStartMargin + alnRoInfo->maxStartLen + k_StartSequenceMargin;
1662 
1663  if (alnRoInfo->show_align_stats) {
1664  base_margin += alnRoInfo->max_align_stats_len + k_AlignStatsMargin;
1665  }
1666  if (alnRoInfo->show_seq_property_label){
1667  base_margin += alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin;
1668  }
1669  CAlignFormatUtil::AddSpace(out, base_margin);
1670  out<<*iter<<"\n";
1671  insertAlready = true;
1672  }
1673 }
1674 
1676 {
1677  //highlight the seqid for pairwise-with-identity format
1678  if(row>0 && m_AlignOption&eHtml && !(m_AlignOption&eMergeAlign)
1679  && m_AlignOption&eShowIdentity && has_mismatch &&
1681  //highlight the seqid for pairwise-with-identity format
1682  string alnStr = CAlignFormatUtil::MapTemplate(k_DefaultPairwiseWithIdntTempl,"alndata",alnRoInfo->seqidArray[row]);
1683  out<< alnStr;
1684  }
1685  else {
1686  out<<alnRoInfo->seqidArray[row];
1687  }
1688 }
1689 
1691 {
1692  string urlLink = NcbiEmptyString;
1693  //setup url link for seqid
1694  TGi gi = ZERO_GI;
1695  if(m_AlignOption & eHtml){
1696  if(m_AV->GetSeqId(row).Which() == CSeq_id::e_Gi){
1697  gi = m_AV->GetSeqId(row).GetGi();
1698  }
1699  if(!(gi > ZERO_GI)){
1700  gi = CAlignFormatUtil::GetGiForSeqIdList(m_AV->GetBioseqHandle(row).
1701  GetBioseqCore()->GetId());
1702  }
1703  string anchorTmpl,checkBoxTmpl,id_lbl;
1704  bool showAnchor = (row == 0 && (m_AlignOption & eHyperLinkMasterSeqid)) || (row > 0 && (m_AlignOption & eHyperLinkSlaveSeqid));
1705  bool showCheckbox = ((m_AlignOption & eMergeAlign) && (m_AlignOption & eSequenceRetrieval) && m_CanRetrieveSeq) ||
1707  if(showAnchor){
1709  if (m_ResultPositionIndex >= 0){
1710  anchorTmpl = CAlignFormatUtil::MapTemplate(anchorTmpl,"resultPositionIndex",m_ResultPositionIndex);
1711  }
1712  anchorTmpl = CAlignFormatUtil::MapTemplate(anchorTmpl,"id_lbl",gi > ZERO_GI ?
1713  NStr::NumericToString(gi):alnRoInfo->seqidArray[row]);
1714  }
1715  //get sequence checkbox
1716  if(showCheckbox) {
1717  checkBoxTmpl = !(m_AlignOption & eShowCheckBox) ?
1718  ((row == 0) ? "" : k_DefaultCheckboxTempl) : k_DefaultCheckboxExTempl;
1719 
1720  checkBoxTmpl = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox",checkBoxTmpl);
1721  checkBoxTmpl = CAlignFormatUtil::MapTemplate(checkBoxTmpl,"queryNumber",NStr::IntToString(m_QueryNumber));
1723  const CRef<CSeq_id> seqID = FindBestChoice(m_AV->GetBioseqHandle(row).GetBioseqCore()->GetId(), CSeq_id::WorstRank);
1725  if(seqID->IsLocal()) {
1726  id_lbl = "lcl|" + id_lbl;
1727  }
1728  }
1729  }
1730  if(showCheckbox || showAnchor) {
1731  id_lbl = id_lbl.empty() ? ((gi > ZERO_GI) ? NStr::NumericToString(gi) : alnRoInfo->seqidArray[row]) : id_lbl;
1732  string displString = CAlignFormatUtil::MapTemplate(anchorTmpl + checkBoxTmpl,"id_lbl",id_lbl);
1733  out << displString;
1734  }
1735  }
1736 
1737  if(alnRoInfo->show_seq_property_label){
1738  if (row > 0){
1739 
1740  out<<alnRoInfo->seq_property_label[row-1];
1742  (int)alnRoInfo->seq_property_label[row-1].size() + k_SequencePropertyLabelMargin);
1743  } else {
1745  }
1746  }
1747 
1748  if(alnRoInfo->show_align_stats){
1749  if (row > 0){
1750  out<<alnRoInfo->align_stats[row-1];
1752  (int)alnRoInfo->align_stats[row-1].size() + k_AlignStatsMargin);
1753  } else {
1755  }
1756  }
1757  if(m_AlignOption & eHtml){
1758  if((row == 0 && (m_AlignOption & eHyperLinkMasterSeqid)) ||
1759  (row > 0 && (m_AlignOption & eHyperLinkSlaveSeqid))){
1760  m_cur_align = row;
1761  urlLink = x_HTMLSeqIDLink(alnRoInfo, row,gi);
1762  }
1763  }
1764  if(!urlLink.empty()) {
1765  out << urlLink;
1766  }
1767  else {
1768  out<<alnRoInfo->seqidArray[row];
1769  }
1770 }
1771 
1773 {
1774  int j = alnRoInfo->currPrintSegment;
1775  int actualLineLen = alnRoInfo->currActualLineLen;
1776  CSeq_id no_id;
1778  x_OutputSeq(alnRoInfo->middleLine, no_id, j, (int)actualLineLen, 0, row, false, alnRoInfo->masked_regions[row], out);
1779  out<<"\n";
1780 }
1781 
1783 {
1784  size_t aln_stop=m_AV->GetAlnStop();
1785 
1786  aln_vec_info->match = 0;
1787  aln_vec_info->positive = 0;
1788  aln_vec_info->gap = 0;
1789  aln_vec_info->identity = 0;
1790  x_FillIdentityInfo(aln_vec_info->alnRowInfo->sequence[0],
1791  aln_vec_info->alnRowInfo->sequence[1],
1792  aln_vec_info->match,
1793  aln_vec_info->positive,
1794  aln_vec_info->alnRowInfo->middleLine);
1796  aln_vec_info->identity = CAlignFormatUtil::GetPercentMatch(aln_vec_info->match, (int)aln_stop+1);
1797  if(aln_vec_info->identity >= k_ColorMismatchIdentity && aln_vec_info->identity <100 &&
1799  aln_vec_info->alnRowInfo->colorMismatch = true;
1800  }
1801  aln_vec_info->gap = x_GetNumGaps();
1802  }
1803 }
1804 
1806 {
1807  SAlnRowInfo *alnRoInfo = x_PrepareRowData();
1808 
1809  x_DisplayRowData(alnRoInfo,out);
1810  delete alnRoInfo;
1811 }
1812 
1814 {
1815 
1816  //make alnvector
1817  CRef<CAlnVec> avRef;
1818  CConstRef<CSeq_align> finalAln;
1819  if (align.GetSegs().Which() == CSeq_align::C_Segs::e_Std) {
1820  CRef<CSeq_align> densegAln = align.CreateDensegFromStdseg();
1822  finalAln = densegAln->CreateTranslatedDensegFromNADenseg();
1823  } else {
1824  finalAln = densegAln;
1825  }
1826  } else if(align.GetSegs().Which() ==
1829  finalAln = align.CreateTranslatedDensegFromNADenseg();
1830  } else {
1831  finalAln = &align;
1832  }
1833  } else if(align.GetSegs().Which() ==
1835  CRef<CSeq_align> densegAln =
1838  finalAln = densegAln->CreateTranslatedDensegFromNADenseg();
1839  } else {
1840  finalAln = densegAln;
1841  }
1842  } else {
1844  "Seq-align should be Denseg, Stdseg or Dendiag!");
1845  }
1846  CRef<CDense_seg> finalDenseg(new CDense_seg);
1847  const CTypeConstIterator<CDense_seg> ds = ConstBegin(*finalAln);
1848  if((ds->IsSetStrands()
1849  && ds->GetStrands().front()==eNa_strand_minus)
1850  && !(ds->IsSetWidths() && ds->GetWidths()[0] == 3)){
1851  //show plus strand if master is minus for non-translated case
1852  finalDenseg->Assign(*ds);
1853  finalDenseg->Reverse();
1854  avRef = new CAlnVec(*finalDenseg, m_Scope);
1855  } else {
1856  avRef = new CAlnVec(*ds, m_Scope);
1857  }
1858 
1860 
1861  return avRef;
1862 }
1863 
1864 //inits m_FeatObj,m_featScope,m_CanRetrieveSeq,m_ConfigFile,m_Reg,m_LinkoutOrder,m_DynamicFeature
1866 {
1867  //scope for feature fetching
1870  & eShowGeneFeature)){
1873  m_featScope = new CScope(*m_FeatObj); //for seq feature fetch
1874  string name = CGBDataLoader::GetLoaderNameFromArgs();
1875  m_featScope->AddDataLoader(name);
1876  }
1879  //set config file
1880  m_ConfigFile = new CNcbiIfstream(".ncbirc");
1882 
1883  if(!m_BlastType.empty()) m_LinkoutOrder = m_Reg->Get(m_BlastType,"LINKOUT_ORDER");
1885 
1886  string feat_file = m_Reg->Get("FEATURE_INFO", "FEATURE_FILE");
1887  string feat_file_index = m_Reg->Get("FEATURE_INFO",
1888  "FEATURE_FILE_INDEX");
1889  if(feat_file != NcbiEmptyString && feat_file_index != NcbiEmptyString){
1890  m_DynamicFeature = new CGetFeature(feat_file, feat_file_index);
1891  }
1892  }
1893  if(m_AlignOption&eLinkout) {
1894  string user_url = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : "";
1895 
1896 
1900 
1901  CRef<CSeq_id> wid = FindBestChoice(m_Scope.GetBioseqHandle(actual_aln_list.Get().front()->GetSeq_id(0)).GetBioseqCore()->GetId(), CSeq_id::WorstRank);
1903  }
1904 }
1905 
1907 {
1908  CSeq_align_set actual_aln_list;
1910  *m_SeqalignSetRef);
1911  if (actual_aln_list.Get().empty()){
1912  return;
1913  }
1914 
1915  //inits m_FeatObj,m_featScope,m_CanRetrieveSeq,m_ConfigFile,m_Reg,m_LinkoutOrder,m_DynamicFeature
1916  x_InitAlignParams(actual_aln_list);
1917 
1918  //get sequence
1920  out<<"<form name=\"getSeqAlignment"<<m_QueryNumber<<"\">\n";
1921  }
1922  //begin to display
1923  int num_align = 0;
1925  m_currAlignHsp = 0;
1926  unique_ptr<CObjectOStream> out2(CObjectOStream::Open(eSerial_AsnText, out));
1927  //*out2 << *m_SeqalignSetRef;
1928  //get segs first and get hsp number - m_segs,m_Hsp,m_subjRange
1929  x_PreProcessSeqAlign(actual_aln_list);
1930  if(!(m_AlignOption&eMergeAlign)){
1931  /*pairwise alignment. Note we can't just show each alnment as we go
1932  because we will need seg information form all hsp's with the same id
1933  for genome url link. As a result we show hsp's with the same id
1934  as a group*/
1935 
1936  CConstRef<CSeq_id> previousId, subid;
1937  for (CSeq_align_set::Tdata::const_iterator
1938  iter = actual_aln_list.Get().begin();
1939  iter != actual_aln_list.Get().end()
1940  && num_align<m_NumAlignToShow; iter++, num_align++) {
1941 
1942  //make alnvector
1943  CRef<CAlnVec> avRef = x_GetAlnVecForSeqalign(**iter);
1944 
1945  if(!(avRef.Empty())){
1946  //Note: do not switch the set order per calnvec specs.
1948  avRef->SetGenCode(m_MasterGeneticCode, 0);
1949  try{
1950  const CBioseq_Handle& handle = avRef->GetBioseqHandle(1);
1951  if(handle){
1952 
1953  //save the current alnment regardless
1954  CRef<SAlnInfo> alnvecInfo(new SAlnInfo);
1955  int num_ident;
1957  alnvecInfo->score,
1958  alnvecInfo->bits,
1959  alnvecInfo->evalue,
1960  alnvecInfo->sum_n,
1961  num_ident,
1962  alnvecInfo->use_this_seqid,
1963  alnvecInfo->comp_adj_method);
1964  alnvecInfo->alnvec = avRef;
1965 
1966  subid=&(avRef->GetSeqId(1));
1967  bool showDefLine = previousId.Empty() || !subid->Match(*previousId);
1968  x_DisplayAlnvecInfo(out, alnvecInfo,showDefLine);
1969 
1970  previousId = subid;
1971  }
1972  } catch (const CException&){
1973  out << "Sequence with id "
1974  << (avRef->GetSeqId(1)).GetSeqIdString().c_str()
1975  <<" no longer exists in database...alignment skipped\n";
1976  continue;
1977  }
1978  }
1979  }
1980 
1981  } else if(m_AlignOption&eMergeAlign){ //multiple alignment
1982  vector< CRef<CAlnMix> > mix(k_NumFrame);
1983  //each for one frame for translated alignment
1984  for(int i = 0; i < k_NumFrame; i++){
1985  mix[i] = new CAlnMix(m_Scope);
1986  }
1987  num_align = 0;
1988  vector<CRef<CSeq_align_set> > alnVector(k_NumFrame);
1989  for(int i = 0; i < k_NumFrame; i ++){
1990  alnVector[i] = new CSeq_align_set;
1991  }
1992  for (CSeq_align_set::Tdata::const_iterator
1993  alnIter = actual_aln_list.Get().begin();
1994  alnIter != actual_aln_list.Get().end()
1995  && num_align<m_NumAlignToShow; alnIter ++, num_align++) {
1996 
1997  const CBioseq_Handle& subj_handle =
1998  m_Scope.GetBioseqHandle((*alnIter)->GetSeq_id(1));
1999  if(subj_handle){
2000  //need to convert to denseg for stdseg
2001  if((*alnIter)->GetSegs().Which() == CSeq_align::C_Segs::e_Std) {
2002  CTypeConstIterator<CStd_seg> ss = ConstBegin(**alnIter);
2003  CRef<CSeq_align> convertedDs =
2004  (*alnIter)->CreateDensegFromStdseg();
2005  if((convertedDs->GetSegs().GetDenseg().IsSetWidths()
2006  && convertedDs->GetSegs().GetDenseg().GetWidths()[0] == 3)
2008  //only do this for translated master
2009  int frame = s_GetStdsegMasterFrame(*ss, m_Scope);
2010  switch(frame){
2011  case 1:
2012  alnVector[0]->Set().push_back(convertedDs);
2013  break;
2014  case 2:
2015  alnVector[1]->Set().push_back(convertedDs);
2016  break;
2017  case 3:
2018  alnVector[2]->Set().push_back(convertedDs);
2019  break;
2020  case -1:
2021  alnVector[3]->Set().push_back(convertedDs);
2022  break;
2023  case -2:
2024  alnVector[4]->Set().push_back(convertedDs);
2025  break;
2026  case -3:
2027  alnVector[5]->Set().push_back(convertedDs);
2028  break;
2029  default:
2030  break;
2031  }
2032  }
2033  else {
2034  alnVector[0]->Set().push_back(convertedDs);
2035  }
2036  } else if((*alnIter)->GetSegs().Which() == CSeq_align::C_Segs::
2037  e_Denseg){
2038  alnVector[0]->Set().push_back(*alnIter);
2039  } else if((*alnIter)->GetSegs().Which() == CSeq_align::C_Segs::
2040  e_Dendiag){
2041  alnVector[0]->Set().\ push_back(CAlignFormatUtil::CreateDensegFromDendiag(**alnIter));
2042  } else {
2044  "Input Seq-align should be Denseg, Stdseg or Dendiag!");
2045  }
2046  }
2047  }
2048  for(int i = 0; i < (int)alnVector.size(); i ++){
2049  bool hasAln = false;
2051  alnRef = ConstBegin(*alnVector[i]); alnRef; ++alnRef){
2053  //*out2 << *ds;
2054  try{
2056  mix[i]->Add(*ds, CAlnMix::fForceTranslation);
2057  } else {
2058  if (ds->IsSetWidths() &&
2059  ds->GetWidths()[0] == 3 &&
2060  ds->IsSetStrands() &&
2061  ds->GetStrands().front()==eNa_strand_minus){
2062  mix[i]->Add(*ds, CAlnMix::fNegativeStrand);
2063  } else {
2064  mix[i]->Add(*ds, CAlnMix::fPreserveRows);
2065  }
2066  }
2067  } catch (const CException& e){
2068  _TRACE("Warning: " << e.what());
2069  continue;
2070  }
2071  hasAln = true;
2072  }
2073  if(hasAln){
2074  // *out2<<*alnVector[i];
2075  mix[i]->Merge(CAlnMix::fMinGap
2078  //*out2<<mix[i]->GetDenseg();
2079  }
2080  }
2081 
2082  int numDistinctFrames = 0;
2083  for(int i = 0; i < (int)alnVector.size(); i ++){
2084  if(!alnVector[i]->Get().empty()){
2085  numDistinctFrames ++;
2086  }
2087  }
2088  out<<"\n";
2089  for(int i = 0; i < k_NumFrame; i ++){
2090  try{
2091  CRef<CAlnVec> avRef (new CAlnVec (mix[i]->GetDenseg(),
2092  m_Scope));
2095  avRef->SetGenCode(m_MasterGeneticCode, 0);
2096  m_AV = avRef;
2097 
2098  if(numDistinctFrames > 1){
2099  out << "For reading frame " << k_FrameConversion[i]
2100  << " of query sequence:\n\n";
2101  }
2103  } catch (CException e){
2104  continue;
2105  }
2106  }
2107  }
2109  out<<"</form>\n";
2110  }
2111 }
2112 
2113 
/// Compare two aligned sequence strings position by position and fill in
/// the identity statistics and the middle line.
/// Only the first min(sequence_standard.size(), sequence.size()) positions
/// are compared.
/// @param sequence_standard: the reference sequence string
/// @param sequence: the sequence string compared against sequence_standard
/// @param match: [out] reset to 0, then incremented for every position where
/// both strings hold the same character
/// @param positive: [out] reset to 0, then incremented for every non-identical
/// position whose m_Matrix entry is > 0 when m_AlignType has eProt set
/// @param middle_line: [in,out] receives '|', the residue itself, '+' or ' '
/// depending on m_MidLineStyle and on the comparison result
/// NOTE(review): listing lines 2122, 2127, 2139 and 2145 (the condition
/// headers that guard the middle_line writes) are absent from this extract,
/// so the exact guard conditions cannot be stated here.
2114 void CDisplaySeqalign::x_FillIdentityInfo(const string& sequence_standard,
2115  const string& sequence ,
2116  int& match, int& positive,
2117  string& middle_line)
2118 {
2119  match = 0;
2120  positive = 0;
2121  int min_length=min<int>((int)sequence_standard.size(), (int)sequence.size());
2123  middle_line = sequence;
2124  }
2125  for(int i=0; i<min_length; i++){
2126  if(sequence_standard[i]==sequence[i]){
      //identical position: mark it with a bar or with the residue itself
2128  if(m_MidLineStyle == eBar ) {
2129  middle_line[i] = '|';
2130  } else if (m_MidLineStyle == eChar){
2131  middle_line[i] = sequence[i];
2132  }
2133  }
2134  match ++;
2135  } else {
      //non-identical position: for protein, a positive matrix score counts
      //as a "positive".
      //NOTE(review): the matrix is indexed with a plain char cast to int; on
      //platforms where char is signed a byte >= 0x80 would give a negative
      //index - confirm that inputs are always 7-bit residue codes.
2136  if ((m_AlignType&eProt)
2137  && m_Matrix[(int)sequence_standard[i]][(int)sequence[i]] > 0){
2138  positive ++;
2140  if (m_MidLineStyle == eChar){
2141  middle_line[i] = '+';
2142  }
2143  }
2144  } else {
2146  middle_line[i] = ' ';
2147  }
2148  }
2149  }
2150  }
2151 }
2152 
2153 
/// Return the linkout value for a sequence id.
/// The lookup is attempted only when the eLinkout bit is set in
/// m_AlignOption; otherwise 0 is returned.
/// If the lookup throws a CException, the problem is reported through
/// ERR_POST and cerr, the eLinkout bit is cleared from m_AlignOption so
/// that later calls skip the lookup, and 0 is returned.
/// @param id: the sequence id to look up
/// @return: the linkout value, or 0 when linkouts are disabled, when
/// m_LinkoutDB evaluates to false, or when the lookup failed
/// NOTE(review): listing lines 2155 (presumably the opening brace) and 2160
/// (the actual lookup expression on m_LinkoutDB) are absent from this
/// extract.
2154 int CDisplaySeqalign::x_GetLinkout(const objects::CSeq_id & id)
2156  int linkout = 0;
2157  if(m_AlignOption & eLinkout) {
2158  try {
2159  linkout = m_LinkoutDB
2161  : 0;
2162  }
2163  catch (const CException & e) {
2164  ERR_POST("Problem with linkoutdb: " + e.GetMsg());
2165  cerr << "[BLAST FORMATTER EXCEPTION] Problem with linkoutdb: " << e.GetMsg() << endl;
2166  m_AlignOption &= ~eLinkout; //Remove linkout bit for the rest of sequences
2167  linkout = 0;
2168  }
2169  }
2170  return linkout;
2171 }
2172 
2173 
2174 
2176  const CBioseq_Handle& bsp_handle,
2177  list<string> &use_this_seqid,
2178  TGi firstGi,
2179  int deflineNum)
2180 {
2181  SAlnDispParams *alnDispParams = NULL;
2182  const int kMaxDeflineNum = 10;
2183 
2184  bool isNa = bsp_handle.GetBioseqCore()->IsNa();
2185  int seqLength = (int)bsp_handle.GetBioseqLength();
2186 
2187  const list<CRef<CSeq_id> > ids = bdl->GetSeqid();
2189 
2191  TGi gi_in_use_this_gi = ZERO_GI;
2192  bool isGiList = false;
2193  bool match = CAlignFormatUtil::MatchSeqInSeqList(gi, wid, use_this_seqid,&isGiList);
2194  if(match && isGiList) gi_in_use_this_gi = gi;
2195 
2196  if(use_this_seqid.empty() || match) {
2197  firstGi = (firstGi == ZERO_GI) ? gi_in_use_this_gi : firstGi;
2198  alnDispParams = new SAlnDispParams();
2199  alnDispParams->gi = gi;
2200  alnDispParams->seqID = FindBestChoice(ids, CSeq_id::WorstRank); //change to use use_this_seq
2201  alnDispParams->hasTextSeqID = CAlignFormatUtil::GetTextSeqID(alnDispParams->seqID);
2202  alnDispParams->ids = bsp_handle.GetBioseqCore()->GetId();
2203  alnDispParams->label = CAlignFormatUtil::GetLabel(alnDispParams->seqID,CSeq_id::eContent);//Just accession without db part like ref| or pdbd|
2204 
2205 
2206  TTaxId taxid = ZERO_TAX_ID;
2207  string type_temp = m_BlastType;
2208  type_temp = NStr::TruncateSpaces(NStr::ToLower(type_temp));
2209  if(bdl->IsSetTaxid() && bdl->CanGetTaxid()){
2210  taxid = bdl->GetTaxid();
2211  }
2212 
2213  alnDispParams->seqUrlInfo = x_InitSeqUrl(gi_in_use_this_gi,alnDispParams->label,taxid,ids);
2214  if(m_AlignOption&eHtml){
2215  alnDispParams->id_url = CAlignFormatUtil::GetIDUrl(alnDispParams->seqUrlInfo,&ids);
2216  }
2218  int linkout = 0;
2219  if (alnDispParams->hasTextSeqID) {
2220  linkout = (deflineNum < kMaxDeflineNum) ? CAlignFormatUtil::GetSeqLinkoutInfo((CBioseq::TId &)ids,
2221  &m_LinkoutDB,
2223  alnDispParams->gi) : 0;
2225  }
2226 
2227  if(linkout != 0) {
2228  list<string> linkout_url = CAlignFormatUtil::
2229  GetLinkoutUrl(linkout, ids,
2230  m_Rid,
2232  isNa,
2233  firstGi,
2234  false, true, m_cur_align,m_PreComputedResID);
2235  ITERATE(list<string>, iter_linkout, linkout_url){
2236  alnDispParams->linkoutStr += *iter_linkout;
2237  }
2238  }
2239  if(seqLength > k_GetSubseqThreshhold){
2240  alnDispParams->dumpGnlUrl = x_GetDumpgnlLink(ids);
2241  }
2242 
2243  }
2244  if(bdl->IsSetTitle()){
2245  alnDispParams->title = bdl->GetTitle();
2246  }
2247  if(alnDispParams->title.empty()) {
2248  alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle);
2249  }
2250  }
2251  return alnDispParams;
2252 }
2253 
2254 
2255 
2257 {
2258  SAlnDispParams *alnDispParams = new SAlnDispParams();
2259  alnDispParams->gi = FindGi(bsp_handle.GetBioseqCore()->GetId());
2260  alnDispParams->seqID = FindBestChoice(bsp_handle.GetBioseqCore()->GetId(),CSeq_id::WorstRank);
2261  alnDispParams->label = CAlignFormatUtil::GetLabel(alnDispParams->seqID,CSeq_id::eContent);
2262  if(m_AlignOption&eHtml){
2263  alnDispParams->ids = bsp_handle.GetBioseqCore()->GetId();
2264  alnDispParams->seqUrlInfo = x_InitSeqUrl(alnDispParams->gi,alnDispParams->label,ZERO_TAX_ID,alnDispParams->ids);
2265  alnDispParams->id_url = CAlignFormatUtil::GetIDUrl(alnDispParams->seqUrlInfo,&alnDispParams->ids);
2266  }
2267  alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle);
2268  alnDispParams->hasTextSeqID = CAlignFormatUtil::GetTextSeqID(alnDispParams->seqID);
2269  return alnDispParams;
2270 }
2271 
/// Build the defline text shown above an alignment for one subject sequence
/// and return it as a string.
/// When the bioseq has no BLAST defline set (CSeqDB::ExtractBlastDefline
/// yields nothing) a single ">" line is built from the bioseq itself;
/// otherwise one line is written per defline accepted by
/// x_FillAlnDispParams(), the first one prefixed with ">" and the following
/// ones with " ". A "Length=" line is appended at the end.
/// Side effects visible here: m_cur_align is incremented and
/// aln_vec_info->id_label is set in HTML mode.
/// @param bsp_handle: handle of the subject bioseq
/// @param aln_vec_info: supplies use_this_seqid and receives id_label
/// @return: the formatted defline text
/// NOTE(review): several listing lines are absent from this extract (among
/// them 2274, 2276-2277, 2293, 2309, 2311-2312, 2335, 2356, 2398, 2400-2401,
/// 2428, 2459); the output stream "out" and the buffer "buf" used below are
/// presumably declared on those lines.
2272 string
2273 CDisplaySeqalign::x_PrintDefLine(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info)
2275 {
2278  /* Facilitates comparing formatted output using diff */
2279  static string kLengthString("Length=");
2280 #ifdef CTOOLKIT_COMPATIBLE
2281  static bool value_set = false;
2282  if ( !value_set ) {
2283  if (getenv("CTOOLKIT_COMPATIBLE")) {
2284  kLengthString.assign(" Length = ");
2285  }
2286  value_set = true;
2287  }
2288 #endif /* CTOOLKIT_COMPATIBLE */
2289 
2290  if(bsp_handle){
2291  const CRef<CSeq_id> wid =
2292  FindBestChoice(bsp_handle.GetBioseqCore()->GetId(),
2294 
2295  const CRef<CBlast_def_line_set> bdlRef
2296  = CSeqDB::ExtractBlastDefline(bsp_handle);
      //the conditional expression yields a temporary list (a copy of
      //bdlRef->Get() or an empty list) that is bound to the const reference
2297  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
2298  bool isFirst = true;
2299  TGi firstGi = ZERO_GI;
2300 
2301  m_cur_align++;
2302 
2303  if(bdl.empty()){ //no blast defline struct, should be no such case now
2304  //actually not so fast...as we now fetch from entrez even when it's not in blast db
2305  //there is no blast defline in such case.
2306  CRef<SAlnDispParams> alnDispParams
2307  (x_FillAlnDispParams(bsp_handle));
2308  out << ">";
2310  && (m_AlignOption&eHtml) && m_CanRetrieveSeq && isFirst) {
2313  alnDispParams->gi > ZERO_GI ?
2314  NStr::NumericToString(alnDispParams->gi) : alnDispParams->label);
2315  out << buf;
2316  }
2317 
2318  if(m_AlignOption&eHtml){
2319 
2320  aln_vec_info->id_label = (alnDispParams->gi != ZERO_GI) ?
2321  NStr::NumericToString(alnDispParams->gi) : alnDispParams->label;
2322 
2323  out<<alnDispParams->id_url;
2324  }
2325 
2326  if(m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI &&
2327  !alnDispParams->seqID->IsGi()){
2328  out<<"gi|"<<alnDispParams->gi<<"|";
2329  }
      //the id text is printed only when the id is neither a gnl|BL_ORD_ID
      //nor a lcl|Subject_ id
2330  if(!((alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) ||
2331  alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos)){
2332  if (strncmp(alnDispParams->seqID->AsFastaString().c_str(), "lcl|", 4) == 0)
2333  out << alnDispParams->label;
2334  else {
2336  alnDispParams->gi > ZERO_GI)) {
2337  alnDispParams->seqID->WriteAsFasta(out);
2338  }
2339  else {
2340  out << CAlignFormatUtil::GetBareId(*alnDispParams->seqID);
2341  }
2342 
2343  }
2344  }
2345  if(m_AlignOption&eHtml){
2346  if(alnDispParams->id_url != NcbiEmptyString){
2347  out<<"</a>";
2348  }
2349  if(alnDispParams->gi != ZERO_GI){
2350  out<<"<a name="<<alnDispParams->gi<<"></a>";
2351  } else {
2352  out<<"<a name="<<alnDispParams->seqID->GetSeqIdString()<<"></a>";
2353  }
2354  }
2355  out <<" ";
2357  CHTMLHelper::HTMLEncode(alnDispParams->title) :
2358  alnDispParams->title);
2359 
2360  out<<"\n";
2361 
2362  } else {
2363  //print each defline
2364  bool bMultipleDeflines = false;
2365  int numBdl = 0;
      //upper bound on the number of deflines that can be shown: all of
      //them, or as many as there are entries in use_this_seqid
2366  int maxNumBdl = (aln_vec_info->use_this_seqid.empty()) ? bdl.size() : aln_vec_info->use_this_seqid.size();
2367  for(list< CRef< CBlast_def_line > >::const_iterator
2368  iter = bdl.begin(); iter != bdl.end(); iter++){
2369 
      //a null result means this defline is filtered out and is skipped
2370  CRef<SAlnDispParams> alnDispParams
2371  (x_FillAlnDispParams(*iter, bsp_handle,
2372  aln_vec_info->use_this_seqid,
2373  firstGi, numBdl));
2374 
2375 
2376 
2377  if(alnDispParams) {
2378  numBdl++;
2379  if(isFirst){
2380  out << ">";
2381  } else{
2382  out << " ";
2383  if (m_AlignOption&eHtml && (int)(maxNumBdl) > k_MaxDeflinesToShow && numBdl == k_MinDeflinesToShow + 1){
2384  //Show first 3 deflines out of 8 or more, hide the rest
2385  string mdlTag = aln_vec_info->id_label;
2386  //string mdlTag = id_label + "_" + NStr::IntToString(m_cur_align);
2387  out << "<a href=\"#\" title=\"Other sequence titles\" onmouseover=\"showInfo(this)\" class=\"resArrowLinkW mdl hiding\" id=\"" <<
2388  mdlTag << "\">" << maxNumBdl - k_MinDeflinesToShow << " more sequence titles" << "</a>\n";
2389 
2390  out << " <div id=\"" << "info_" << mdlTag << "\" class=\"helpbox mdlbox hidden\">";
2391  bMultipleDeflines = true;
2392  }
2393  }
2394 
2395  if(isFirst){
2396  firstGi = alnDispParams->gi;
2397  }
2399  && (m_AlignOption&eHtml) && m_CanRetrieveSeq && isFirst) {
2402  alnDispParams->gi > ZERO_GI ?
2403  NStr::NumericToString(alnDispParams->gi) : alnDispParams->label);
2404  out << buf;
2405  }
2406 
2407  if(m_AlignOption&eHtml){
2408  out<< alnDispParams->id_url;
2409  }
2410 
2411  if(m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI &&
2412  !alnDispParams->seqID->IsGi()){
2413  out<<"gi|"<<alnDispParams->gi<<"|";
2414  }
      //NOTE(review): here the negation covers only the gnl|BL_ORD_ID test
      //(!(A) || B), whereas the no-defline branch above negates the whole
      //disjunction (!(A || B)) - confirm which of the two is intended.
2415  if(!(alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) ||
2416  alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos){
2417  if (strncmp(alnDispParams->seqID->AsFastaString().c_str(), "lcl|", 4) == 0) {
2418  out << alnDispParams->label;
2419  }
2420  else {
2421  if (m_UseLongSeqIds ||
2422  ((m_AlignOption & eShowGi) &&
2423  alnDispParams->gi > ZERO_GI)) {
2424 
2425  alnDispParams->seqID->WriteAsFasta(out);
2426  }
2427  else {
2429  *alnDispParams->seqID);
2430  }
2431  }
2432  }
2433  if(m_AlignOption&eHtml){
2434  if(alnDispParams->id_url != NcbiEmptyString){
2435  out<<"</a>";
2436  }
2437  if(alnDispParams->gi != ZERO_GI){
2438  out<<"<a name="<<alnDispParams->gi<<"></a>";
2439  aln_vec_info->id_label = NStr::NumericToString(alnDispParams->gi);
2440  } else {
2441  out<<"<a name="<<alnDispParams->seqID->GetSeqIdString(true)<<"></a>";
2442  aln_vec_info->id_label = alnDispParams->label;
2443  }
2444  if(m_AlignOption&eLinkout){
2445 
2446  out <<" ";
2447  out << alnDispParams->linkoutStr;
2448  if(!alnDispParams->dumpGnlUrl.empty()) {
2449 
2450  out<<alnDispParams->dumpGnlUrl;
2451  }
2452  }
2453  }
2454 
2455  if (out.tellp() > 1L) {
2456  out << " ";
2457  }
2458  if(!alnDispParams->title.empty()) {
2460  CHTMLHelper::
2461  HTMLEncode(alnDispParams->title) :
2462  alnDispParams->title);
2463  }
2464  out<<"\n";
2465  isFirst = false;
2466  }
2467  }
      //close the <div> opened for the hidden extra deflines
2468  if(m_AlignOption&eHtml && bMultipleDeflines) {
2469  out << "</div>";
2470  }
2471  }
2472  }
2473  out<<kLengthString<<bsp_handle.GetBioseqLength()<<"\n";
2474  string formattedString = CNcbiOstrstreamToString(out);
2475  return formattedString;
2476 }
2477 
2478 
/// Write one segment of a display line (len characters of sequence starting
/// at offset start) to out, masking the parts covered by loc_list and, for
/// HTML output, styling the masked stretches.
/// Masking is applied only when id is set (id.Which() != e_not_set), and
/// only for loc_list entries whose interval id matches id and whose frame
/// equals frame. The replacement depends on m_SeqLocChar: eX turns
/// alphabetic characters into 'X', eN writes 'n', eLowerCase lower-cases.
/// @param sequence: the whole line the segment is taken from
/// @param id: id of the sequence on this line; an unset id disables masking
/// @param start: offset of the first character to output
/// @param len: number of characters to output
/// @param frame: frame that a seqloc must have to be applied
/// @param row: alignment row (not read by any line visible in this extract)
/// @param color_mismatch: part of the condition for coloring mismatches on
/// unmasked lines
/// @param loc_list: seqlocs (with their alignment ranges) to mask
/// @param out: the output stream
/// NOTE(review): listing lines 2531, 2535 and 2543 are absent from this
/// extract; they presumably hold the start of the mismatch-coloring
/// condition, the coloring call, and the declaration of styledSqLocTmpl.
2479 void CDisplaySeqalign::x_OutputSeq(string& sequence, const CSeq_id& id,
2480  int start, int len, int frame, int row,
2481  bool color_mismatch,
2482  const TSAlnSeqlocInfoList& loc_list,
2483  CNcbiOstream& out) const
2484 {
2485  _ASSERT((int)sequence.size() > start);
2486  list<CRange<int> > actualSeqloc;
2487  string actualSeq = sequence.substr(start, len);
2488 
2489  if(id.Which() != CSeq_id::e_not_set){
2490  /*only do this for sequence but not for others like middle line,
2491  features*/
2492  ITERATE(TSAlnSeqlocInfoList, iter, loc_list) {
2493  int from=(*iter)->aln_range.GetFrom();
2494  int to=(*iter)->aln_range.GetTo();
2495  int locFrame = (*iter)->seqloc->GetFrame();
2496  if(id.Match((*iter)->seqloc->GetInterval().GetId())
2497  && locFrame == frame){
2498  bool isFirstChar = true;
2499  CRange<int> eachSeqloc(0, 0);
2500  //go through each residue and mask it
2501  for (int i=max<int>(from, start);
2502  i<=min<int>(to, start+len -1); i++){
2503  //store seqloc start for font tag below
2504  if ((m_AlignOption & eHtml) && isFirstChar){
2505  isFirstChar = false;
2506  eachSeqloc.Set(i, eachSeqloc.GetTo());
2507  }
2508  if (m_SeqLocChar==eX){
2509  if(isalpha((unsigned char) actualSeq[i-start])){
2510  actualSeq[i-start]='X';
2511  }
2512  } else if (m_SeqLocChar==eN){
2513  actualSeq[i-start]='n';
2514  } else if (m_SeqLocChar==eLowerCase){
2515  actualSeq[i-start]=tolower((unsigned char) actualSeq[i-start]);
2516  }
2517  //store seqloc end for font tag below
      //NOTE(review): the loop runs up to min(to, start+len-1) but this
      //test compares against min(to, start+len); when the seqloc extends
      //past this segment the end is therefore never recorded here and
      //stays 0 - confirm whether that is intended.
2518  if ((m_AlignOption & eHtml)
2519  && i == min<int>(to, start+len)){
2520  eachSeqloc.Set(eachSeqloc.GetFrom(), i);
2521  }
2522  }
      //a range still at (0, 0) means nothing was recorded for this seqloc
2523  if(!(eachSeqloc.GetFrom()==0&&eachSeqloc.GetTo()==0)){
2524  actualSeqloc.push_back(eachSeqloc);
2525  }
2526  }
2527  }
2528  }
2529 
2530  if(actualSeqloc.empty()){//no need to add font tag
2532  && color_mismatch && (m_AlignOption & eShowIdentity)){
2533  //color the mismatches. Only for rows without mask.
2534  //Otherwise it may conflict with mask font tag.
2536  } else {
2537  out<<actualSeq;
2538  }
2539  } else {//now deal with font tag for mask for html display
2540  bool endTag = false;
2541  bool frontTag = false;
2542  string refStr;
2544  for (int i = 0; i < (int)actualSeq.size(); i ++){
2545  bool startStyledOutput = false,stopStyledOutput = false;
      //check whether a recorded range starts or ends at this character;
      //range positions are converted to offsets within actualSeq
2546  for (list<CRange<int> >::iterator iter=actualSeqloc.begin();
2547  iter!=actualSeqloc.end(); iter++){
2548  int from = (*iter).GetFrom() - start;
2549  int to = (*iter).GetTo() - start;
2550  //start tag
2551  if(from == i){
2552  frontTag = true;
2553  }
2554  if(to == i && to > 0){
2555  endTag = true;
2556  }
2557  }
2558  startStyledOutput = frontTag;
2559  stopStyledOutput = endTag && frontTag;
      //a character not consumed by the styled output is written as is
2560  bool isStyled = s_ProcessStyledContent(actualSeq,i,startStyledOutput,stopStyledOutput,styledSqLocTmpl ,refStr,out);
2561  if(!isStyled) out<<actualSeq[i];
2562  if(endTag && frontTag){
2563  endTag = false;
2564  frontTag = false;
2565  }
2566  }
2567  }
2568 }
2569 
2570 
2573  int gap = 0;
2574  for (int row=0; row<m_AV->GetNumRows(); row++) {
2575  CRef<CAlnMap::CAlnChunkVec> chunk_vec
2576  = m_AV->GetAlnChunks(row, m_AV->GetSeqAlnRange(0));
2577  for (int i=0; i<chunk_vec->size(); i++) {
2578  CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];
2579  if (chunk->IsGap()) {
2580  gap += (chunk->GetAlnRange().GetTo()
2581  - chunk->GetAlnRange().GetFrom() + 1);
2582  }
2583  }
2584  }
2585  return gap;
2586 }
2587 
2588 
/// Collect display information for the features of type choice that
/// overlap the aligned range of one alignment row, appending one
/// SAlnFeatureInfo per feature to the feature list.
/// Features are looked up only when row is 0 or the row's bioseq has a gi
/// (FindGi() > ZERO_GI), and only when scope can resolve the row's id.
/// A feature is skipped when none of its location intervals lies on this
/// bioseq or when those intervals are on different strands.
/// For CSeqFeatData::e_Gene the feature is drawn with '^'. For
/// CSeqFeatData::e_Cdregion a feature line as long as the alignment is
/// built: the feature's aligned range is pre-filled with k_IntronChar and
/// exon positions are then overwritten with characters taken from the
/// string returned by s_GetConcatenatedExon().
/// @param feature: list that receives the feature display info
/// @param scope: scope used to resolve the bioseq and to iterate features
/// @param choice: the feature type to fetch
/// @param row: the alignment row (in m_AV) to fetch features for
/// @param sequence: the aligned sequence string of the row; positions that
/// hold the row's gap char are blanked on the cds line
/// @param feat_range_list: when fill_feat_range is true, receives for each
/// cds the non-empty range list produced by s_MapSlaveFeatureToMaster()
/// @param feat_seq_strand: receives the strand matching each entry added to
/// feat_range_list
/// @param fill_feat_range: whether to fill the two lists above
/// NOTE(review): listing lines 2611 (a statement after the SAnnotSelector
/// construction) and 2897 (the remaining operands of the conditional
/// argument to s_FillCdsStartPosition) are absent from this extract.
2589 void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature,
2590  CScope& scope,
2591  CSeqFeatData::E_Choice choice,
2592  int row, string& sequence,
2593  list<list<CRange<TSeqPos> > >& feat_range_list,
2594  list<ENa_strand>& feat_seq_strand,
2595  bool fill_feat_range ) const
2596 {
2597  //Only fetch features for seq that has a gi unless it's master seq
2598  const CSeq_id& id = m_AV->GetSeqId(row);
2599 
2600  TGi gi_temp = FindGi(m_AV->GetBioseqHandle(row).GetBioseqCore()->GetId());
2601  if(gi_temp > ZERO_GI || row == 0){
2602  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
2603  if(handle){
      //sequence range covered by the alignment; min/max below make the
      //location independent of which end is the larger coordinate
2604  TSeqPos seq_start = m_AV->GetSeqPosFromAlnPos(row, 0);
2605  TSeqPos seq_stop = m_AV->GetSeqPosFromAlnPos(row, m_AV->GetAlnStop());
2606  CRef<CSeq_loc> loc_ref =
2607  handle.
2608  GetRangeSeq_loc(min(seq_start, seq_stop),
2609  max(seq_start, seq_stop));
2610  SAnnotSelector sel(choice);
2612 
2613  for (CFeat_CI feat(scope, *loc_ref, sel); feat; ++feat) {
2614  const CSeq_loc& loc = feat->GetLocation();
2615  bool has_id = false;
2616  list<CSeq_loc_CI::TRange> isolated_range;
2617  ENa_strand feat_strand = eNa_strand_plus, prev_strand = eNa_strand_plus;
2618  bool first_loc = true, mixed_strand = false, mix_loc = false;
2619  CRange<TSeqPos> feat_seq_range;
2620  TSeqPos other_seqloc_length = 0;
2621  //isolate the seqloc corresponding to feature
2622  //as this is easier to manipulate and remove seqloc that is
2623  //not from the bioseq we are dealing with
2624  for(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it){
2625  const CSeq_id& id_it = loc_it.GetSeq_id();
2626  if(IsSameBioseq(id_it, id, &scope)){
2627  isolated_range.push_back(loc_it.GetRange());
2628  if(first_loc){
2629  feat_seq_range = loc_it.GetRange();
2630  } else {
2631  feat_seq_range += loc_it.GetRange();
2632  }
2633  has_id = true;
      //any strand other than plus or minus (or no strand) is
      //treated as plus
2634  if(loc_it.IsSetStrand()){
2635  feat_strand = loc_it.GetStrand();
2636  if(feat_strand != eNa_strand_plus &&
2637  feat_strand != eNa_strand_minus){
2638  feat_strand = eNa_strand_plus;
2639  }
2640  } else {
2641  feat_strand = eNa_strand_plus;
2642  }
2643 
2644  if(!first_loc && prev_strand != feat_strand){
2645  mixed_strand = true;
2646  }
2647  first_loc = false;
2648  prev_strand = feat_strand;
2649  } else {
2650  //if seqloc has other seqids then need to remove other
2651  //seqid encoded amino acids in the front later
2652  if (first_loc) {
2653  other_seqloc_length += loc_it.GetRange().GetLength();
2654  mix_loc = true;
2655  }
2656  }
2657  }
2658  //give up if mixed strand or no id
2659  if(!has_id || mixed_strand){
2660  continue;
2661  }
2662 
2663  string featLable = NcbiEmptyString;
2664  string featId;
2665  char feat_char = ' ';
2666  string alternativeFeatStr = NcbiEmptyString;
2667  TSeqPos feat_aln_from = 0;
2668  TSeqPos feat_aln_to = 0;
2669  TSeqPos actual_feat_seq_start = 0, actual_feat_seq_stop = 0;
2670  feature::GetLabel(feat->GetOriginalFeature(), &featLable,
2671  feature::fFGL_Both, &scope);
2672  featId = featLable.substr(0, k_FeatureIdLen); //default
2673  TSeqPos aln_stop = m_AV->GetAlnStop();
2674  CRef<SAlnFeatureInfo> featInfo;
2675 
2676  //find the actual feature sequence start and stop
2677  if(m_AV->IsPositiveStrand(row)){
2678  actual_feat_seq_start =
2679  max(feat_seq_range.GetFrom(), seq_start);
2680  actual_feat_seq_stop =
2681  min(feat_seq_range.GetTo(), seq_stop);
2682 
2683  } else {
2684  actual_feat_seq_start =
2685  min(feat_seq_range.GetTo(), seq_start);
2686  actual_feat_seq_stop =
2687  max(feat_seq_range.GetFrom(), seq_stop);
2688  }
2689  //the feature alignment positions
2690  feat_aln_from =
2691  m_AV->GetAlnPosFromSeqPos(row, actual_feat_seq_start);
2692  feat_aln_to =
2693  m_AV->GetAlnPosFromSeqPos(row, actual_feat_seq_stop);
2694  if(choice == CSeqFeatData::e_Gene){
2695  featInfo.Reset(new SAlnFeatureInfo);
2696  feat_char = '^';
2697 
2698  } else if(choice == CSeqFeatData::e_Cdregion){
2699 
      //the frame argument is 0 when the length contributed by other
      //seqids is a multiple of 3, otherwise 3 minus the remainder
2700  string raw_cdr_product =
2701  s_GetCdsSequence(m_SlaveGeneticCode, feat, scope,
2702  isolated_range, handle, feat_strand,
2703  featId, other_seqloc_length%3 == 0 ?
2704  0 : 3 - other_seqloc_length%3,
2705  mix_loc);
2706  if(raw_cdr_product == NcbiEmptyString){
2707  continue;
2708  }
2709  featInfo.Reset(new SAlnFeatureInfo);
2710 
2711  //line represents the amino acid line starting covering
2712  //the whole alignment. The idea is if there is no feature
2713  //in some range, then fill it with space and this won't
2714  //be shown
2715 
2716  string line(aln_stop+1, ' ');
2717  //pre-fill all cds region with intron char
2718  for (TSeqPos i = feat_aln_from; i <= feat_aln_to; i ++){
2719  line[i] = k_IntronChar;
2720  }
2721 
2722  //get total coding length
2723  TSeqPos total_coding_len = 0;
2724  ITERATE(list<CSeq_loc_CI::TRange>, iter, isolated_range){
2725  total_coding_len += iter->GetLength();
2726  }
2727 
2728  //fill concatenated exon (excluding intron)
2729  //with product
2730  //this is will be later used to
2731  //fill the feature line
2732  char gap_char = m_AV->GetGapChar(row);
2733  string concat_exon =
2734  s_GetConcatenatedExon(feat, feat_strand,
2735  isolated_range,
2736  total_coding_len,
2737  raw_cdr_product,
2738  other_seqloc_length%3 == 0 ?
2739  0 : 3 - other_seqloc_length%3);
2740 
2741 
2742  //fill slave feature info to make putative feature for
2743  //master sequence
2744  if (fill_feat_range) {
2745  list<CRange<TSeqPos> > master_feat_range;
2746  ENa_strand master_strand = eNa_strand_plus;
2747  s_MapSlaveFeatureToMaster(master_feat_range, master_strand,
2748  feat, isolated_range,
2749  feat_strand, m_AV, row,
2750  other_seqloc_length%3 == 0 ?
2751  0 :
2752  3 - other_seqloc_length%3);
2753  if(!(master_feat_range.empty())) {
2754  feat_range_list.push_back(master_feat_range);
2755  feat_seq_strand.push_back(master_strand);
2756  }
2757  }
2758 
2759 
2760  TSeqPos feat_aln_start_totalexon = 0;
2761  TSeqPos prev_feat_aln_start_totalexon = 0;
2762  TSeqPos prev_feat_seq_stop = 0;
2763  TSeqPos intron_size = 0;
2764  bool is_first = true;
2765  bool is_first_exon_start = true;
2766 
2767  //here things get complicated a bit. The idea is fill the
2768  //whole feature line in alignment coordinates with
2769  //amino acid on the second base of a codon
2770 
2771  //go through the feature seqloc and fill the feature line
2772 
2773  //Need to reverse the seqloc order for minus strand
2774  if(feat_strand == eNa_strand_minus){
2775  isolated_range.reverse();
2776  }
2777 
2778  ITERATE(list<CSeq_loc_CI::TRange>, iter, isolated_range){
2779  //intron refers to the distance between two exons
2780  //i.e. each seqloc is an exon
2781  //intron needs to be skipped
2782  if(!is_first){
2783  intron_size += iter->GetFrom()
2784  - prev_feat_seq_stop - 1;
2785  }
      //only the part of the exon inside the aligned range is drawn
2786  CRange<TSeqPos> actual_feat_seq_range =
2787  loc_ref->GetTotalRange().
2788  IntersectionWith(*iter);
2789  if(!actual_feat_seq_range.Empty()){
2790  //the sequence start position in aln coordinates
2791  //that has a feature
2792  TSeqPos feat_aln_start;
2793  TSeqPos feat_aln_stop;
2794  if(m_AV->IsPositiveStrand(row)){
2795  feat_aln_start =
2796  m_AV->
2797  GetAlnPosFromSeqPos
2798  (row, actual_feat_seq_range.GetFrom());
2799  feat_aln_stop
2800  = m_AV->GetAlnPosFromSeqPos
2801  (row, actual_feat_seq_range.GetTo());
2802  } else {
2803  feat_aln_start =
2804  m_AV->
2805  GetAlnPosFromSeqPos
2806  (row, actual_feat_seq_range.GetTo());
2807  feat_aln_stop
2808  = m_AV->GetAlnPosFromSeqPos
2809  (row, actual_feat_seq_range.GetFrom());
2810  }
2811  //put actual amino acid on feature line
2812  //in aln coord
2813  for (TSeqPos i = feat_aln_start;
2814  i <= feat_aln_stop; i ++){
2815  if(sequence[i] != gap_char){
2816  //the amino acid position in
2817  //concatenated exon that corresponds
2818  //to the sequence position
2819  //note intron needs to be skipped
2820  //as it does not have cds feature
2821  TSeqPos product_adj_seq_pos
2822  = m_AV->GetSeqPosFromAlnPos(row, i) -
2823  intron_size - feat_seq_range.GetFrom();
2824  if(product_adj_seq_pos <
2825  concat_exon.size()){
2826  //fill the cds feature line with
2827  //actual amino acids
2828  line[i] =
2829  concat_exon[product_adj_seq_pos];
2830  //get the exon start position
2831  //note minus strand needs to be
2832  //counted backward
2833  if(m_AV->IsPositiveStrand(row)){
2834  //don't count gap
2835  if(is_first_exon_start &&
2836  isalpha((unsigned char) line[i])){
2837  if(feat_strand == eNa_strand_minus){
2838  feat_aln_start_totalexon =
2839  concat_exon.size()
2840  - product_adj_seq_pos + 1;
2841  is_first_exon_start = false;
2842 
2843  } else {
2844  feat_aln_start_totalexon =
2845  product_adj_seq_pos;
2846  is_first_exon_start = false;
2847  }
2848  }
2849 
2850  } else {
2851  if(feat_strand == eNa_strand_minus){
2852  if(is_first_exon_start &&
2853  isalpha((unsigned char) line[i])){
2854  feat_aln_start_totalexon =
2855  concat_exon.size()
2856  - product_adj_seq_pos + 1;
2857  is_first_exon_start = false;
2858  prev_feat_aln_start_totalexon =
2859  feat_aln_start_totalexon;
2860  }
2861  if(!is_first_exon_start){
2862  //need to get the
2863  //smallest start as
2864  //seqloc list is
2865  //reversed
2866  feat_aln_start_totalexon =
2867  min(TSeqPos(concat_exon.size()
2868  - product_adj_seq_pos + 1),
2869  prev_feat_aln_start_totalexon);
2870  prev_feat_aln_start_totalexon =
2871  feat_aln_start_totalexon;
2872  }
2873  } else {
2874  feat_aln_start_totalexon =
2875  max(prev_feat_aln_start_totalexon,
2876  product_adj_seq_pos);
2877 
2878  prev_feat_aln_start_totalexon =
2879  feat_aln_start_totalexon;
2880  }
2881  }
2882  }
2883  } else { //adding gap
2884  line[i] = ' ';
2885  }
2886 
2887  }
2888  }
2889 
2890  prev_feat_seq_stop = iter->GetTo();
2891  is_first = false;
2892  }
      //the finished cds line is used as the feature string in place of
      //a run of feat_char
2893  alternativeFeatStr = line;
2894  s_FillCdsStartPosition(line, concat_exon, m_LineLen,
2895  feat_aln_start_totalexon,
2896  m_AV->IsPositiveStrand(row) ?
2898  feat_strand, featInfo->feature_start);
2899 
2900  }
2901 
      //featInfo stays empty for feature types other than gene and cds
2902  if(featInfo){
2903  x_SetFeatureInfo(featInfo, *loc_ref,
2904  feat_aln_from, feat_aln_to, aln_stop,
2905  feat_char, featId, alternativeFeatStr);
2906  feature.push_back(featInfo);
2907  }
2908  }
2909  }
2910  }
2911 }
2912 
2913 
// NOTE(review): listing line 2914 (the start of this signature) is absent from
// this excerpt; `feat_info`, which the body writes to, is presumably declared
// there - confirm against the full file.
//
// Creates a FeatureInfo from seqloc / pattern_char / pattern_id and stores it,
// together with a display string and the [aln_from, aln_to] range, in feat_info.
2915  const CSeq_loc& seqloc, int aln_from,
2916  int aln_to, int aln_stop,
2917  char pattern_char, string pattern_id,
2918  string& alternative_feat_str) const
2919 {
2920  CRef<FeatureInfo> feat(new FeatureInfo);
2921  feat->seqloc = &seqloc;
2922  feat->feature_char = pattern_char;
2923  feat->feature_id = pattern_id;
2924 
// A non-empty caller-supplied string is used verbatim as the feature line.
2925  if(alternative_feat_str != NcbiEmptyString){
2926  feat_info->feature_string = alternative_feat_str;
2927  } else {
2928  //fill feature string
// Build aln_stop+1 blanks and mark columns aln_from..aln_to with the feature
// character. There is no bounds check here, so 0 <= aln_from and
// aln_to <= aln_stop are assumed - TODO confirm that callers guarantee this.
2929  string line(aln_stop+1, ' ');
2930  for (int j = aln_from; j <= aln_to; j++){
2931  line[j] = feat->feature_char;
2932  }
2933  feat_info->feature_string = line;
2934  }
2935 
2936  feat_info->aln_range.Set(aln_from, aln_to);
2937  feat_info->feature = feat;
2938 }
2939 
///Mark, on the insert-sequence line, an insert whose sequence has to be shown
///on a later row: pad the line with blanks and put a "|" at string index
///(insert_alnpos - aln_start + 1).  Nothing is appended when the line already
///reaches that index.
///@param seq: the insert-sequence line built so far (extended in place)
///@param insert_alnpos: the alignment position of the insert
///@param aln_start: the alignment position of the first column of the line
///@return: the number of characters appended to seq, or 0 if none
///
static int x_AddBar(std::string& seq, int insert_alnpos, int aln_start){
    const int lastFilled = static_cast<int>(seq.size()) - 1;
    const int barIndex = insert_alnpos - aln_start + 1;
    //distance from the last filled column to the column of the bar
    const int shift = barIndex - lastFilled;
    if (shift >= 1) {
        //shift-1 blanks bring the "|" exactly onto barIndex
        seq.append(shift - 1, ' ');
        seq.push_back('|');
    }
    return std::max(shift, 0);
}
2960 
2961 
2962 ///Add new insert seq to the current insert seq and return the end position of
2963 ///the latest insert
2964 ///@param cur_insert: the current insert string
2965 ///@param new_insert: the new insert string
2966 ///@param insert_alnpos: insert position
2967 ///@param aln_start: alnment start
2968 ///@return: the updated insert end position
2969 ///
2970 static int s_AdjustInsert(string& cur_insert, string& new_insert,
2971  int insert_alnpos, int aln_start)
2972 {
2973  int insertEnd = 0;
2974  int curInsertSize = (int)cur_insert.size();
2975  int insertLeftSpace = insert_alnpos - aln_start - curInsertSize + 2;
2976  //plus2 because insert is put after the position
2977  if(curInsertSize > 0){
2978  _ASSERT(insertLeftSpace >= 2);
2979  }
2980  int newInsertSize = (int)new_insert.size();
2981  if(insertLeftSpace - newInsertSize >= 1){
2982  //can insert with the end position right below the bar
2983  string spacer(insertLeftSpace - newInsertSize, ' ');
2984  cur_insert += spacer + new_insert;
2985 
2986  } else { //Need to insert beyond the insert postion
2987  if(curInsertSize > 0){
2988  cur_insert += " " + new_insert;
2989  } else { //can insert right at the firt position
2990  cur_insert += new_insert;
2991  }
2992  }
2993  insertEnd = aln_start + (int)cur_insert.size() -1 ; //-1 back to string position
2994  return insertEnd;
2995 }
2996 
2997 
2998 void CDisplaySeqalign::x_DoFills(int row, CAlnMap::TSignedRange& aln_range,
2999  int aln_start,
3000  TSInsertInformationList& insert_list,
3001  list<string>& inserts) const {
3002  if(!insert_list.empty()){
3003  string bar(aln_range.GetLength(), ' ');
3004 
3005  string seq;
3006  TSInsertInformationList leftOverInsertList;
3007  bool isFirstInsert = true;
3008  int curInsertAlnStart = 0;
3009  int prvsInsertAlnEnd = 0;
3010 
3011  //go through each insert and fills the seq if it can
3012  //be filled on the same line. If not, go to the next line
3013  NON_CONST_ITERATE(TSInsertInformationList, iter, insert_list) {
3014  curInsertAlnStart = (*iter)->aln_start;
3015  //always fill the first insert. Also fill if there is enough space
3016  if(isFirstInsert || curInsertAlnStart - prvsInsertAlnEnd >= 1){
3017  bar[curInsertAlnStart-aln_start+1] = '|';
3018  int seqStart = (*iter)->seq_start;
3019  int seqEnd = seqStart + (*iter)->insert_len - 1;
3020  string newInsert;
3021  newInsert = m_AV->GetSeqString(newInsert, row, seqStart,
3022  seqEnd);
3023  prvsInsertAlnEnd = s_AdjustInsert(seq, newInsert,
3024  curInsertAlnStart, aln_start);
3025  isFirstInsert = false;
3026  } else { //if no space, save the chunk and go to next line
3027  bar[curInsertAlnStart-aln_start+1] = '|';
3028  //indicate insert goes to the next line
3029  prvsInsertAlnEnd += x_AddBar(seq, curInsertAlnStart, aln_start);
3030  //May need to add a bar after the current insert sequence
3031  //to indicate insert goes to the next line.
3032  leftOverInsertList.push_back(*iter);
3033  }
3034  }
3035  //save current insert. Note that each insert has a bar and sequence
3036  //below it
3037  inserts.push_back(bar);
3038  inserts.push_back(seq);
3039  //here recursively fill the chunk that don't have enough space
3040  x_DoFills(row, aln_range, aln_start, leftOverInsertList, inserts);
3041  }
3042 
3043 }
3044 
3045 
// NOTE(review): listing line 3046 (the start of this signature) is absent from
// this excerpt; `row` and `aln_range`, used below, are presumably declared
// there - confirm against the full file.
//
// Produces the insert-position line for one alignment line and then delegates
// the layout of the insert sequences themselves to x_DoFills.
3047  int aln_start, list<string>& inserts,
3048  string& insert_pos_string,
3049  TSInsertInformationList& insert_list) const
3050 {
3051 
// One blank per column of aln_range.
3052  string line(aln_range.GetLength(), ' ');
3053 
// Each insert is marked one column after its aln_start, counted from the
// aln_start of the line. The index is not range-checked here.
3054  ITERATE(TSInsertInformationList, iter, insert_list){
3055  int from = (*iter)->aln_start;
3056  line[from - aln_start + 1] = '\\';
3057  }
3058  insert_pos_string = line;
3059  //this is the line with "\" right after each insert position
3060 
3061  //here fills the insert sequence
3062  x_DoFills(row, aln_range, aln_start, insert_list, inserts);
3063 }
3064 
3065 
// Moves the leading entries of the three parallel lists (alignment start,
// sequence start, length) into insert_list for as long as the next alignment
// start is before line_aln_stop. Consumed entries are popped from all three
// input lists.
3066 void CDisplaySeqalign::x_GetInserts(TSInsertInformationList& insert_list,
3067  CAlnMap::TSeqPosList& insert_aln_start,
3068  CAlnMap::TSeqPosList& insert_seq_start,
3069  CAlnMap::TSeqPosList& insert_length,
3070  int line_aln_stop)
3071 {
3072 
// Only insert_aln_start is tested for emptiness; the other two lists are read
// and popped in lockstep, so they are assumed to hold at least as many
// entries - TODO confirm.
3073  while(!insert_aln_start.empty()
3074  && (int)insert_aln_start.front() < line_aln_stop){
// NOTE(review): listing line 3075, which presumably declares and allocates
// `insert`, is absent from this excerpt.
3076  insert->aln_start = insert_aln_start.front() - 1;
3077  //Need to minus one as we are inserting after this position
3078  insert->seq_start = insert_seq_start.front();
3079  insert->insert_len = insert_length.front();
3080  insert_list.push_back(insert);
3081  insert_aln_start.pop_front();
3082  insert_seq_start.pop_front();
3083  insert_length.pop_front();
3084  }
3085 
3086 }
3087 
3088 
// Returns the segment string for a row: "<start>-<stop>" of that row when
// eMergeAlign is set, otherwise the `segs` value stored in m_AlnLinksParams.
// Returns an empty string when no stored entry is found.
// NOTE(review): listing line 3090 (presumably the opening brace) is absent
// from this excerpt.
3089 string CDisplaySeqalign::x_GetSegs(int row) const
3091  string segs = NcbiEmptyString;
3092  if(m_AlignOption & eMergeAlign){ //only show this hsp
3093  segs = NStr::IntToString(m_AV->GetSeqStart(row))
3094  + "-" + NStr::IntToString(m_AV->GetSeqStop(row));
3095  } else { //for all segs
// The id is always taken from row 1 here, independent of the `row` argument.
3096  string idString = m_AV->GetSeqId(1).GetSeqIdString();
// NOTE(review): listing line 3097, which presumably declares `iter` as the
// result of a lookup in m_AlnLinksParams, is absent from this excerpt.
3098  if ( iter != m_AlnLinksParams.end() ){
3099  segs = iter->second.segs;
3100  }
3101  }
3102  return segs;
3103 }
3104 
3105 
3106 
// Builds the download link for the given ids by filling the kDownloadLink
// template; returns an empty string when BuildUserUrl yields no URL.
// NOTE(review): listing lines 3108 (presumably the opening brace), 3111
// (presumably the declaration of `label`, used below) and 3114 (one argument
// of BuildUserUrl) are absent from this excerpt.
3107 string CDisplaySeqalign::x_GetDumpgnlLink(const list<CRef<CSeq_id> >& ids) const
3109  string dowloadUrl;
3110  string segs = x_GetSegs(1); //row=1
3112  string url_with_parameters = CAlignFormatUtil::BuildUserUrl(ids, ZERO_TAX_ID, kDownloadUrl,
3113  m_DbName,
3115  true);
// Template tags are substituted one after another: download_url, segs,
// lnk_displ, label.
3116  if (url_with_parameters != NcbiEmptyString) {
3117  dowloadUrl = CAlignFormatUtil::MapTemplate(kDownloadLink,"download_url",url_with_parameters);
3118  dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"segs",segs);
3119  dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"lnk_displ",kDownloadImg);
3120  dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"label",label);
3121  }
3122  return dowloadUrl;
3123 }
3124 
3125 
// NOTE(review): the signature and opening brace (listing lines 3126-3128) are
// absent from this excerpt; `alnset`, read below, is presumably a parameter.
//
// Builds a new CSeq_align_set from alnset. An alignment holding more than one
// std-seg or more than one dense-diag is split so that every segment becomes
// its own CSeq_align; all other alignments are added unchanged.
3129  CRef<CSeq_align_set> alnSetRef(new CSeq_align_set);
3130 
3131  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()){
3132  const CSeq_align::TSegs& seg = (*iter)->GetSegs();
3133  if(seg.Which() == CSeq_align::C_Segs::e_Std){
3134  if(seg.GetStd().size() > 1){
3135  //has more than one stdseg. Need to separate as each
3136  //is a distinct HSP
3137  ITERATE (CSeq_align::C_Segs::TStd, iterStdseg, seg.GetStd()){
3138  CRef<CSeq_align> aln(new CSeq_align);
// The segment's own scores become the scores of the new alignment.
3139  if((*iterStdseg)->IsSetScores()){
3140  aln->SetScore() = (*iterStdseg)->GetScores();
3141  }
3142  aln->SetSegs().SetStd().push_back(*iterStdseg);
3143  alnSetRef->Set().push_back(aln);
3144  }
3145 
3146  } else {
3147  alnSetRef->Set().push_back(*iter);
3148  }
3149  } else if(seg.Which() == CSeq_align::C_Segs::e_Dendiag){
3150  if(seg.GetDendiag().size() > 1){
3151  //has more than one dendiag. Need to separate as each is
3152  //a distinct HSP
3153  ITERATE (CSeq_align::C_Segs::TDendiag, iterDendiag,
3154  seg.GetDendiag()){
3155  CRef<CSeq_align> aln(new CSeq_align);
3156  if((*iterDendiag)->IsSetScores()){
3157  aln->SetScore() = (*iterDendiag)->GetScores();
3158  }
3159  aln->SetSegs().SetDendiag().push_back(*iterDendiag);
// Unlike the std-seg branch above, the dense-diag branch also copies the
// parent alignment's type onto the new alignment.
3160  if((*iter)->IsSetType() && (*iter)->CanGetType()){
3161  aln->SetType((*iter)->GetType());
3162  }
3163 
3164  alnSetRef->Set().push_back(aln);
3165  }
3166 
3167  } else {
3168  alnSetRef->Set().push_back(*iter);
3169  }
3170  } else { //Denseg, doing nothing.
3171 
3172  alnSetRef->Set().push_back(*iter);
3173  }
3174  }
3175 
3176  return alnSetRef;
3177 }
3178 
3179 
// NOTE(review): the signature and opening brace (listing lines 3180-3182) are
// absent from this excerpt; `alnset`, read below, is presumably a parameter.
//
// Builds a new CSeq_align_set from alnset in which every std-seg and every
// dense-diag becomes its own CSeq_align, even when the source alignment has
// only one segment. Alignments of any other segment type are added unchanged.
// The parent alignment's type is not copied in either branch.
3183  CRef<CSeq_align_set> alnSetRef(new CSeq_align_set);
3184 
3185  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()){
3186  const CSeq_align::TSegs& seg = (*iter)->GetSegs();
3187  if(seg.Which() == CSeq_align::C_Segs::e_Std){
3188  ITERATE (CSeq_align::C_Segs::TStd, iterStdseg, seg.GetStd()){
3189  CRef<CSeq_align> aln(new CSeq_align);
// The segment's own scores become the scores of the new alignment.
3190  if((*iterStdseg)->IsSetScores()){
3191  aln->SetScore() = (*iterStdseg)->GetScores();
3192  }
3193  aln->SetSegs().SetStd().push_back(*iterStdseg);
3194  alnSetRef->Set().push_back(aln);
3195  }
3196  } else if(seg.Which() == CSeq_align::C_Segs::e_Dendiag){
3197  ITERATE (CSeq_align::C_Segs::TDendiag, iterDendiag,
3198  seg.GetDendiag()){
3199  CRef<CSeq_align> aln(new CSeq_align);
3200  if((*iterDendiag)->IsSetScores()){
3201  aln->SetScore() = (*iterDendiag)->GetScores();
3202  }
3203  aln->SetSegs().SetDendiag().push_back(*iterDendiag);
3204  alnSetRef->Set().push_back(aln);
3205  }
3206  } else { //Denseg, doing nothing.
3207 
3208  alnSetRef->Set().push_back(*iter);
3209  }
3210  }
3211 
3212  return alnSetRef;
3213 }
3214 
3215 // this version will set aggregate scores
// NOTE(review): the signature and opening brace (listing lines 3216-3218) are
// absent from this excerpt; `alnset`, accessed below through Set(), is
// presumably a non-const parameter.
//
// Splits every std-seg / dense-diag of each alignment into its own CSeq_align.
// In addition, the first segment of each source alignment that has scores set
// receives those scores of the source alignment whose id does not match any
// score already on the segment.
3219  CRef<CSeq_align_set> alnSetRef(new CSeq_align_set);
3220 
3221  NON_CONST_ITERATE(CSeq_align_set::Tdata, iter, alnset.Set()){
// Reset per source alignment; cleared by the first segment that has scores.
3222  bool first_align = true;
3223  CSeq_align::TSegs& seg = (*iter)->SetSegs();
3224  if(seg.Which() == CSeq_align::C_Segs::e_Std){
3225  ITERATE (CSeq_align::C_Segs::TStd, iterStdseg, seg.GetStd()){
3226  CRef<CSeq_align> aln(new CSeq_align);
3227  if((*iterStdseg)->IsSetScores()){
3228  aln->SetScore() = (*iterStdseg)->GetScores();
3229  if (first_align) {
3230  // add aggregate scores to first seg, which becomes first alignment for subject
3231  first_align = false;
3232  std::vector< CRef< CScore > >& scores_in = (*iter)->SetScore();
3233  NON_CONST_ITERATE (std::vector< CRef< CScore > >, it_in, scores_in){
// Scores without an id are skipped; a score with an id is appended only if no
// score on the new alignment has a matching id.
3234  if ((*it_in)->IsSetId()) {
3235  CObject_id& score_id = (*it_in)->SetId();
3236  bool found = false;
3237  std::vector< CRef< CScore > >& scores_out = aln->SetScore();
3238  ITERATE (std::vector< CRef< CScore > >, it_out, scores_out){
3239  if ((*it_out)->IsSetId()) {
3240  if (score_id.Match ((*it_out)->GetId())) {
3241  found = true;
3242  }
3243  }
3244  }
3245  if (!found) {
3246  scores_out.push_back (*it_in);
3247  }
3248  }
3249  }
3250  }
3251  }
3252  aln->SetSegs().SetStd().push_back(*iterStdseg);
3253  alnSetRef->Set().push_back(aln);
3254  }
3255  } else if(seg.Which() == CSeq_align::C_Segs::e_Dendiag){
// Same procedure as the std-seg branch, applied to dense-diags.
3256  ITERATE (CSeq_align::C_Segs::TDendiag, iterDendiag, seg.GetDendiag()){
3257  CRef<CSeq_align> aln(new CSeq_align);
3258  if((*iterDendiag)->IsSetScores()){
3259  aln->SetScore() = (*iterDendiag)->GetScores();
3260  if (first_align) {
3261  first_align = false;
3262  std::vector< CRef< CScore > >& scores_in = (*iter)->SetScore();
3263  NON_CONST_ITERATE (std::vector< CRef< CScore > >, it_in, scores_in){
3264  if ((*it_in)->IsSetId()) {
3265  CObject_id& score_id = (*it_in)->SetId();
3266  bool found = false;
3267  std::vector< CRef< CScore > >& scores_out = aln->SetScore();
3268  ITERATE (std::vector< CRef< CScore > >, it_out, scores_out){
3269  if ((*it_out)->IsSetId()) {
3270  if (score_id.Match ((*it_out)->GetId())) {
3271  found = true;
3272  }
3273  }
3274  }
3275  if (!found) {
3276  scores_out.push_back (*it_in);
3277  }
3278  }
3279  }
3280  }
3281  }
3282  aln->SetSegs().SetDendiag().push_back(*iterDendiag);
3283  alnSetRef->Set().push_back(aln);
3284  }
3285  } else { //Denseg, doing nothing.
3286 
3287  alnSetRef->Set().push_back(*iter);
3288  }
3289  }
3290 
3291  return alnSetRef;
3292 }
3293 
3294 
// Returns true when one of the subject's Blast deflines has the eGene bit set
// in its linkout value, and false otherwise.
// NOTE(review): listing lines 3296 (presumably the opening brace), 3302 (the
// end of the first `if` condition), 3304-3305 (the condition guarding the
// early `return false`) and 3314 (the loop header declaring `iter`) are absent
// from this excerpt, so the exact preconditions cannot be read from here.
3295 bool CDisplaySeqalign::x_IsGeneInfoAvailable(SAlnInfo* aln_vec_info)
3297  const CBioseq_Handle& bsp_handle =
3298  aln_vec_info->alnvec->GetBioseqHandle(1);
// Gene info is only considered for a valid handle with both eHtml and
// eLinkout set (plus whatever the missing line 3302 requires).
3299  if (bsp_handle &&
3300  (m_AlignOption&eHtml) &&
3301  (m_AlignOption&eLinkout) &&
3303  {
3306  {
3307  return false;
3308  }
3309 
3310  const CRef<CBlast_def_line_set> bdlRef
3311  = CSeqDB::ExtractBlastDefline(bsp_handle);
// An empty list is used when no defline set could be extracted. Because one
// operand of the conditional is a temporary, `bdl` presumably binds to a copy
// of the list rather than to the set's own list - TODO confirm.
3312  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
3313 
3315  {
// Only the first Seq-id of each defline is used for the linkout lookup.
3316  int linkout = x_GetLinkout(*(*iter)->GetSeqid().front());
3317  if (linkout & eGene)
3318  {
3319  return true;
3320  }
3321  }
3322  }
3323  return false;
3324 }
3325 
3326 
// Returns the gene-info URL: the "GENE_INFO" registry value used as a printf
// format and filled with gene_id, m_Rid, "nucl"/"prot" and m_cur_align.
// NOTE(review): listing lines 3328 (presumably the opening brace) and 3330
// (presumably the declaration of `buf`, a smart pointer owning the char array
// allocated on the next line) are absent from this excerpt.
3327 string CDisplaySeqalign::x_GetGeneLinkUrl(int gene_id)
3329  string strGeneLinkUrl = CAlignFormatUtil::GetURLFromRegistry("GENE_INFO");
// The buffer is the format length plus 1024 bytes.
3331  (new char[strGeneLinkUrl.size() + 1024]);
// NOTE(review): sprintf is unbounded and the format string comes from the
// registry. The call is safe only if the expanded arguments (notably m_Rid)
// add fewer than 1024 characters and the format holds exactly the conversions
// %d, %s, %s, %d in this order - confirm, or switch to a bounded formatter.
3332  sprintf(buf.get(), strGeneLinkUrl.c_str(),
3333  gene_id,
3334  m_Rid.c_str(),
3335  m_IsDbNa ? "nucl" : "prot",
3336  m_cur_align);
3337  strGeneLinkUrl.assign(buf.get());
3338  return strGeneLinkUrl;
3339 }
3340 
3341 
3342 
// Returns the formatted gene-info text for the subject sequence: one line per
// gene record found for the sequence's gi, preceded by an empty line. The
// result is empty when x_IsGeneInfoAvailable() returns false or no record is
// found. Exceptions are not propagated: they are turned into an error note in
// the returned text and a message on cerr.
// NOTE(review): listing lines 3344-3345 (presumably the opening brace and the
// declaration of the output string stream `out`) and 3357 (presumably the
// declaration of `infoList`) are absent from this excerpt.
3343 string CDisplaySeqalign::x_DisplayGeneInfo(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info)
3346  try
3347  {
3348  if (x_IsGeneInfoAvailable(aln_vec_info))
3349  {
// The reader is created on first use and kept in m_GeneInfoReader.
3350  if (m_GeneInfoReader.get() == 0)
3351  {
3352  m_GeneInfoReader.reset(new CGeneInfoFileReader(false));
3353  }
3354 
3355  TGi giForGeneLookup = FindGi(bsp_handle.GetBioseqCore()->GetId());
3356 
3358  m_GeneInfoReader->GetGeneInfoForGi(giForGeneLookup,infoList);
3359 
3360  CGeneInfoFileReader::TGeneInfoList::const_iterator
3361  itInfo = infoList.begin();
// A leading newline is written only when there is at least one record.
3362  if (itInfo != infoList.end())
3363  out << "\n";
3364  for (; itInfo != infoList.end(); itInfo++)
3365  {
3366  CRef<CGeneInfo> info = *itInfo;
3367  string strUrl = x_GetGeneLinkUrl(info->GetGeneId());
3368  string strInfo;
3369  info->ToString(strInfo, true, strUrl);
3370  out << strInfo << "\n";
3371  }
3372  }
3373  }
// Failures are reported inside the formatted output as well as on cerr, so
// gene-info problems do not abort the formatting of the alignment.
3374  catch (CException& e)
3375  {
3376  out << "(Gene info extraction error: "
3377  << e.GetMsg() << ")" << "\n";
3378  cerr << "[BLAST FORMATTER EXCEPTION] Gene info extraction error: " << e.GetMsg() << endl;
3379  }
3380  catch (...)
3381  {
3382  out << "(Gene info extraction error)" << "\n";
3383  cerr << "[BLAST FORMATTER EXCEPTION] Gene info extraction error " << endl;
3384  }
3385  string formattedString = CNcbiOstrstreamToString(out);
3386  return formattedString;
3387 }
3388 
// NOTE(review): the signature and opening brace (listing lines 3389-3390) are
// absent from this excerpt; `out` and `id_label`, used below, are presumably
// parameters. Listing lines 3396, 3398, 3406, 3415, 3420, 3429, 3434, 3442,
// 3446, 3453 and 3458 are absent as well; in each link one of them presumably
// streams the HSP_SORT value that follows "&HSP_SORT=".
//
// Writes the "Sort alignments for this subject sequence by:" control: five
// sort keys, each printed as a link to Blast.cgi unless it is the key given by
// the HSP_SORT request value (0 when that value is empty).
3391  string query_buf;
3392  map< string, string> parameters_to_change;
// HSP_SORT is blanked in the rebuilt query string so that each link can append
// its own value.
3393  parameters_to_change.insert(map<string, string>::value_type("HSP_SORT", ""));
3394  CAlignFormatUtil::BuildFormatQueryString(*m_Ctx,parameters_to_change,query_buf);
3395  out << "\n";
3397  out << "Sort alignments for this subject sequence by:\n";
3399 
// NOTE(review): m_Ctx is dereferenced here and above without a null check.
// Also, NStr::StringToInt is given the raw request value; presumably it throws
// on non-numeric input - confirm that this is acceptable here.
3400  string hsp_sort_value = m_Ctx->GetRequestValue("HSP_SORT").GetValue();
3401  int hsp_sort = hsp_sort_value == NcbiEmptyString ? 0 : NStr::StringToInt(hsp_sort_value);
3402 
3403  if (hsp_sort != CAlignFormatUtil::eEvalue) {
3404  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3405  << "&HSP_SORT="
3407  << "#" << id_label << "\">";
3408  }
3409 
3410  out << "E value";
3411  if (hsp_sort != CAlignFormatUtil::eEvalue) {
3412  out << "</a>";
3413  }
3414 
3416 
3417  if (hsp_sort != CAlignFormatUtil::eScore) {
3418  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3419  << "&HSP_SORT="
3421  << "#" << id_label << "\">";
3422  }
3423 
3424  out << "Score";
3425  if (hsp_sort != CAlignFormatUtil::eScore) {
3426  out << "</a>";
3427  }
3428 
3430 
3431  if (hsp_sort != CAlignFormatUtil::eHspPercentIdentity) {
3432  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3433  << "&HSP_SORT="
3435  << "#" << id_label << "\">";
3436  }
3437  out << "Percent identity";
3438  if (hsp_sort != CAlignFormatUtil::eHspPercentIdentity) {
3439  out << "</a>";
3440  }
3441  out << "\n";
3443  if (hsp_sort != CAlignFormatUtil::eQueryStart) {
3444  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3445  << "&HSP_SORT="
3447  << "#" << id_label << "\">";
3448  }
3449  out << "Query start position";
3450  if (hsp_sort != CAlignFormatUtil::eQueryStart) {
3451  out << "</a>";
3452  }
3454 
3455  if (hsp_sort != CAlignFormatUtil::eSubjectStart) {
3456  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3457  << "&HSP_SORT="
3459  << "#" << id_label << "\">";
3460  }
3461  out << "Subject start position";
3462  if (hsp_sort != CAlignFormatUtil::eSubjectStart) {
3463  out << "</a>";
3464  }
3465 
3466  out << "\n";
3467 }
3468 
// NOTE(review): the signature and opening brace (listing lines 3469-3470) are
// absent from this excerpt.
//
// Returns the sort-info template (m_AlignTemplates->sortInfoTmpl) with the id
// labels filled in and the tags sorted_0..sorted_4 set: the tag whose number
// equals the HSP_SORT request value gets "sortAlnArrowLinkW", all others get
// an empty string.
3471  string alignSort = m_AlignTemplates->sortInfoTmpl;
3472  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"id_label",m_CurrAlnID_DbLbl);
3473  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"alnSeqGi",m_CurrAlnID_Lbl);
3474 
// Without a context (m_Ctx null) or without an HSP_SORT value the sort key is 0.
3475  string hsp_sort_value = m_Ctx ? m_Ctx->GetRequestValue("HSP_SORT").GetValue() : kEmptyStr;
3476  int hsp_sort = hsp_sort_value == NcbiEmptyString ? 0 : NStr::StringToInt(hsp_sort_value);
// An HSP_SORT value outside 0..4 leaves all five tags empty.
3477  for(int i = 0; i < 5; i++) {
3478  if(hsp_sort == i) {
3479  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(hsp_sort),"sortAlnArrowLinkW");
3480  }
3481  else {
3482  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(i),"");
3483  }
3484  }
3485  return alignSort;
3486 }
3487 
// NOTE(review): the signature and opening brace (listing lines 3488-3489) are
// absent from this excerpt; `out`, written below, is presumably a parameter.
//
// Writes the kBl2seqUrl template with its "query" and "subject" tags replaced
// by the gi numbers of rows 0 and 1 of m_AV, followed by a newline.
3490  const CBioseq_Handle& query_handle=m_AV->GetBioseqHandle(0);
3491  const CBioseq_Handle& subject_handle=m_AV->GetBioseqHandle(1);
// NOTE(review): query_seqid and subject_seqid are not used in the lines
// visible here.
3492  CSeq_id_Handle query_seqid = GetId(query_handle, eGetId_Best);
3493  CSeq_id_Handle subject_seqid = GetId(subject_handle, eGetId_Best);
3494  TGi query_gi = FindGi(query_handle.GetBioseqCore()->GetId());
3495  TGi subject_gi = FindGi(subject_handle.GetBioseqCore()->GetId());
3496 
3497  string url_link = CAlignFormatUtil::MapTemplate(kBl2seqUrl,"query",GI_TO(TIntId, query_gi));
3498  url_link = CAlignFormatUtil::MapTemplate(url_link,"subject", GI_TO(TIntId, subject_gi));
3499 
3500  out << url_link << "\n";
3501 }
3502 
3503 
3504 void CDisplaySeqalign::x_DisplayMpvAnchor(CNcbiOstream& out,SAlnInfo* aln_vec_info)
3506  //add id anchor for mapviewer link
3507  string type_temp = m_BlastType;
3508  type_temp = NStr::TruncateSpaces(NStr::ToLower(type_temp));
3509  if(m_AlignOption&eHtml &&
3510  (type_temp.find("genome") != string::npos ||
3511  type_temp == "mapview" ||
3512  type_temp == "mapview_prev" ||
3513  type_temp == "gsfasta" || type_temp == "gsfasta_prev")){
3514  string subj_id_str;
3515  char buffer[126];
3516  int master_start = m_AV->GetSeqStart(0) + 1;
3517  int master_stop = m_AV->GetSeqStop(0) + 1;
3518  int subject_start = m_AV->GetSeqStart(1) + 1;
3519  int subject_stop = m_AV->GetSeqStop(1) + 1;
3520 
3521  m_AV->GetSeqId(1).GetLabel(&subj_id_str, CSeq_id::eContent);
3522 
3523  sprintf(buffer, "<a name = %s_%d_%d_%d_%d_%d></a>",
3524  subj_id_str.c_str(), aln_vec_info->score,
3525  min(master_start, master_stop),
3526  max(master_start, master_stop),
3527  min(subject_start, subject_stop),
3528  max(subject_start, subject_stop));
3529 
3530  out << buffer << "\n";
3531  }
3532 }
3533 
3534 string CDisplaySeqalign::x_FormatAlnBlastInfo(SAlnInfo* aln_vec_info)
3536  string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3537  CAlignFormatUtil::GetScoreString(aln_vec_info->evalue,
3538  aln_vec_info->bits, 0, 0, evalue_buf,
3539  bit_score_buf, total_bit_buf, raw_score_buf);
3540 
3541  string alignParams = m_AlignTemplates->alignInfoTmpl;
3542 
3543  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_curr_num",NStr::IntToString(m_currAlignHsp + 1));
3544  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"alnSeqGi",m_CurrAlnID_Lbl);//not used now
3545 
3546  string hidePrevNaviagtion,hideNextNaviagtion, hideFirstNavigation;
3547  if(m_currAlignHsp == 0) {
3548  hidePrevNaviagtion = "disabled=\"disabled\"";
3549  hideFirstNavigation = "hidden";
3550  }
3551  if (m_currAlignHsp == m_TotalHSPNum - 1) {
3552  hideNextNaviagtion = "disabled=\"disabled\"";
3553  }
3554 
3555  const CRange<TSeqPos>& range = m_AV->GetSeqRange(1);
3556  TSeqPos from = (range.GetFrom()> range.GetTo()) ? range.GetTo() : range.GetFrom() + 1;
3557  TSeqPos to = (range.GetFrom()> range.GetTo()) ? range.GetFrom() : range.GetTo() + 1;
3558  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"fromHSP",from);
3559  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"toHSP",to);
3560 
3561  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_prev",hidePrevNaviagtion);
3562  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_next",hideNextNaviagtion);
3563  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_fst",hideFirstNavigation);
3564  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"firstSeqID",m_CurrAlnAccession);//displays the first accession if multiple
3565  //current segment number = m_currAlignHsp + 1
3566  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_next_num",NStr::IntToString(m_currAlignHsp + 2));
3567  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_prev_num",NStr::IntToString(m_currAlignHsp));
3568 
3569 
3570  if (m_SeqalignSetRef->Get().front()->CanGetType() &&
3571  m_SeqalignSetRef->Get().front()->GetType() == CSeq_align_Base::eType_global)
3572  {
3573  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score",aln_vec_info->score);
3574  }
3575  else
3576  {
3577  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score",bit_score_buf);
3578  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score_bits",aln_vec_info->score);
3579  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_eval",evalue_buf);
3580  if (aln_vec_info->sum_n > 0) {
3581  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_sumN",aln_vec_info->sum_n);
3582  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"sumNshow","shown");
3583  }
3584  else {
3585  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_sumN","");
3586  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"sumNshow","");
3587  }
3588 
3589  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_params_method",(aln_vec_info->comp_adj_method == 1 || aln_vec_info->comp_adj_method == 2) ? m_AlignTemplates->alignInfoMethodTmpl: "");
3590  if (aln_vec_info->comp_adj_method == 1){
3591  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","Composition-based stats.");
3592  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns???
3593  }
3594  else if (aln_vec_info->comp_adj_method == 2){
3595  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","Compositional matrix adjust.");
3596  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns???
3597  }
3598  else {
3599  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","hidden");//???? is that the same for all aligns???
3600  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","");
3601  }
3602  }
3603  return alignParams;
3604 }
3605 //sumN - hidden, cbs_md - shown, aln_frame - hidden
3606 
3607 
3608 
// NOTE(review): listing lines 3609 (the start of this signature; `out`, used
// below, is presumably declared there), 3613 (presumably the call that fills
// the four score strings and takes the arguments on the next two lines) and
// 3627 (the condition selecting the raw-score-only output) are absent from
// this excerpt.
//
// Writes the plain-text score line of one HSP: " NW Score = <score>" when the
// first alignment of m_SeqalignSetRef is of global type; otherwise either the
// raw score only, or bit score, raw score, Expect and, for comp_adj_method 1
// or 2, the method name. A final newline is always written.
3610  SAlnInfo* aln_vec_info)
3611 {
3612  string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3614  aln_vec_info->bits, 0, 0, evalue_buf,
3615  bit_score_buf, total_bit_buf, raw_score_buf);
3616 
// NOTE(review): first_aln is not used in the lines visible here.
3617  CRef<CSeq_align> first_aln = m_SeqalignSetRef->Get().front();
3618  if (m_SeqalignSetRef->Get().front()->CanGetType() &&
3619  m_SeqalignSetRef->Get().front()->GetType() == CSeq_align_Base::eType_global)
3620  {
3621  out<<" NW Score = "<< aln_vec_info->score;
3622  }
3623  else
3624  {
3625  // Disable bits score/evalue fields and only show raw
3626  // score for RMBlastN -RMH-
3628  {
// This branch ends its line itself, so together with the final newline below
// it is followed by an empty line.
3629  out<<" Score = "<<aln_vec_info->score<<"\n";
3630  }else
3631  {
3632  out<<" Score = "<<bit_score_buf<<" ";
3633  out<<"bits ("<<aln_vec_info->score<<"),"<<" ";
3634  out<<"Expect";
// "(sum_n)" is inserted after "Expect" only for a positive sum_n.
3635  if (aln_vec_info->sum_n > 0) {
3636  out << "(" << aln_vec_info->sum_n << ")";
3637  }
3638  out << " = " << evalue_buf;
3639  if (aln_vec_info->comp_adj_method == 1)
3640  out << ", Method: Composition-based stats.";
3641  else if (aln_vec_info->comp_adj_method == 2)
3642  out << ", Method: Compositional matrix adjust.";
3643  }
3644  }
3645  out << "\n";
3646 }
3647 
3648 //1. Display defline(s)
3649 //2. Display Gene info
3650 //3. Display Bl2Seq TBLASTX link
3651 //4. add id anchor for mapviewer link
// NOTE(review): listing lines 3652 (the start of this signature; `out`, used
// below, is presumably declared there), 3659-3660, 3669-3670, 3672 and 3678
// are absent from this excerpt. They presumably hold the conditions that open
// the blocks closed below, so the nesting of the visible braces cannot be
// verified from here.
//
// Writes one alignment: when show_defline is set, first the deflines, gene
// info and related header parts; then the alignment parameters and the row
// data.
3653  SAlnInfo* aln_vec_info,
3654  bool show_defline)
3655 {
// Sorting controls are requested only when the defline part is shown (the
// flag is set to true inside the show_defline block).
3656  bool showSortControls = false;
3657  if(show_defline) {
3658  const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1);
3661  //1. Display defline(s),Gene info
3662  string deflines = x_PrintDefLine(bsp_handle, aln_vec_info);
3663  out<< deflines;
3664  //2. Format Gene info
3665  string geneInfo = x_DisplayGeneInfo(bsp_handle,aln_vec_info);
3666  out<< geneInfo;
3667  }
3668 
3671  //3. Display Bl2Seq TBLASTX link
3673  }
3674  out << "\n";
3675  }
3676  showSortControls = true;
3677  }
3679  //4. add id anchor for mapviewer link
3680  x_DisplayMpvAnchor(out,aln_vec_info);
3681  }
3682 
3683  //Displays sorting controls, features, Score, Expect, Idnt,Gaps,strand,positives,frames etc
3684  x_DisplaySingleAlignParams(out, aln_vec_info,showSortControls);
3685  x_DisplayRowData(aln_vec_info->alnRowInfo,out);
3686 }
3687 
3688 
3689 
3690 //fill one defline info, using <@ALN_DEFLINE_ROW@>
// Returns m_AlignTemplates->alnDefLineTmpl filled for one defline: sequence
// info (as a link when alnDispParams->id_url is set), sequence length, HSP
// count, id label, linkout / download strings and the HTML-encoded title.
// Side effect: when isFirst is true, m_TotalHSPNum is updated.
// NOTE(review): listing lines 3693 (presumably the opening brace) and 3699 are
// absent from this excerpt. `hideDefline` is not used in the visible lines and
// is presumably consumed by line 3699 - confirm.
3691 string
3692 CDisplaySeqalign::x_MapDefLine(SAlnDispParams *alnDispParams,bool isFirst, bool linkout,bool hideDefline,int seqLength)
3694  /*
3695  string firstSeqClassInfo = (isFirst) ? "" : "hidden"; //hide ">" sign if not first seq align
3696  string alnDefLine = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnDefLineTmpl,"alnSeqSt",firstSeqClassInfo);
3697  */
3698  string alnDefLine = m_AlignTemplates->alnDefLineTmpl;
3700 
// The "gi|N|" prefix is shown only with eShowGi and a gi above ZERO_GI.
3701  string alnGi = (m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI) ?
3702  "gi|" + NStr::NumericToString(alnDispParams->gi) + "|" : "";
3703  string seqid;
// The id text is left empty only for ids that contain "gnl|BL_ORD_ID" and do
// not contain "lcl|Subject_" (the condition reads: !BL_ORD_ID || Subject_).
3704  if(!(alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) ||
3705  alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos){
3706  if (m_UseLongSeqIds) {
3707  seqid = alnDispParams->seqID->AsFastaString();
3708  }
3709  else {
3710  seqid = CAlignFormatUtil::GetBareId(*alnDispParams->seqID);
3711  }
3712  }
3713 
// With a URL the sequence info comes from alnSeqInfoTmpl, and aln_gi/aln_seqid
// are mapped after seq_info has been inserted; without a URL seq_info is the
// plain gi prefix plus id.
3714  if(alnDispParams->id_url != NcbiEmptyString) {
3715  string seqInfo = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnSeqInfoTmpl,"aln_url",alnDispParams->id_url);
3716  string trgt = (m_AlignOption & eNewTargetWindow) ? "TARGET=\"EntrezView\"" : "";
3717 
3718  seqInfo = CAlignFormatUtil::MapTemplate(seqInfo,"aln_target",trgt);
3719  seqInfo = CAlignFormatUtil::MapTemplate(seqInfo,"aln_rid",m_Rid);
3720 
3721  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",seqInfo);
3722  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_gi",alnGi);
3723  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_seqid",seqid);
3724  }
3725  else {
3726  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",alnGi + seqid);
3727  }
3728  string hspNum,isFirstDflAttr;
// Only the first defline carries the HSP count: taken from the TOTAL_HSPS
// request value if present, otherwise from the m_AlnLinksParams entry of the
// row 1 id; a count of 0 is shown as an empty string. Other deflines are
// marked "hidden" through the frstDfl tag.
3729  if(isFirst) {
3730  string totalHsps = m_Ctx ? m_Ctx->GetRequestValue("TOTAL_HSPS").GetValue() : kEmptyStr; //Future use
3731  m_TotalHSPNum = totalHsps.empty() ? m_AlnLinksParams[m_AV->GetSeqId(1).GetSeqIdString()].hspNumber : NStr::StringToInt(totalHsps);
3732  hspNum = (m_TotalHSPNum != 0) ? NStr::IntToString(m_TotalHSPNum) : "";
3733  }
3734  else {
3735  isFirstDflAttr = "hidden";
3736  }
3737  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnSeqLength", NStr::IntToString(seqLength));
3738  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnHspNum",hspNum);
3739  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"frstDfl",isFirstDflAttr);
3740  string alnIdLbl = (alnDispParams->gi != ZERO_GI) ?
3741  NStr::NumericToString(alnDispParams->gi) : alnDispParams->seqID->GetSeqIdString();
3742  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnIdLbl",alnIdLbl);
// Linkout and download strings stay empty unless `linkout` is requested.
3743  string linkoutStr, dnldLinkStr;
3744  if (linkout) {
3745  linkoutStr = (!alnDispParams->linkoutStr.empty()) ? alnDispParams->linkoutStr : "";
3746  dnldLinkStr = alnDispParams->dumpGnlUrl;
3747  }
3748  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine ,"alnLinkout",linkoutStr);
3749  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine ,"dndlLinkt",dnldLinkStr);
// The title is HTML-encoded before it is placed into the page.
3750  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnTitle",CHTMLHelper::HTMLEncode(alnDispParams->title));
3751  return alnDefLine;
3752 }
3753 
3754 string
3755 CDisplaySeqalign::x_InitDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info)
3757  string deflines;
3758  string firstDefline;
3760  list<string>& use_this_seqid = aln_vec_info->use_this_seqid;
3761  if(bsp_handle){
3762  const CRef<CBlast_def_line_set> bdlRef = CSeqDB::ExtractBlastDefline(bsp_handle);
3763  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
3764  bool isFirst = true;
3765  TGi firstGi = ZERO_GI;
3766  m_NumBlastDefLines = 0;
3767  m_cur_align++;
3768  SAlnDispParams *alnDispParams;
3769  //fill length
3770  int seqLength = bsp_handle.GetBioseqLength();
3771  if(bdl.empty()){ //no blast defline struct, should be no such case now
3772  //actually not so fast...as we now fetch from entrez even when it's not in blast db
3773  //there is no blast defline in such case.
3774  alnDispParams = x_FillAlnDispParams(bsp_handle);
3775  string alnDefLine = x_MapDefLine(alnDispParams,isFirst,false,false,seqLength);
3776  m_CurrAlnID_Lbl = (alnDispParams->gi != ZERO_GI) ?
3777  NStr::NumericToString(alnDispParams->gi) : CAlignFormatUtil::GetLabel(alnDispParams->seqID);
3778 
3779  if (m_UseLongSeqIds || alnDispParams->seqID->IsLocal()) {
3780  m_CurrAlnAccession = alnDispParams->seqID->AsFastaString();
3781  }
3782  else {
3784  *alnDispParams->seqID);
3785  }
3787  x_InitAlignLinks(alnDispParams,bdl,eDisplayResourcesLinks);
3788  }
3789  delete alnDispParams;
3790  firstDefline = alnDefLine;
3792  } else {
3793  //format each defline
3794  int numBdl = 0;
3795  for(list< CRef< CBlast_def_line > >::const_iterator
3796  iter = bdl.begin(); iter != bdl.end(); iter++){
3797  alnDispParams = x_FillAlnDispParams(*iter,bsp_handle,use_this_seqid,firstGi,numBdl);
3798  if(alnDispParams) {
3799  numBdl++;
3800  bool hideDefline = (numBdl > 1)? true : false;
3801  string alnDefLine = x_MapDefLine(alnDispParams,isFirst,m_AlignOption&eLinkout,hideDefline,seqLength);
3802  if(isFirst){
3803  const CSeq_id& aln_id = m_AV->GetSeqId(1);
3804  TGi alnGi;
3805  CRef<CSeq_id> dispId = CAlignFormatUtil::GetDisplayIds(bsp_handle,aln_id,use_this_seqid,&alnGi);
3807  if(alnGi == ZERO_GI) {
3809  }
3810  else {
3812  }
3813 
3814  firstGi = alnGi;
3815 
3816  //This should probably change on dispId
3817  if (m_UseLongSeqIds) {
3819  alnDispParams->seqID->AsFastaString();
3820  }
3821  else {
3824  *alnDispParams->seqID);
3825  }
3826  if(m_CurrAlnAccession.find("gnl|BL_ORD_ID") != string::npos ||
3827  m_CurrAlnAccession.find("lcl|Subject_") != string::npos){
3828  ///Get first token of the title
3829  vector <string> parts;
3830  NStr::Split(alnDispParams->title," ",parts);
3831  if(parts.size() > 0) {
3832  m_CurrAlnAccession = parts[0];
3833  }
3834  }
3835  }
3836  //1. isFirst && firstGi == ZERO_GI - covers resource links for non-gis databases
3837  //2. alnDispParams->gi == firstGi - covers resource links for gi databases/
3838  if( (isFirst && firstGi == ZERO_GI) || (alnDispParams->gi == firstGi && firstGi != ZERO_GI) ) {
3839  //Get custom links only for the first gi
3840  int linksDisplayOption = eDisplayResourcesLinks;
3841  if(seqLength > k_GetSubseqThreshhold) {
3842  linksDisplayOption += eDisplayDownloadLink;
3843  }
3844  x_InitAlignLinks(alnDispParams,bdl,linksDisplayOption);
3845  firstDefline = alnDefLine;
3846  }
3847  else {
3848  deflines += alnDefLine; //this contains all deflines except the first one
3849  }
3850  if(isFirst) {
3851  isFirst = false;
3852  }
3853  if(m_AlignTemplates->alnTitlesTmpl.empty() && !firstDefline.empty()) {
3854  m_NumBlastDefLines = 1;
3855  break;
3856  }
3857 
3858  delete alnDispParams;
3859  }
3860  }
3861  m_NumBlastDefLines = numBdl;
3862  }
3863  if(m_NumBlastDefLines == 1) {
3864  deflines = firstDefline;
3865  }
3866  else {
3867  string alnTitles = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnTitlesTmpl,"seqTitles",deflines);
3869  alnTitleslnk = CAlignFormatUtil::MapTemplate(alnTitleslnk,"allTitleNum",NStr::IntToString(m_NumBlastDefLines));
3870  alnTitleslnk = CAlignFormatUtil::MapTemplate(alnTitleslnk,"acc",m_CurrAlnAccession);
3871  alnTitleslnk = CAlignFormatUtil::MapTemplate(alnTitleslnk,"rid",m_Rid);
3872 
3873 
3874  deflines = firstDefline + alnTitleslnk + alnTitles;
3875  }
3876  }
3877  return deflines;
3878 }
3879 
3880 
3881 
// Builds the per-subject alignment header by filling the placeholders of
// m_AlignTemplates->alignHeaderTmpl and returns the resulting string.
//   bsp_handle   - subject bioseq handle; when it evaluates to false the
//                  defline, custom-link and linkout strings stay empty.
//   aln_vec_info - forwarded unchanged to x_InitDefLinesHeader().
// Side effect visible here: m_CurrAlnID_DbLbl is reset to "".
// NOTE(review): listing lines 3884-3885 (function opening) and 3908 are absent
// from this extract; alnSeqTitlesNum used below is presumably declared on the
// missing line 3908 - confirm against the real file.
3882  string
3883  CDisplaySeqalign::x_FormatDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info)
3886  string deflines, linkOutStr,customLinkStr;
// NOTE(review): linkoutStr (a list) is not referenced in the visible body.
3887  list<string> linkoutStr;
3888 
3889  m_CurrAlnID_DbLbl = "";
3890  if(bsp_handle){
3891  deflines = x_InitDefLinesHeader(bsp_handle,aln_vec_info);
3892 
// Concatenate all entries of m_CustomLinksList / m_LinkoutList into one string each.
3893  if(m_CustomLinksList.size() > 0) {
3894  ITERATE(list<string>, iter_custList, m_CustomLinksList){
3895  customLinkStr += *iter_custList;
3896  }
3897  }
3898  if(m_LinkoutList.size() > 0) {
3899  ITERATE(list<string>, iter_List, m_LinkoutList){
3900  linkOutStr += *iter_List;
3901  }
3902  }
3903  }
3904  //fill deflines
3905  string alignInfo = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alignHeaderTmpl,"aln_deflines",deflines);
3906 
3907  //fill multiple titles - not used now
// "alnSeqTitlesShow" is empty when there are more deflines than k_MaxDeflinesToShow, otherwise "hidden".
3909  string alnSeqTitlesShow = (m_NumBlastDefLines > k_MaxDeflinesToShow) ? "" : "hidden";
3910  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqTitlesNum", NStr::IntToString(alnSeqTitlesNum));
3911  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqTitlesShow",alnSeqTitlesShow);
3912 
3913 
3914 
3915  //fill sequence checkbox
// The checkbox is shown only when eSequenceRetrieval is set and m_CanRetrieveSeq is true.
3916  string seqRetrieval = ((m_AlignOption&eSequenceRetrieval) && m_CanRetrieveSeq) ? "" : "hidden";
3917  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqGi",m_CurrAlnID_Lbl);
3918  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnQueryNum",NStr::IntToString(m_QueryNumber));
3919  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqRet",seqRetrieval);
3920 
3921 
3922  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnLinkOutLinks",linkOutStr);
3923  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnCustomLinks",customLinkStr);
3924  //fill id info
3925  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"firstSeqID",m_CurrAlnAccession);
3926 
// "dwGnbn" is "hidden" unless the custom links mention GenBank or GenPept.
3927  string isGenbankAttr = (NStr::Find(customLinkStr,"GenBank") == NPOS && NStr::Find(customLinkStr,"GenPept") == NPOS)? "hidden" : "";
3928  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"dwGnbn",isGenbankAttr);
3929 
// "hideDndl" is "hidden" only for the "sra" blast type.
3930  string hideDndl = (m_BlastType == "sra")? "hidden":"";
3931  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"hideDndl",hideDndl);
3932  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"aln_ord_pos",NStr::IntToString(m_cur_align));
3933 
3934  //The next two lines are not used for now
3935  //alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnFASTA",m_FASTAlinkUrl);
3936  //alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnRegFASTA",m_AlignedRegionsUrl);
3937 
3938  //fill sort info
// Sort controls are produced only when there is more than one HSP in total;
// otherwise the "sortInfo" placeholder is replaced with an empty string.
3939  string sortInfo;
3940  if(m_TotalHSPNum > 1) {
3941  //3. Display sort info
3942  sortInfo = x_FormatAlignSortInfo();
3943  }
3944  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"sortInfo",sortInfo);
3945 
3946  return alignInfo;
3947 }
3948 
3949 
3950 
3951 
3952 //1. Display defline(s)
3953 //2. Display Gene info
3954 //3. Display Bl2Seq TBLASTX link
3956  SAlnInfo* aln_vec_info,
3957  bool show_defline)
3958 {
3959  string alignHeader;
3960  string sortOneAln = m_Ctx ? m_Ctx->GetRequestValue("SORT_ONE_ALN").GetValue() : kEmptyStr;
3961  if(show_defline) {
3962  const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1);
3963  //1. Display defline(s),Gene info
3964  string alignHeader = x_FormatDefLinesHeader(bsp_handle, aln_vec_info);
3965  /**2. Format Gene info
3966  string geneInfo = x_DisplayGeneInfo(bsp_handle,aln_vec_info);
3967  alignHeader = CAlignFormatUtil::MapTemplate(alignHeader,"aln_gene_info",geneInfo); **/
3968  if(sortOneAln.empty()) {
3969 
3970  out<< alignHeader;
3972  //3. Display Bl2Seq TBLASTX link
3974  }
3975 
3976  }
3977  //start counting hsp
3978  string currHsp = m_Ctx ? m_Ctx->GetRequestValue("HSP_START").GetValue() : kEmptyStr;
3979  m_currAlignHsp = currHsp.empty() ? 0: NStr::StringToInt(currHsp);
3980  }
3982  //4. add id anchor for mapviewer link
3983  x_DisplayMpvAnchor(out,aln_vec_info);
3984  }
3985 
3986  //Displays sorting controls, features, Score, Expect, Idnt,Gaps,strand,positives,frames etc
3987  string alignInfo = x_FormatSingleAlign(aln_vec_info);
3988  out << alignInfo;
3989 }
3990 
3992  SAlnInfo* aln_vec_info,
3993  bool show_defline)
3994 {
3995 
3996  m_AV = aln_vec_info->alnvec;
3997  //Calculate Dynamic Features in aln_vec_info
3998  x_PrepareDynamicFeatureInfo(aln_vec_info);
3999  //Calculate row data for actual alignment display
4000  aln_vec_info->alnRowInfo = x_PrepareRowData();
4001 
4002  //Calculate indentity data in aln_vec_info
4004  x_PrepareIdentityInfo(aln_vec_info);
4005  }
4006  if(!m_AlignTemplates) {
4007  x_ShowAlnvecInfo(out,aln_vec_info,show_defline);
4008  }
4009  else {
4010  x_ShowAlnvecInfoTemplate(out,aln_vec_info,show_defline);
4011  }
4012 
4013  delete aln_vec_info->alnRowInfo;
4014 
4015  out<<"\n";
4016 }
4017 
4018 
4019 //Displays features, Score Expect, Idnt,Gaps,strand
4021  SAlnInfo* aln_vec_info,
4022  bool showSortControls)
4023 {
4025 
4026  if(showSortControls && m_AlignOption&eHtml &&
4027  m_AlnLinksParams[m_AV->GetSeqId(1).GetSeqIdString()].hspNumber > 1 &&
4029  //3. Display sort info
4030  x_DisplayAlignSortInfo(out,aln_vec_info->id_label);
4031  }
4032 
4033  //output dynamic feature lines
4034  if(aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3 ){
4035  //6. Display Dynamic Features
4036  x_PrintDynamicFeatures(out,aln_vec_info);
4037  }
4038 
4039  //7. Display score,bits,expect,method
4040  x_DisplayAlignInfo(out,aln_vec_info);
4041  }
4042 
4044  //8.Display Identities,positives,strand, frames etc
4045  //x_DisplayIdentityInfo(aln_vec_info->alnRowInfo, out);
4047  (int)m_AV->GetAlnStop(),
4048  aln_vec_info->identity,
4049  aln_vec_info->positive,
4050  aln_vec_info->match,
4051  aln_vec_info->gap,
4052  m_AV->StrandSign(0),
4053  m_AV->StrandSign(1),
4054  aln_vec_info->alnRowInfo->frame[0],
4055  aln_vec_info->alnRowInfo->frame[1],
4056  ((m_AlignType & eProt) != 0 ? true : false));
4057  }
4058 }
4059 
// Fills the HSP-link placeholders ("alnHSPLinks", "multiHSP", "firstSeqID" and,
// when links exist, "fromHSP"/"toHSP") in alignInfo.
//   alignInfo - template text; modified in place and also returned by value.
// Each entry of m_HSPLinksList is itself a template that receives the subject
// range of the current alignment (row 1 of m_AV) widened by 5% on each side.
// NOTE(review): listing line 4062 (presumably the opening brace) is absent
// from this extract.
4060  //<div class="dflLnk hsp <@multiHSP@>"><label>Range <@fromHSP@> to <@toHSP@>:</label><@alnHSPLinks@></div>
4061  string CDisplaySeqalign:: x_FormatAlnHSPLinks(string &alignInfo)
4063 
4064  string hspLinks;
4065  if(m_HSPLinksList.size() > 0) {
4066  const CRange<TSeqPos>& range = m_AV->GetSeqRange(1);
// NOTE(review): "+ 1" binds only to the false branch of each ternary, so a
// range with From > To is reported without the +1 adjustment - confirm that
// this asymmetry is intended.
4067  TSeqPos from = (range.GetFrom()> range.GetTo()) ? range.GetTo() : range.GetFrom() + 1;
4068  TSeqPos to = (range.GetFrom()> range.GetTo()) ? range.GetFrom() : range.GetTo() + 1;
4069 
4070  int addToRange = (int)((to - from) * 0.05);//add 5% to each side
// The widened start is clamped at 0; the widened end is not clamped here.
4071  int fromAdjust = max(0,(int)from - addToRange);
4072  int toAdjust = to + addToRange;
// NOTE(review): customLinkStr is declared but not used in this function.
4073  string customLinkStr;
4074  ITERATE(list<string>, iter_custList, m_HSPLinksList){
4075  string singleLink = CAlignFormatUtil::MapTemplate(*iter_custList,"from",fromAdjust);
4076  singleLink = CAlignFormatUtil::MapTemplate(singleLink,"to",toAdjust);
4077  singleLink = CAlignFormatUtil::MapTemplate(singleLink,"fromHSP",from);
4078  singleLink = CAlignFormatUtil::MapTemplate(singleLink,"toHSP",to);
4079  hspLinks += singleLink;
4080  }
4081  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"fromHSP",from);
4082  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"toHSP",to);
4083  }
// With no links the whole HSP-links element is marked "hidden".
4084  string multiHSP = (hspLinks.empty()) ? "hidden" : "" ;
4085 
4086 
4087  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnHSPLinks",hspLinks);
4088  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"multiHSP",multiHSP);
4089  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"firstSeqID",m_CurrAlnAccession);
4090 
4091  return alignInfo;
4092 }
4093 
// Template-based formatting of one alignment (HSP): returns the info section
// (scores, identities, dynamic features, HSP links) followed by the formatted
// alignment rows.
//   aln_vec_info - alignment data; alnRowInfo is read for the row display.
// Side effect visible here: m_currAlignHsp is incremented once per call.
// NOTE(review): listing lines 4096 (function opening), 4099 (presumably the
// condition that opens the block closed at 4111) and 4117 (presumably the
// declaration of alignRowsTemplate) are absent from this extract.
4094  //Displays features, Score Expect, Idnt,Gaps,strand
4095  string CDisplaySeqalign::x_FormatSingleAlign(SAlnInfo* aln_vec_info)
4097  string alignInfo;
4098 
4100 
4101  //7. Display score,bits,expect,method
4102  alignInfo = x_FormatAlnBlastInfo(aln_vec_info);
4103 
4104  //8.Display Identities,positives,strands, frames etc
4105  alignInfo = x_FormatIdentityInfo(alignInfo, aln_vec_info);
4106 
4107  //output dynamic feature lines
4108  //only for aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3
4109  //6. Display Dynamic Features
4110  alignInfo = x_FormatDynamicFeaturesInfo(alignInfo, aln_vec_info);
4111  }
4112 
// If nothing was formatted above, start from the raw alignInfoTmpl so the
// HSP-link placeholders can still be substituted.
4113  alignInfo = (alignInfo.empty()) ? m_AlignTemplates->alignInfoTmpl : alignInfo;
4114  alignInfo = x_FormatAlnHSPLinks(alignInfo);
4115 
4116  m_currAlignHsp++;
4118 
// Format the alignment rows and stamp them with the running HSP number and
// the current alignment label.
4119  string alignRows = x_DisplayRowData(aln_vec_info->alnRowInfo);
4120  alignRows = CAlignFormatUtil::MapTemplate(alignRowsTemplate,"align_rows",alignRows);
4121  alignRows = CAlignFormatUtil::MapTemplate(alignRows,"aln_curr_num",NStr::IntToString(m_currAlignHsp));
4122  alignRows = CAlignFormatUtil::MapTemplate(alignRows,"alnSeqGi",m_CurrAlnID_Lbl);
4123 
4124  alignInfo += alignRows;
4125  return alignInfo;
4126 }
4127 
4128 
4129 
// Resets and (conditionally) recomputes the dynamic-feature fields of
// aln_vec_info: feat5, feat3, feat_list, actual_range and subject_gi.
// NOTE(review): listing lines 4131 (function opening), 4136 and 4138 (the
// outer condition, of which only the subject-length operand at 4137 is
// visible) and 4147 (presumably the declaration of wid) are absent from this
// extract, so the exact gating condition cannot be stated from here.
4130  void CDisplaySeqalign::x_PrepareDynamicFeatureInfo(SAlnInfo* aln_vec_info)
4132  aln_vec_info->feat5 = NULL;
4133  aln_vec_info->feat3 = NULL;
4134  aln_vec_info->feat_list.clear();
4135  //Calculate Dynamic Features in aln_vec_info
4137  && (int)m_AV->GetBioseqHandle(1).GetBioseqLength()
4139  if(m_DynamicFeature){
4140  const CRange<TSeqPos>& range = m_AV->GetSeqRange(1);
4141  aln_vec_info->actual_range = range;
// Normalize so that actual_range always has From <= To.
4142  if(range.GetFrom() > range.GetTo()){
4143  aln_vec_info->actual_range.Set(range.GetTo(), range.GetFrom());
4144  }
4145  string id_str;
4146  const CBioseq_Handle& subject_handle=m_AV->GetBioseqHandle(1);
4148  wid->GetLabel(&id_str, CSeq_id::eBoth);
4149  aln_vec_info->subject_gi = FindGi(subject_handle.GetBioseqCore()->GetId());
// feat5 / feat3 are passed to GetFeatInfo and are tested for non-null by the
// display code afterwards; presumably they are output parameters - confirm.
4150  aln_vec_info->feat_list = m_DynamicFeature->GetFeatInfo(id_str, aln_vec_info->actual_range, aln_vec_info->feat5, aln_vec_info->feat3, 2);
4151  }
4152  }
4153 }
4154 
4155 static string s_MapFeatureURL(string viewerURL,
4156  string textSeqID,
4157  string db,
4158  int fromRange,
4159  int toRange,
4160  string rid)
4161 {
4162  string url_link = CAlignFormatUtil::MapTemplate(viewerURL,"db",db);
4163  url_link = CAlignFormatUtil::MapTemplate(url_link,"gi",textSeqID);
4164  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",rid);
4165  url_link = CAlignFormatUtil::MapTemplate(url_link,"from",fromRange);
4166  url_link = CAlignFormatUtil::MapTemplate(url_link,"to",toRange);
4167  return url_link;
4168 }
4169 
// Formats one dynamic feature using m_AlignTemplates->alignFeatureTmpl.
//   viewerURL  - URL template handed to s_MapFeatureURL()
//   subject_gi - GI of the subject; a link is produced only when it is > ZERO_GI
//   fromRange, toRange - feature range; 1 is added to both before they are
//                        placed in the URL
//   featText   - text shown for the feature
// Returns the filled feature template: with a link (alignFeatureLinkTmpl) when
// a GI is available, otherwise with the bare feature text.
// NOTE(review): listing line 4183 (the second argument of s_MapFeatureURL) is
// absent from this extract; textSeqID is declared but never assigned in the
// visible lines.
4170  string CDisplaySeqalign::x_FormatOneDynamicFeature(string viewerURL,
4171  TGi subject_gi,
4172  int fromRange,
4173  int toRange,
4174  string featText)
4175 {
4176  string alignFeature = m_AlignTemplates->alignFeatureTmpl;
4177  string textSeqID;
4178 
4179  if(subject_gi > ZERO_GI) {
4180  //if(CAlignFormatUtil::GetTextSeqID((CConstRef<CSeq_id>)&m_AV->GetSeqId(1))) {
// First expand "aln_feat_info" into the link sub-template, then fill that
// sub-template's own "aln_feat_url" and "aln_feat" placeholders.
4181  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_info",m_AlignTemplates->alignFeatureLinkTmpl);
4182  string url = s_MapFeatureURL(viewerURL,
4184  string(m_IsDbNa ? "nucleotide" : "protein"),
4185  fromRange + 1,
4186  toRange + 1,
4187  m_Rid);
4188  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_url",url);
4189  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat",featText);
4190  }
4191  else {
// No GI: show the feature text without a hyperlink.
4192  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_info",featText);
4193  }
4194  return alignFeature;
4195 }
4196 
4197 
// Fills the "all_aln_features" and "aln_feat_show" placeholders of alignInfo
// with the dynamic features stored in aln_vec_info and returns the result.
// Features overlapping the aligned range (feat_list) take precedence; only
// when that list is empty are the flanking features (feat5 / feat3) shown.
// When nothing is produced, "aln_feat_show" is set to "hidden".
// NOTE(review): listing line 4200 (presumably the opening brace) is absent
// from this extract.
4198  //6. Display Dynamic Features
4199  string CDisplaySeqalign::x_FormatDynamicFeaturesInfo(string alignInfo, SAlnInfo* aln_vec_info)
4201  string alignParams = alignInfo;
4202  //string alignFeature = m_AlignTemplates->alignFeatureTmpl;
4203 
4204 
4205  string viewerURL = CAlignFormatUtil::GetURLFromRegistry("ENTREZ_SUBSEQ_TM");
4206 
4207  string allAlnFeatures = "";
4208  if(aln_vec_info->feat_list.size() > 0) { //has feature in this range
4209  ITERATE(vector<SFeatInfo*>, iter, aln_vec_info->feat_list){
4210 
4211  string alignFeature = x_FormatOneDynamicFeature(viewerURL,
4212  aln_vec_info->subject_gi,
4213  (*iter)->range.GetFrom(),
4214  (*iter)->range.GetTo(),
4215  (*iter)->feat_str);
4216 
4217  ///TO DO: NO hyperlink if aln_vec_info->subject_gi == 0
4218 
4219  allAlnFeatures += alignFeature;
4220  }
4221  } else { //show flank features
4222  if(aln_vec_info->feat5 || aln_vec_info->feat3){
4223  //TO DO: Check if we need that
4224  //out << " Features flanking this part of subject sequence:" << "\n";
4225  }
// 5' flank: the text is prefixed with (aligned-range start - feature end).
4226  if(aln_vec_info->feat5){
4227  string alignFeature = x_FormatOneDynamicFeature(viewerURL,
4228  aln_vec_info->subject_gi,
4229  aln_vec_info->feat5->range.GetFrom(),
4230  aln_vec_info->feat5->range.GetTo(),
4231  NStr::IntToString(aln_vec_info->actual_range.GetFrom() - aln_vec_info->feat5->range.GetTo()) + (string)" bp at 5' side: " + aln_vec_info->feat5->feat_str);
4232  allAlnFeatures += alignFeature;
4233  }
// 3' flank: the text is prefixed with (feature start - aligned-range end).
4234  if(aln_vec_info->feat3){
4235 
4236  string alignFeature = x_FormatOneDynamicFeature(viewerURL,
4237  aln_vec_info->subject_gi,
4238  aln_vec_info->feat3->range.GetFrom(),
4239  aln_vec_info->feat3->range.GetTo(),
4240  NStr::IntToString(aln_vec_info->feat3->range.GetFrom() - aln_vec_info->actual_range.GetTo()) + (string)" bp at 3' side: " + aln_vec_info->feat3->feat_str);
4241  allAlnFeatures += alignFeature;
4242  }
4243  }
4244  if(!allAlnFeatures.empty()) {
4245  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"all_aln_features",allAlnFeatures);
4246  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_feat_show","");
4247  }
4248  else {
4249  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"all_aln_features","");
4250  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_feat_show","hidden");
4251  }
4252  return alignParams;
4253 }
4254 
4255 void CDisplaySeqalign::x_PrintDynamicFeatures(CNcbiOstream& out,SAlnInfo* aln_vec_info)
4257  string l_EntrezSubseqUrl = CAlignFormatUtil::GetURLFromRegistry("ENTREZ_SUBSEQ");
4258 
4259  if(aln_vec_info->feat_list.size() > 0) { //has feature in this range
4260  out << " Features in this part of subject sequence:" << "\n";
4261  ITERATE(vector<SFeatInfo*>, iter, aln_vec_info->feat_list){
4262  out << " ";
4263  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4264  string featStr = s_MapFeatureURL(l_EntrezSubseqUrl,
4265  NStr::NumericToString(aln_vec_info->subject_gi),
4266  m_IsDbNa ? "nucleotide" : "protein",
4267  (*iter)->range.GetFrom() +1 ,
4268  (*iter)->range.GetTo() + 1,
4269  m_Rid);
4270  out << featStr;
4271  }
4272  out << (*iter)->feat_str;
4273  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4274  out << "</a>";
4275  }
4276  out << "\n";
4277  }
4278  } else { //show flank features
4279  if(aln_vec_info->feat5 || aln_vec_info->feat3){
4280  out << " Features flanking this part of subject sequence:" << "\n";
4281  }
4282  if(aln_vec_info->feat5){
4283  out << " ";
4284  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4285  string featStr = s_MapFeatureURL(l_EntrezSubseqUrl,
4286  NStr::NumericToString(aln_vec_info->subject_gi),
4287  m_IsDbNa ? "nucleotide" : "protein",
4288  aln_vec_info->feat5->range.GetFrom() + 1 ,
4289  aln_vec_info->feat5->range.GetTo() + 1,
4290  m_Rid);
4291 
4292  out << featStr;
4293  }
4294  out << aln_vec_info->actual_range.GetFrom() - aln_vec_info->feat5->range.GetTo()
4295  << " bp at 5' side: " << aln_vec_info->feat5->feat_str;
4296  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4297  out << "</a>";
4298  }
4299  out << "\n";
4300  }
4301  if(aln_vec_info->feat3){
4302  out << " ";
4303  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4304  string featStr = s_MapFeatureURL(l_EntrezSubseqUrl,
4305  NStr::NumericToString(aln_vec_info->subject_gi),
4306  m_IsDbNa ? "nucleotide" : "protein",
4307  aln_vec_info->feat3->range.GetFrom() + 1 ,
4308  aln_vec_info->feat3->range.GetTo() + 1,
4309  m_Rid);
4310 
4311  out << featStr;
4312  }
4313  out << aln_vec_info->feat3->range.GetFrom() - aln_vec_info->actual_range.GetTo()
4314  << " bp at 3' side: " << aln_vec_info->feat3->feat_str;
4315  if(m_AlignOption&eHtml){
4316  out << "</a>";
4317  }
4318  out << "\n";
4319  }
4320  }
4321  if(aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3 ){
4322  out << "\n";
4323  }
4324 }
4325 
// Converts masked sequence locations into alignment coordinates and appends
// them to loc_list.
//   loc_list - output list; one entry is appended per mask that matches a row
//              of m_AV and intersects that row's aligned range
//   masks    - masks to convert; a null pointer is a no-op
// For each mask only the first matching row is used (the row loop breaks).
// NOTE(review): listing line 4335 (presumably the per-mask declaration of
// alnloc) is absent from this extract.
4326  void
4327  CDisplaySeqalign::x_FillLocList(TSAlnSeqlocInfoList& loc_list,
4328  const list< CRef<CSeqLocInfo> >* masks) const
4329 {
4330  if ( !masks ) {
4331  return;
4332  }
4333 
4334  ITERATE(TMaskedQueryRegions, iter, *masks) {
4336  bool has_valid_loc = false;
4337  for (int i=0; i<m_AV->GetNumRows(); i++){
4338  const CSeq_interval& interval = (*iter)->GetInterval();
4339  TSeqRange loc_range(interval.GetFrom(), interval.GetTo());
4340  if(interval.GetId().Match(m_AV->GetSeqId(i)) &&
4341  m_AV->GetSeqRange(i).IntersectingWith(loc_range)){
4342  int actualAlnStart = 0, actualAlnStop = 0;
// On the positive strand From maps to the alignment start and To to the stop;
// on the other strand the two ends are swapped.
4343  if(m_AV->IsPositiveStrand(i)){
4344  actualAlnStart =
4345  m_AV->GetAlnPosFromSeqPos(i,
4346  interval.GetFrom(),
4347  CAlnMap::eBackwards, true);
4348  actualAlnStop =
4349  m_AV->GetAlnPosFromSeqPos(i,
4350  interval.GetTo(),
4351  CAlnMap::eBackwards, true);
4352  } else {
4353  actualAlnStart =
4354  m_AV->GetAlnPosFromSeqPos(i,
4355  interval.GetTo(),
4356  CAlnMap::eBackwards, true);
4357  actualAlnStop =
4358  m_AV->GetAlnPosFromSeqPos(i,
4359  interval.GetFrom(),
4360  CAlnMap::eBackwards, true);
4361  }
4362  alnloc->aln_range.Set(actualAlnStart, actualAlnStop);
4363  has_valid_loc = true;
4364  break;
4365  }
4366  }
// Masks that matched no row are dropped rather than stored.
4367  if (has_valid_loc) {
4368  alnloc->seqloc = *iter;
4369  loc_list.push_back(alnloc);
4370  }
4371  }
4372 }
4373 
4374 
4375 void
4376 CDisplaySeqalign::x_GetQueryFeatureList(int row_num, int aln_stop,
4377  vector<TSAlnFeatureInfoList>& retval)
4378  const
4379 {
4380  retval.clear();
4381  retval.resize(row_num);
4382  //list<SAlnFeatureInfo*>* bioseqFeature= new list<SAlnFeatureInfo*>[row_num];
4383  if(m_QueryFeature){
4384  for (list<FeatureInfo*>::iterator iter=m_QueryFeature->begin();
4385  iter!=m_QueryFeature->end(); iter++){
4386  for(int i = 0; i < row_num; i++){
4387  if((*iter)->seqloc->GetInt().GetId().Match(m_AV->GetSeqId(i))){
4388  int actualSeqStart = 0, actualSeqStop = 0;
4389  if(m_AV->IsPositiveStrand(i)){
4390  if((*iter)->seqloc->GetInt().GetFrom()
4391  < m_AV->GetSeqStart(i)){
4392  actualSeqStart = m_AV->GetSeqStart(i);
4393  } else {
4394  actualSeqStart = (*iter)->seqloc->GetInt().GetFrom();
4395  }
4396 
4397  if((*iter)->seqloc->GetInt().GetTo() >
4398  m_AV->GetSeqStop(i)){
4399  actualSeqStop = m_AV->GetSeqStop(i);
4400  } else {
4401  actualSeqStop = (*iter)->seqloc->GetInt().GetTo();
4402  }
4403  } else {
4404  if((*iter)->seqloc->GetInt().GetFrom()
4405  < m_AV->GetSeqStart(i)){
4406  actualSeqStart = (*iter)->seqloc->GetInt().GetFrom();
4407  } else {
4408  actualSeqStart = m_AV->GetSeqStart(i);
4409  }
4410 
4411  if((*iter)->seqloc->GetInt().GetTo() >
4412  m_AV->GetSeqStop(i)){
4413  actualSeqStop = (*iter)->seqloc->GetInt().GetTo();
4414  } else {
4415  actualSeqStop = m_AV->GetSeqStop(i);
4416  }
4417  }
4418  int alnFrom = m_AV->GetAlnPosFromSeqPos(i, actualSeqStart);
4419  int alnTo = m_AV->GetAlnPosFromSeqPos(i, actualSeqStop);
4420 
4421  CRef<SAlnFeatureInfo> featInfo(new SAlnFeatureInfo);
4422  string tempFeat = NcbiEmptyString;
4423  if (alnTo - alnFrom >= 0){
4424  x_SetFeatureInfo(featInfo, *((*iter)->seqloc), alnFrom,
4425  alnTo, aln_stop, (*iter)->feature_char,
4426  (*iter)->feature_id, tempFeat);
4427  retval[i].push_back(featInfo);
4428  }
4429  }
4430  }
4431  }
4432  }
4433 }
4434 
/// Draw one domain marker into @a final_domain at alignment columns
/// [aln_from, aln_to].
///
/// A span longer than two columns is rendered as "<----->" with as much of
/// @a domain_name as fits centred between the angle brackets; a span of one
/// or two columns is rendered as blanks.
///
/// The requested span is clipped to the bounds of @a final_domain, and an
/// empty or inverted span is ignored. The previous code indexed
/// @a final_domain without any bounds check and built a string of negative
/// (wrapped) length when aln_to < aln_from - 1.
///
/// @param aln_from      first alignment column of the domain (0-based)
/// @param aln_to        last alignment column of the domain (inclusive)
/// @param domain_name   label to place inside the marker
/// @param final_domain  line buffer that is modified in place
static void s_MakeDomainString(int aln_from, int aln_to, const std::string& domain_name,
                               std::string& final_domain) {

    if (final_domain.empty()) {
        return;
    }

    // Clip to the buffer so no write below can fall outside final_domain.
    const int first_col = std::max(aln_from, 0);
    const int last_col = std::min(aln_to, static_cast<int>(final_domain.size()) - 1);
    if (last_col < first_col) {
        return;
    }

    std::string domain_string(static_cast<std::string::size_type>(last_col - first_col + 1), ' ');

    if (domain_string.size() > 2) {
        domain_string.assign(domain_string.size(), '-');
        domain_string[0] = '<';
        domain_string[domain_string.size() - 1] = '>';

        // Put the domain name in the middle of the string, never overwriting
        // the leading '<' or the trailing '>'.
        const int midpoint = static_cast<int>(domain_string.size()) / 2;
        const int first_possible_pos = 1;
        const int actual_first_pos =
            std::max(first_possible_pos, midpoint - static_cast<int>(domain_name.size()) / 2);

        for (std::string::size_type i = actual_first_pos, j = 0;
             i < domain_string.size() - 1 && j < domain_name.size(); ++i, ++j) {
            domain_string[i] = domain_name[j];
        }
    }

    final_domain.replace(first_col, domain_string.size(), domain_string);
}
4461 
// Builds a single "domain" annotation line for the master row (row 0) from
// m_DomainInfo and appends it to retval[0].
//   row_num  - not used in the visible body
//   aln_stop - alignment stop used for the synthetic Seq-loc and feature info
//   retval   - per-row feature lists; only element 0 is written
// Nothing is added when m_DomainInfo is null or empty.
// NOTE(review): retval[0] is accessed without checking that retval is
// non-empty - presumably the caller sizes it first; confirm.
4462  void CDisplaySeqalign::x_GetDomainInfo(int row_num, int aln_stop,
4463  vector<TSAlnFeatureInfoList>& retval) const
4464 {
4465 
4466  if(m_DomainInfo && !m_DomainInfo->empty()){
// final_domain is one character per alignment column, initially blank.
4467  string final_domain (m_AV->GetAlnStop() + 1, ' ');
4468  int last_aln_to = m_AV->GetAlnStop();
4469  for (list<CRef<DomainInfo> >::iterator iter=m_DomainInfo->begin();
4470  iter!=m_DomainInfo->end(); iter++){
4471  if((*iter)->seqloc->GetInt().GetId().Match(m_AV->GetSeqId(0))){
4472  int actualSeqStart = 0, actualSeqStop = 0;
4473  if(m_AV->IsPositiveStrand(0)){ //only show domain on positive strand
// Clip the domain to the part of the master sequence that is aligned.
4474  actualSeqStart = max((int)m_AV->GetSeqStart(0),
4475  (int)(*iter)->seqloc->GetInt().GetFrom());
4476 
4477  actualSeqStop = min((int)m_AV->GetSeqStop(0),
4478  (int)(*iter)->seqloc->GetInt().GetTo());
4479 
4480  int alnFrom = m_AV->GetAlnPosFromSeqPos(0, actualSeqStart);
4481  //check if there is gap between this and last seq position on master
4482  if (actualSeqStart > 0 && (*iter)->is_subject_start_valid) {
4483  if (alnFrom -
4484  m_AV->GetAlnPosFromSeqPos(0, actualSeqStart - 1) > 1) {
4485  //if so then use subject seq to get domain boundary
4486  int subj_aln_from = m_AV->GetAlnPosFromSeqPos(1,
4487  (int)(*iter)->subject_seqloc->GetInt().GetFrom());
// A negative result is ignored and the master-derived boundary is kept.
4488  if (subj_aln_from >= 0) {
4489  alnFrom = subj_aln_from;
4490  }
4491  }
4492  }
4493 
4494  int alnTo = m_AV->GetAlnPosFromSeqPos(0, actualSeqStop);
4495  //check if there is gap between this and next seq position on master
4496  if (actualSeqStop < (int)m_AV->GetSeqStop(0) &&
4497  (*iter)->is_subject_stop_valid) {
4498  if (m_AV->GetAlnPosFromSeqPos(0, actualSeqStop + 1) - alnTo > 1) {
4499  //if so then use subject seq to get domain boundary
4500  int subj_aln_to = m_AV->GetAlnPosFromSeqPos(1,
4501  (int)(*iter)->subject_seqloc->GetInt().GetTo());
4502  if (subj_aln_to >= 0) {
4503  alnTo = subj_aln_to;
4504  }
4505  }
4506  }
// The start is capped at last_aln_to + 1. last_aln_to starts at the alignment
// stop and is then set to the previous domain's alnTo.
4507  int actual_aln_from = min(alnFrom,last_aln_to +1);
4508  if (actual_aln_from > alnTo) {
4509  //domain is not correct, no showing
// This return leaves the whole function, so no domain line at all is added
// to retval, including domains already drawn into final_domain.
4510  return;
4511  }
4512  s_MakeDomainString(actual_aln_from, alnTo, (*iter)->domain_name, final_domain);
4513 
4514  last_aln_to = alnTo;
4515 
4516  }
4517  }
4518  }
// Wrap the assembled line in a feature spanning [0, aln_stop] on the id of
// the first domain entry and attach it to row 0.
4519  CRef<SAlnFeatureInfo> featInfo(new SAlnFeatureInfo);
4520  CRef<CSeq_loc> seqloc(new CSeq_loc((CSeq_loc::TId &) m_DomainInfo->front()->seqloc->GetInt().GetId(),
4521  (CSeq_loc::TPoint) 0,
4522  (CSeq_loc::TPoint) aln_stop));
4523  x_SetFeatureInfo(featInfo, *(seqloc), 0,
4524  aln_stop, aln_stop, ' ',
4525  " ", final_domain);
4526  retval[0].push_back(featInfo);
4527  }
4528 }
4529 
// Produces the label shown at the start of an alignment row.
//   id  - output: receives the label
//   row - alignment row (0 is the query)
// In the first visible branch row 0 is labelled "Query" and, unless
// eMergeAlign is set, other rows are labelled "Sbjct"; otherwise the label is
// the GI (when eShowGi is set and a GI > ZERO_GI can be found) or an id chosen
// by FindBestChoice().
// NOTE(review): this extract is missing listing lines 4531 (function opening),
// 4547 (presumably the condition selecting the "Query"/"Sbjct" style, closed
// by "} else {" at 4579) and 4568-4569, 4574-4575, 4593-4594, 4599-4600 (the
// remainder of each FindBestChoice call and the assignment to id). The ".\ "
// sequences are an extraction artifact of a line continuation.
4530  void CDisplaySeqalign::x_FillSeqid(string& id, int row) const
4532  static string kQuery("Query");
4533  static string kSubject("Sbjct");
4534 
4535 #ifdef CTOOLKIT_COMPATIBLE
4536  /* Facilitates comparing formatted output using diff */
// One-time adjustment: when the CTOOLKIT_COMPATIBLE environment variable is
// set, a ':' is appended to both labels.
4537  static bool value_set = false;
4538  if ( !value_set ) {
4539  if (getenv("CTOOLKIT_COMPATIBLE")) {
4540  kQuery.append(":");
4541  kSubject.append(":");
4542  }
4543  value_set = true;
4544  }
4545 #endif /* CTOOLKIT_COMPATIBLE */
4546 
4548  if(row==0){//query
4549  id=kQuery;
4550  } else {//hits
4551  if (!(m_AlignOption&eMergeAlign)){
4552  //hits for pairwise
4553  id=kSubject;
4554  } else {
4555  if(m_AlignOption&eShowGi){
// GI lookup order: the row's own Seq-id if it is a GI, otherwise search the
// bioseq's id list.
4556  TGi gi = ZERO_GI;
4557  if(m_AV->GetSeqId(row).Which() == CSeq_id::e_Gi){
4558  gi = m_AV->GetSeqId(row).GetGi();
4559  }
4560  if(!(gi > ZERO_GI)){
4561  gi = CAlignFormatUtil::GetGiForSeqIdList(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId());
4562  }
4563  if(gi > ZERO_GI){
4564  id=NStr::NumericToString(gi);
4565  } else {
4566  const CRef<CSeq_id> wid
4567  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4570  }
4571  } else {
4572  const CRef<CSeq_id> wid
4573  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4576  }
4577  }
4578  }
4579  } else {
// Same GI-or-best-id logic as above, applied to every row including row 0.
4580  if(m_AlignOption&eShowGi){
4581  TGi gi = ZERO_GI;
4582  if(m_AV->GetSeqId(row).Which() == CSeq_id::e_Gi){
4583  gi = m_AV->GetSeqId(row).GetGi();
4584  }
4585  if(!(gi > ZERO_GI)){
4586  gi = CAlignFormatUtil::GetGiForSeqIdList(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId());
4587  }
4588  if(gi > ZERO_GI){
4589  id=NStr::NumericToString(gi);
4590  } else {
4591  const CRef<CSeq_id> wid
4592  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4595  }
4596  } else {
4597  const CRef<CSeq_id> wid
4598  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4601  }
4602  }
4603 }
4604 
4605 
4607 {
4608  int num_align = 0;
4609  //get segs first and get hspNumber,segs and subjRange per sequence in alignment
4610  string toolUrl = NcbiEmptyString;
4611  if(m_AlignOption & eHtml){
4612  toolUrl = m_Reg->Get(m_BlastType, "TOOL_URL");
4613  }
4614  if( // Calculate m_AlnLinksParams->segs,hspNum, subjRange only for the following conditions
4615  (!(m_AlignOption & eMergeAlign) &&
4616  (toolUrl.find("dumpgnl.cgi") != string::npos
4617  || (m_AlignOption & eLinkout)
4619  /*need to construct segs for dumpgnl and
4620  get sub-sequence for long sequences*/
4621 
4622  for (CSeq_align_set::Tdata::const_iterator
4623  iter = actual_aln_list.Get().begin();
4624  iter != actual_aln_list.Get().end()
4625  && num_align<m_NumAlignToShow; iter++, num_align++) {
4626 
4627  CConstRef<CSeq_id> subid;
4628  subid = &((*iter)->GetSeq_id(1));
4629  string idString = subid->GetSeqIdString();
4630 
4631  x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange
4632  }
4633  }
4634 }
4635 
4636 
4637 
// Accumulates per-subject link parameters in m_AlnLinksParams[idString] for
// one Seq-align: the comma-separated segment list (segs), the overall subject
// range (subjRange), the strand-flip flag and the HSP count (hspNumber).
//   align    - alignment to add
//   idString - key into m_AlnLinksParams
//   toolUrl  - segs are collected only when this contains "dumpgnl.cgi" or
//              eLinkout is set
// NOTE(review): listing lines 4645 and 4671 are absent from this extract;
// 4671 presumably holds the condition guarding the hspNumber update.
4638  void CDisplaySeqalign::x_CalcUrlLinksParams(const CSeq_align& align, string idString,string toolUrl)
4639 {
4640  //make alnvector
4641  CRef<CAlnVec> avRef = x_GetAlnVecForSeqalign(align);
4642 
// "first" is true when this subject id has no entry in the map yet.
// NOTE(review): in that case the struct is allocated with new and later copied
// by value into the map (listing line 4676); no delete of the allocation is
// visible in this block - looks like a leak, confirm against the missing lines.
4643  bool first = m_AlnLinksParams.count(idString) == 0;
4644  struct SAlnLinksParams *alnLinksParam = first ? new SAlnLinksParams : &m_AlnLinksParams[idString];
4646 
4647  if (toolUrl.find("dumpgnl.cgi") != string::npos || (m_AlignOption & eLinkout)) {
4648  if(!first){
4649  alnLinksParam->segs += ",";
4650  }
// Each segment is appended as "<start>-<stop>" of the subject row.
4651  alnLinksParam->segs += NStr::IntToString(avRef->GetSeqStart(1))
4652  + "-" +
4653  NStr::IntToString(avRef->GetSeqStop(1));
4654  }
4655 
4656 
// Order the subject start/stop so that from <= to.
4657  TSeqPos from = (avRef->GetSeqStart(1)> avRef->GetSeqStop(1)) ? avRef->GetSeqStop(1) : avRef->GetSeqStart(1);
4658  TSeqPos to = (avRef->GetSeqStart(1)> avRef->GetSeqStop(1)) ? avRef->GetSeqStart(1) : avRef->GetSeqStop(1);
4659  if(first) {
// The flip flag is taken from the first HSP of the subject only.
4660  alnLinksParam->subjRange = new CRange<TSeqPos>(from,to);
4661  alnLinksParam->flip = avRef->StrandSign(0) != avRef->StrandSign(1);
4662  }
4663  else{
// Later HSPs only widen the stored range.
4664  TSeqPos currFrom = alnLinksParam->subjRange->GetFrom();
4665  TSeqPos currTo = alnLinksParam->subjRange->GetTo();
4666  alnLinksParam->subjRange->SetFrom(min(from,currFrom));
4667  alnLinksParam->subjRange->SetTo(max(to,currTo));
4668  }
4669 
4670 
4672  alnLinksParam->hspNumber = (!first) ? alnLinksParam->hspNumber + 1 : 1;
4673  }
4674 
4675  if(first){
4676  m_AlnLinksParams.insert(map<string, struct SAlnLinksParams>::value_type(idString,*alnLinksParam));
4677  }
4678 }
4679 
4680 
4681 
4682 void CDisplaySeqalign::x_PreProcessSingleAlign(CSeq_align_set::Tdata::const_iterator currSeqAlignIter,
4683  CSeq_align_set &actual_aln_list,
4684  bool multipleSeqs)
4685 {
4686  CConstRef<CSeq_id> subid;
4687 
4688  string toolUrl;
4689  if(multipleSeqs && (m_AlignOption & eHtml)) {
4690  //actually this is needed for long sequences only
4691  toolUrl = m_Reg->Get(m_BlastType, "TOOL_URL");
4692  }
4693 
4694  string idString, prevIdString;
4695  for (CSeq_align_set::Tdata::const_iterator
4696  iter = currSeqAlignIter;
4697  iter != actual_aln_list.Get().end();iter++) {
4698 
4699  subid = &((*iter)->GetSeq_id(1));
4700  idString = subid->GetSeqIdString();
4701  if(prevIdString.empty() || prevIdString == idString) {
4702  x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange
4703  }
4704  else {
4705  break;
4706  }
4707  prevIdString = idString;
4708  }
4709 }
4710 
4711 
// Writes pairwise alignments to out, restricted to the subjects whose id
// appears in selectedIDs.
//   out         - destination stream
//   selectedIDs - subject ids to show: the GI as a decimal string for GI ids,
//                 otherwise the CSeq_id::eContent label. Passed by value.
// Side effects visible here: m_currAlignHsp is reset to 0, alignment
// parameters are initialised via x_InitAlignParams(), and bioseqs of finished
// subjects are removed from m_Scope's history.
// NOTE(review): listing lines 4719 (the call that fills actual_aln_list from
// *m_SeqalignSetRef), 4780 and 4789 (the call that fills the score fields of
// alnvecInfo) are absent from this extract.
4712  void CDisplaySeqalign::DisplayPairwiseSeqalign(CNcbiOstream& out,unordered_set <string> selectedIDs) //(blast_rank = 1,2...)
4713 {
// NOTE(review): alignRows is declared but not used in the visible body.
4714  string alignRows;
4715  unordered_set <string> :: const_iterator idsIter;
4716 
4717  CSeq_align_set actual_aln_list;
4718  //Not sure we need this - check with Jean
4720  *m_SeqalignSetRef);
4721  if (actual_aln_list.Get().empty()){
4722  return;
4723  }
4724  //scope for feature fetching
4725  //sets m_featScope, m_CanRetrieveSeq,m_DynamicFeature
4726  x_InitAlignParams(actual_aln_list);
4727 
4728  CConstRef<CSeq_id> previousId, subid;
4729 
// idCount counts the distinct selected subjects encountered so far.
4730  int idCount = 0;
4731  m_currAlignHsp = 0;
4732  bool showBlastDefline = false;
4733  for (CSeq_align_set::Tdata::const_iterator
4734  iter = actual_aln_list.Get().begin();
4735  iter != actual_aln_list.Get().end();iter++) {
4736 
4737  subid = &((*iter)->GetSeq_id(1));
4738 
4739 
4740  string currID;
4741  if(subid->Which() == CSeq_id::e_Gi) {
4742  TGi currGi = subid->GetGi();
4743  currID = NStr::NumericToString(currGi);
4744  }
4745  else {
4746  subid->GetLabel(&currID, CSeq_id::eContent);
4747  }
4748  idsIter = selectedIDs.find(currID);
4749 
4750  //seqid from seqalign not found in input seq list
// Unselected subjects are skipped until every selected id has been seen;
// after that the first unselected subject ends the loop.
4751  if(idsIter == selectedIDs.end() && idCount < (int)selectedIDs.size()) continue;
4752  if(idsIter == selectedIDs.end() && idCount >= (int)selectedIDs.size()) break;
4753 
4754  //reach here if currID from seqalign found in selectedIDs list
// A change of subject id starts a new subject: pre-compute its link
// parameters and request a defline for its first HSP only.
4755  if(previousId.Empty() ||
4756  !subid->Match(*previousId)){
4757  idCount++;
4758 
4759 
4760  //Calculates m_HSPNum for showing sorting links
4761  //If getSegs = true calculates m_segs for showing download chicklet for large seqs
4762  x_PreProcessSingleAlign(iter,actual_aln_list,selectedIDs.size() > 1);
4763  showBlastDefline = true;
4764 
4765  }
4766  else {
4767  showBlastDefline = false;
4768  }
4769 
4770  if(!previousId.Empty() &&
4771  !subid->Match(*previousId)){
4772  m_Scope.RemoveFromHistory(m_Scope.GetBioseqHandle(*previousId)); //release memory
4773  }
4774  previousId = subid;
4775  //make alnvector
4776  CRef<CAlnVec> avRef = x_GetAlnVecForSeqalign(**iter);
4777 
4778  if(!(avRef.Empty())){
4779  //Note: do not switch the set order per calnvec specs.
4781  avRef->SetGenCode(m_MasterGeneticCode, 0);
4782  try{
4783  const CBioseq_Handle& handle = avRef->GetBioseqHandle(1);
4784  if(handle){
4785  //save the current alnment regardless
4786  CRef<SAlnInfo> alnvecInfo(new SAlnInfo);
4787 
4788  int num_ident;
4790  alnvecInfo->score,
4791  alnvecInfo->bits,
4792  alnvecInfo->evalue,
4793  alnvecInfo->sum_n,
4794  num_ident,
4795  alnvecInfo->use_this_seqid,
4796  alnvecInfo->comp_adj_method);
4797 
4798  alnvecInfo->alnvec = avRef;
4799 
4800  x_DisplayAlnvecInfo(out,alnvecInfo,showBlastDefline);
4801  }
4802  } catch (const CException&){
// Any CException from the try block is reported in the output stream and the
// alignment is skipped; the loop continues with the next alignment.
4803  out << "Sequence with id "
4804  << (avRef->GetSeqId(1)).GetSeqIdString().c_str()
4805  <<" no longer exists in database...alignment skipped\n";
4806  }
4807  }
4808  }
4809 }
4810 
4811 END_SCOPE(align_format)
4813 
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
Sequence alignment display tool.
static const char kDownloadLink[]
static const char kLinkoutOrderStr[]
Default linkout order.
const int k_NumAsciiChar
Number of ASCII characters for populating matrix columns.
static const char kDownloadUrl[]
dumpgnl
static const char kDownloadImg[]
static const char kBl2seqUrl[]
static const char k_GetSeqSelectForm[]
static const char k_GetTreeViewForm[]
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
Definition: asci_blk.cpp:1632
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
Definition: blast_options.h:77
#define false
Definition: bool.h:36
AutoPtr –.
Definition: ncbimisc.hpp:401
static string GetIDUrl(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid.
static void GetAsciiProteinMatrix(const char *matrix_name, CNcbiMatrix< int > &retval)
Retrieve a scoring matrix for the provided matrix name.
static list< string > GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, bool structure_linkout_as_group, bool for_alignment, int cur_align, string &linkoutOrder, TTaxId taxid, string &database, int query_number, string &user_url, string &preComputedResID, ILinkoutDB *linkoutdb, const string &mv_build_name)
Get linkout membership for the list of blast deflines.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alignment.
static void BuildFormatQueryString(CCgiContext &ctx, string &cgi_query)
static void GetScoreString(double evalue, double bit_score, double total_bit_score, int raw_score, string &evalue_str, string &bit_score_str, string &total_bit_score_str, string &raw_score_str)
format evalue and bit_score
static string GetBareId(const objects::CSeq_id &id)
Get sequence id with no database source (bare accession)
static CAlignFormatUtil::DbType GetDbType(const objects::CSeq_align_set &actual_aln_list, objects::CScope &scope)
Set the database as gi type.
static string BuildUserUrl(const objects::CBioseq::TId &ids, TTaxId taxid, string user_url, string database, bool db_is_na, string rid, int query_number, bool for_alignment)
return the custom url (such as mapview)
static string MapTemplate(string inpString, string tmplParamName, Int8 templParamVal)
Replace template tags by real data.
static string GetURLFromRegistry(const string url_name, int index=-1)
retrieve URL from .ncbirc file combining host/port and format strings values.
static bool MatchSeqInSeqList(TGi cur_gi, CRef< objects::CSeq_id > &seqID, list< string > &use_this_seq, bool *isGiList=NULL)
Matches text seqID or gi with the list of seqIds or gis.
static int GetSeqLinkoutInfo(objects::CBioseq::TId &cur_id, ILinkoutDB **linkoutdb, const string &mv_build_name, TGi gi=INVALID_GI)
static CRef< objects::CSeq_id > GetDisplayIds(const objects::CBioseq_Handle &handle, const objects::CSeq_id &aln_id, list< TGi > &use_this_gi, TGi &gi, TTaxId &taxid)
Scan the list of blast deflines and find the seqID to be used in display.
static list< string > GetLinkoutUrl(int linkout, const objects::CBioseq::TId &ids, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, TGi first_gi, bool structure_linkout_as_group, bool for_alignment, int cur_align, string preComputedResID)
Get the list of urls for linkouts.
@ eLinkTypeDefault
Defini