NCBI C++ ToolKit
showalign.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: showalign.cpp 102029 2024-03-20 13:34:34Z zaretska $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  *
28  * File Description:
29  * Sequence alignment display
30  *
31  */
32 #include <ncbi_pch.hpp>
33 
36 
37 #include <corelib/ncbiexpt.hpp>
38 #include <corelib/ncbiutil.hpp>
39 #include <corelib/ncbistre.hpp>
40 #include <corelib/ncbireg.hpp>
41 
42 #include <util/range.hpp>
43 #include <util/md5.hpp>
44 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDB::ExtractBlastDefline
45 
46 #include <objmgr/scope.hpp>
47 #include <objmgr/feat_ci.hpp>
49 
50 #include <objmgr/util/sequence.hpp>
51 #include <objmgr/util/feature.hpp>
52 
57 #include <objects/seq/Seqdesc.hpp>
58 #include <objects/seq/Bioseq.hpp>
59 
61 
64 
69 
72 
73 #include <stdio.h>
76 #include <html/htmlhelper.hpp>
77 #include <cgi/cgictx.hpp>
78 
81 USING_SCOPE(sequence);
82 BEGIN_SCOPE(align_format)
83 
/// Display labels for the translation frames: three "+" frames followed by
/// three "-" frames.
86 static const string k_FrameConversion[k_NumFrame] = {"+1", "+2", "+3", "-1",
87  "-2", "-3"};
/// NOTE(review): presumably a length cutoff deciding when only a subsequence
/// is fetched -- not referenced in this part of the file, confirm at use site.
88 static const int k_GetSubseqThreshhold = 10000;
89 
90 ///threshold to color mismatch, given as a percentage: 98 means 98%
91 static const int k_ColorMismatchIdentity = 0;
/// NOTE(review): presumably a sequence-length limit for dynamic feature
/// retrieval -- confirm at use site.
92 static const int k_GetDynamicFeatureSeqLength = 200000;
/// Path of the dumpgnl CGI.
93 static const string k_DumpGnlUrl = "/blast/dumpgnl.cgi";
/// Number of defline characters kept when building a CDS feature id
/// (see the substr() call in s_GetCdsSequence).
94 static const int k_FeatureIdLen = 16;
/// Hex color codes: black, grey, red.
95 const string color[]={"#000000", "#808080", "#FF0000"};
96 const string k_ColorRed = "#FF0000";
97 const string k_ColorPink = "#F805F5";
98 
/// Character that marks an intron position in a feature line.
99 static const char k_IntronChar = '~';
/// Column margins (in characters) used when laying out alignment rows.
100 static const int k_IdStartMargin = 2;
101 static const int k_SeqStopMargin = 2;
102 static const int k_StartSequenceMargin = 2;
103 static const int k_AlignStatsMargin = 2;
104 static const int k_SequencePropertyLabelMargin = 2;
105 
/// Default HTML templates. Tokens of the form <@name@> are placeholders that
/// are substituted through CAlignFormatUtil::MapTemplate.
106 const string k_DefaultAnchorTempl = "<a name=<@id_lbl@>></a>";
107 const string k_DefaultAnchorWithPosTempl = "<a name=#_<@resultPositionIndex@>_<@id_lbl@>></a>";
108 static const string k_DefaultSpaceMaintainerTempl = "<span class=\"smn\"><@chkbox@></span>";
109 static const string k_DefaultCheckboxTempl = "<input type=\"checkbox\" name=\"getSeqGi\" value=\"<@id_lbl@>\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment<@queryNumber@>', 'getSeqGi', this.checked)\">";
110 static const string k_DefaultCheckboxExTempl = "<input type=\"checkbox\" name=\"getSeqGi\" value=\"<@id_lbl@>\" checked=\"checked\" onClick=\"synchAl(this);\">";
111 
112 //highlight the seqid for pairwise-with-identity format
113 const string k_DefaultPairwiseWithIdntTempl = "<font color=\"#FF0000\"><b><@alndata@></b></font>";//k_ColorRed
114 const string k_DefaultFeaturesTempl = "<font color=\"#F805F5\"><b><@alndata@></b></font>";//k_ColorPink
115 const string k_DefaultMaskSeqLocTempl = "<font color=\"<@color@>\"><@alndata@></font>";
116 
117 
// Legacy HTML form templates; compiled only when USE_ORG_IMPL is defined.
// The %s / %d fields are printf-style placeholders to be filled by the code
// that emits the forms (not visible in this part of the file).
118 #ifdef USE_ORG_IMPL
// "Get selected sequences" forms: the first entry posts to the Entrez query
// CGI, the second entry posts to the Traces CGI.
119 static string k_GetSeqSubmitForm[] = {"<FORM method=\"post\" \
120 action=\"//www.ncbi.nlm.nih.gov:80/entrez/query.fcgi?SUBMIT=y\" \
121 name=\"%s%d\"><input type=button value=\"Get selected sequences\" \
122 onClick=\"finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)\"><input \
123 type=\"hidden\" name=\"db\" value=\"\"><input type=\"hidden\" name=\"term\" \
124 value=\"\"><input type=\"hidden\" name=\"doptcmdl\" value=\"docsum\"><input \
125 type=\"hidden\" name=\"cmd\" value=\"search\"></form>",
126 
127  "<FORM method=\"POST\" \
128 action=\"//www.ncbi.nlm.nih.gov/Traces/trace.cgi\" \
129 name=\"%s%d\"><input type=button value=\"Get selected sequences\" \
130 onClick=\"finalSubmit(%d, 'getSeqAlignment%d', 'getSeqGi', '%s%d', %d)\"><input \
131 type=\"hidden\" name=\"val\" value=\"\"><input \
132 type=\"hidden\" name=\"cmd\" value=\"retrieve\"></form>"
133 };
134 
// "Select all" / "Deselect all" buttons acting on the getSeqGi checkboxes.
135 static string k_GetSeqSelectForm = "<FORM><input \
136 type=\"button\" value=\"Select all\" onClick=\"handleCheckAll('select', \
137 'getSeqAlignment%d', 'getSeqGi')\"></form></td><td><FORM><input \
138 type=\"button\" value=\"Deselect all\" onClick=\"handleCheckAll('deselect', \
139 'getSeqAlignment%d', 'getSeqGi')\"></form>";
140 
141 
// "Distance tree of results" form posting the checked sequences to the
// blast_tree_view CGI.
142 static string k_GetTreeViewForm = "<FORM method=\"post\" \
143 action=\"//www.ncbi.nlm.nih.gov/blast/treeview/blast_tree_view.cgi?request=page&rid=%s&queryID=%s&distmode=on\" \
144 name=\"tree%s%d\" target=\"trv%s\"> \
145 <input type=button value=\"Distance tree of results\" onClick=\"extractCheckedSeq('getSeqAlignment%d', 'getSeqGi', 'tree%s%d')\"> \
146 <input type=\"hidden\" name=\"sequenceSet\" value=\"\"><input type=\"hidden\" name=\"screenWidth\" value=\"\"></form>";
147 #endif
148 
149 
/// NOTE(review): presumably the upper and lower limits on how many deflines
/// are displayed for one alignment -- not referenced in this part of the
/// file, confirm at the use site.
150 static const int k_MaxDeflinesToShow = 8;
151 static const int k_MinDeflinesToShow = 3;
152 
153 
// Tail of a constructor definition. The line holding the class name and the
// first parameter (referenced below as `seqalign`) is absent from this
// listing. NOTE(review): presumably the CDisplaySeqalign constructor --
// confirm against the full file.
//
// The remaining parameters are stored as-is: the scope by reference, the
// mask-seqloc list and the external-feature list as raw pointers.
155  CScope& scope,
156  list <CRef<CSeqLocInfo> >* mask_seqloc,
157  list <FeatureInfo*>* external_feature,
158  const char* matrix_name /* = BLAST_DEFAULT_MATRIX */)
159  : m_SeqalignSetRef(&seqalign),
160  m_Seqloc(mask_seqloc),
161  m_QueryFeature(external_feature),
162  m_Scope(scope),
163  m_LinkoutDB(NULL),
164  m_UseLongSeqIds(false)
165 {
// Put every remaining member into a defined default state; all owned
// pointers start out NULL.
166  m_AlignOption = 0;
167  m_SeqLocChar = eX;
168  m_SeqLocColor = eBlack;
169  m_LineLen = align_format::kDfltLineLength;
170  m_IsDbNa = true;
171  m_CanRetrieveSeq = false;
172  m_DbName = NcbiEmptyString;
173  m_NumAlignToShow = 1000000;
174  m_AlignType = eNotSet;
175  m_Rid = "0";
176  m_CddRid = "0";
177  m_EntrezTerm = NcbiEmptyString;
178  m_QueryNumber = 0;
179  m_BlastType = NcbiEmptyString;
180  m_MidLineStyle = eBar;
181  m_ConfigFile = NULL;
182  m_Reg = NULL;
183  m_DynamicFeature = NULL;
184  m_MasterGeneticCode = 1;
185  m_SlaveGeneticCode = 1;
186  m_AlignTemplates = NULL;
187  m_Ctx = NULL;
188  m_Matrix = NULL; //-RMH-
189  m_DomainInfo = NULL;
190  m_SeqPropertyLabel.reset(new vector<string>);
191  m_TranslatedFrameForLocalSeq = eFirst;
192  m_ResultPositionIndex = -1;
193  m_currAlignSeqListIndex = 1;
194  m_QueryAnchoredSetIndex = -1;
197  ? matrix_name
199 
// NOTE(review): the statements that declare and fill `mtx` (and the body of
// the fallback branch below) are absent from this listing.
200  // Use default score matrix if one with the provided name was not found.
201  // This may happen for a user's score matrix that was read from a file
202  // (using BLASTMAT environment variable).
203  if (mtx.GetData().empty()) {
205  }
206 
207  // -RMH- --- Need to see if we can retrieve our matrix this way.
208  // for now don't initialize if empty
209  //_ASSERT(!mtx.GetData().empty());
// Copy the matrix into a heap-allocated table of GetRows() row pointers,
// each row holding GetCols() ints. m_Matrix stays NULL when no matrix data
// is available.
210  if ( !mtx.GetData().empty() )
211  {
212  m_Matrix = new int*[mtx.GetRows()];
213  for(size_t i = 0; i<mtx.GetRows(); ++i) {
214  m_Matrix[i] = new int[mtx.GetCols()];
215  }
216  // copy data from matrix
217  for(size_t i = 0; i<mtx.GetRows(); ++i) {
218  for (size_t j = 0; j < mtx.GetCols(); j++) {
219  m_Matrix[i][j] = mtx(i, j);
220  }
221  }
222  }
223 }
224 
225 
227 {
228  // -RMH- See above
229  if ( m_Matrix )
230  {
231  for(int i = 0; i<k_NumAsciiChar; ++i) {
232  delete [] m_Matrix[i];
233  }
234  delete [] m_Matrix;
235  if (m_ConfigFile) {
236  delete m_ConfigFile;
237  }
238  if (m_Reg) {
239  delete m_Reg;
240  }
241 
242  if(m_DynamicFeature){
243  delete m_DynamicFeature;
244  }
245  }
246 }
247 
248 //8.Display Identities,positives,frames etc
249 string CDisplaySeqalign::x_FormatIdentityInfo(string alignInfo, SAlnInfo* aln_vec_info)
250 {
251  int aln_stop = (int)m_AV->GetAlnStop();
252  int master_strand = m_AV->StrandSign(0);
253  int slave_strand = m_AV->StrandSign(1);
254  int master_frame = aln_vec_info->alnRowInfo->frame[0];
255  int slave_frame = aln_vec_info->alnRowInfo->frame[1];
256  bool aln_is_prot = (m_AlignType & eProt) != 0 ? true : false;
257 
258 
259  string alignParams = alignInfo;//Some already filled in x_DisplayAlignInfo
260 
261 
262  alignParams = CAlignFormatUtil::MapTemplate(alignParams, "aln_match",NStr::IntToString(aln_vec_info->match) + "/"+ NStr::IntToString(aln_stop+1));
263  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_ident",aln_vec_info->identity);
264 
265  if(aln_is_prot){
266  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_pos",NStr::IntToString(aln_vec_info->positive + aln_vec_info->match) + "/" + NStr::IntToString(aln_stop+1));
267  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_pos_prc",NStr::IntToString(((aln_vec_info->positive + aln_vec_info->match)*100)/(aln_stop+1)));
268  }
269  else {
270  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_strand",(master_strand==1 ? "Plus" : "Minus")+ (string)"/"+ (slave_strand==1? "Plus" : "Minus"));
271  }
272 
273  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_gaps",NStr::IntToString(aln_vec_info->gap) + "/" + NStr::IntToString(aln_stop+1));
274  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_gaps_prc",NStr::IntToString((aln_vec_info->gap*100)/(aln_stop+1)));
275 
276  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_params_frame",(master_frame != 0 || slave_frame != 0) ? m_AlignTemplates->alignInfoFrameTmpl: "");
277  if(master_frame != 0 && slave_frame != 0) {
278  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((master_frame > 0) ? "+" : "") + NStr::IntToString(master_frame)
279  + (string)"/"+((slave_frame > 0) ? "+" : "") + NStr::IntToString(slave_frame));
280  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown");
281  } else if (master_frame != 0){
282  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((master_frame > 0) ? "+" : "") + NStr::IntToString(master_frame));
283  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown");
284  } else if (slave_frame != 0){
285  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame",((slave_frame > 0) ? "+" : "") + NStr::IntToString(slave_frame)) ;
286  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","shown");
287  }
288  else {
289  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame","");
290  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_frame_show","");
291  }
292  return alignParams;
293 }
294 
295 
296 ///show blast identity, positive etc.
297 ///@param out: output stream
298 ///@param aln_stop: stop in aln coords
299 ///@param identity: identity
300 ///@param positive: positives
301 ///@param match: match
302 ///@param gap: gap
303 ///@param master_strand: plus strand = 1 and minus strand = -1
304 ///@param slave_strand: plus strand = 1 and minus strand = -1
305 ///@param master_frame: frame for master
306 ///@param slave_frame: frame for slave
307 ///@param aln_is_prot: is protein alignment?
308 ///
309 static void s_DisplayIdentityInfo(CNcbiOstream& out, int aln_stop,
310  int identity, int positive, int match,
311  int gap, int master_strand,
312  int slave_strand, int master_frame,
313  int slave_frame, bool aln_is_prot)
314 {
315  out<<" Identities = "<<match<<"/"<<(aln_stop+1)<<" ("<<identity<<"%"<<")";
316  if(aln_is_prot){
317  out<<", Positives = "<<(positive + match)<<"/"<<(aln_stop+1)
318  <<" ("<<CAlignFormatUtil::GetPercentMatch(positive + match, aln_stop+1)<<"%"<<")";
319  }
320  out<<", Gaps = "<<gap<<"/"<<(aln_stop+1)
321  <<" ("<<CAlignFormatUtil::GetPercentMatch(gap, aln_stop+1)<<"%"<<")"<<"\n";
322  if (!aln_is_prot){
323  out<<" Strand="<<(master_strand==1 ? "Plus" : "Minus")
324  <<"/"<<(slave_strand==1? "Plus" : "Minus")<<"\n";
325  }
326  if(master_frame != 0 && slave_frame != 0) {
327  out <<" Frame = " << ((master_frame > 0) ? "+" : "")
328  << master_frame <<"/"<<((slave_frame > 0) ? "+" : "")
329  << slave_frame<<"\n";
330  } else if (master_frame != 0){
331  out <<" Frame = " << ((master_frame > 0) ? "+" : "")
332  << master_frame << "\n";
333  } else if (slave_frame != 0){
334  out <<" Frame = " << ((slave_frame > 0) ? "+" : "")
335  << slave_frame <<"\n";
336  }
337  out<<"\n";
338 
339 }
340 
341 ///wrap line
342 ///@param out: output stream
343 ///@param str: string to wrap
344 ///
345 static void s_WrapOutputLine(CNcbiOstream& out, const string& str)
346 {
347  const int line_len = 60;
348  bool do_wrap = false;
349  int length = (int) str.size();
350  if (length > line_len) {
351  for (int i = 0; i < length; i ++){
352  if(i > 0 && i % line_len == 0){
353  do_wrap = true;
354  }
355  out << str[i];
356  if(do_wrap && isspace((unsigned char) str[i])){
357  out << "\n";
358  do_wrap = false;
359  }
360  }
361  } else {
362  out << str;
363  }
364 }
365 
366 ///To add style to bases for some conditions
367 ///@param seq: sequence
368 ///@param currIndex: current seq index
369 ///@startStyledOutput: condition for starting output into the string to be styled
370 ///@stopStyledOutput: condition for stopping output into the string to be styled
371 ///@tmpl: template used for output of styled string
372 ///@styledSeqStr: the string to be styled by appling template
373 ///@param out: output stream
374 ///
375 //This function appends seq[currIndex] to styledSeqStr if startStyledOutput==true or !styledSeqStr.empty() && !stopStyledOutput
376 //If stopStyledOutput==true or it is the end of the seq and styledSeqStr has data,
377 //Template like "<font color="#00000"><@alndata@></font>" or <span class="red"><@alndata@></span> is applied to styledSeqStr
378 // and output to CNcbiOstream
379 static bool s_ProcessStyledContent(string& seq, int currIndex, bool startStyledOutput, bool stopStyledOutput, string tmpl,string &styledSeqStr,CNcbiOstream& out)
380 {
381  bool isStyled = false;
382  if(startStyledOutput || (!styledSeqStr.empty() && !stopStyledOutput)){
383  styledSeqStr += seq[currIndex];
384  isStyled = true;
385  }
386  if(!styledSeqStr.empty() && (stopStyledOutput || currIndex == (int)seq.size() - 1) ) {
387  styledSeqStr = CAlignFormatUtil::MapTemplate(tmpl,"alndata",styledSeqStr);
388  out << styledSeqStr;
389  styledSeqStr = "";
390  }
391  return isStyled;
392 }
393 
394 ///To add color to bases other than identityChar
395 ///@param seq: sequence
396 ///@param identity_char: identity character
397 ///@param out: output stream
398 ///
399 static void s_ColorDifferentBases(string& seq, char identity_char,
400  CNcbiOstream& out){
401  std::string colorSeqStr;
402  for(int i = 0; i < (int)seq.size(); i ++){
403  bool isStyled = s_ProcessStyledContent(seq,i,seq[i] != identity_char,seq[i] == identity_char,k_DefaultPairwiseWithIdntTempl,colorSeqStr,out);
404  if(!isStyled) out << seq[i];
405  }
406 }
407 
408 ///return the frame for a given strand
409 ///Note that start is zero bases. It returns frame +/-(1-3). 0 indicates error
410 ///@param start: sequence start position
411 ///@param strand: strand
412 ///@param id: the seqid
413 ///@param scope: the scope
414 ///@return: the frame
415 ///
416 static int s_GetFrame (int start, ENa_strand strand, const CSeq_id& id,
417  CScope& sp)
418 {
419  int frame = 0;
420  if (strand == eNa_strand_plus) {
421  frame = (start % 3) + 1;
422  } else if (strand == eNa_strand_minus) {
423  frame = -(((int)sp.GetBioseqHandle(id).GetBioseqLength() - start - 1)
424  % 3 + 1);
425 
426  }
427  return frame;
428 }
429 
430 ///return the frame for master seq in stdseg
///The master is the first location (and first id) stored in the std-seg.
///The frame is derived through s_GetFrame from the location's start when it
///is on the plus strand and from its stop otherwise.
431 ///@param ss: the input stdseg
432 ///@param scope: the scope
433 ///@return: the frame
434 ///
435 static int s_GetStdsegMasterFrame(const CStd_seg& ss, CScope& scope)
436 {
437  const CRef<CSeq_loc> slc = ss.GetLoc().front();
438  ENa_strand strand = GetStrand(*slc);
// NOTE(review): one continuation line of the call below (the second
// argument, selected by the strand test) is absent from this listing.
439  int frame = s_GetFrame(strand == eNa_strand_plus ?
440  GetStart(*slc, &scope) : GetStop(*slc, &scope),
441  strand == eNa_strand_plus ?
443  *(ss.GetIds().front()), scope);
444  return frame;
445 }
446 
447 
448 ///return concatenated exon sequence
///The result is a string of total_coding_len blanks in which letters of
///raw_cdr_product are written at every position that is the 2nd base of a
///codon; all other positions stay blank.
449 ///@param feat: the feature containing this cds
450 ///@param feat_strand: the feature strand
451 ///@param range: the range list of seqloc
452 ///@param total_coding_len: the total exon length excluding intron
453 ///@param raw_cdr_product: the raw protein sequence
///@param frame_adj: extra offset added to the cds frame when locating the
///first coding base
454 ///@return: the concatenated exon sequences with amino acid aligned to
455 ///to the second base of a codon
456 ///
457 static string s_GetConcatenatedExon(CFeat_CI& feat,
458  ENa_strand feat_strand,
459  list<CRange<TSeqPos> >& range,
460  TSeqPos total_coding_len,
461  string& raw_cdr_product, TSeqPos frame_adj)
462 {
463 
464  string concat_exon(total_coding_len, ' ');
// frame defaults to 1 when the cds does not set one
465  TSeqPos frame = 1;
466  const CCdregion& cdr = feat->GetData().GetCdregion();
467  if(cdr.IsSetFrame()){
468  frame = cdr.GetFrame();
469  }
// num_base: index into concat_exon currently being visited
// num_coding_base: count of bases seen at or past coding_start_base
// coding_start_base: index of the first base that belongs to a codon
470  TSeqPos num_coding_base;
471  int num_base;
472  TSeqPos coding_start_base;
473  if(feat_strand == eNa_strand_minus){
// minus strand: start at the last index and walk toward index 0
474  coding_start_base = total_coding_len - 1 - (frame -1) - frame_adj;
475  num_base = total_coding_len - 1;
476  num_coding_base = 0;
477 
478  } else {
479  coding_start_base = 0;
480  coding_start_base += frame - 1 + frame_adj;
481  num_base = 0;
482  num_coding_base = 0;
483  }
484 
485  ITERATE(list<CRange<TSeqPos> >, iter, range){
486  //note that feature on minus strand needs to be
487  //filled backward.
488  if(feat_strand != eNa_strand_minus){
// NOTE(review): concat_exon[num_base] is not bounds-checked here; this
// assumes the range lengths sum to at most total_coding_len -- confirm.
489  for(TSeqPos i = 0; i < iter->GetLength(); i ++){
490  if((TSeqPos)num_base >= coding_start_base){
491  num_coding_base ++;
492  if(num_coding_base % 3 == 2){
493  //a.a to the 2nd base
494  if(num_coding_base / 3 < raw_cdr_product.size()){
495  //make sure the coding region is no
496  //more than the protein seq as there
497  //could errors in ncbi record
498  concat_exon[num_base]
499  = raw_cdr_product[num_coding_base / 3];
500  }
501  }
502  }
503  num_base ++;
504  }
505  } else {
506 
// the num_base >= 0 test stops the walk once index 0 has been passed
507  for(TSeqPos i = 0; i < iter->GetLength() &&
508  num_base >= 0; i ++){
509  if((TSeqPos)num_base <= coding_start_base){
510  num_coding_base ++;
511  if(num_coding_base % 3 == 2){
512  //a.a to the 2nd base
513  if(num_coding_base / 3 <
514  raw_cdr_product.size() &&
515  coding_start_base >= num_coding_base){
516  //make sure the coding region is no
517  //more than the protein seq as there
518  //could errors in ncbi record
519  concat_exon[num_base]
520  = raw_cdr_product[num_coding_base / 3];
521  }
522  }
523  }
524  num_base --;
525  }
526  }
527  }
528  return concat_exon;
529 }
530 
531 ///map slave feature info to master seq
///For every slave feature range that overlaps the aligned part of the slave
///row, the range is converted to alignment coordinates and then to master
///(row 0) sequence coordinates, and appended to master_feat_range.
///NOTE(review): several continuation lines of this function (the trailing
///arguments of some GetAlnPosFromSeqPos calls) are absent from this listing.
532 ///@param master_feat_range: master feature seqloc to be filled
///@param master_feat_strand: set to plus when the slave row's alignment
///strand and the slave feature strand agree, minus otherwise
533 ///@param feat: the feature in concern
534 ///@param slave_feat_range: feature info for slave
///@param slave_feat_strand: strand of the feature on the slave sequence
535 ///@param av: the alignment vector for master-slave seqalign
536 ///@param row: the row
537 ///@param frame_adj: frame adjustment
538 ///
539 
540 static void s_MapSlaveFeatureToMaster(list<CRange<TSeqPos> >& master_feat_range,
541  ENa_strand& master_feat_strand, CFeat_CI& feat,
542  list<CSeq_loc_CI::TRange>& slave_feat_range,
543  ENa_strand slave_feat_strand,
544  CAlnVec* av,
545  int row, TSeqPos frame_adj)
546 {
// translation frame: the cds frame (1 when unset) plus the adjustment
547  TSeqPos trans_frame = 1;
548  const CCdregion& cdr = feat->GetData().GetCdregion();
549  if(cdr.IsSetFrame()){
550  trans_frame = cdr.GetFrame();
551  }
552  trans_frame += frame_adj;
553 
// prev_exon_len accumulates the length of all ranges already visited
554  TSeqPos prev_exon_len = 0;
555  bool is_first_in_range = true;
556 
557  if ((av->IsPositiveStrand(1) && slave_feat_strand == eNa_strand_plus) ||
558  (av->IsNegativeStrand(1) && slave_feat_strand == eNa_strand_minus)) {
559  master_feat_strand = eNa_strand_plus;
560  } else {
561  master_feat_strand = eNa_strand_minus;
562  }
563 
// work on a copy of the caller's range list
564  list<CSeq_loc_CI::TRange> acutal_slave_feat_range = slave_feat_range;
565 
566  ITERATE(list<CSeq_loc_CI::TRange>, iter_temp,
567  acutal_slave_feat_range){
// keep only the part of this range that lies inside the aligned region
568  CRange<TSeqPos> actual_feat_seq_range = av->GetSeqRange(row).
569  IntersectionWith(*iter_temp);
570  if(!actual_feat_seq_range.Empty()){
571  TSeqPos slave_aln_from = 0, slave_aln_to = 0;
572  TSeqPos frame_offset = 0;
573  int curr_exon_leading_len = 0;
574  //adjust frame
// only the first overlapping range gets a frame offset: it accounts for
// the bases of the feature that precede the aligned region
575  if (is_first_in_range) {
576  if (slave_feat_strand == eNa_strand_plus) {
577  curr_exon_leading_len
578  = actual_feat_seq_range.GetFrom() - iter_temp->GetFrom();
579 
580  } else {
581  curr_exon_leading_len
582  = iter_temp->GetTo() - actual_feat_seq_range.GetTo();
583  }
584  is_first_in_range = false;
585  frame_offset = (3 - (prev_exon_len + curr_exon_leading_len)%3
586  + (trans_frame - 1)) % 3;
587  }
588 
// four cases: alignment strand of the slave row x feature strand; the
// frame offset is applied at the biological start of the range
589  if (av->IsPositiveStrand(1) &&
590  slave_feat_strand == eNa_strand_plus) {
591  slave_aln_from
592  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom() +
593  frame_offset, CAlnMap::eRight );
594 
595  slave_aln_to =
596  av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo(),
598  } else if (av->IsNegativeStrand(1) &&
599  slave_feat_strand == eNa_strand_plus) {
600 
601  slave_aln_from
602  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo(),
604 
605  slave_aln_to =
607  actual_feat_seq_range.GetFrom() +
608  frame_offset, CAlnMap::eLeft);
609  } else if (av->IsPositiveStrand(1) &&
610  slave_feat_strand == eNa_strand_minus) {
611  slave_aln_from
612  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom(),
614 
615  slave_aln_to =
616  av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo() -
617  frame_offset, CAlnMap::eLeft);
618 
619  } else if (av->IsNegativeStrand(1) &&
620  slave_feat_strand == eNa_strand_minus){
621  slave_aln_from
622  = av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetTo() -
623  frame_offset, CAlnMap::eRight );
624 
625  slave_aln_to =
626  av->GetAlnPosFromSeqPos(row, actual_feat_seq_range.GetFrom(),
628  }
629 
// translate the alignment positions into master (row 0) sequence positions
630  TSeqPos master_from =
631  av->GetSeqPosFromAlnPos(0, slave_aln_from, CAlnMap::eRight);
632 
633  TSeqPos master_to =
634  av->GetSeqPosFromAlnPos(0, slave_aln_to, CAlnMap::eLeft);
635 
636  CRange<TSeqPos> master_range(master_from, master_to);
637  master_feat_range.push_back(master_range);
638 
639  }
640  prev_exon_len += iter_temp->GetLength();
641  }
642 }
643 
644 
645 
646 ///return cds coded sequence and fill the id if found
///When the feature has a whole-sequence product and mix_loc is false, the
///product's own residues are returned and feat_id is set. Otherwise the
///given ranges are translated with the requested genetic code; feat_id is
///left untouched in that case, and the result is empty when genetic_code
///is not positive.
647 ///@param genetic_code: the genetic code
648 ///@param feat: the feature containing this cds
649 ///@param scope: scope to fetch sequence
650 ///@param range: the range list of seqloc
651 ///@param handle: the bioseq handle
652 ///@param feat_strand: the feature strand
653 ///@param feat_id: the feature id to be filled
654 ///@param frame_adj: frame adjustment
655 ///@param mix_loc: is this seqloc mixed with other seqid?
656 ///@return: the encoded protein sequence
657 ///
658 static string s_GetCdsSequence(int genetic_code, CFeat_CI& feat,
659  CScope& scope, list<CRange<TSeqPos> >& range,
660  const CBioseq_Handle& handle,
661  ENa_strand feat_strand, string& feat_id,
662  TSeqPos frame_adj, bool mix_loc)
663 {
664  string raw_cdr_product = NcbiEmptyString;
665  if(feat->IsSetProduct() && feat->GetProduct().IsWhole() && !mix_loc){
666  //show actual aa if there is a cds product
667 
668  const CSeq_id& productId =
669  feat->GetProduct().GetWhole();
670  const CBioseq_Handle& productHandle
671  = scope.GetBioseqHandle(productId );
// feature id: "CDS:" followed by the first k_FeatureIdLen characters of
// the product's generated defline
672  feat_id = "CDS:" +
673  CDeflineGenerator().GenerateDefline(productHandle).substr(0, k_FeatureIdLen);
674  productHandle.
675  GetSeqVector(CBioseq_Handle::eCoding_Iupac).
676  GetSeqData(0, productHandle.
677  GetBioseqLength(), raw_cdr_product);
678  } else {
// build one location out of all ranges; frame_adj trims the start of each
// range on the plus strand and the end of each range otherwise
679  CSeq_loc isolated_loc;
680  ITERATE(list<CRange<TSeqPos> >, iter, range){
681  TSeqPos from = iter->GetFrom();
682  TSeqPos to = iter->GetTo();
683  if(feat_strand == eNa_strand_plus){
684  isolated_loc.
685  Add(*(handle.GetRangeSeq_loc(from + frame_adj,
686  to,
687  feat_strand)));
688  } else {
689  isolated_loc.
690  Add(*(handle.GetRangeSeq_loc(from,
691  to - frame_adj,
692  feat_strand)));
693  }
694  }
695  if (genetic_code > 0) {
// NOTE(review): the line that declares `ce` is absent from this listing.
696  CGenetic_code gc;
698  ce->Select(CGenetic_code::C_E::e_Id);
699  ce->SetId(genetic_code);
700  gc.Set().push_back(ce);
// both ends are flagged partial before translating
701  isolated_loc.SetPartialStart(true, eExtreme_Biological);
702  isolated_loc.SetPartialStop (true, eExtreme_Biological);
703  CSeqTranslator::Translate(isolated_loc, handle.GetScope(),
704  raw_cdr_product, &gc);
705  }
706  }
707  return raw_cdr_product;
708 }
709 
710 ///fill the cds start positions (1 based)
711 ///@param line: the input cds line
712 ///@param concat_exon: exon only string
713 ///@param length_per_line: alignment length per line
714 ///@param feat_aln_start_totalexon: feature aln pos in concat_exon
715 ///@param strand: the alignment strand
716 ///@param start: start list to be filled
717 ///
718 static void s_FillCdsStartPosition(string& line, string& concat_exon,
719  size_t length_per_line,
720  TSeqPos feat_aln_start_totalexon,
721  ENa_strand seq_strand,
722  ENa_strand feat_strand,
723  list<TSeqPos>& start)
724 {
725  size_t actual_line_len = 0;
726  size_t aln_len = line.size();
727  TSeqPos previous_num_letter = 0;
728 
729  //the number of amino acids preceeding this exon start position
730  for (size_t i = 0; i <= feat_aln_start_totalexon; i ++){
731  if(feat_strand == eNa_strand_minus){
732  //remember the amino acid in this case goes backward
733  //therefore we count backward too
734 
735  int pos = concat_exon.size() -1 - i;
736  if(pos >= 0 && isalpha((unsigned char) concat_exon[pos])){
737  previous_num_letter ++;
738  }
739 
740  } else {
741  if(isalpha((unsigned char) concat_exon[i])){
742  previous_num_letter ++;
743  }
744  }
745  }
746 
747 
748  TSeqPos prev_num = 0;
749  //go through the entire feature line and get the amino acid position
750  //for each line
751  for(size_t i = 0; i < aln_len; i += actual_line_len){
752  //handle the last row which may be shorter
753  if(aln_len - i< length_per_line) {
754  actual_line_len = aln_len - i;
755  } else {
756  actual_line_len = length_per_line;
757  }
758  //the number of amino acids on this row
759  TSeqPos cur_num = 0;
760  bool has_intron = false;
761 
762  //go through each character on a row
763  for(size_t j = i; j < actual_line_len + i; j ++){
764  //don't count gap
765  if(isalpha((unsigned char) line[j])){
766  cur_num ++;
767  } else if(line[j] == k_IntronChar){
768  has_intron = true;
769  }
770  }
771 
772  if(cur_num > 0){
773  if(seq_strand == eNa_strand_plus){
774  if(feat_strand == eNa_strand_minus) {
775  start.push_back(previous_num_letter - prev_num);
776  } else {
777  start.push_back(previous_num_letter + prev_num);
778  }
779  } else {
780  if(feat_strand == eNa_strand_minus) {
781  start.push_back(previous_num_letter + prev_num);
782  } else {
783  start.push_back(previous_num_letter - prev_num);
784  }
785  }
786  } else if (has_intron) {
787  start.push_back(0); //sentinal for no show
788  }
789  prev_num += cur_num;
790  }
791 }
792 
793 ///make a new copy of master seq with feature info and return the scope
794 ///that contains this sequence
///One feature is created per entry of feat_range, commented "Putative <n>"
///with n counting from 1, and located on the ranges of that entry.
///NOTE(review): several lines of this function (the creation of `obj`, of
///`data`, and the initialisation of feat_data) are absent from this listing.
795 ///@param feat_range: the feature seqlocs
///@param feat_seq_strand: the strand info, one entry per element of
///feat_range; the list is consumed (an entry is popped for every feature)
797 ///@param handle: the seq handle for the original master seq
798 ///@return: the scope containing the new master seq
799 ///
800 static CRef<CScope> s_MakeNewMasterSeq(list<list<CRange<TSeqPos> > >& feat_range,
801  list<ENa_strand>& feat_seq_strand,
802  const CBioseq_Handle& handle)
803 {
807  CRef<CScope> scope (new CScope(*obj));
808  scope->AddDefaults();
// work on a private copy of the master bioseq
809  CRef<CBioseq> cbsp(new CBioseq());
810  cbsp->Assign(*(handle.GetCompleteBioseq()));
811 
812  CBioseq::TAnnot& anot_list = cbsp->SetAnnot();
813  CRef<CSeq_annot> anot(new CSeq_annot);
816  anot->SetData(*data);
817  CSeq_annot::TData::TFtable& ftable = anot->SetData().SetFtable();
818  int counter = 0;
819  ITERATE(list<list<CRange<TSeqPos> > >, iter, feat_range) {
820  counter ++;
821  CRef<CSeq_feat> seq_feat(new CSeq_feat);
822  CRef<CSeqFeatData> feat_data(new CSeqFeatData);
824  seq_feat->SetData(*feat_data);
825  seq_feat->SetComment("Putative " + NStr::IntToString(counter));
826  CRef<CSeq_loc> seq_loc (new CSeq_loc);
827 
// every range of one feature uses the strand at the front of the list
// NOTE(review): front() assumes feat_seq_strand has at least as many
// entries as feat_range -- confirm at the call site.
828  ITERATE(list<CRange<TSeqPos> >, iter2, *iter) {
829  seq_loc->Add(*(handle.GetRangeSeq_loc(iter2->GetFrom(),
830  iter2->GetTo(),
831  feat_seq_strand.front())));
832  }
833  seq_feat->SetLocation(*seq_loc);
834  ftable.push_back(seq_feat);
835  feat_seq_strand.pop_front();
836  }
// attach the new annotation to the copy and add the copy to the new scope
837  anot_list.push_back(anot);
838  CRef<CSeq_entry> entry(new CSeq_entry());
839  entry->SetSeq(*cbsp);
840  scope->AddTopLevelSeqEntry(*entry);
841 
842  return scope;
843 }
844 
845 //output feature lines
846 //@param reference_feat_line: the master feature line to be compared
847 //for coloring
848 //@param feat_line: the slave feature line
849 //@param color_feat_mismatch: color or not
850 //@param start: the alignment pos
851 //@param len: the length per line
852 //@param out: stream for output
853 //
854 static void s_OutputFeature(string& reference_feat_line,
855  string& feat_line,
856  bool color_feat_mismatch,
857  int start,
858  int len,
859  CNcbiOstream& out,
860  bool is_html)
861 {
862  if((int)feat_line.size() > start){
863  string actual_feat = feat_line.substr(start, len);
864  string actual_reference_feat = NcbiEmptyString;
865  if(reference_feat_line != NcbiEmptyString){
866  actual_reference_feat = reference_feat_line.substr(start, len);
867  }
868  if(color_feat_mismatch
869  && actual_reference_feat != NcbiEmptyString &&
870  !NStr::IsBlank(actual_reference_feat)){
871  string styledSequenceStr;
872  for(int i = 0; i < (int)actual_feat.size() &&
873  i < (int)actual_reference_feat.size(); i ++){
874  bool styledOutput = actual_feat[i] != actual_reference_feat[i] &&
875  (actual_feat[i] != ' ' && actual_feat[i] != k_IntronChar && actual_reference_feat[i] != k_IntronChar);
876  bool stopStyledOutput = (actual_feat[i] == actual_reference_feat[i]) && actual_feat[i] != ' ';
877  bool isStyled = s_ProcessStyledContent(actual_feat,i,styledOutput,stopStyledOutput, k_DefaultFeaturesTempl,styledSequenceStr,out);
878  if(!isStyled) out << actual_feat[i];
879  }
880  } else {
881  out << (is_html?CHTMLHelper::HTMLEncode(actual_feat):actual_feat);
882  }
883  }
884 
885 }
886 
887 
// Tail of a member-function definition that prints the feature lines of one
// alignment row for the current display segment. The header line (function
// name and the parameter referenced below as `alnRoInfo`) and several lines
// of the body are absent from this listing.
// NOTE(review): confirm the name and full signature against the full file.
//
// row:             alignment row whose features are printed
// master_feat_str: in/out; set to the feature string of row 0 and passed as
//                  the reference line to s_OutputFeature
// out:             output stream
889  int row,
890  string& master_feat_str,
891  CNcbiOstream& out)
892 {
893  TSAlnFeatureInfoList& feature = alnRoInfo->bioseqFeature[row];
894  CAlnMap::TSignedRange alignment_range = alnRoInfo->currRange;
895  int aln_start = alnRoInfo->currPrintSegment;
896  int line_length = alnRoInfo->currActualLineLen;
897  int start_length = alnRoInfo->maxStartLen;
// width of the id column, widened by the optional statistics and
// sequence-property columns
898  int id_length = alnRoInfo->maxIdLen;
899  if (alnRoInfo->show_align_stats) {
900  id_length += alnRoInfo->max_align_stats_len + k_AlignStatsMargin;
901  }
902  if (alnRoInfo->show_seq_property_label){
903  id_length += alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin;
904  }
905  NON_CONST_ITERATE(TSAlnFeatureInfoList, iter, feature) {
906  //check blank string for cases where CDS is in range
907  //but since it must align with the 2nd codon and is
908  //actually not in range
909  if (alignment_range.IntersectingWith((*iter)->aln_range) &&
910  !(NStr::IsBlank((*iter)->feature_string.
911  substr(aln_start, line_length)) &&
915  string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox","");
916  out << checkboxBuf;
917  }
918  out<<(*iter)->feature->feature_id;
919  if((*iter)->feature_start.empty()){
921  AddSpace(out, id_length + k_IdStartMargin
922  +start_length + k_StartSequenceMargin
923  -(*iter)->feature->feature_id.size());
924  } else {
925  int feat_start = (*iter)->feature_start.front();
926  if(feat_start > 0){
928  AddSpace(out, id_length + k_IdStartMargin
929  -(*iter)->feature->feature_id.size());
930  out << feat_start;
932  AddSpace(out, start_length -
933  NStr::IntToString(feat_start).size() +
935  } else { //no show start
937  AddSpace(out, id_length + k_IdStartMargin
938  +start_length + k_StartSequenceMargin
939  -(*iter)->feature->feature_id.size());
940  }
941 
942  (*iter)->feature_start.pop_front();
943  }
// decide whether mismatches against the master feature line are colored
944  bool color_cds_mismatch = false;
945  if((m_AlignOption & eHtml) &&
946  (m_AlignOption & eShowCdsFeature) && row > 0){
947  //only for slaves, only for cds feature
948  color_cds_mismatch = true;
949  } else if((m_AlignOption & eHtml) &&
952  //mostly for igblast
953  //only for slave
954  color_cds_mismatch = true;
955  }
956  s_OutputFeature(master_feat_str,
957  (*iter)->feature_string,
958  color_cds_mismatch, aln_start,
959  line_length, out, (m_AlignOption & eHtml));
960  if(row == 0){//set master feature as reference
961  master_feat_str = (*iter)->feature_string;
962  }
963  out<<"\n";
964  }
965  }
966 
967 }
968 
//Builds the link text shown for the seq-id of alignment row `row`.
//
//alnRoInfo  row data; seqidArray[row] and taxid[row] are passed to x_InitSeqUrl
//row        alignment row whose bioseq handle and id list are used
//giToUse    gi forwarded to x_InitSeqUrl
//
//Returns the string produced by CAlignFormatUtil::GetFullIDLink.
//NOTE(review): the listing line between 974 and 976 (presumably the condition
//whose block closes at 977) is not present in this view, so it is unknown
//when the defline is generated.
969 string CDisplaySeqalign::x_HTMLSeqIDLink(SAlnRowInfo *alnRoInfo, int row,TGi giToUse)
970 {
971  const CBioseq_Handle& bsp_handle = m_AV->GetBioseqHandle(row);
972  string urlLink = NcbiEmptyString;
973  const list<CRef<CSeq_id> >& ids = bsp_handle.GetBioseqCore()->GetId();
//the returned object is owned by this function: it is deleted below
974  CAlignFormatUtil::SSeqURLInfo *seqUrlInfo = x_InitSeqUrl(giToUse,alnRoInfo->seqidArray[row],alnRoInfo->taxid[row],ids);
976  seqUrlInfo->defline = sequence::CDeflineGenerator().GenerateDefline(bsp_handle);
977  }
//templates are always used for this link, overriding whatever x_InitSeqUrl set
978  seqUrlInfo->useTemplates = true;
979  urlLink = CAlignFormatUtil::GetFullIDLink(seqUrlInfo,&ids);
//NOTE(review): raw delete; if GetFullIDLink (or the defline generation above)
//throws, seqUrlInfo is never released. A scoped owner would avoid that.
980  delete seqUrlInfo;
981  return urlLink;
982 }
983 
985  TTaxId taxid,const list<CRef<CSeq_id> >& ids)
986 {
987  string idString = m_AV->GetSeqId(1).GetSeqIdString();
988  CRange<TSeqPos> range = (m_AlnLinksParams.count(idString) > 0 && m_AlnLinksParams[idString].subjRange) ?
989  CRange<TSeqPos>(m_AlnLinksParams[idString].subjRange->GetFrom() + 1,m_AlnLinksParams[idString].subjRange->GetTo() + 1) :
990  CRange<TSeqPos>(0,0);
991  bool flip = (m_AlnLinksParams.count(idString) > 0) ? m_AlnLinksParams[idString].flip : false;
992  string user_url= (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : "";
993  giToUse = (giToUse == ZERO_GI) ? CAlignFormatUtil::GetGiForSeqIdList(ids):giToUse;
996  giToUse,
997  accession,
998  0,// linkout not used any more in seqUrl
999  m_cur_align,
1000  true,
1001  (m_AlignOption & eNewTargetWindow) ? true : false,
1002  range,
1003  flip,
1004  taxid,
1005  (m_AlignOption & eShowInfoOnMouseOverSeqid) ? true : false);
1006  seqUrlInfo->resourcesUrl = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "RESOURCE_URL") : "";
1007  seqUrlInfo->advancedView = seqUrlInfo->useTemplates = m_AlignTemplates != NULL;
1008  return seqUrlInfo;
1009 }
1010 
1012  const list< CRef< CBlast_def_line > > &bdl_list,
1013  int lnkDispParams)
1014 {
1015  CAlignFormatUtil::SSeqURLInfo *seqUrlInfo = alnDispParams->seqUrlInfo;
1016  seqUrlInfo->hasTextSeqID = alnDispParams->hasTextSeqID;
1017  CRef<CSeq_id> seqID = alnDispParams->seqID;
1018  if(lnkDispParams & eDisplayResourcesLinks) {
1019  seqUrlInfo->segs = (lnkDispParams & eDisplayDownloadLink) ? x_GetSegs(1) : "";
1022  *seqID,
1023  m_Scope,
1024  customLinkTypes);
1025 
1027 
1028  //URL to FASTA representation, includes genbank, trace and SNP
1030 
1031  //URL to FASTA for all regions
1033 
1034 
1035  if(m_AlignOption&eLinkout && (seqUrlInfo->hasTextSeqID)){
1037  m_LinkoutInfo.taxid = seqUrlInfo->taxid;
1038  m_LinkoutInfo.subjRange = seqUrlInfo->seqRange;
1039  if(bdl_list.size() > 0) {
1041  }
1042  else {
1044  }
1045 
1046  }
1047  }
1048 }
1049 
1050 
1051 void
1053 {
1054  ITERATE(TSeqLocInfoVector, sequence_masks, masks) {
1055  const CSeq_id& id = sequence_masks->front()->GetSeqId();
1056  m_SubjectMasks[id] = *sequence_masks;
1057  }
1058 }
1059 
1060 //align translation to 2nd base
1061 static string s_GetFinalTranslatedString(const CSeq_loc& loc, CScope& scope,
1062  int first_encoding_base, int align_length,
1063  const string& translation, const string& sequence,
1064  char gap_char){
1065 
1066  string feat(align_length, ' ');
1067  int num_base = 0;
1068  int j = 0;
1069 
1070  for (int i = first_encoding_base; i < (int) feat.size() &&
1071  j < (int)translation.size(); i ++) {
1072  if (sequence[i] != gap_char) {
1073  num_base ++;
1074 
1075  //aa residue to 2nd nuc position
1076  if (num_base%3 == 2) {
1077  feat[i] = translation[j];
1078  j ++;
1079  }
1080  }
1081  }
1082  return feat;
1083 }
1084 
1085 void CDisplaySeqalign::x_AddTranslationForLocalSeq(vector<TSAlnFeatureInfoList>& retval,
1086  vector<string>& sequence) const {
1087  if (m_AV->IsPositiveStrand(0) && m_AV->IsPositiveStrand(1)) {
1088 
1089  //find the first aln pos that both seq has no gaps for 3 consecutive pos.
1090  int non_gap_aln_pos = 0;
1091  CAlnVec::TResidue gap_char = m_AV->GetGapChar(0);
1092  int num_consecutive = 0;
1093  for (int i =0; i < (int) sequence[0].size(); i ++) {
1094  if (sequence[0][i] != gap_char &&
1095  sequence[1][i] != gap_char) {
1096 
1097  num_consecutive ++;
1098  if (num_consecutive >=3) {
1099  non_gap_aln_pos = i - 2;
1100  break;
1101  }
1102  } else {
1103  num_consecutive = 0;
1104  }
1105  }
1106 
1107 
1108  //master
1109  int master_frame_extra = m_AV->GetSeqPosFromAlnPos(0, non_gap_aln_pos)%3;
1110  int master_frame_start;
1111  //= m_AV->GetSeqPosFromSeqPos(0, 1, subject_frame_start);
1112  master_frame_start = m_AV->GetSeqPosFromAlnPos(0, non_gap_aln_pos) +
1113  (3 - (master_frame_extra - m_TranslatedFrameForLocalSeq))%3;
1114 
1115  CRef<CSeq_loc> master_loc(new CSeq_loc((CSeq_loc::TId &) m_AV->GetSeqId(0),
1116  master_frame_start,
1117  m_AV->GetSeqStop(0)));
1118  master_loc->SetPartialStart(true, eExtreme_Biological);
1119  master_loc->SetPartialStop (true, eExtreme_Biological);
1120  string master_translation;
1121  CSeqTranslator::Translate(*master_loc,
1122  m_Scope,
1123  master_translation);
1124  int master_first_encoding_base = m_AV->GetAlnPosFromSeqPos(0, master_frame_start);
1125  string master_feat = s_GetFinalTranslatedString(*master_loc, m_Scope,
1126  master_first_encoding_base,
1127  m_AV->GetAlnStop() + 1,
1128  master_translation,
1129  sequence[0], gap_char);
1130 
1131  CRef<SAlnFeatureInfo> master_featInfo(new SAlnFeatureInfo);
1132 
1133  x_SetFeatureInfo(master_featInfo, *master_loc, 0, m_AV->GetAlnStop(),
1134  m_AV->GetAlnStop(), ' ',
1135  " ", master_feat, -1);
1136 
1137  retval[0].push_back(master_featInfo);
1138 
1139  //subject
1140  int subject_frame_start = m_AV->GetSeqPosFromSeqPos(1, 0, master_frame_start);
1141 
1142  CRef<CSeq_loc> subject_loc(new CSeq_loc((CSeq_loc::TId &) m_AV->GetSeqId(1),
1143  (CSeq_loc::TPoint) subject_frame_start,
1144  (CSeq_loc::TPoint) m_AV->GetSeqStop(1)));
1145  subject_loc->SetPartialStart(true, eExtreme_Biological);
1146  subject_loc->SetPartialStop (true, eExtreme_Biological);
1147  string subject_translation;
1148  CSeqTranslator::Translate(*subject_loc,
1149  m_Scope,
1150  subject_translation);
1151  int subject_first_encoding_base = m_AV->GetAlnPosFromSeqPos(1, subject_frame_start);
1152  string subject_feat = s_GetFinalTranslatedString(*subject_loc, m_Scope,
1153  subject_first_encoding_base,
1154  m_AV->GetAlnStop() + 1,
1155  subject_translation,
1156  sequence[1], gap_char);
1157 
1158  CRef<SAlnFeatureInfo> subject_featInfo(new SAlnFeatureInfo);
1159 
1160  x_SetFeatureInfo(subject_featInfo, *subject_loc, 0, m_AV->GetAlnStop(),
1161  m_AV->GetAlnStop(), ' ',
1162  " ", subject_feat, -1);
1163 
1164  retval[1].push_back(subject_featInfo);
1165 
1166  }
1167 }
1168 
//Calculates the identity between the master row and a given row of a
//multiple alignment, excluding the leading and trailing gaps of the given row.
//
//sequence_standard  the master (reference) row
//sequence           the row compared against the master
//gap_char           gap character used in both rows
//match              [out] number of compared columns where both rows are equal
//align_length       [out] number of compared columns
//
//Only columns between the first and the last non-gap character of sequence
//are compared; columns where both rows have a gap are ignored. Both outputs
//are 0 when sequence is empty or consists of gaps only.
void s_CalculateIdentity(const std::string& sequence_standard,
                         const std::string& sequence , char gap_char,
                         int& match, int& align_length){
    match = 0;
    align_length = 0;

    //first and last residue of the row; searching from both ends (including
    //index 0) keeps trailing gaps out even when the only residue is the
    //first character
    const std::string::size_type first = sequence.find_first_not_of(gap_char);
    if (first == std::string::npos) {
        return; //nothing but gaps: there is no aligned region to score
    }
    const std::string::size_type last = sequence.find_last_not_of(gap_char);

    //do not read past the end of the master row
    const std::string::size_type stop =
        (last + 1 < sequence_standard.size()) ? last + 1 : sequence_standard.size();

    for (std::string::size_type i = first; i < stop; i ++) {
        if (sequence[i] == gap_char && sequence_standard[i] == gap_char) {
            continue; //gap in both rows: not part of this pairwise comparison
        }
        if (sequence_standard[i] == sequence[i]) {
            match ++;
        }
        align_length ++;
    }
}
1204 
1206 {
1207  size_t maxIdLen=0, maxStartLen=0;
1208  //, startLen=0, actualLineLen=0;
1209  //size_t aln_stop=m_AV->GetAlnStop();
1210  const int rowNum=m_AV->GetNumRows();
1212  m_AV->SetAnchor(0);
1213  }
1214  m_AV->SetGapChar('-');
1215 
1216  if (m_AlignOption & eShowEndGaps) {
1217  m_AV->SetEndChar('-');
1218  }
1219  else {
1220  m_AV->SetEndChar(' ');
1221  }
1222  vector<string> sequence(rowNum);
1223  vector<CAlnMap::TSeqPosList> seqStarts(rowNum);
1224  vector<CAlnMap::TSeqPosList> seqStops(rowNum);
1225  vector<CAlnMap::TSeqPosList> insertStart(rowNum);
1226  vector<CAlnMap::TSeqPosList> insertAlnStart(rowNum);
1227  vector<CAlnMap::TSeqPosList> insertLength(rowNum);
1228  vector<string> seqidArray(rowNum);
1229  string middleLine;
1230  vector<CAlnMap::TSignedRange> rowRng(rowNum);
1231  vector<int> frame(rowNum);
1232  vector<TTaxId> taxid(rowNum);
1233  int max_feature_num = 0;
1234  vector<int> match(rowNum-1);
1235  vector<double> percent_ident(rowNum-1);
1236  vector<int> align_length(rowNum-1);
1237  vector<string> align_stats(rowNum-1);
1238  vector<string> seq_property_label(rowNum-1);
1239  int max_align_stats = 0;
1240  int max_seq_property_label = 0;
1241 
1242  //Add external query feature info such as phi blast pattern
1243  vector<TSAlnFeatureInfoList> bioseqFeature;
1244  x_GetQueryFeatureList(rowNum, (int)m_AV->GetAlnStop(), bioseqFeature);
1245  if(m_DomainInfo && !m_DomainInfo->empty()){
1246  x_GetDomainInfo(rowNum, (int)m_AV->GetAlnStop(), bioseqFeature);
1247  }
1248  _ASSERT((int)bioseqFeature.size() == rowNum);
1249  // Mask locations for queries (first elem) and subjects (all other rows)
1250  vector<TSAlnSeqlocInfoList> masked_regions(rowNum);
1251  x_FillLocList(masked_regions[0], m_Seqloc);
1252 
1253  for (int row = 1; row < rowNum; row++) {
1254  const CSeq_id& id = m_AV->GetSeqId(row);
1255  x_FillLocList(masked_regions[row], &m_SubjectMasks[id]);
1256  }
1257 
1258  //prepare data for each row
1259  list<list<CRange<TSeqPos> > > feat_seq_range;
1260  list<ENa_strand> feat_seq_strand;
1261 
1262  for (int row=0; row<rowNum; row++) {
1263 
1264  string type_temp = m_BlastType;
1265  type_temp = NStr::TruncateSpaces(NStr::ToLower(type_temp));
1266  if((m_AlignTemplates == NULL && (type_temp == "mapview" || type_temp == "mapview_prev")) ||
1267  type_temp == "gsfasta" || type_temp == "gsfasta_prev"){
1268  taxid[row] = CAlignFormatUtil::GetTaxidForSeqid(m_AV->GetSeqId(row),
1269  m_Scope);
1270  } else if ((m_AlignOption & eHtml) && m_AV->GetSeqId(row).Which() == CSeq_id::e_Local && row > 0){
1271  //this is for adding url for local seqid, for example igblast db.
1272  taxid[row] = CAlignFormatUtil::GetTaxidForSeqid(m_AV->GetSeqId(row),
1273  m_Scope);
1274  } else {
1275  taxid[row] = ZERO_TAX_ID;
1276  }
1277  rowRng[row] = m_AV->GetSeqAlnRange(row);
1278  frame[row] = (m_AV->GetWidth(row) == 3 ?
1279  s_GetFrame(m_AV->IsPositiveStrand(row) ?
1280  m_AV->GetSeqStart(row) :
1281  m_AV->GetSeqStop(row),
1282  m_AV->IsPositiveStrand(row) ?
1284  m_AV->GetSeqId(row), m_Scope) : 0);
1285  //make sequence
1286  m_AV->GetWholeAlnSeqString(row, sequence[row], &insertAlnStart[row],
1287  &insertStart[row], &insertLength[row],
1288  (int)m_LineLen, &seqStarts[row], &seqStops[row]);
1290  m_AlignOption&eMergeAlign && m_AV->GetWidth(row) != 3) {
1291 
1292  s_CalculateIdentity(sequence[0], sequence[row], m_AV->GetGapChar(row),
1293  match[row-1], align_length[row-1]);
1294 
1295  if (align_length[row-1] > 0 ){
1296  percent_ident[row-1] = ((double)match[row-1])/align_length[row-1]*100;
1297  align_stats[row-1] = NStr::DoubleToString(percent_ident[row-1], 1, 0) +
1298  "% (" + NStr::IntToString(match[row-1]) + "/" +
1299  NStr::IntToString(align_length[row-1]) + ")" ;
1300  } else {//something is wrong
1301  percent_ident[row - 1] = 0;
1302  align_stats[row-1] = "0";
1303  }
1304 
1305  max_align_stats = max(max_align_stats,
1306  (int)align_stats[row-1].size());
1307  }
1308 
1309  //seq property label
1310  if(row > 0 &&
1312  m_AlignOption&eMergeAlign && m_AV->GetWidth(row) != 3) {
1313 
1314  if((int)m_SeqPropertyLabel->size() >= row -1){
1315  seq_property_label[row-1] = (*m_SeqPropertyLabel)[row]; //skip the first one which is for query
1316  } else {//something is wrong
1317  seq_property_label[row-1] = NcbiEmptyString;
1318  }
1319 
1320  max_seq_property_label = max(max_seq_property_label,
1321  (int)seq_property_label[row-1].size());
1322  }
1323 
1325  && m_AV->GetWidth(row) != 3
1326  && !(m_AlignType & eProt)) {
1327  x_AddTranslationForLocalSeq(bioseqFeature, sequence);
1328  }
1329  //make feature. Only for pairwise and untranslated for subject nuc seq
1330  if(!(m_AlignOption & eMasterAnchored) &&
1331  !(m_AlignOption & eMergeAlign) && m_AV->GetWidth(row) != 3 &&
1332  !(m_AlignType & eProt)){
1334  TGi master_gi = FindGi(m_AV->GetBioseqHandle(0).
1335  GetBioseqCore()->GetId());
1336  x_GetFeatureInfo(bioseqFeature[row], -1, *m_featScope,
1337  CSeqFeatData::e_Cdregion, row, sequence[row],
1338  feat_seq_range, feat_seq_strand,
1339  row == 1 && !(master_gi > ZERO_GI) ? true : false);
1340 
1341  if(!(feat_seq_range.empty()) && row == 1) {
1342  //make a new copy of master bioseq and add the feature from
1343  //slave to make putative cds feature
1344  CRef<CScope> master_scope_with_feat =
1345  s_MakeNewMasterSeq(feat_seq_range, feat_seq_strand,
1346  m_AV->GetBioseqHandle(0));
1347  int custom_genetic_code = -1;
1348  NON_CONST_ITERATE(CDisplaySeqalign::TSAlnFeatureInfoList, iter_feat, bioseqFeature[1]) {
1349  if ((*iter_feat)->genetic_code > 0) {
1350  custom_genetic_code = (*iter_feat)->genetic_code;
1351  break;
1352  }
1353  }
1354 
1355  //make feature string for master bioseq
1356  list<list<CRange<TSeqPos> > > temp_holder;
1357  x_GetFeatureInfo(bioseqFeature[0], custom_genetic_code, *master_scope_with_feat,
1358  CSeqFeatData::e_Cdregion, 0, sequence[0],
1359  temp_holder, feat_seq_strand, false);
1360  }
1361  }
1363  x_GetFeatureInfo(bioseqFeature[row], -1, *m_featScope,
1364  CSeqFeatData::e_Gene, row, sequence[row],
1365  feat_seq_range, feat_seq_strand, false);
1366  }
1367  }
1368  //make id
1369  x_FillSeqid(seqidArray[row], row);
1370  maxIdLen=max<size_t>(seqidArray[row].size(), maxIdLen);
1371  size_t maxCood=max<size_t>(m_AV->GetSeqStart(row), m_AV->GetSeqStop(row));
1372  maxStartLen = max<size_t>(NStr::SizetToString(maxCood).size(), maxStartLen);
1373  }
1374  for(int i = 0; i < rowNum; i ++){//adjust max id length for feature id
1375  int num_feature = 0;
1376  ITERATE(TSAlnFeatureInfoList, iter, bioseqFeature[i]) {
1377  maxIdLen=max<size_t>((*iter)->feature->feature_id.size(), maxIdLen);
1378  num_feature ++;
1379  if(num_feature > max_feature_num){
1380  max_feature_num = num_feature;
1381  }
1382  }
1383  } //end of preparing row data
1384  SAlnRowInfo *alnRoInfo = new SAlnRowInfo();
1385  alnRoInfo->sequence = sequence;
1386  alnRoInfo->seqStarts = seqStarts;
1387  alnRoInfo->seqStops = seqStops;
1388  alnRoInfo->insertStart = insertStart;
1389  alnRoInfo->insertAlnStart = insertAlnStart;
1390  alnRoInfo->insertLength = insertLength;
1391  alnRoInfo->seqidArray = seqidArray;
1392  alnRoInfo->middleLine = middleLine;
1393  alnRoInfo->rowRng = rowRng;
1394  alnRoInfo->frame = frame;
1395  alnRoInfo->taxid = taxid;
1396  alnRoInfo->bioseqFeature = bioseqFeature;
1397  alnRoInfo->masked_regions = masked_regions;
1398  alnRoInfo->seqidArray = seqidArray;
1399  alnRoInfo->maxIdLen = maxIdLen;
1400  alnRoInfo->maxStartLen = maxStartLen;
1401  alnRoInfo->max_feature_num = max_feature_num;
1402  alnRoInfo->colorMismatch = false;
1403  alnRoInfo->rowNum = rowNum;
1404  alnRoInfo->match = match;
1405  alnRoInfo->percent_ident = percent_ident;
1406  alnRoInfo->align_length = align_length;
1407  alnRoInfo->align_stats = align_stats;
1408  alnRoInfo->max_align_stats_len=max_align_stats;
1409  alnRoInfo->seq_property_label = seq_property_label;
1410  alnRoInfo->max_seq_property_label = max_seq_property_label;
1411  return alnRoInfo;
1412 }
1413 //uses m_AV m_LineLen m_AlignOption m_QueryNumber
1415 {
1416  size_t aln_stop=m_AV->GetAlnStop();
1417  int rowNum = alnRoInfo->rowNum;
1418  vector<int> prev_stop(rowNum);
1420 
1421  //only for untranslated alignment
1424  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1425 
1426  //only for untranslated alignment
1429  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1430 
1431  //output rows
1432  string formattedString;
1433  for(int j=0; j<=(int)aln_stop; j+=(int)m_LineLen){
1434  string rowdata = x_DisplayRowDataSet(alnRoInfo,j, prev_stop);
1435  formattedString += rowdata;
1436  }//end of displaying rows
1437  return formattedString;
1438 }
1439 
1441 {
1442  size_t aln_stop=m_AV->GetAlnStop();
1443  int rowNum = alnRoInfo->rowNum;
1444  vector<int> prev_stop(rowNum);
1445 
1446  //only for untranslated alignment
1449  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1450 
1451  //only for untranslated alignment
1454  m_AV->GetWidth(0) != 3 && m_AV->GetWidth(1) != 3) ? true : false;
1455  int rowSetsCount = 1;
1456  //output rows
1457  for(int j=0; j<=(int)aln_stop; j+=(int)m_LineLen){
1458  //Used for download query range specified by m_QueryAnchoredSetIndex
1459  //Until m_QueryAnchoredSetIndex==rowSetsCount do not display alignment just calculate prev_stop, etc.
1460  if(m_QueryAnchoredSetIndex != -1 && m_QueryAnchoredSetIndex != rowSetsCount) {
1461  x_ProcessRowDataSet(alnRoInfo,j, prev_stop);
1462  }
1463  else {
1464  string rowdata = x_DisplayRowDataSet(alnRoInfo,j, prev_stop);
1465  if(m_AlignTemplates && !m_AlignTemplates->alignQueryAnchTempl.empty()) {//Templates will be used for query anchored display
1467  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"currQueryAnchSet",NStr::IntToString(rowSetsCount));
1468  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"nextQueryAnchSet",NStr::IntToString(rowSetsCount + 1));
1469  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"prevQueryAnchSet",NStr::IntToString(rowSetsCount - 1));
1470  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"fromQueryRange",NStr::IntToString(j + 1));
1471  rowdata = CAlignFormatUtil::MapTemplate(rowdata,"toQueryRange",NStr::IntToString(j + alnRoInfo->currActualLineLen));
1472  }
1473  out << rowdata;
1474  }
1475  rowSetsCount++;
1476  }//end of displaying rows
1477 }
1478 
1479 
1480 
//Formats one segment (one display line per row) of the alignment, starting at
//alignment position aln_start, and returns the formatted text.
//
//alnRoInfo  row data; currPrintSegment, currActualLineLen and currRange are
//           set here, the front of seqStarts/seqStops is consumed for every
//           row, and sequence[] may be rewritten in place (see below)
//aln_start  first alignment position of the segment
//prev_stop  per-row stop coordinate of the last segment that had sequence;
//           read for the sequence line and updated here
//
//NOTE(review): several listing lines are not present in this view (1488,
//1527-1528, 1535, 1537). In particular the declaration of `out` and the
//opening of the conditional closed at 1529 are not visible.
1481 string CDisplaySeqalign::x_DisplayRowDataSet(SAlnRowInfo *alnRoInfo,int aln_start, vector<int> &prev_stop)
1482 {
1483  size_t actualLineLen=0;
1484  string master_feat_str = NcbiEmptyString;
1485  size_t aln_stop=m_AV->GetAlnStop();
1486 
1487  int rowNum = alnRoInfo->rowNum;
1489 
1490 
1491  //output according to aln coordinates
//the segment is m_LineLen long, or shorter when fewer positions remain
1492  if(aln_stop-aln_start+1<m_LineLen) {
1493  actualLineLen=aln_stop-aln_start+1;
1494  } else {
1495  actualLineLen=m_LineLen;
1496  }
1497  CAlnMap::TSignedRange curRange(aln_start, aln_start+(int)actualLineLen-1);
//publish the current segment so that the helpers called below can read it
1498  alnRoInfo->currPrintSegment = aln_start;
1499  alnRoInfo->currActualLineLen = actualLineLen;
1500  alnRoInfo->currRange = curRange;
1501  //here is each row
1502  for (int row=0; row<rowNum; row++) {
//with eShowGapOnlyLines every row is shown; otherwise only rows whose
//aligned range intersects the current segment
1503  bool hasSequence = true;
1504  if (!(m_AlignOption & eShowGapOnlyLines)) {
1505  hasSequence = curRange.IntersectingWith(alnRoInfo->rowRng[row]);
1506  }
1507  //only output rows that have sequence
1508  if (hasSequence){
//NOTE(review): front() is read without an emptiness check, while the
//pop_front at the end of the loop body is guarded
1509  int end = alnRoInfo->seqStops[row].front() + 1;
1510  bool has_mismatch = false;
1511  //change the alignment line to identity style
//alphabetic residues equal to the master row (row 0) are overwritten with
//k_IdentityChar directly in alnRoInfo->sequence; any other column sets
//has_mismatch
1512  if (row>0 && m_AlignOption & eShowIdentity){//check usage - pairwise - only
1513  for (int index = aln_start; index < aln_start + (int)actualLineLen &&
1514  index < (int)alnRoInfo->sequence[row].size(); index ++){
1515  if (alnRoInfo->sequence[row][index] == alnRoInfo->sequence[0][index] &&
1516  isalpha((unsigned char) alnRoInfo->sequence[row][index])) {
1517  alnRoInfo->sequence[row][index] = k_IdentityChar;
1518  } else if (!has_mismatch) {
1519  has_mismatch = true;
1520  }
1521  }
1522  }
1523  //feature for query
//master features go above the master sequence line
1524  if(row == 0){
1525  x_PrintFeatures(alnRoInfo, row, master_feat_str, out);
1526  }
1529  }
1530  else {
1531  x_DisplaySequenceIDForPairwise(alnRoInfo,row,has_mismatch,out);
1532  }
1533  //print out sequence line
//prev_stop[row] still holds the stop of the previous segment at this point
1534  x_DisplaySequenceLine(alnRoInfo, row, prev_stop[row], out);
1536  //inserts for anchored view
1538  }
1539  //display subject sequence feature.
//subject features go below the subject sequence line
1540  if(row > 0){
1541  x_PrintFeatures(alnRoInfo, row, master_feat_str, out);
1542  }
1543  //display middle line for pairwise
1544  if (row == 0 && ((m_AlignOption & eShowMiddleLine)) && !(m_AlignOption&eMergeAlign)) {
1545  x_DisplayMiddLine(alnRoInfo, row,out);
1546  }
1547  prev_stop[row] = end;
1548  }
//advance to the next segment's start/stop for this row, whether or not the
//row was displayed
1549  if(!alnRoInfo->seqStarts[row].empty()){ //shouldn't need this check
1550  alnRoInfo->seqStarts[row].pop_front();
1551  }
1552  if(!alnRoInfo->seqStops[row].empty()){
1553  alnRoInfo->seqStops[row].pop_front();
1554  }
1555  }//end of displaying rows
//blank line after the row set
1556  out<<"\n";
1557  string formattedString = CNcbiOstrstreamToString(out);
1558  return formattedString;
1559 }
1560 
1561 void CDisplaySeqalign::x_ProcessRowDataSet(SAlnRowInfo *alnRoInfo,int aln_start, vector<int> &prev_stop)
1562 {
1563  size_t actualLineLen=0;
1564  string master_feat_str = NcbiEmptyString;
1565  size_t aln_stop=m_AV->GetAlnStop();
1566 
1567  int rowNum = alnRoInfo->rowNum;
1568 
1569 
1570  //output according to aln coordinates
1571  if(aln_stop-aln_start+1<m_LineLen) {
1572  actualLineLen=aln_stop-aln_start+1;
1573  } else {
1574  actualLineLen=m_LineLen;
1575  }
1576  CAlnMap::TSignedRange curRange(aln_start, aln_start+(int)actualLineLen-1);
1577  alnRoInfo->currPrintSegment = aln_start;
1578  alnRoInfo->currActualLineLen = actualLineLen;
1579  alnRoInfo->currRange = curRange;
1580  //here is each row
1581  for (int row=0; row<rowNum; row++) {
1582  bool hasSequence = true;
1583  if (!(m_AlignOption & eShowGapOnlyLines)) {
1584  hasSequence = curRange.IntersectingWith(alnRoInfo->rowRng[row]);
1585  }
1586  //only output rows that have sequence
1587  if (hasSequence){
1588  int end = alnRoInfo->seqStops[row].front() + 1;
1589  prev_stop[row] = end;
1590  }
1591  if(!alnRoInfo->seqStarts[row].empty()){ //shouldn't need this check
1592  alnRoInfo->seqStarts[row].pop_front();
1593  }
1594  if(!alnRoInfo->seqStops[row].empty()){
1595  alnRoInfo->seqStops[row].pop_front();
1596  }
1597  }//end of displaying rows
1598 }
1599 
1601 {
1602  size_t startLen=0;
1603  int start = alnRoInfo->seqStarts[row].front() + 1; //+1 for 1 based
1604  int end = alnRoInfo->seqStops[row].front() + 1;
1605  int j = alnRoInfo->currPrintSegment;
1606  int actualLineLen = alnRoInfo->currActualLineLen;
1607  //print out sequence line
1608  //adjust space between id and start
1609  CAlignFormatUtil::AddSpace(out, alnRoInfo->maxIdLen-alnRoInfo->seqidArray[row].size() + k_IdStartMargin);
1610  //not to display start and stop number for empty row
1611  if ((j > 0 && end == prev_stop)
1612  || (j == 0 && start == 1 && end == 1)) {
1613  startLen = 0;
1614  } else {
1615  out << start;
1616  startLen=NStr::IntToString(start).size();
1617  }
1618 
1620  x_OutputSeq(alnRoInfo->sequence[row], m_AV->GetSeqId(row), j,
1621  (int)actualLineLen, alnRoInfo->frame[row], row,
1622  (row > 0 && alnRoInfo->colorMismatch)?true:false,
1623  alnRoInfo->masked_regions[row], out);
1625 
1626  //not to display stop number for empty row in the middle
1627  if (!(j > 0 && end == prev_stop)
1628  && !(j == 0 && start == 1 && end == 1)) {
1629  out << end;
1630  }
1631  out<<"\n";
1632 }
1633 
1635 {
1636  list<string> inserts;
1637  string insertPosString; //the one with "\" to indicate insert
1638  TSInsertInformationList insertList;
1639  int j = alnRoInfo->currPrintSegment;
1640  CAlnMap::TSignedRange curRange = alnRoInfo->currRange;
1641  x_GetInserts(insertList, alnRoInfo->insertAlnStart[row],
1642  alnRoInfo->insertStart[row], alnRoInfo->insertLength[row],
1643  j + (int)m_LineLen);
1644  x_FillInserts(row, curRange, j, inserts, insertPosString, insertList);
1645  bool insertAlready = false;
1646  for(list<string>::iterator iter = inserts.begin();
1647  iter != inserts.end(); iter ++){
1648  if(!insertAlready){
1651  string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox","");
1652  out << checkboxBuf;
1653  }
1654 
1655  int base_margin = alnRoInfo->maxIdLen + k_IdStartMargin + alnRoInfo->maxStartLen + k_StartSequenceMargin;
1656 
1657  if (alnRoInfo->show_align_stats) {
1658  base_margin += alnRoInfo->max_align_stats_len + k_AlignStatsMargin;
1659  }
1660  if (alnRoInfo->show_seq_property_label){
1661  base_margin += alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin;
1662  }
1663  CAlignFormatUtil::AddSpace(out, base_margin);
1664  out << insertPosString<<"\n";
1665  }
1667  string checkboxBuf = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox","");
1668  out << checkboxBuf;
1669  }
1670  int base_margin = alnRoInfo->maxIdLen + k_IdStartMargin + alnRoInfo->maxStartLen + k_StartSequenceMargin;
1671 
1672  if (alnRoInfo->show_align_stats) {
1673  base_margin += alnRoInfo->max_align_stats_len + k_AlignStatsMargin;
1674  }
1675  if (alnRoInfo->show_seq_property_label){
1676  base_margin += alnRoInfo->max_seq_property_label + k_SequencePropertyLabelMargin;
1677  }
1678  CAlignFormatUtil::AddSpace(out, base_margin);
1679  out<<*iter<<"\n";
1680  insertAlready = true;
1681  }
1682 }
1683 
1685 {
1686  //highlight the seqid for pairwise-with-identity format
1688  && m_AlignOption&eShowIdentity && has_mismatch &&
1690  //highlight the seqid for pairwise-with-identity format
1691  string alnStr = CAlignFormatUtil::MapTemplate(k_DefaultPairwiseWithIdntTempl,"alndata",alnRoInfo->seqidArray[row]);
1692  out<< alnStr;
1693  }
1694  else {
1695  out<<alnRoInfo->seqidArray[row];
1696  }
1697 }
1698 
1700 {
1701  string urlLink = NcbiEmptyString;
1702  //setup url link for seqid
1703  TGi gi = ZERO_GI;
1704  if(m_AlignOption & eHtml){
1705  if(m_AV->GetSeqId(row).Which() == CSeq_id::e_Gi){
1706  gi = m_AV->GetSeqId(row).GetGi();
1707  }
1708  if(!(gi > ZERO_GI)){
1709  gi = CAlignFormatUtil::GetGiForSeqIdList(m_AV->GetBioseqHandle(row).
1710  GetBioseqCore()->GetId());
1711  }
1712  string anchorTmpl,checkBoxTmpl,id_lbl;
1713  bool showAnchor = (row == 0 && (m_AlignOption & eHyperLinkMasterSeqid)) || (row > 0 && (m_AlignOption & eHyperLinkSlaveSeqid));
1714  bool showCheckbox = ((m_AlignOption & eMergeAlign) && (m_AlignOption & eSequenceRetrieval) && m_CanRetrieveSeq) ||
1716  if(showAnchor){
1718  if (m_ResultPositionIndex >= 0){
1719  anchorTmpl = CAlignFormatUtil::MapTemplate(anchorTmpl,"resultPositionIndex",m_ResultPositionIndex);
1720  }
1721  anchorTmpl = CAlignFormatUtil::MapTemplate(anchorTmpl,"id_lbl",gi > ZERO_GI ?
1722  NStr::NumericToString(gi):alnRoInfo->seqidArray[row]);
1723  }
1724  //get sequence checkbox
1725  if(showCheckbox) {
1726  checkBoxTmpl = !(m_AlignOption & eShowCheckBox) ?
1728 
1729  checkBoxTmpl = CAlignFormatUtil::MapTemplate(k_DefaultSpaceMaintainerTempl,"chkbox",checkBoxTmpl);
1730  checkBoxTmpl = CAlignFormatUtil::MapTemplate(checkBoxTmpl,"queryNumber",NStr::IntToString(m_QueryNumber));
1732  const CRef<CSeq_id> seqID = FindBestChoice(m_AV->GetBioseqHandle(row).GetBioseqCore()->GetId(), CSeq_id::WorstRank);
1734  if(seqID->IsLocal()) {
1735  id_lbl = "lcl|" + id_lbl;
1736  }
1737  }
1738  }
1739  if(showCheckbox || showAnchor) {
1740  id_lbl = id_lbl.empty() ? ((gi > ZERO_GI) ? NStr::NumericToString(gi) : alnRoInfo->seqidArray[row]) : id_lbl;
1741  string displString = CAlignFormatUtil::MapTemplate(anchorTmpl + checkBoxTmpl,"id_lbl",id_lbl);
1742  out << displString;
1743  }
1744  }
1745 
1746  if(alnRoInfo->show_seq_property_label){
1747  if (row > 0){
1748 
1749  out<<alnRoInfo->seq_property_label[row-1];
1751  (int)alnRoInfo->seq_property_label[row-1].size() + k_SequencePropertyLabelMargin);
1752  } else {
1754  }
1755  }
1756 
1757  if(alnRoInfo->show_align_stats){
1758  if (row > 0){
1759  out<<alnRoInfo->align_stats[row-1];
1761  (int)alnRoInfo->align_stats[row-1].size() + k_AlignStatsMargin);
1762  } else {
1764  }
1765  }
1766  if(m_AlignOption & eHtml){
1767  if((row == 0 && (m_AlignOption & eHyperLinkMasterSeqid)) ||
1768  (row > 0 && (m_AlignOption & eHyperLinkSlaveSeqid))){
1769  m_cur_align = row;
1770  urlLink = x_HTMLSeqIDLink(alnRoInfo, row,gi);
1771  }
1772  }
1773  if(!urlLink.empty()) {
1774  out << urlLink;
1775  }
1776  else {
1777  out<<alnRoInfo->seqidArray[row];
1778  }
1779 }
1780 
1782 {
1783  int j = alnRoInfo->currPrintSegment;
1784  int actualLineLen = alnRoInfo->currActualLineLen;
1785  CSeq_id no_id;
1787  x_OutputSeq(alnRoInfo->middleLine, no_id, j, (int)actualLineLen, 0, row, false, alnRoInfo->masked_regions[row], out);
1788  out<<"\n";
1789 }
1790 
1792 {
1793  size_t aln_stop=m_AV->GetAlnStop();
1794 
1795  aln_vec_info->match = 0;
1796  aln_vec_info->positive = 0;
1797  aln_vec_info->gap = 0;
1798  aln_vec_info->identity = 0;
1799  x_FillIdentityInfo(aln_vec_info->alnRowInfo->sequence[0],
1800  aln_vec_info->alnRowInfo->sequence[1],
1801  aln_vec_info->match,
1802  aln_vec_info->positive,
1803  aln_vec_info->alnRowInfo->middleLine);
1805  aln_vec_info->identity = CAlignFormatUtil::GetPercentMatch(aln_vec_info->match, (int)aln_stop+1);
1806  if(aln_vec_info->identity >= k_ColorMismatchIdentity && aln_vec_info->identity <100 &&
1808  aln_vec_info->alnRowInfo->colorMismatch = true;
1809  }
1810  aln_vec_info->gap = x_GetNumGaps();
1811  }
1812 }
1813 
1815 {
1816  SAlnRowInfo *alnRoInfo = x_PrepareRowData();
1817 
1818  x_DisplayRowData(alnRoInfo,out);
1819  delete alnRoInfo;
1820 }
1821 
// Builds a CAlnVec for one Seq-align.
// The input is first normalized to a Dense-seg based alignment (Std-seg and a
// further segment kind are converted; per the error text below, Denseg, Stdseg
// and Dendiag are the accepted kinds). Where the visible branches say so, the
// alignment is additionally turned into a translated Dense-seg.
// Returns the CAlnVec reference (avRef).
// NOTE(review): the declaration and several condition lines are not visible in
// this listing.
1823 {
1824 
1825  //make alnvector
1826  CRef<CAlnVec> avRef;
1827  CConstRef<CSeq_align> finalAln;
// Step 1: choose/convert the alignment that will back the CAlnVec.
1828  if (align.GetSegs().Which() == CSeq_align::C_Segs::e_Std) {
1829  CRef<CSeq_align> densegAln = align.CreateDensegFromStdseg();
1831  finalAln = densegAln->CreateTranslatedDensegFromNADenseg();
1832  } else {
1833  finalAln = densegAln;
1834  }
1835  } else if(align.GetSegs().Which() ==
1838  finalAln = align.CreateTranslatedDensegFromNADenseg();
1839  } else {
1840  finalAln = &align;
1841  }
1842  } else if(align.GetSegs().Which() ==
1844  CRef<CSeq_align> densegAln =
1847  finalAln = densegAln->CreateTranslatedDensegFromNADenseg();
1848  } else {
1849  finalAln = densegAln;
1850  }
1851  } else {
1853  "Seq-align should be Denseg, Stdseg or Dendiag!");
1854  }
// Step 2: take the first CDense_seg found in finalAln. If its first strand is
// minus and it is not a width-3 (translated) alignment, build the CAlnVec
// from a reversed copy; otherwise use the Dense-seg as is.
1855  CRef<CDense_seg> finalDenseg(new CDense_seg);
1856  const CTypeConstIterator<CDense_seg> ds = ConstBegin(*finalAln);
1857  if((ds->IsSetStrands()
1858  && ds->GetStrands().front()==eNa_strand_minus)
1859  && !(ds->IsSetWidths() && ds->GetWidths()[0] == 3)){
1860  //show plus strand if master is minus for non-translated case
1861  finalDenseg->Assign(*ds);
1862  finalDenseg->Reverse();
1863  avRef = new CAlnVec(*finalDenseg, m_Scope);
1864  } else {
1865  avRef = new CAlnVec(*ds, m_Scope);
1866  }
1867 
1869 
1870  return avRef;
1871 }
1872 
1873 //inits m_FeatObj,m_featScope,m_CanRetrieveSeq,m_ConfigFile,m_Reg,m_LinkoutOrder,m_DynamicFeature
// One-time setup performed before alignments are displayed: creates the
// feature-fetching scope, opens the ".ncbirc" config file, reads
// registry-driven settings (LINKOUT_ORDER, FEATURE_INFO) and prepares
// linkout-related values.
// NOTE(review): several lines of this function (including its declaration and
// the creation of m_FeatObj / m_Reg) are not visible in this listing.
1875 {
1876  //scope for feature fetching
1879  & eShowGeneFeature)){
// A dedicated scope, with the loader named by CGBDataLoader, is used for
// feature retrieval.
1882  m_featScope = new CScope(*m_FeatObj); //for seq feature fetch
1883  string name = CGBDataLoader::GetLoaderNameFromArgs();
1884  m_featScope->AddDataLoader(name);
1885  }
1888  //set config file
1889  m_ConfigFile = new CNcbiIfstream(".ncbirc");
1891 
// The linkout order is looked up under the section named after the BLAST type.
1892  if(!m_BlastType.empty()) m_LinkoutOrder = m_Reg->Get(m_BlastType,"LINKOUT_ORDER");
1894 
// Dynamic features are enabled only when both the feature file and its index
// are configured.
1895  string feat_file = m_Reg->Get("FEATURE_INFO", "FEATURE_FILE");
1896  string feat_file_index = m_Reg->Get("FEATURE_INFO",
1897  "FEATURE_FILE_INDEX");
1898  if(feat_file != NcbiEmptyString && feat_file_index != NcbiEmptyString){
1899  m_DynamicFeature = new CGetFeature(feat_file, feat_file_index);
1900  }
1901  }
1902  if(m_AlignOption&eLinkout) {
1903  string user_url = (!m_BlastType.empty()) ? m_Reg->Get(m_BlastType, "TOOL_URL") : "";
1904 
1905 
1909 
// Best-ranked id of the first alignment's row-0 sequence.
1910  CRef<CSeq_id> wid = FindBestChoice(m_Scope.GetBioseqHandle(actual_aln_list.Get().front()->GetSeq_id(0)).GetBioseqCore()->GetId(), CSeq_id::WorstRank);
1912  }
1913 }
1914 
// Top-level display routine: writes the formatted alignments to `out`.
// Two modes are handled:
//   * pairwise (eMergeAlign not set): each Seq-align is shown on its own, with
//     the defline shown only when the subject id changes;
//   * merged (eMergeAlign set): alignments are bucketed per reading frame,
//     merged with CAlnMix and shown as one multiple alignment per frame.
// At most m_NumAlignToShow alignments are processed in either mode.
// NOTE(review): the declaration and a number of interior lines are not visible
// in this listing.
1916 {
1917  CSeq_align_set actual_aln_list;
1919  *m_SeqalignSetRef);
// Nothing to display.
1920  if (actual_aln_list.Get().empty()){
1921  return;
1922  }
1923 
1924  //inits m_FeatObj,m_featScope,m_CanRetrieveSeq,m_ConfigFile,m_Reg,m_LinkoutOrder,m_DynamicFeature
1925  x_InitAlignParams(actual_aln_list);
1926 
1927  //get sequence
1929  out<<"<form name=\"getSeqAlignment"<<m_QueryNumber<<"\">\n";
1930  }
1931  //begin to display
1932  int num_align = 0;
1934  m_currAlignHsp = 0;
// out2 is only referenced by the commented-out debug dumps below.
1935  unique_ptr<CObjectOStream> out2(CObjectOStream::Open(eSerial_AsnText, out));
1936  //*out2 << *m_SeqalignSetRef;
1937  //get segs first and get hsp number - m_segs,m_Hsp,m_subjRange
1938  x_PreProcessSeqAlign(actual_aln_list);
1939  if(!(m_AlignOption&eMergeAlign)){
1940  /*pairwise alignment. Note we can't just show each alignment as we go
1941  because we will need seg information from all HSPs with the same id
1942  for genome url link. As a result we show HSPs with the same id
1943  as a group*/
1944 
// previousId remembers the subject id of the last displayed HSP so that the
// defline is emitted only when the subject changes.
1945  CConstRef<CSeq_id> previousId, subid;
1946  for (CSeq_align_set::Tdata::const_iterator
1947  iter = actual_aln_list.Get().begin();
1948  iter != actual_aln_list.Get().end()
1949  && num_align<m_NumAlignToShow; iter++, num_align++) {
1950 
1951  //make alnvector
1952  CRef<CAlnVec> avRef = x_GetAlnVecForSeqalign(**iter);
1953 
1954  if(!(avRef.Empty())){
1955  //Note: do not switch the set order per calnvec specs.
1957  avRef->SetGenCode(m_MasterGeneticCode, 0);
1958  try{
1959  const CBioseq_Handle& handle = avRef->GetBioseqHandle(1);
1960  if(handle){
1961 
1962  //save the current alignment regardless
1963  CRef<SAlnInfo> alnvecInfo(new SAlnInfo);
// num_ident is an output argument of the call below; it is not used afterwards
// in the visible code.
1964  int num_ident;
1966  alnvecInfo->score,
1967  alnvecInfo->bits,
1968  alnvecInfo->evalue,
1969  alnvecInfo->sum_n,
1970  num_ident,
1971  alnvecInfo->use_this_seqid,
1972  alnvecInfo->comp_adj_method);
1973  alnvecInfo->alnvec = avRef;
1974 
1975  subid=&(avRef->GetSeqId(1));
1976  bool showDefLine = previousId.Empty() || !subid->Match(*previousId);
1977  x_DisplayAlnvecInfo(out, alnvecInfo,showDefLine);
1978 
1979  previousId = subid;
1980  }
1981  } catch (const CException&){
// Any toolkit exception raised while handling this HSP is reported inline in
// the output and the HSP is skipped.
1982  out << "Sequence with id "
1983  << (avRef->GetSeqId(1)).GetSeqIdString().c_str()
1984  <<" no longer exists in database...alignment skipped\n";
1985  continue;
1986  }
1987  }
1988  }
1989 
1990  } else if(m_AlignOption&eMergeAlign){ //multiple alignment
1991  vector< CRef<CAlnMix> > mix(k_NumFrame);
1992  //each for one frame for translated alignment
1993  for(int i = 0; i < k_NumFrame; i++){
1994  mix[i] = new CAlnMix(m_Scope);
1995  }
1996  num_align = 0;
// alnVector[i] collects the Dense-seg alignments belonging to frame slot i.
1997  vector<CRef<CSeq_align_set> > alnVector(k_NumFrame);
1998  for(int i = 0; i < k_NumFrame; i ++){
1999  alnVector[i] = new CSeq_align_set;
2000  }
2001  for (CSeq_align_set::Tdata::const_iterator
2002  alnIter = actual_aln_list.Get().begin();
2003  alnIter != actual_aln_list.Get().end()
2004  && num_align<m_NumAlignToShow; alnIter ++, num_align++) {
2005 
// Alignments whose subject cannot be resolved in m_Scope are silently skipped.
2006  const CBioseq_Handle& subj_handle =
2007  m_Scope.GetBioseqHandle((*alnIter)->GetSeq_id(1));
2008  if(subj_handle){
2009  //need to convert to denseg for stdseg
2010  if((*alnIter)->GetSegs().Which() == CSeq_align::C_Segs::e_Std) {
2011  CTypeConstIterator<CStd_seg> ss = ConstBegin(**alnIter);
2012  CRef<CSeq_align> convertedDs =
2013  (*alnIter)->CreateDensegFromStdseg();
2014  if((convertedDs->GetSegs().GetDenseg().IsSetWidths()
2015  && convertedDs->GetSegs().GetDenseg().GetWidths()[0] == 3)
2017  //only do this for translated master
2018  int frame = s_GetStdsegMasterFrame(*ss, m_Scope);
// Frames +1..+3 map to slots 0..2 and frames -1..-3 to slots 3..5; any other
// value drops the alignment.
2019  switch(frame){
2020  case 1:
2021  alnVector[0]->Set().push_back(convertedDs);
2022  break;
2023  case 2:
2024  alnVector[1]->Set().push_back(convertedDs);
2025  break;
2026  case 3:
2027  alnVector[2]->Set().push_back(convertedDs);
2028  break;
2029  case -1:
2030  alnVector[3]->Set().push_back(convertedDs);
2031  break;
2032  case -2:
2033  alnVector[4]->Set().push_back(convertedDs);
2034  break;
2035  case -3:
2036  alnVector[5]->Set().push_back(convertedDs);
2037  break;
2038  default:
2039  break;
2040  }
2041  }
2042  else {
2043  alnVector[0]->Set().push_back(convertedDs);
2044  }
2045  } else if((*alnIter)->GetSegs().Which() == CSeq_align::C_Segs::
2046  e_Denseg){
2047  alnVector[0]->Set().push_back(*alnIter);
2048  } else if((*alnIter)->GetSegs().Which() == CSeq_align::C_Segs::
2049  e_Dendiag){
2050  alnVector[0]->Set().\ push_back(CAlignFormatUtil::CreateDensegFromDendiag(**alnIter));
2051  } else {
2053  "Input Seq-align should be Denseg, Stdseg or Dendiag!");
2054  }
2055  }
2056  }
// Feed every collected Dense-seg into the mixer of its frame slot, then merge
// the slots that received at least one alignment.
2057  for(int i = 0; i < (int)alnVector.size(); i ++){
2058  bool hasAln = false;
2060  alnRef = ConstBegin(*alnVector[i]); alnRef; ++alnRef){
2062  //*out2 << *ds;
2063  try{
2065  mix[i]->Add(*ds, CAlnMix::fForceTranslation);
2066  } else {
2067  if (ds->IsSetWidths() &&
2068  ds->GetWidths()[0] == 3 &&
2069  ds->IsSetStrands() &&
2070  ds->GetStrands().front()==eNa_strand_minus){
2071  mix[i]->Add(*ds, CAlnMix::fNegativeStrand);
2072  } else {
2073  mix[i]->Add(*ds, CAlnMix::fPreserveRows);
2074  }
2075  }
2076  } catch (const CException& e){
2077  _TRACE("Warning: " << e.what());
2078  continue;
2079  }
2080  hasAln = true;
2081  }
2082  if(hasAln){
2083  // *out2<<*alnVector[i];
2084  mix[i]->Merge(CAlnMix::fMinGap
2087  //*out2<<mix[i]->GetDenseg();
2088  }
2089  }
2090 
// Count the non-empty frame slots; the per-frame heading below is printed only
// when more than one frame is present.
2091  int numDistinctFrames = 0;
2092  for(int i = 0; i < (int)alnVector.size(); i ++){
2093  if(!alnVector[i]->Get().empty()){
2094  numDistinctFrames ++;
2095  }
2096  }
2097  out<<"\n";
2098  for(int i = 0; i < k_NumFrame; i ++){
2099  try{
2100  CRef<CAlnVec> avRef (new CAlnVec (mix[i]->GetDenseg(),
2101  m_Scope));
2104  avRef->SetGenCode(m_MasterGeneticCode, 0);
2105  m_AV = avRef;
2106 
2107  if(numDistinctFrames > 1){
2108  out << "For reading frame " << k_FrameConversion[i]
2109  << " of query sequence:\n\n";
2110  }
// NOTE(review): the handler below catches CException by value, unlike the
// other handlers in this function, which catch by const reference.
2112  } catch (CException e){
2113  continue;
2114  }
2115  }
2116  }
2118  out<<"</form>\n";
2119  }
2120 }
2121 
2122 
// Compares two aligned sequence strings column by column.
//   sequence_standard  reference row
//   sequence           row compared against the reference
//   match     [out]    number of columns with identical characters
//   positive  [out]    number of non-identical columns whose m_Matrix score is
//                      positive (counted only when m_AlignType has eProt)
//   middle_line [in/out] per-column marker string written as the scan proceeds
// Only the first min(size) columns of the two strings are examined.
// NOTE(review): some guarding conditions are not visible in this listing.
2123 void CDisplaySeqalign::x_FillIdentityInfo(const string& sequence_standard,
2124  const string& sequence ,
2125  int& match, int& positive,
2126  string& middle_line)
2127 {
2128  match = 0;
2129  positive = 0;
2130  int min_length=min<int>((int)sequence_standard.size(), (int)sequence.size());
// Seeding middle_line from `sequence` gives it one character per column.
2132  middle_line = sequence;
2133  }
2134  for(int i=0; i<min_length; i++){
2135  if(sequence_standard[i]==sequence[i]){
// Identical column: '|' in bar style, the residue itself in char style.
2137  if(m_MidLineStyle == eBar ) {
2138  middle_line[i] = '|';
2139  } else if (m_MidLineStyle == eChar){
2140  middle_line[i] = sequence[i];
2141  }
2142  }
2143  match ++;
2144  } else {
// NOTE(review): the characters are cast to int for indexing; a char with a
// negative value would index out of range — confirm inputs are 7-bit.
2145  if ((m_AlignType&eProt)
2146  && m_Matrix[(int)sequence_standard[i]][(int)sequence[i]] > 0){
2147  positive ++;
2149  if (m_MidLineStyle == eChar){
2150  middle_line[i] = '+';
2151  }
2152  }
2153  } else {
2155  middle_line[i] = ' ';
2156  }
2157  }
2158  }
2159  }
2160 }
2161 
2162 
// Returns the linkout value for `id`, or 0 when linkouts are disabled
// (eLinkout not set in m_AlignOption) or the lookup fails.
// On a CException the problem is logged and the eLinkout bit is cleared, so
// subsequent calls return 0 without attempting another lookup.
// NOTE(review): the opening brace and the lookup expression are not visible in
// this listing.
2163 int CDisplaySeqalign::x_GetLinkout(const objects::CSeq_id & id)
2165  int linkout = 0;
2166  if(m_AlignOption & eLinkout) {
2167  try {
2168  linkout = m_LinkoutDB
2170  : 0;
2171  }
2172  catch (const CException & e) {
// Report through the toolkit diagnostics and on stderr, then turn linkouts off.
2173  ERR_POST("Problem with linkoutdb: " + e.GetMsg());
2174  cerr << "[BLAST FORMATTER EXCEPTION] Problem with linkoutdb: " << e.GetMsg() << endl;
2175  m_AlignOption &= ~eLinkout; //Remove linkout bit for the rest of sequences
2176  linkout = 0;
2177  }
2178  }
2179  return linkout;
2180 }
2181 
2182 
2183 
// Builds the display parameters for one BLAST defline (bdl) of the sequence
// behind bsp_handle.
// Returns a newly allocated SAlnDispParams (ownership passes to the caller),
// or NULL when use_this_seqid is non-empty and this defline does not match it.
//   use_this_seqid  restricts which deflines are shown (empty = show all)
//   firstGi         gi passed to the linkout URL builder; it is a by-value
//                   parameter, so the assignment below stays local
//   deflineNum      linkout information is only looked up while this is below
//                   kMaxDeflineNum
// NOTE(review): the first line of the declaration and several interior lines
// (including the definitions of `gi` and `wid`) are not visible in this listing.
2185  const CBioseq_Handle& bsp_handle,
2186  list<string> &use_this_seqid,
2187  TGi firstGi,
2188  int deflineNum)
2189 {
2190  SAlnDispParams *alnDispParams = NULL;
2191  const int kMaxDeflineNum = 10;
2192 
2193  bool isNa = bsp_handle.GetBioseqCore()->IsNa();
2194  int seqLength = (int)bsp_handle.GetBioseqLength();
2195 
// Local copy of the defline's id list.
2196  const list<CRef<CSeq_id> > ids = bdl->GetSeqid();
2198 
2200  TGi gi_in_use_this_gi = ZERO_GI;
2201  bool isGiList = false;
2202  bool match = CAlignFormatUtil::MatchSeqInSeqList(gi, wid, use_this_seqid,&isGiList);
2203  if(match && isGiList) gi_in_use_this_gi = gi;
2204 
2205  if(use_this_seqid.empty() || match) {
2206  firstGi = (firstGi == ZERO_GI) ? gi_in_use_this_gi : firstGi;
2207  alnDispParams = new SAlnDispParams();
2208  alnDispParams->gi = gi;
2209  alnDispParams->seqID = FindBestChoice(ids, CSeq_id::WorstRank); //change to use use_this_seq
2210  alnDispParams->hasTextSeqID = CAlignFormatUtil::GetTextSeqID(alnDispParams->seqID);
2211  alnDispParams->ids = bsp_handle.GetBioseqCore()->GetId();
2212  alnDispParams->label = CAlignFormatUtil::GetLabel(alnDispParams->seqID,CSeq_id::eContent);//Just accession without db part like ref| or pdbd|
2213 
2214 
2215  TTaxId taxid = ZERO_TAX_ID;
// NOTE(review): type_temp is not referenced again in the visible lines.
2216  string type_temp = m_BlastType;
2217  type_temp = NStr::TruncateSpaces(NStr::ToLower(type_temp));
2218  if(bdl->IsSetTaxid() && bdl->CanGetTaxid()){
2219  taxid = bdl->GetTaxid();
2220  }
2221 
2222  alnDispParams->seqUrlInfo = x_InitSeqUrl(gi_in_use_this_gi,alnDispParams->label,taxid,ids);
2223  if(m_AlignOption&eHtml){
2224  alnDispParams->id_url = CAlignFormatUtil::GetIDUrl(alnDispParams->seqUrlInfo,&ids);
2225  }
2227  int linkout = 0;
2228  if (alnDispParams->hasTextSeqID) {
// NOTE(review): the C-style cast below removes const from `ids`.
2229  linkout = (deflineNum < kMaxDeflineNum) ? CAlignFormatUtil::GetSeqLinkoutInfo((CBioseq::TId &)ids,
2230  &m_LinkoutDB,
2232  alnDispParams->gi) : 0;
2234  }
2235 
// Concatenate all linkout URL fragments into one string.
2236  if(linkout != 0) {
2237  list<string> linkout_url = CAlignFormatUtil::
2238  GetLinkoutUrl(linkout, ids,
2239  m_Rid,
2241  isNa,
2242  firstGi,
2243  false, true, m_cur_align,m_PreComputedResID);
2244  ITERATE(list<string>, iter_linkout, linkout_url){
2245  alnDispParams->linkoutStr += *iter_linkout;
2246  }
2247  }
// Sequences longer than k_GetSubseqThreshhold get a dumpgnl link.
2248  if(seqLength > k_GetSubseqThreshhold){
2249  alnDispParams->dumpGnlUrl = x_GetDumpgnlLink(ids);
2250  }
2251 
2252  }
// Prefer the title stored in the defline; otherwise generate one.
2253  if(bdl->IsSetTitle()){
2254  alnDispParams->title = bdl->GetTitle();
2255  }
2256  if(alnDispParams->title.empty()) {
2257  alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle);
2258  }
2259  }
2260  return alnDispParams;
2261 }
2262 
2263 
2264 
// Builds display parameters directly from a Bioseq handle, for sequences that
// have no BLAST defline. Returns a newly allocated SAlnDispParams; ownership
// passes to the caller.
// NOTE(review): the declaration line is not visible in this listing.
2266 {
2267  SAlnDispParams *alnDispParams = new SAlnDispParams();
2268  alnDispParams->gi = FindGi(bsp_handle.GetBioseqCore()->GetId());
2269  alnDispParams->seqID = FindBestChoice(bsp_handle.GetBioseqCore()->GetId(),CSeq_id::WorstRank);
2270  alnDispParams->label = CAlignFormatUtil::GetLabel(alnDispParams->seqID,CSeq_id::eContent);
// URL-related fields (ids, seqUrlInfo, id_url) are filled only for HTML output.
2271  if(m_AlignOption&eHtml){
2272  alnDispParams->ids = bsp_handle.GetBioseqCore()->GetId();
2273  alnDispParams->seqUrlInfo = x_InitSeqUrl(alnDispParams->gi,alnDispParams->label,ZERO_TAX_ID,alnDispParams->ids);
2274  alnDispParams->id_url = CAlignFormatUtil::GetIDUrl(alnDispParams->seqUrlInfo,&alnDispParams->ids);
2275  }
2276  alnDispParams->title = CDeflineGenerator().GenerateDefline(bsp_handle);
2277  alnDispParams->hasTextSeqID = CAlignFormatUtil::GetTextSeqID(alnDispParams->seqID);
2278  return alnDispParams;
2279 }
2280 
// Formats the defline section for one subject sequence and returns it as a
// string: one ">"-prefixed line for the first defline, further deflines
// indented by a space, followed by a "Length=" line.
// Side effects: increments m_cur_align and sets aln_vec_info->id_label (HTML
// mode).
// NOTE(review): several lines (including the declaration of the local stream
// `out`) are not visible in this listing.
2281 string
2282 CDisplaySeqalign::x_PrintDefLine(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info)
2284 {
2287  /* Facilitates comparing formatted output using diff */
2288  static string kLengthString("Length=");
2289 #ifdef CTOOLKIT_COMPATIBLE
2290  static bool value_set = false;
2291  if ( !value_set ) {
2292  if (getenv("CTOOLKIT_COMPATIBLE")) {
2293  kLengthString.assign(" Length = ");
2294  }
2295  value_set = true;
2296  }
2297 #endif /* CTOOLKIT_COMPATIBLE */
2298 
2299  if(bsp_handle){
2300  const CRef<CSeq_id> wid =
2301  FindBestChoice(bsp_handle.GetBioseqCore()->GetId(),
2303 
// An empty list is used when no BLAST defline set can be extracted.
2304  const CRef<CBlast_def_line_set> bdlRef
2305  = CSeqDB::ExtractBlastDefline(bsp_handle);
2306  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
2307  bool isFirst = true;
2308  TGi firstGi = ZERO_GI;
2309 
2310  m_cur_align++;
2311 
2312  if(bdl.empty()){ //no blast defline struct, should be no such case now
2313  //actually not so fast...as we now fetch from entrez even when it's not in blast db
2314  //there is no blast defline in such case.
2315  CRef<SAlnDispParams> alnDispParams
2316  (x_FillAlnDispParams(bsp_handle));
2317  out << ">";
2319  && (m_AlignOption&eHtml) && m_CanRetrieveSeq && isFirst) {
2322  alnDispParams->gi > ZERO_GI ?
2323  NStr::NumericToString(alnDispParams->gi) : alnDispParams->label);
2324  out << buf;
2325  }
2326 
2327  if(m_AlignOption&eHtml){
2328 
// The label is the gi when one exists, otherwise the accession label.
2329  aln_vec_info->id_label = (alnDispParams->gi != ZERO_GI) ?
2330  NStr::NumericToString(alnDispParams->gi) : alnDispParams->label;
2331 
2332  out<<alnDispParams->id_url;
2333  }
2334 
2335  if(m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI &&
2336  !alnDispParams->seqID->IsGi()){
2337  out<<"gi|"<<alnDispParams->gi<<"|";
2338  }
// Ids containing "gnl|BL_ORD_ID" or "lcl|Subject_" are not printed.
2339  if(!((alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) ||
2340  alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos)){
2341  if (strncmp(alnDispParams->seqID->AsFastaString().c_str(), "lcl|", 4) == 0)
2342  out << alnDispParams->label;
2343  else {
2345  alnDispParams->gi > ZERO_GI)) {
2346  alnDispParams->seqID->WriteAsFasta(out);
2347  }
2348  else {
2349  out << CAlignFormatUtil::GetBareId(*alnDispParams->seqID);
2350  }
2351 
2352  }
2353  }
2354  if(m_AlignOption&eHtml){
2355  if(alnDispParams->id_url != NcbiEmptyString){
2356  out<<"</a>";
2357  }
// Named anchor used as a jump target: the gi if present, else the id string.
2358  if(alnDispParams->gi != ZERO_GI){
2359  out<<"<a name="<<alnDispParams->gi<<"></a>";
2360  } else {
2361  out<<"<a name="<<alnDispParams->seqID->GetSeqIdString()<<"></a>";
2362  }
2363  }
2364  out <<" ";
2366  CHTMLHelper::HTMLEncode(alnDispParams->title) :
2367  alnDispParams->title);
2368 
2369  out<<"\n";
2370 
2371  } else {
2372  //print each defline
2373  bool bMultipleDeflines = false;
2374  int numBdl = 0;
2375  int maxNumBdl = (aln_vec_info->use_this_seqid.empty()) ? bdl.size() : aln_vec_info->use_this_seqid.size();
2376  for(list< CRef< CBlast_def_line > >::const_iterator
2377  iter = bdl.begin(); iter != bdl.end(); iter++){
2378 
// A null result means this defline is filtered out by use_this_seqid.
2379  CRef<SAlnDispParams> alnDispParams
2380  (x_FillAlnDispParams(*iter, bsp_handle,
2381  aln_vec_info->use_this_seqid,
2382  firstGi, numBdl));
2383 
2384 
2385 
2386  if(alnDispParams) {
2387  numBdl++;
2388  if(isFirst){
2389  out << ">";
2390  } else{
2391  out << " ";
// In HTML mode, once more than k_MaxDeflinesToShow deflines exist, the ones
// after the first k_MinDeflinesToShow are wrapped in a hidden <div> opened by
// a "more sequence titles" link.
2392  if (m_AlignOption&eHtml && (int)(maxNumBdl) > k_MaxDeflinesToShow && numBdl == k_MinDeflinesToShow + 1){
2393  //Show first 3 deflines out of 8 or more, hide the rest
2394  string mdlTag = aln_vec_info->id_label;
2395  //string mdlTag = id_label + "_" + NStr::IntToString(m_cur_align);
2396  out << "<a href=\"#\" title=\"Other sequence titles\" onmouseover=\"showInfo(this)\" class=\"resArrowLinkW mdl hiding\" id=\"" <<
2397  mdlTag << "\">" << maxNumBdl - k_MinDeflinesToShow << " more sequence titles" << "</a>\n";
2398 
2399  out << " <div id=\"" << "info_" << mdlTag << "\" class=\"helpbox mdlbox hidden\">";
2400  bMultipleDeflines = true;
2401  }
2402  }
2403 
2404  if(isFirst){
2405  firstGi = alnDispParams->gi;
2406  }
2408  && (m_AlignOption&eHtml) && m_CanRetrieveSeq && isFirst) {
2411  alnDispParams->gi > ZERO_GI ?
2412  NStr::NumericToString(alnDispParams->gi) : alnDispParams->label);
2413  out << buf;
2414  }
2415 
2416  if(m_AlignOption&eHtml){
2417  out<< alnDispParams->id_url;
2418  }
2419 
2420  if(m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI &&
2421  !alnDispParams->seqID->IsGi()){
2422  out<<"gi|"<<alnDispParams->gi<<"|";
2423  }
// NOTE(review): unlike the corresponding test in the no-defline branch above,
// the negation here covers only the "gnl|BL_ORD_ID" check, so ids containing
// "lcl|Subject_" satisfy this condition — confirm the parenthesization is
// intended.
2424  if(!(alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) ||
2425  alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos){
2426  if (strncmp(alnDispParams->seqID->AsFastaString().c_str(), "lcl|", 4) == 0) {
2427  out << alnDispParams->label;
2428  }
2429  else {
2430  if (m_UseLongSeqIds ||
2431  ((m_AlignOption & eShowGi) &&
2432  alnDispParams->gi > ZERO_GI)) {
2433 
2434  alnDispParams->seqID->WriteAsFasta(out);
2435  }
2436  else {
2438  *alnDispParams->seqID);
2439  }
2440  }
2441  }
2442  if(m_AlignOption&eHtml){
2443  if(alnDispParams->id_url != NcbiEmptyString){
2444  out<<"</a>";
2445  }
2446  if(alnDispParams->gi != ZERO_GI){
2447  out<<"<a name="<<alnDispParams->gi<<"></a>";
2448  aln_vec_info->id_label = NStr::NumericToString(alnDispParams->gi);
2449  } else {
2450  out<<"<a name="<<alnDispParams->seqID->GetSeqIdString(true)<<"></a>";
2451  aln_vec_info->id_label = alnDispParams->label;
2452  }
2453  if(m_AlignOption&eLinkout){
2454 
2455  out <<" ";
2456  out << alnDispParams->linkoutStr;
2457  if(!alnDispParams->dumpGnlUrl.empty()) {
2458 
2459  out<<alnDispParams->dumpGnlUrl;
2460  }
2461  }
2462  }
2463 
// Separate the id from the title unless nothing has been written yet.
2464  if (out.tellp() > 1L) {
2465  out << " ";
2466  }
2467  if(!alnDispParams->title.empty()) {
2469  CHTMLHelper::
2470  HTMLEncode(alnDispParams->title) :
2471  alnDispParams->title);
2472  }
2473  out<<"\n";
2474  isFirst = false;
2475  }
2476  }
// Close the hidden <div> opened for the extra deflines.
2477  if(m_AlignOption&eHtml && bMultipleDeflines) {
2478  out << "</div>";
2479  }
2480  }
2481  }
2482  out<<kLengthString<<bsp_handle.GetBioseqLength()<<"\n";
2483  string formattedString = CNcbiOstrstreamToString(out);
2484  return formattedString;
2485 }
2486 
2487 
// Writes one display-line slice of `sequence` to `out`, applying masking.
//   sequence        full row text (sequence, middle line or feature line)
//   id              id of the row; an unset id (e_not_set) disables masking
//   start, len      slice to print: sequence.substr(start, len)
//   frame           only mask locations with this frame are applied
//   row             row index (used on lines not visible in this listing)
//   color_mismatch  request mismatch coloring for unmasked rows
//   loc_list        candidate mask locations in alignment coordinates
// Masked residues are replaced according to m_SeqLocChar ('X', 'n' or lower
// case). In HTML mode the masked ranges are additionally recorded and wrapped
// in styled output.
// NOTE(review): a few lines of this function are not visible in this listing.
2488 void CDisplaySeqalign::x_OutputSeq(string& sequence, const CSeq_id& id,
2489  int start, int len, int frame, int row,
2490  bool color_mismatch,
2491  const TSAlnSeqlocInfoList& loc_list,
2492  CNcbiOstream& out) const
2493 {
2494  _ASSERT((int)sequence.size() > start);
2495  list<CRange<int> > actualSeqloc;
2496  string actualSeq = sequence.substr(start, len);
2497 
2498  if(id.Which() != CSeq_id::e_not_set){
2499  /*only do this for sequence but not for others like middle line,
2500  features*/
2501  ITERATE(TSAlnSeqlocInfoList, iter, loc_list) {
2502  int from=(*iter)->aln_range.GetFrom();
2503  int to=(*iter)->aln_range.GetTo();
2504  int locFrame = (*iter)->seqloc->GetFrame();
2505  if(id.Match((*iter)->seqloc->GetInterval().GetId())
2506  && locFrame == frame){
2507  bool isFirstChar = true;
2508  CRange<int> eachSeqloc(0, 0);
2509  //go through each residue and mask it
// Only the part of the mask that intersects [start, start+len-1] is visited.
2510  for (int i=max<int>(from, start);
2511  i<=min<int>(to, start+len -1); i++){
2512  //store seqloc start for font tag below
2513  if ((m_AlignOption & eHtml) && isFirstChar){
2514  isFirstChar = false;
2515  eachSeqloc.Set(i, eachSeqloc.GetTo());
2516  }
2517  if (m_SeqLocChar==eX){
2518  if(isalpha((unsigned char) actualSeq[i-start])){
2519  actualSeq[i-start]='X';
2520  }
2521  } else if (m_SeqLocChar==eN){
2522  actualSeq[i-start]='n';
2523  } else if (m_SeqLocChar==eLowerCase){
2524  actualSeq[i-start]=tolower((unsigned char) actualSeq[i-start]);
2525  }
2526  //store seqloc end for font tag below
// NOTE(review): the loop stops at start+len-1 but this test compares against
// start+len, so the end is recorded only when `to` lies inside the slice —
// confirm this is intended.
2527  if ((m_AlignOption & eHtml)
2528  && i == min<int>(to, start+len)){
2529  eachSeqloc.Set(eachSeqloc.GetFrom(), i);
2530  }
2531  }
// A range still equal to (0, 0) is treated as "nothing recorded".
2532  if(!(eachSeqloc.GetFrom()==0&&eachSeqloc.GetTo()==0)){
2533  actualSeqloc.push_back(eachSeqloc);
2534  }
2535  }
2536  }
2537  }
2538 
2539  if(actualSeqloc.empty()){//no need to add font tag
2541  && color_mismatch && (m_AlignOption & eShowIdentity)){
2542  //color the mismatches. Only for rows without mask.
2543  //Otherwise it may conflict with the mask font tag.
2545  } else {
2546  out<<actualSeq;
2547  }
2548  } else {//now deal with font tag for mask for html display
2549  bool endTag = false;
2550  bool frontTag = false;
2551  string refStr;
// Walk the slice character by character; frontTag/endTag become true when the
// current position is the start/end of a recorded masked range and are reset
// once both have been seen.
2553  for (int i = 0; i < (int)actualSeq.size(); i ++){
2554  bool startStyledOutput = false,stopStyledOutput = false;
2555  for (list<CRange<int> >::iterator iter=actualSeqloc.begin();
2556  iter!=actualSeqloc.end(); iter++){
2557  int from = (*iter).GetFrom() - start;
2558  int to = (*iter).GetTo() - start;
2559  //start tag
2560  if(from == i){
2561  frontTag = true;
2562  }
2563  if(to == i && to > 0){
2564  endTag = true;
2565  }
2566  }
2567  startStyledOutput = frontTag;
2568  stopStyledOutput = endTag && frontTag;
2569  bool isStyled = s_ProcessStyledContent(actualSeq,i,startStyledOutput,stopStyledOutput,styledSqLocTmpl ,refStr,out);
2570  if(!isStyled) out<<actualSeq[i];
2571  if(endTag && frontTag){
2572  endTag = false;
2573  frontTag = false;
2574  }
2575  }
2576  }
2577 }
2578 
// Counts the gap columns of the alignment in m_AV, summed over all rows, and
// returns the total.
// For a global alignment (type eType_global on the first Seq-align) each row
// is scanned over the aligned range of the opposite row; otherwise every row
// is scanned over the aligned range of row 0.
// NOTE(review): the declaration line is not visible in this listing.
2581  int gap = 0;
2582  if (m_SeqalignSetRef->Get().front()->CanGetType() &&
2583  m_SeqalignSetRef->Get().front()->GetType() == CSeq_align_Base::eType_global)
2584  {
2585  for (int row=0; row<m_AV->GetNumRows(); row++) {
2586  // The row/antirow dance makes sure that we count gaps at the end of a global alignment.
2587  // Only two rows for a global (NW) alignment
2588  int antirow=1;
2589  if (row == 1)
2590  antirow = 0;
2591  CRef<CAlnMap::CAlnChunkVec> chunk_vec
2592  = m_AV->GetAlnChunks(row, m_AV->GetSeqAlnRange(antirow));
2593  for (int i=0; i<chunk_vec->size(); i++) {
2594  CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];
// Add the width of every gap chunk.
2595  if (chunk->IsGap()) {
2596  gap += (chunk->GetAlnRange().GetTo()
2597  - chunk->GetAlnRange().GetFrom() + 1);
2598  }
2599  }
2600  }
2601  }
2602  else
2603  {
// Same accumulation, but always restricted to the aligned range of row 0.
2604  for (int row=0; row<m_AV->GetNumRows(); row++) {
2605  CRef<CAlnMap::CAlnChunkVec> chunk_vec
2606  = m_AV->GetAlnChunks(row, m_AV->GetSeqAlnRange(0));
2607  for (int i=0; i<chunk_vec->size(); i++) {
2608  CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];
2609  if (chunk->IsGap()) {
2610  gap += (chunk->GetAlnRange().GetTo()
2611  - chunk->GetAlnRange().GetFrom() + 1);
2612  }
2613  }
2614  }
2615  }
2616  return gap;
2617 }
2618 
// Collects the features of kind `choice` that overlap the aligned part of
// `row` and appends one SAlnFeatureInfo per feature to `feature`.
//   custom_genetic_code  when > 0, used instead of m_SlaveGeneticCode for CDS
//                        translation
//   scope                scope used to resolve the row's sequence and features
//   choice               gene features get a '^' marker line; CDS features get
//                        a line carrying the translated product
//   sequence             the row's alignment text (consulted for gap columns)
//   feat_range_list, feat_seq_strand
//                        when fill_feat_range is true, receive the CDS ranges
//                        mapped onto the master row, and their strand
// Features are fetched only for rows that have a gi, or for row 0.
// Features with mixed strands, or with no interval on this sequence, are
// skipped.
// NOTE(review): two lines of this function are not visible in this listing.
2619 void CDisplaySeqalign::x_GetFeatureInfo(TSAlnFeatureInfoList& feature,
2620  int custom_genetic_code,
2621  CScope& scope,
2622  CSeqFeatData::E_Choice choice,
2623  int row, string& sequence,
2624  list<list<CRange<TSeqPos> > >& feat_range_list,
2625  list<ENa_strand>& feat_seq_strand,
2626  bool fill_feat_range ) const
2627 {
2628  //Only fetch features for seq that has a gi unless it's master seq
2629  const CSeq_id& id = m_AV->GetSeqId(row);
2630 
2631  TGi gi_temp = FindGi(m_AV->GetBioseqHandle(row).GetBioseqCore()->GetId());
2632  if(gi_temp > ZERO_GI || row == 0){
2633  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
2634  if(handle){
// Sequence interval covered by the alignment; min/max make the order
// independent of which end is larger.
2635  TSeqPos seq_start = m_AV->GetSeqPosFromAlnPos(row, 0);
2636  TSeqPos seq_stop = m_AV->GetSeqPosFromAlnPos(row, m_AV->GetAlnStop());
2637  CRef<CSeq_loc> loc_ref =
2638  handle.
2639  GetRangeSeq_loc(min(seq_start, seq_stop),
2640  max(seq_start, seq_stop));
2641  SAnnotSelector sel(choice);
2643 
2644  for (CFeat_CI feat(scope, *loc_ref, sel); feat; ++feat) {
2645  const CSeq_loc& loc = feat->GetLocation();
2646  bool has_id = false;
2647  list<CSeq_loc_CI::TRange> isolated_range;
2648  ENa_strand feat_strand = eNa_strand_plus, prev_strand = eNa_strand_plus;
2649  bool first_loc = true, mixed_strand = false, mix_loc = false;
2650  CRange<TSeqPos> feat_seq_range;
2651  TSeqPos other_seqloc_length = 0;
2652  //isolate the seqloc corresponding to feature
2653  //as this is easier to manipulate and remove seqloc that is
2654  //not from the bioseq we are dealing with
2655  for(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it){
2656  const CSeq_id& id_it = loc_it.GetSeq_id();
2657  if(IsSameBioseq(id_it, id, &scope)){
2658  isolated_range.push_back(loc_it.GetRange());
// feat_seq_range accumulates the overall extent of the intervals on this
// sequence.
2659  if(first_loc){
2660  feat_seq_range = loc_it.GetRange();
2661  } else {
2662  feat_seq_range += loc_it.GetRange();
2663  }
2664  has_id = true;
// Any strand other than plus/minus (or an unset strand) is treated as plus.
2665  if(loc_it.IsSetStrand()){
2666  feat_strand = loc_it.GetStrand();
2667  if(feat_strand != eNa_strand_plus &&
2668  feat_strand != eNa_strand_minus){
2669  feat_strand = eNa_strand_plus;
2670  }
2671  } else {
2672  feat_strand = eNa_strand_plus;
2673  }
2674 
2675  if(!first_loc && prev_strand != feat_strand){
2676  mixed_strand = true;
2677  }
2678  first_loc = false;
2679  prev_strand = feat_strand;
2680  } else {
2681  //if seqloc has other seqids then need to remove other
2682  //seqid encoded amino acids in the front later
2683  if (first_loc) {
2684  other_seqloc_length += loc_it.GetRange().GetLength();
2685  mix_loc = true;
2686  }
2687  }
2688  }
2689  //give up if mixed strand or no id
2690  if(!has_id || mixed_strand){
2691  continue;
2692  }
2693 
2694  string featLable = NcbiEmptyString;
2695  string featId;
2696  char feat_char = ' ';
2697  string alternativeFeatStr = NcbiEmptyString;
2698  TSeqPos feat_aln_from = 0;
2699  TSeqPos feat_aln_to = 0;
2700  TSeqPos actual_feat_seq_start = 0, actual_feat_seq_stop = 0;
2701  feature::GetLabel(feat->GetOriginalFeature(), &featLable,
2702  feature::fFGL_Both, &scope);
2703  featId = featLable.substr(0, k_FeatureIdLen); //default
2704  TSeqPos aln_stop = m_AV->GetAlnStop();
2705  CRef<SAlnFeatureInfo> featInfo;
2706 
2707  //find the actual feature sequence start and stop
// i.e. clip the feature extent to the aligned part of the row.
2708  if(m_AV->IsPositiveStrand(row)){
2709  actual_feat_seq_start =
2710  max(feat_seq_range.GetFrom(), seq_start);
2711  actual_feat_seq_stop =
2712  min(feat_seq_range.GetTo(), seq_stop);
2713 
2714  } else {
2715  actual_feat_seq_start =
2716  min(feat_seq_range.GetTo(), seq_start);
2717  actual_feat_seq_stop =
2718  max(feat_seq_range.GetFrom(), seq_stop);
2719  }
// Stays -1 unless the CDS feature carries its own genetic code.
2720  int genetic_code = -1;
2721  //the feature alignment positions
2722  feat_aln_from =
2723  m_AV->GetAlnPosFromSeqPos(row, actual_feat_seq_start);
2724  feat_aln_to =
2725  m_AV->GetAlnPosFromSeqPos(row, actual_feat_seq_stop);
2726  if(choice == CSeqFeatData::e_Gene){
2727  featInfo.Reset(new SAlnFeatureInfo);
2728  feat_char = '^';
2729 
2730  } else if(choice == CSeqFeatData::e_Cdregion){
2731 
// The expression `other_seqloc_length%3 == 0 ? 0 : 3 - other_seqloc_length%3`
// recurs below; it is derived from the length of the leading intervals that
// lie on other sequences.
2732  string raw_cdr_product =
2733  s_GetCdsSequence(custom_genetic_code > 0? custom_genetic_code:m_SlaveGeneticCode, feat, scope,
2734  isolated_range, handle, feat_strand,
2735  featId, other_seqloc_length%3 == 0 ?
2736  0 : 3 - other_seqloc_length%3,
2737  mix_loc);
2738  if (feat->IsSetData() && feat->GetData().IsCdregion() && feat->GetData().GetCdregion().IsSetCode()) {
2739  genetic_code = feat->GetData().GetCdregion().GetCode().GetId();
2740  }
// No product available: skip this feature.
2741  if(raw_cdr_product == NcbiEmptyString){
2742  continue;
2743  }
2744  featInfo.Reset(new SAlnFeatureInfo);
2745 
2746  //line represents the amino acid line starting covering
2747  //the whole alignment. The idea is if there is no feature
2748  //in some range, then fill it with space and this won't
2749  //be shown
2750 
2751  string line(aln_stop+1, ' ');
2752  //pre-fill all cds region with intron char
2753  for (TSeqPos i = feat_aln_from; i <= feat_aln_to; i ++){
2754  line[i] = k_IntronChar;
2755  }
2756 
2757  //get total coding length
2758  TSeqPos total_coding_len = 0;
2759  ITERATE(list<CSeq_loc_CI::TRange>, iter, isolated_range){
2760  total_coding_len += iter->GetLength();
2761  }
2762 
2763  //fill concatenated exon (excluding intron)
2764  //with product
2765  //this is will be later used to
2766  //fill the feature line
2767  char gap_char = m_AV->GetGapChar(row);
2768  string concat_exon =
2769  s_GetConcatenatedExon(feat, feat_strand,
2770  isolated_range,
2771  total_coding_len,
2772  raw_cdr_product,
2773  other_seqloc_length%3 == 0 ?
2774  0 : 3 - other_seqloc_length%3);
2775 
2776 
2777  //fill slave feature info to make putative feature for
2778  //master sequence
2779  if (fill_feat_range) {
2780  list<CRange<TSeqPos> > master_feat_range;
2781  ENa_strand master_strand = eNa_strand_plus;
2782  s_MapSlaveFeatureToMaster(master_feat_range, master_strand,
2783  feat, isolated_range,
2784  feat_strand, m_AV, row,
2785  other_seqloc_length%3 == 0 ?
2786  0 :
2787  3 - other_seqloc_length%3);
2788  if(!(master_feat_range.empty())) {
2789  feat_range_list.push_back(master_feat_range);
2790  feat_seq_strand.push_back(master_strand);
2791  }
2792  }
2793 
2794 
2795  TSeqPos feat_aln_start_totalexon = 0;
2796  TSeqPos prev_feat_aln_start_totalexon = 0;
2797  TSeqPos prev_feat_seq_stop = 0;
2798  TSeqPos intron_size = 0;
2799  bool is_first = true;
2800  bool is_first_exon_start = true;
2801 
2802  //here things get complicated a bit. The idea is fill the
2803  //whole feature line in alignment coordinates with
2804  //amino acid on the second base of a codon
2805 
2806  //go through the feature seqloc and fill the feature line
2807 
2808  //Need to reverse the seqloc order for minus strand
2809  if(feat_strand == eNa_strand_minus){
2810  isolated_range.reverse();
2811  }
2812 
2813  ITERATE(list<CSeq_loc_CI::TRange>, iter, isolated_range){
2814  //intron refers to the distance between two exons
2815  //i.e. each seqloc is an exon
2816  //intron needs to be skipped
2817  if(!is_first){
2818  intron_size += iter->GetFrom()
2819  - prev_feat_seq_stop - 1;
2820  }
// Part of this exon that falls inside the aligned sequence interval.
2821  CRange<TSeqPos> actual_feat_seq_range =
2822  loc_ref->GetTotalRange().
2823  IntersectionWith(*iter);
2824  if(!actual_feat_seq_range.Empty()){
2825  //the sequence start position in aln coordinates
2826  //that has a feature
2827  TSeqPos feat_aln_start;
2828  TSeqPos feat_aln_stop;
2829  if(m_AV->IsPositiveStrand(row)){
2830  feat_aln_start =
2831  m_AV->
2832  GetAlnPosFromSeqPos
2833  (row, actual_feat_seq_range.GetFrom());
2834  feat_aln_stop
2835  = m_AV->GetAlnPosFromSeqPos
2836  (row, actual_feat_seq_range.GetTo());
2837  } else {
2838  feat_aln_start =
2839  m_AV->
2840  GetAlnPosFromSeqPos
2841  (row, actual_feat_seq_range.GetTo());
2842  feat_aln_stop
2843  = m_AV->GetAlnPosFromSeqPos
2844  (row, actual_feat_seq_range.GetFrom());
2845  }
2846  //put actual amino acid on feature line
2847  //in aln coord
2848  for (TSeqPos i = feat_aln_start;
2849  i <= feat_aln_stop; i ++){
2850  if(sequence[i] != gap_char){
2851  //the amino acid position in
2852  //concatenated exon that corresponds
2853  //to the sequence position
2854  //note intron needs to be skipped
2855  //as it does not have cds feature
// NOTE(review): if TSeqPos is unsigned, a negative difference here wraps to a
// large value and is then rejected by the size check below — confirm.
2856  TSeqPos product_adj_seq_pos
2857  = m_AV->GetSeqPosFromAlnPos(row, i) -
2858  intron_size - feat_seq_range.GetFrom();
2859  if(product_adj_seq_pos <
2860  concat_exon.size()){
2861  //fill the cds feature line with
2862  //actual amino acids
2863  line[i] =
2864  concat_exon[product_adj_seq_pos];
2865  //get the exon start position
2866  //note minus strand needs to be
2867  //counted backward
2868  if(m_AV->IsPositiveStrand(row)){
2869  //don't count gap
2870  if(is_first_exon_start &&
2871  isalpha((unsigned char) line[i])){
2872  if(feat_strand == eNa_strand_minus){
2873  feat_aln_start_totalexon =
2874  concat_exon.size()
2875  - product_adj_seq_pos + 1;
2876  is_first_exon_start = false;
2877 
2878  } else {
2879  feat_aln_start_totalexon =
2880  product_adj_seq_pos;
2881  is_first_exon_start = false;
2882  }
2883  }
2884 
2885  } else {
2886  if(feat_strand == eNa_strand_minus){
2887  if(is_first_exon_start &&
2888  isalpha((unsigned char) line[i])){
2889  feat_aln_start_totalexon =
2890  concat_exon.size()
2891  - product_adj_seq_pos + 1;
2892  is_first_exon_start = false;
2893  prev_feat_aln_start_totalexon =
2894  feat_aln_start_totalexon;
2895  }
2896  if(!is_first_exon_start){
2897  //need to get the
2898  //smallest start as
2899  //seqloc list is
2900  //reversed
2901  feat_aln_start_totalexon =
2902  min(TSeqPos(concat_exon.size()
2903  - product_adj_seq_pos + 1),
2904  prev_feat_aln_start_totalexon);
2905  prev_feat_aln_start_totalexon =
2906  feat_aln_start_totalexon;
2907  }
2908  } else {
2909  feat_aln_start_totalexon =
2910  max(prev_feat_aln_start_totalexon,
2911  product_adj_seq_pos);
2912 
2913  prev_feat_aln_start_totalexon =
2914  feat_aln_start_totalexon;
2915  }
2916  }
2917  }
2918  } else { //adding gap
2919  line[i] = ' ';
2920  }
2921 
2922  }
2923  }
2924 
2925  prev_feat_seq_stop = iter->GetTo();
2926  is_first = false;
2927  }
// The filled line becomes the feature's display string.
2928  alternativeFeatStr = line;
2929  s_FillCdsStartPosition(line, concat_exon, m_LineLen,
2930  feat_aln_start_totalexon,
2931  m_AV->IsPositiveStrand(row) ?
2933  feat_strand, featInfo->feature_start);
2934 
2935  }
2936 
// featInfo stays empty for feature kinds other than gene / CDS.
2937  if(featInfo){
2938  x_SetFeatureInfo(featInfo, *loc_ref,
2939  feat_aln_from, feat_aln_to, aln_stop,
2940  feat_char, featId, alternativeFeatStr, genetic_code);
2941  feature.push_back(featInfo);
2942  }
2943  }
2944  }
2945  }
2946 }
2947 
2948 
2950  const CSeq_loc& seqloc, int aln_from,
2951  int aln_to, int aln_stop,
2952  char pattern_char, string pattern_id,
2953  string& alternative_feat_str,
2954  int genetic_code) const
2955 {
 // Fills feat_info (declared on the opening signature line, which is not
 // visible here) with a newly allocated FeatureInfo plus its display string.
 //   seqloc               - its address is stored in the new FeatureInfo
 //   aln_from / aln_to    - alignment range stored in feat_info->aln_range
 //   aln_stop             - the default feature string is aln_stop+1 chars long
 //   pattern_char / _id   - stored as feature_char / feature_id
 //   alternative_feat_str - if non-empty, used verbatim as the feature string
 //   genetic_code         - copied to feat_info->genetic_code
2956  CRef<FeatureInfo> feat(new FeatureInfo);
2957  feat->seqloc = &seqloc;
2958  feat->feature_char = pattern_char;
2959  feat->feature_id = pattern_id;
2960 
2961  if(alternative_feat_str != NcbiEmptyString){
2962  feat_info->feature_string = alternative_feat_str;
2963  } else {
2964  //fill feature string
 // NOTE(review): line[j] is written for j in [aln_from, aln_to] with no
 // bounds check; this assumes 0 <= aln_from and aln_to <= aln_stop.
2965  string line(aln_stop+1, ' ');
2966  for (int j = aln_from; j <= aln_to; j++){
2967  line[j] = feat->feature_char;
2968  }
2969  feat_info->feature_string = line;
2970  }
2971  feat_info->genetic_code = genetic_code;
2972  feat_info->aln_range.Set(aln_from, aln_to);
2973  feat_info->feature = feat;
2974 }
2975 
///Pad the insert line with blanks up to the column of an insert and put a
///"|" there, marking an insert whose sequence is shown on a following row.
///Nothing is appended when the line already reaches that column.
///@param seq the insert line being built; extended in place
///@param insert_alnpos alignment position of the insert
///@param aln_start alignment position at which this line starts
///@return the number of characters appended to seq (0 when nothing was added)
///
static int x_AddBar(std::string& seq, int insert_alnpos, int aln_start){
    const int end = (int)seq.size() - 1;               //index of last char, -1 if empty
    const int barPos = insert_alnpos - aln_start + 1;  //bar goes right after the insert position
    const int numToAdd = barPos - end;
    if(numToAdd < 1){
        //the bar column is already occupied
        return 0;
    }
    //numToAdd - 1 blanks followed by the bar itself
    seq.append(numToAdd - 1, ' ');
    seq += '|';
    return numToAdd;
}
2996 
2997 
2998 ///Add new insert seq to the current insert seq and return the end position of
2999 ///the latest insert
3000 ///@param cur_insert: the current insert string
3001 ///@param new_insert: the new insert string
3002 ///@param insert_alnpos: insert position
3003 ///@param aln_start: alnment start
3004 ///@return: the updated insert end position
3005 ///
3006 static int s_AdjustInsert(string& cur_insert, string& new_insert,
3007  int insert_alnpos, int aln_start)
3008 {
3009  int insertEnd = 0;
3010  int curInsertSize = (int)cur_insert.size();
3011  int insertLeftSpace = insert_alnpos - aln_start - curInsertSize + 2;
3012  //plus2 because insert is put after the position
3013  if(curInsertSize > 0){
3014  _ASSERT(insertLeftSpace >= 2);
3015  }
3016  int newInsertSize = (int)new_insert.size();
3017  if(insertLeftSpace - newInsertSize >= 1){
3018  //can insert with the end position right below the bar
3019  string spacer(insertLeftSpace - newInsertSize, ' ');
3020  cur_insert += spacer + new_insert;
3021 
3022  } else { //Need to insert beyond the insert postion
3023  if(curInsertSize > 0){
3024  cur_insert += " " + new_insert;
3025  } else { //can insert right at the firt position
3026  cur_insert += new_insert;
3027  }
3028  }
3029  insertEnd = aln_start + (int)cur_insert.size() -1 ; //-1 back to string position
3030  return insertEnd;
3031 }
3032 
3033 
3035  int aln_start,
3036  TSInsertInformationList& insert_list,
3037  list<string>& inserts) const {
 // Lays out the inserts of insert_list as text lines appended to `inserts`.
 // Each recursion level appends two strings: a "bar" line with a '|' one
 // column after every insert position, and a line holding the insert
 // sequences that fit. Inserts that do not fit are collected and handled by
 // a recursive call, producing further line pairs. `row` and `aln_range`
 // come from the opening signature line, which is not visible here.
 // The recursion ends because the first insert of a level is always placed,
 // so the left-over list is strictly shorter than insert_list.
3038  if(!insert_list.empty()){
3039  string bar(aln_range.GetLength(), ' ');
3040 
3041  string seq;
3042  TSInsertInformationList leftOverInsertList;
3043  bool isFirstInsert = true;
3044  int curInsertAlnStart = 0;
3045  int prvsInsertAlnEnd = 0;
3046 
3047  //go through each insert and fills the seq if it can
3048  //be filled on the same line. If not, go to the next line
3049  NON_CONST_ITERATE(TSInsertInformationList, iter, insert_list) {
3050  curInsertAlnStart = (*iter)->aln_start;
3051  //always fill the first insert. Also fill if there is enough space
3052  if(isFirstInsert || curInsertAlnStart - prvsInsertAlnEnd >= 1){
 // NOTE(review): the index below is not range-checked; it assumes
 // curInsertAlnStart-aln_start+1 < aln_range.GetLength().
3053  bar[curInsertAlnStart-aln_start+1] = '|';
3054  int seqStart = (*iter)->seq_start;
3055  int seqEnd = seqStart + (*iter)->insert_len - 1;
3056  string newInsert;
3057  newInsert = m_AV->GetSeqString(newInsert, row, seqStart,
3058  seqEnd);
3059  prvsInsertAlnEnd = s_AdjustInsert(seq, newInsert,
3060  curInsertAlnStart, aln_start);
3061  isFirstInsert = false;
3062  } else { //if no space, save the chunk and go to next line
3063  bar[curInsertAlnStart-aln_start+1] = '|';
3064  //indicate insert goes to the next line
 // x_AddBar returns how many characters it appended to seq, so the
 // running end position advances by exactly that amount.
3065  prvsInsertAlnEnd += x_AddBar(seq, curInsertAlnStart, aln_start);
3066  //May need to add a bar after the current insert sequence
3067  //to indicate insert goes to the next line.
3068  leftOverInsertList.push_back(*iter);
3069  }
3070  }
3071  //save current insert. Note that each insert has a bar and sequence
3072  //below it
3073  inserts.push_back(bar);
3074  inserts.push_back(seq);
3075  //here recursively fill the chunk that don't have enough space
3076  x_DoFills(row, aln_range, aln_start, leftOverInsertList, inserts);
3077  }
3078 
3079 }
3080 
3081 
3083  int aln_start, list<string>& inserts,
3084  string& insert_pos_string,
3085  TSInsertInformationList& insert_list) const
3086 {
 // Produces the insert display for one alignment line:
 //   insert_pos_string - receives a blank line of aln_range.GetLength()
 //                       characters with a '\' one column after each insert
 //   inserts           - receives the bar/sequence line pairs from x_DoFills
 // `row` and `aln_range` come from the opening signature line, which is not
 // visible here.
3087 
3088  string line(aln_range.GetLength(), ' ');
3089 
3090  ITERATE(TSInsertInformationList, iter, insert_list){
3091  int from = (*iter)->aln_start;
 // NOTE(review): unchecked index; assumes from - aln_start + 1 is inside line.
3092  line[from - aln_start + 1] = '\\';
3093  }
3094  insert_pos_string = line;
3095  //this is the line with "\" right after each insert position
3096 
3097  //here fills the insert sequence
3098  x_DoFills(row, aln_range, aln_start, insert_list, inserts);
3099 }
3100 
3101 
// Moves the inserts that start before line_aln_stop from three parallel
// lists into insert_list.
//   insert_list      - receives one record per consumed insert
//   insert_aln_start - alignment start of each pending insert (consumed)
//   insert_seq_start - sequence start of each pending insert (consumed)
//   insert_length    - length of each pending insert (consumed)
//   line_aln_stop    - only inserts whose alignment start is below this
//                      value are consumed
// Only insert_aln_start is tested for emptiness; the other two lists are
// presumably kept the same length by the caller - TODO confirm.
3102 void CDisplaySeqalign::x_GetInserts(TSInsertInformationList& insert_list,
3103  CAlnMap::TSeqPosList& insert_aln_start,
3104  CAlnMap::TSeqPosList& insert_seq_start,
3105  CAlnMap::TSeqPosList& insert_length,
3106  int line_aln_stop)
3107 {
3108 
3109  while(!insert_aln_start.empty()
3110  && (int)insert_aln_start.front() < line_aln_stop){
 // The declaration of `insert` is not visible here.
3112  insert->aln_start = insert_aln_start.front() - 1;
3113  //Need to minus one as we are inserting after this position
3114  insert->seq_start = insert_seq_start.front();
3115  insert->insert_len = insert_length.front();
3116  insert_list.push_back(insert);
3117  insert_aln_start.pop_front();
3118  insert_seq_start.pop_front();
3119  insert_length.pop_front();
3120  }
3121 
3122 }
3123 
3124 
// Returns the segment string for a row.
// With eMergeAlign set, this is "<seq start>-<seq stop>" of `row` in the
// current alignment. Otherwise it is the `segs` value stored in
// m_AlnLinksParams, or an empty string if the lookup fails.
// Note that the non-merge branch always uses the id of row 1, not `row`.
3125 string CDisplaySeqalign::x_GetSegs(int row) const
3127  string segs = NcbiEmptyString;
3128  if(m_AlignOption & eMergeAlign){ //only show this hsp
3129  segs = NStr::IntToString(m_AV->GetSeqStart(row))
3130  + "-" + NStr::IntToString(m_AV->GetSeqStop(row));
3131  } else { //for all segs
3132  string idString = m_AV->GetSeqId(1).GetSeqIdString();
 // The declaration of `iter` is not visible here; presumably it is a
 // lookup of idString in m_AlnLinksParams - TODO confirm.
3134  if ( iter != m_AlnLinksParams.end() ){
3135  segs = iter->second.segs;
3136  }
3137  }
3138  return segs;
3139 }
3140 
3141 
3142 
// Builds the download link for the subject sequence identified by `ids`.
// Returns kDownloadLink with its download_url, segs, lnk_displ and label
// placeholders filled in, or an empty string when BuildUserUrl yields no URL.
3143 string CDisplaySeqalign::x_GetDumpgnlLink(const list<CRef<CSeq_id> >& ids) const
3145  string dowloadUrl;
3146  string segs = x_GetSegs(1); //row=1
 // The declaration of `label` and one BuildUserUrl argument are on lines
 // that are not visible here.
3148  string url_with_parameters = CAlignFormatUtil::BuildUserUrl(ids, ZERO_TAX_ID, kDownloadUrl,
3149  m_DbName,
3151  true);
3152  if (url_with_parameters != NcbiEmptyString) {
3153  dowloadUrl = CAlignFormatUtil::MapTemplate(kDownloadLink,"download_url",url_with_parameters);
3154  dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"segs",segs);
3155  dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"lnk_displ",kDownloadImg);
3156  dowloadUrl = CAlignFormatUtil::MapTemplate(dowloadUrl,"label",label);
3157  }
3158  return dowloadUrl;
3159 }
3160 
3161 
3165  CRef<CSeq_align_set> alnSetRef(new CSeq_align_set);
 // Builds a new alignment set from `alnset` (declared on the signature
 // lines, which are not visible here). A Std or Dendiag alignment holding
 // more than one segment is split into one alignment per segment, each
 // carrying that segment's scores. Every other alignment is added unchanged.
3166 
3167  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()){
3168  const CSeq_align::TSegs& seg = (*iter)->GetSegs();
3169  if(seg.Which() == CSeq_align::C_Segs::e_Std){
3170  if(seg.GetStd().size() > 1){
3171  //has more than one stdseg. Need to seperate as each
3172  //is a distinct HSP
3173  ITERATE (CSeq_align::C_Segs::TStd, iterStdseg, seg.GetStd()){
3174  CRef<CSeq_align> aln(new CSeq_align);
3175  if((*iterStdseg)->IsSetScores()){
3176  aln->SetScore() = (*iterStdseg)->GetScores();
3177  }
 // NOTE(review): unlike the Dendiag branch below, the alignment
 // type of the parent is not copied here - confirm this is intended.
3178  aln->SetSegs().SetStd().push_back(*iterStdseg);
3179  alnSetRef->Set().push_back(aln);
3180  }
3181 
3182  } else {
3183  alnSetRef->Set().push_back(*iter);
3184  }
3185  } else if(seg.Which() == CSeq_align::C_Segs::e_Dendiag){
3186  if(seg.GetDendiag().size() > 1){
3187  //has more than one dendiag. Need to seperate as each is
3188  //a distinct HSP
3189  ITERATE (CSeq_align::C_Segs::TDendiag, iterDendiag,
3190  seg.GetDendiag()){
3191  CRef<CSeq_align> aln(new CSeq_align);
3192  if((*iterDendiag)->IsSetScores()){
3193  aln->SetScore() = (*iterDendiag)->GetScores();
3194  }
3195  aln->SetSegs().SetDendiag().push_back(*iterDendiag);
 // carry the parent's alignment type over to the split alignment
3196  if((*iter)->IsSetType() && (*iter)->CanGetType()){
3197  aln->SetType((*iter)->GetType());
3198  }
3199 
3200  alnSetRef->Set().push_back(aln);
3201  }
3202 
3203  } else {
3204  alnSetRef->Set().push_back(*iter);
3205  }
3206  } else { //Denseg, doing nothing.
3207 
3208  alnSetRef->Set().push_back(*iter);
3209  }
3210  }
3211 
3212  return alnSetRef;
3213 }
3214 
3215 
3219  CRef<CSeq_align_set> alnSetRef(new CSeq_align_set);
 // Builds a new alignment set from `alnset` (declared on the signature
 // lines, which are not visible here). Every Std or Dendiag alignment is
 // split into one alignment per segment, whatever the segment count, each
 // carrying that segment's scores. Other alignments are added unchanged.
3220 
3221  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()){
3222  const CSeq_align::TSegs& seg = (*iter)->GetSegs();
3223  if(seg.Which() == CSeq_align::C_Segs::e_Std){
3224  ITERATE (CSeq_align::C_Segs::TStd, iterStdseg, seg.GetStd()){
3225  CRef<CSeq_align> aln(new CSeq_align);
3226  if((*iterStdseg)->IsSetScores()){
3227  aln->SetScore() = (*iterStdseg)->GetScores();
3228  }
3229  aln->SetSegs().SetStd().push_back(*iterStdseg);
3230  alnSetRef->Set().push_back(aln);
3231  }
3232  } else if(seg.Which() == CSeq_align::C_Segs::e_Dendiag){
3233  ITERATE (CSeq_align::C_Segs::TDendiag, iterDendiag,
3234  seg.GetDendiag()){
3235  CRef<CSeq_align> aln(new CSeq_align);
3236  if((*iterDendiag)->IsSetScores()){
3237  aln->SetScore() = (*iterDendiag)->GetScores();
3238  }
3239  aln->SetSegs().SetDendiag().push_back(*iterDendiag);
3240  alnSetRef->Set().push_back(aln);
3241  }
3242  } else { //Denseg, doing nothing.
3243 
3244  alnSetRef->Set().push_back(*iter);
3245  }
3246  }
3247 
3248  return alnSetRef;
3249 }
3250 
3251 // this version will set aggregate scores
3255  CRef<CSeq_align_set> alnSetRef(new CSeq_align_set);
 // Splits every Std or Dendiag alignment of `alnset` (declared on the
 // signature lines, which are not visible here) into one alignment per
 // segment. In addition, the parent alignment's own scores are merged into
 // the first split alignment that has scores: a parent score is appended
 // only if it has an id and no score with a matching id is already present.
 // Other alignments are added unchanged.
 // The Std and Dendiag branches repeat the same merge logic.
3256 
3257  NON_CONST_ITERATE(CSeq_align_set::Tdata, iter, alnset.Set()){
3258  bool first_align = true;
3259  CSeq_align::TSegs& seg = (*iter)->SetSegs();
3260  if(seg.Which() == CSeq_align::C_Segs::e_Std){
3261  ITERATE (CSeq_align::C_Segs::TStd, iterStdseg, seg.GetStd()){
3262  CRef<CSeq_align> aln(new CSeq_align);
3263  if((*iterStdseg)->IsSetScores()){
3264  aln->SetScore() = (*iterStdseg)->GetScores();
3265  if (first_align) {
3266  // add aggegate scores to first seg, which becomes first alignment for subject
3267  first_align = false;
3268  std::vector< CRef< CScore > >& scores_in = (*iter)->SetScore();
3269  NON_CONST_ITERATE (std::vector< CRef< CScore > >, it_in, scores_in){
3270  if ((*it_in)->IsSetId()) {
3271  CObject_id& score_id = (*it_in)->SetId();
3272  bool found = false;
3273  std::vector< CRef< CScore > >& scores_out = aln->SetScore();
3274  ITERATE (std::vector< CRef< CScore > >, it_out, scores_out){
3275  if ((*it_out)->IsSetId()) {
3276  if (score_id.Match ((*it_out)->GetId())) {
3277  found = true;
3278  }
3279  }
3280  }
3281  if (!found) {
 // the CRef is shared: parent and split alignment
 // reference the same CScore object
3282  scores_out.push_back (*it_in);
3283  }
3284  }
3285  }
3286  }
3287  }
3288  aln->SetSegs().SetStd().push_back(*iterStdseg);
3289  alnSetRef->Set().push_back(aln);
3290  }
3291  } else if(seg.Which() == CSeq_align::C_Segs::e_Dendiag){
3292  ITERATE (CSeq_align::C_Segs::TDendiag, iterDendiag, seg.GetDendiag()){
3293  CRef<CSeq_align> aln(new CSeq_align);
3294  if((*iterDendiag)->IsSetScores()){
3295  aln->SetScore() = (*iterDendiag)->GetScores();
3296  if (first_align) {
3297  first_align = false;
3298  std::vector< CRef< CScore > >& scores_in = (*iter)->SetScore();
3299  NON_CONST_ITERATE (std::vector< CRef< CScore > >, it_in, scores_in){
3300  if ((*it_in)->IsSetId()) {
3301  CObject_id& score_id = (*it_in)->SetId();
3302  bool found = false;
3303  std::vector< CRef< CScore > >& scores_out = aln->SetScore();
3304  ITERATE (std::vector< CRef< CScore > >, it_out, scores_out){
3305  if ((*it_out)->IsSetId()) {
3306  if (score_id.Match ((*it_out)->GetId())) {
3307  found = true;
3308  }
3309  }
3310  }
3311  if (!found) {
3312  scores_out.push_back (*it_in);
3313  }
3314  }
3315  }
3316  }
3317  }
3318  aln->SetSegs().SetDendiag().push_back(*iterDendiag);
3319  alnSetRef->Set().push_back(aln);
3320  }
3321  } else { //Denseg, doing nothing.
3322 
3323  alnSetRef->Set().push_back(*iter);
3324  }
3325  }
3326 
3327  return alnSetRef;
3328 }
3329 
3330 
3331 
// Returns the gene-info URL for gene_id.
// The "GENE_INFO" registry value is used as a printf-style format string and
// filled with gene_id, m_Rid, "nucl" or "prot" (depending on m_IsDbNa) and
// m_cur_align.
3332 string CDisplaySeqalign::x_GetGeneLinkUrl(int gene_id)
3334  string strGeneLinkUrl = CAlignFormatUtil::GetURLFromRegistry("GENE_INFO");
 // The declaration of `buf` starts on a line that is not visible here; the
 // buffer holds strGeneLinkUrl.size() + 1024 chars.
3336  (new char[strGeneLinkUrl.size() + 1024]);
 // NOTE(review): sprintf is unbounded. The output fits only while the
 // expanded arguments add fewer than 1024 chars, and the format string comes
 // from the registry, so its conversions are not checked against the
 // argument list. Consider snprintf with the buffer size.
3337  sprintf(buf.get(), strGeneLinkUrl.c_str(),
3338  gene_id,
3339  m_Rid.c_str(),
3340  m_IsDbNa ? "nucl" : "prot",
3341  m_cur_align);
3342  strGeneLinkUrl.assign(buf.get());
3343  return strGeneLinkUrl;
3344 }
3345 
3346 
3347 
3348 
3351  string query_buf;
 // Writes the "Sort alignments for this subject sequence by:" control to
 // `out`. Each sort key (E value, Score, Percent identity, Query start
 // position, Subject start position) is wrapped in a Blast.cgi link unless it
 // is the currently selected HSP_SORT value. `out` and `id_label` come from
 // the signature lines, and the HSP_SORT value streamed into each link is on
 // lines that are not visible here.
 // NOTE(review): m_Ctx is dereferenced here without a null check, whereas the
 // template-based sort-info formatter in this file guards it with `m_Ctx ?`.
3352  map< string, string> parameters_to_change;
3353  parameters_to_change.insert(map<string, string>::value_type("HSP_SORT", ""));
3354  CAlignFormatUtil::BuildFormatQueryString(*m_Ctx,parameters_to_change,query_buf);
3355  out << "\n";
3357  out << "Sort alignments for this subject sequence by:\n";
3359 
 // an empty HSP_SORT request value is treated as 0
3360  string hsp_sort_value = m_Ctx->GetRequestValue("HSP_SORT").GetValue();
3361  int hsp_sort = hsp_sort_value == NcbiEmptyString ? 0 : NStr::StringToInt(hsp_sort_value);
3362 
3363  if (hsp_sort != CAlignFormatUtil::eEvalue) {
3364  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3365  << "&HSP_SORT="
3367  << "#" << id_label << "\">";
3368  }
3369 
3370  out << "E value";
3371  if (hsp_sort != CAlignFormatUtil::eEvalue) {
3372  out << "</a>";
3373  }
3374 
3376 
3377  if (hsp_sort != CAlignFormatUtil::eScore) {
3378  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3379  << "&HSP_SORT="
3381  << "#" << id_label << "\">";
3382  }
3383 
3384  out << "Score";
3385  if (hsp_sort != CAlignFormatUtil::eScore) {
3386  out << "</a>";
3387  }
3388 
3390 
3391  if (hsp_sort != CAlignFormatUtil::eHspPercentIdentity) {
3392  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3393  << "&HSP_SORT="
3395  << "#" << id_label << "\">";
3396  }
3397  out << "Percent identity";
3398  if (hsp_sort != CAlignFormatUtil::eHspPercentIdentity) {
3399  out << "</a>";
3400  }
3401  out << "\n";
3403  if (hsp_sort != CAlignFormatUtil::eQueryStart) {
3404  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3405  << "&HSP_SORT="
3407  << "#" << id_label << "\">";
3408  }
3409  out << "Query start position";
3410  if (hsp_sort != CAlignFormatUtil::eQueryStart) {
3411  out << "</a>";
3412  }
3414 
3415  if (hsp_sort != CAlignFormatUtil::eSubjectStart) {
3416  out << "<a href=\"Blast.cgi?CMD=Get&" << query_buf
3417  << "&HSP_SORT="
3419  << "#" << id_label << "\">";
3420  }
3421  out << "Subject start position";
3422  if (hsp_sort != CAlignFormatUtil::eSubjectStart) {
3423  out << "</a>";
3424  }
3425 
3426  out << "\n";
3427 }
3428 
3431  string alignSort = m_AlignTemplates->sortInfoTmpl;
 // Fills the sort-info template: id_label and alnSeqGi are taken from the
 // current alignment's labels, and the placeholders sorted_0 .. sorted_4 get
 // "sortAlnArrowLinkW" for the active HSP_SORT value and "" for the others.
 // With no request context, or an empty HSP_SORT, sort key 0 is active.
3432  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"id_label",m_CurrAlnID_DbLbl);
3433  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"alnSeqGi",m_CurrAlnID_Lbl);
3434 
3435  string hsp_sort_value = m_Ctx ? m_Ctx->GetRequestValue("HSP_SORT").GetValue() : kEmptyStr;
3436  int hsp_sort = hsp_sort_value == NcbiEmptyString ? 0 : NStr::StringToInt(hsp_sort_value);
3437  for(int i = 0; i < 5; i++) {
3438  if(hsp_sort == i) {
3439  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(hsp_sort),"sortAlnArrowLinkW");
3440  }
3441  else {
3442  alignSort = CAlignFormatUtil::MapTemplate(alignSort,"sorted_" + NStr::IntToString(i),"");
3443  }
3444  }
3445  return alignSort;
3446 }
3447 
3450  const CBioseq_Handle& query_handle=m_AV->GetBioseqHandle(0);
 // Writes the kBl2seqUrl template to `out` (declared on the signature line,
 // which is not visible here), with its "query" and "subject" placeholders
 // replaced by the GIs of alignment rows 0 and 1.
3451  const CBioseq_Handle& subject_handle=m_AV->GetBioseqHandle(1);
 // NOTE(review): query_seqid and subject_seqid are computed but not used in
 // the code shown below.
3452  CSeq_id_Handle query_seqid = GetId(query_handle, eGetId_Best);
3453  CSeq_id_Handle subject_seqid = GetId(subject_handle, eGetId_Best);
3454  TGi query_gi = FindGi(query_handle.GetBioseqCore()->GetId());
3455  TGi subject_gi = FindGi(subject_handle.GetBioseqCore()->GetId());
3456 
3457  string url_link = CAlignFormatUtil::MapTemplate(kBl2seqUrl,"query",GI_TO(TIntId, query_gi));
3458  url_link = CAlignFormatUtil::MapTemplate(url_link,"subject", GI_TO(TIntId, subject_gi));
3459 
3460  out << url_link << "\n";
3461 }
3462 
3463 
3464 void CDisplaySeqalign::x_DisplayMpvAnchor(CNcbiOstream& out,SAlnInfo* aln_vec_info)
3466  //add id anchor for mapviewer link
3467  string type_temp = m_BlastType;
3468  type_temp = NStr::TruncateSpaces(NStr::ToLower(type_temp));
3469  if(m_AlignOption&eHtml &&
3470  (type_temp.find("genome") != string::npos ||
3471  type_temp == "mapview" ||
3472  type_temp == "mapview_prev" ||
3473  type_temp == "gsfasta" || type_temp == "gsfasta_prev")){
3474  string subj_id_str;
3475  char buffer[126];
3476  int master_start = m_AV->GetSeqStart(0) + 1;
3477  int master_stop = m_AV->GetSeqStop(0) + 1;
3478  int subject_start = m_AV->GetSeqStart(1) + 1;
3479  int subject_stop = m_AV->GetSeqStop(1) + 1;
3480 
3481  m_AV->GetSeqId(1).GetLabel(&subj_id_str, CSeq_id::eContent);
3482 
3483  sprintf(buffer, "<a name = %s_%d_%d_%d_%d_%d></a>",
3484  subj_id_str.c_str(), aln_vec_info->score,
3485  min(master_start, master_stop),
3486  max(master_start, master_stop),
3487  min(subject_start, subject_stop),
3488  max(subject_start, subject_stop));
3489 
3490  out << buffer << "\n";
3491  }
3492 }
3493 
// Fills the per-HSP alignment info template (alignInfoTmpl) and returns it.
// Placeholders set here: HSP numbering and prev/next navigation state, the
// subject range (fromHSP/toHSP), the first accession, and the score fields.
// When the first alignment of m_SeqalignSetRef is global, only aln_score
// (the raw score) is set. Otherwise bit score, raw score, e-value, sum_n and
// the composition adjustment method are set.
// @param aln_vec_info  supplies evalue, bits, score, sum_n, comp_adj_method
3494 string CDisplaySeqalign::x_FormatAlnBlastInfo(SAlnInfo* aln_vec_info)
3496  string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3497  CAlignFormatUtil::GetScoreString(aln_vec_info->evalue,
3498  aln_vec_info->bits, 0, 0, evalue_buf,
3499  bit_score_buf, total_bit_buf, raw_score_buf);
3500 
3501  string alignParams = m_AlignTemplates->alignInfoTmpl;
3502 
3503  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_curr_num",NStr::IntToString(m_currAlignHsp + 1));
3504  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"alnSeqGi",m_CurrAlnID_Lbl);//not used now
3505 
 // navigation: "previous"/"first" are disabled on the first HSP and "next"
 // is disabled on the last one
3506  string hidePrevNaviagtion,hideNextNaviagtion, hideFirstNavigation;
3507  if(m_currAlignHsp == 0) {
3508  hidePrevNaviagtion = "disabled=\"disabled\"";
3509  hideFirstNavigation = "hidden";
3510  }
3511  if (m_currAlignHsp == m_TotalHSPNum - 1) {
3512  hideNextNaviagtion = "disabled=\"disabled\"";
3513  }
3514 
 // NOTE(review): "+ 1" binds only to the else-operand of each ternary, so
 // the 1-based adjustment is skipped when GetFrom() > GetTo(). Confirm that
 // this asymmetry is intended.
3515  const CRange<TSeqPos>& range = m_AV->GetSeqRange(1);
3516  TSeqPos from = (range.GetFrom()> range.GetTo()) ? range.GetTo() : range.GetFrom() + 1;
3517  TSeqPos to = (range.GetFrom()> range.GetTo()) ? range.GetFrom() : range.GetTo() + 1;
3518  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"fromHSP",from);
3519  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"toHSP",to);
3520 
3521  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_prev",hidePrevNaviagtion);
3522  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_next",hideNextNaviagtion);
3523  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_hide_fst",hideFirstNavigation);
3524  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"firstSeqID",m_CurrAlnAccession);//displays the first accession if multiple
3525  //current segment number = m_currAlignHsp + 1
3526  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_next_num",NStr::IntToString(m_currAlignHsp + 2));
3527  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_prev_num",NStr::IntToString(m_currAlignHsp));
3528 
3529 
3530  if (m_SeqalignSetRef->Get().front()->CanGetType() &&
3531  m_SeqalignSetRef->Get().front()->GetType() == CSeq_align_Base::eType_global)
3532  {
3533  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score",aln_vec_info->score);
3534  }
3535  else
3536  {
3537  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score",bit_score_buf);
3538  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_score_bits",aln_vec_info->score);
3539  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_eval",evalue_buf);
3540  if (aln_vec_info->sum_n > 0) {
3541  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_sumN",aln_vec_info->sum_n);
3542  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"sumNshow","shown");
3543  }
3544  else {
3545  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_sumN","");
3546  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"sumNshow","");
3547  }
3548 
 // comp_adj_method 1 and 2 are the only values that get a method text;
 // any other value hides the method section
3549  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_params_method",(aln_vec_info->comp_adj_method == 1 || aln_vec_info->comp_adj_method == 2) ? m_AlignTemplates->alignInfoMethodTmpl: "");
3550  if (aln_vec_info->comp_adj_method == 1){
3551  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","Composition-based stats.");
3552  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns???
3553  }
3554  else if (aln_vec_info->comp_adj_method == 2){
3555  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","Compositional matrix adjust.");
3556  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","");//???? is that the same for all aligns???
3557  }
3558  else {
3559  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth_hide","hidden");//???? is that the same for all aligns???
3560  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_meth","");
3561  }
3562  }
3563  return alignParams;
3564 }
3565 //sumN - hidden, cbs_md - shown, aln_frame - hidden
3566 
3567 
3568 
3570  SAlnInfo* aln_vec_info)
3571 {
 // Writes the plain-text score line for one alignment to `out` (declared on
 // the opening signature line, which is not visible here).
 // For a global first alignment it prints " NW Score = <score>". Otherwise it
 // prints either the raw score only (the RMBlastN case, whose condition is
 // not visible here) or bit score, raw score, Expect with optional sum_n,
 // and the composition adjustment method.
3572  string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3574  aln_vec_info->bits, 0, 0, evalue_buf,
3575  bit_score_buf, total_bit_buf, raw_score_buf);
3576 
 // NOTE(review): first_aln is not used in the code shown below, which
 // re-reads m_SeqalignSetRef->Get().front() instead.
3577  CRef<CSeq_align> first_aln = m_SeqalignSetRef->Get().front();
3578  if (m_SeqalignSetRef->Get().front()->CanGetType() &&
3579  m_SeqalignSetRef->Get().front()->GetType() == CSeq_align_Base::eType_global)
3580  {
3581  out<<" NW Score = "<< aln_vec_info->score;
3582  }
3583  else
3584  {
3585  // Disable bits score/evalue fields and only show raw
3586  // score for RMBlastN -RMH-
3588  {
3589  out<<" Score = "<<aln_vec_info->score<<"\n";
3590  }else
3591  {
3592  out<<" Score = "<<bit_score_buf<<" ";
3593  out<<"bits ("<<aln_vec_info->score<<"),"<<" ";
3594  out<<"Expect";
3595  if (aln_vec_info->sum_n > 0) {
3596  out << "(" << aln_vec_info->sum_n << ")";
3597  }
3598  out << " = " << evalue_buf;
3599  if (aln_vec_info->comp_adj_method == 1)
3600  out << ", Method: Composition-based stats.";
3601  else if (aln_vec_info->comp_adj_method == 2)
3602  out << ", Method: Compositional matrix adjust.";
3603  }
3604  }
3605  out << "\n";
3606 }
3607 
// Writes one alignment to `out`: optional defline section, map-viewer
// anchor, alignment parameters and the row data.
// showSortControls starts false, is set to true inside the show_defline
// section, and is passed on to x_DisplaySingleAlignParams.
// Several lines, including the signature's first line and some conditions,
// are not visible here, so the brace nesting below is incomplete as shown.
3608 //1. Display defline(s)
3609 //2. Display Gene info
3610 //3. Display Bl2Seq TBLASTX link
3611 //4. add id anchor for mapviewer link
3613  SAlnInfo* aln_vec_info,
3614  bool show_defline)
3615 {
3616  bool showSortControls = false;
3617  if(show_defline) {
3618  const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1);
3621  //1. Display defline(s),Gene info
3622  string deflines = x_PrintDefLine(bsp_handle, aln_vec_info);
3623  out<< deflines;
3624  }
3625 
3628  //3. Display Bl2Seq TBLASTX link
3630  }
3631  out << "\n";
3632  }
3633  showSortControls = true;
3634  }
3636  //4. add id anchor for mapviewer link
3637  x_DisplayMpvAnchor(out,aln_vec_info);
3638  }
3639 
3640  //Displays sorting controls, features, Score, Expect, Idnt,Gaps,strand,positives,frames etc
3641  x_DisplaySingleAlignParams(out, aln_vec_info,showSortControls);
3642  x_DisplayRowData(aln_vec_info->alnRowInfo,out);
3643 }
3644 
3645 
3646 
// Formats one defline from the alnDefLineTmpl template and returns it.
//   alnDispParams - gi, seqID, id_url, title, linkoutStr and dumpGnlUrl of
//                   the sequence being described
//   isFirst       - true for the first defline: the HSP count is computed
//                   (and stored in m_TotalHSPNum); otherwise "frstDfl" is
//                   set to "hidden"
//   linkout       - when true, linkout and download link strings are filled
//   hideDefline   - not referenced in the code shown below (one line of the
//                   body is not visible here)
//   seqLength     - value for the "alnSeqLength" placeholder
3647 //fill one defline info, using <@ALN_DEFLINE_ROW@>
3648 string
3649 CDisplaySeqalign::x_MapDefLine(SAlnDispParams *alnDispParams,bool isFirst, bool linkout,bool hideDefline,int seqLength)
3651  /*
3652  string firstSeqClassInfo = (isFirst) ? "" : "hidden"; //hide ">" sign if not first seq align
3653  string alnDefLine = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnDefLineTmpl,"alnSeqSt",firstSeqClassInfo);
3654  */
3655  string alnDefLine = m_AlignTemplates->alnDefLineTmpl;
3657 
3658  string alnGi = (m_AlignOption&eShowGi && alnDispParams->gi > ZERO_GI) ?
3659  "gi|" + NStr::NumericToString(alnDispParams->gi) + "|" : "";
3660  string seqid;
 // NOTE(review): as written, the condition is (not a BL_ORD_ID id) OR
 // (contains "lcl|Subject_"), so "lcl|Subject_" ids are displayed. If the
 // intent was to suppress both kinds of local id, the negation would have to
 // cover both find() tests - confirm.
3661  if(!(alnDispParams->seqID->AsFastaString().find("gnl|BL_ORD_ID") != string::npos) ||
3662  alnDispParams->seqID->AsFastaString().find("lcl|Subject_") != string::npos){
3663  if (m_UseLongSeqIds) {
3664  seqid = alnDispParams->seqID->AsFastaString();
3665  }
3666  else {
3667  seqid = CAlignFormatUtil::GetBareId(*alnDispParams->seqID);
3668  }
3669  }
3670 
 // with a URL the id is rendered through alnSeqInfoTmpl as a link;
 // without one, "seq_info" is replaced by the plain gi + seqid text
3671  if(alnDispParams->id_url != NcbiEmptyString) {
3672  string seqInfo = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnSeqInfoTmpl,"aln_url",alnDispParams->id_url);
3673  string trgt = (m_AlignOption & eNewTargetWindow) ? "TARGET=\"EntrezView\"" : "";
3674 
3675  seqInfo = CAlignFormatUtil::MapTemplate(seqInfo,"aln_target",trgt);
3676  seqInfo = CAlignFormatUtil::MapTemplate(seqInfo,"aln_rid",m_Rid);
3677 
3678  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",seqInfo);
3679  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_gi",alnGi);
3680  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"aln_seqid",seqid);
3681  }
3682  else {
3683  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"seq_info",alnGi + seqid);
3684  }
3685  string hspNum,isFirstDflAttr;
3686  if(isFirst) {
 // the request value TOTAL_HSPS wins; otherwise the stored hspNumber for
 // the subject id of row 1 is used
3687  string totalHsps = m_Ctx ? m_Ctx->GetRequestValue("TOTAL_HSPS").GetValue() : kEmptyStr; //Future use
3688  m_TotalHSPNum = totalHsps.empty() ? m_AlnLinksParams[m_AV->GetSeqId(1).GetSeqIdString()].hspNumber : NStr::StringToInt(totalHsps);
3689  hspNum = (m_TotalHSPNum != 0) ? NStr::IntToString(m_TotalHSPNum) : "";
3690  }
3691  else {
3692  isFirstDflAttr = "hidden";
3693  }
3694  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnSeqLength", NStr::IntToString(seqLength));
3695  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnHspNum",hspNum);
3696  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"frstDfl",isFirstDflAttr);
3697  string alnIdLbl = (alnDispParams->gi != ZERO_GI) ?
3698  NStr::NumericToString(alnDispParams->gi) : alnDispParams->seqID->GetSeqIdString();
3699  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnIdLbl",alnIdLbl);
3700  string linkoutStr, dnldLinkStr;
3701  if (linkout) {
3702  linkoutStr = (!alnDispParams->linkoutStr.empty()) ? alnDispParams->linkoutStr : "";
3703  dnldLinkStr = alnDispParams->dumpGnlUrl;
3704  }
3705  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine ,"alnLinkout",linkoutStr);
3706  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine ,"dndlLinkt",dnldLinkStr);
 // the title is HTML-encoded before it is placed into the template
3707  alnDefLine = CAlignFormatUtil::MapTemplate(alnDefLine,"alnTitle",CHTMLHelper::HTMLEncode(alnDispParams->title));
3708  return alnDefLine;
3709 }
3710 
// Builds the defline section for the subject sequence of the current
// alignment and returns it (empty if bsp_handle is not valid).
// Side effects visible here: m_cur_align is incremented, and
// m_NumBlastDefLines, m_CurrAlnID_Lbl and m_CurrAlnAccession are set.
// Resource links are set up through x_InitAlignLinks.
// With a single defline the result is that defline. Otherwise it is the
// first defline followed by a titles link and the remaining deflines wrapped
// in alnTitlesTmpl.
// Several lines of this function are not visible here.
3711 string
3712 CDisplaySeqalign::x_InitDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info)
3714  string deflines;
3715  string firstDefline;
3717  list<string>& use_this_seqid = aln_vec_info->use_this_seqid;
3718  if(bsp_handle){
3719  const CRef<CBlast_def_line_set> bdlRef = CSeq_DB_PLACEHOLDER
3720  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
3721  bool isFirst = true;
3722  TGi firstGi = ZERO_GI;
3723  m_NumBlastDefLines = 0;
3724  m_cur_align++;
3725  SAlnDispParams *alnDispParams;
3726  //fill length
3727  int seqLength = bsp_handle.GetBioseqLength();
3728  if(bdl.empty()){ //no blast defline struct, should be no such case now
3729  //actually not so fast...as we now fetch from entrez even when it's not in blast db
3730  //there is no blast defline in such case.
3731  alnDispParams = x_FillAlnDispParams(bsp_handle);
3732  string alnDefLine = x_MapDefLine(alnDispParams,isFirst,false,false,seqLength);
3733  m_CurrAlnID_Lbl = (alnDispParams->gi != ZERO_GI) ?
3734  NStr::NumericToString(alnDispParams->gi) : CAlignFormatUtil::GetLabel(alnDispParams->seqID);
3735 
3736  if (m_UseLongSeqIds || alnDispParams->seqID->IsLocal()) {
3737  m_CurrAlnAccession = alnDispParams->seqID->AsFastaString();
3738  }
3739  else {
3741  *alnDispParams->seqID);
3742  }
3744  x_InitAlignLinks(alnDispParams,bdl,eDisplayResourcesLinks);
3745  }
3746  delete alnDispParams;
3747  firstDefline = alnDefLine;
3749  } else {
3750  //format each defline
3751  int numBdl = 0;
3752  for(list< CRef< CBlast_def_line > >::const_iterator
3753  iter = bdl.begin(); iter != bdl.end(); iter++){
 // x_FillAlnDispParams may return null, in which case the defline is
 // skipped and not counted
3754  alnDispParams = x_FillAlnDispParams(*iter,bsp_handle,use_this_seqid,firstGi,numBdl);
3755  if(alnDispParams) {
3756  numBdl++;
3757  bool hideDefline = (numBdl > 1)? true : false;
3758  string alnDefLine = x_MapDefLine(alnDispParams,isFirst,m_AlignOption&eLinkout,hideDefline,seqLength);
3759  if(isFirst){
3760  const CSeq_id& aln_id = m_AV->GetSeqId(1);
3761  TGi alnGi;
3762  CRef<CSeq_id> dispId = CAlignFormatUtil::GetDisplayIds(bsp_handle,aln_id,use_this_seqid,&alnGi);
3764  if(alnGi == ZERO_GI) {
3766  }
3767  else {
3769  }
3770 
3771  firstGi = alnGi;
3772 
3773  //This should probably change on dispId
3774  if (m_UseLongSeqIds) {
3776  alnDispParams->seqID->AsFastaString();
3777  }
3778  else {
3781  *alnDispParams->seqID);
3782  }
3783  if(m_CurrAlnAccession.find("gnl|BL_ORD_ID") != string::npos ||
3784  m_CurrAlnAccession.find("lcl|Subject_") != string::npos){
3785  ///Get first token of the title
3786  vector <string> parts;
3787  NStr::Split(alnDispParams->title," ",parts);
3788  if(parts.size() > 0) {
3789  m_CurrAlnAccession = parts[0];
3790  }
3791  }
3792  }
3793  //1. isFirst && firstGi == ZERO_GI - covers resource links for non-gis databases
3794  //2. alnDispParams->gi == firstGi - covers resource links for gi databases/
3795  if( (isFirst && firstGi == ZERO_GI) || (alnDispParams->gi == firstGi && firstGi != ZERO_GI) ) {
3796  //Get custom links only for the first gi
3797  int linksDisplayOption = eDisplayResourcesLinks;
3798  if(seqLength > k_GetSubseqThreshhold) {
3799  linksDisplayOption += eDisplayDownloadLink;
3800  }
3801  x_InitAlignLinks(alnDispParams,bdl,linksDisplayOption);
3802  firstDefline = alnDefLine;
3803  }
3804  else {
3805  deflines += alnDefLine; //this contains all deflines except the first one
3806  }
3807  if(isFirst) {
3808  isFirst = false;
3809  }
 // NOTE(review): this break leaves the loop before the
 // "delete alnDispParams" below, so the current alnDispParams is not
 // freed on this path. Also, the value 1 assigned here is overwritten
 // by "m_NumBlastDefLines = numBdl" after the loop.
3810  if(m_AlignTemplates->alnTitlesTmpl.empty() && !firstDefline.empty()) {
3811  m_NumBlastDefLines = 1;
3812  break;
3813  }
3814 
3815  delete alnDispParams;
3816  }
3817  }
3818  m_NumBlastDefLines = numBdl;
3819  }
3820  if(m_NumBlastDefLines == 1) {
3821  deflines = firstDefline;
3822  }
3823  else {
3824  string alnTitles = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alnTitlesTmpl,"seqTitles",deflines);
 // The declaration of alnTitleslnk is not visible here.
3826  alnTitleslnk = CAlignFormatUtil::MapTemplate(alnTitleslnk,"allTitleNum",NStr::IntToString(m_NumBlastDefLines));
3827  alnTitleslnk = CAlignFormatUtil::MapTemplate(alnTitleslnk,"acc",m_CurrAlnAccession);
3828  alnTitleslnk = CAlignFormatUtil::MapTemplate(alnTitleslnk,"rid",m_Rid);
3829 
3830 
3831  deflines = firstDefline + alnTitleslnk + alnTitles;
3832  }
3833  }
3834  return deflines;
3835 }
3836 
3837 
3838 
// Builds the header for one alignment by filling the placeholders of
// m_AlignTemplates->alignHeaderTmpl and returns the filled template.
//   bsp_handle   - tested for validity; when valid, deflines come from
//                  x_InitDefLinesHeader() and m_CustomLinksList / m_LinkoutList
//                  are each concatenated into a single string.
//   aln_vec_info - forwarded to x_InitDefLinesHeader().
// Side effect visible here: m_CurrAlnID_DbLbl is reset to "".
// NOTE(review): listing lines 3841-3842 and 3865 are absent from this view.
// 3865 presumably declares alnSeqTitlesNum (used at 3867) - confirm against
// the repository copy.
3839 string
3840 CDisplaySeqalign::x_FormatDefLinesHeader(const CBioseq_Handle& bsp_handle,SAlnInfo* aln_vec_info)
3843  string deflines, linkOutStr,customLinkStr;
   // NOTE(review): linkoutStr (the list) is not referenced in the visible lines.
3844  list<string> linkoutStr;
3845 
3846  m_CurrAlnID_DbLbl = "";
3847  if(bsp_handle){
3848  deflines = x_InitDefLinesHeader(bsp_handle,aln_vec_info);
3849 
3850  if(m_CustomLinksList.size() > 0) {
3851  ITERATE(list<string>, iter_custList, m_CustomLinksList){
3852  customLinkStr += *iter_custList;
3853  }
3854  }
3855  if(m_LinkoutList.size() > 0) {
3856  ITERATE(list<string>, iter_List, m_LinkoutList){
3857  linkOutStr += *iter_List;
3858  }
3859  }
3860  }
3861  //fill deflines
3862  string alignInfo = CAlignFormatUtil::MapTemplate(m_AlignTemplates->alignHeaderTmpl,"aln_deflines",deflines);
3863 
3864  //fill multiple titles - not used now
   // "alnSeqTitlesShow" is left empty only when the defline count exceeds
   // k_MaxDeflinesToShow; otherwise it is set to "hidden".
3866  string alnSeqTitlesShow = (m_NumBlastDefLines > k_MaxDeflinesToShow) ? "" : "hidden";
3867  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqTitlesNum", NStr::IntToString(alnSeqTitlesNum));
3868  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqTitlesShow",alnSeqTitlesShow);
3869 
3870 
3871 
3872  //fill sequence checkbox
   // "alnSeqRet" is empty only when eSequenceRetrieval is set and
   // m_CanRetrieveSeq is true; otherwise it is "hidden".
3873  string seqRetrieval = ((m_AlignOption&eSequenceRetrieval) && m_CanRetrieveSeq) ? "" : "hidden";
3874  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqGi",m_CurrAlnID_Lbl);
3875  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnQueryNum",NStr::IntToString(m_QueryNumber));
3876  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnSeqRet",seqRetrieval);
3877 
3878 
3879  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnLinkOutLinks",linkOutStr);
3880  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnCustomLinks",customLinkStr);
3881  //fill id info
3882  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"firstSeqID",m_CurrAlnAccession);
3883 
   // "dwGnbn" is "hidden" when the custom links mention neither "GenBank"
   // nor "GenPept".
3884  string isGenbankAttr = (NStr::Find(customLinkStr,"GenBank") == NPOS && NStr::Find(customLinkStr,"GenPept") == NPOS)? "hidden" : "";
3885  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"dwGnbn",isGenbankAttr);
3886 
   // "hideDndl" is "hidden" only for the "sra" blast type.
3887  string hideDndl = (m_BlastType == "sra")? "hidden":"";
3888  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"hideDndl",hideDndl);
3889  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"aln_ord_pos",NStr::IntToString(m_cur_align));
3890 
3891  //The next two lines are not used for now
3892  //alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnFASTA",m_FASTAlinkUrl);
3893  //alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnRegFASTA",m_AlignedRegionsUrl);
3894 
3895  //fill sort info
   // Sort info is generated only when there is more than one HSP in total;
   // otherwise the "sortInfo" placeholder is filled with an empty string.
3896  string sortInfo;
3897  if(m_TotalHSPNum > 1) {
3898  //3. Display sort info
3899  sortInfo = x_FormatAlignSortInfo();
3900  }
3901  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"sortInfo",sortInfo);
3902 
3903  return alignInfo;
3904 }
3905 
3906 
3907 
3908 
// Template-based output of one alignment: optionally writes the defline
// header built by x_FormatDefLinesHeader(), then writes the string returned
// by x_FormatSingleAlign().
// NOTE(review): the first signature line (listing 3912) and listing lines
// 3925, 3927 and 3935 are absent from this view, so the exact nesting of the
// braces below cannot be confirmed here. The parameter list matches the call
// x_ShowAlnvecInfoTemplate(out, aln_vec_info, show_defline) - confirm.
3909 //1. Display defline(s)
3910 //2. Display Gene info
3911 //3. Display Bl2Seq TBLASTX link
3913  SAlnInfo* aln_vec_info,
3914  bool show_defline)
3915 {
3916  string alignHeader;
   // Request value "SORT_ONE_ALN"; empty string when m_Ctx is null.
3917  string sortOneAln = m_Ctx ? m_Ctx->GetRequestValue("SORT_ONE_ALN").GetValue() : kEmptyStr;
3918  if(show_defline) {
3919  const CBioseq_Handle& bsp_handle=m_AV->GetBioseqHandle(1);
3920  //1. Display defline(s),Gene info
   // NOTE(review): this declaration shadows the alignHeader declared at 3916.
3921  string alignHeader = x_FormatDefLinesHeader(bsp_handle, aln_vec_info);
   // The header is written only when SORT_ONE_ALN is not set.
3922  if(sortOneAln.empty()) {
3923 
3924  out<< alignHeader;
3926  //3. Display Bl2Seq TBLASTX link
3928  }
3929 
3930  }
3931  //start counting hsp
   // The HSP counter starts from request value "HSP_START", or 0 when absent.
3932  string currHsp = m_Ctx ? m_Ctx->GetRequestValue("HSP_START").GetValue() : kEmptyStr;
3933  m_currAlignHsp = currHsp.empty() ? 0: NStr::StringToInt(currHsp);
3934  }
3936  //4. add id anchor for mapviewer link
3937  x_DisplayMpvAnchor(out,aln_vec_info);
3938  }
3939 
3940  //Displays sorting controls, features, Score, Expect, Idnt,Gaps,strand,positives,frames etc
3941  string alignInfo = x_FormatSingleAlign(aln_vec_info);
3942  out << alignInfo;
3943 }
3944 
// Prepares and prints one alignment held in aln_vec_info.
// Visible steps: make aln_vec_info->alnvec the current m_AV, compute dynamic
// feature info and row data, print through either the plain
// (x_ShowAlnvecInfo) or the template (x_ShowAlnvecInfoTemplate) path
// depending on whether m_AlignTemplates is set, free the row data, and end
// with a newline.
// NOTE(review): the first signature line (listing 3945) and the condition at
// listing 3957 guarding x_PrepareIdentityInfo() are absent from this view.
3946  SAlnInfo* aln_vec_info,
3947  bool show_defline)
3948 {
3949 
3950  m_AV = aln_vec_info->alnvec;
3951  //Calculate Dynamic Features in aln_vec_info
3952  x_PrepareDynamicFeatureInfo(aln_vec_info);
3953  //Calculate row data for actual alignment display
3954  aln_vec_info->alnRowInfo = x_PrepareRowData();
3955 
3956  //Calculate identity data in aln_vec_info
3958  x_PrepareIdentityInfo(aln_vec_info);
3959  }
3960  if(!m_AlignTemplates) {
3961  x_ShowAlnvecInfo(out,aln_vec_info,show_defline);
3962  }
3963  else {
3964  x_ShowAlnvecInfoTemplate(out,aln_vec_info,show_defline);
3965  }
3966 
   // The row data assigned at 3954 is released here; the pointer itself is
   // not reset in the visible lines.
3967  delete aln_vec_info->alnRowInfo;
3968 
3969  out<<"\n";
3970 }
3971 
3972 
// Plain-text/HTML (non-template) output of the per-alignment parameters:
// sort controls, dynamic features, score information and identity
// information, written to `out`.
// NOTE(review): the first signature line (listing 3974) and listing lines
// 3978, 3982, 3997 and 4000 are absent from this view. They appear to hold
// the opening conditions of the blocks closed at 3995 and 4011 and the start
// of the call whose arguments are listed at 4001-4010 - confirm against the
// repository copy.
3973 //Displays features, Score Expect, Idnt,Gaps,strand
3975  SAlnInfo* aln_vec_info,
3976  bool showSortControls)
3977 {
3979 
   // Sort controls require: caller opt-in, HTML output, and more than one HSP
   // recorded in m_AlnLinksParams for the subject id (plus whatever the
   // missing line 3982 tests).
   // NOTE(review): the index expression below is written as operator[] on
   // m_AlnLinksParams, which for a std::map would insert a default entry for
   // an unseen id - confirm the container type.
3980  if(showSortControls && m_AlignOption&eHtml &&
3981  m_AlnLinksParams[m_AV->GetSeqId(1).GetSeqIdString()].hspNumber > 1 &&
3983  //3. Display sort info
3984  x_DisplayAlignSortInfo(out,aln_vec_info->id_label);
3985  }
3986 
3987  //output dynamic feature lines
3988  if(aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3 ){
3989  //6. Display Dynamic Features
3990  x_PrintDynamicFeatures(out,aln_vec_info);
3991  }
3992 
3993  //7. Display score,bits,expect,method
3994  x_DisplayAlignInfo(out,aln_vec_info);
3995  }
3996 
3998  //8.Display Identities,positives,strand, frames etc
3999  //x_DisplayIdentityInfo(aln_vec_info->alnRowInfo, out);
4001  (int)m_AV->GetAlnStop(),
4002  aln_vec_info->identity,
4003  aln_vec_info->positive,
4004  aln_vec_info->match,
4005  aln_vec_info->gap,
4006  m_AV->StrandSign(0),
4007  m_AV->StrandSign(1),
4008  aln_vec_info->alnRowInfo->frame[0],
4009  aln_vec_info->alnRowInfo->frame[1],
4010  ((m_AlignType & eProt) != 0 ? true : false));
4011  }
4012 }
4013 
4014 //<div class="dflLnk hsp <@multiHSP@>"><label>Range <@fromHSP@> to <@toHSP@>:</label><@alnHSPLinks@></div>
4015 string CDisplaySeqalign:: x_FormatAlnHSPLinks(string &alignInfo)
4017 
4018  string hspLinks;
4019  if(m_HSPLinksList.size() > 0) {
4020  const CRange<TSeqPos>& range = m_AV->GetSeqRange(1);
4021  TSeqPos from = (range.GetFrom()> range.GetTo()) ? range.GetTo() : range.GetFrom() + 1;
4022  TSeqPos to = (range.GetFrom()> range.GetTo()) ? range.GetFrom() : range.GetTo() + 1;
4023 
4024  int addToRange = (int)((to - from) * 0.05);//add 5% to each side
4025  int fromAdjust = max(0,(int)from - addToRange);
4026  int toAdjust = to + addToRange;
4027  string customLinkStr;
4028  ITERATE(list<string>, iter_custList, m_HSPLinksList){
4029  string singleLink = CAlignFormatUtil::MapTemplate(*iter_custList,"from",fromAdjust);
4030  singleLink = CAlignFormatUtil::MapTemplate(singleLink,"to",toAdjust);
4031  singleLink = CAlignFormatUtil::MapTemplate(singleLink,"fromHSP",from);
4032  singleLink = CAlignFormatUtil::MapTemplate(singleLink,"toHSP",to);
4033  hspLinks += singleLink;
4034  }
4035  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"fromHSP",from);
4036  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"toHSP",to);
4037  }
4038  string multiHSP = (hspLinks.empty()) ? "hidden" : "" ;
4039 
4040 
4041  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"alnHSPLinks",hspLinks);
4042  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"multiHSP",multiHSP);
4043  alignInfo = CAlignFormatUtil::MapTemplate(alignInfo,"firstSeqID",m_CurrAlnAccession);
4044 
4045  return alignInfo;
4046 }
4047 
// Template-based formatting of one alignment (one HSP): returns the filled
// alignment-info template followed by the filled alignment-rows template.
// Side effect: increments m_currAlignHsp, whose new value fills the
// "aln_curr_num" placeholder.
// NOTE(review): listing lines 4050, 4053 and 4071 are absent from this view.
// 4053 appears to open the block closed at 4065, and 4071 presumably declares
// alignRowsTemplate (used at 4074) - confirm against the repository copy.
4048 //Displays features, Score Expect, Idnt,Gaps,strand
4049 string CDisplaySeqalign::x_FormatSingleAlign(SAlnInfo* aln_vec_info)
4051  string alignInfo;
4052 
4054 
4055  //7. Display score,bits,expect,method
4056  alignInfo = x_FormatAlnBlastInfo(aln_vec_info);
4057 
4058  //8.Display Identities,positives,strands, frames etc
4059  alignInfo = x_FormatIdentityInfo(alignInfo, aln_vec_info);
4060 
4061  //output dynamic feature lines
4062  //only for aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3
4063  //6. Display Dynamic Features
4064  alignInfo = x_FormatDynamicFeaturesInfo(alignInfo, aln_vec_info);
4065  }
4066 
   // If nothing was produced above, fall back to the raw alignInfoTmpl so the
   // HSP-link placeholders can still be filled.
4067  alignInfo = (alignInfo.empty()) ? m_AlignTemplates->alignInfoTmpl : alignInfo;
4068  alignInfo = x_FormatAlnHSPLinks(alignInfo);
4069 
4070  m_currAlignHsp++;
4072 
4073  string alignRows = x_DisplayRowData(aln_vec_info->alnRowInfo);
4074  alignRows = CAlignFormatUtil::MapTemplate(alignRowsTemplate,"align_rows",alignRows);
4075  alignRows = CAlignFormatUtil::MapTemplate(alignRows,"aln_curr_num",NStr::IntToString(m_currAlignHsp));
4076  alignRows = CAlignFormatUtil::MapTemplate(alignRows,"alnSeqGi",m_CurrAlnID_Lbl);
4077 
4078  alignInfo += alignRows;
4079  return alignInfo;
4080 }
4081 
4082 
4083 
// Resets the dynamic-feature fields of aln_vec_info (feat5, feat3, feat_list)
// and, when m_DynamicFeature is available, refills them for the subject
// range of the current alignment (row 1 of m_AV).
// NOTE(review): listing lines 4085, 4090, 4092 and 4101 are absent from this
// view. 4090/4092 hold parts of the outer condition (only the subject-length
// operand at 4091 is visible) and 4101 presumably declares `wid` used at
// 4102 - confirm against the repository copy.
4084 void CDisplaySeqalign::x_PrepareDynamicFeatureInfo(SAlnInfo* aln_vec_info)
4086  aln_vec_info->feat5 = NULL;
4087  aln_vec_info->feat3 = NULL;
4088  aln_vec_info->feat_list.clear();
4089  //Calculate Dynamic Features in aln_vec_info
4091  && (int)m_AV->GetBioseqHandle(1).GetBioseqLength()
4093  if(m_DynamicFeature){
4094  const CRange<TSeqPos>& range = m_AV->GetSeqRange(1);
4095  aln_vec_info->actual_range = range;
   // Store the range with from <= to even when the alignment range is
   // reported reversed.
4096  if(range.GetFrom() > range.GetTo()){
4097  aln_vec_info->actual_range.Set(range.GetTo(), range.GetFrom());
4098  }
4099  string id_str;
4100  const CBioseq_Handle& subject_handle=m_AV->GetBioseqHandle(1);
4102  wid->GetLabel(&id_str, CSeq_id::eBoth);
4103  aln_vec_info->subject_gi = FindGi(subject_handle.GetBioseqCore()->GetId());
   // feat5/feat3 are passed to GetFeatInfo() and tested for non-null by the
   // display code; presumably they receive the flanking features - confirm
   // against GetFeatInfo(). The meaning of the trailing literal 2 is not
   // visible here.
4104  aln_vec_info->feat_list = m_DynamicFeature->GetFeatInfo(id_str, aln_vec_info->actual_range, aln_vec_info->feat5, aln_vec_info->feat3, 2);
4105  }
4106  }
4107 }
4108 
4109 static string s_MapFeatureURL(string viewerURL,
4110  string textSeqID,
4111  string db,
4112  int fromRange,
4113  int toRange,
4114  string rid)
4115 {
4116  string url_link = CAlignFormatUtil::MapTemplate(viewerURL,"db",db);
4117  url_link = CAlignFormatUtil::MapTemplate(url_link,"gi",textSeqID);
4118  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",rid);
4119  url_link = CAlignFormatUtil::MapTemplate(url_link,"from",fromRange);
4120  url_link = CAlignFormatUtil::MapTemplate(url_link,"to",toRange);
4121  return url_link;
4122 }
4123 
// Formats one dynamic feature from m_AlignTemplates->alignFeatureTmpl.
//   viewerURL  - URL template passed to s_MapFeatureURL()
//   subject_gi - when greater than ZERO_GI the feature text is wrapped in the
//                link template (alignFeatureLinkTmpl); otherwise the bare
//                text fills "aln_feat_info"
//   fromRange, toRange - feature range; 1 is added to each for the URL
//   featText   - text shown for the feature
// Returns the filled feature template.
// NOTE(review): listing line 4137 (the second argument of the
// s_MapFeatureURL() call) is absent from this view, and the local textSeqID
// is not assigned in the visible lines.
4124 string CDisplaySeqalign::x_FormatOneDynamicFeature(string viewerURL,
4125  TGi subject_gi,
4126  int fromRange,
4127  int toRange,
4128  string featText)
4129 {
4130  string alignFeature = m_AlignTemplates->alignFeatureTmpl;
4131  string textSeqID;
4132 
4133  if(subject_gi > ZERO_GI) {
4134  //if(CAlignFormatUtil::GetTextSeqID((CConstRef<CSeq_id>)&m_AV->GetSeqId(1))) {
4135  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_info",m_AlignTemplates->alignFeatureLinkTmpl);
4136  string url = s_MapFeatureURL(viewerURL,
4138  string(m_IsDbNa ? "nucleotide" : "protein"),
4139  fromRange + 1,
4140  toRange + 1,
4141  m_Rid);
4142  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_url",url);
4143  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat",featText);
4144  }
4145  else {
4146  alignFeature = CAlignFormatUtil::MapTemplate(alignFeature,"aln_feat_info",featText);
4147  }
4148  return alignFeature;
4149 }
4150 
4151 
4152 //6. Display Dynamic Features
4153 string CDisplaySeqalign::x_FormatDynamicFeaturesInfo(string alignInfo, SAlnInfo* aln_vec_info)
4155  string alignParams = alignInfo;
4156  //string alignFeature = m_AlignTemplates->alignFeatureTmpl;
4157 
4158 
4159  string viewerURL = CAlignFormatUtil::GetURLFromRegistry("ENTREZ_SUBSEQ_TM");
4160 
4161  string allAlnFeatures = "";
4162  if(aln_vec_info->feat_list.size() > 0) { //has feature in this range
4163  ITERATE(vector<SFeatInfo*>, iter, aln_vec_info->feat_list){
4164 
4165  string alignFeature = x_FormatOneDynamicFeature(viewerURL,
4166  aln_vec_info->subject_gi,
4167  (*iter)->range.GetFrom(),
4168  (*iter)->range.GetTo(),
4169  (*iter)->feat_str);
4170 
4171  ///TO DO: NO hyperlink if aln_vec_info->subject_gi == 0
4172 
4173  allAlnFeatures += alignFeature;
4174  }
4175  } else { //show flank features
4176  if(aln_vec_info->feat5 || aln_vec_info->feat3){
4177  //TO DO: Check if we need that
4178  //out << " Features flanking this part of subject sequence:" << "\n";
4179  }
4180  if(aln_vec_info->feat5){
4181  string alignFeature = x_FormatOneDynamicFeature(viewerURL,
4182  aln_vec_info->subject_gi,
4183  aln_vec_info->feat5->range.GetFrom(),
4184  aln_vec_info->feat5->range.GetTo(),
4185  NStr::IntToString(aln_vec_info->actual_range.GetFrom() - aln_vec_info->feat5->range.GetTo()) + (string)" bp at 5' side: " + aln_vec_info->feat5->feat_str);
4186  allAlnFeatures += alignFeature;
4187  }
4188  if(aln_vec_info->feat3){
4189 
4190  string alignFeature = x_FormatOneDynamicFeature(viewerURL,
4191  aln_vec_info->subject_gi,
4192  aln_vec_info->feat3->range.GetFrom(),
4193  aln_vec_info->feat3->range.GetTo(),
4194  NStr::IntToString(aln_vec_info->feat3->range.GetFrom() - aln_vec_info->actual_range.GetTo()) + (string)" bp at 3' side: " + aln_vec_info->feat3->feat_str);
4195  allAlnFeatures += alignFeature;
4196  }
4197  }
4198  if(!allAlnFeatures.empty()) {
4199  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"all_aln_features",allAlnFeatures);
4200  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_feat_show","");
4201  }
4202  else {
4203  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"all_aln_features","");
4204  alignParams = CAlignFormatUtil::MapTemplate(alignParams,"aln_feat_show","hidden");
4205  }
4206  return alignParams;
4207 }
4208 
4209 void CDisplaySeqalign::x_PrintDynamicFeatures(CNcbiOstream& out,SAlnInfo* aln_vec_info)
4211  string l_EntrezSubseqUrl = CAlignFormatUtil::GetURLFromRegistry("ENTREZ_SUBSEQ");
4212 
4213  if(aln_vec_info->feat_list.size() > 0) { //has feature in this range
4214  out << " Features in this part of subject sequence:" << "\n";
4215  ITERATE(vector<SFeatInfo*>, iter, aln_vec_info->feat_list){
4216  out << " ";
4217  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4218  string featStr = s_MapFeatureURL(l_EntrezSubseqUrl,
4219  NStr::NumericToString(aln_vec_info->subject_gi),
4220  m_IsDbNa ? "nucleotide" : "protein",
4221  (*iter)->range.GetFrom() +1 ,
4222  (*iter)->range.GetTo() + 1,
4223  m_Rid);
4224  out << featStr;
4225  }
4226  out << (*iter)->feat_str;
4227  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4228  out << "</a>";
4229  }
4230  out << "\n";
4231  }
4232  } else { //show flank features
4233  if(aln_vec_info->feat5 || aln_vec_info->feat3){
4234  out << " Features flanking this part of subject sequence:" << "\n";
4235  }
4236  if(aln_vec_info->feat5){
4237  out << " ";
4238  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4239  string featStr = s_MapFeatureURL(l_EntrezSubseqUrl,
4240  NStr::NumericToString(aln_vec_info->subject_gi),
4241  m_IsDbNa ? "nucleotide" : "protein",
4242  aln_vec_info->feat5->range.GetFrom() + 1 ,
4243  aln_vec_info->feat5->range.GetTo() + 1,
4244  m_Rid);
4245 
4246  out << featStr;
4247  }
4248  out << aln_vec_info->actual_range.GetFrom() - aln_vec_info->feat5->range.GetTo()
4249  << " bp at 5' side: " << aln_vec_info->feat5->feat_str;
4250  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4251  out << "</a>";
4252  }
4253  out << "\n";
4254  }
4255  if(aln_vec_info->feat3){
4256  out << " ";
4257  if(m_AlignOption&eHtml && aln_vec_info->subject_gi > ZERO_GI){
4258  string featStr = s_MapFeatureURL(l_EntrezSubseqUrl,
4259  NStr::NumericToString(aln_vec_info->subject_gi),
4260  m_IsDbNa ? "nucleotide" : "protein",
4261  aln_vec_info->feat3->range.GetFrom() + 1 ,
4262  aln_vec_info->feat3->range.GetTo() + 1,
4263  m_Rid);
4264 
4265  out << featStr;
4266  }
4267  out << aln_vec_info->feat3->range.GetFrom() - aln_vec_info->actual_range.GetTo()
4268  << " bp at 3' side: " << aln_vec_info->feat3->feat_str;
4269  if(m_AlignOption&eHtml){
4270  out << "</a>";
4271  }
4272  out << "\n";
4273  }
4274  }
4275  if(aln_vec_info->feat_list.size() > 0 || aln_vec_info->feat5 || aln_vec_info->feat3 ){
4276  out << "\n";
4277  }
4278 }
4279 
// Converts masked regions into alignment-coordinate entries appended to
// loc_list.
//   loc_list - output list; one entry is pushed for every mask that matches
//              an alignment row
//   masks    - masked regions; a null pointer leaves loc_list untouched
// For each mask, the first row of m_AV whose seq-id matches the mask interval
// and whose sequence range intersects it determines aln_range; masks matching
// no row are dropped.
// NOTE(review): listing line 4289 is absent from this view; it presumably
// declares `alnloc`, used at 4316-4323 - confirm against the repository copy.
4280 void
4281 CDisplaySeqalign::x_FillLocList(TSAlnSeqlocInfoList& loc_list,
4282  const list< CRef<CSeqLocInfo> >* masks) const
4283 {
4284  if ( !masks ) {
4285  return;
4286  }
4287 
4288  ITERATE(TMaskedQueryRegions, iter, *masks) {
4290  bool has_valid_loc = false;
4291  for (int i=0; i<m_AV->GetNumRows(); i++){
4292  const CSeq_interval& interval = (*iter)->GetInterval();
4293  TSeqRange loc_range(interval.GetFrom(), interval.GetTo());
4294  if(interval.GetId().Match(m_AV->GetSeqId(i)) &&
4295  m_AV->GetSeqRange(i).IntersectingWith(loc_range)){
4296  int actualAlnStart = 0, actualAlnStop = 0;
   // On the positive strand the interval's From maps to the alignment start
   // and its To to the stop; on the negative strand the two are swapped.
4297  if(m_AV->IsPositiveStrand(i)){
4298  actualAlnStart =
4299  m_AV->GetAlnPosFromSeqPos(i,
4300  interval.GetFrom(),
4301  CAlnMap::eBackwards, true);
4302  actualAlnStop =
4303  m_AV->GetAlnPosFromSeqPos(i,
4304  interval.GetTo(),
4305  CAlnMap::eBackwards, true);
4306  } else {
4307  actualAlnStart =
4308  m_AV->GetAlnPosFromSeqPos(i,
4309  interval.GetTo(),
4310  CAlnMap::eBackwards, true);
4311  actualAlnStop =
4312  m_AV->GetAlnPosFromSeqPos(i,
4313  interval.GetFrom(),
4314  CAlnMap::eBackwards, true);
4315  }
4316  alnloc->aln_range.Set(actualAlnStart, actualAlnStop);
4317  has_valid_loc = true;
   // Only the first matching row is used for a given mask.
4318  break;
4319  }
4320  }
4321  if (has_valid_loc) {
4322  alnloc->seqloc = *iter;
4323  loc_list.push_back(alnloc);
4324  }
4325  }
4326 }
4327 
4328 
4329 void
4330 CDisplaySeqalign::x_GetQueryFeatureList(int row_num, int aln_stop,
4331  vector<TSAlnFeatureInfoList>& retval)
4332  const
4333 {
4334  retval.clear();
4335  retval.resize(row_num);
4336  //list<SAlnFeatureInfo*>* bioseqFeature= new list<SAlnFeatureInfo*>[row_num];
4337  if(m_QueryFeature){
4338  for (list<FeatureInfo*>::iterator iter=m_QueryFeature->begin();
4339  iter!=m_QueryFeature->end(); iter++){
4340  for(int i = 0; i < row_num; i++){
4341  if((*iter)->seqloc->GetInt().GetId().Match(m_AV->GetSeqId(i))){
4342  int actualSeqStart = 0, actualSeqStop = 0;
4343  if(m_AV->IsPositiveStrand(i)){
4344  if((*iter)->seqloc->GetInt().GetFrom()
4345  < m_AV->GetSeqStart(i)){
4346  actualSeqStart = m_AV->GetSeqStart(i);
4347  } else {
4348  actualSeqStart = (*iter)->seqloc->GetInt().GetFrom();
4349  }
4350 
4351  if((*iter)->seqloc->GetInt().GetTo() >
4352  m_AV->GetSeqStop(i)){
4353  actualSeqStop = m_AV->GetSeqStop(i);
4354  } else {
4355  actualSeqStop = (*iter)->seqloc->GetInt().GetTo();
4356  }
4357  } else {
4358  if((*iter)->seqloc->GetInt().GetFrom()
4359  < m_AV->GetSeqStart(i)){
4360  actualSeqStart = (*iter)->seqloc->GetInt().GetFrom();
4361  } else {
4362  actualSeqStart = m_AV->GetSeqStart(i);
4363  }
4364 
4365  if((*iter)->seqloc->GetInt().GetTo() >
4366  m_AV->GetSeqStop(i)){
4367  actualSeqStop = (*iter)->seqloc->GetInt().GetTo();
4368  } else {
4369  actualSeqStop = m_AV->GetSeqStop(i);
4370  }
4371  }
4372  int alnFrom = m_AV->GetAlnPosFromSeqPos(i, actualSeqStart);
4373  int alnTo = m_AV->GetAlnPosFromSeqPos(i, actualSeqStop);
4374 
4375  CRef<SAlnFeatureInfo> featInfo(new SAlnFeatureInfo);
4376  string tempFeat = NcbiEmptyString;
4377  if (alnTo - alnFrom >= 0){
4378  x_SetFeatureInfo(featInfo, *((*iter)->seqloc), alnFrom,
4379  alnTo, aln_stop, (*iter)->feature_char,
4380  (*iter)->feature_id, tempFeat, -1);
4381  retval[i].push_back(featInfo);
4382  }
4383  }
4384  }
4385  }
4386  }
4387 }
4388 
// Draws one domain marker into final_domain at alignment columns
// [aln_from, aln_to].
//   aln_from, aln_to - inclusive alignment columns of the domain
//   domain_name      - label centred between the '<' and '>' end markers
//   final_domain     - annotation row to write into; its length is unchanged
// Spans longer than two columns are drawn as '<', dashes, '>' with as much of
// domain_name as fits between the end markers; shorter spans are blanked with
// spaces.
// Robustness: an inverted span (aln_to < aln_from) is ignored, and columns
// that fall outside final_domain (negative, or at/after its end) are skipped
// rather than written out of bounds.
static void s_MakeDomainString(int aln_from, int aln_to, const std::string& domain_name,
                               std::string& final_domain) {

    if (aln_to < aln_from) {
        // A negative length would wrap to a huge size in the string
        // constructor.
        return;
    }

    const int span_len = aln_to - aln_from + 1;
    std::string domain_string(static_cast<std::string::size_type>(span_len), ' ');

    if (span_len > 2){
        domain_string.assign(static_cast<std::string::size_type>(span_len), '-');
        domain_string[0] = '<';
        domain_string[domain_string.size() - 1] = '>';

        //put the domain name in the middle of the string
        const int midpoint = span_len / 2;
        const int name_start = std::max(1, midpoint - static_cast<int>(domain_name.size()) / 2);

        // Never overwrite the closing '>' (last index).
        const std::string::size_type last = domain_string.size() - 1;
        std::string::size_type dst = static_cast<std::string::size_type>(name_start);
        for (std::string::size_type src = 0;
             dst < last && src < domain_name.size(); ++dst, ++src){
            domain_string[dst] = domain_name[src];
        }
    }

    // Copy into the row, clamped to the row's bounds.
    const int row_len = static_cast<int>(final_domain.size());
    for (int i = 0; i < span_len; ++i){
        const int col = aln_from + i;
        if (col < 0) {
            continue;
        }
        if (col >= row_len) {
            break;
        }
        final_domain[static_cast<std::string::size_type>(col)] =
            domain_string[static_cast<std::string::size_type>(i)];
    }
}
4415 
// Builds a single annotation row showing the domains from m_DomainInfo along
// the master (row 0) of the current alignment and appends it to retval[0].
//   row_num  - not referenced in this function body
//   aln_stop - alignment stop used for the synthetic seq-loc and feature info
//   retval   - per-row feature lists; only retval[0] is written
// Does nothing when m_DomainInfo is null or empty. Only domains whose seq-id
// matches row 0 are drawn, and only when row 0 is on the positive strand.
// NOTE(review): retval[0] is indexed without checking that retval is
// non-empty - callers must size it first.
4416 void CDisplaySeqalign::x_GetDomainInfo(int row_num, int aln_stop,
4417  vector<TSAlnFeatureInfoList>& retval) const
4418 {
4419 
4420  if(m_DomainInfo && !m_DomainInfo->empty()){
   // One character per alignment column, initially blank.
4421  string final_domain (m_AV->GetAlnStop() + 1, ' ');
   // Seeded with the last column so that min() at 4461 leaves the first
   // drawn domain's start unchanged; afterwards it holds the end column of
   // the previously drawn domain.
4422  int last_aln_to = m_AV->GetAlnStop();
4423  for (list<CRef<DomainInfo> >::iterator iter=m_DomainInfo->begin();
4424  iter!=m_DomainInfo->end(); iter++){
4425  if((*iter)->seqloc->GetInt().GetId().Match(m_AV->GetSeqId(0))){
4426  int actualSeqStart = 0, actualSeqStop = 0;
4427  if(m_AV->IsPositiveStrand(0)){ //only show domain on positive strand
   // Clip the domain to the aligned part of the master sequence.
4428  actualSeqStart = max((int)m_AV->GetSeqStart(0),
4429  (int)(*iter)->seqloc->GetInt().GetFrom());
4430 
4431  actualSeqStop = min((int)m_AV->GetSeqStop(0),
4432  (int)(*iter)->seqloc->GetInt().GetTo());
4433 
4434  int alnFrom = m_AV->GetAlnPosFromSeqPos(0, actualSeqStart);
4435  //check if there is gap between this and last seq position on master
4436  if (actualSeqStart > 0 && (*iter)->is_subject_start_valid) {
4437  if (alnFrom -
4438  m_AV->GetAlnPosFromSeqPos(0, actualSeqStart - 1) > 1) {
4439  //if so then use subject seq to get domain boundary
4440  int subj_aln_from = m_AV->GetAlnPosFromSeqPos(1,
4441  (int)(*iter)->subject_seqloc->GetInt().GetFrom());
4442  if (subj_aln_from >= 0) {
4443  alnFrom = subj_aln_from;
4444  }
4445  }
4446  }
4447 
4448  int alnTo = m_AV->GetAlnPosFromSeqPos(0, actualSeqStop);
4449  //check if there is gap between this and next seq position on master
4450  if (actualSeqStop < (int)m_AV->GetSeqStop(0) &&
4451  (*iter)->is_subject_stop_valid) {
4452  if (m_AV->GetAlnPosFromSeqPos(0, actualSeqStop + 1) - alnTo > 1) {
4453  //if so then use subject seq to get domain boundary
4454  int subj_aln_to = m_AV->GetAlnPosFromSeqPos(1,
4455  (int)(*iter)->subject_seqloc->GetInt().GetTo());
4456  if (subj_aln_to >= 0) {
4457  alnTo = subj_aln_to;
4458  }
4459  }
4460  }
   // Start no later than one column past the previous domain's end.
4461  int actual_aln_from = min(alnFrom,last_aln_to +1);
4462  if (actual_aln_from > alnTo) {
4463  //domain is not correct, no showing
   // This returns from the whole function: no domain row is added to
   // retval, including domains already drawn into final_domain.
4464  return;
4465  }
4466  s_MakeDomainString(actual_aln_from, alnTo, (*iter)->domain_name, final_domain);
4467 
4468  last_aln_to = alnTo;
4469 
4470  }
4471  }
4472  }
   // Wrap the finished row in a feature-info record spanning [0, aln_stop],
   // using the seq-id of the first domain entry.
4473  CRef<SAlnFeatureInfo> featInfo(new SAlnFeatureInfo);
4474  CRef<CSeq_loc> seqloc(new CSeq_loc((CSeq_loc::TId &) m_DomainInfo->front()->seqloc->GetInt().GetId(),
4475  (CSeq_loc::TPoint) 0,
4476  (CSeq_loc::TPoint) aln_stop));
4477  x_SetFeatureInfo(featInfo, *(seqloc), 0,
4478  aln_stop, aln_stop, ' ',
4479  " ", final_domain, -1);
4480  retval[0].push_back(featInfo);
4481  }
4482 }
4483 
// Produces the label shown in front of an alignment row and stores it in id.
//   id  - output label
//   row - alignment row; 0 is the query
// Visible behaviour: in one mode the query row is labelled "Query" and, for
// pairwise output (eMergeAlign not set), other rows are labelled "Sbjct";
// otherwise, when eShowGi is set, a gi is looked up (from the row's seq-id,
// then from the bioseq's id list) and used as the label when found.
// NOTE(review): listing lines 4485, 4501, 4522-4523, 4528-4529, 4547-4548 and
// 4553-4554 are absent from this view. They include the condition that
// selects between the "Query/Sbjct" branch and the id-based branch at 4533,
// and the statements that turn `wid` into the label - confirm against the
// repository copy.
4484 void CDisplaySeqalign::x_FillSeqid(string& id, int row) const
4486  static string kQuery("Query");
4487  static string kSubject("Sbjct");
4488 
4489 #ifdef CTOOLKIT_COMPATIBLE
4490  /* Facilitates comparing formatted output using diff */
   // One-time adjustment: appends ":" to both labels when the environment
   // variable CTOOLKIT_COMPATIBLE is set.
   // NOTE(review): these function-local statics are modified without any
   // locking visible here.
4491  static bool value_set = false;
4492  if ( !value_set ) {
4493  if (getenv("CTOOLKIT_COMPATIBLE")) {
4494  kQuery.append(":");
4495  kSubject.append(":");
4496  }
4497  value_set = true;
4498  }
4499 #endif /* CTOOLKIT_COMPATIBLE */
4500 
4502  if(row==0){//query
4503  id=kQuery;
4504  } else {//hits
4505  if (!(m_AlignOption&eMergeAlign)){
4506  //hits for pairwise
4507  id=kSubject;
4508  } else {
4509  if(m_AlignOption&eShowGi){
4510  TGi gi = ZERO_GI;
4511  if(m_AV->GetSeqId(row).Which() == CSeq_id::e_Gi){
4512  gi = m_AV->GetSeqId(row).GetGi();
4513  }
   // Fall back to searching the bioseq's id list for a gi.
4514  if(!(gi > ZERO_GI)){
4515  gi = CAlignFormatUtil::GetGiForSeqIdList(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId());
4516  }
4517  if(gi > ZERO_GI){
4518  id=NStr::NumericToString(gi);
4519  } else {
4520  const CRef<CSeq_id> wid
4521  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4524  }
4525  } else {
4526  const CRef<CSeq_id> wid
4527  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4530  }
4531  }
4532  }
4533  } else {
   // Same gi / best-id lookup as above, applied to every row including row 0.
4534  if(m_AlignOption&eShowGi){
4535  TGi gi = ZERO_GI;
4536  if(m_AV->GetSeqId(row).Which() == CSeq_id::e_Gi){
4537  gi = m_AV->GetSeqId(row).GetGi();
4538  }
4539  if(!(gi > ZERO_GI)){
4540  gi = CAlignFormatUtil::GetGiForSeqIdList(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId());
4541  }
4542  if(gi > ZERO_GI){
4543  id=NStr::NumericToString(gi);
4544  } else {
4545  const CRef<CSeq_id> wid
4546  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4549  }
4550  } else {
4551  const CRef<CSeq_id> wid
4552  = FindBestChoice(m_AV->GetBioseqHandle(row).\ GetBioseqCore()->GetId(),
4555  }
4556  }
4557 }
4558 
4559 
// Pre-computes the per-subject link parameters (m_AlnLinksParams) for the
// alignments that will be shown, by calling x_CalcUrlLinksParams() on at most
// m_NumAlignToShow entries of actual_aln_list.
// NOTE(review): the signature line (listing 4560) and listing line 4572 (the
// rest of the condition that closes before 4573) are absent from this view -
// confirm the function name and the full condition against the repository
// copy.
4561 {
4562  int num_align = 0;
4563  //get segs first and get hspNumber,segs and subjRange per sequence in alignment
   // TOOL_URL is read from the registry only for HTML output; otherwise it
   // stays empty.
4564  string toolUrl = NcbiEmptyString;
4565  if(m_AlignOption & eHtml){
4566  toolUrl = m_Reg->Get(m_BlastType, "TOOL_URL");
4567  }
4568  if( // Calculate m_AlnLinksParams->segs,hspNum, subjRange only for the following conditions
4569  (!(m_AlignOption & eMergeAlign) &&
4570  (toolUrl.find("dumpgnl.cgi") != string::npos
4571  || (m_AlignOption & eLinkout)
4573  /*need to construct segs for dumpgnl and
4574  get sub-sequence for long sequences*/
4575 
4576  for (CSeq_align_set::Tdata::const_iterator
4577  iter = actual_aln_list.Get().begin();
4578  iter != actual_aln_list.Get().end()
4579  && num_align<m_NumAlignToShow; iter++, num_align++) {
4580 
   // Parameters are keyed by the subject's (row 1) seq-id string.
4581  CConstRef<CSeq_id> subid;
4582  subid = &((*iter)->GetSeq_id(1));
4583  string idString = subid->GetSeqIdString();
4584 
4585  x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange
4586  }
4587  }
4588 }
4589 
4590 
4591 
// Accumulates link parameters for one subject sequence in
// m_AlnLinksParams[idString] from a single alignment (HSP).
//   align    - alignment to account for
//   idString - key into m_AlnLinksParams (subject seq-id string)
//   toolUrl  - TOOL_URL registry value; "dumpgnl.cgi" in it (or eLinkout)
//              enables the `segs` list
// The first alignment for an id creates the entry, its subject range and its
// flip flag; later alignments extend `segs`, widen subjRange and (under the
// condition on the missing line 4625) bump hspNumber.
// NOTE(review): listing lines 4599 and 4625 are absent from this view.
// NOTE(review): when `first` is true the SAlnLinksParams allocated with `new`
// is copied into the map at 4630 and no matching delete is visible - this
// looks like a leak of the temporary unless 4599 or the type's ownership
// rules handle it. subjRange is likewise allocated with `new` at 4614 and
// its owner is not visible here.
4592 void CDisplaySeqalign::x_CalcUrlLinksParams(const CSeq_align& align, string idString,string toolUrl)
4593 {
4594  //make alnvector
4595  CRef<CAlnVec> avRef = x_GetAlnVecForSeqalign(align);
4596 
4597  bool first = m_AlnLinksParams.count(idString) == 0;
4598  struct SAlnLinksParams *alnLinksParam = first ? new SAlnLinksParams : &m_AlnLinksParams[idString];
4600 
4601  if (toolUrl.find("dumpgnl.cgi") != string::npos || (m_AlignOption & eLinkout)) {
   // segs is a comma-separated list of "start-stop" subject ranges.
4602  if(!first){
4603  alnLinksParam->segs += ",";
4604  }
4605  alnLinksParam->segs += NStr::IntToString(avRef->GetSeqStart(1))
4606  + "-" +
4607  NStr::IntToString(avRef->GetSeqStop(1));
4608  }
4609 
4610 
   // Subject range of this alignment with from <= to.
4611  TSeqPos from = (avRef->GetSeqStart(1)> avRef->GetSeqStop(1)) ? avRef->GetSeqStop(1) : avRef->GetSeqStart(1);
4612  TSeqPos to = (avRef->GetSeqStart(1)> avRef->GetSeqStop(1)) ? avRef->GetSeqStart(1) : avRef->GetSeqStop(1);
4613  if(first) {
4614  alnLinksParam->subjRange = new CRange<TSeqPos>(from,to);
   // flip is set when query and subject strands differ.
4615  alnLinksParam->flip = avRef->StrandSign(0) != avRef->StrandSign(1);
4616  }
4617  else{
   // Widen the stored range to cover this alignment as well.
4618  TSeqPos currFrom = alnLinksParam->subjRange->GetFrom();
4619  TSeqPos currTo = alnLinksParam->subjRange->GetTo();
4620  alnLinksParam->subjRange->SetFrom(min(from,currFrom));
4621  alnLinksParam->subjRange->SetTo(max(to,currTo));
4622  }
4623 
4624 
4626  alnLinksParam->hspNumber = (!first) ? alnLinksParam->hspNumber + 1 : 1;
4627  }
4628 
4629  if(first){
4630  m_AlnLinksParams.insert(map<string, struct SAlnLinksParams>::value_type(idString,*alnLinksParam));
4631  }
4632 }
4633 
4634 
4635 
4636 void CDisplaySeqalign::x_PreProcessSingleAlign(CSeq_align_set::Tdata::const_iterator currSeqAlignIter,
4637  CSeq_align_set &actual_aln_list,
4638  bool multipleSeqs)
4639 {
4640  CConstRef<CSeq_id> subid;
4641 
4642  string toolUrl;
4643  if(multipleSeqs && (m_AlignOption & eHtml)) {
4644  //actually this is needed for long sequences only
4645  toolUrl = m_Reg->Get(m_BlastType, "TOOL_URL");
4646  }
4647 
4648  string idString, prevIdString;
4649  for (CSeq_align_set::Tdata::const_iterator
4650  iter = currSeqAlignIter;
4651  iter != actual_aln_list.Get().end();iter++) {
4652 
4653  subid = &((*iter)->GetSeq_id(1));
4654  idString = subid->GetSeqIdString();
4655  if(prevIdString.empty() || prevIdString == idString) {
4656  x_CalcUrlLinksParams(**iter,idString,toolUrl);//sets m_AlnLinksParams->segs,hspNum, subjRange
4657  }
4658  else {
4659  break;
4660  }
4661  prevIdString = idString;
4662  }
4663 }
4664 
4665 
// Prints pairwise alignments for a selected subset of subject sequences.
//   out         - destination stream
//   selectedIDs - ids to display: the numeric gi for gi seq-ids, otherwise the
//                 CSeq_id::eContent label
// Alignments whose subject id is not selected are skipped; once as many
// distinct selected subjects as selectedIDs.size() have been seen, the first
// unselected alignment ends the loop. The defline is shown only for the
// first alignment of each subject.
// NOTE(review): listing lines 4673, 4734 and 4743 are absent from this view.
// 4673 is the start of the call that fills actual_aln_list from
// *m_SeqalignSetRef, and 4743 is the start of the call that fills the score
// fields listed at 4744-4750 - confirm against the repository copy.
// NOTE(review): selectedIDs is taken by value, so the set is copied per call.
4666 void CDisplaySeqalign::DisplayPairwiseSeqalign(CNcbiOstream& out,unordered_set <string> selectedIDs) //(blast_rank = 1,2...)
4667 {
4668  string alignRows;
4669  unordered_set <string> :: const_iterator idsIter;
4670 
4671  CSeq_align_set actual_aln_list;
4672  //Not sure we need this - check with Jean
4674  *m_SeqalignSetRef);
4675  if (actual_aln_list.Get().empty()){
4676  return;
4677  }
4678  //scope for feature fetching
4679  //sets m_featScope, m_CanRetrieveSeq,m_DynamicFeature
4680  x_InitAlignParams(actual_aln_list);
4681 
4682  CConstRef<CSeq_id> previousId, subid;
4683 
   // idCount counts distinct selected subjects seen so far.
4684  int idCount = 0;
4685  m_currAlignHsp = 0;
4686  bool showBlastDefline = false;
4687  for (CSeq_align_set::Tdata::const_iterator
4688  iter = actual_aln_list.Get().begin();
4689  iter != actual_aln_list.Get().end();iter++) {
4690 
4691  subid = &((*iter)->GetSeq_id(1));
4692 
4693 
   // Build the lookup key in the same form as the entries of selectedIDs.
4694  string currID;
4695  if(subid->Which() == CSeq_id::e_Gi) {
4696  TGi currGi = subid->GetGi();
4697  currID = NStr::NumericToString(currGi);
4698  }
4699  else {
4700  subid->GetLabel(&currID, CSeq_id::eContent);
4701  }
4702  idsIter = selectedIDs.find(currID);
4703 
4704  //seqid from seqalign not found in input seq list
   // Keep scanning while selected subjects are still outstanding; stop once
   // all of them have been seen.
4705  if(idsIter == selectedIDs.end() && idCount < (int)selectedIDs.size()) continue;
4706  if(idsIter == selectedIDs.end() && idCount >= (int)selectedIDs.size()) break;
4707 
4708  //reach here if currID from seqalign found in selectedIDs list
4709  if(previousId.Empty() ||
4710  !subid->Match(*previousId)){
4711  idCount++;
4712 
4713 
4714  //Calculates m_HSPNum for showing sorting links
4715  //If getSegs = true calculates m_segs for showing download chicklet for large seqs
4716  x_PreProcessSingleAlign(iter,actual_aln_list,selectedIDs.size() > 1);
4717  showBlastDefline = true;
4718 
4719  }
4720  else {
4721  showBlastDefline = false;
4722  }
4723 
4724  if(!previousId.Empty() &&
4725  !subid->Match(*previousId)){
4726  m_Scope.RemoveFromHistory(m_Scope.GetBioseqHandle(*previousId)); //release memory
4727  }
4728  previousId = subid;
4729  //make alnvector
4730  CRef<CAlnVec> avRef = x_GetAlnVecForSeqalign(**iter);
4731 
4732  if(!(avRef.Empty())){
4733  //Note: do not switch the set order per calnvec specs.
4735  avRef->SetGenCode(m_MasterGeneticCode, 0);
4736  try{
4737  const CBioseq_Handle& handle = avRef->GetBioseqHandle(1);
4738  if(handle){
4739  //save the current alnment regardless
4740  CRef<SAlnInfo> alnvecInfo(new SAlnInfo);
4741 
4742  int num_ident;
4744  alnvecInfo->score,
4745  alnvecInfo->bits,
4746  alnvecInfo->evalue,
4747  alnvecInfo->sum_n,
4748  num_ident,
4749  alnvecInfo->use_this_seqid,
4750  alnvecInfo->comp_adj_method);
4751 
4752  alnvecInfo->alnvec = avRef;
4753 
4754  x_DisplayAlnvecInfo(out,alnvecInfo,showBlastDefline);
4755  }
4756  } catch (const CException&){
   // A CException thrown in the try block above is reported in the output
   // and the alignment is skipped; the loop then continues.
4757  out << "Sequence with id "
4758  << (avRef->GetSeqId(1)).GetSeqIdString().c_str()
4759  <<" no longer exists in database...alignment skipped\n";
4760  }
4761  }
4762  }
4763 }
4764 
4765 END_SCOPE(align_format)
4767 
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
Sequence alignment display tool.
static const char kDownloadLink[]
static const char kLinkoutOrderStr[]
Default linkout order.
const int k_NumAsciiChar
Number of ASCII characters for populating matrix columns.
static const char kDownloadUrl[]
dumpgnl
static const char kDownloadImg[]
static const char kBl2seqUrl[]
static const char k_GetSeqSelectForm[]
static const char k_GetTreeViewForm[]
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
Definition: asci_blk.cpp:1685
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
Definition: blast_options.h:77
AutoPtr –.
Definition: ncbimisc.hpp:401
static string GetIDUrl(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid.
static void GetAsciiProteinMatrix(const char *matrix_name, CNcbiMatrix< int > &retval)
Retrieve a scoring matr