NCBI C++ ToolKit
align_format_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: align_format_util.cpp 102063 2024-03-25 14:32:04Z camacho $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  * 12/2004
28  * File Description:
29  * blast formatter utilities
30  *
31  */
32 #include <ncbi_pch.hpp>
33 
34 #include <math.h> // For use of ceil
35 
37 
38 #include <corelib/ncbireg.hpp>
39 #include <corelib/ncbidiag.hpp>
40 #include <corelib/ncbistre.hpp>
41 #include <corelib/ncbiutil.hpp>
42 #include <corelib/ncbiobj.hpp>
43 #include <corelib/ncbifile.hpp>
44 #include <corelib/metareg.hpp>
45 #include <html/htmlhelper.hpp>
46 #include <cgi/cgictx.hpp>
48 
49 
57 #include <objects/seq/Seq_inst.hpp>
59 #include <objects/seq/Seqdesc.hpp>
60 #include <objmgr/seqdesc_ci.hpp>
63 
64 #include <objtools/blast/services/blast_services.hpp> // for CBlastServices
65 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDB
66 #include <objtools/blast/seqdb_reader/seqdbcommon.hpp> // for CSeqDBException
67 
72 
73 #include <stdio.h>
74 #include <sstream>
75 #include <iomanip>
76 
80 BEGIN_SCOPE(align_format)
81 
82 const char CAlignFormatUtil::kNoHitsFound[] = "No hits found";
83 
86 
87 const char k_PSymbol[ePMatrixSize+1] =
88 "ARNDCQEGHILKMFPSTWYVBZX";
89 
90 unique_ptr<CNcbiRegistry> CAlignFormatUtil::m_Reg;
93 
94 ///Get blast score information
95 ///@param scoreList: score container to extract score info from
96 ///@param score: place to extract the raw score to
97 ///@param bits: place to extract the bit score to
98 ///@param evalue: place to extract the e value to
99 ///@param sum_n: place to extract the sum_n to
100 ///@param num_ident: place to extract the num_ident to
101 ///@param use_this_gi: place to extract use_this_gi to
102 ///@return true if found score, false otherwise
103 ///
104 template<class container> bool
105 s_GetBlastScore(const container& scoreList,
106  int& score,
107  double& bits,
108  double& evalue,
109  int& sum_n,
110  int& num_ident,
111  list<TGi>& use_this_gi,
112  int& comp_adj_method)
113 {
114  const string k_GiPrefix = "gi:";
115  bool hasScore = false;
116  ITERATE (typename container, iter, scoreList) {
117  const CObject_id& id=(*iter)->GetId();
118  if (id.IsStr()) {
119  if (id.GetStr()=="score"){
120  score = (*iter)->GetValue().GetInt();
121  } else if (id.GetStr()=="bit_score"){
122  bits = (*iter)->GetValue().GetReal();
123  } else if (id.GetStr()=="e_value" || id.GetStr()=="sum_e") {
124  evalue = (*iter)->GetValue().GetReal();
125  hasScore = true;
126  } else if (id.GetStr()=="use_this_gi"){
127  Uint4 gi_v = (Uint4)((*iter)->GetValue().GetInt());
128  use_this_gi.push_back(GI_FROM(Uint4, gi_v));
129  } else if (id.GetStr()=="sum_n"){
130  sum_n = (*iter)->GetValue().GetInt();
131  } else if (id.GetStr()=="num_ident"){
132  num_ident = (*iter)->GetValue().GetInt();
133  } else if (id.GetStr()=="comp_adjustment_method") {
134  comp_adj_method = (*iter)->GetValue().GetInt();
135  }
136  else if(NStr::StartsWith(id.GetStr(),k_GiPrefix)) { //will be used when switch to 64bit GIs
137  string strGi = NStr::Replace(id.GetStr(),k_GiPrefix,"");
138  TGi gi = NStr::StringToNumeric<TGi>(strGi);
139  use_this_gi.push_back(gi);
140  }
141  }
142  }
143 
144  return hasScore;
145 }
146 
147 
148 ///Wrap a string to specified length. If break happens to be in
149 /// a word, it will extend the line length until the end of the word
150 ///@param str: input string
151 ///@param line_len: length of each line desired
///@param out: stream to output
153 ///
154 void CAlignFormatUtil::x_WrapOutputLine(string str, size_t line_len,
155  CNcbiOstream& out, bool html)
156 {
157  list<string> string_l;
159  if (html) {
162  }
163  NStr::Wrap(str, line_len, string_l, flags);
164  list<string>::iterator iter = string_l.begin();
165  while(iter != string_l.end())
166  {
167  out << *iter;
168  out << "\n";
169  iter++;
170  }
171 }
172 
173 void CAlignFormatUtil::BlastPrintError(list<SBlastError>&
174  error_return,
175  bool error_post, CNcbiOstream& out)
176 {
177 
178  string errsevmsg[] = { "UNKNOWN","INFO","WARNING","ERROR",
179  "FATAL"};
180 
181  NON_CONST_ITERATE(list<SBlastError>, iter, error_return) {
182 
183  if(iter->level > 5){
184  iter->level = eDiag_Info;
185  }
186 
187  if(iter->level == 4){
188  iter->level = eDiag_Fatal;
189  } else{
190  iter->level = iter->level;
191  }
192 
193  if (error_post){
194  ERR_POST_EX(iter->level, 0, iter->message);
195  }
196  out << errsevmsg[iter->level] << ": " << iter->message << "\n";
197 
198  }
199 
200 }
201 
202 void CAlignFormatUtil::PrintTildeSepLines(string str, size_t line_len,
203  CNcbiOstream& out) {
204 
205  vector<string> split_line;
206  NStr::Split(str, "~", split_line);
207  ITERATE(vector<string>, iter, split_line) {
208  x_WrapOutputLine(*iter, line_len, out);
209  }
210 }
211 #ifdef DO_UNUSED
212 /// Initialize database statistics with data from BLAST servers
213 /// @param dbname name of a single BLAST database [in]
214 /// @param info structure to fill [in|out]
215 /// @return true if successfully filled, false otherwise (and a warning is
216 /// printed out)
217 static bool s_FillDbInfoRemotely(const string& dbname,
219 {
220  static CBlastServices rmt_blast_services;
222  blastdb->SetName(dbname);
223  blastdb->SetType() = info.is_protein
226  rmt_blast_services.GetDatabaseInfo(blastdb);
227 
228  info.name = dbname;
229  if ( !dbinfo ) {
230  return false;
231  }
232  info.definition = dbinfo->GetDescription();
233  if (info.definition.empty())
234  info.definition = info.name;
235  CTimeFormat tf("b d, Y H:m P", CTimeFormat::fFormat_Simple);
236  info.date = CTime(dbinfo->GetLast_updated()).AsString(tf);
237  info.total_length = dbinfo->GetTotal_length();
238  info.number_seqs = static_cast<int>(dbinfo->GetNum_sequences());
239  return true;
240 }
241 #endif
242 /// Initialize database statistics with data obtained from local BLAST
243 /// databases
244 /// @param dbname name of a single BLAST database [in]
245 /// @param info structure to fill [in|out]
246 /// @param dbfilt_algorithm filtering algorithm ID used for this search
247 /// [in]
248 /// @return true if successfully filled, false otherwise (and a warning is
249 /// printed out)
250 static bool
253  int dbfilt_algorithm)
254 {
255  CRef<CSeqDB> seqdb(new CSeqDB(dbname, info.is_protein
257  if ( !seqdb ) {
258  return false;
259  }
260  info.name = seqdb->GetDBNameList();
261  info.definition = seqdb->GetTitle();
262  if (info.definition.empty())
263  info.definition = info.name;
264  info.date = seqdb->GetDate();
265  info.total_length = seqdb->GetTotalLength();
266  info.number_seqs = seqdb->GetNumSeqs();
267 
268  // Process the filtering algorithm IDs
269  info.filt_algorithm_name.clear();
270  info.filt_algorithm_options.clear();
271  if (dbfilt_algorithm == -1) {
272  return true;
273  }
274 
275 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
276  (!defined(NCBI_COMPILER_MIPSPRO)) )
277  string filtering_algorithm;
278  seqdb->GetMaskAlgorithmDetails(dbfilt_algorithm,
279  filtering_algorithm,
280  info.filt_algorithm_name,
281  info.filt_algorithm_options);
282 #endif
283  return true;
284 }
285 
286 void
287 CAlignFormatUtil::FillScanModeBlastDbInfo(vector<CAlignFormatUtil::SDbInfo>& retval,
288  bool is_protein, int numSeqs, Int8 numLetters, string& tag)
289 {
290  retval.clear();
292  info.is_protein = is_protein;
293  if (tag == "")
294  info.definition = string("User specified sequence set.");
295  else
296  {
297  info.definition = string("User specified sequence set ") +
298  string("(Input: ") + tag + string(").");
299  }
300  info.number_seqs = numSeqs;
301  info.total_length = numLetters;
302  retval.push_back(info);
303 }
304 
305 void
306 CAlignFormatUtil::GetBlastDbInfo(vector<CAlignFormatUtil::SDbInfo>& retval,
307  const string& blastdb_names, bool is_protein,
308  int dbfilt_algorithm /* = -1 */,
309  bool is_remote /* = false */)
310 {
311  retval.clear();
312  if( is_remote ){
313  bool found_all = false;
314  static CBlastServices rmt_blast_services;
315  vector<string> missing_names;
316  vector< CRef<objects::CBlast4_database_info> > all_db_info =
317  rmt_blast_services.GetDatabaseInfo(blastdb_names,is_protein,&found_all,&missing_names);
318  if( !missing_names.empty() ){
319  string msg("'");
320  for(size_t ndx=0 ; ndx < missing_names.size(); ndx++){
321  msg += missing_names[ndx];
322  }
323  msg += string("' not found on NCBI servers.\n");
324  NCBI_THROW(CSeqDBException, eFileErr, msg);
325  }
326  for(size_t ndx=0 ; ndx < all_db_info.size(); ndx++){
328  objects::CBlast4_database_info &dbinfo = *all_db_info[ndx];
329  info.name = dbinfo.GetDatabase().GetName();
330  info.definition = dbinfo.GetDescription();
331  if (info.definition.empty())
332  info.definition = info.name;
333  CTimeFormat tf("b d, Y H:m P", CTimeFormat::fFormat_Simple);
334  info.date = CTime(dbinfo.GetLast_updated()).AsString(tf);
335  info.total_length = dbinfo.GetTotal_length();
336  info.number_seqs = static_cast<int>(dbinfo.GetNum_sequences());
337  if (info.total_length < 0) {
338  const string kDbName = NStr::TruncateSpaces(info.name);
339  if( ! s_FillDbInfoLocally(kDbName, info, dbfilt_algorithm) ){
340  string msg("'");
341  msg += kDbName;
342  msg += string("' has bad total length on NCBI servers.\n");
343  NCBI_THROW(CSeqDBException, eFileErr, msg);
344  }
345  }
346  retval.push_back(info);
347  }
348  return;
349  }
350  else{
351  vector<CTempString> dbs;
352  SeqDB_SplitQuoted(blastdb_names, dbs, true);
353  retval.reserve(dbs.size());
354 
355  ITERATE(vector<CTempString>, i, dbs) {
357  info.is_protein = is_protein;
358  bool success = false;
359  // Unsafe OK as kDbName only used in this loop.
360  const string kDbName = NStr::TruncateSpaces_Unsafe(*i);
361  if (kDbName.empty())
362  continue;
363 
364  success = s_FillDbInfoLocally(kDbName, info, dbfilt_algorithm);
365 
366  if (success) {
367  retval.push_back(info);
368  } else {
369  string msg("'");
370  msg += kDbName;
371  if (is_remote)
372  msg += string("' not found on NCBI servers.\n");
373  else
374  msg += string("' not found.\n");
375  NCBI_THROW(CSeqDBException, eFileErr, msg);
376  }
377  }
378  }
379 }
380 
381 void CAlignFormatUtil::PrintDbReport(const vector<SDbInfo>& dbinfo_list,
382  size_t line_length,
383  CNcbiOstream& out,
384  bool top)
385 {
386  if (top) {
387  const CAlignFormatUtil::SDbInfo* dbinfo = &(dbinfo_list.front());
388  out << "Database: ";
389 
390  string db_titles = dbinfo->definition;
391  Int8 tot_num_seqs = static_cast<Int8>(dbinfo->number_seqs);
392  Int8 tot_length = dbinfo->total_length;
393 
394  for (size_t i = 1; i < dbinfo_list.size(); i++) {
395  db_titles += "; " + dbinfo_list[i].definition;
396  tot_num_seqs += static_cast<Int8>(dbinfo_list[i].number_seqs);
397  tot_length += dbinfo_list[i].total_length;
398  }
399 
400  x_WrapOutputLine(db_titles, line_length, out);
401  if ( !dbinfo->filt_algorithm_name.empty() ) {
402  out << "Masked using: '" << dbinfo->filt_algorithm_name << "'";
403  if ( !dbinfo->filt_algorithm_options.empty() ) {
404  out << ", options: '" << dbinfo->filt_algorithm_options << "'";
405  }
406  out << endl;
407  }
409  out << NStr::Int8ToString(tot_num_seqs, NStr::fWithCommas) <<
410  " sequences; " <<
411  NStr::Int8ToString(tot_length, NStr::fWithCommas) <<
412  " total letters\n\n";
413  return;
414  }
415 
416  ITERATE(vector<SDbInfo>, dbinfo, dbinfo_list) {
417  if (dbinfo->subset == false) {
418  out << " Database: ";
419  x_WrapOutputLine(dbinfo->definition, line_length, out);
420 
421  if ( !dbinfo->filt_algorithm_name.empty() ) {
422  out << " Masked using: '" << dbinfo->filt_algorithm_name << "'";
423  if ( !dbinfo->filt_algorithm_options.empty() ) {
424  out << ", options: '" << dbinfo->filt_algorithm_options << "'";
425  }
426  out << endl;
427  }
428 
429  out << " Posted date: ";
430  out << dbinfo->date << "\n";
431 
432  out << " Number of letters in database: ";
433  out << NStr::Int8ToString(dbinfo->total_length,
434  NStr::fWithCommas) << "\n";
435  out << " Number of sequences in database: ";
436  out << NStr::IntToString(dbinfo->number_seqs,
437  NStr::fWithCommas) << "\n";
438 
439  } else {
440  out << " Subset of the database(s) listed below" << "\n";
441  out << " Number of letters searched: ";
442  out << NStr::Int8ToString(dbinfo->total_length,
443  NStr::fWithCommas) << "\n";
444  out << " Number of sequences searched: ";
445  out << NStr::IntToString(dbinfo->number_seqs,
446  NStr::fWithCommas) << "\n";
447  }
448  out << "\n";
449  }
450 
451 }
452 
453 void CAlignFormatUtil::PrintKAParameters(double lambda, double k, double h,
454  size_t line_len,
455  CNcbiOstream& out, bool gapped,
456  const Blast_GumbelBlk *gbp)
457 {
458 
459  char buffer[256];
460  if (gapped) {
461  out << "Gapped" << "\n";
462  }
463  out << "Lambda K H";
464  if (gbp) {
465  if (gapped) {
466  out << " a alpha sigma";
467  } else {
468  out << " a alpha";
469  }
470  }
471  out << "\n";
472  sprintf(buffer, "%#8.3g ", lambda);
473  out << buffer;
474  sprintf(buffer, "%#8.3g ", k);
475  out << buffer;
476  sprintf(buffer, "%#8.3g ", h);
477  out << buffer;
478  if (gbp) {
479  if (gapped) {
480  sprintf(buffer, "%#8.3g ", gbp->a);
481  out << buffer;
482  sprintf(buffer, "%#8.3g ", gbp->Alpha);
483  out << buffer;
484  sprintf(buffer, "%#8.3g ", gbp->Sigma);
485  out << buffer;
486  } else {
487  sprintf(buffer, "%#8.3g ", gbp->a_un);
488  out << buffer;
489  sprintf(buffer, "%#8.3g ", gbp->Alpha_un);
490  out << buffer;
491  }
492  //x_WrapOutputLine(buffer, line_len, out);
493  }
494  out << "\n";
495 }
496 
497 string
498 CAlignFormatUtil::GetSeqIdString(const CBioseq& cbs, bool believe_local_id)
499 {
500  const CBioseq::TId& ids = cbs.GetId();
501  return CAlignFormatUtil::GetSeqIdString(ids, believe_local_id);
502 }
503 
504 string
505 CAlignFormatUtil::GetSeqIdString(const list<CRef<CSeq_id> > & ids, bool believe_local_id)
506 {
507  string all_id_str = NcbiEmptyString;
509 
510  if (wid && (wid->Which()!= CSeq_id::e_Local || believe_local_id)){
511  TGi gi = FindGi(ids);
512 
513  bool use_long_seqids = false;
515  if (app) {
516  const CNcbiRegistry& registry = app->GetConfig();
517  use_long_seqids = (registry.Get("BLAST", "LONG_SEQID") == "1");
518  }
519  if (!use_long_seqids) {
520 
521  all_id_str = GetBareId(*wid);
522  }
523  else if (strncmp(wid->AsFastaString().c_str(), "lcl|", 4) == 0) {
524  if(gi == ZERO_GI){
525  all_id_str = wid->AsFastaString().substr(4);
526  } else {
527  all_id_str = "gi|" + NStr::NumericToString(gi) +
528  "|" + wid->AsFastaString().substr(4);
529  }
530  } else {
531  if(gi == ZERO_GI){
532  all_id_str = wid->AsFastaString();
533  } else {
534  all_id_str = "gi|" + NStr::NumericToString(gi) + "|" +
535  wid->AsFastaString();
536  }
537  }
538  }
539 
540  return all_id_str;
541 }
542 
543 string
545 {
546  string all_descr_str = NcbiEmptyString;
547 
548  if (cbs.IsSetDescr()) {
549  const CBioseq::TDescr& descr = cbs.GetDescr();
550  const CBioseq::TDescr::Tdata& data = descr.Get();
552  if((*iter)->IsTitle()) {
553  all_descr_str += (*iter)->GetTitle();
554  }
555  }
556  }
557  return all_descr_str;
558 }
559 
561  size_t line_len,
562  CNcbiOstream& out,
563  bool believe_query,
564  bool html,
565  bool tabular /* = false */,
566  const string& rid /* = kEmptyStr*/)
567 {
568  const string label("Query");
570  believe_query, html,
571  label, tabular, rid);
572 }
573 
574 void
576  size_t line_len,
577  CNcbiOstream& out,
578  bool believe_query,
579  bool html,
580  bool tabular /* = false */)
581 {
582  const string label("Subject");
584  believe_query, html,
585  label, tabular, kEmptyStr);
586 }
587 
588 void
590  size_t line_len,
591  CNcbiOstream& out,
592  bool believe_query,
593  bool html,
594  const string& label,
595  bool tabular /* = false */,
596  const string& rid /* = kEmptyStr*/)
597 {
598 
599  if (html) {
600  out << "<b>" << label << "=</b> ";
601  } else if (tabular) {
602  out << "# " << label << ": ";
603  } else {
604  out << label << "= ";
605  }
606 
607  string all_id_str = GetSeqIdString(cbs, believe_query);
608  all_id_str += " ";
609  all_id_str = NStr::TruncateSpaces(all_id_str + GetSeqDescrString(cbs));
610 
611  // For tabular output, there is no limit on the line length.
612  // There is also no extra line with the sequence length.
613  if (tabular) {
614  out << all_id_str;
615  } else {
616  x_WrapOutputLine(all_id_str, line_len, out, html);
617  if(cbs.IsSetInst() && cbs.GetInst().CanGetLength()){
618  out << "\nLength=";
619  out << cbs.GetInst().GetLength() <<"\n";
620  }
621  }
622 
623  if (rid != kEmptyStr) {
624  if (tabular) {
625  out << "\n" << "# RID: " << rid;
626  } else {
627  out << "\n" << "RID: " << rid << "\n";
628  }
629  }
630 }
631 
632 void CAlignFormatUtil::PrintPhiInfo(int num_patterns,
633  const string& pattern,
634  double prob,
635  vector<int>& offsets,
636  CNcbiOstream& out)
637 {
638  out << num_patterns << " occurrence(s) of pattern: " << "\n"
639  << pattern << " at position(s) ";
640 
641  bool first = true;
642  for (vector<int>::iterator it = offsets.begin();
643  it != offsets.end(); it++)
644  {
645  if (!first)
646  out << ", ";
647 
648  out << 1 + *it ;
649 
650  first = false;
651  }
652  out << " of query sequence" << "\n";
653  out << "pattern probability=" << prob << "\n";
654 
655 }
656 
658  int& score,
659  double& bits,
660  double& evalue,
661  int& sum_n,
662  int& num_ident,
663  list<TGi>& use_this_gi)
664 {
665  int comp_adj_method = 0; // dummy variable
666 
667  CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n,
668  num_ident, use_this_gi, comp_adj_method);
669 }
670 
672  int& score,
673  double& bits,
674  double& evalue,
675  int& sum_n,
676  int& num_ident,
677  list<string>& use_this_seq)
678 {
679  int comp_adj_method = 0; // dummy variable
680 
681  CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n,
682  num_ident, use_this_seq, comp_adj_method);
683 }
684 
685 
687  int& score,
688  double& bits,
689  double& evalue,
690  int& sum_n,
691  int& num_ident,
692  list<TGi>& use_this_gi,
693  int& comp_adj_method)
694 {
695  bool hasScore = false;
696  score = -1;
697  bits = -1;
698  evalue = -1;
699  sum_n = -1;
700  num_ident = -1;
701  comp_adj_method = 0;
702 
703  //look for scores at seqalign level first
704  hasScore = s_GetBlastScore(aln.GetScore(), score, bits, evalue,
705  sum_n, num_ident, use_this_gi, comp_adj_method);
706 
707  //look at the seg level
708  if(!hasScore){
709  const CSeq_align::TSegs& seg = aln.GetSegs();
710  if(seg.Which() == CSeq_align::C_Segs::e_Std){
711  s_GetBlastScore(seg.GetStd().front()->GetScores(),
712  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
713  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
714  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
715  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
716  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
718  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
719  }
720  }
721  if(use_this_gi.size() == 0) {
722  GetUseThisSequence(aln,use_this_gi);
723  }
724 }
725 
726 //converts gi list to the list of gi:XXXXXXXX strings
727 static list<string> s_NumGiToStringGiList(list<TGi> use_this_gi)//for backward compatability
728 {
729  const string k_GiPrefix = "gi:";
730  list<string> use_this_seq;
731  ITERATE(list<TGi>, iter_gi, use_this_gi){
732  string strSeq = k_GiPrefix + NStr::NumericToString(*iter_gi);
733  use_this_seq.push_back(strSeq);
734  }
735  return use_this_seq;
736 }
737 
739  int& score,
740  double& bits,
741  double& evalue,
742  int& sum_n,
743  int& num_ident,
744  list<string>& use_this_seq,
745  int& comp_adj_method)
746 {
747  bool hasScore = false;
748  score = -1;
749  bits = -1;
750  evalue = -1;
751  sum_n = -1;
752  num_ident = -1;
753  comp_adj_method = 0;
754 
755  list<TGi> use_this_gi;
756  //look for scores at seqalign level first
757  hasScore = s_GetBlastScore(aln.GetScore(), score, bits, evalue,
758  sum_n, num_ident, use_this_gi, comp_adj_method);
759 
760  //look at the seg level
761  if(!hasScore){
762  const CSeq_align::TSegs& seg = aln.GetSegs();
763  if(seg.Which() == CSeq_align::C_Segs::e_Std){
764  s_GetBlastScore(seg.GetStd().front()->GetScores(),
765  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
766  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
767  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
768  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
769  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
771  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
772  }
773  }
774  if(use_this_gi.size() == 0) {
775  GetUseThisSequence(aln,use_this_seq);
776  }
777  else {
778  use_this_seq = s_NumGiToStringGiList(use_this_gi);//for backward compatability
779  }
780 }
781 
783 {
784  string retval = NcbiEmptyString;
785 
786  if(dtg.GetTag().IsId())
787  retval = NStr::IntToString(dtg.GetTag().GetId());
788  else
789  retval = dtg.GetTag().GetStr();
790 
791  return retval;
792 }
793 
794 string CAlignFormatUtil::GetLabel(CConstRef<CSeq_id> id,bool with_version)
795 {
796  string retval = "";
797  if (id->Which() == CSeq_id::e_General){
798  const CDbtag& dtg = id->GetGeneral();
799  retval = CAlignFormatUtil::GetGnlID(dtg);
800  }
801  if (retval == "")
802  retval = id->GetSeqIdString(with_version);
803 
804  return retval;
805 }
806 
808 
809 {
810  for(auto i=0; i<number; i++){
811  out<<" ";
812  }
813 }
814 
816  double bit_score,
817  double total_bit_score,
818  int raw_score,
819  string& evalue_str,
820  string& bit_score_str,
821  string& total_bit_score_str,
822  string& raw_score_str)
823 {
824  char evalue_buf[100], bit_score_buf[100], total_bit_score_buf[100];
825 
826  /* Facilitates comparing formatted output using diff */
827  static string kBitScoreFormat("%4.1lf");
828 #ifdef CTOOLKIT_COMPATIBLE
829  static bool ctoolkit_compatible = false;
830  static bool value_set = false;
831  if ( !value_set ) {
832  if (getenv("CTOOLKIT_COMPATIBLE")) {
833  kBitScoreFormat.assign("%4.0lf");
834  ctoolkit_compatible = true;
835  }
836  value_set = true;
837  }
838 #endif /* CTOOLKIT_COMPATIBLE */
839 
840  if (evalue < 1.0e-180) {
841  snprintf(evalue_buf, sizeof(evalue_buf), "0.0");
842  } else if (evalue < 1.0e-99) {
843  snprintf(evalue_buf, sizeof(evalue_buf), "%2.0le", evalue);
844 #ifdef CTOOLKIT_COMPATIBLE
845  if (ctoolkit_compatible) {
846  strncpy(evalue_buf, evalue_buf+1, sizeof(evalue_buf-1));
847  }
848 #endif /* CTOOLKIT_COMPATIBLE */
849  } else if (evalue < 0.0009) {
850  snprintf(evalue_buf, sizeof(evalue_buf), "%3.0le", evalue);
851  } else if (evalue < 0.1) {
852  snprintf(evalue_buf, sizeof(evalue_buf), "%4.3lf", evalue);
853  } else if (evalue < 1.0) {
854  snprintf(evalue_buf, sizeof(evalue_buf), "%3.2lf", evalue);
855  } else if (evalue < 10.0) {
856  snprintf(evalue_buf, sizeof(evalue_buf), "%2.1lf", evalue);
857  } else {
858  snprintf(evalue_buf, sizeof(evalue_buf), "%2.0lf", evalue);
859  }
860 
861  if (bit_score > 99999){
862  snprintf(bit_score_buf, sizeof(bit_score_buf), "%5.3le", bit_score);
863  } else if (bit_score > 99.9){
864  snprintf(bit_score_buf, sizeof(bit_score_buf), "%3.0ld",
865  (long)bit_score);
866  } else {
867  snprintf(bit_score_buf, sizeof(bit_score_buf), kBitScoreFormat.c_str(),
868  bit_score);
869  }
870  if (total_bit_score > 99999){
871  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%5.3le",
872  total_bit_score);
873  } else if (total_bit_score > 99.9){
874  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%3.0ld",
875  (long)total_bit_score);
876  } else {
877  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%2.1lf",
878  total_bit_score);
879  }
880  evalue_str = evalue_buf;
881  bit_score_str = bit_score_buf;
882  total_bit_score_str = total_bit_score_buf;
883  if (raw_score <= 0)
884  raw_score = -1;
885  NStr::IntToString(raw_score_str, raw_score);
886 }
887 
888 
890  CSeq_align_set& new_aln,
891  unsigned int number)
892 {
893  CConstRef<CSeq_id> previous_id, subid;
894  bool is_first_aln = true;
895  unsigned int num_align = 0;
896  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
897 
898  if ((*iter)->GetSegs().IsDisc()) {
899  ++num_align;
900  } else {
901  subid = &((*iter)->GetSeq_id(1));
902  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
903  ++num_align;
904  }
905 
906  if(num_align > number) {
907  break;
908  }
909 
910  is_first_aln = false;
911  previous_id = subid;
912  }
913  new_aln.Set().push_back(*iter);
914  }
915 }
916 
917 
919  unsigned int number)
920 {
921  CConstRef<CSeq_id> previous_id, subid;
922  bool is_first_aln = true;
923  unsigned int num_align = 0;
924  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
925 
926  if ((*iter)->GetSegs().IsDisc()) {
927  ++num_align;
928  } else {
929  subid = &((*iter)->GetSeq_id(1));
930  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
931  ++num_align;
932  }
933 
934  if(num_align >= number) {
935  break;
936  }
937 
938  is_first_aln = false;
939  previous_id = subid;
940  }
941  }
942  return num_align;
943 }
944 
945 
947  CSeq_align_set& new_aln,
948  unsigned int number)
949 {
950  CConstRef<CSeq_id> previous_id, subid;
951  bool is_first_aln = true;
952  unsigned int num_align = 0;
953  bool finishCurrent = false;
954  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
955  if ((*iter)->GetSegs().IsDisc()) {
956  ++num_align;
957  } else {
958  subid = &((*iter)->GetSeq_id(1));
959  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
960  finishCurrent = (num_align + 1 == number) ? true : false;
961  ++num_align;
962  }
963  is_first_aln = false;
964  previous_id = subid;
965  }
966  if(num_align > number && !finishCurrent) {
967  break;
968  }
969  new_aln.Set().push_back(*iter);
970  }
971 }
972 
973 
974 void
976  int& num_gaps, int& num_gap_opens)
977 {
978  num_gaps = num_gap_opens = align_length = 0;
979 
980  for (int row = 0; row < salv.GetNumRows(); row++) {
982  = salv.GetAlnChunks(row, salv.GetSeqAlnRange(0));
983  for (int i=0; i<chunk_vec->size(); i++) {
984  CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];
985  int chunk_length = chunk->GetAlnRange().GetLength();
986  // Gaps are counted on all rows: gap can only be in one of the rows
987  // for any given segment.
988  if (chunk->IsGap()) {
989  ++num_gap_opens;
990  num_gaps += chunk_length;
991  }
992  // To calculate alignment length, only one row is needed.
993  if (row == 0)
994  align_length += chunk_length;
995  }
996  }
997 }
998 
999 void
1001  const CSeq_align_set& source)
1002 {
1003  if (source.IsSet() && source.CanGet()) {
1004 
1005  for(CSeq_align_set::Tdata::const_iterator iter = source.Get().begin();
1006  iter != source.Get().end(); iter++) {
1007  if((*iter)->IsSetSegs()){
1008  const CSeq_align::TSegs& seg = (*iter)->GetSegs();
1009  if(seg.IsDisc()){
1010  const CSeq_align_set& set = seg.GetDisc();
1011  for(CSeq_align_set::Tdata::const_iterator iter2 =
1012  set.Get().begin(); iter2 != set.Get().end();
1013  iter2 ++) {
1014  target.Set().push_back(*iter2);
1015  }
1016  } else {
1017  target.Set().push_back(*iter);
1018  }
1019  }
1020  }
1021  }
1022 }
1023 
1026 {
1027  CRef<CSeq_align> sa(new CSeq_align);
1028  if ( !aln.GetSegs().IsDendiag()) {
1029  NCBI_THROW(CException, eUnknown, "Input Seq-align should be Dendiag!");
1030  }
1031 
1032  if(aln.IsSetType()){
1033  sa->SetType(aln.GetType());
1034  }
1035  if(aln.IsSetDim()){
1036  sa->SetDim(aln.GetDim());
1037  }
1038  if(aln.IsSetScore()){
1039  sa->SetScore() = aln.GetScore();
1040  }
1041  if(aln.IsSetBounds()){
1042  sa->SetBounds() = aln.GetBounds();
1043  }
1044 
1045  CDense_seg& ds = sa->SetSegs().SetDenseg();
1046 
1047  int counter = 0;
1048  ds.SetNumseg() = 0;
1050 
1051  if(counter == 0){//assume all dendiag segments have same dim and ids
1052  if((*iter)->IsSetDim()){
1053  ds.SetDim((*iter)->GetDim());
1054  }
1055  if((*iter)->IsSetIds()){
1056  ds.SetIds() = (*iter)->GetIds();
1057  }
1058  }
1059  ds.SetNumseg() ++;
1060  if((*iter)->IsSetStarts()){
1061  ITERATE(CDense_diag::TStarts, iterStarts, (*iter)->GetStarts()){
1062  ds.SetStarts().push_back(*iterStarts);
1063  }
1064  }
1065  if((*iter)->IsSetLen()){
1066  ds.SetLens().push_back((*iter)->GetLen());
1067  }
1068  if((*iter)->IsSetStrands()){
1069  ITERATE(CDense_diag::TStrands, iterStrands, (*iter)->GetStrands()){
1070  ds.SetStrands().push_back(*iterStrands);
1071  }
1072  }
1073  if((*iter)->IsSetScores()){
1074  ITERATE(CDense_diag::TScores, iterScores, (*iter)->GetScores()){
1075  ds.SetScores().push_back(*iterScores); //this might not have
1076  //right meaning
1077  }
1078  }
1079  counter ++;
1080  }
1081 
1082  return sa;
1083 }
1084 
1086 {
1087  TTaxId taxid = ZERO_TAX_ID;
1088  try{
1089  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
1090  const CRef<CBlast_def_line_set> bdlRef =
1092  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
1093  ITERATE(list<CRef<CBlast_def_line> >, iter_bdl, bdl) {
1094  CConstRef<CSeq_id> bdl_id =
1095  GetSeq_idByType((*iter_bdl)->GetSeqid(), id.Which());
1096  if(bdl_id && bdl_id->Match(id) &&
1097  (*iter_bdl)->IsSetTaxid() && (*iter_bdl)->CanGetTaxid()){
1098  taxid = (*iter_bdl)->GetTaxid();
1099  break;
1100  }
1101  }
1102  } catch (CException&) {
1103 
1104  }
1105  return taxid;
1106 }
1107 
1109  const CBioseq_Handle& handle)
1110 {
1111  int frame = 0;
1112  if (strand == eNa_strand_plus) {
1113  frame = (start % 3) + 1;
1114  } else if (strand == eNa_strand_minus) {
1115  frame = -(((int)handle.GetBioseqLength() - start - 1)
1116  % 3 + 1);
1117 
1118  }
1119  return frame;
1120 }
1121 
1122 
1125  seqalign_hit_list,
1126  bool do_translation
1127  )
1128 {
1129 
1130  kTranslation = do_translation;
1131  seqalign_hit_list.sort(SortHitByPercentIdentityDescendingEx);
1132 }
1133 
1134 
1137  const CRef<CSeq_align>& info2)
1138 {
1139 
1140  int score1, sum_n1, num_ident1;
1141  double bits1, evalue1;
1142  list<TGi> use_this_gi1;
1143 
1144  int score2, sum_n2, num_ident2;
1145  double bits2, evalue2;
1146  list<TGi> use_this_gi2;
1147 
1148 
1149  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1150  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1151 
1152  int length1 = GetAlignmentLength(*info1, kTranslation);
1153  int length2 = GetAlignmentLength(*info2, kTranslation);
1154  bool retval = false;
1155 
1156 
1157  if(length1 > 0 && length2 > 0 && num_ident1 > 0 &&num_ident2 > 0 ) {
1158  if (((double)num_ident1)/length1 == ((double)num_ident2)/length2) {
1159 
1160  retval = evalue1 < evalue2;
1161 
1162  } else {
1163  retval = ((double)num_ident1)/length1 >= ((double)num_ident2)/length2;
1164 
1165  }
1166  } else {
1167  retval = evalue1 < evalue2;
1168  }
1169  return retval;
1170 }
1171 
1174  const CRef<CSeq_align_set>& info2)
1175 {
1176  CRef<CSeq_align_set> i1(info1), i2(info2);
1177 
1178  i1->Set().sort(SortHspByScoreDescending);
1179  i2->Set().sort(SortHspByScoreDescending);
1180 
1181 
1182  int score1, sum_n1, num_ident1;
1183  double bits1, evalue1;
1184  list<TGi> use_this_gi1;
1185 
1186  int score2, sum_n2, num_ident2;
1187  double bits2, evalue2;
1188  list<TGi> use_this_gi2;
1189 
1190  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1191  GetAlnScores(*(info2->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1192  return bits1 > bits2;
1193 }
1194 
1197  CRef<CSeq_align_set> const& info2)
1198 {
1199  int cov1 = GetMasterCoverage(*info1);
1200  int cov2 = GetMasterCoverage(*info2);
1201  bool retval = false;
1202 
1203  if (cov1 > cov2) {
1204  retval = cov1 > cov2;
1205  } else if (cov1 == cov2) {
1206  int score1, sum_n1, num_ident1;
1207  double bits1, evalue1;
1208  list<TGi> use_this_gi1;
1209 
1210  int score2, sum_n2, num_ident2;
1211  double bits2, evalue2;
1212  list<TGi> use_this_gi2;
1213  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1214  GetAlnScores(*(info2->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1215  retval = evalue1 < evalue2;
1216  }
1217 
1218  return retval;
1219 }
1220 
1222  CRef<CSeq_align_set>& info2)
1223 {
1224  int start1 = 0, start2 = 0;
1225 
1226 
1227  info1->Set().sort(SortHspByMasterStartAscending);
1228  info2->Set().sort(SortHspByMasterStartAscending);
1229 
1230 
1231  start1 = min(info1->Get().front()->GetSeqStart(0),
1232  info1->Get().front()->GetSeqStop(0));
1233  start2 = min(info2->Get().front()->GetSeqStart(0),
1234  info2->Get().front()->GetSeqStop(0));
1235 
1236  if (start1 == start2) {
1237  //same start then arrange by bits score
1238  int score1, sum_n1, num_ident1;
1239  double bits1, evalue1;
1240  list<TGi> use_this_gi1;
1241 
1242  int score2, sum_n2, num_ident2;
1243  double bits2, evalue2;
1244  list<TGi> use_this_gi2;
1245 
1246 
1247  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1248  GetAlnScores(*(info1->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1249  return evalue1 < evalue2;
1250 
1251  } else {
1252  return start1 < start2;
1253  }
1254 
1255 }
1256 
1259  const CRef<CSeq_align>& info2)
1260 {
1261 
1262  int score1, sum_n1, num_ident1;
1263  double bits1, evalue1;
1264  list<TGi> use_this_gi1;
1265 
1266  int score2, sum_n2, num_ident2;
1267  double bits2, evalue2;
1268  list<TGi> use_this_gi2;
1269 
1270 
1271  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1272  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1273  return bits1 > bits2;
1274 
1275 }
1276 
1279  const CRef<CSeq_align>& info2)
1280 {
1281  int start1 = 0, start2 = 0;
1282 
1283  start1 = min(info1->GetSeqStart(0), info1->GetSeqStop(0));
1284  start2 = min(info2->GetSeqStart(0), info2->GetSeqStop(0)) ;
1285 
1286  if (start1 == start2) {
1287  //same start then arrange by bits score
1288  int score1, sum_n1, num_ident1;
1289  double bits1, evalue1;
1290  list<TGi> use_this_gi1;
1291 
1292  int score2, sum_n2, num_ident2;
1293  double bits2, evalue2;
1294  list<TGi> use_this_gi2;
1295 
1296 
1297  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1298  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1299  return evalue1 < evalue2;
1300 
1301  } else {
1302 
1303  return start1 < start2;
1304  }
1305 }
1306 
1309  const CRef<CSeq_align>& info2)
1310 {
1311  int start1 = 0, start2 = 0;
1312 
1313  start1 = min(info1->GetSeqStart(1), info1->GetSeqStop(1));
1314  start2 = min(info2->GetSeqStart(1), info2->GetSeqStop(1)) ;
1315 
1316  if (start1 == start2) {
1317  //same start then arrange by bits score
1318  int score1, sum_n1, num_ident1;
1319  double bits1, evalue1;
1320  list<TGi> use_this_gi1;
1321 
1322  int score2, sum_n2, num_ident2;
1323  double bits2, evalue2;
1324  list<TGi> use_this_gi2;
1325 
1326 
1327  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1328  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1329  return evalue1 < evalue2;
1330 
1331  } else {
1332 
1333  return start1 < start2;
1334  }
1335 }
1336 
1337 int CAlignFormatUtil::GetAlignmentLength(const CSeq_align& aln, bool do_translation)
1338 {
1339 
1340  CRef<CSeq_align> final_aln;
1341 
1342  // Convert Std-seg and Dense-diag alignments to Dense-seg.
1343  // Std-segs are produced only for translated searches; Dense-diags only for
1344  // ungapped, not translated searches.
1345 
1346  if (aln.GetSegs().IsStd()) {
1347  CRef<CSeq_align> denseg_aln = aln.CreateDensegFromStdseg();
1348  // When both query and subject are translated, i.e. tblastx, convert
1349  // to a special type of Dense-seg.
1350  if (do_translation) {
1351  final_aln = denseg_aln->CreateTranslatedDensegFromNADenseg();
1352  } else {
1353  final_aln = denseg_aln;
1354 
1355  }
1356  } else if (aln.GetSegs().IsDendiag()) {
1357  final_aln = CreateDensegFromDendiag(aln);
1358  }
1359 
1360  const CDense_seg& ds = (final_aln ? final_aln->GetSegs().GetDenseg() :
1361  aln.GetSegs().GetDenseg());
1362 
1363  CAlnMap alnmap(ds);
1364  return alnmap.GetAlnStop() + 1;
1365 }
1366 
1368  CScope& scope,
1369  bool do_translation) {
1370  double identity = 0;
1371  CRef<CSeq_align> final_aln;
1372 
1373  // Convert Std-seg and Dense-diag alignments to Dense-seg.
1374  // Std-segs are produced only for translated searches; Dense-diags only for
1375  // ungapped, not translated searches.
1376 
1377  if (aln.GetSegs().IsStd()) {
1378  CRef<CSeq_align> denseg_aln = aln.CreateDensegFromStdseg();
1379  // When both query and subject are translated, i.e. tblastx, convert
1380  // to a special type of Dense-seg.
1381  if (do_translation) {
1382  final_aln = denseg_aln->CreateTranslatedDensegFromNADenseg();
1383  } else {
1384  final_aln = denseg_aln;
1385 
1386  }
1387  } else if (aln.GetSegs().IsDendiag()) {
1388  final_aln = CreateDensegFromDendiag(aln);
1389  }
1390 
1391  const CDense_seg& ds = (final_aln ? final_aln->GetSegs().GetDenseg() :
1392  aln.GetSegs().GetDenseg());
1393 
1394  CAlnVec alnvec(ds, scope);
1395  string query, subject;
1396 
1397  alnvec.SetAaCoding(CSeq_data::e_Ncbieaa);
1398  alnvec.GetWholeAlnSeqString(0, query);
1399  alnvec.GetWholeAlnSeqString(1, subject);
1400 
1401  int num_ident = 0;
1402  int length = (int)min(query.size(), subject.size());
1403 
1404  for (int i = 0; i < length; ++i) {
1405  if (query[i] == subject[i]) {
1406  ++num_ident;
1407  }
1408  }
1409 
1410  if (length > 0) {
1411  identity = ((double)num_ident)/length;
1412  }
1413 
1414  return identity;
1415 }
1416 
1417 
1419  const CRef<CSeq_align_set>& info2,
1420  double &percentIdent1,
1421  double &percentIdent2)
// Helper that fills percentIdent1 / percentIdent2 for two hit sets.
// Both outputs are first reset to -1.
// NOTE(review): the statements that actually compute the two values are
// missing from this listing (lines between the reset and the return), and so
// is the start of the signature; only what is visible is documented here.
1422 {
1423 
// Non-const handles to the same alignment sets as info1 / info2.
1424  CRef<CSeq_align_set> i1(info1), i2(info2);
1425  percentIdent1 = -1;
1426  percentIdent2 = -1;
1427 
1430 
1433  return;
1434 }
1435 
1436 
1439  const CRef<CSeq_align_set>& info2)
1440 {
1441 
1442  CRef<CSeq_align_set> i1(info1), i2(info2);
1443 
1444  //i1->Set().sort(SortHspByPercentIdentityDescending);
1445  //i2->Set().sort(SortHspByPercentIdentityDescending);
1446 
1447 
1448  unique_ptr<CAlignFormatUtil::SSeqAlignSetCalcParams> seqSetInfo1( CAlignFormatUtil::GetSeqAlignSetCalcParamsFromASN(*info1));
1449  unique_ptr<CAlignFormatUtil::SSeqAlignSetCalcParams> seqSetInfo2( CAlignFormatUtil::GetSeqAlignSetCalcParamsFromASN(*info2));
1450  double evalue1 = seqSetInfo1->evalue;
1451  double evalue2 = seqSetInfo2->evalue;
1452  double percentIdent1 = seqSetInfo1->percent_identity;
1453  double percentIdent2 = seqSetInfo2->percent_identity;
1454 
1455  bool retval = false;
1456  if(percentIdent1 < 0 || percentIdent2 < 0) {
1457  s_CalcAlnPercentIdent(info1, info2,percentIdent1,percentIdent2);
1458  }
1459  if(percentIdent1 > 0 &&percentIdent2 > 0) {
1460  if (percentIdent1 == percentIdent2) {
1461  retval = evalue1 < evalue2;
1462 
1463  } else {
1464  retval = percentIdent1 >= percentIdent2;
1465  }
1466  } else {
1467  retval = evalue1 < evalue2;
1468  }
1469  return retval;
1470 }
1471 
1473  CRef<CSeq_align_set> const& info2)
1474 {
1475  int score1, score2, sum_n, num_ident;
1476  double bits, evalue;
1477  list<TGi> use_this_gi;
1478  double total_bits1 = 0, total_bits2 = 0;
1479 
1480  ITERATE(CSeq_align_set::Tdata, iter, info1->Get()) {
1481  CAlignFormatUtil::GetAlnScores(**iter, score1, bits, evalue,
1482  sum_n, num_ident, use_this_gi);
1483  total_bits1 += bits;
1484  }
1485 
1486  ITERATE(CSeq_align_set::Tdata, iter, info2->Get()) {
1487  CAlignFormatUtil::GetAlnScores(**iter, score2, bits, evalue,
1488  sum_n, num_ident, use_this_gi);
1489  total_bits2 += bits;
1490  }
1491 
1492 
1493  return total_bits1 >= total_bits2;
1494 
1495 }
1496 
1497 #ifndef NCBI_COMPILER_WORKSHOP
1498 /** Class to sort by linkout bit
1499  * @note this code doesn't compile under the Solaris' WorkShop, and because
1500  * this feature is only used inside NCBI (LinkoutDB), we disable this code.
1501  */
// Comparison functor for list::sort over hit sets: each hit is represented by
// the subject Seq-id (row 1) of its first HSP, and hits are ordered by that
// id's eGenomicSeq linkout bit.
// NOTE(review): the class header, the first line of the constructor, the
// GetLinkout call lines and the data member declarations are missing from
// this listing.
1503 {
1504 public:
1506  const string& mv_build_name)
1507  : m_LinkoutDB(linkoutdb), m_MapViewerBuildName(mv_build_name) {}
1508 
1509  bool operator() (const CRef<CSeq_align_set>& info1, const CRef<CSeq_align_set>& info2)
1510  {
1511  CConstRef<CSeq_id> id1, id2;
// Subject id of the first HSP of each hit.
1512  id1 = &(info1->Get().front()->GetSeq_id(1));
1513  id2 = &(info2->Get().front()->GetSeq_id(1));
1514 
// Linkout bits default to 0 when no linkout database is available.
1515  int linkout1 = 0, linkout2 = 0;
1516  linkout1 = m_LinkoutDB
1518  : 0;
1519  linkout2 = m_LinkoutDB
1521  : 0;
1522 
// NOTE(review): "<=" returns true for equal elements, so this is not a strict
// weak ordering as list::sort requires - confirm whether "<" was intended.
1523  return (linkout1 & eGenomicSeq) <= (linkout2 & eGenomicSeq);
1524  }
1525 private:
1528 };
1529 #endif /* NCBI_COMPILER_WORKSHOP */
1530 
1532 SortHitByMolecularType(list< CRef<CSeq_align_set> >& seqalign_hit_list,
1533  CScope& scope, ILinkoutDB* linkoutdb,
1534  const string& mv_build_name)
1535 {
1536 
1537  kScope = &scope;
1538 #ifndef NCBI_COMPILER_WORKSHOP
1539  seqalign_hit_list.sort(CSortHitByMolecularTypeEx(linkoutdb, mv_build_name));
1540 #endif /* NCBI_COMPILER_WORKSHOP */
1541 }
1542 
1543 void CAlignFormatUtil::SortHit(list< CRef<CSeq_align_set> >& seqalign_hit_list,
1544  bool do_translation, CScope& scope, int
1545  sort_method, ILinkoutDB* linkoutdb,
1546  const string& mv_build_name)
1547 {
1548  kScope = &scope;
1549  kTranslation = do_translation;
1550 
1551  if (sort_method == 1) {
1552 #ifndef NCBI_COMPILER_WORKSHOP
1553  seqalign_hit_list.sort(CSortHitByMolecularTypeEx(linkoutdb,
1554  mv_build_name));
1555 #endif /* NCBI_COMPILER_WORKSHOP */
1556  } else if (sort_method == 2) {
1557  seqalign_hit_list.sort(SortHitByTotalScoreDescending);
1558  } else if (sort_method == 3) {
1559  seqalign_hit_list.sort(SortHitByPercentIdentityDescendingEx);
1560  }
1561 }
1562 
1565  target,
1566  int sort_method,
1567  const CSeq_align_set& source,
1568  CScope& scope,
1569  ILinkoutDB* linkoutdb,
1570  const string& mv_build_name)
1571 {
1572  CConstRef<CSeq_id> prevSubjectId;
1573  int count = 0;
1574  int linkoutPrev = 0;
1575  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1576 
1577  const CSeq_id& id = (*iter)->GetSeq_id(1);
1578  try {
1579  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
1580  if (handle) {
1581  int linkout;
1582  if(prevSubjectId.Empty() || !id.Match(*prevSubjectId)){
1583  prevSubjectId = &id;
1584  linkout = linkoutdb ? linkoutdb->GetLinkout(id, mv_build_name): 0;
1585  linkoutPrev = linkout;
1586  count++;
1587  }
1588  else {
1589  linkout = linkoutPrev;
1590  }
1591  if (linkout & eGenomicSeq) {
1592  if (sort_method == 1) {
1593  target[1]->Set().push_back(*iter);
1594  } else if (sort_method == 2){
1595  target[0]->Set().push_back(*iter);
1596  } else {
1597  target[1]->Set().push_back(*iter);
1598  }
1599  } else {
1600  if (sort_method == 1) {
1601  target[0]->Set().push_back(*iter);
1602  } else if (sort_method == 2) {
1603  target[1]->Set().push_back(*iter);
1604  } else {
1605  target[0]->Set().push_back(*iter);
1606  }
1607  }
1608  } else {
1609  target[0]->Set().push_back(*iter);
1610  }
1611 
1612  } catch (const CException&){
1613  target[0]->Set().push_back(*iter); //no bioseq found, leave untouched
1614  }
1615  }
1616 }
1617 
1619  const CSeq_align_set& source)
1620 {
1621  CConstRef<CSeq_id> previous_id;
1622  CRef<CSeq_align_set> temp;
1623 
1624  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1625  const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1626  if(previous_id.Empty()) {
1627  temp = new CSeq_align_set;
1628  temp->Set().push_back(*iter);
1629  target.push_back(temp);
1630  } else if (cur_id.Match(*previous_id)){
1631  temp->Set().push_back(*iter);
1632 
1633  } else {
1634  temp = new CSeq_align_set;
1635  temp->Set().push_back(*iter);
1636  target.push_back(temp);
1637  }
1638  previous_id = &cur_id;
1639  }
1640 
1641 }
1642 
1645 {
1646  CRef<CSeq_align_set> align_set (new CSeq_align_set);
1647  CConstRef<CSeq_id> previous_id;
1648  CRef<CSeq_align_set> temp;
1649  // list<CRef<CSeq_align_set> >::iterator iter;
1650 
1651  for (list<CRef<CSeq_align_set> >::iterator iter = source.begin(); iter != source.end(); iter ++) {
1652  ITERATE(CSeq_align_set::Tdata, iter2, (*iter)->Get()) {
1653  align_set->Set().push_back(*iter2);
1654  }
1655  }
1656  return align_set;
1657 }
1658 
1660  const CSeq_align_set& source)
// Builds a map from subject-id string to the alignments of that subject,
// restricted to the ids listed in `seqIdList`. Every listed id first gets an
// empty CSeq_align_set; the alignments of `source` are then scanned and each
// run of consecutive alignments whose subject (row 1) FASTA string is a key of
// the map replaces that entry.
// NOTE(review): the declaration of `hitsMap` and the start of the signature
// are missing from this listing.
1661 {
1662  CConstRef<CSeq_id> previous_id;
1663  CRef<CSeq_align_set> temp;
1664 
1666 
// Seed the map with an empty alignment set for every requested id.
1667  for(size_t i = 0; i < seqIdList.size();i++) {
1668  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
1669  hitsMap.insert(map<string, CRef<CSeq_align_set> >::value_type(seqIdList[i],new_aln));
1670  }
// Number of requested ids found so far; used to stop scanning early.
1671  size_t count = 0;
1672  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1673  const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
// A new subject starts here (first alignment or id change).
1674  if(previous_id.Empty() || !cur_id.Match(*previous_id)) {
// Stop once as many subjects were collected as ids were requested.
1675  if(count >= seqIdList.size()) {
1676  break;
1677  }
1678  string idString = NStr::TruncateSpaces(cur_id.AsFastaString());
1679  if(hitsMap.find(idString) != hitsMap.end()) {
1680  temp = new CSeq_align_set;
1681  temp->Set().push_back(*iter);
1682  hitsMap[idString] = temp;
1683  count++;
1684  }
1685  else {
// Subject not requested: a null temp makes the following HSPs of this
// subject be skipped.
1686  temp.Reset();
1687  }
1688  }
1689  else if (cur_id.Match(*previous_id)){
// Same subject as the previous alignment: append only if it is being collected.
1690  if(!temp.Empty()) {
1691  temp->Set().push_back(*iter);
1692  }
1693  }
1694  previous_id = &cur_id;
1695  }
1696  return hitsMap;
1697 }
1698 
// Rebuilds `all_aln_set` so that it contains only the alignments of the
// sequences named in the comma-separated `alignSeqList`, in the order of that
// list.
// NOTE(review): the function signature and the statement that creates
// `hitsMap` are missing from this listing.
1700 {
1701  vector <string> seqIds;
1702  NStr::Split(alignSeqList,",",seqIds);
1703 
1704  //SEQ_ALN_SET from ALIGNDB contains seq_aligns in random order
1705  //The following will create a map that contains seq-aln_set per gi from ALIGN_SEQ_LIST
1707 
// NOTE(review): `it` is not used in the visible code.
1708  map < string, CRef<CSeq_align_set> >::iterator it;
1709  list< CRef<CSeq_align_set> > orderedSet;
1710  //orderedSet will have seq aligns in the order of gi list
1711  for(size_t i = 0; i < seqIds.size(); i++) {
1712  if(hitsMap.find(seqIds[i]) != hitsMap.end()) {
1713  orderedSet.push_back(hitsMap[seqIds[i]]);
1714  }
1715  }
1716  //This should contain seq align set in the order of gis in the list
1717  all_aln_set = CAlignFormatUtil::HitListToHspList(orderedSet);
1718 }
1719 
// Extracts the SRA run, spot id and read index from the string tag of a
// general Seq-id, which must have the form "<run>.<spot>.<read>".
// @param ids           Seq-ids of the sequence.
// @param strRun        [out] first dot-separated field of the tag.
// @param strSpotId     [out] second field.
// @param strReadIndex  [out] third field.
// @return true only when a non-empty string tag with exactly three fields was
//         found; the output strings are left untouched otherwise.
1720 static bool s_GetSRASeqMetadata(const CBioseq::TId& ids,string &strRun, string &strSpotId,string &strReadIndex)
1721 {
1722  bool success = false;
// NOTE(review): `link` is not used in the visible code.
1723  string link = NcbiEmptyString;
// NOTE(review): the declaration of `seqId` is missing from this listing;
// presumably it selects the general Seq-id from `ids` - confirm.
1725 
1726  if (!seqId.Empty())
1727  {
1728  // Get the SRA tag from seqId
1729  if (seqId->GetGeneral().CanGetDb() &&
1730  seqId->GetGeneral().CanGetTag() &&
1731  seqId->GetGeneral().GetTag().IsStr())
1732  {
1733  // Decode the tag to collect the SRA-specific indices
1734  string strTag = seqId->GetGeneral().GetTag().GetStr();
1735  if (!strTag.empty())
1736  {
1737  vector<string> vecInfo;
1738  try
1739  {
1740  NStr::Split(strTag, ".", vecInfo);
1741  }
1742  catch (...)
1743  {
// Any failure while splitting is reported as "no metadata".
1744  return false;
1745  }
1746 
// Exactly run / spot / read index are expected.
1747  if (vecInfo.size() != 3)
1748  {
1749  return false;
1750  }
1751 
1752  strRun = vecInfo[0];
1753  strSpotId = vecInfo[1];
1754  strReadIndex = vecInfo[2];
1755  success = true;
1756  }
1757  }
1758  }
1759  return success;
1760 }
1761 
1762 string CAlignFormatUtil::BuildSRAUrl(const CBioseq::TId& ids, string user_url)
1763 {
1764  string strRun, strSpotId,strReadIndex;
1765  string link = NcbiEmptyString;
1766 
1767  if(s_GetSRASeqMetadata(ids,strRun,strSpotId,strReadIndex))
1768  {
1769  // Generate the SRA link to the identified spot
1770  link += user_url;
1771  link += "?run=" + strRun;
1772  link += "." + strSpotId;
1773  link += "." + strReadIndex;
1774  }
1775  return link;
1776 }
1777 
// Chooses the Seq-id to put into a URL and returns it URL-encoded in FASTA
// form. Preference order: general id, then `id_other`, then the best-ranked
// id of `ids`. Returns an empty string for local BLAST ordinal ids
// ("gnl|BL_ORD_ID"), when no id is available, or when the chosen id is a GI.
// NOTE(review): the function signature and the declarations of `id_general`
// and `id_other` are missing from this listing.
1779 {
1780  string gnl;
1781 
1784  const CRef<CSeq_id> id_accession = FindBestChoice(ids, CSeq_id::WorstRank);
1785 
// Database ordinal ids are never exposed in URLs.
1786  if(!id_general.Empty() && id_general->AsFastaString().find("gnl|BL_ORD_ID") != string::npos){
1787  return gnl;
1788  }
1789 
1790  const CSeq_id* bestid = NULL;
1791  if (id_general.Empty()){
1792  bestid = id_other;
1793  if (id_other.Empty()){
1794  bestid = id_accession;
1795  }
1796  } else {
1797  bestid = id_general;
1798  }
1799 
// GIs are skipped here; only non-GI ids are encoded.
1800  if (bestid && bestid->Which() != CSeq_id::e_Gi){
1801  gnl = NStr::URLEncode(bestid->AsFastaString());
1802  }
1803  return gnl;
1804 }
1805 
1807  string user_url, string database,
1808  bool db_is_na, string rid, int query_number,
1809  bool for_alignment) {
// Builds a link to `user_url` for the sequence identified by `ids`, appending
// db, na, gnl, gi/term, taxid, RID, QUERY_NUMBER and log$ query parameters as
// available. Returns an empty string for local BLAST ordinal ids
// ("gnl|BL_ORD_ID").
// NOTE(review): the first line of the signature and the declaration of
// `id_general` are missing from this listing.
1810 
1811  string link = NcbiEmptyString;
1813 
1814  if(!id_general.Empty()
1815  && id_general->AsFastaString().find("gnl|BL_ORD_ID") != string::npos){
1816  /* We do need to make security protected link to BLAST gnl */
1817  return NcbiEmptyString;
1818  }
1819  TGi gi = FindGi(ids);
1820  string bestID = s_GetBestIDForURL((CBioseq::TId &)ids);
1821 
1822 
// nodb_path is true for every URL except dumpgnl.cgi: database names are then
// reduced to their last path component; dumpgnl.cgi gets the names unchanged.
1823  bool nodb_path = false;
1824  /* dumpgnl.cgi need to use path */
1825  if (user_url.find("dumpgnl.cgi") ==string::npos){
1826  nodb_path = true;
1827  }
1828  int length = (int)database.size();
1829  string str;
1830  char *chptr, *dbtmp;
1831  char tmpbuff[256];
// NOTE(review): dbname/dbtmp are raw new[] buffers released only at the end of
// the function; an exception thrown in between would leak them.
1832  char* dbname = new char[sizeof(char)*length + 2];
1833  strcpy(dbname, database.c_str());
1834  if(nodb_path) {
1835  int i, j;
1836  dbtmp = new char[sizeof(char)*length + 2]; /* additional space and NULL */
1837  memset(dbtmp, '\0', sizeof(char)*length + 2);
// Walk the space/comma separated database list one name at a time.
1838  for(i = 0; i < length; i++) {
1839  if(i > 0) {
1840  strcat(dbtmp, " "); //space between db
1841  }
1842  if(isspace((unsigned char) dbname[i]) || dbname[i] == ',') {/* Rolling spaces */
1843  continue;
1844  }
// Copy one database name into tmpbuff.
// NOTE(review): j may reach 256 here, in which case the terminator below is
// written one past the end of tmpbuff[256] - confirm names are shorter.
1845  j = 0;
1846  while (!isspace((unsigned char) dbname[i]) && j < 256 && i < length) {
1847  tmpbuff[j] = dbname[i];
1848  j++; i++;
1849  if(dbname[i] == ',') { /* Comma is valid delimiter */
1850  break;
1851  }
1852  }
1853  tmpbuff[j] = '\0';
// Keep only the part after the last '/', i.e. drop any directory path.
1854  if((chptr = strrchr(tmpbuff, '/')) != NULL) {
1855  strcat(dbtmp, (char*)(chptr+1));
1856  } else {
1857  strcat(dbtmp, tmpbuff);
1858  }
1859 
1860  }
1861  } else {
1862  dbtmp = dbname;
1863  }
1864 
// NOTE(review): bestID is copied into a fixed 256-byte buffer with strcpy; an
// encoded id of 256 characters or more would overflow it.
1865  char gnl[256];
1866  if (!bestID.empty()){
1867  strcpy(gnl, bestID.c_str());
1868 
1869  } else {
1870  gnl[0] = '\0';
1871  }
1872 
1873  str = NStr::URLEncode(dbtmp == NULL ? (char*) "nr" : dbtmp);
1874 
// Start the query string with '?' unless user_url already has one.
1875  if (user_url.find("?") == string::npos){
1876  link += user_url + "?" + "db=" + str + "&na=" + (db_is_na? "1" : "0");
1877  } else {
1878  if (user_url.find("=") != string::npos) {
1879  user_url += "&";
1880  }
1881  link += user_url + "db=" + str + "&na=" + (db_is_na? "1" : "0");
1882  }
1883 
1884  if (gnl[0] != '\0'){
1885  str = gnl;
1886  link += "&gnl=";
1887  link += str;
1888  }
1889  if (gi > ZERO_GI){
1890  link += "&gi=" + NStr::NumericToString(gi);
1891  link += "&term=" + NStr::NumericToString(gi) + NStr::URLEncode("[gi]");
1892  }
1893  if(taxid > ZERO_TAX_ID){
1894  link += "&taxid=" + NStr::NumericToString(taxid);
1895  }
1896  if (rid != NcbiEmptyString){
1897  link += "&RID=" + rid;
1898  }
1899 
1900  if (query_number > 0){
1901  link += "&QUERY_NUMBER=" + NStr::IntToString(query_number);
1902  }
1903 
// The log$ tag is added for every URL except dumpgnl.cgi.
1904  if (user_url.find("dumpgnl.cgi") ==string::npos){
1905  if (for_alignment)
1906  link += "&log$=nuclalign";
1907  else
1908  link += "&log$=nucltop";
1909  }
1910 
// dbtmp is a separate allocation only when nodb_path is set; otherwise it
// aliases dbname and must not be deleted twice.
1911  if(nodb_path){
1912  delete [] dbtmp;
1913  }
1914  delete [] dbname;
1915  return link;
1916 }
1919  map< string, string>& parameters_to_change,
1920  string& cgi_query)
1921 {
1922 
1923  //add parameters to exclude
1924  parameters_to_change.insert(map<string, string>::
1925  value_type("service", ""));
1926  parameters_to_change.insert(map<string, string>::
1927  value_type("address", ""));
1928  parameters_to_change.insert(map<string, string>::
1929  value_type("platform", ""));
1930  parameters_to_change.insert(map<string, string>::
1931  value_type("_pgr", ""));
1932  parameters_to_change.insert(map<string, string>::
1933  value_type("client", ""));
1934  parameters_to_change.insert(map<string, string>::
1935  value_type("composition_based_statistics", ""));
1936 
1937  parameters_to_change.insert(map<string, string>::
1938  value_type("auto_format", ""));
1939  cgi_query = NcbiEmptyString;
1940  TCgiEntries& cgi_entry = ctx.GetRequest().GetEntries();
1941  bool is_first = true;
1942 
1943  for(TCgiEntriesI it=cgi_entry.begin(); it!=cgi_entry.end(); ++it) {
1944  string parameter = it->first;
1945  if (parameter != NcbiEmptyString) {
1946  if (parameters_to_change.count(NStr::ToLower(parameter)) > 0 ||
1947  parameters_to_change.count(NStr::ToUpper(parameter)) > 0) {
1948  if(parameters_to_change[NStr::ToLower(parameter)] !=
1949  NcbiEmptyString &&
1950  parameters_to_change[NStr::ToUpper(parameter)] !=
1951  NcbiEmptyString) {
1952  if (!is_first) {
1953  cgi_query += "&";
1954  }
1955  cgi_query +=
1956  it->first + "=" + parameters_to_change[it->first];
1957  is_first = false;
1958  }
1959  } else {
1960  if (!is_first) {
1961  cgi_query += "&";
1962  }
1963  cgi_query += it->first + "=" + it->second;
1964  is_first = false;
1965  }
1966 
1967  }
1968  }
1969 }
1970 
1972 
// Appends the formatting-related request parameters of `ctx` to `cgi_query`
// as "NAME=value" pairs, starting with RID (no leading '&'). The FORMAT_EQ_*
// parameters are appended only when non-empty; every other parameter is
// appended even when its value is empty.
// NOTE(review): the function signature (with its opening brace) and the
// first line of each FORMAT_EQ_* value expression are missing from this
// listing.
1973  string format_type = ctx.GetRequestValue("FORMAT_TYPE").GetValue();
1974  string ridstr = ctx.GetRequestValue("RID").GetValue();
1975  string align_view = ctx.GetRequestValue("ALIGNMENT_VIEW").GetValue();
1976 
1977  cgi_query += "RID=" + ridstr;
1978  cgi_query += "&FORMAT_TYPE=" + format_type;
1979  cgi_query += "&ALIGNMENT_VIEW=" + align_view;
1980 
1981  cgi_query += "&QUERY_NUMBER=" + ctx.GetRequestValue("QUERY_NUMBER").GetValue();
1982  cgi_query += "&FORMAT_OBJECT=" + ctx.GetRequestValue("FORMAT_OBJECT").GetValue();
1983  cgi_query += "&RUN_PSIBLAST=" + ctx.GetRequestValue("RUN_PSIBLAST").GetValue();
1984  cgi_query += "&I_THRESH=" + ctx.GetRequestValue("I_THRESH").GetValue();
1985 
1986  cgi_query += "&DESCRIPTIONS=" + ctx.GetRequestValue("DESCRIPTIONS").GetValue();
1987 
1988  cgi_query += "&ALIGNMENTS=" + ctx.GetRequestValue("ALIGNMENTS").GetValue();
1989 
1990  cgi_query += "&NUM_OVERVIEW=" + ctx.GetRequestValue("NUM_OVERVIEW").GetValue();
1991 
1992  cgi_query += "&NCBI_GI=" + ctx.GetRequestValue("NCBI_GI").GetValue();
1993 
1994  cgi_query += "&SHOW_OVERVIEW=" + ctx.GetRequestValue("SHOW_OVERVIEW").GetValue();
1995 
1996  cgi_query += "&SHOW_LINKOUT=" + ctx.GetRequestValue("SHOW_LINKOUT").GetValue();
1997 
1998  cgi_query += "&GET_SEQUENCE=" + ctx.GetRequestValue("GET_SEQUENCE").GetValue();
1999 
2000  cgi_query += "&MASK_CHAR=" + ctx.GetRequestValue("MASK_CHAR").GetValue();
2001  cgi_query += "&MASK_COLOR=" + ctx.GetRequestValue("MASK_COLOR").GetValue();
2002 
2003  cgi_query += "&SHOW_CDS_FEATURE=" + ctx.GetRequestValue("SHOW_CDS_FEATURE").GetValue();
2004 
// The three FORMAT_EQ_* values are optional and appended only when set.
2005  if (ctx.GetRequestValue("FORMAT_EQ_TEXT").GetValue() != NcbiEmptyString) {
2006  cgi_query += "&FORMAT_EQ_TEXT=" +
2008  GetRequestValue("FORMAT_EQ_TEXT").
2009  GetValue()));
2010  }
2011 
2012  if (ctx.GetRequestValue("FORMAT_EQ_OP").GetValue() != NcbiEmptyString) {
2013  cgi_query += "&FORMAT_EQ_OP=" +
2015  GetRequestValue("FORMAT_EQ_OP").
2016  GetValue()));
2017  }
2018 
2019  if (ctx.GetRequestValue("FORMAT_EQ_MENU").GetValue() != NcbiEmptyString) {
2020  cgi_query += "&FORMAT_EQ_MENU=" +
2022  GetRequestValue("FORMAT_EQ_MENU").
2023  GetValue()));
2024  }
2025 
2026  cgi_query += "&EXPECT_LOW=" + ctx.GetRequestValue("EXPECT_LOW").GetValue();
2027  cgi_query += "&EXPECT_HIGH=" + ctx.GetRequestValue("EXPECT_HIGH").GetValue();
2028 
2029  cgi_query += "&BL2SEQ_LINK=" + ctx.GetRequestValue("BL2SEQ_LINK").GetValue();
2030 
2031 }
2032 
2033 
2035  CScope& scope, ILinkoutDB* linkoutdb,
2036  const string& mv_build_name)
2037 {
2038  bool is_mixed = false;
2039  bool is_first = true;
2040  int prev_database = 0;
2041 
2042  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2043 
2044  const CSeq_id& id = (*iter)->GetSeq_id(1);
2045  int linkout = linkoutdb
2046  ? linkoutdb->GetLinkout(id, mv_build_name)
2047  : 0;
2048  int cur_database = (linkout & eGenomicSeq);
2049  if (!is_first && cur_database != prev_database) {
2050  is_mixed = true;
2051  break;
2052  }
2053  prev_database = cur_database;
2054  is_first = false;
2055  }
2056 
2057  return is_mixed;
2058 
2059 }
2060 
2061 
2063 {
2064  bool formatAsMixedDbs = false;
2065  string mixedDbs = ctx.GetRequestValue("MIXED_DATABASE").GetValue();
2066  if(!mixedDbs.empty()) {
2067  mixedDbs = NStr::ToLower(mixedDbs);
2068  formatAsMixedDbs = (mixedDbs == "on" || mixedDbs == "true" || mixedDbs == "yes") ? true : false;
2069  }
2070  return formatAsMixedDbs;
2071 }
2072 
2073 static string s_MapLinkoutGenParam(string &url_link_tmpl,
2074  const string& rid,
2075  string giList,
2076  bool for_alignment,
2077  int cur_align,
2078  string &label,
2079  string &lnk_displ,
2080  string lnk_tl_info = "",
2081  string lnk_title = "")
2082 {
2083  const string kLinkTitle=" title=\"View <@lnk_tl_info@> for <@label@>\" ";
2084  const string kLinkTarget="target=\"lnk" + rid + "\"";
2085  string lnkTitle = (lnk_title.empty()) ? kLinkTitle : lnk_title;
2086  string url_link = CAlignFormatUtil::MapTemplate(url_link_tmpl,"gi",giList);
2087  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",rid);
2088  url_link = CAlignFormatUtil::MapTemplate(url_link,"log",for_alignment? "align" : "top");
2089  url_link = CAlignFormatUtil::MapTemplate(url_link,"blast_rank",NStr::IntToString(cur_align));
2090  lnkTitle = NStr::StartsWith(lnk_displ,"<img") ? "" : lnkTitle;
2091  string lnkTarget = NStr::StartsWith(lnk_displ,"<img") ? "" : kLinkTarget;
2092  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnkTitle",lnkTitle);
2093  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnkTarget",lnkTarget);
2094  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnk_displ",lnk_displ);
2095  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnk_tl_info",lnk_tl_info);
2096  url_link = CAlignFormatUtil::MapTemplate(url_link,"label",label);
2097  url_link = CAlignFormatUtil::MapProtocol(url_link);
2098  return url_link;
2099 }
2100 
2101 
2102 static list<string> s_GetLinkoutUrl(int linkout,
2103  string giList,
2104  string labelList,
2105  TGi first_gi,
2106  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2107  bool textLink = true)
2108 
2109 {
2110  list<string> linkout_list;
2111  string url_link,lnk_displ,lnk_title,lnkTitleInfo;
2112 
2113  vector<string> accs;
2114  NStr::Split(labelList,",",accs);
2115  string firstAcc = (accs.size() > 0)? accs[0] : labelList;
2116 
2117  if (linkout & eUnigene) {
2118  url_link = CAlignFormatUtil::GetURLFromRegistry("UNIGEN");
2119  lnk_displ = textLink ? "UniGene" : kUnigeneImg;
2120 
2121  string termParam = NStr::Find(labelList,",") == NPOS ? kGeneTerm : ""; //kGeneTerm if only one seqid
2122  url_link = CAlignFormatUtil::MapTemplate(url_link,"termParam",termParam);
2123 
2124  lnkTitleInfo = "UniGene cluster";
2125  string uid = !linkoutInfo.is_na ? "[Protein Accession]" : "[Nucleotide Accession]";
2126  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2127  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2128 
2129  if(textLink) {
2130  url_link = CAlignFormatUtil::MapTemplate(kUnigeneDispl,"lnk",url_link);
2131  }
2132  url_link = CAlignFormatUtil::MapProtocol(url_link);
2133  linkout_list.push_back(url_link);
2134  }
2135  if (linkout & eStructure){
2136  CSeq_id seqID(firstAcc);
2137  string struct_link = CAlignFormatUtil::GetURLFromRegistry(
2138  "STRUCTURE_URL");
2139 
2140  url_link = struct_link.empty() ? kStructureUrl : struct_link;
2141  string linkTitle;
2142  if(seqID.Which() == CSeq_id::e_Pdb) {
2143  lnk_displ = textLink ? "Structure" : kStructureImg;
2144  linkTitle = " title=\"View 3D structure <@label@>\"";
2145  }
2146  else {
2147  url_link = kStructureAlphaFoldUrl;
2148  lnk_displ = textLink ? "AlphaFold Structure" : kStructureImg;
2149  linkTitle = " title=\"View AlphaFold 3D structure <@label@>\"";
2150  }
2151 
2152 
2153 
2154  string molID,chainID;
2155  NStr::SplitInTwo(firstAcc,"_",molID,chainID);
2156  url_link = CAlignFormatUtil::MapTemplate(url_link,"molid",molID);
2157  url_link = CAlignFormatUtil::MapTemplate(url_link,"queryID",linkoutInfo.queryID);
2158  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,firstAcc,lnk_displ,"",linkTitle);
2159  if(textLink) {
2160  url_link = CAlignFormatUtil::MapTemplate(kStructureDispl,"lnk",url_link);
2161  }
2162  url_link = CAlignFormatUtil::MapProtocol(url_link);
2163  linkout_list.push_back(url_link);
2164  }
2165  if (linkout & eGeo){
2166  url_link = CAlignFormatUtil::GetURLFromRegistry("GEO");
2167  lnk_displ = textLink ? "GEO Profiles" : kGeoImg;
2168 
2169  lnkTitleInfo = "Expression profiles";
2170  //gilist contains comma separated gis
2171  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2172 
2173 
2174  if(textLink) {
2175  url_link = CAlignFormatUtil::MapTemplate(kGeoDispl,"lnk",url_link);
2176  }
2177  url_link = CAlignFormatUtil::MapProtocol(url_link);
2178  linkout_list.push_back(url_link);
2179  }
2180  if(linkout & eGene){
2181  url_link = CAlignFormatUtil::GetURLFromRegistry("GENE");
2182  if(textLink) {
2183  lnk_displ = "Gene";
2184  lnkTitleInfo = "gene information";
2185  }
2186  else {
2187  lnk_displ = kGeneImg;
2188  }
2189  string termParam = NStr::Find(labelList,",") == NPOS ? kGeneTerm : ""; //kGeneTerm if only one seqid
2190  url_link = CAlignFormatUtil::MapTemplate(url_link,"termParam",termParam);
2191 
2192  string uid = !linkoutInfo.is_na ? "[Protein Accession]" : "[Nucleotide Accession]";
2193  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2194 
2195  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2196 
2197  if(textLink) {
2198  url_link = CAlignFormatUtil::MapTemplate(kGeneDispl,"lnk",url_link);
2199  }
2200  url_link = CAlignFormatUtil::MapProtocol(url_link);
2201  linkout_list.push_back(url_link);
2202  }
2203 
2204  if((linkout & eGenomicSeq) && first_gi != ZERO_GI){ //only for advanced view -> textlink = true
2205  if(textLink) {
2206  url_link = kMapviewBlastHitParams;
2207  lnk_displ = "Map Viewer";
2208 
2209  lnkTitleInfo = "BLAST hits on the " + linkoutInfo.taxName + " genome";
2210 
2211  url_link = CAlignFormatUtil::MapTemplate(url_link,"gnl",NStr::URLEncode(linkoutInfo.gnl));
2212  url_link = CAlignFormatUtil::MapTemplate(url_link,"db",linkoutInfo.database);
2213  url_link = CAlignFormatUtil::MapTemplate(url_link,"is_na",linkoutInfo.is_na? "1" : "0");
2214  string user_url = (linkoutInfo.user_url.empty()) ? kMapviewBlastHitUrl : linkoutInfo.user_url;
2215  url_link = CAlignFormatUtil::MapTemplate(url_link,"user_url",user_url);
2216 
2217  string taxIDStr = (linkoutInfo.taxid > ZERO_TAX_ID) ? NStr::NumericToString(linkoutInfo.taxid) : "";
2218  url_link = CAlignFormatUtil::MapTemplate(url_link,"taxid",taxIDStr);
2219 
2220  string queryNumStr = (linkoutInfo.query_number > 0) ? NStr::IntToString(linkoutInfo.query_number) : "";
2221  url_link = CAlignFormatUtil::MapTemplate(url_link,"query_number",queryNumStr); //gi,term
2222 
2223  string giStr = (first_gi > ZERO_GI)? NStr::NumericToString(first_gi) : "";
2224  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giStr,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2225 
2226  if(textLink) {
2227  url_link = CAlignFormatUtil::MapTemplate(kMapviwerDispl,"lnk",url_link);
2228  }
2229  url_link = CAlignFormatUtil::MapProtocol(url_link);
2230  linkout_list.push_back(url_link);
2231  }
2232  }
2233  else if((linkout & eMapviewer) && first_gi != ZERO_GI){
2234  url_link = kMapviwerUrl;
2235  lnk_displ = textLink ? "Map Viewer" : kMapviwerImg;
2236 
2237  string linkTitle = " title=\"View <@label@> aligned to the " + linkoutInfo.taxName + " genome\"";
2238  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2239 
2240  if(textLink) {
2241  url_link = CAlignFormatUtil::MapTemplate(kMapviwerDispl,"lnk",url_link);
2242  }
2243  url_link = CAlignFormatUtil::MapProtocol(url_link);
2244  linkout_list.push_back(url_link);
2245  }
2246  //View Bioassays involving <accession
2247  if(linkout & eBioAssay && linkoutInfo.is_na && first_gi != ZERO_GI){
2248  url_link = CAlignFormatUtil::GetURLFromRegistry("BIOASSAY_NUC");
2249  lnk_displ = textLink ? "PubChem BioAssay" : kBioAssayNucImg;
2250 
2251  string linkTitle = " title=\"View Bioassays involving <@label@>\"";
2252  //gilist contains comma separated gis, change it to the following
2253  giList = NStr::Replace(giList,",","[RNATargetGI] OR ");
2254  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2255 
2256  if(textLink) {
2257  url_link = CAlignFormatUtil::MapTemplate(kBioAssayDispl,"lnk",url_link);
2258  }
2259  url_link = CAlignFormatUtil::MapProtocol(url_link);
2260  linkout_list.push_back(url_link);
2261  }
2262  else if (linkout & eBioAssay && !linkoutInfo.is_na && first_gi != ZERO_GI) {
2263  url_link = CAlignFormatUtil::GetURLFromRegistry("BIOASSAY_PROT");
2264  lnk_displ = textLink ? "PubChem BioAssay" : kBioAssayProtImg;
2265 
2266  lnkTitleInfo ="Bioassay data";
2267  string linkTitle = " title=\"View Bioassays involving <@label@>\"";
2268  //gilist contains comma separated gis, change it to the following
2269  giList = NStr::Replace(giList,",","[PigGI] OR ");
2270  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2271 
2272  if(textLink) {
2273  url_link = CAlignFormatUtil::MapTemplate(kBioAssayDispl,"lnk",url_link);
2274  }
2275  url_link = CAlignFormatUtil::MapProtocol(url_link);
2276  linkout_list.push_back(url_link);
2277  }
2278  if(linkout & eReprMicrobialGenomes){
2279  url_link = CAlignFormatUtil::GetURLFromRegistry("REPR_MICROBIAL_GENOMES");
2280  lnk_displ = textLink ? "Genome" : kReprMicrobialGenomesImg;
2281 
2282  lnkTitleInfo = "genomic information";
2283  //gilist contains comma separated gis
2284  string uid = !linkoutInfo.is_na ? "Protein Accession" : "Nucleotide Accession";
2285  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2286  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2287 
2288  if(textLink) {
2289  url_link = CAlignFormatUtil::MapTemplate(kReprMicrobialGenomesDispl,"lnk",url_link);
2290  }
2291  url_link = CAlignFormatUtil::MapProtocol(url_link);
2292  linkout_list.push_back(url_link);
2293  }
2294  if((linkout & eGenomeDataViewer) || (linkout & eTranscript)){
2295  string urlTag;
2296  lnk_displ = textLink ? "Genome Data Viewer" : kGenomeDataViewerImg;
2297  if(linkout & eTranscript) {
2298  urlTag = "GENOME_DATA_VIEWER_TRANSCR";
2299  lnkTitleInfo = "title=\"View the annotation of the transcript <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2300  }
2301  else {
2302  urlTag = linkoutInfo.is_na ? "GENOME_DATA_VIEWER_NUC" : "GENOME_DATA_VIEWER_PROT";
2303  lnkTitleInfo = linkoutInfo.is_na ?
2304  "title=\"View BLAST hits for <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as hits and browse to other regions.\""
2305  :
2306  "title=\"View the annotation of the protein <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2307  }
2308  url_link = CAlignFormatUtil::GetURLFromRegistry(urlTag);
2309  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,firstAcc,lnk_displ,"",lnkTitleInfo);
2310 
2311  url_link = CAlignFormatUtil::MapTemplate(url_link,"queryID",linkoutInfo.queryID);
2312 
2313  TSeqPos seqFrom = linkoutInfo.subjRange.GetFrom();
2314  seqFrom = (seqFrom == 0) ? seqFrom : seqFrom - 1;
2315 
2316  TSeqPos seqTo = linkoutInfo.subjRange.GetTo();
2317  seqTo = (seqTo == 0) ? seqTo : seqTo - 1;
2318 
2319  url_link = CAlignFormatUtil::MapTemplate(url_link,"from",seqFrom);//-1
2320  url_link = CAlignFormatUtil::MapTemplate(url_link,"to",seqTo);//-1
2321 
2322  if(textLink) {
2323  url_link = CAlignFormatUtil::MapTemplate(kGenomeDataViewerDispl,"lnk",url_link);
2324  }
2325  url_link = CAlignFormatUtil::MapProtocol(url_link);
2326  linkout_list.push_back(url_link);
2327  }
2328  return linkout_list;
2329 }
2330 
2331 ///Get list of linkouts for one sequence
/// Builds the linkout URLs for one sequence by filling an SLinkoutInfo from
/// the arguments and delegating to s_GetLinkoutUrl with textLink = false.
/// @param linkout   bit mask of linkout types to generate
/// @param ids       seq-ids of the sequence; FindGi(ids) supplies its gi
/// @param first_gi  gi used by the gi-dependent linkouts; when it equals
///                  ZERO_GI the gi found in `ids` is used instead
/// @param cur_align forwarded through SLinkoutInfo::cur_align
/// @return the list returned by s_GetLinkoutUrl
/// NOTE(review): two statements (original lines 2345 and 2347) are absent from
/// this listing; `label` is presumably filled in by them - confirm against
/// the repository copy.
2332 list<string> CAlignFormatUtil::GetLinkoutUrl(int linkout, const CBioseq::TId& ids,
2333  const string& rid,
2334  const string& cdd_rid,
2335  const string& entrez_term,
2336  bool is_na,
2337  TGi first_gi,
2338  bool structure_linkout_as_group,
2339  bool for_alignment, int cur_align,
2340  string preComputedResID)
2341 
2342 {
2343  list<string> linkout_list;
2344  TGi gi = FindGi(ids);
2346  string label;
2348  string giString = NStr::NumericToString(gi);
// Fall back to this sequence's own gi when the caller supplied none.
2349  first_gi = (first_gi == ZERO_GI) ? gi : first_gi;
2350 
2351 
2352 
// database, user_url and linkoutOrder are left empty and query_number is 0
// in this overload.
2353  SLinkoutInfo linkoutInfo;
2354  linkoutInfo.Init(rid,
2355  cdd_rid,
2356  entrez_term,
2357  is_na,
2358  "", //database
2359  0, //query_number
2360  "", //user_url
2361  preComputedResID,
2362  "", //linkoutOrder
2363  structure_linkout_as_group,
2364  for_alignment);
2365 
2366  linkoutInfo.cur_align = cur_align;
2367  linkoutInfo.taxid = ZERO_TAX_ID;
2368 
2369  linkout_list = s_GetLinkoutUrl(linkout,
2370  giString,
2371  label,
2372  first_gi,
2373  linkoutInfo,
2374  false); //textlink
2375 
2376  return linkout_list;
2377 }
2378 
2379 static int s_LinkLetterToType(string linkLetter)
2380 {
2381  int linkType = 0;
2382  if(linkLetter == "U") {
2383  linkType = eUnigene;
2384  }
2385  else if(linkLetter == "S") {
2386  linkType = eStructure;
2387  }
2388  else if(linkLetter == "E") {
2389  linkType = eGeo;
2390  }
2391  else if(linkLetter == "G") {
2392  linkType = eGene;
2393  }
2394  else if(linkLetter == "M") {
2395  linkType = eMapviewer | eGenomicSeq;
2396  }
2397  else if(linkLetter == "N") {
2398  linkType = eGenomicSeq;
2399  }
2400  else if(linkLetter == "B") {
2401  linkType = eBioAssay;
2402  }
2403  else if(linkLetter == "R") {
2404  linkType = eReprMicrobialGenomes;
2405  }
2406  else if(linkLetter == "V") {
2407  linkType = eGenomeDataViewer;
2408  }
2409  else if(linkLetter == "T") {
2410  linkType = eTranscript;
2411  }
2412 
2413  return linkType;
2414 }
2415 
2416 
2417 static void s_AddLinkoutInfo(map<int, vector < CBioseq::TId > > &linkout_map,int linkout,CBioseq::TId &cur_id)
2418 {
2419  if(linkout_map.count(linkout) > 0){
2420  linkout_map[linkout].push_back(cur_id);
2421  }
2422  else {
2423  vector <CBioseq::TId > idList;
2424  idList.push_back(cur_id);
2425  linkout_map.insert(map<int, vector <CBioseq::TId > >::value_type(linkout,idList));
2426  }
2427 }
2428 
// Looks up the linkout bit mask of one sequence in the linkout database.
// NOTE(review): the first signature line (original line 2429) is missing from
// this listing; judging by the call in GetBdlLinkoutInfo this is presumably
// CAlignFormatUtil::GetSeqLinkoutInfo(CBioseq::TId& cur_id, ...) - confirm.
//
// linkoutdb     in/out: address of the database pointer; nothing is looked up
//               when *linkoutdb is null, and *linkoutdb is set to NULL when a
//               lookup throws CException
// mv_build_name passed through to ILinkoutDB::GetLinkout
// gi            gi to look up; INVALID_GI means "take it from cur_id"
// Returns the linkout mask, 0 when nothing was found or looked up.
2430  ILinkoutDB **linkoutdb,
2431  const string& mv_build_name,
2432  TGi gi)
2433 {
2434  int linkout = 0;
2435 
2436  if(*linkoutdb) {
2437  if(gi == INVALID_GI) {
2438  gi = FindGi(cur_id);
2439  }
2440  try {
2441  if(gi > ZERO_GI) {
2442  linkout = (*linkoutdb)->GetLinkout(gi, mv_build_name);
2443  }
2444  else if(GetTextSeqID(cur_id)){
// NOTE(review): the declaration of `seqID` (original line 2445) is missing
// from this listing.
2446  linkout = (*linkoutdb)->GetLinkout(*seqID, mv_build_name);
// Second lookup with an id rebuilt from GetSeqIdString(false) - presumably
// the accession without its version, going by the variable name below.
2447  string str_id = seqID->GetSeqIdString(false);
2448  CRef<CSeq_id> seqIDNew(new CSeq_id(str_id));
2449  int linkoutWithoutVersion = (*linkoutdb)->GetLinkout(*seqIDNew, mv_build_name);
// NOTE(review): `linkoutWithoutVersion | eStructure` is non-zero for any
// value, so this condition reduces to `linkoutWithoutVersion != 0` and every
// bit of the second lookup is merged. If only structure linkouts were meant
// to be merged, the operator should be `&` - confirm the intent.
2450  if(linkoutWithoutVersion && (linkoutWithoutVersion | eStructure)) {
2451  linkout = linkout | linkoutWithoutVersion;
2452  }
2453  }
2454  }
2455  catch (const CException & e) {
// Report the failure and clear the caller's pointer; the mask collected so
// far (possibly 0) is still returned.
2456  ERR_POST("Problem with linkoutdb: " + e.GetMsg());
2457  cerr << "[BLAST FORMATTER EXCEPTION] Problem with linkoutdb: " << e.GetMsg() << endl;
2458  *linkoutdb = NULL;
2459  }
2460  }
2461  return linkout;
2462 }
2463 
// Registers one sequence (`cur_id`) in `linkout_map` under every linkout type
// reported for it by GetSeqLinkoutInfo.
// NOTE(review): the signature line carrying the function name and the cur_id
// parameter (original line 2465) is missing from this listing; presumably
// CAlignFormatUtil::GetBdlLinkoutInfo(CBioseq::TId& cur_id, ...) - confirm.
//
// linkout_map   in/out: linkout type -> list of ids having that linkout
// linkoutdb     linkout database; the function does nothing when it is null
// mv_build_name passed through to GetSeqLinkoutInfo
2464 void
2466  map<int, vector <CBioseq::TId > > &linkout_map,
2467  ILinkoutDB* linkoutdb,
2468  const string& mv_build_name)
2469 {
2470  if(!linkoutdb) return;
2471 
// The address of the local pointer copy is passed, so a reset to NULL inside
// GetSeqLinkoutInfo does not reach this function's caller.
2472  int linkout = GetSeqLinkoutInfo(cur_id,
2473  &linkoutdb,
2474  mv_build_name);
2475 
2476  if(linkout & eGene){
2477  s_AddLinkoutInfo(linkout_map,eGene,cur_id);
2478  }
2479  if (linkout & eUnigene) {
2480  s_AddLinkoutInfo(linkout_map,eUnigene,cur_id);
2481  }
2482  if (linkout & eGeo){
2483  s_AddLinkoutInfo(linkout_map,eGeo,cur_id);
2484  }
2485  if (linkout & eStructure){
2486  s_AddLinkoutInfo(linkout_map,eStructure,cur_id);
2487  }
2488  //eGenomicSeq and eMapviewer cannot combine together
// When both bits are set the id is filed under eGenomicSeq only; eGenomicSeq
// without eMapviewer is not recorded at all.
2489  if((linkout & eGenomicSeq) && (linkout & eMapviewer)){
2490  s_AddLinkoutInfo(linkout_map,eGenomicSeq,cur_id);
2491  }
2492  else if(linkout & eMapviewer){
2493  s_AddLinkoutInfo(linkout_map,eMapviewer,cur_id);
2494  }
2495  if(linkout & eBioAssay){
2496  s_AddLinkoutInfo(linkout_map,eBioAssay,cur_id);
2497  }
2498  if(linkout & eReprMicrobialGenomes){
2499  s_AddLinkoutInfo(linkout_map,eReprMicrobialGenomes,cur_id);
2500  }
2501 
2502  if(linkout & eGenomeDataViewer){
2503  s_AddLinkoutInfo(linkout_map,eGenomeDataViewer,cur_id);
2504  }
2505  if(linkout & eTranscript){
2506  s_AddLinkoutInfo(linkout_map,eTranscript,cur_id);
2507  }
2508 
2509 }
2510 
// Collects linkout information for the leading deflines of `bdl` by calling
// the single-id GetBdlLinkoutInfo on each defline's seq-id list.
// NOTE(review): the signature line carrying the function name and the `bdl`
// parameter (original line 2512) is missing from this listing; `bdl` is
// iterated below as list< CRef< CBlast_def_line > >.
//
// linkout_map   in/out: linkout type -> list of ids having that linkout
// linkoutdb     linkout database, forwarded unchanged
// mv_build_name forwarded unchanged
2511 void
2513  map<int, vector <CBioseq::TId > > &linkout_map,
2514  ILinkoutDB* linkoutdb,
2515  const string& mv_build_name)
2516 {
2517  const int kMaxDeflineNum = 10;
2518  int num = 0;
2519  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2520  iter != bdl.end(); iter++){
// The C-style cast strips the const from the defline's id list.
2521  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2522 
2523  GetBdlLinkoutInfo(cur_id,
2524  linkout_map,
2525  linkoutdb,
2526  mv_build_name);
// The counter is tested after the increment with `>`, so up to
// kMaxDeflineNum + 1 deflines are processed before the loop stops.
2527  num++;
2528  if(num > kMaxDeflineNum) break;
2529  }
2530 }
2531 
/// Returns the common name recorded for `taxid` by CSeqDB::GetTaxInfo.
/// @param taxid taxonomy id; ZERO_TAX_ID skips the lookup
/// @return the common name, or an empty string when taxid is ZERO_TAX_ID or
///         the lookup throws a CException
2532 static string s_GetTaxName(TTaxId taxid)
2533 {
2534  string taxName;
2535  try {
2536  if(taxid != ZERO_TAX_ID) {
// NOTE(review): the declaration of `info` (original line 2537) is missing
// from this listing.
2538  CSeqDB::GetTaxInfo(taxid, info);
2539  taxName = info.common_name;
2540  }
2541  }
2542  catch (CException&) {
// The exception is swallowed; the caller gets an empty name.
2543 
2544  }
2545  return taxName;
2546 }
2547 
// Appends an "Identical Proteins" link to `linkout_list` when the sequence
// has a text seq-id.
// NOTE(review): the first signature line (original line 2548) and two body
// lines (2558, 2561) are missing from this listing. Judging by the call in
// s_GetFullLinkoutUrl this is presumably
// s_AddOtherRelatedInfoLinks(CBioseq::TId& cur_id, ...); `wid` and the value
// of `label` presumably come from the missing lines - confirm.
//
// rid, for_alignment, cur_align  forwarded to s_MapLinkoutGenParam
// is_na                          not used in the visible lines
// linkout_list                   in/out: receives the new link
2549  const string& rid,
2550  bool is_na,
2551  bool for_alignment,
2552  int cur_align,
2553  list<string> &linkout_list)
2554 
2555 {
2556  //Identical Proteins
2557 
2559  if (CAlignFormatUtil::GetTextSeqID(wid)) {
2560  string label;
2562  string url_link = kIdenticalProteinsUrl;
2563  string lnk_displ = "Identical Proteins";
// ZERO_GI is passed as the gi string for this link.
2564  url_link = s_MapLinkoutGenParam(url_link,rid,NStr::NumericToString(ZERO_GI),for_alignment, cur_align,label,lnk_displ);
// Wrap the URL in the display template, then fill in the "label" field.
2565  url_link = CAlignFormatUtil::MapTemplate(kIdenticalProteinsDispl,"lnk",url_link);
2566  url_link = CAlignFormatUtil::MapTemplate(url_link,"label",label);
2567  linkout_list.push_back(url_link);
2568  }
2569 }
2570 
2571 
2572 
2573 //reset:taxname,gnl
/// Builds the linkout URLs for one hit in the order given by
/// linkoutInfo.linkoutOrder, using the ids collected per linkout type in
/// `linkout_map`.
/// @param cur_id           ids of the sequence the links are attached to
/// @param linkoutInfo      in/out: taxName and gnl are reset for every letter
/// @param linkout_map      linkout type -> ids having that linkout
/// @param getIdentProteins when true, s_AddOtherRelatedInfoLinks is called
///                         after the ordered links are built
/// @return at most one URL per letter of linkoutOrder, plus whatever
///         s_AddOtherRelatedInfoLinks appends
2574 static list<string> s_GetFullLinkoutUrl(CBioseq::TId& cur_id,
2575  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2576  map<int, vector < CBioseq::TId > > &linkout_map,
2577  bool getIdentProteins)
2578 
2579 {
2580  list<string> linkout_list;
2581 
2582  vector<string> linkLetters;
2583  NStr::Split(linkoutInfo.linkoutOrder,",",linkLetters); //linkoutOrder = "G,U,M,V,E,S,B,R,T"
2584  for(size_t i = 0; i < linkLetters.size(); i++) {
2585  TGi first_gi = ZERO_GI;
2586  vector < CBioseq::TId > idList;
2587  int linkout = s_LinkLetterToType(linkLetters[i]);
2588  linkoutInfo.taxName.clear();
2589  if(linkout & (eMapviewer | eGenomicSeq)) {
// Genomic-sequence ids take precedence over map viewer ids. operator[]
// inserts an empty eGenomicSeq entry into linkout_map when none exists.
2590  linkout = (linkout_map[eGenomicSeq].size() != 0) ? eGenomicSeq : eMapviewer;
2591  linkoutInfo.taxName = s_GetTaxName(linkoutInfo.taxid);
2592  }
2593  if(linkout_map.find(linkout) != linkout_map.end()) {
2594  idList = linkout_map[linkout];
2595  }
// No link for an unknown letter, for a type without ids, or for a structure
// linkout when cdd_rid is "" or "0".
2596  bool disableLink = (linkout == 0 || idList.size() == 0 || ( (linkout & eStructure) && (linkoutInfo.cdd_rid == "" || linkoutInfo.cdd_rid == "0")));
2597 
2598  string giList,labelList;
// Holds a boolean (declared int) that is handed to GetLabel below.
2599  int seqVersion = ((linkout & eGenomeDataViewer) || (linkout & eTranscript)) ? true : false;
// This inner `i` shadows the outer loop index.
2600  for (size_t i = 0; i < idList.size(); i++) {
2601  const CBioseq::TId& ids = idList[i];
2602  TGi gi = FindGi(ids);
2603  if (first_gi == ZERO_GI) first_gi = gi;
2604 
2605 
// NOTE(review): the declaration of `wid` (original line 2606) is missing
// from this listing.
2607  string label = CAlignFormatUtil::GetLabel(wid,seqVersion);
2608  if(!labelList.empty()) labelList += ",";
2609  labelList += label;
2610 
2611  //use only first gi for bioAssay protein
2612  if(!giList.empty() && (linkout & eBioAssay) && !linkoutInfo.is_na) continue;
2613  if(!giList.empty()) giList += ",";
2614  giList += NStr::NumericToString(gi);
2615  }
2616 
2617  linkoutInfo.gnl.clear();
2618  if(!disableLink && linkout == eGenomicSeq) {
2619  linkoutInfo.gnl = s_GetBestIDForURL(cur_id);
2620  }
2621 
2622  if(!disableLink) {//
2623  //The following list will contain only one entry for single linkout value
2624  list<string> one_linkout = s_GetLinkoutUrl(linkout,
2625  giList,
2626  labelList,
2627  first_gi,
2628  linkoutInfo);
// Only the first URL of the returned list is kept.
2629  if(one_linkout.size() > 0) {
2630  list<string>::iterator iter = one_linkout.begin();
2631  linkout_list.push_back(*iter);
2632  }
2633  }
2634  }
2635  if(getIdentProteins) {
2636  s_AddOtherRelatedInfoLinks(cur_id,linkoutInfo.rid,linkoutInfo.is_na,linkoutInfo.for_alignment,linkoutInfo.cur_align,linkout_list);
2637  }
2638  return linkout_list;
2639 }
2640 
2641 list<string> CAlignFormatUtil::GetFullLinkoutUrl(const list< CRef< CBlast_def_line > > &bdl,
2642  CAlignFormatUtil::SLinkoutInfo &linkoutInfo)
2643 {
2644  list<string> linkout_list;
2645  map<int, vector < CBioseq::TId > > linkout_map;
2646  if(bdl.size() > 0) {
2647  GetBdlLinkoutInfo(bdl,linkout_map, linkoutInfo.linkoutdb, linkoutInfo.mv_build_name);
2648  list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2649  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2650  linkout_list = s_GetFullLinkoutUrl(cur_id,
2651  linkoutInfo,
2652  linkout_map,
2653  !linkoutInfo.is_na && bdl.size() > 1);
2654  }
2655  return linkout_list;
2656 }
2657 
2658 
2659 list<string> CAlignFormatUtil::GetFullLinkoutUrl(const list< CRef< CBlast_def_line > > &bdl,
2660  const string& rid,
2661  const string& cdd_rid,
2662  const string& entrez_term,
2663  bool is_na,
2664  bool structure_linkout_as_group,
2665  bool for_alignment,
2666  int cur_align,
2667  string& linkoutOrder,
2668  TTaxId taxid,
2669  string &database,
2670  int query_number,
2671  string &user_url,
2672  string &preComputedResID,
2673  ILinkoutDB* linkoutdb,
2674  const string& mv_build_name)
2675 
2676 {
2677  list<string> linkout_list;
2678  map<int, vector < CBioseq::TId > > linkout_map;
2679  if(bdl.size() > 0) {
2680  GetBdlLinkoutInfo(bdl,linkout_map, linkoutdb, mv_build_name);
2681  list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2682  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2683 
2684  SLinkoutInfo linkoutInfo;
2685  linkoutInfo.Init(rid,
2686  cdd_rid,
2687  entrez_term,
2688  is_na,
2689  database,
2690  query_number,
2691  user_url,
2692  preComputedResID,
2693  linkoutOrder,
2694  structure_linkout_as_group,
2695  for_alignment);
2696 
2697  linkoutInfo.cur_align = cur_align;
2698  linkoutInfo.taxid = taxid;
2699 
2700  linkout_list = s_GetFullLinkoutUrl(cur_id,
2701  linkoutInfo,
2702  linkout_map,
2703  !is_na && bdl.size() > 1);
2704  }
2705  return linkout_list;
2706 }
2707 
2708 
// Builds the ordered linkout URL list for a single sequence id list.
// NOTE(review): the first signature line (original line 2709) is missing from
// this listing; presumably
// list<string> CAlignFormatUtil::GetFullLinkoutUrl(CBioseq::TId& cur_id, ...)
// - confirm against the repository copy.
//
// linkoutInfo       linkout settings; linkoutdb and mv_build_name are used
//                   for the lookup
// getIdentProteins  forwarded to s_GetFullLinkoutUrl
// Returns the list produced by s_GetFullLinkoutUrl.
2710  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2711  bool getIdentProteins)
2712 {
2713  list<string> linkout_list;
2714  map<int, vector < CBioseq::TId > > linkout_map;
2715 
// Only cur_id itself is registered in the per-type map.
2716  GetBdlLinkoutInfo(cur_id,linkout_map, linkoutInfo.linkoutdb, linkoutInfo.mv_build_name);
2717  linkout_list = s_GetFullLinkoutUrl(cur_id,
2718  linkoutInfo,
2719  linkout_map,
2720  getIdentProteins);
2721  return linkout_list;
2722 }
2723 
// Overload taking the individual linkout settings for a single sequence id
// list: packs them into an SLinkoutInfo and delegates to s_GetFullLinkoutUrl.
// NOTE(review): the first signature line (original line 2724) is missing from
// this listing; presumably
// list<string> CAlignFormatUtil::GetFullLinkoutUrl(CBioseq::TId& cur_id, ...)
// - confirm against the repository copy.
//
// linkoutdb, mv_build_name  used to look up the linkouts of cur_id
// getIdentProteins          forwarded to s_GetFullLinkoutUrl
// All remaining parameters are copied into the SLinkoutInfo unchanged.
2725  const string& rid,
2726  const string& cdd_rid,
2727  const string& entrez_term,
2728  bool is_na,
2729  bool structure_linkout_as_group,
2730  bool for_alignment,
2731  int cur_align,
2732  string& linkoutOrder,
2733  TTaxId taxid,
2734  string &database,
2735  int query_number,
2736  string &user_url,
2737  string &preComputedResID,
2738  ILinkoutDB* linkoutdb,
2739  const string& mv_build_name,
2740  bool getIdentProteins)
2741 
2742 {
2743  list<string> linkout_list;
2744 
2745  map<int, vector < CBioseq::TId > > linkout_map;
2746  GetBdlLinkoutInfo(cur_id,linkout_map, linkoutdb, mv_build_name);
2747  SLinkoutInfo linkoutInfo;
2748  linkoutInfo.Init(rid,
2749  cdd_rid,
2750  entrez_term,
2751  is_na,
2752  database,
2753  query_number,
2754  user_url,
2755  preComputedResID,
2756  linkoutOrder,
2757  structure_linkout_as_group,
2758  for_alignment);
2759 
// cur_align and taxid are not Init() arguments and are assigned directly.
2760  linkoutInfo.cur_align = cur_align;
2761  linkoutInfo.taxid = taxid;
2762 
2763  linkout_list = s_GetFullLinkoutUrl(cur_id,
2764  linkoutInfo,
2765  linkout_map,
2766  getIdentProteins);
2767  return linkout_list;
2768 }
2769 
2770 
2771 static bool FromRangeAscendingSort(CRange<TSeqPos> const& info1,
2772  CRange<TSeqPos> const& info2)
2773 {
2774  return info1.GetFrom() < info2.GetFrom();
2775 }
2776 
2777 //0 for query, 1 for subject
2778 //Gets query and subject range lists,oppositeStrands param
2779 static bool s_ProcessAlignSet(const CSeq_align_set& alnset,
2780  list<CRange<TSeqPos> > &query_list,
2781  list<CRange<TSeqPos> > &subject_list)
2782 {
2783  bool oppositeStrands = false;
2784  bool isFirst = false;
2785  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2786  CRange<TSeqPos> query_range = (*iter)->GetSeqRange(0);
2787  //for minus strand
2788  if(query_range.GetFrom() > query_range.GetTo()){
2789  query_range.Set(query_range.GetTo(), query_range.GetFrom());
2790  }
2791  query_list.push_back(query_range);
2792 
2793  CRange<TSeqPos> subject_range = (*iter)->GetSeqRange(1);
2794  //for minus strand
2795  if(subject_range.GetFrom() > subject_range.GetTo()){
2796  subject_range.Set(subject_range.GetTo(), subject_range.GetFrom());
2797  }
2798  subject_list.push_back(subject_range);
2799 
2800  oppositeStrands = (!isFirst) ? (*iter)->GetSeqStrand(0) != (*iter)->GetSeqStrand(1) : oppositeStrands;
2801  isFirst = true;
2802  }
2803 
2804  query_list.sort(FromRangeAscendingSort);
2805  subject_list.sort(FromRangeAscendingSort);
2806  return oppositeStrands;
2807 }
2808 
2809 
2810 
2811 //0 for query, 1 for subject
2812 static list<CRange<TSeqPos> > s_MergeRangeList(list<CRange<TSeqPos> > &source)
2813 {
2814 
2815  list<CRange<TSeqPos> > merge_list;
2816 
2817  bool is_first = true;
2818  CRange<TSeqPos> prev_range (0, 0);
2819  ITERATE(list<CRange<TSeqPos> >, iter, source) {
2820 
2821  if (is_first) {
2822  merge_list.push_back(*iter);
2823  is_first= false;
2824  prev_range = *iter;
2825  } else {
2826  if (prev_range.IntersectingWith(*iter)) {
2827  merge_list.pop_back();
2828  CRange<TSeqPos> temp_range = prev_range.CombinationWith(*iter);
2829  merge_list.push_back(temp_range);
2830  prev_range = temp_range;
2831  } else {
2832  merge_list.push_back(*iter);
2833  prev_range = *iter;
2834  }
2835  }
2836 
2837  }
2838  return merge_list;
2839 }
2840 
// Returns the number of positions of row 0 that are covered by at least one
// alignment of `alnset`, computed as the sum of GetLength() over the merged
// row-0 ranges.
// NOTE(review): the signature line (original line 2841) is missing from this
// listing; presumably
// int CAlignFormatUtil::GetMasterCoverage(const CSeq_align_set& alnset)
// - confirm against the repository copy.
2842 {
2843 
2844  list<CRange<TSeqPos> > merge_list;
2845 
2846  list<CRange<TSeqPos> > temp;
2847  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2848  CRange<TSeqPos> seq_range = (*iter)->GetSeqRange(0);
2849  //for minus strand
2850  if(seq_range.GetFrom() > seq_range.GetTo()){
2851  seq_range.Set(seq_range.GetTo(), seq_range.GetFrom());
2852  }
2853  temp.push_back(seq_range);
2854  }
2855 
// s_MergeRangeList only compares neighbours, so the ranges are sorted by
// start position first.
2856  temp.sort(FromRangeAscendingSort);
2857 
2858  merge_list = s_MergeRangeList(temp);
2859 
2860  int master_covered_lenghth = 0;
2861  ITERATE(list<CRange<TSeqPos> >, iter, merge_list) {
2862  master_covered_lenghth += iter->GetLength();
2863  }
2864  return master_covered_lenghth;
2865 }
2866 
2867 
2868 
2869 CRange<TSeqPos> CAlignFormatUtil::GetSeqAlignCoverageParams(const CSeq_align_set& alnset,int *master_covered_lenghth,bool *flip)
2870 
2871 {
2872 
2873  list<CRange<TSeqPos> > query_list;
2874  list<CRange<TSeqPos> > subject_list;
2875 
2876  *flip = s_ProcessAlignSet(alnset,query_list,subject_list);
2877  query_list = s_MergeRangeList(query_list);
2878  subject_list = s_MergeRangeList(subject_list);
2879 
2880 
2881  *master_covered_lenghth = 0;
2882  ITERATE(list<CRange<TSeqPos> >, iter, query_list) {
2883  *master_covered_lenghth += iter->GetLength();
2884  }
2885 
2886  TSeqPos from = 0,to = 0;
2887  ITERATE(list<CRange<TSeqPos> >, iter, subject_list) {
2888  from = (from == 0) ? iter->GetFrom() : min(from,iter->GetFrom());
2889  to = max(to,iter->GetTo());
2890  }
2891  //cerr << "from,to = " << from << "," << to << endl;
2892  CRange<TSeqPos> subjectRange(from + 1, to + 1);
2893  return subjectRange;
2894 }
2895 
// Re-sorts the alignments of `aln_set` for the sortable output format and
// returns them as one flat alignment set.
// NOTE(review): the first signature lines (original lines 2896-2897) are
// missing from this listing; presumably
// CRef<CSeq_align_set> CAlignFormatUtil::SortSeqalignForSortableFormat(
//     CCgiContext& ctx, ...) - confirm against the repository copy.
//
// db_sort                 forwarded to SplitSeqalignByMolecularType
// hit_sort, hsp_sort      sort keys forwarded to
//                         SortOneSeqalignForSortableFormat
// linkoutdb, mv_build_name forwarded to SplitSeqalignByMolecularType
2898  CScope& scope,
2899  CSeq_align_set& aln_set,
2900  bool nuc_to_nuc_translation,
2901  int db_sort,
2902  int hit_sort,
2903  int hsp_sort,
2904  ILinkoutDB* linkoutdb,
2905  const string& mv_build_name) {
2906 
2907 
// Nothing to sort: hand back a CRef to the caller's own object.
2908  if (db_sort == 0 && hit_sort < 1 && hsp_sort < 1)
2909  return (CRef<CSeq_align_set>) &aln_set;
2910 
2911  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2912  vector< CRef<CSeq_align_set> > seqalign_vec(2);
2913  seqalign_vec[0] = new CSeq_align_set;
2914  seqalign_vec[1] = new CSeq_align_set;
2915 
// For a mixed database the alignments are split into the two sets; otherwise
// slot 0 refers to the input set and slot 1 stays empty.
2916  if(IsMixedDatabase(ctx)) {
2917  SplitSeqalignByMolecularType(seqalign_vec, db_sort, aln_set, scope,
2918  linkoutdb, mv_build_name);
2919  }else {
2920  seqalign_vec[0] = const_cast<CSeq_align_set*>(&aln_set);
2921  }
2922 
2923 
// Sort each set on its own and append the results in slot order.
2924  ITERATE(vector< CRef<CSeq_align_set> >, iter, seqalign_vec){
2925  list< CRef<CSeq_align_set> > one_seqalign_hit_total_list = SortOneSeqalignForSortableFormat(**iter,
2926  nuc_to_nuc_translation,
2927  hit_sort,
2928  hsp_sort);
2929 
2930  seqalign_hit_total_list.splice(seqalign_hit_total_list.end(),one_seqalign_hit_total_list);
2931 
2932  }
2933 
2934  return HitListToHspList(seqalign_hit_total_list);
2935 }
// Groups the alignments of `source` into per-hit sets, sorts the hits by
// `hit_sort` and the HSPs inside every hit by `hsp_sort`, and returns the
// hits in their final order. A sort key matching none of the tested values
// leaves the corresponding order unchanged.
// NOTE(review): the signature line with the function name and the `source`
// parameter (original line 2937) is missing from this listing; presumably
// CAlignFormatUtil::SortOneSeqalignForSortableFormat(
//     const CSeq_align_set& source, ...) - confirm.
2936 list< CRef<CSeq_align_set> >
2938  bool nuc_to_nuc_translation,
2939  int hit_sort,
2940  int hsp_sort)
2941 {
2942  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2943  list< CRef<CSeq_align_set> > seqalign_hit_list;
2944  HspListToHitList(seqalign_hit_list, source);
2945 
2946  if (hit_sort == eTotalScore) {
2947  seqalign_hit_list.sort(SortHitByTotalScoreDescending);
2948  } else if (hit_sort == eHighestScore) {
2949  seqalign_hit_list.sort(CAlignFormatUtil::SortHitByScoreDescending);
2950  } else if (hit_sort == ePercentIdentity) {
2951  SortHitByPercentIdentityDescending(seqalign_hit_list,
2952  nuc_to_nuc_translation);
2953  } else if (hit_sort == eQueryCoverage) {
2954  seqalign_hit_list.sort(SortHitByMasterCoverageDescending);
2955  }
2956 
2957  ITERATE(list< CRef<CSeq_align_set> >, iter2, seqalign_hit_list) {
2958  CRef<CSeq_align_set> temp(*iter2);
2959  if (hsp_sort == eQueryStart) {
2960  temp->Set().sort(SortHspByMasterStartAscending);
2961  } else if (hsp_sort == eHspPercentIdentity) {
// NOTE(review): the statement of this branch (original line 2962) is missing
// from this listing.
2963  } else if (hsp_sort == eScore) {
2964  temp->Set().sort(SortHspByScoreDescending);
2965  } else if (hsp_sort == eSubjectStart) {
2966  temp->Set().sort(SortHspBySubjectStartAscending);
2967 
2968  }
2969  seqalign_hit_total_list.push_back(temp);
2970  }
2971  return seqalign_hit_total_list;
2972 }
2973 
// Re-sorts a single alignment set for the sortable output format.
// NOTE(review): the first signature lines (original lines 2974-2975) are
// missing from this listing; presumably
// CRef<CSeq_align_set> CAlignFormatUtil::SortSeqalignForSortableFormat(
//     CSeq_align_set& aln_set, ...) - confirm against the repository copy.
//
// hit_sort, hsp_sort  sort keys forwarded to SortOneSeqalignForSortableFormat
2976  bool nuc_to_nuc_translation,
2977  int hit_sort,
2978  int hsp_sort) {
2979 
// Sort keys not above eEvalue / eHspEvalue need no re-sorting: hand back a
// CRef to the caller's own object.
2980  if (hit_sort <= eEvalue && hsp_sort <= eHspEvalue) {
2981  return (CRef<CSeq_align_set>) &aln_set;
2982  }
2983 
2984 // seqalign_vec[0] = const_cast<CSeq_align_set*>(&aln_set);
2985  list< CRef<CSeq_align_set> > seqalign_hit_total_list = SortOneSeqalignForSortableFormat(aln_set,
2986  nuc_to_nuc_translation,
2987  hit_sort,
2988  hsp_sort);
2989  return HitListToHspList(seqalign_hit_total_list);
2990 }
2991 
2992 
// Returns a new alignment set holding the alignments of `source_aln` whose
// e-value lies in [evalueLow, evalueHigh] (both bounds inclusive), in their
// original order.
// NOTE(review): the first signature line (original line 2993) is missing from
// this listing; presumably
// CRef<CSeq_align_set> CAlignFormatUtil::FilterSeqalignByEval(
//     CSeq_align_set& source_aln, ...) - confirm against the repository copy.
2994  double evalueLow,
2995  double evalueHigh)
2996 {
2997  int score, sum_n, num_ident;
2998  double bits, evalue;
2999  list<TGi> use_this_gi;
3000 
3001  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3002 
3003  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
// Only `evalue` is used from the scores read here.
3004  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3005  sum_n, num_ident, use_this_gi);
3006  //Add the next three lines to re-calculte seq align evalue to the obe that is displayed on the screen
3007  //string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3008  //CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3009  //evalue = NStr::StringToDouble(evalue_buf);
3010  if(evalue >= evalueLow && evalue <= evalueHigh) {
3011  new_aln->Set().push_back(*iter);
3012  }
3013  }
3014  return new_aln;
3015 
3016 }
3017 
3018 /// Returns percent match for an alignment.
3019 /// Normally we round up the value, unless that means that an
3020 /// alignment with mismatches would be 100%. In that case
3021 /// it becomes 99%.
3022 ///@param numerator: numerator in percent identity calculation.
3023 ///@param denominator: denominator in percent identity calculation.
3024 int CAlignFormatUtil::GetPercentMatch(int numerator, int denominator)
3025 {
3026  if (numerator == denominator)
3027  return 100;
3028  else {
3029  int retval =(int) (0.5 + 100.0*((double)numerator)/((double)denominator));
3030  retval = min(99, retval);
3031  return retval;
3032  }
3033 }
3034 
3035 double CAlignFormatUtil::GetPercentIdentity(int numerator, int denominator)
3036 {
3037  if (numerator == denominator)
3038  return 100;
3039  else {
3040  double retval =100*(double)numerator/(double)denominator;
3041  return retval;
3042  }
3043 }
3044 
// Returns a new alignment set holding the alignments of `source_aln` whose
// percent identity lies in [percentIdentLow, percentIdentHigh] (both bounds
// inclusive), in their original order.
// NOTE(review): the first signature line (original line 3045) is missing from
// this listing; presumably
// CRef<CSeq_align_set> CAlignFormatUtil::FilterSeqalignByPercentIdent(
//     CSeq_align_set& source_aln, ...) - confirm against the repository copy.
3046  double percentIdentLow,
3047  double percentIdentHigh)
3048 {
3049  int score, sum_n, num_ident;
3050  double bits, evalue;
3051  list<TGi> use_this_gi;
3052 
3053  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3054 
3055  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3056  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3057  sum_n, num_ident, use_this_gi);
3058  int seqAlnLength = GetAlignmentLength(**iter, kTranslation);
// Alignments without a positive length or identity count are dropped,
// whatever the limits are.
3059  if(seqAlnLength > 0 && num_ident > 0) {
// Unrounded percentage (GetPercentIdentity), compared as a double.
3060  double alnPercentIdent = GetPercentIdentity(num_ident, seqAlnLength);
3061  if(alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh) {
3062  new_aln->Set().push_back(*iter);
3063  }
3064  }
3065  }
3066  return new_aln;
3067 }
3068 
// Returns a new alignment set holding the alignments of `source_aln` that
// satisfy both the e-value and the percent identity limits (all bounds
// inclusive), in their original order.
// NOTE(review): the first signature line (original line 3069) is missing from
// this listing; presumably
// CRef<CSeq_align_set> CAlignFormatUtil::FilterSeqalignByScoreParams(
//     CSeq_align_set& source_aln, ...) - confirm against the repository copy.
3070  double evalueLow,
3071  double evalueHigh,
3072  double percentIdentLow,
3073  double percentIdentHigh)
3074 {
3075  int score, sum_n, num_ident;
3076  double bits, evalue;
3077  list<TGi> use_this_gi;
3078 
3079  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3080 
3081  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3082  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3083  sum_n, num_ident, use_this_gi);
3084  //Add the next three lines to re-calculte seq align evalue to the one that is displayed on the screen
3085  //string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3086  //CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3087  //evalue = NStr::StringToDouble(evalue_buf);
3088  int seqAlnLength = GetAlignmentLength(**iter, kTranslation);
// Alignments without a positive length or identity count are dropped,
// whatever the limits are.
3089  if(seqAlnLength > 0 && num_ident > 0) {
// This filter compares the integer percentage from GetPercentMatch.
// NOTE(review): the percent-identity-only filter in this file uses the
// unrounded GetPercentIdentity instead - confirm the difference is intended.
3090  int alnPercentIdent = GetPercentMatch(num_ident, seqAlnLength);
3091  if( (evalue >= evalueLow && evalue <= evalueHigh) &&
3092  (alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh)) {
3093  new_aln->Set().push_back(*iter);
3094  }
3095  }
3096  }
3097  return new_aln;
3098 }
3099 
// Returns `value` after formatting it with two decimals and parsing the text
// back into a double.
// NOTE(review): the signature line (original line 3100) is missing from this
// listing; judging by the call in s_isAlnInFilteringRange this is presumably
// double adjustPercentIdentToDisplayValue(double value) - confirm.
3101 {
3102  char buffer[512];
// "%.*f" with precision 2: fixed notation, two digits after the point.
3103  sprintf(buffer, "%.*f", 2, value);
3104  double newVal = NStr::StringToDouble(buffer);
3105  return newVal;
3106 }
3107 
3108 static bool s_isAlnInFilteringRange(double evalue,
3109  double percentIdent,
3110  int queryCover,
3111  double evalueLow,
3112  double evalueHigh,
3113  double percentIdentLow,
3114  double percentIdentHigh,
3115  int queryCoverLow,
3116  int queryCoverHigh)
3117 {
3118 
3119 
3120  bool isInRange = false;
3121  //Adjust percent identity and evalue to display values
3122  percentIdent = adjustPercentIdentToDisplayValue(percentIdent);
3123  string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3124  double bits = 0;
3125  CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3126  evalue = NStr::StringToDouble(evalue_buf);
3127 
3128  if(evalueLow >= 0 && percentIdentLow >= 0 && queryCoverLow >= 0) {
3129  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3130  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh) &&
3131  (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3132  }
3133  else if(evalueLow >= 0 && percentIdentLow >= 0) {
3134  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3135  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3136  }
3137  else if(evalueLow >= 0 && queryCoverLow >= 0) {
3138  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3139  (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3140  }
3141  else if(queryCoverLow >= 0 && percentIdentLow >= 0) {
3142  isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh) &&
3143  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3144  }
3145  else if(evalueLow >= 0) {
3146  isInRange = (evalue >= evalueLow && evalue <= evalueHigh);
3147  }
3148  else if(percentIdentLow >= 0) {
3149  isInRange = (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3150  }
3151  else if(queryCoverLow >= 0) {
3152  isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3153  }
3154  return isInRange;
3155 }
3156 
3158 double evalueLow,
3159 double evalueHigh,
3160 double percentIdentLow,
3161 double percentIdentHigh,
3162 int queryCoverLow,
3163 int queryCoverHigh)
3164 {
// Filters source_aln by e-value, percent identity and query coverage.
// HspListToHitList() fills seqalign_hit_list from source_aln (presumably one
// CSeq_align_set per hit - confirm); each entry is kept only when
// s_isAlnInFilteringRange() accepts its evalue / percent_identity /
// percent_coverage for the given bounds. The kept entries are returned
// through HitListToHspList().
// NOTE(review): the first line of the signature and the statement that
// initializes seqSetInfo are not present in this listing.
3165 list< CRef<CSeq_align_set> > seqalign_hit_total_list;
3166 list< CRef<CSeq_align_set> > seqalign_hit_list;
3167 
3168 HspListToHitList(seqalign_hit_list, source_aln);
3169 
3170 ITERATE(list< CRef<CSeq_align_set> >, iter, seqalign_hit_list) {
3171 CRef<CSeq_align_set> temp(*iter);
3173 
3174 if(s_isAlnInFilteringRange(seqSetInfo->evalue,
3175 seqSetInfo->percent_identity,
3176 seqSetInfo->percent_coverage,
3177 evalueLow,
3178 evalueHigh,
3179 percentIdentLow,
3180 percentIdentHigh,
3181 queryCoverLow,
3182 queryCoverHigh)) {
// Entry passed every active filter: keep it.
3183 seqalign_hit_total_list.push_back(temp);
3184 }
3185 }
3186 return HitListToHspList(seqalign_hit_total_list);
3187 }
3188 
3190 int maxAligns,
3191 int maxHsps)
3192 {
// Copies alignments from source_aln, in order, into a newly allocated set
// while applying the maxAligns and maxHsps limits, and returns that set.
// NOTE(review): the first line of the signature is not present in this
// listing.
3193 CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3194 
3195 CConstRef<CSeq_id> prevQueryId,prevSubjectId;
3196 int alignCount = 0,hspCount = 0;
3197 ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3198 const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
// First alignment, or the query id differs from the previous one.
3199 if(prevQueryId.Empty() || !newQueryId.Match(*prevQueryId)){
// The HSP limit is tested only here, at a query boundary, and hspCount is
// never reset, so it accumulates over all queries.
3200 if (hspCount >= maxHsps) {
3201 break;
3202 }
// The distinct-subject count restarts for each query; prevSubjectId is not
// reset here.
3203 alignCount = 0;
3204 prevQueryId = &newQueryId;
3205 }
3206 if (alignCount < maxAligns) {
3207 const CSeq_id& newSubjectId = (*iter)->GetSeq_id(1);
3208 // Increment alignments count if subject sequence is different
3209 if(prevSubjectId.Empty() || !newSubjectId.Match(*prevSubjectId)){
3210 ++alignCount;
3211 prevSubjectId = &newSubjectId;
3212 }
3213 // Increment HSP count if the alignments limit is not reached
3214 ++hspCount;
3215 new_aln->Set().push_back(*iter);
3216 }
3217 
3218 }
3219 return new_aln;
3220 }
3221 
3222 
3224 int queryNumber)
3225 {
// Returns the alignments that belong to the queryNumber-th query.
// Queries are counted from 1, by runs of consecutive alignments whose query
// id (GetSeq_id(0)) matches. queryNumber == 0 returns source_aln unchanged.
// If no alignment belongs to the requested query, the returned CRef is
// empty.
// NOTE(review): the first line of the signature is not present in this
// listing.
3226 if(queryNumber == 0) {
3227 return source_aln;
3228 }
3229 CRef<CSeq_align_set> new_aln;
3230 
3231 CConstRef<CSeq_id> prevQueryId;
3232 int currQueryNum = 0;
3233 
3234 ITERATE(CSeq_align_set::Tdata, iter, source_aln->Get()){
3235 const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
// A change of query id starts the next query.
3236 if(prevQueryId.Empty() || !newQueryId.Match(*prevQueryId)){
3237 currQueryNum++;
3238 prevQueryId = &newQueryId;
3239 }
3240 //Record seq aligns corresponding to queryNumber
3241 if(currQueryNum == queryNumber) {
// The result set is allocated lazily, on the first match.
3242 if(new_aln.Empty()) {
3243 new_aln.Reset(new CSeq_align_set);
3244 }
3245 new_aln->Set().push_back(*iter);
3246 }
// Past the requested query: nothing more to collect.
3247 else if(currQueryNum > queryNumber) {
3248 break;
3249 }
3250 }
3251 return new_aln;
3252 }
3253 
3254 
3256 {
// Loads the formatter configuration registry into m_Reg if it is not set
// yet. If no configuration file is found, m_Reg is left unset.
// NOTE(review): the signature and the declaration of l_dbg are not present
// in this listing.
3257 string l_cfg_file_name;
// Setting GETURL_DEBUG in the environment turns on the debug flag.
3259 if( getenv("GETURL_DEBUG") ) CAlignFormatUtil::m_geturl_debug_flag = l_dbg = true;
3260 if( !m_Reg ) {
3261 bool cfgExists = true;
3262 string l_ncbi_env;
3263 string l_fmtcfg_env;
3264 if( NULL != getenv("NCBI") ) l_ncbi_env = getenv("NCBI");
3265 if( NULL != getenv("FMTCFG") ) l_fmtcfg_env = getenv("FMTCFG");
3266 // config file name: value of FMTCFG or default ( .ncbirc )
3267 if( l_fmtcfg_env.empty() )
3268 l_cfg_file_name = ".ncbirc";
3269 else
3270 l_cfg_file_name = l_fmtcfg_env;
3271 // checking existence of configuration file
3272 CFile l_fchecker( l_cfg_file_name );
3273 cfgExists = l_fchecker.Exists();
// Not found under the name as given: retry with the value of the NCBI
// environment variable as a directory prefix.
3274 if( (!cfgExists) && (!l_ncbi_env.empty()) ) {
// Make sure the directory ends with '/' before appending the file name.
3275 if( l_ncbi_env.rfind("/") != (l_ncbi_env.length() -1 ))
3276 l_ncbi_env.append("/");
3277 l_cfg_file_name = l_ncbi_env + l_cfg_file_name;
3278 CFile l_fchecker2( l_cfg_file_name );
3279 cfgExists = l_fchecker2.Exists();
3280 }
3281 if(cfgExists) {
3282 CNcbiIfstream l_ConfigFile(l_cfg_file_name.c_str() );
3283 m_Reg.reset(new CNcbiRegistry(l_ConfigFile));
3284 if( l_dbg ) fprintf(stderr,"REGISTRY: %s\n",l_cfg_file_name.c_str());
3285 }
3286 }
3287 return;
3288 }
3289 
3290 //
3291 // get given url from registry file or return corresponding kNAME
3292 // value as default to preserve compatibility.
3293 //
3294 // algoritm:
3295 // 1) config file name is ".ncbirc" unless FMTCFG specifies another name
3296 // 2) try to read local configuration file before
3297 // checking location specified by the NCBI environment.
3298 // 3) if index != -1, use it as trailing version number for a key name,
3299 // ABCD_V0. try to read ABCD key if version variant doesn't exist.
3300 // 4) use INCLUDE_BASE_DIR key to specify base for all include files.
3301 // 5) treat "_FORMAT" key as filename first and string in second.
3302 // in case of existances of filename, read it starting from
3303 // location specified by INCLUDE_BASE_DIR key
3304 string CAlignFormatUtil::GetURLFromRegistry( const string url_name, int index){
3305  string result_url;
3306  string l_key, l_host_port, l_format;
3307  string l_secion_name = "BLASTFMTUTIL";
3308  string l_fmt_suffix = "_FORMAT";
3309  string l_host_port_suffix = "_HOST_PORT";
3310  string l_subst_pattern;
3311 
3312  if( !m_Reg ) {
3313  InitConfig();
3314  }
3315  if( !m_Reg ) return GetURLDefault(url_name,index); // can't read .ncbrc file
3316  string l_base_dir = m_Reg->Get(l_secion_name, "INCLUDE_BASE_DIR");
3317  if( !l_base_dir.empty() && ( l_base_dir.rfind("/") != (l_base_dir.length()-1)) ) {
3318  l_base_dir.append("/");
3319  }
3320 
3321 
3322  string default_host_port;
3323  string l_key_ndx;
3324  if( index >=0) {
3325  l_key_ndx = url_name + l_host_port_suffix + "_" + NStr::IntToString( index );
3326  l_subst_pattern="<@"+l_key_ndx+"@>";
3327  l_host_port = m_Reg->Get(l_secion_name, l_key_ndx); // try indexed
3328  }
3329  // next is initialization for non version/array type of settings
3330  if( l_host_port.empty()){ // not indexed or index wasn't found
3331  l_key = url_name + l_host_port_suffix; l_subst_pattern="<@"+l_key+"@>";
3332  l_host_port = m_Reg->Get(l_secion_name, l_key);
3333  }
3334  if( l_host_port.empty()) return GetURLDefault(url_name,index);
3335 
3336  // get format part
3337  l_key = url_name + l_fmt_suffix ; //"_FORMAT";
3338  l_key_ndx = l_key + "_" + NStr::IntToString( index );
3339  if( index >= 0 ){
3340  l_format = m_Reg->Get(l_secion_name, l_key_ndx);
3341  }
3342 
3343  if( l_format.empty() ) l_format = m_Reg->Get(l_secion_name, l_key);
3344  if( l_format.empty()) return GetURLDefault(url_name,index);
3345  // format found check wether this string or file name
3346  string l_format_file = l_base_dir + l_format;
3347  CFile l_fchecker( l_format_file );
3348  bool file_name_mode = l_fchecker.Exists();
3349  if( file_name_mode ) { // read whole content of the file to string buffer
3350  string l_inc_file_name = l_format_file;
3351  CNcbiIfstream l_file (l_inc_file_name.c_str(), ios::in|ios::binary|ios::ate);
3352  CT_POS_TYPE l_inc_size = l_file.tellg();
3353  // size_t l_buf_sz = (size_t) l_inc_size;
3354  char *l_mem = new char [ (size_t) l_inc_size + 1];
3355  memset( l_mem,0, (size_t) l_inc_size + 1 ) ;
3356  l_file.seekg( 0, ios::beg );
3357  l_file.read(l_mem, l_inc_size);
3358  l_file.close();
3359  l_format.erase(); l_format.reserve( (size_t)l_inc_size + 1 );
3360  l_format = l_mem;
3361  delete [] l_mem;
3362  }
3363 
3364  result_url = NStr::Replace(l_format,l_subst_pattern,l_host_port);
3365 
3366  if( result_url.empty()) return GetURLDefault(url_name,index);
3367  return result_url;
3368 }
3369 //
3370 // return default URL value for the given key.
3371 //
3372 string CAlignFormatUtil::GetURLDefault( const string url_name, int index) {
// Looks up the built-in URL for url_name (or url_name + "_" + index when
// index >= 0) in sm_TagUrlMap and returns it after MapProtocol().
// When the key is unknown, a diagnostic string naming the key is returned in
// place of a URL.
// NOTE(review): the declaration of url_it is not present in this listing.
3373 
3374 string search_name = url_name;
3376 if( index >= 0 ) search_name += "_" + NStr::IntToString( index); // actual name for index value is NAME_{index}
3377 
3378 if( (url_it = sm_TagUrlMap.find( search_name ) ) != sm_TagUrlMap.end()) {
3379 string url_link = CAlignFormatUtil::MapProtocol(url_it->second);
3380 return url_link;
3381 }
3382 
// No default registered: build the diagnostic text that is returned instead.
3383 string error_msg = "CAlignFormatUtil::GetURLDefault:no_defualt_for"+url_name;
3384 if( index != -1 ) error_msg += "_index_"+ NStr::IntToString( index );
3385 return error_msg;
3386 }
3387 
3388 void
3390 CNcbiMatrix<int>& retval)
3391 {
// Fills retval with the scores of the standard matrix called matrix_name.
// retval is indexed by character code; every cell not set below holds -1000.
// retval is left as an empty (0 x 0) matrix when matrix_name is NULL or
// blank, or when NCBISM_GetStandardMatrix() does not return a matrix.
// NOTE(review): the first line of the signature and the declaration of mtx
// are not present in this listing.
3392 retval.Resize(0, 0, -1);
3393 if (matrix_name == NULL ||
3394 NStr::TruncateSpaces(string(matrix_name)).empty()) {
3395 return;
3396 }
3397 
3398 const SNCBIPackedScoreMatrix* packed_mtx =
3399 NCBISM_GetStandardMatrix(matrix_name);
3400 if (packed_mtx == NULL) {
3401 return;
3402 }
3403 retval.Resize(k_NumAsciiChar, k_NumAsciiChar, -1000);
3404 
3406 NCBISM_Unpack(packed_mtx, &mtx);
3407 
// Copy the scores for every pair of symbols listed in k_PSymbol.
3408 for(int i = 0; i < ePMatrixSize; ++i){
3409 for(int j = 0; j < ePMatrixSize; ++j){
3410 retval((size_t)k_PSymbol[i], (size_t)k_PSymbol[j]) =
3411 mtx.s[(size_t)k_PSymbol[i]][(size_t)k_PSymbol[j]];
3412 }
3413 }
// '*' against any listed symbol scores -4; '*' against '*' scores 1.
3414 for(int i = 0; i < ePMatrixSize; ++i) {
3415 retval((size_t)k_PSymbol[i], '*') = retval('*',(size_t)k_PSymbol[i]) = -4;
3416 }
3417 retval('*', '*') = 1;
3418 // this is to count Selenocysteine to Cysteine matches as positive
3419 retval('U', 'U') = retval('C', 'C');
3420 retval('U', 'C') = retval('C', 'C');
3421 retval('C', 'U') = retval('C', 'C');
3422 }
3423 
3424 
3425 string CAlignFormatUtil::MapTemplate(string inpString,string tmplParamName,Int8 templParamVal)
3426 {
3427  string outString;
3428  string tmplParam = "<@" + tmplParamName + "@>";
3429  NStr::Replace(inpString,tmplParam,NStr::NumericToString(templParamVal),outString);
3430  return outString;
3431 }
3432 
3433 string CAlignFormatUtil::MapTemplate(string inpString,string tmplParamName,string templParamVal)
3434 {
3435  string outString;
3436  string tmplParam = "<@" + tmplParamName + "@>";
3437  NStr::Replace(inpString,tmplParam,templParamVal,outString);
3438  return outString;
3439 }
3440 
3441 string CAlignFormatUtil::MapSpaceTemplate(string inpString,string tmplParamName,string templParamVal, unsigned int maxParamValLength, int spacesFormatFlag)
3442 {
3443  templParamVal = AddSpaces(templParamVal, maxParamValLength, spacesFormatFlag);
3444  string outString = MapTemplate(inpString,tmplParamName,templParamVal);
3445 
3446  return outString;
3447 }
3448 
3449 
3450 string CAlignFormatUtil::AddSpaces(string paramVal, size_t maxParamValLength, int spacesFormatFlag)
3451 {
3452  //if(!spacePos.empty()) {
3453  string spaceString;
3454  if(maxParamValLength >= paramVal.size()) {
3455  size_t numSpaces = maxParamValLength - paramVal.size() + 1;
3456  if(spacesFormatFlag & eSpacePosToCenter) {
3457  numSpaces = numSpaces/2;
3458  }
3459  spaceString.assign(numSpaces,' ');
3460  }
3461  else {
3462  paramVal = paramVal.substr(0, maxParamValLength - 3) + "...";
3463  spaceString += " ";
3464  }
3465  if(spacesFormatFlag & eSpacePosAtLineEnd) {
3466  paramVal = paramVal + spaceString;
3467  }
3468  else if(spacesFormatFlag & eSpacePosToCenter) {
3469  paramVal = spaceString + paramVal + spaceString;
3470  }
3471  else {
3472  paramVal = spaceString + paramVal;
3473  }
3474  if(spacesFormatFlag & eAddEOLAtLineStart) paramVal = "\n" + paramVal;
3475  if(spacesFormatFlag & eAddEOLAtLineEnd) paramVal = paramVal + "\n";
3476  //}
3477 
3478  return paramVal;
3479 }
3480 
3481 
3482 
3484 {
// Returns the protocol prefix for generated URLs: the PROTOCOL entry of the
// BLASTFMTUTIL section of ".ncbirc" when that entry exists, "https:"
// otherwise. The file is opened by this relative name on every call.
// NOTE(review): the signature is not present in this listing.
3485 CNcbiIfstream config_file(".ncbirc");
3486 CNcbiRegistry config_reg(config_file);
3487 string httpProt = "https:";
3488 if(!config_reg.Empty()) {
3489 if(config_reg.HasEntry("BLASTFMTUTIL","PROTOCOL")) {
3490 httpProt = config_reg.Get("BLASTFMTUTIL","PROTOCOL");
3491 }
3492 }
3493 return httpProt;
3494 }
3495 
3496 /*
3497 if(no config file) protocol = "https:"
3498 if(no "BLASTFMTUTIL","PROTOCOL" entry in config file) protocol = "https:"
3499 if(there is entry in config) protocol = entry which could be blank = ""
3500 */
3501 string CAlignFormatUtil::MapProtocol(string url_link)
3502 {
3503  if(m_Protocol.empty()){
3504  if(!m_Reg) {
3505  InitConfig();
3506  }
3507  m_Protocol = (m_Reg && m_Reg->HasEntry("BLASTFMTUTIL","PROTOCOL")) ? m_Protocol = m_Reg->Get("BLASTFMTUTIL","PROTOCOL") : "https:";
3508  }
3509  url_link = CAlignFormatUtil::MapTemplate(url_link,"protocol",m_Protocol);
3510  return url_link;
3511 }
3512 
3513 static string s_MapCommonUrlParams(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo)
3514 {
3515  string db,logstr_moltype;
3516  if(seqUrlInfo->isDbNa) {
3517  db = "nucleotide";
3518  logstr_moltype = "nucl";
3519  } else {
3520  db = "protein";
3521  logstr_moltype ="prot";
3522  }
3523  string logstr_location = (seqUrlInfo->isAlignLink) ? "align" : "top";
3524  string url_link = CAlignFormatUtil::MapTemplate(urlTemplate,"db",db);
3525  url_link = CAlignFormatUtil::MapTemplate(url_link,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3526  url_link = CAlignFormatUtil::MapTemplate(url_link,"log",logstr_moltype + logstr_location);
3527  url_link = CAlignFormatUtil::MapTemplate(url_link,"blast_rank",seqUrlInfo->blast_rank);
3528  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",seqUrlInfo->rid);
3529  url_link = CAlignFormatUtil::MapTemplate(url_link,"acc",seqUrlInfo->accession);
3530  url_link = CAlignFormatUtil::MapProtocol(url_link);
3531  return url_link;
3532 }
3533 
3534 static string s_MapURLLink(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo, const CBioseq::TId& ids)
3535 {
3536  //Add specific blasttype/user_url template mapping here
3537  string url_link = urlTemplate;
3538  if (seqUrlInfo->user_url.find("sra.cgi") != string::npos) {
3539  string strRun, strSpotId,strReadIndex;
3540  if(s_GetSRASeqMetadata(ids,strRun,strSpotId,strReadIndex)) {
3541  url_link = CAlignFormatUtil::MapTemplate(url_link,"run",strRun);
3542  url_link = CAlignFormatUtil::MapTemplate(url_link,"spotid",strSpotId);
3543  url_link = CAlignFormatUtil::MapTemplate(url_link,"readindex",strReadIndex);
3544  }
3545  }
3546  //This maps generic params like log, blast_rank, rid
3547  url_link = s_MapCommonUrlParams(url_link, seqUrlInfo);
3548  return url_link;
3549 }
3550 
3551 
3552 
3553 bool CAlignFormatUtil::IsWGSPattern(string &wgsAccession)
3554 {
3555  //const string kWgsAccessionPattern = "^[A-Z]{4}[0-9]{8,10}(\.[0-9]+){0,1}$"; //example AUXO013124042 or AUXO013124042.1
3556  const unsigned int kWgsProjLength = 4;
3557  const unsigned int kWgsProjIDLengthMin = 8;
3558  const unsigned int kWgsProjIDLengthMax = 10;
3559  bool isWGS = true;
3560 
3561  if (wgsAccession.size() < 6) {
3562  return false;
3563  }
3564 
3565  if(NStr::Find(wgsAccession, ".") != NPOS) { //Accession has version AUXO013124042.1
3566  string version;
3567  NStr::SplitInTwo(wgsAccession,".",wgsAccession,version);
3568  }
3569 
3570  string wgsProj = wgsAccession.substr(0,kWgsProjLength);
3571  for (size_t i = 0; i < wgsProj.length(); i ++){
3572  if(!isalpha(wgsProj[i]&0xff)) {
3573  isWGS = false;
3574  break;
3575  }
3576  }
3577  if(isWGS) {
3578  string wgsId = wgsAccession.substr(kWgsProjLength);
3579  if(wgsId.length() >= kWgsProjIDLengthMin && wgsId.length() <= kWgsProjIDLengthMax) {
3580  for (size_t i = 0; i < wgsId.length(); i ++){
3581  if(!isdigit(wgsId[i]&0xff)) {
3582  isWGS = false;
3583  break;
3584  }
3585  }
3586  }
3587  else {
3588  isWGS = false;
3589  }
3590  }
3591  return isWGS;
3592 }
3593 
3594 
3595 bool CAlignFormatUtil::IsWGSAccession(string &wgsAccession, string &wgsProjName)
3596 {
3597  const unsigned int kWgsProgNameLength = 6;
3598  bool isWGS = IsWGSPattern(wgsAccession);
3599  if(isWGS) {
3600  wgsProjName = wgsAccession.substr(0,kWgsProgNameLength);
3601  }
3602  return isWGS;
3603 }
3604 
3605 
3606 string CAlignFormatUtil::GetIDUrlGen(SSeqURLInfo *seqUrlInfo,const CBioseq::TId* ids)
3607 {
// Builds the default (non user-supplied) URL for a sequence, stores it in
// seqUrlInfo->seqUrl and returns it. The URL stays empty when none of the
// cases below applies. Cases, in order:
//   1) WGS accession and templates in use -> "WGS" registry URL
//   2) the ids include a text seq-id      -> "ENTREZ_TM" / "ENTREZ" URL
//   3) general id with db "TI"            -> trace archive URL
//   4) local id                           -> LOCAL_ID tool URL from m_Reg
// NOTE(review): the declaration of wid is not present in this listing;
// presumably it is the preferred id chosen from *ids - confirm.
3608 string url_link = NcbiEmptyString;
3610 
3611 bool hasTextSeqID = GetTextSeqID(*ids);
3612 string title = "title=\"Show report for " + seqUrlInfo->accession + "\" ";
3613 
3614 string temp_class_info = kClassInfo; temp_class_info += " ";
3615 string wgsProj;
3616 string wgsAccession = seqUrlInfo->accession;
3617 bool isWGS = false;
// Local and general ids are never tested against the WGS pattern.
3618 if (!(wid->Which() == CSeq_id::e_Local || wid->Which() == CSeq_id::e_General)){
3619 isWGS = CAlignFormatUtil::IsWGSAccession(wgsAccession, wgsProj);
3620 }
3621 if(isWGS && seqUrlInfo->useTemplates) {
3622 string wgsUrl = CAlignFormatUtil::GetURLFromRegistry("WGS");
3623 url_link = s_MapCommonUrlParams(wgsUrl, seqUrlInfo);
3624 url_link = CAlignFormatUtil::MapTemplate(url_link,"wgsproj",wgsProj);
3625 url_link = CAlignFormatUtil::MapTemplate(url_link,"wgsacc", wgsAccession);
3626 }
3627 else if (hasTextSeqID) {
3628 string entrezTag = (seqUrlInfo->useTemplates) ? "ENTREZ_TM" : "ENTREZ";
3629 string l_EntrezUrl = CAlignFormatUtil::GetURLFromRegistry(entrezTag);
3630 url_link = s_MapCommonUrlParams(l_EntrezUrl, seqUrlInfo);
3631 
// Without templates the remaining placeholders are resolved here.
3632 if(!seqUrlInfo->useTemplates) {
3633 url_link = CAlignFormatUtil::MapTemplate(url_link,"acc",seqUrlInfo->accession);
3634 temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",NStr::JavaScriptEncode(seqUrlInfo->defline)):temp_class_info;
3635 url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3636 url_link = CAlignFormatUtil::MapTemplate(url_link,"target",seqUrlInfo->new_win ? "TARGET=\"EntrezView\"" : "");
3637 }
3638 
3639 } else {//seqid general, dbtag specified
3640 if(wid->Which() == CSeq_id::e_General){
3641 const CDbtag& dtg = wid->GetGeneral();
3642 const string& dbname = dtg.GetDb();
// Only the "TI" database (compared case-insensitively) gets a URL here.
3643 if(NStr::CompareNocase(dbname, "TI") == 0){
3644 string actual_id = CAlignFormatUtil::GetGnlID(dtg);
3645 if(seqUrlInfo->useTemplates) {
3646 string l_TraceUrl = CAlignFormatUtil::GetURLFromRegistry("TRACE_CGI");
3647 url_link = l_TraceUrl + (string)"?cmd=retrieve&dopt=fasta&val=" + actual_id + "&RID=" + seqUrlInfo->rid;
3648 }
3649 else {
3650 url_link = CAlignFormatUtil::MapTemplate(kTraceUrl,"val",actual_id);
3651 temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",seqUrlInfo->defline):temp_class_info;
3652 url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3653 url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",seqUrlInfo->rid);
3654 }
3655 }
3656 } else if (wid->Which() == CSeq_id::e_Local){
3657 
// url_holder is not used afterwards in this function.
3658 string url_holder = CAlignFormatUtil::GetURLFromRegistry("LOCAL_ID");
3659 
3660 string user_url;
// The local-id URL template comes from the [LOCAL_ID] registry section.
3661 if (m_Reg) {
3662 user_url = (seqUrlInfo->addCssInfo) ? m_Reg->Get("LOCAL_ID","TOOL_URL_ALIGN") : m_Reg->Get("LOCAL_ID","TOOL_URL");
3663 }
3664 string id_string;
3665 wid->GetLabel(&id_string, CSeq_id::eContent);
3666 url_link = CAlignFormatUtil::MapTemplate(user_url,"seq_id", NStr::URLEncode(id_string));
3667 url_link = CAlignFormatUtil::MapTemplate(url_link,"db_name", NStr::URLEncode(seqUrlInfo->database));
3668 url_link = CAlignFormatUtil::MapTemplate(url_link,"taxid", TAX_ID_TO(int, seqUrlInfo->taxid));
3669 temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",seqUrlInfo->defline):temp_class_info;
3670 url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3671 url_link = CAlignFormatUtil::MapTemplate(url_link,"title", id_string);
3672 url_link = CAlignFormatUtil::MapTemplate(url_link,"target",seqUrlInfo->new_win ? "TARGET=\"EntrezView\"" : "");
3673 }
3674 }
3675 url_link = CAlignFormatUtil::MapProtocol(url_link);
3676 seqUrlInfo->seqUrl = url_link;
3677 return url_link;
3678 }
3679 
3680 string CAlignFormatUtil::GetIDUrlGen(SSeqURLInfo *seqUrlInfo,const CSeq_id& id,objects::CScope &scope)
3681 {
3682  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3683  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3684 
3685  string url_link = GetIDUrlGen(seqUrlInfo,ids);
3686  return url_link;
3687 }
3688 
3689 string CAlignFormatUtil::GetIDUrl(SSeqURLInfo *seqUrlInfo,const CBioseq::TId* ids)
3690 {
// Builds the URL (or, without templates, the opening <a ...> tag) for a
// sequence, stores it in seqUrlInfo->seqUrl and returns it.
// A non-empty user_url is used unless it is a "dumpgnl.cgi" URL for a
// sequence that has a gi, or a "maps.cgi" URL; in those cases, and when
// user_url is empty, GetIDUrlGen() supplies the URL.
// NOTE(review): one line following the url_link declaration is not present
// in this listing.
3691 string url_link = NcbiEmptyString;
3693 
3694 string title = "title=\"Show report for " + seqUrlInfo->accession + "\" ";
3695 
3696 if (seqUrlInfo->user_url != NcbiEmptyString &&
3697 !((seqUrlInfo->user_url.find("dumpgnl.cgi") != string::npos && seqUrlInfo->gi > ZERO_GI) ||
3698 (seqUrlInfo->user_url.find("maps.cgi") != string::npos))) {
3699 
3700 string url_with_parameters,toolURLParams;
// Extra URL parameters may be configured per blast type in the registry.
3701 if(m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType != "newblast") {
3702 toolURLParams = m_Reg->Get(seqUrlInfo->blastType, "TOOL_URL_PARAMS");
3703 }
3704 if(!toolURLParams.empty()) {
3705 string urlLinkTemplate = seqUrlInfo->user_url + toolURLParams;
3706 url_with_parameters = s_MapURLLink(urlLinkTemplate, seqUrlInfo, *ids);
3707 }
3708 else {
// No configured parameters: use the dedicated URL builders.
3709 if (seqUrlInfo->user_url.find("sra.cgi") != string::npos) {
3710 url_with_parameters = CAlignFormatUtil::BuildSRAUrl(*ids, seqUrlInfo->user_url);
3711 }
3712 else {
3713 url_with_parameters = CAlignFormatUtil::BuildUserUrl(*ids, seqUrlInfo->taxid, seqUrlInfo->user_url,
3714 seqUrlInfo->database,
3715 seqUrlInfo->isDbNa, seqUrlInfo->rid,
3716 seqUrlInfo->queryNumber,
3717 seqUrlInfo->isAlignLink);
3718 }
3719 }
3720 if (url_with_parameters != NcbiEmptyString) {
// Without templates the URL is wrapped in an opening anchor tag.
3721 if (!seqUrlInfo->useTemplates) {
3722 string deflineInfo;
3723 if(seqUrlInfo->addCssInfo) {
3724 deflineInfo = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(kClassInfo,"defline",seqUrlInfo->defline):kClassInfo;
3725 }
3726 url_link += "<a " + title + deflineInfo + "href=\"";
3727 }
3728 url_link += url_with_parameters;
3729 if (!seqUrlInfo->useTemplates) url_link += "\">";
3730 }
3731 }
3732 else {
3733 //use entrez or dbtag specified
3734 url_link = GetIDUrlGen(seqUrlInfo,ids);
3735 }
3736 seqUrlInfo->seqUrl = url_link;
3737 return url_link;
3738 }
3739 
3740 
3741 string CAlignFormatUtil::GetIDUrl(SSeqURLInfo *seqUrlInfo,const CSeq_id& id,objects::CScope &scope)
3742 {
3743  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3744  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3745 
3746 
3747  seqUrlInfo->blastType = NStr::TruncateSpaces(NStr::ToLower(seqUrlInfo->blastType));
3748 
3749  if(seqUrlInfo->taxid == INVALID_TAX_ID) { //taxid is not set
3750  seqUrlInfo->taxid = ZERO_TAX_ID;
3751  if ((seqUrlInfo->advancedView || seqUrlInfo->blastType == "mapview" || seqUrlInfo->blastType == "mapview_prev") ||
3752  seqUrlInfo->blastType == "gsfasta" || seqUrlInfo->blastType == "gsfasta_prev") {
3753  seqUrlInfo->taxid = GetTaxidForSeqid(id, scope);
3754  }
3755  }
3756  string url_link = GetIDUrl(seqUrlInfo,ids);
3757  return url_link;
3758 }
3759 
3760 //static const char kGenericLinkTemplate[] = "<a href=\"<@url@>\" target=\"lnk<@rid@>\" title=\"Show report for <@seqid@>\"><@gi@><@seqid@></a>";
3762 {
// Builds the complete link markup for a sequence: the URL from GetIDUrl() is
// inserted into kGenericLinkTemplate (or kGenericLinkMouseoverTmpl when
// addCssInfo is set) and the rid, seqid, gi, target and - for the mouseover
// variant - defline placeholders are filled in. An empty string is returned
// when GetIDUrl() yields no URL.
// NOTE(review): the signature is not present in this listing.
3763 string seqLink;
3764 string linkURL = GetIDUrl(seqUrlInfo,ids);
3765 if(!linkURL.empty()) {
3766 string linkTmpl = (seqUrlInfo->addCssInfo) ? kGenericLinkMouseoverTmpl : kGenericLinkTemplate;
3767 seqLink = CAlignFormatUtil::MapTemplate(linkTmpl,"url",linkURL);
3768 seqLink = CAlignFormatUtil::MapTemplate(seqLink,"rid",seqUrlInfo->rid);
3769 seqLink = CAlignFormatUtil::MapTemplate(seqLink,"seqid",seqUrlInfo->accession);
3770 seqLink = CAlignFormatUtil::MapTemplate(seqLink,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3771 seqLink = CAlignFormatUtil::MapTemplate(seqLink,"target","EntrezView");
3772 if(seqUrlInfo->addCssInfo) {
// The defline is JavaScript-encoded before it goes into the markup.
3773 seqLink = CAlignFormatUtil::MapTemplate(seqLink,"defline",NStr::JavaScriptEncode(seqUrlInfo->defline));
3774 }
3775 }
3776 return seqLink;
3777 }
3778 
3779 static string s_MapCustomLink(string linkUrl,string reportType,string accession, string linkText, string linktrg, string linkTitle = kCustomLinkTitle,string linkCls = "")
3780 {
3781  string link = CAlignFormatUtil::MapTemplate(kCustomLinkTemplate,"custom_url",linkUrl);
3782  link = CAlignFormatUtil::MapProtocol(link);
3783  link = CAlignFormatUtil::MapTemplate(link,"custom_title",linkTitle);
3784  link = CAlignFormatUtil::MapTemplate(link,"custom_report_type",reportType);
3785  link = CAlignFormatUtil::MapTemplate(link,"seqid",accession);
3786  link = CAlignFormatUtil::MapTemplate(link,"custom_lnk_displ",linkText);
3787  link = CAlignFormatUtil::MapTemplate(link,"custom_cls",linkCls);
3788  link = CAlignFormatUtil::MapTemplate(link,"custom_trg",linktrg);
3789  return link;
3790 }
3791 
3792 
3793 
3795 bool hspRange)
3796 {
// Returns the GenBank / GenPept link for a sequence that has a text seq-id;
// the list is empty otherwise. With hspRange set, the URL and the title keep
// <@fromHSP@> / <@toHSP@> placeholders, which are not resolved here.
// NOTE(review): the first line of the signature is not present in this
// listing.
3797 list<string> customLinksList;
3798 if (seqUrlInfo->hasTextSeqID) {
3799 //First show links to GenBank and FASTA
3800 string linkUrl,link,linkTiltle = kCustomLinkTitle;
3801 
3802 linkUrl = seqUrlInfo->seqUrl;
// If the stored URL is not a GenBank report URL, build one from kEntrezTMUrl.
3803 if(NStr::Find(linkUrl, "report=genbank") == NPOS) { //Geo case
3804 linkUrl = s_MapCommonUrlParams(kEntrezTMUrl, seqUrlInfo);
3805 }
3806 string linkText = (seqUrlInfo->isDbNa) ? "GenBank" : "GenPept";
3807 if(hspRange) {
3808 linkUrl += "&from=<@fromHSP@>&to=<@toHSP@>";
3809 linkTiltle = "Aligned region spanning positions <@fromHSP@> to <@toHSP@> on <@seqid@>";
3810 }
3811 link = s_MapCustomLink(linkUrl,"genbank",seqUrlInfo->accession,linkText,"lnk" + seqUrlInfo->rid,linkTiltle);
3812 customLinksList.push_back(link);
3813 }
3814 return customLinksList;
3815 }
3816 
3818 bool hspRange)
3819 {
// Builds the "Graphics" (sequence viewer) link for a sequence.
// Without hspRange the viewer range is seqUrlInfo->seqRange widened by 5% on
// each side; with hspRange the title keeps <@fromHSP@> / <@toHSP@>
// placeholders, which are not resolved here.
// NOTE(review): the first line of the signature is not present in this
// listing.
3820 //seqviewer
3821 string dbtype = (seqUrlInfo->isDbNa) ? "nuccore" : "protein";
// A different viewer URL template is used when the sequence has no gi.
3822 string seqViewUrl = (seqUrlInfo->gi > ZERO_GI)?kSeqViewerUrl:kSeqViewerUrlNonGi;
3823 
3824 string linkUrl = CAlignFormatUtil::MapTemplate(seqViewUrl,"rid",seqUrlInfo->rid);
3825 
3826 string seqViewerParams;
// Viewer parameters may be configured per blast type; kSeqViewerParams is
// the fallback.
3827 if(m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType != "newblast") {
3828 seqViewerParams = m_Reg->Get(seqUrlInfo->blastType, "SEQVIEW_PARAMS");
3829 }
3830 seqViewerParams = seqViewerParams.empty() ? kSeqViewerParams : seqViewerParams;
3831 linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"seqViewerParams",seqViewerParams);
3832 
3833 linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"dbtype",dbtype);
3834 linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3835 string linkTitle = "Show alignment to <@seqid@> in <@custom_report_type@>";
3836 string link_loc;
3837 if(!hspRange) {
3838 int addToRange = (int) ((seqUrlInfo->seqRange.GetTo() - seqUrlInfo->seqRange.GetFrom()) * 0.05);//add 5% to each side
// The lower bound is clamped at 0.
3839 linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"from",max(0,(int)seqUrlInfo->seqRange.GetFrom() - addToRange));
3840 linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"to",seqUrlInfo->seqRange.GetTo() + addToRange);
3841 link_loc = "fromSubj";
3842 //linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"flip",NStr::BoolToString(seqUrlInfo->flip));
3843 }
3844 else {
3845 link_loc = "fromHSP";
3846 linkTitle += " for <@fromHSP@> to <@toHSP@> range";
3847 }
3848 linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"link_loc",link_loc);
3849 
3850 string title = (seqUrlInfo->isDbNa) ? "Nucleotide Graphics" : "Protein Graphics";
3851 
3852 string link = s_MapCustomLink(linkUrl,title,seqUrlInfo->accession, "Graphics","lnk" + seqUrlInfo->rid,linkTitle,"spr");
3853 
3854 return link;
3855 }
3856 
3858 bool hspRange)
3859 {
// Returns the links from GetGiLinksList() followed by the Graphics link from
// GetGraphiscLink(), when that link is not empty.
// NOTE(review): the first line of the signature is not present in this
// listing.
3860 list<string> customLinksList = GetGiLinksList(seqUrlInfo,hspRange); //ONLY FOR genBank seqUrlInfo->seqUrl has "report=genbank"
3861 string graphicLink = GetGraphiscLink(seqUrlInfo,hspRange);
3862 if(!graphicLink.empty()) {
3863 customLinksList.push_back(graphicLink);
3864 }
3865 return customLinksList;
3866 }
3867 
3868 int CAlignFormatUtil::SetCustomLinksTypes(SSeqURLInfo *seqUrlInfo, int customLinkTypesInp)
3869 {
3870  int customLinkTypes = customLinkTypesInp;
3871  if ( seqUrlInfo->gi > ZERO_GI) {
3872  customLinkTypes +=eLinkTypeGenLinks;
3873  }
3874  //else if(NStr::StartsWith(seqUrlInfo->accession,"ti:")) {//seqUrlInfo->seqUrl has "trace.cgi"
3875  else if(NStr::Find(seqUrlInfo->seqUrl,"trace.cgi") != NPOS ){
3876  customLinkTypes +=eLinkTypeTraceLinks;
3877  }
3878  else if(seqUrlInfo->blastType == "sra") {//seqUrlInfo->seqUrl has sra.cgi
3879  customLinkTypes +=eLinkTypeSRALinks;
3880  }
3881  else if(seqUrlInfo->blastType == "snp") {//seqUrlInfo->seqUrl has snp_ref.cgi
3882  customLinkTypes +=eLinkTypeSNPLinks;
3883  }
3884  else if(seqUrlInfo->blastType == "gsfasta") {//seqUrlInfo->seqUrl has GSfasta.cgi
3885  customLinkTypes +=eLinkTypeGSFastaLinks;
3886  }
3887  return customLinkTypes;
3888 }
3889 
3890 
3891 //kCustomLinkTemplate:
3892 //<a href="<@custom_url@>" class="<@custom_cls@>" title="Show <@custom_report_type@> report for <@seqid@>"><@custom_lnk_displ@></a>
3894 const CSeq_id& id,
3895 objects::CScope &scope,
3896 int customLinkTypes)
3897 {
// Returns every custom link for a sequence: first the links from
// GetSeqLinksList(), then the links specific to the link type worked out by
// SetCustomLinksTypes() (trace archive, SRA, SNP or GSFASTA).
// id and scope are not used in the body shown here.
// NOTE(review): the first line of the signature is not present in this
// listing.
3898 list<string> customLinksList;
3899 string linkUrl,link;
3900 
3901 customLinkTypes = SetCustomLinksTypes(seqUrlInfo, customLinkTypes);
3902 //First show links to GenBank and FASTA, then to Graphics
3903 customLinksList = GetSeqLinksList(seqUrlInfo);
3904 if(customLinkTypes & eLinkTypeTraceLinks) {
3905 linkUrl = seqUrlInfo->seqUrl;
3906 link = s_MapCustomLink(linkUrl,"Trace Archive FASTA",seqUrlInfo->accession, "FASTA","lnk" + seqUrlInfo->rid);
3907 customLinksList.push_back(link);
3908 
// The other trace archive views are derived from the FASTA URL by replacing
// the text "fasta" in it.
3909 linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","trace");
3910 link = s_MapCustomLink(linkUrl,"Trace Archive Trace",seqUrlInfo->accession, "Trace","lnk" + seqUrlInfo->rid);
3911 customLinksList.push_back(link);
3912 
3913 linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","quality");
3914 link = s_MapCustomLink(linkUrl,"Trace Archive Quality",seqUrlInfo->accession, "Quality","lnk" + seqUrlInfo->rid);
3915 customLinksList.push_back(link);
3916 
3917 linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","info");
3918 link = s_MapCustomLink(linkUrl,"Trace Archive Info",seqUrlInfo->accession, "Info","lnk" + seqUrlInfo->rid);
3919 customLinksList.push_back(link);
3920 }
3921 else if(customLinkTypes & eLinkTypeSRALinks) {
3922 linkUrl = seqUrlInfo->seqUrl;
3923 link = s_MapCustomLink(linkUrl,"SRA",seqUrlInfo->accession, "SRA","lnk" + seqUrlInfo->rid);
3924 customLinksList.push_back(link);
3925 }
3926 else if(customLinkTypes & eLinkTypeSNPLinks) {
3927 linkUrl = seqUrlInfo->seqUrl;
3928 link = s_MapCustomLink(linkUrl,"SNP",seqUrlInfo->accession, "SNP","lnk" + seqUrlInfo->rid);
3929 customLinksList.push_back(link);
3930 
3931 
3932 //SNP accession=rs35885954
// Every "rs" occurrence is removed from the accession to get the number.
3933 string rs = NStr::Replace(seqUrlInfo->accession,"rs","");
3934 linkUrl = seqUrlInfo->resourcesUrl + rs + "?report=FLT";
3935 
3936 
3937 link = s_MapCustomLink(linkUrl,"Flatfile",seqUrlInfo->accession, "Flatfile","lnk" + seqUrlInfo->rid);
3938 customLinksList.push_back(link);
3939 
// The next two URLs are each derived from the previous one by replacing the
// report name.
3940 linkUrl = NStr::Replace(linkUrl,"FLT","fasta");
3941 link = s_MapCustomLink(linkUrl,"FASTA",seqUrlInfo->accession, "FASTA","lnk" + seqUrlInfo->rid);
3942 customLinksList.push_back(link);
3943 
3944 linkUrl = NStr::Replace(linkUrl,"fasta","docsum");
3945 link = s_MapCustomLink(linkUrl,"Graphic summary ",seqUrlInfo->accession, "Graphic summary ","lnk" + seqUrlInfo->rid);
3946 customLinksList.push_back(link);
3947 }
3948 else if(customLinkTypes & eLinkTypeGSFastaLinks) {
3949 linkUrl = seqUrlInfo->seqUrl;
3950 link = s_MapCustomLink(linkUrl,"GSFASTA",seqUrlInfo->accession, "GSFASTA","lnk" + seqUrlInfo->rid);
3951 customLinksList.push_back(link);
3952 }
3953 return customLinksList;
3954 }
3955 
3956 
3958 const CSeq_id& id,
3959 objects::CScope &scope)
3960 {
// Builds a URL from kDownloadUrl through BuildUserUrl() for the bioseq that
// id resolves to in scope, and appends "&segs=" + seqUrlInfo->segs when the
// URL is not empty.
// NOTE(review): the first line of the signature is not present in this
// listing.
3961 const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3962 const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3963 string linkUrl,link;
3964 
3965 
3966 linkUrl = CAlignFormatUtil::BuildUserUrl(*ids,
3967 ZERO_TAX_ID,
3968 kDownloadUrl,
3969 seqUrlInfo->database,
3970 seqUrlInfo->isDbNa,
3971 seqUrlInfo->rid,
3972 seqUrlInfo->queryNumber,
3973 true);
3974 if(!linkUrl.empty()) {
3975 linkUrl += "&segs="+ seqUrlInfo->segs;
3976 }
3977 
3978 return linkUrl;
3979 }
3980 
3981 
3982 
3984 const CSeq_id& id,
3985 objects::CScope &scope)
3986 
3987 {
// Returns the FASTA URL for a sequence; it is empty for link types other
// than general, trace and SNP links.
// id and scope are not used in the body shown here.
// NOTE(review): the first line of the signature is not present in this
// listing.
3988 string linkUrl;
3989 
3990 int customLinkTypes = SetCustomLinksTypes(seqUrlInfo, CAlignFormatUtil::eLinkTypeDefault);
3991 
3992 if( (customLinkTypes & eLinkTypeGenLinks) || (customLinkTypes & eLinkTypeTraceLinks)){
// The FASTA URL is the sequence URL with "genbank" replaced by "fasta".
3993 linkUrl = seqUrlInfo->seqUrl;
3994 linkUrl = NStr::Replace(linkUrl,"genbank","fasta");
3995 }
3996 else if(customLinkTypes & eLinkTypeSNPLinks) {
// This value is overwritten by the assignment at the end of the branch.
3997 linkUrl = seqUrlInfo->seqUrl;
3998 vector<string> parts;
3999 //SNP accession=dbSNP:rs35885954
4000 NStr::Split(seqUrlInfo->accession,":rs",parts,NStr::fSplit_MergeDelimiters);
4001 string rs;
// rs stays empty when the split yields fewer than two parts.
4002 if(parts.size() > 1) {
4003 rs = parts[1];
4004 }
4005 linkUrl = seqUrlInfo->resourcesUrl + rs + "?report=fasta";
4006 }
4007 return linkUrl;
4008 }
4009 
4010 
4012 {
      // NOTE(review): the signature and listing line 4015 (presumably the
      // declaration/initialisation of `type`) are missing from this listing.
      //
      // Classifies the database from the subject id of the first alignment:
      // eDbGi when a GI or a text seq-id is available, eDbGeneral when the
      // subject is a general id whose db name is "TI" (case-insensitive);
      // otherwise `type` keeps its initial value.
4013  //determine if the database has gi by looking at the 1st hit.
4014  //Could be wrong but simple for now
      // NOTE(review): front() is taken without an emptiness check -- confirm
      // callers never pass an empty alignment list.
4016  CRef<CSeq_align> first_aln = actual_aln_list.Get().front();
4017  const CSeq_id& subject_id = first_aln->GetSeq_id(1);
4018 
      // Local ids are never inspected further.
4019  if (subject_id.Which() != CSeq_id::e_Local){
4020  const CBioseq_Handle& handleTemp = scope.GetBioseqHandle(subject_id);
4021  if(handleTemp){
4022  TGi giTemp = FindGi(handleTemp.GetBioseqCore()->GetId());
4023  if (giTemp > ZERO_GI || GetTextSeqID((CConstRef<CSeq_id>)&subject_id)) {
4024  type = eDbGi;
4025  } else if (subject_id.Which() == CSeq_id::e_General){
4026  const CDbtag& dtg = subject_id.GetGeneral();
4027  const string& dbName = dtg.GetDb();
4028  if(NStr::CompareNocase(dbName, "TI") == 0){
4029  type = eDbGeneral;
4030  }
4031  }
4032  }
4033  }
4034  return type;
4035 }
4036 
4039 {
      // NOTE(review): the signature is missing from this listing; the body
      // reads a single alignment named `aln`.
      //
      // Fills a newly allocated SSeqAlignSetCalcParams from the scores stored
      // on `aln` and returns it as a raw pointer (released from the
      // unique_ptr), so the caller takes ownership.
4040  int score = 0;
4041  double bits = 0;
4042  double evalue = 0;
4043  int sum_n = 0;
4044  int num_ident = 0;
4045  list<TGi> use_this_gi;
4046 
4047  use_this_gi.clear();
4048  //Gets scores directly from seq align
4049  GetAlnScores(aln, score, bits, evalue, sum_n,
4050  num_ident, use_this_gi);
4051 
4052  unique_ptr<SSeqAlignSetCalcParams> seqSetInfo(new SSeqAlignSetCalcParams);
      // A sum_n of -1 is stored as 1.
4053  seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4054  seqSetInfo->id = &(aln.GetSeq_id(1));
4055  seqSetInfo->use_this_gi = use_this_gi;
4056  seqSetInfo->bit_score = bits;
4057  seqSetInfo->raw_score = score;
4058  seqSetInfo->evalue = evalue;
4059  seqSetInfo->match = num_ident;
      // Same assignment as listing line 4054 (redundant).
4060  seqSetInfo->id = &(aln.GetSeq_id(1));
4061  seqSetInfo->subjRange = CRange<TSeqPos>(0,0);
4062  seqSetInfo->flip = false;
4063 
4064  return seqSetInfo.release();
4065 }
4066 
4067 
4068 
4070 CAlignFormatUtil::GetSeqAlignSetCalcParams(const CSeq_align_set& aln,int queryLength, bool do_translation)
4071 {
      // NOTE(review): the return-type line preceding this signature is missing
      // from this listing; the function returns an SSeqAlignSetCalcParams*.
      //
      // Summarises all alignments in `aln`: returns NULL for an empty set,
      // otherwise a structure seeded from the first alignment and then filled
      // with the total bit score, the coverage, and the length / identity /
      // e-value of the alignment that has the highest bit score.
4072  int score = 0;
4073  double bits = 0;
4074  double evalue = 0;
4075  int sum_n = 0;
4076  int num_ident = 0;
4077  SSeqAlignSetCalcParams* seqSetInfo = NULL;
4078 
4079  if(aln.Get().empty())
4080  return seqSetInfo;
4081 
4082  seqSetInfo = GetSeqAlignCalcParams(*(aln.Get().front()));
4083 
4084  double total_bits = 0;
4085  double highest_bits = 0;
      // Despite the name, this holds the e-value of the highest-bit-score
      // alignment (see the loop below), not a running minimum.
4086  double lowest_evalue = 0;
4087  int highest_length = 1;
4088  int highest_ident = 0;
4089  //int highest_identity = 0;
4090  double totalLen = 0;
4091 
4092  list<TGi> use_this_gi; // Not used here, but needed for GetAlnScores.
4093 
4094  seqSetInfo->subjRange = CAlignFormatUtil::GetSeqAlignCoverageParams(aln,&seqSetInfo->master_covered_length,&seqSetInfo->flip);
      // NOTE(review): queryLength is used as a divisor without a zero check --
      // confirm callers always pass a positive length.
4095  seqSetInfo->percent_coverage = 100*seqSetInfo->master_covered_length/queryLength;
4096 
4097  ITERATE(CSeq_align_set::Tdata, iter, aln.Get()) {
4098  int align_length = CAlignFormatUtil::GetAlignmentLength(**iter, do_translation);
4099  totalLen += align_length;
4100 
4101  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue, sum_n,
4102  num_ident, use_this_gi);
4103  use_this_gi.clear();
4104 
4105  total_bits += bits;
4106 
4107 /// IMPORTANT: based on WB-1175, the trigger for setting the highest identity
4108 /// is not the highest identity value, but the identity value of
4109 /// the alignment with the highest score!
4110 ///
4111 /// if (100*num_ident/align_length > highest_identity) { -- this condition is disabled
4112 
4113  if (bits > highest_bits) { // this is the replacement condition (WB-1175)
4114  highest_length = align_length;
4115  highest_ident = num_ident;
4116 /// highest_identity = 100*num_ident/align_length;
4117  }
4118 
      // Same condition as above; highest_bits is only updated here, after the
      // length/identity of the new best alignment have been recorded.
4119  if (bits > highest_bits) {
4120  highest_bits = bits;
4121  lowest_evalue = evalue;
4122  }
4123  }
4124  seqSetInfo->match = highest_ident;
4125  seqSetInfo->align_length = highest_length;
4126  seqSetInfo->percent_identity = CAlignFormatUtil::GetPercentIdentity(seqSetInfo->match, seqSetInfo->align_length);
4127 
4128  seqSetInfo->total_bit_score = total_bits;
4129  seqSetInfo->bit_score = highest_bits;
4130  seqSetInfo->evalue = lowest_evalue;
4131  seqSetInfo->hspNum = static_cast<int>(aln.Size());
4132  seqSetInfo->totalLen = (Int8)totalLen;
4133 
4134  return seqSetInfo;
4135 }
4136 
4138 {
      // NOTE(review): the signature is missing from this listing; the body
      // reads an alignment set `aln` and a flag `do_translation`.
      //
      // Returns the percent identity of the alignment with the highest bit
      // score in `aln`, or -1 when the set is empty.
4139  int score = 0;
4140  double bits = 0;
4141  double evalue = 0;
4142  int sum_n = 0;
4143  int num_ident = 0;
4144 
4145  if(aln.Get().empty())
4146  return -1;
4147 
4148  double highest_bits = 0;
4149  int highest_length = 1;
4150  int highest_ident = 0;
4151 
4152  list<TGi> use_this_gi; // Not used here, but needed for GetAlnScores.
4153 
4154  ITERATE(CSeq_align_set::Tdata, iter, aln.Get()) {
4155  int align_length = CAlignFormatUtil::GetAlignmentLength(**iter, do_translation);
4156 
4157  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue, sum_n,
4158  num_ident, use_this_gi);
4159 
4160 
4161 /// IMPORTANT: based on WB-1175, the trigger for setting the highest identity
4162 /// is not the highest identity value, but the identity value of
4163 /// the alignment with the highest score!
4164 ///
4165 /// if (100*num_ident/align_length > highest_identity) { -- this condition is disabled
4166 
4167  if (bits > highest_bits) { // this is the replacement condition (WB-1175)
4168  highest_length = align_length;
4169  highest_ident = num_ident;
4170 /// highest_identity = 100*num_ident/align_length;
4171  highest_bits = bits;
4172  }
4173  }
4174 
      // With no alignment scoring above 0 bits this evaluates
      // GetPercentIdentity(0, 1), the initial values set above.
4175  double percent_identity = CAlignFormatUtil::GetPercentIdentity(highest_ident, highest_length);
4176  return percent_identity;
4177 }
4178 
4179 
4180 template<class container> bool
4181 s_GetBlastScore(const container& scoreList,
4182  double& evalue,
4183  double& bitScore,
4184  double& totalBitScore,
4185  int& percentCoverage,
4186  double& percentIdent,
4187  int& hspNum,
4188  double& totalLen,
4189  int &rawScore,
4190  int& sum_n,
4191  list<TGi>& use_this_gi)
4192 {
4193  const string k_GiPrefix = "gi:";
4194  bool hasScore = false;
4195 
4196 
4197  ITERATE (typename container, iter, scoreList) {
4198  const CObject_id& id=(*iter)->GetId();
4199  if (id.IsStr()) {
4200  hasScore = true;
4201  if (id.GetStr()=="seq_evalue") {
4202  evalue = (*iter)->GetValue().GetReal();
4203  } else if (id.GetStr()=="seq_bit_score"){
4204  bitScore = (*iter)->GetValue().GetReal();
4205  } else if (id.GetStr()=="seq_total_bit_score"){
4206  totalBitScore = (*iter)->GetValue().GetReal();
4207  } else if (id.GetStr()=="seq_percent_coverage"){
4208  percentCoverage = (*iter)->GetValue().GetInt();
4209  } else if (id.GetStr()=="seq_percent_identity" && (*iter)->GetValue().IsInt()){
4210  percentIdent = (*iter)->GetValue().GetInt();
4211  } else if (id.GetStr()=="seq_percent_identity" && (*iter)->GetValue().IsReal()){
4212  percentIdent = (*iter)->GetValue().GetReal();
4213  } else if (id.GetStr()=="seq_hspnum"){
4214  hspNum = (*iter)->GetValue().GetInt();
4215  } else if (id.GetStr()=="seq_align_totlen"){
4216  totalLen = (*iter)->GetValue().GetReal();
4217  } else if (id.GetStr()=="score"){
4218  rawScore = (*iter)->GetValue().GetInt();
4219  } else if (id.GetStr()=="use_this_gi"){
4220  Uint4 gi_v = (Uint4) ((*iter)->GetValue().GetInt());
4221  use_this_gi.push_back(GI_FROM(Uint4, gi_v));
4222  } else if (id.GetStr()=="sum_n"){
4223  sum_n = (*iter)->GetValue().GetInt();
4224  }
4225  else if(NStr::StartsWith(id.GetStr(),k_GiPrefix)) { //will be used when switch to 64bit GIs
4226  string strGi = NStr::Replace(id.GetStr(),k_GiPrefix,"");
4227  TGi gi = NStr::StringToNumeric<TGi>(strGi);
4228  use_this_gi.push_back(gi);
4229  }
4230  }
4231  }
4232  return hasScore;
4233 }
4234 
4235 
4236 void CAlignFormatUtil::GetUseThisSequence(const CSeq_align& aln,list<TGi>& use_this_gi)
4237 
4238 {
4239  const string k_GiPrefix = "gi:";
4240 
4241  if(!aln.CanGetExt() || aln.GetExt().size() == 0) return;
4242  const CUser_object &user = *(aln.GetExt().front());
4243 
4244  if (user.IsSetType() && user.GetType().IsStr() && user.GetType().GetStr() == "use_this_seqid" && user.IsSetData()) {
4245  const CUser_object::TData& fields = user.GetData();
4246  for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4247  const CUser_field& field = **fit;
4248 
4249  if (field.IsSetLabel() && field.GetLabel().IsStr() && field.GetLabel().GetStr() == "SEQIDS" &&
4250  field.IsSetData() && field.GetData().IsStrs()) {
4251  const CUser_field::C_Data::TStrs& strs = field.GetData().GetStrs();
4252  ITERATE(CUser_field::TData::TStrs, acc_iter, strs) {
4253  if(NStr::StartsWith(*acc_iter,k_GiPrefix)) { //will be used when switch to 64bit GIs
4254  string strGi = NStr::Replace(*acc_iter,k_GiPrefix,"");
4255  TGi gi = NStr::StringToNumeric<TGi>(strGi);
4256  use_this_gi.push_back(gi);
4257  }
4258  }
4259  }
4260  }
4261  }
4262 }
4263 
4264 
4265 /*use_this_seq will contain gi:nnnnnn or seqid:ssssss string list*/
4266 void CAlignFormatUtil::GetUseThisSequence(const CSeq_align& aln,list<string>& use_this_seq)
4267 
4268 {
4269  if(!aln.CanGetExt() || aln.GetExt().size() == 0) return;
4270  const CUser_object &user = *(aln.GetExt().front());
4271 
4272  if (user.IsSetType() && user.GetType().IsStr() && user.GetType().GetStr() == "use_this_seqid" && user.IsSetData()) {
4273  const CUser_object::TData& fields = user.GetData();
4274  for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4275  const CUser_field& field = **fit;
4276 
4277  if (field.IsSetLabel() && field.GetLabel().IsStr() && field.GetLabel().GetStr() == "SEQIDS" &&
4278  field.IsSetData() && field.GetData().IsStrs()) {
4279  const CUser_field::C_Data::TStrs& strs = field.GetData().GetStrs();
4280  ITERATE(CUser_field::TData::TStrs, acc_iter, strs) {
4281  use_this_seq.push_back(*acc_iter);
4282  }
4283  }
4284  }
4285  }
4286 }
4287 
4288 
4289 
4292 {
      // NOTE(review): the signature is missing from this listing; the body
      // reads an alignment set named `alnSet`.
      //
      // Builds an SSeqAlignSetCalcParams from the summary scores stored on the
      // FIRST alignment of alnSet (scores that are absent keep the defaults
      // below) and returns it as a raw pointer the caller owns.
      // NOTE(review): front() is taken without an emptiness check -- confirm
      // callers never pass an empty set.
4293  bool hasScore = false;
4294  double evalue = -1;
4295  double bitScore = -1;
4296  double totalBitScore = -1;
4297  int percentCoverage = -1;
4298  double percentIdent = -1;
4299  int hspNum = 0;
4300  double totalLen = 0;
4301  int rawScore = -1;
4302  int sum_n = -1;
4303  list<TGi> use_this_gi;
4304  list<string> use_this_seq;
4305 
4306  const CSeq_align& aln = *(alnSet.Get().front());
4307 
4308  hasScore = s_GetBlastScore(aln.GetScore(),evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4309 
      // No named score on the alignment itself: fall back to the scores
      // attached to its segments.
4310  if(!hasScore){
4311  const CSeq_align::TSegs& seg = aln.GetSegs();
4312  if(seg.Which() == CSeq_align::C_Segs::e_Std){
4313  s_GetBlastScore(seg.GetStd().front()->GetScores(),
4314  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4315  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
4316  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
4317  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4318  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
      // NOTE(review): listing line 4319 (the start of this call, presumably
      // s_GetBlastScore on the Denseg scores) is missing from this listing.
4320  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4321  }
4322  }
4323 
      // Prefer GIs found among the scores; otherwise read the
      // "use_this_seqid" extension.
4324  if(use_this_gi.size() == 0) {
4325  GetUseThisSequence(aln,use_this_seq);
4326  }
4327  else {
4328  use_this_seq = s_NumGiToStringGiList(use_this_gi);//for backward compatibility
4329  }
4330 
4331 
4332  unique_ptr<SSeqAlignSetCalcParams> seqSetInfo(new SSeqAlignSetCalcParams);
4333  seqSetInfo->evalue = evalue;
4334  seqSetInfo->bit_score = bitScore;
4335  seqSetInfo->total_bit_score = totalBitScore;
4336  seqSetInfo->percent_coverage = percentCoverage;
4337  seqSetInfo->percent_identity = percentIdent;
4338  seqSetInfo->hspNum = hspNum;
4339  seqSetInfo->totalLen = (Int8)totalLen;
4340 
      // A sum_n of -1 (not found) is stored as 1.
4341  seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4342  seqSetInfo->id = &(aln.GetSeq_id(1));
4343  seqSetInfo->use_this_gi = StringGiToNumGiList(use_this_seq);//for backward compatibility
4344  seqSetInfo->use_this_seq = use_this_seq;
4345  seqSetInfo->raw_score = rawScore;//not used
4346 
4347  seqSetInfo->subjRange = CRange<TSeqPos>(0,0);
4348  seqSetInfo->flip = false;
4349 
4350  return seqSetInfo.release();
4351 }
4352 
4354  const CSeq_id& aln_id,
4355  list<TGi>& use_this_gi,
4356  TGi& gi)
4357 
4358 {
      // NOTE(review): the first line of this signature (return type, function
      // name and the declaration of `handle`) is missing from this listing.
      //
      // Convenience wrapper: forwards to the GetDisplayIds overload that also
      // reports a tax id, and discards that tax id.
4359  TTaxId taxid = ZERO_TAX_ID;
4360  CRef<CSeq_id> wid = CAlignFormatUtil::GetDisplayIds(handle, aln_id, use_this_gi, gi, taxid);
4361  return wid;
4362 }
4363 
4365  const CSeq_id& aln_id,
4366  list<TGi>& use_this_gi,
4367  TGi& gi,
4368  TTaxId& taxid)
4369 
4370 {
      // NOTE(review): the first line of this signature and listing line 4371
      // (presumably the statement that obtains `bdlRef` from `handle`) are
      // missing from this listing.
      //
      // Chooses the seq-id to display for a subject sequence and reports its
      // GI and tax id through `gi` / `taxid`. Without defline data the best id
      // of the Bioseq itself is used; otherwise the deflines are scanned for
      // the first one that matches use_this_gi (when non-empty) or aln_id.
4372  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
4373 
4374  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
4375  CRef<CSeq_id> wid;
4376 
4377  gi = ZERO_GI;
4378  taxid = ZERO_TAX_ID;
4379  if(bdl.empty()){
4380  wid = FindBestChoice(*ids, CSeq_id::WorstRank);
4381  gi = FindGi(*ids);
4382  } else {
4383  bool found = false;
4384  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4385  iter != bdl.end(); iter++){
4386  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4387  TGi cur_gi = FindGi(*cur_id);
      // wid and taxid are overwritten on every iteration, so when no defline
      // matches they end up describing the last defline visited.
4388  wid = FindBestChoice(*cur_id, CSeq_id::WorstRank);
4389  if ((*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4390  taxid = (*iter)->GetTaxid();
4391  }
4392  if (!use_this_gi.empty()) {
4393  ITERATE(list<TGi>, iter_gi, use_this_gi){
4394  if(cur_gi == *iter_gi){
4395  found = true;
4396  break;
4397  }
4398  }
4399  } else {
      // Match either the exact id, or any general id sharing aln_id's db name.
4400  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4401  if ((*iter_id)->Match(aln_id)
4402  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4403  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4404  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4405  found = true;
4406  }
4407  }
4408  }
      // gi is only set for a matching defline; otherwise it stays ZERO_GI.
4409  if(found){
4410  gi = cur_gi;
4411  break;
4412  }
4413  }
4414  }
4415  return wid;
4416 }
4417 
4418 
4419 
4420 //removes "gi:" or "seqid:" prefix from gi:nnnnnnn or seqid:nnnnn
4421 static string s_UseThisSeqToTextSeqID(string use_this_seqid, bool &isGi)
4422 {
4423  const string k_GiPrefix = "gi:";
4424  const string k_SeqIDPrefix = "seqid:";
4425  isGi = false;
4426  string textSeqid;
4427  if(NStr::StartsWith(use_this_seqid,k_GiPrefix)) {
4428  textSeqid = NStr::Replace(use_this_seqid,k_GiPrefix,"");
4429  isGi = true;
4430  }
4431  else if(NStr::StartsWith(use_this_seqid,k_SeqIDPrefix)) {
4432  textSeqid = NStr::Replace(use_this_seqid,k_SeqIDPrefix,"");
4433  }
4434  else {//assume no prefix - gi
4435  if(NStr::StringToInt8(use_this_seqid,NStr::fConvErr_NoThrow)) {
4436  isGi = true;
4437  }
4438  }
4439  return textSeqid;
4440 }
4441 
4442 
4443 
4444 //assume that we have EITHER gi: OR seqid: in the list
4445 bool CAlignFormatUtil::IsGiList(list<string> &use_this_seq)
4446 {
4447  bool isGi = false;
4448  ITERATE(list<string>, iter_seq, use_this_seq){
4449  s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4450  break;
4451  }
4452  return isGi;
4453 }
4454 
4455 list<TGi> CAlignFormatUtil::StringGiToNumGiList(list<string> &use_this_seq)
4456 {
4457  list<TGi> use_this_gi;
4458  ITERATE(list<string>, iter_seq, use_this_seq){
4459  bool isGi = false;
4460  string strGI = s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4461  if(isGi) use_this_gi.push_back(NStr::StringToNumeric<TGi>(strGI));
4462  }
4463  return use_this_gi;
4464 }
4465 
4466 
4467 
4468 bool CAlignFormatUtil::MatchSeqInSeqList(TGi cur_gi, CRef<CSeq_id> &seqID, list<string> &use_this_seq,bool *isGiList)
4469 {
4470  bool found = false;
4471  bool isGi = false;
4472 
4473  string curSeqID = CAlignFormatUtil::GetLabel(seqID,true); //uses GetSeqIdString(true)
4474  ITERATE(list<string>, iter_seq, use_this_seq){
4475  isGi = false;
4476  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4477  if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4478  found = true;
4479  break;
4480  }
4481  }
4482  if(isGiList) *isGiList = isGi;
4483  return found;
4484 }
4485 
4486 
4487 bool CAlignFormatUtil::MatchSeqInSeqList(CConstRef<CSeq_id> &alnSeqID, list<string> &use_this_seq,vector <string> &seqList)
4488 {
4489  bool isGi = false;
4490  string curSeqID;
4491  if(alnSeqID->IsGi()) {
4492  curSeqID = NStr::NumericToString(alnSeqID->GetGi());
4493  }
4494  else {
4495  curSeqID = CAlignFormatUtil::GetLabel(alnSeqID,true); //uses GetSeqIdString(true)
4496  }
4497  //match with seqid in seq_align
4498  bool found = std::find(seqList.begin(), seqList.end(), curSeqID) != seqList.end();
4499  if(!found) {
4500  //match in use_this_seq list
4501  ITERATE(list<string>, iter_seq, use_this_seq){
4502  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4503  found = std::find(seqList.begin(), seqList.end(), useThisSeq) != seqList.end();
4504  if(found){
4505  break;
4506  }
4507  }
4508  }
4509  return found;
4510 }
4511 
4512 bool CAlignFormatUtil::MatchSeqInUseThisSeqList(list<string> &use_this_seq, string textSeqIDToMatch)
4513 {
4514  bool has_match = false;
4515 
4516  ITERATE(list<string>, iter_seq, use_this_seq) {
4517  bool isGi;
4518  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4519  if(useThisSeq == textSeqIDToMatch) {
4520  has_match = true;
4521  break;
4522  }
4523  }
4524  return has_match;
4525 }
4526 
4528 {
      // NOTE(review): the signature is missing from this listing; the body
      // reads `use_this_seq` (modified in place) and `accessionType`.
      //
      // Drops from use_this_seq every entry whose accession type, as reported
      // by CSeq_id::IdentifyAccession, equals accessionType. Returns true when
      // at least one entry was dropped.
      // Note that the entries kept are stored WITHOUT their "gi:"/"seqid:"
      // prefix, because the rebuilt list holds the stripped text.
4529  list<string> new_use_this_seq;
4530  bool hasAccType = false;
4531  bool isGI = false;
4532 
4533  ITERATE(list<string>, iter_seq, use_this_seq) {
4534  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGI);
4535  CSeq_id::EAccessionInfo useThisSeqAccType = CSeq_id::IdentifyAccession (useThisSeq);
4536  if(useThisSeqAccType != accessionType) {
4537  new_use_this_seq.push_back(useThisSeq);
4538  }
4539  else {
4540  hasAccType = true;
4541  }
4542  }
4543  use_this_seq = new_use_this_seq;
4544  return hasAccType;
4545 }
4546 
4548  const CSeq_id& aln_id,
4549  list<string>& use_this_seq,
4550  TGi *gi,
4551  TTaxId *taxid,
4552  string *textSeqID)
4553 
4554 {
      // NOTE(review): the first line of this signature and listing line 4555
      // (presumably the statement that obtains `bdlRef` from `handle`) are
      // missing from this listing.
      //
      // String-list variant of the display-id lookup: chooses the seq-id to
      // display for a subject sequence and, through the optional out-pointers
      // (each may be NULL), reports its GI, tax id and text seq-id. Deflines
      // are matched against use_this_seq when it is non-empty, otherwise
      // against aln_id.
4556  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
4557 
4558  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
4559  CRef<CSeq_id> wid;
4560 
4561  if(gi) *gi = ZERO_GI;
4562  if(taxid) *taxid = ZERO_TAX_ID;
4563  if(bdl.empty()){
      // No defline data: describe the Bioseq's own ids.
4564  wid = FindBestChoice(*ids, CSeq_id::WorstRank);
4565  if(gi) *gi = FindGi(*ids);
4566  if(textSeqID) *textSeqID = GetLabel(wid,true);//uses GetSeqIdString(true)
4567  } else {
4568  bool found = false;
4569  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4570  iter != bdl.end(); iter++){
4571  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4572  TGi cur_gi = FindGi(*cur_id);
      // wid and *taxid are overwritten on every iteration, so when no defline
      // matches they end up describing the last defline visited.
4573  wid = FindBestChoice(*cur_id, CSeq_id::WorstRank);
4574  string curSeqID = GetLabel(wid,true);//uses GetSeqIdString(true)
4575  if (taxid && (*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4576  *taxid = (*iter)->GetTaxid();
4577  }
4578  if (!use_this_seq.empty()) {
      // GI entries are compared numerically, other entries by label text.
4579  ITERATE(list<string>, iter_seq, use_this_seq){
4580  bool isGi = false;
4581  string useThisSeq = s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4582  if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4583  found = true;
4584  break;
4585  }
4586  }
4587  } else {
      // Match either the exact id, or any general id sharing aln_id's db name.
4588  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4589  if ((*iter_id)->Match(aln_id)
4590  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4591  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4592  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4593  found = true;
4594  }
4595  }
4596  }
      // *gi and *textSeqID are only set for a matching defline.
4597  if(found){
4598  if(gi) *gi = cur_gi;
4599  if(textSeqID) *textSeqID = curSeqID;
4600  break;
4601  }
4602  }
4603  }
4604 
4605  return wid;
4606 }
4607 
4608 
4610  const CSeq_id& aln_id,
4611  list<TGi>& use_this_gi)
4612 
4613 
4614 {
      // NOTE(review): the first line of this signature (return type, function
      // name and the declaration of `bdl`) is missing from this listing.
      //
      // Returns the GI of the first defline in `bdl` that matches use_this_gi
      // (when non-empty) or aln_id; ZERO_GI when bdl is empty or nothing
      // matches.
4615  TGi gi = ZERO_GI;
4616 
4617  if(!bdl.empty()){
4618  bool found = false;
4619  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4620  iter != bdl.end(); iter++){
4621  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4622  TGi cur_gi = FindGi(*cur_id);
4623  if (!use_this_gi.empty()) {
4624  ITERATE(list<TGi>, iter_gi, use_this_gi){
4625  if(cur_gi == *iter_gi){
4626  found = true;
4627  break;
4628  }
4629  }
4630  } else {
      // Match either the exact id, or any general id sharing aln_id's db name.
4631  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4632  if ((*iter_id)->Match(aln_id)
4633  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4634  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4635  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4636  found = true;
4637  }
4638  }
4639  }
4640  if(found){
4641  gi = cur_gi;
4642  break;
4643  }
4644  }
4645  }
4646  return gi;
4647 }
4648 
4650 {
      // NOTE(review): the signature is missing from this listing; the body
      // takes a modifiable range `rng` and returns it.
      //
      // Normalises a range in place so that From <= To, swapping the two end
      // points when they arrive reversed, and returns the same range.
4651  if(rng.GetFrom() > rng.GetTo()){
4652  rng.Set(rng.GetTo(), rng.GetFrom());
4653  }
4654  //cerr << "Query Rng: " << rng.GetFrom() << "-" << rng.GetTo() << endl;
4655  return rng;
4656 }
4657 
4659 {
      // NOTE(review): the signature is missing from this listing; the body
      // reads a modifiable alignment set named `alnset`.
      //
      // Returns the covered length of the query ranges that correspond to
      // not-yet-seen subject ranges: alignments are visited in order and only
      // the part of each subject range not already covered by earlier
      // alignments contributes its query range. Returns 0 for an empty set.
4660  if(alnset.IsEmpty())
4661  return 0;
4662 
      // The segment type of the first alignment decides whether later
      // alignments are converted from Dendiag to Denseg before mapping.
4663  bool isDenDiag = (alnset.Get().front()->GetSegs().Which() == CSeq_align::C_Segs::e_Dendiag) ?
4664  true : false;
4665 
      // Seed both collections with the first alignment.
4666  list<CRef<CSeq_align> >::iterator mItr=alnset.Set().begin();
4667  CRangeCollection<TSeqPos> subj_rng_coll((*mItr)->GetSeqRange(1));
4668  CRange<TSeqPos> q_rng((*mItr)->GetSeqRange(0));
4669  /*
4670  cerr << MSerial_AsnText << **mItr;
4671  cerr << (*mItr)->GetSeqRange(0).GetFrom() << endl;
4672  cerr << (*mItr)->GetSeqRange(0).GetTo() << endl;
4673  cerr << (*mItr)->GetSeqRange(0).GetToOpen() << endl;
4674  cerr << (*mItr)->GetSeqRange(1).GetFrom() << endl;
4675  cerr << (*mItr)->GetSeqRange(1).GetTo() << endl;
4676  cerr << (*mItr)->GetSeqRange(1).GetToOpen() << endl;
4677  */
4678  CRangeCollection<TSeqPos> query_rng_coll(s_FixMinusStrandRange(q_rng));
4679  ++mItr;
4680  for(;mItr != alnset.Set().end(); ++mItr) {
4681  const CRange<TSeqPos> align_subj_rng((*mItr)->GetSeqRange(1));
4682  // subject range should always be on the positive strand
4683  ASSERT(align_subj_rng.GetTo() > align_subj_rng.GetFrom());
      // coll = the part of this alignment's subject range not seen before.
4684  CRangeCollection<TSeqPos> coll(align_subj_rng);
4685  coll.Subtract(subj_rng_coll);
4686 
      // Subject range entirely covered already: contributes nothing.
4687  if (coll.empty())
4688  continue;
4689 
      // Entire subject range is new: take the whole query range as is.
4690  if(coll[0] == align_subj_rng) {
4691  CRange<TSeqPos> query_rng ((*mItr)->GetSeqRange(0));
4692  //cerr << "Subj Rng :" << align_subj_rng.GetFrom() << "-" << align_subj_rng.GetTo() << endl;
4693  query_rng_coll += s_FixMinusStrandRange(query_rng);
4694  subj_rng_coll += align_subj_rng;
4695  }
4696  else {
      // Partial overlap: map each new subject sub-range to the query through
      // the alignment coordinates.
4697  ITERATE (CRangeCollection<TSeqPos>, uItr, coll) {
4698  CRange<TSeqPos> query_rng;
4699  const CRange<TSeqPos> & subj_rng = (*uItr);
4700  CRef<CSeq_align> densegAln
4701  = isDenDiag ? CAlignFormatUtil::CreateDensegFromDendiag(**mItr) : (*mItr);
4702 
4703  CAlnMap map(densegAln->GetSegs().GetDenseg());
4704  TSignedSeqPos subj_aln_start = map.GetAlnPosFromSeqPos(1,subj_rng.GetFrom());
4705  TSignedSeqPos subj_aln_end = map.GetAlnPosFromSeqPos(1,subj_rng.GetTo());
4706  query_rng.SetFrom(map.GetSeqPosFromAlnPos(0,subj_aln_start));
4707  query_rng.SetTo(map.GetSeqPosFromAlnPos(0,subj_aln_end));
4708 
4709  //cerr << "Subj Rng :" << subj_rng.GetFrom() << "-" << subj_rng.GetTo() << endl;
4710  query_rng_coll += s_FixMinusStrandRange(query_rng);
4711  subj_rng_coll += subj_rng;
4712  }
4713  }
4714  }
4715 
4716  return query_rng_coll.GetCoveredLength();
4717 }
4718 
4719 ///return id type specified or null ref
4720 ///@param ids: the input ids
4721 ///@param choice: id of choice
4722 ///@return: the id with specified type
4723 ///
4725  CSeq_id::E_Choice choice)
4726 {
      // NOTE(review): the first line of this signature (return type, function
      // name and the declaration of `ids`) is missing from this listing.
      //
      // Linear search: returns the first id in `ids` whose Which() equals
      // `choice`, or a null CRef when there is none.
4727  CRef<CSeq_id> cid;
4728 
4729  for (CBioseq::TId::const_iterator iter = ids.begin(); iter != ids.end();
4730  iter ++){
4731  if ((*iter)->Which() == choice){
4732  cid = *iter;
4733  break;
4734  }
4735  }
4736 
4737  return cid;
4738 }
4739 
4740 ///return gi from id list
4741 ///@param ids: the input ids
4742 ///@return: the gi if found
4743 ///
4745 {
      // NOTE(review): the signature and listing line 4747 (presumably the
      // lookup that initialises `id` with the GI-type seq-id from the input
      // list) are missing from this listing.
      //
      // Returns the GI held by `id` when one was found, ZERO_GI otherwise.
4746  TGi gi = ZERO_GI;
4748  if (!(id.Empty())){
4749  return id->GetGi();
4750  }
4751  return gi;
4752 }
4753 
4755 {
      // NOTE(review): the signature is missing from this listing; the body
      // reads a Bioseq handle named `bh`.
      //
      // Concatenates every Title descriptor found for `bh`, each followed by a
      // single space (so a non-empty result ends with a trailing space).
4756  CSeqdesc_CI desc_t(bh, CSeqdesc::e_Title);
4757  string t = kEmptyStr;
4758  for (;desc_t; ++desc_t) {
4759  t += desc_t->GetTitle() + " ";
4760  }
4761  return t;
4762 }
4763 
4765 {
      // NOTE(review): the signature is missing from this listing; the body
      // reads a seq-id named `id`.
      //
      // Returns the textual form of `id`: the FASTA-style string for GI, PRF
      // and PIR ids, and GetSeqIdString(true) for every other id type.
4766  string retval;
4767 
4768  if (id.IsGi() || id.IsPrf() || id.IsPir()) {
4769  retval = id.AsFastaString();
4770  }
4771  else {
4772  retval = id.GetSeqIdString(true);
4773  }
4774 
4775  return retval;
4776 }
4777 
4778 
4779 bool CAlignFormatUtil::GetTextSeqID(CConstRef<CSeq_id> seqID, string *textSeqID)
4780 {
4781  bool hasTextSeqID = true;
4782 
4783  const CTextseq_id* text_id = seqID->GetTextseq_Id();
4784  //returns non zero if e_Genbank,e_Embl,e_Ddbj,e_Pir,e_Swissprot,case e_Other,e_Prf,case e_Tpg,e_Tpe,case e_Tpd,case e_Gpipe, e_Named_annot_track
4785  if(!text_id) { //check for pdb and pat
4786  if(!(seqID->Which() == CSeq_id::e_Pdb) && !(seqID->Which() == CSeq_id::e_Patent) && !(seqID->Which() == CSeq_id::e_Gi)) {
4787  hasTextSeqID = false;
4788  }
4789  }
4790 
4791  if(hasTextSeqID && textSeqID) {
4792  seqID->GetLabel(textSeqID, CSeq_id::eContent);
4793  }
4794  return hasTextSeqID;
4795 }
4796 
4797 
4798 
4799 bool CAlignFormatUtil::GetTextSeqID(const list<CRef<CSeq_id> > & ids, string *textSeqID)
4800 {
4801  bool hasTextSeqID = false;
4802 
4803  CConstRef<CSeq_id> seqID = FindTextseq_id(ids);
4804  //returns non zero if e_Genbank,e_Embl,e_Ddbj,e_Pir,e_Swissprot,case e_Other,e_Prf,case e_Tpg,e_Tpe,case e_Tpd,case e_Gpipe, e_Named_annot_track
4805  if(seqID.Empty()) {
4806  seqID = GetSeq_idByType(ids, CSeq_id::e_Pdb);
4807  }
4808  if(seqID.Empty()) {
4809  seqID = GetSeq_idByType(ids, CSeq_id::e_Patent);
4810  }
4811  if(!seqID.Empty()) {
4812  hasTextSeqID = true;
4813  if(textSeqID) seqID->GetLabel(textSeqID, CSeq_id::eContent);
4814  }
4815  return hasTextSeqID;
4816 }
4817 
4819  vector <string> &seqList)
4820 {
      // NOTE(review): the first line of this signature (return type, function
      // name and the declaration of `source_aln`) is missing from this listing.
      //
      // Returns a new alignment set holding only the alignments of source_aln
      // whose subject (or one of its use_this_seq alternatives) is listed in
      // seqList. The match is recomputed only when the subject id changes, so
      // consecutive alignments for the same subject share one decision.
4821  CConstRef<CSeq_id> previous_id, subid;
4822  list<string> use_this_seq;
4823  bool match = false;
4824 
4825  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
4826  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
4827  subid = &((*iter)->GetSeq_id(1));
4828  if(previous_id.Empty() || !subid->Match(*previous_id)){
4829  use_this_seq.clear();
4830  CAlignFormatUtil::GetUseThisSequence(**iter,use_this_seq);
4831  match = MatchSeqInSeqList(subid, use_this_seq,seqList);
4832  }
4833 
4834  previous_id = subid;
4835  if(match) {
4836  new_aln->Set().push_back(*iter);
4837  }
4838  }
4839  return new_aln;
4840 }
4841 
4842 
4843 END_SCOPE(align_format)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void s_CalcAlnPercentIdent(const CRef< CSeq_align_set > &info1, const CRef< CSeq_align_set > &info2, double &percentIdent1, double &percentIdent2)
static string s_GetTaxName(TTaxId taxid)
static bool s_ProcessAlignSet(const CSeq_align_set &alnset, list< CRange< TSeqPos > > &query_list, list< CRange< TSeqPos > > &subject_list)
static CRef< CSeq_id > s_GetSeqIdByType(const list< CRef< CSeq_id > > &ids, CSeq_id::E_Choice choice)
return id type specified or null ref
static string s_MapURLLink(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo, const CBioseq::TId &ids)
static list< CRange< TSeqPos > > s_MergeRangeList(list< CRange< TSeqPos > > &source)
static void s_AddLinkoutInfo(map< int, vector< CBioseq::TId > > &linkout_map, int linkout, CBioseq::TId &cur_id)
static bool s_GetSRASeqMetadata(const CBioseq::TId &ids, string &strRun, string &strSpotId, string &strReadIndex)
void s_AddOtherRelatedInfoLinks(CBioseq::TId &cur_id, const string &rid, bool is_na, bool for_alignment, int cur_align, list< string > &linkout_list)
bool kTranslation
static list< string > s_NumGiToStringGiList(list< TGi > use_this_gi)
static CRange< TSeqPos > & s_FixMinusStrandRange(CRange< TSeqPos > &rng)
static bool s_FillDbInfoLocally(const string &dbname, CAlignFormatUtil::SDbInfo &info, int dbfilt_algorithm)
Initialize database statistics with data obtained from local BLAST databases.
CRef< CScope > kScope
bool s_GetBlastScore(const container &scoreList, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi, int &comp_adj_method)
Get blast score information.
static list< string > s_GetLinkoutUrl(int linkout, string giList, string labelList, TGi first_gi, CAlignFormatUtil::SLinkoutInfo &linkoutInfo, bool textLink=true)
string s_GetBestIDForURL(CBioseq::TId &ids)
static bool s_isAlnInFilteringRange(double evalue, double percentIdent, int queryCover, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh, int queryCoverLow, int queryCoverHigh)
USING_SCOPE(ncbi)
static string s_MapCommonUrlParams(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo)
static int s_LinkLetterToType(string linkLetter)
static double adjustPercentIdentToDisplayValue(double value)
static string s_MapCustomLink(string linkUrl, string reportType, string accession, string linkText, string linktrg, string linkTitle=kCustomLinkTitle, string linkCls="")
const char k_PSymbol[ePMatrixSize+1]
Residues.
static string s_UseThisSeqToTextSeqID(string use_this_seqid, bool &isGi)
static string s_MapLinkoutGenParam(string &url_link_tmpl, const string &rid, string giList, bool for_alignment, int cur_align, string &label, string &lnk_displ, string lnk_tl_info="", string lnk_title="")
static list< string > s_GetFullLinkoutUrl(CBioseq::TId &cur_id, CAlignFormatUtil::SLinkoutInfo &linkoutInfo, map< int, vector< CBioseq::TId > > &linkout_map, bool getIdentProteins)
static bool FromRangeAscendingSort(CRange< TSeqPos > const &info1, CRange< TSeqPos > const &info2)
static const char kBioAssayProtImg[]
static const char kSeqViewerUrl[]
static const char kStructureImg[]
const int k_NumAsciiChar
Number of ASCII characters for populating matrix columns.
static const string kMapviwerDispl
static const char kGenericLinkMouseoverTmpl[]
static const string kSeqViewerParams
static const char kEntrezTMUrl[]
static const char kMapviwerUrl[]
mapviewer linkout
static const char kCustomLinkTemplate[]
static const string kGenomeDataViewerDispl
static const char kStructureAlphaFoldUrl[]
static const char kDownloadUrl[]
dumpgnl
static const string kGeneDispl
static const string kUnigeneDispl
static const string kGeoDispl
static const char kClassInfo[]
blast related url
static const char kGeoImg[]
static const char kGeneTerm[]
static const char kIdenticalProteinsUrl[]
static const string kIdenticalProteinsDispl
static const char kGeneImg[]
static const string kReprMicrobialGenomesDispl
static const string kBioAssayDispl
static const char kReprMicrobialGenomesImg[]
static const char kGenomeDataViewerImg[]
static const string kMapviewBlastHitParams
static const string kMapviewBlastHitUrl
mapviewer linkout
static const char kUnigeneImg[]
static const char kStructureUrl[]
structure
static const char kBioAssayNucImg[]
static const char kGenericLinkTemplate[]
static const char kMapviwerImg[]
@ ePMatrixSize
static const char kTraceUrl[]
trace db
static const string kStructureDispl
static const char kCustomLinkTitle[]
static const char kSeqViewerUrlNonGi[]
Declares the CBlastServices class.
static string GetProtocol(void)
static CRef< objects::CSeq_align_set > FilterSeqalignBySeqList(objects::CSeq_align_set &source_aln, vector< string > &seqList)
function for Filtering seqalign by specific subjects
static void PrintPhiInfo(int num_patterns, const string &pattern, double prob, vector< int > &offsets, CNcbiOstream &out)
Prints out PHI-BLAST info for header (or footer)
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignSetCalcParamsFromASN(const objects::CSeq_align_set &alnSet)
static string GetIDUrl(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid.
static int GetAlignmentLength(const objects::CSeq_align &aln, bool do_translation)
get the alignment length
static bool IsWGSAccession(string &accession, string &wgsProj)
Check if accession is WGS.
static void PruneSeqalign(const objects::CSeq_align_set &source_aln, objects::CSeq_align_set &new_aln, unsigned int num=static_cast< unsigned int >(kDfltArgNumAlignments))
Fill new alignset containing the specified number of alignments with unique slave seqids.
static int GetUniqSeqCoverage(objects::CSeq_align_set &alnset)
Calculate the uniq subject query coverage range (blastn only)
static void InitConfig()
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignSetCalcParams(const objects::CSeq_align_set &aln, int queryLength, bool do_translation)
static void SortHitByMolecularType(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
sort a list of seqalign set by molecular type
static void AcknowledgeBlastQuery(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false, const string &rid=kEmptyStr)
Print out blast query info.
static bool IsMixedDatabase(const objects::CSeq_align_set &alnset, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
static list< CRef< objects::CSeq_align_set > > SortOneSeqalignForSortableFormat(const objects::CSeq_align_set &source, bool nuc_to_nuc_translation, int hit_sort, int hsp_sort)
static const char kNoHitsFound[]
The string containing the message that no hits were found.
static void GetAsciiProteinMatrix(const char *matrix_name, CNcbiMatrix< int > &retval)
Retrieve a scoring matrix for the provided matrix name.
static list< string > GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, bool structure_linkout_as_group, bool for_alignment, int cur_align, string &linkoutOrder, TTaxId taxid, string &database, int query_number, string &user_url, string &preComputedResID, ILinkoutDB *linkoutdb, const string &mv_build_name)
Get linkout membership for the list of blast deflines.
static void x_AcknowledgeBlastSequence(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, const string &label, bool tabular, const string &rid)
static void PrintDbReport(const vector< SDbInfo > &dbinfo_list, size_t line_length, CNcbiOstream &out, bool top=false)
Print out blast database information.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alignment.
static void BuildFormatQueryString(CCgiContext &ctx, string &cgi_query)
static string GetSeqDescrString(const objects::CBioseq &cbs)
Returns a full description for a Bioseq, concatenating all available titles.
@ eAddEOLAtLineEnd
add EOL at the end of the string
@ eAddEOLAtLineStart
add EOL at the beginning of the string
@ eSpacePosAtLineEnd
add spaces at the end of the string
static list< TGi > StringGiToNumGiList(list< string > &use_this_seq)
Convert a string gi list to a TGi list.
static string AddSpaces(string paramVal, size_t maxParamLength, int spacesFormatFlag=eSpacePosToCenter)
Calculate the number of spaces and add them to paramVal.
static CRef< objects::CSeq_align_set > FilterSeqalignByPercentIdent(objects::CSeq_align_set &source_aln, double percentIdentLow, double percentIdentHigh)
function for Filtering seqalign by percent identity
static bool RemoveSeqsOfAccessionTypeFromSeqInUse(list< string > &use_this_seq, objects::CSeq_id::EAccessionInfo accesionType)
function to remove sequences of accesionType from use_this_seq list
static bool SortHitByMasterStartAscending(CRef< objects::CSeq_align_set > &info1, CRef< objects::CSeq_align_set > &info2)
sorting function for sorting a list of seqalign set by ascending master start position
static void GetScoreString(double evalue, double bit_score, double total_bit_score, int raw_score, string &evalue_str, string &bit_score_str, string &total_bit_score_str, string &raw_score_str)
format evalue and bit_score
static map< string, CRef< objects::CSeq_align_set > > HspListToHitMap(vector< string > seqIdList, const objects::CSeq_align_set &source)
static string GetBareId(const objects::CSeq_id &id)
Get sequence id with no database source (bare accession)
static string GetGnlID(const objects::CDbtag &dtg)
Return ID for GNL label.
static bool m_geturl_debug_flag
static void SortHit(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, bool do_translation, objects::CScope &scope, int sort_method, ILinkoutDB *linkoutdb, const string &mv_build_name)
actual sorting function for SortHitByMolecularType
static void FillScanModeBlastDbInfo(vector< SDbInfo > &retval, bool is_protein, int numSeqs, Int8 numLetters, string &tag)
Fills one BLAST dbinfo structure.
static bool SortHitByTotalScoreDescending(CRef< objects::CSeq_align_set > const &info1, CRef< objects::CSeq_align_set > const &info2)
return the comparison result: 1st >= 2nd => true, false otherwise
static string GetIDUrlGen(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid that goes to entrez or trace.
static bool SortHspBySubjectStartAscending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
static CAlignFormatUtil::DbType GetDbType(const objects::CSeq_align_set &actual_aln_list, objects::CScope &scope)
Set the database as gi type.
static void PruneSeqalignAll(const objects::CSeq_align_set &source_aln, objects::CSeq_align_set &new_aln, unsigned int number)
Fill new alignset containing the specified number of alignments plus the rest of alignments for the l...
static void PrintTildeSepLines(string str, size_t line_len, CNcbiOstream &out)
Print out misc information separated by "~".
static string BuildUserUrl(const objects::CBioseq::TId &ids, TTaxId taxid, string user_url, string database, bool db_is_na, string rid, int query_number, bool for_alignment)
return the custom url (such as mapview)
static string MapTemplate(string inpString, string tmplParamName, Int8 templParamVal)
Replace template tags by real data.
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignCalcParams(const objects::CSeq_align &aln)
static string GetURLFromRegistry(const string url_name, int index=-1)
retrieve URL from .ncbirc file combining host/port and format strings values.
static bool IsGiList(list< string > &use_this_seq)
Check if use_this_seq contains gi list.
static double GetSeqAlignSetCalcPercentIdent(const objects::CSeq_align_set &aln, bool do_translation)
static string GetGraphiscLink(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static bool MatchSeqInSeqList(TGi cur_gi, CRef< objects::CSeq_id > &seqID, list< string > &use_this_seq, bool *isGiList=NULL)
Matches text seqID or gi with the list of seqIds or gis.
static int GetSeqLinkoutInfo(objects::CBioseq::TId &cur_id, ILinkoutDB **linkoutdb, const string &mv_build_name, TGi gi=INVALID_GI)
static CRef< objects::CSeq_id > GetDisplayIds(const objects::CBioseq_Handle &handle, const objects::CSeq_id &aln_id, list< TGi > &use_this_gi, TGi &gi, TTaxId &taxid)
Scan the list of blast deflines and find seqID to be used in display.
static list< string > GetLinkoutUrl(int linkout, const objects::CBioseq::TId &ids, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, TGi first_gi, bool structure_linkout_as_group, bool for_alignment, int cur_align, string preComputedResID)
Get the list of urls for linkouts.
static void PrintKAParameters(double lambda, double k, double h, size_t line_len, CNcbiOstream &out, bool gapped, const Blast_GumbelBlk *gbp=NULL)
Print out kappa, lambda blast parameters.
static CRef< objects::CSeq_align > CreateDensegFromDendiag(const objects::CSeq_align &aln)
Create denseseg representation for densediag seqalign.
static list< string > GetCustomLinksList(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope, int customLinkTypes=eLinkTypeDefault)
Create the list of string links for seqid that go.
static string GetURLDefault(const string url_name, int index=-1)
settings are not complete.
static CRef< objects::CSeq_align_set > FilterSeqalignByEval(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh)
function for Filtering seqalign by expect value
static string GetFASTALinkURL(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL to FASTA info.
static bool GetTextSeqID(const list< CRef< objects::CSeq_id > > &ids, string *textSeqID=NULL)
static void GetBlastDbInfo(vector< SDbInfo > &retval, const string &blastdb_names, bool is_protein, int dbfilt_algorithm, bool is_remote=false)
Retrieve BLAST database information for presentation in BLAST report.
static void GetUseThisSequence(const objects::CSeq_align &aln, list< TGi > &use_this_gi)
Extract use_this_gi info from blast alignment.
static bool SortHitByPercentIdentityDescendingEx(const CRef< objects::CSeq_align_set > &info1, const CRef< objects::CSeq_align_set > &info2)
sorting function for sorting a list of seqalign set by descending identity
static void ExtractSeqAlignForSeqList(CRef< objects::CSeq_align_set > &all_aln_set, string alignSeqList)
extract seq_align_set corresponding to seqid list
static int GetPercentMatch(int numerator, int denominator)
function for calculating percent match for an alignment.
static string GetSeqIdString(const objects::CBioseq &cbs, bool believe_local_id=true)
Returns a full '|'-delimited Seq-id string for a Bioseq.
static bool MatchSeqInUseThisSeqList(list< string > &use_this_seq, string textSeqIDToMatch)
static list< string > GetSeqLinksList(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static CRef< objects::CSeq_align_set > SortSeqalignForSortableFormat(CCgiContext &ctx, objects::CScope &scope, objects::CSeq_align_set &aln_set, bool nuc_to_nuc_translation, int db_order, int hit_order, int hsp_order, ILinkoutDB *linkoutdb, const string &mv_build_name)
static double GetPercentIdentity(const objects::CSeq_align &aln, objects::CScope &scope, bool do_translation)
calculate the percent identity for a seqalign
static void ExtractSeqalignSetFromDiscSegs(objects::CSeq_align_set &target, const objects::CSeq_align_set &source)
If a Seq-align-set contains Seq-aligns with discontinuous type segments, extract the underlying Seq-a...
static bool IsWGSPattern(string &wgsAccession)
Check if accession is WGS.
static bool SortHitByScoreDescending(const CRef< objects::CSeq_align_set > &info1, const CRef< objects::CSeq_align_set > &info2)
static CRef< objects::CSeq_align_set > FilterSeqalignByScoreParams(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh)
function for Filtering seqalign by expect value and percent identity
static void GetAlignLengths(objects::CAlnVec &salv, int &align_length, int &num_gaps, int &num_gap_opens)
Count alignment length, number of gap openings and total number of gaps in a single alignment.
static string BuildSRAUrl(const objects::CBioseq::TId &ids, string user_url)
return the SRA (Short Read Archive) URL
static bool SortHspByMasterStartAscending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
sorting function for sorting a list of seqalign by ascending master start position
static int SetCustomLinksTypes(SSeqURLInfo *seqUrlInfo, int customLinkTypesInp)
Create info indicating what kind of links to display.
static int GetMasterCoverage(const objects::CSeq_align_set &alnset)
static unique_ptr< CNcbiRegistry > m_Reg
static int GetFrame(int start, objects::ENa_strand strand, const objects::CBioseq_Handle &handle)
return the frame for a given strand. Note that start is zero-based.
static void GetBdlLinkoutInfo(const list< CRef< objects::CBlast_def_line > > &bdl, map< int, vector< objects::CBioseq::TId > > &linkout_map, ILinkoutDB *linkoutdb, const string &mv_build_name)
Create map that holds all linkouts for the list of blast deflines and corresponding seqIDs.
static bool SortHspByPercentIdentityDescending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
sorting function for sorting a list of seqalign by descending identity
static CRef< objects::CSeq_align_set > HitListToHspList(list< CRef< objects::CSeq_align_set > > &source)
extract all nested hsp's into a list
static string GetTitle(const objects::CBioseq_Handle &bh)
static TTaxId GetTaxidForSeqid(const objects::CSeq_id &id, objects::CScope &scope)
return the tax id for a seqid
static void AddSpace(CNcbiOstream &out, size_t number)
Add the specified white space.
static string GetAlignedRegionsURL(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL showing aligned regions info.
static string m_Protocol
static list< string > GetGiLinksList(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static void AcknowledgeBlastSubject(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false)
Print out blast subject info.
static TGi GetGiForSeqIdList(const list< CRef< objects::CSeq_id > > &ids)
return gi from id list
static void SplitSeqalignByMolecularType(vector< CRef< objects::CSeq_align_set > > &target, int sort_method, const objects::CSeq_align_set &source, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
static bool SortHitByMasterCoverageDescending(CRef< objects::CSeq_align_set > const &info1, CRef< objects::CSeq_align_set > const &info2)
static void BlastPrintError(list< SBlastError > &error_return, bool error_post, CNcbiOstream &out)
Output blast errors.
static string MapSpaceTemplate(string inpString, string tmplParamName, string templParamVal, unsigned int maxParamLength, int spacesFormatFlag=eSpacePosAtLineEnd)
Replace template tags by real data and calculate and add spaces dependent on maxParamLength and space...
static void HspListToHitList(list< CRef< objects::CSeq_align_set > > &target, const objects::CSeq_align_set &source)
group hsp's with the same id together
static void SortHitByPercentIdentityDescending(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, bool do_translation)
sort a list of seqalign set by alignment identity
static string MapProtocol(string url_link)
static string GetFullIDLink(SSeqURLInfo *seqUrlInfo, const objects::CBioseq::TId *ids)
static bool SortHspByScoreDescending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
static CRef< objects::CSeq_align_set > LimitSeqalignByHsps(objects::CSeq_align_set &source_aln, int maxAligns, int maxHsps)
function for Limiting seqalign by hsps number (by default results are not cut off within the query)
static CRef< objects::CSeq_align_set > ExtractQuerySeqAlign(CRef< objects::CSeq_align_set > &source_aln, int queryNumber)
function for extracting seqalign for the query
static string GetLabel(CConstRef< objects::CSeq_id > id, bool with_version=false)
Return a label for an ID Tries to recreate behavior of GetLabel before a change that prepends "ti|" t...
static void x_WrapOutputLine(string str, size_t line_len, CNcbiOstream &out, bool html=false)
Wrap a string to specified length.
static unsigned int GetSubjectsNumber(const objects::CSeq_align_set &source_aln, unsigned int num)
Calculate number of subject sequences in alignment limited by num.
static CRange< TSeqPos > GetSeqAlignCoverageParams(const objects::CSeq_align_set &alnset, int *masterCoverage, bool *flip)
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
TSeqPos GetAlnStop(TNumseg seg) const
Definition: alnmap.hpp:488
TSignedRange GetSeqAlnRange(TNumrow row) const
Definition: alnmap.hpp:691
CRef< CAlnChunkVec > GetAlnChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
Definition: alnmap.cpp:1002
CBioseq_Handle –.
API for Remote Blast Services.
Definition: Dbtag.hpp:53
CFile –.
Definition: ncbifile.hpp:1604
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
void Resize(size_t i, size_t j, T val=T())
resize this matrix, filling the empty cells with a known value
Definition: matrix.hpp:390
CNcbiRegistry –.
Definition: ncbireg.hpp:913
bool empty() const
Definition: range_coll.hpp:102
TThisType & Subtract(const TRange &r)
Definition: range_coll.hpp:205