NCBI C++ ToolKit
align_format_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: align_format_util.cpp 100791 2023-09-13 12:42:42Z zaretska $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  * 12/2004
28  * File Description:
29  * blast formatter utilities
30  *
31  */
32 #include <ncbi_pch.hpp>
33 
34 #include <math.h> // For use of ceil
35 
37 
38 #include <corelib/ncbireg.hpp>
39 #include <corelib/ncbidiag.hpp>
40 #include <corelib/ncbistre.hpp>
41 #include <corelib/ncbiutil.hpp>
42 #include <corelib/ncbiobj.hpp>
43 #include <corelib/ncbifile.hpp>
44 #include <corelib/metareg.hpp>
45 #include <html/htmlhelper.hpp>
46 #include <cgi/cgictx.hpp>
48 
49 
57 #include <objects/seq/Seq_inst.hpp>
59 #include <objects/seq/Seqdesc.hpp>
60 #include <objmgr/seqdesc_ci.hpp>
63 
64 #include <objtools/blast/services/blast_services.hpp> // for CBlastServices
65 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDB
66 #include <objtools/blast/seqdb_reader/seqdbcommon.hpp> // for CSeqDBException
67 
72 
73 #include <stdio.h>
74 #include <sstream>
75 #include <iomanip>
76 
80 BEGIN_SCOPE(align_format)
81 
// Definition of the class-level message text for an empty result.
82 const char CAlignFormatUtil::kNoHitsFound[] = "No hits found";
83 
86 
// Table of 23 residue letters. The array is declared with ePMatrixSize+1
// elements (presumably ePMatrixSize == 23 plus the terminating NUL -- verify).
87 const char k_PSymbol[ePMatrixSize+1] =
88 "ARNDCQEGHILKMFPSTWYVBZX";
89 
// Definitions of the static smart-pointer members; both start out empty.
90 unique_ptr<CNcbiRegistry> CAlignFormatUtil::m_Reg;
93 unique_ptr<CGeneInfoFileReader> CAlignFormatUtil::m_GeneInfoReader;
94 ///Get blast score information
95 ///@param scoreList: score container to extract score info from
96 ///@param score: place to extract the raw score to
97 ///@param bits: place to extract the bit score to
98 ///@param evalue: place to extract the e value to
99 ///@param sum_n: place to extract the sum_n to
100 ///@param num_ident: place to extract the num_ident to
101 ///@param use_this_gi: place to extract use_this_gi to
102 ///@return true if found score, false otherwise
103 ///
104 template<class container> bool
105 s_GetBlastScore(const container& scoreList,
106  int& score,
107  double& bits,
108  double& evalue,
109  int& sum_n,
110  int& num_ident,
111  list<TGi>& use_this_gi,
112  int& comp_adj_method)
113 {
114  const string k_GiPrefix = "gi:";
115  bool hasScore = false;
116  ITERATE (typename container, iter, scoreList) {
117  const CObject_id& id=(*iter)->GetId();
118  if (id.IsStr()) {
119  if (id.GetStr()=="score"){
120  score = (*iter)->GetValue().GetInt();
121  } else if (id.GetStr()=="bit_score"){
122  bits = (*iter)->GetValue().GetReal();
123  } else if (id.GetStr()=="e_value" || id.GetStr()=="sum_e") {
124  evalue = (*iter)->GetValue().GetReal();
125  hasScore = true;
126  } else if (id.GetStr()=="use_this_gi"){
127  Uint4 gi_v = (Uint4)((*iter)->GetValue().GetInt());
128  use_this_gi.push_back(GI_FROM(Uint4, gi_v));
129  } else if (id.GetStr()=="sum_n"){
130  sum_n = (*iter)->GetValue().GetInt();
131  } else if (id.GetStr()=="num_ident"){
132  num_ident = (*iter)->GetValue().GetInt();
133  } else if (id.GetStr()=="comp_adjustment_method") {
134  comp_adj_method = (*iter)->GetValue().GetInt();
135  }
136  else if(NStr::StartsWith(id.GetStr(),k_GiPrefix)) { //will be used when switch to 64bit GIs
137  string strGi = NStr::Replace(id.GetStr(),k_GiPrefix,"");
138  TGi gi = NStr::StringToNumeric<TGi>(strGi);
139  use_this_gi.push_back(gi);
140  }
141  }
142  }
143 
144  return hasScore;
145 }
146 
147 
148 ///Wrap a string to specified length. If break happens to be in
149 /// a word, it will extend the line length until the end of the word
150 ///@param str: input string
151 ///@param line_len: length of each line desired
152 ///@param out: stream to output
153 ///
///@param html: presumably adjusts the wrapping flags for HTML output; the
/// body of that branch is not visible in this listing -- TODO confirm
154 void CAlignFormatUtil::x_WrapOutputLine(string str, size_t line_len,
155  CNcbiOstream& out, bool html)
156 {
157  list<string> string_l;
 // NOTE(review): the declaration of `flags` and the statements inside the
 // `if (html)` branch are missing from this listing.
159  if (html) {
162  }
 // Wrap `str` into `string_l` using `line_len` and `flags`.
163  NStr::Wrap(str, line_len, string_l, flags);
 // Write every wrapped piece, each followed by a newline.
164  list<string>::iterator iter = string_l.begin();
165  while(iter != string_l.end())
166  {
167  out << *iter;
168  out << "\n";
169  iter++;
170  }
171 }
172 
173 void CAlignFormatUtil::BlastPrintError(list<SBlastError>&
174  error_return,
175  bool error_post, CNcbiOstream& out)
176 {
177 
178  string errsevmsg[] = { "UNKNOWN","INFO","WARNING","ERROR",
179  "FATAL"};
180 
181  NON_CONST_ITERATE(list<SBlastError>, iter, error_return) {
182 
183  if(iter->level > 5){
184  iter->level = eDiag_Info;
185  }
186 
187  if(iter->level == 4){
188  iter->level = eDiag_Fatal;
189  } else{
190  iter->level = iter->level;
191  }
192 
193  if (error_post){
194  ERR_POST_EX(iter->level, 0, iter->message);
195  }
196  out << errsevmsg[iter->level] << ": " << iter->message << "\n";
197 
198  }
199 
200 }
201 
202 void CAlignFormatUtil::PrintTildeSepLines(string str, size_t line_len,
203  CNcbiOstream& out) {
204 
205  vector<string> split_line;
206  NStr::Split(str, "~", split_line);
207  ITERATE(vector<string>, iter, split_line) {
208  x_WrapOutputLine(*iter, line_len, out);
209  }
210 }
211 #ifdef DO_UNUSED
212 /// Initialize database statistics with data from BLAST servers
213 /// @param dbname name of a single BLAST database [in]
214 /// @param info structure to fill [in|out]
215 /// @return true if successfully filled, false otherwise (and a warning is
216 /// printed out)
217 static bool s_FillDbInfoRemotely(const string& dbname,
 // NOTE(review): the rest of the signature and the declarations of
 // `blastdb` and `dbinfo` are missing from this listing.
219 {
 // One service object is shared by all calls.
220  static CBlastServices rmt_blast_services;
222  blastdb->SetName(dbname);
223  blastdb->SetType() = info.is_protein
226  rmt_blast_services.GetDatabaseInfo(blastdb);
227 
 // The name is recorded even when the lookup produced nothing.
228  info.name = dbname;
229  if ( !dbinfo ) {
230  return false;
231  }
 // Use the database name as the definition when no description is given.
232  info.definition = dbinfo->GetDescription();
233  if (info.definition.empty())
234  info.definition = info.name;
 // Render the last-update time with the "b d, Y H:m P" format.
235  CTimeFormat tf("b d, Y H:m P", CTimeFormat::fFormat_Simple);
236  info.date = CTime(dbinfo->GetLast_updated()).AsString(tf);
237  info.total_length = dbinfo->GetTotal_length();
 // The sequence count is narrowed to int here.
238  info.number_seqs = static_cast<int>(dbinfo->GetNum_sequences());
239  return true;
240 }
241 #endif
242 /// Initialize database statistics with data obtained from local BLAST
243 /// databases
244 /// @param dbname name of a single BLAST database [in]
245 /// @param info structure to fill [in|out]
246 /// @param dbfilt_algorithm filtering algorithm ID used for this search
247 /// [in]
248 /// @return true if successfully filled, false otherwise (and a warning is
249 /// printed out)
250 static bool
 // NOTE(review): the line(s) carrying the function name and its first
 // parameters, and the end of the CSeqDB constructor call, are missing from
 // this listing.
253  int dbfilt_algorithm)
254 {
255  CRef<CSeqDB> seqdb(new CSeqDB(dbname, info.is_protein
257  if ( !seqdb ) {
258  return false;
259  }
 // Copy the basic statistics from the opened database.
260  info.name = seqdb->GetDBNameList();
261  info.definition = seqdb->GetTitle();
 // Use the database name as the definition when the title is empty.
262  if (info.definition.empty())
263  info.definition = info.name;
264  info.date = seqdb->GetDate();
265  info.total_length = seqdb->GetTotalLength();
266  info.number_seqs = seqdb->GetNumSeqs();
267 
268  // Process the filtering algorithm IDs
269  info.filt_algorithm_name.clear();
270  info.filt_algorithm_options.clear();
 // -1 means no filtering algorithm: leave both fields empty.
271  if (dbfilt_algorithm == -1) {
272  return true;
273  }
274 
 // The details lookup is compiled out for the compilers excluded below; the
 // function then still returns true with empty filtering fields.
275 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
276  (!defined(NCBI_COMPILER_MIPSPRO)) )
 // `filtering_algorithm` receives a value that is not used afterwards.
277  string filtering_algorithm;
278  seqdb->GetMaskAlgorithmDetails(dbfilt_algorithm,
279  filtering_algorithm,
280  info.filt_algorithm_name,
281  info.filt_algorithm_options);
282 #endif
283  return true;
284 }
285 
/// Replace the contents of `retval` with a single entry describing a
/// user-specified sequence set.
///@param retval: vector to fill; cleared first [out]
///@param is_protein: stored in the entry's is_protein field [in]
///@param numSeqs: stored as the entry's number of sequences [in]
///@param numLetters: stored as the entry's total length [in]
///@param tag: when non-empty, appended to the definition as "(Input: tag)" [in]
286 void
287 CAlignFormatUtil::FillScanModeBlastDbInfo(vector<CAlignFormatUtil::SDbInfo>& retval,
288  bool is_protein, int numSeqs, Int8 numLetters, string& tag)
289 {
290  retval.clear();
 // NOTE(review): the declaration of `info` is missing from this listing.
292  info.is_protein = is_protein;
293  if (tag == "")
294  info.definition = string("User specified sequence set.");
295  else
296  {
297  info.definition = string("User specified sequence set ") +
298  string("(Input: ") + tag + string(").");
299  }
300  info.number_seqs = numSeqs;
301  info.total_length = numLetters;
302  retval.push_back(info);
303 }
304 
/// Fill `retval` with one statistics entry per BLAST database.
///@param retval: vector to fill; cleared first [out]
///@param blastdb_names: database names; passed whole to the remote service,
///       or split with SeqDB_SplitQuoted for local lookup [in]
///@param is_protein: whether the databases are protein [in]
///@param dbfilt_algorithm: filtering algorithm ID forwarded to the local
///       lookup [in]
///@param is_remote: selects the remote-service branch [in]
/// Throws CSeqDBException (eFileErr) when a database cannot be found, or
/// when a remote entry has a negative total length and the local lookup
/// fails too.
305 void
306 CAlignFormatUtil::GetBlastDbInfo(vector<CAlignFormatUtil::SDbInfo>& retval,
307  const string& blastdb_names, bool is_protein,
308  int dbfilt_algorithm /* = -1 */,
309  bool is_remote /* = false */)
310 {
311  retval.clear();
312  if( is_remote ){
 // `found_all` is filled in by the service but not read afterwards.
313  bool found_all = false;
314  static CBlastServices rmt_blast_services;
315  vector<string> missing_names;
316  vector< CRef<objects::CBlast4_database_info> > all_db_info =
317  rmt_blast_services.GetDatabaseInfo(blastdb_names,is_protein,&found_all,&missing_names);
318  if( !missing_names.empty() ){
 // NOTE(review): the names are appended with no separator between them,
 // so several missing databases run together in the message.
319  string msg("'");
320  for(size_t ndx=0 ; ndx < missing_names.size(); ndx++){
321  msg += missing_names[ndx];
322  }
323  msg += string("' not found on NCBI servers.\n");
324  NCBI_THROW(CSeqDBException, eFileErr, msg);
325  }
326  for(size_t ndx=0 ; ndx < all_db_info.size(); ndx++){
 // NOTE(review): the declaration of `info` is missing from this listing.
328  objects::CBlast4_database_info &dbinfo = *all_db_info[ndx];
329  info.name = dbinfo.GetDatabase().GetName();
330  info.definition = dbinfo.GetDescription();
 // Use the database name as the definition when no description is given.
331  if (info.definition.empty())
332  info.definition = info.name;
333  CTimeFormat tf("b d, Y H:m P", CTimeFormat::fFormat_Simple);
334  info.date = CTime(dbinfo.GetLast_updated()).AsString(tf);
335  info.total_length = dbinfo.GetTotal_length();
336  info.number_seqs = static_cast<int>(dbinfo.GetNum_sequences());
 // A negative total length from the service is treated as bad data: retry
 // with the local database and fail if that does not work either.
337  if (info.total_length < 0) {
338  const string kDbName = NStr::TruncateSpaces(info.name);
339  if( ! s_FillDbInfoLocally(kDbName, info, dbfilt_algorithm) ){
340  string msg("'");
341  msg += kDbName;
342  msg += string("' has bad total length on NCBI servers.\n");
343  NCBI_THROW(CSeqDBException, eFileErr, msg);
344  }
345  }
346  retval.push_back(info);
347  }
348  return;
349  }
350  else{
351  vector<CTempString> dbs;
352  SeqDB_SplitQuoted(blastdb_names, dbs, true);
353  retval.reserve(dbs.size());
354 
355  ITERATE(vector<CTempString>, i, dbs) {
 // NOTE(review): the declaration of `info` is missing from this listing.
357  info.is_protein = is_protein;
358  bool success = false;
359  // Unsafe OK as kDbName only used in this loop.
360  const string kDbName = NStr::TruncateSpaces_Unsafe(*i);
 // Names that are empty after trimming are skipped.
361  if (kDbName.empty())
362  continue;
363 
364  success = s_FillDbInfoLocally(kDbName, info, dbfilt_algorithm);
365 
366  if (success) {
367  retval.push_back(info);
368  } else {
369  string msg("'");
370  msg += kDbName;
 // NOTE(review): `is_remote` is always false in this branch, so only the
 // "' not found.\n" text can be produced here.
371  if (is_remote)
372  msg += string("' not found on NCBI servers.\n");
373  else
374  msg += string("' not found.\n");
375  NCBI_THROW(CSeqDBException, eFileErr, msg);
376  }
377  }
378  }
379 }
380 
/// Print database statistics, either as one combined summary or as one
/// section per entry.
///@param dbinfo_list: entries to report [in]
///@param line_length: wrap length for the database titles [in]
///@param out: stream to write to [in|out]
///@param top: true prints a single summary over all entries and returns;
///       false prints a detailed section for every entry [in]
381 void CAlignFormatUtil::PrintDbReport(const vector<SDbInfo>& dbinfo_list,
382  size_t line_length,
383  CNcbiOstream& out,
384  bool top)
385 {
386  if (top) {
 // NOTE(review): front() is taken without checking that the list is
 // non-empty -- callers presumably guarantee at least one entry; confirm.
387  const CAlignFormatUtil::SDbInfo* dbinfo = &(dbinfo_list.front());
388  out << "Database: ";
389 
390  string db_titles = dbinfo->definition;
391  Int8 tot_num_seqs = static_cast<Int8>(dbinfo->number_seqs);
392  Int8 tot_length = dbinfo->total_length;
393 
 // Join the remaining titles with "; " and accumulate the totals.
394  for (size_t i = 1; i < dbinfo_list.size(); i++) {
395  db_titles += "; " + dbinfo_list[i].definition;
396  tot_num_seqs += static_cast<Int8>(dbinfo_list[i].number_seqs);
397  tot_length += dbinfo_list[i].total_length;
398  }
399 
400  x_WrapOutputLine(db_titles, line_length, out);
 // Filtering details are taken from the first entry only.
401  if ( !dbinfo->filt_algorithm_name.empty() ) {
402  out << "Masked using: '" << dbinfo->filt_algorithm_name << "'";
403  if ( !dbinfo->filt_algorithm_options.empty() ) {
404  out << ", options: '" << dbinfo->filt_algorithm_options << "'";
405  }
406  out << endl;
407  }
 // NOTE(review): one source line is missing from this listing here.
409  out << NStr::Int8ToString(tot_num_seqs, NStr::fWithCommas) <<
410  " sequences; " <<
411  NStr::Int8ToString(tot_length, NStr::fWithCommas) <<
412  " total letters\n\n";
413  return;
414  }
415 
 // Detailed report: one section per entry, each followed by a blank line.
416  ITERATE(vector<SDbInfo>, dbinfo, dbinfo_list) {
417  if (dbinfo->subset == false) {
418  out << " Database: ";
419  x_WrapOutputLine(dbinfo->definition, line_length, out);
420 
421  if ( !dbinfo->filt_algorithm_name.empty() ) {
422  out << " Masked using: '" << dbinfo->filt_algorithm_name << "'";
423  if ( !dbinfo->filt_algorithm_options.empty() ) {
424  out << ", options: '" << dbinfo->filt_algorithm_options << "'";
425  }
426  out << endl;
427  }
428 
429  out << " Posted date: ";
430  out << dbinfo->date << "\n";
431 
432  out << " Number of letters in database: ";
433  out << NStr::Int8ToString(dbinfo->total_length,
434  NStr::fWithCommas) << "\n";
435  out << " Number of sequences in database: ";
436  out << NStr::IntToString(dbinfo->number_seqs,
437  NStr::fWithCommas) << "\n";
438 
439  } else {
 // Subset entries report only the searched letter and sequence counts.
440  out << " Subset of the database(s) listed below" << "\n";
441  out << " Number of letters searched: ";
442  out << NStr::Int8ToString(dbinfo->total_length,
443  NStr::fWithCommas) << "\n";
444  out << " Number of sequences searched: ";
445  out << NStr::IntToString(dbinfo->number_seqs,
446  NStr::fWithCommas) << "\n";
447  }
448  out << "\n";
449  }
450 
451 }
452 
453 void CAlignFormatUtil::PrintKAParameters(double lambda, double k, double h,
454  size_t line_len,
455  CNcbiOstream& out, bool gapped,
456  const Blast_GumbelBlk *gbp)
457 {
458 
459  char buffer[256];
460  if (gapped) {
461  out << "Gapped" << "\n";
462  }
463  out << "Lambda K H";
464  if (gbp) {
465  if (gapped) {
466  out << " a alpha sigma";
467  } else {
468  out << " a alpha";
469  }
470  }
471  out << "\n";
472  sprintf(buffer, "%#8.3g ", lambda);
473  out << buffer;
474  sprintf(buffer, "%#8.3g ", k);
475  out << buffer;
476  sprintf(buffer, "%#8.3g ", h);
477  out << buffer;
478  if (gbp) {
479  if (gapped) {
480  sprintf(buffer, "%#8.3g ", gbp->a);
481  out << buffer;
482  sprintf(buffer, "%#8.3g ", gbp->Alpha);
483  out << buffer;
484  sprintf(buffer, "%#8.3g ", gbp->Sigma);
485  out << buffer;
486  } else {
487  sprintf(buffer, "%#8.3g ", gbp->a_un);
488  out << buffer;
489  sprintf(buffer, "%#8.3g ", gbp->Alpha_un);
490  out << buffer;
491  }
492  //x_WrapOutputLine(buffer, line_len, out);
493  }
494  out << "\n";
495 }
496 
497 string
498 CAlignFormatUtil::GetSeqIdString(const CBioseq& cbs, bool believe_local_id)
499 {
500  const CBioseq::TId& ids = cbs.GetId();
501  return CAlignFormatUtil::GetSeqIdString(ids, believe_local_id);
502 }
503 
/// Build a printable id string from a list of Seq-ids.
/// Returns an empty string when no usable id is selected, or when the
/// selected id is local and `believe_local_id` is false.
504 string
505 CAlignFormatUtil::GetSeqIdString(const list<CRef<CSeq_id> > & ids, bool believe_local_id)
506 {
507  string all_id_str = NcbiEmptyString;
 // NOTE(review): the declaration of `wid` is missing from this listing.
509 
510  if (wid && (wid->Which()!= CSeq_id::e_Local || believe_local_id)){
511  TGi gi = FindGi(ids);
512 
513  bool use_long_seqids = false;
 // NOTE(review): the declaration of `app` is missing from this listing.
515  if (app) {
 // Long ids are enabled by the registry entry [BLAST] LONG_SEQID = 1.
516  const CNcbiRegistry& registry = app->GetConfig();
517  use_long_seqids = (registry.Get("BLAST", "LONG_SEQID") == "1");
518  }
519  if (!use_long_seqids) {
520 
 // Short form: the bare id only.
521  all_id_str = GetBareId(*wid);
522  }
 // Long form for local ids: drop the 4-character "lcl|" prefix and put
 // "gi|<gi>|" in front when a GI is known.
523  else if (strncmp(wid->AsFastaString().c_str(), "lcl|", 4) == 0) {
524  if(gi == ZERO_GI){
525  all_id_str = wid->AsFastaString().substr(4);
526  } else {
527  all_id_str = "gi|" + NStr::NumericToString(gi) +
528  "|" + wid->AsFastaString().substr(4);
529  }
530  } else {
 // Long form for other ids: the FASTA string, with "gi|<gi>|" in front
 // when a GI is known.
531  if(gi == ZERO_GI){
532  all_id_str = wid->AsFastaString();
533  } else {
534  all_id_str = "gi|" + NStr::NumericToString(gi) + "|" +
535  wid->AsFastaString();
536  }
537  }
538  }
539 
540  return all_id_str;
541 }
542 
// Concatenates the text of every title descriptor of `cbs`, with no
// separator; returns an empty string when no descriptors are set.
// NOTE(review): the signature line is missing from this listing.
543 string
545 {
546  string all_descr_str = NcbiEmptyString;
547 
548  if (cbs.IsSetDescr()) {
549  const CBioseq::TDescr& descr = cbs.GetDescr();
550  const CBioseq::TDescr::Tdata& data = descr.Get();
551  ITERATE(CBioseq::TDescr::Tdata, iter, data) {
 // Only title descriptors contribute to the result.
552  if((*iter)->IsTitle()) {
553  all_descr_str += (*iter)->GetTitle();
554  }
555  }
556  }
557  return all_descr_str;
558 }
559 
561  size_t line_len,
562  CNcbiOstream& out,
563  bool believe_query,
564  bool html,
565  bool tabular /* = false */,
566  const string& rid /* = kEmptyStr*/)
567 {
 // NOTE(review): the signature line and the first line of the call below
 // are missing from this listing; the visible part forwards the arguments
 // together with the fixed label "Query".
568  const string label("Query");
570  believe_query, html,
571  label, tabular, rid);
572 }
573 
574 void
576  size_t line_len,
577  CNcbiOstream& out,
578  bool believe_query,
579  bool html,
580  bool tabular /* = false */)
581 {
 // NOTE(review): the signature line and the first line of the call below
 // are missing from this listing; the visible part forwards the arguments
 // together with the fixed label "Subject" and an empty RID.
582  const string label("Subject");
584  believe_query, html,
585  label, tabular, kEmptyStr);
586 }
587 
588 void
590  size_t line_len,
591  CNcbiOstream& out,
592  bool believe_query,
593  bool html,
594  const string& label,
595  bool tabular /* = false */,
596  const string& rid /* = kEmptyStr*/)
597 {
 // NOTE(review): the signature line is missing from this listing.
 // Prints "<label>" followed by the sequence id and title, then the
 // sequence length (non-tabular output only) and the RID when one is given.
598 
 // The label prefix depends on the output flavour; html is tested first.
599  if (html) {
600  out << "<b>" << label << "=</b> ";
601  } else if (tabular) {
602  out << "# " << label << ": ";
603  } else {
604  out << label << "= ";
605  }
606 
 // Id, one space, then the title text; surrounding spaces are trimmed.
607  string all_id_str = GetSeqIdString(cbs, believe_query);
608  all_id_str += " ";
609  all_id_str = NStr::TruncateSpaces(all_id_str + GetSeqDescrString(cbs));
610 
611  // For tabular output, there is no limit on the line length.
612  // There is also no extra line with the sequence length.
613  if (tabular) {
614  out << all_id_str;
615  } else {
616  x_WrapOutputLine(all_id_str, line_len, out, html);
617  if(cbs.IsSetInst() && cbs.GetInst().CanGetLength()){
618  out << "\nLength=";
619  out << cbs.GetInst().GetLength() <<"\n";
620  }
621  }
622 
 // The tabular RID line has no trailing newline; the plain one does.
623  if (rid != kEmptyStr) {
624  if (tabular) {
625  out << "\n" << "# RID: " << rid;
626  } else {
627  out << "\n" << "RID: " << rid << "\n";
628  }
629  }
630 }
631 
632 void CAlignFormatUtil::PrintPhiInfo(int num_patterns,
633  const string& pattern,
634  double prob,
635  vector<int>& offsets,
636  CNcbiOstream& out)
637 {
638  out << num_patterns << " occurrence(s) of pattern: " << "\n"
639  << pattern << " at position(s) ";
640 
641  bool first = true;
642  for (vector<int>::iterator it = offsets.begin();
643  it != offsets.end(); it++)
644  {
645  if (!first)
646  out << ", ";
647 
648  out << 1 + *it ;
649 
650  first = false;
651  }
652  out << " of query sequence" << "\n";
653  out << "pattern probability=" << prob << "\n";
654 
655 }
656 
658  int& score,
659  double& bits,
660  double& evalue,
661  int& sum_n,
662  int& num_ident,
663  list<TGi>& use_this_gi)
664 {
 // NOTE(review): the signature line is missing from this listing.
 // Convenience form that forwards to the GetAlnScores overload taking a
 // composition-adjustment output argument and discards that value.
665  int comp_adj_method = 0; // dummy variable
666 
667  CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n,
668  num_ident, use_this_gi, comp_adj_method);
669 }
670 
672  int& score,
673  double& bits,
674  double& evalue,
675  int& sum_n,
676  int& num_ident,
677  list<string>& use_this_seq)
678 {
 // NOTE(review): the signature line is missing from this listing.
 // Convenience form for the string-list variant: forwards to the
 // GetAlnScores overload taking a composition-adjustment output argument
 // and discards that value.
679  int comp_adj_method = 0; // dummy variable
680 
681  CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n,
682  num_ident, use_this_seq, comp_adj_method);
683 }
684 
685 
687  int& score,
688  double& bits,
689  double& evalue,
690  int& sum_n,
691  int& num_ident,
692  list<TGi>& use_this_gi,
693  int& comp_adj_method)
694 {
 // NOTE(review): the signature line is missing from this listing.
 // Every numeric output is reset to -1 (comp_adj_method to 0) before the
 // lookup, so values not present in the alignment keep those defaults.
695  bool hasScore = false;
696  score = -1;
697  bits = -1;
698  evalue = -1;
699  sum_n = -1;
700  num_ident = -1;
701  comp_adj_method = 0;
702 
703  //look for scores at seqalign level first
704  hasScore = s_GetBlastScore(aln.GetScore(), score, bits, evalue,
705  sum_n, num_ident, use_this_gi, comp_adj_method);
706 
707  //look at the seg level
708  if(!hasScore){
709  const CSeq_align::TSegs& seg = aln.GetSegs();
 // Only the first std-seg / dense-diag is consulted.
 // NOTE(review): front() assumes the segment list is not empty -- confirm.
710  if(seg.Which() == CSeq_align::C_Segs::e_Std){
711  s_GetBlastScore(seg.GetStd().front()->GetScores(),
712  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
713  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
714  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
715  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
716  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
 // NOTE(review): the first line of this call is missing from this listing.
718  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
719  }
720  }
 // No GIs were found among the scores: ask GetUseThisSequence instead.
721  if(use_this_gi.size() == 0) {
722  GetUseThisSequence(aln,use_this_gi);
723  }
724 }
725 
726 //converts gi list to the list of gi:XXXXXXXX strings
727 static list<string> s_NumGiToStringGiList(list<TGi> use_this_gi)//for backward compatability
728 {
729  const string k_GiPrefix = "gi:";
730  list<string> use_this_seq;
731  ITERATE(list<TGi>, iter_gi, use_this_gi){
732  string strSeq = k_GiPrefix + NStr::NumericToString(*iter_gi);
733  use_this_seq.push_back(strSeq);
734  }
735  return use_this_seq;
736 }
737 
739  int& score,
740  double& bits,
741  double& evalue,
742  int& sum_n,
743  int& num_ident,
744  list<string>& use_this_seq,
745  int& comp_adj_method)
746 {
 // NOTE(review): the signature line is missing from this listing.
 // Same extraction as the GI-list variant, but the sequences to use are
 // reported as strings in `use_this_seq`.
747  bool hasScore = false;
748  score = -1;
749  bits = -1;
750  evalue = -1;
751  sum_n = -1;
752  num_ident = -1;
753  comp_adj_method = 0;
754 
 // GIs found among the scores are collected here and converted at the end.
755  list<TGi> use_this_gi;
756  //look for scores at seqalign level first
757  hasScore = s_GetBlastScore(aln.GetScore(), score, bits, evalue,
758  sum_n, num_ident, use_this_gi, comp_adj_method);
759 
760  //look at the seg level
761  if(!hasScore){
762  const CSeq_align::TSegs& seg = aln.GetSegs();
 // Only the first std-seg / dense-diag is consulted.
 // NOTE(review): front() assumes the segment list is not empty -- confirm.
763  if(seg.Which() == CSeq_align::C_Segs::e_Std){
764  s_GetBlastScore(seg.GetStd().front()->GetScores(),
765  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
766  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
767  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
768  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
769  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
 // NOTE(review): the first line of this call is missing from this listing.
771  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
772  }
773  }
 // No GIs among the scores: ask GetUseThisSequence for the string list.
774  if(use_this_gi.size() == 0) {
775  GetUseThisSequence(aln,use_this_seq);
776  }
777  else {
778  use_this_seq = s_NumGiToStringGiList(use_this_gi);//for backward compatibility
779  }
780 }
781 
783 {
 // NOTE(review): the signature line is missing from this listing.
 // Returns the tag of `dtg` as text: the decimal form of a numeric tag,
 // otherwise the tag string itself.
784  string retval = NcbiEmptyString;
785 
786  if(dtg.GetTag().IsId())
787  retval = NStr::IntToString(dtg.GetTag().GetId());
788  else
789  retval = dtg.GetTag().GetStr();
790 
791  return retval;
792 }
793 
794 string CAlignFormatUtil::GetLabel(CConstRef<CSeq_id> id,bool with_version)
795 {
796  string retval = "";
797  if (id->Which() == CSeq_id::e_General){
798  const CDbtag& dtg = id->GetGeneral();
799  retval = CAlignFormatUtil::GetGnlID(dtg);
800  }
801  if (retval == "")
802  retval = id->GetSeqIdString(with_version);
803 
804  return retval;
805 }
806 
808 
809 {
 // NOTE(review): the signature line is missing from this listing.
 // Writes the string literal below to `out` `number` times; nothing is
 // written when `number` is zero or negative.
810  for(int i=0; i<number; i++){
811  out<<" ";
812  }
813 
814 }
815 
817  double bit_score,
818  double total_bit_score,
819  int raw_score,
820  string& evalue_str,
821  string& bit_score_str,
822  string& total_bit_score_str,
823  string& raw_score_str)
824 {
 // NOTE(review): the signature line is missing from this listing.
 // Formats the e-value, both bit scores and the raw score into the four
 // output strings, choosing the printf format by magnitude.
825  char evalue_buf[100], bit_score_buf[100], total_bit_score_buf[100];
826 
827  /* Facilitates comparing formatted output using diff */
828  static string kBitScoreFormat("%4.1lf");
829 #ifdef CTOOLKIT_COMPATIBLE
 // The environment variable is checked only once, on the first call.
830  static bool ctoolkit_compatible = false;
831  static bool value_set = false;
832  if ( !value_set ) {
833  if (getenv("CTOOLKIT_COMPATIBLE")) {
834  kBitScoreFormat.assign("%4.0lf");
835  ctoolkit_compatible = true;
836  }
837  value_set = true;
838  }
839 #endif /* CTOOLKIT_COMPATIBLE */
840 
 // E-value: the smaller the value, the more compact the notation.
841  if (evalue < 1.0e-180) {
842  snprintf(evalue_buf, sizeof(evalue_buf), "0.0");
843  } else if (evalue < 1.0e-99) {
844  snprintf(evalue_buf, sizeof(evalue_buf), "%2.0le", evalue);
845 #ifdef CTOOLKIT_COMPATIBLE
846  if (ctoolkit_compatible) {
 // NOTE(review): `sizeof(evalue_buf-1)` is the size of a char pointer (the
 // array decays in `evalue_buf-1`), not the buffer size minus one, and the
 // source and destination overlap, which strncpy does not allow. A memmove
 // of strlen(evalue_buf) bytes looks like the intent -- confirm and fix.
847  strncpy(evalue_buf, evalue_buf+1, sizeof(evalue_buf-1));
848  }
849 #endif /* CTOOLKIT_COMPATIBLE */
850  } else if (evalue < 0.0009) {
851  snprintf(evalue_buf, sizeof(evalue_buf), "%3.0le", evalue);
852  } else if (evalue < 0.1) {
853  snprintf(evalue_buf, sizeof(evalue_buf), "%4.3lf", evalue);
854  } else if (evalue < 1.0) {
855  snprintf(evalue_buf, sizeof(evalue_buf), "%3.2lf", evalue);
856  } else if (evalue < 10.0) {
857  snprintf(evalue_buf, sizeof(evalue_buf), "%2.1lf", evalue);
858  } else {
859  snprintf(evalue_buf, sizeof(evalue_buf), "%2.0lf", evalue);
860  }
861 
 // Bit score: exponent form above 99999, truncated to an integer above
 // 99.9, otherwise kBitScoreFormat.
862  if (bit_score > 99999){
863  snprintf(bit_score_buf, sizeof(bit_score_buf), "%5.3le", bit_score);
864  } else if (bit_score > 99.9){
865  snprintf(bit_score_buf, sizeof(bit_score_buf), "%3.0ld",
866  (long)bit_score);
867  } else {
868  snprintf(bit_score_buf, sizeof(bit_score_buf), kBitScoreFormat.c_str(),
869  bit_score);
870  }
 // Total bit score: same thresholds, but the small-value format is the
 // fixed "%2.1lf" rather than kBitScoreFormat.
871  if (total_bit_score > 99999){
872  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%5.3le",
873  total_bit_score);
874  } else if (total_bit_score > 99.9){
875  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%3.0ld",
876  (long)total_bit_score);
877  } else {
878  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%2.1lf",
879  total_bit_score);
880  }
881  evalue_str = evalue_buf;
882  bit_score_str = bit_score_buf;
883  total_bit_score_str = total_bit_score_buf;
 // Raw scores of zero or less are all reported as -1.
884  if (raw_score <= 0)
885  raw_score = -1;
886  NStr::IntToString(raw_score_str, raw_score);
887 }
888 
889 
891  CSeq_align_set& new_aln,
892  unsigned int number)
893 {
 // NOTE(review): the signature line is missing from this listing.
 // Copies alignments from `source_aln` to `new_aln` until more than
 // `number` distinct consecutive subject ids have been seen.
894  CConstRef<CSeq_id> previous_id, subid;
895  bool is_first_aln = true;
896  unsigned int num_align = 0;
897  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
898 
 // Disc alignments are always counted and copied; the limit below is only
 // tested for the other alignment kinds.
899  if ((*iter)->GetSegs().IsDisc()) {
900  ++num_align;
901  } else {
 // A new subject starts when the id of row 1 differs from the previous one.
902  subid = &((*iter)->GetSeq_id(1));
903  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
904  ++num_align;
905  }
906 
907  if(num_align > number) {
908  break;
909  }
910 
911  is_first_aln = false;
912  previous_id = subid;
913  }
914  new_aln.Set().push_back(*iter);
915  }
916 }
917 
918 
920  unsigned int number)
921 {
 // NOTE(review): the signature line is missing from this listing, so the
 // function name is not visible here.
 // Counts subjects in `source_aln` (disc alignments count one each; other
 // alignments count once per run of equal row-1 ids) and stops as soon as
 // the count reaches `number`.
922  CConstRef<CSeq_id> previous_id, subid;
923  bool is_first_aln = true;
924  unsigned int num_align = 0;
925  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
926 
927  if ((*iter)->GetSegs().IsDisc()) {
928  ++num_align;
929  } else {
930  subid = &((*iter)->GetSeq_id(1));
931  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
932  ++num_align;
933  }
934 
 // The limit is only tested for non-disc alignments.
935  if(num_align >= number) {
936  break;
937  }
938 
939  is_first_aln = false;
940  previous_id = subid;
941  }
942  }
943  return num_align;
944 }
945 
946 
948  CSeq_align_set& new_aln,
949  unsigned int number)
950 {
 // NOTE(review): the signature line is missing from this listing.
 // Copies alignments from `source_aln` to `new_aln`, stopping once the
 // subject count has gone past `number`.
951  CConstRef<CSeq_id> previous_id, subid;
952  bool is_first_aln = true;
953  unsigned int num_align = 0;
954  bool finishCurrent = false;
955  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
956  if ((*iter)->GetSegs().IsDisc()) {
957  ++num_align;
958  } else {
959  subid = &((*iter)->GetSeq_id(1));
960  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
 // finishCurrent is recomputed each time a new subject id starts: it is
 // true only while the subject that brings the count to `number` is current.
961  finishCurrent = (num_align + 1 == number) ? true : false;
962  ++num_align;
963  }
964  is_first_aln = false;
965  previous_id = subid;
966  }
 // Unlike the per-kind checks above, this test applies to disc alignments too.
967  if(num_align > number && !finishCurrent) {
968  break;
969  }
970  new_aln.Set().push_back(*iter);
971  }
972 }
973 
974 
975 void
977  int& num_gaps, int& num_gap_opens)
978 {
 // NOTE(review): the signature line and the declaration of `chunk_vec` are
 // missing from this listing.
 // All three outputs are reset to zero before counting.
979  num_gaps = num_gap_opens = align_length = 0;
980 
981  for (int row = 0; row < salv.GetNumRows(); row++) {
983  = salv.GetAlnChunks(row, salv.GetSeqAlnRange(0));
 // NOTE(review): `i` is int while size() is presumably unsigned -- a
 // signed/unsigned comparison; confirm.
984  for (int i=0; i<chunk_vec->size(); i++) {
985  CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];
986  int chunk_length = chunk->GetAlnRange().GetLength();
987  // Gaps are counted on all rows: gap can only be in one of the rows
988  // for any given segment.
989  if (chunk->IsGap()) {
990  ++num_gap_opens;
991  num_gaps += chunk_length;
992  }
993  // To calculate alignment length, only one row is needed.
994  if (row == 0)
995  align_length += chunk_length;
996  }
997  }
998 }
999 
1000 void
1002  const CSeq_align_set& source)
1003 {
 // NOTE(review): the signature line is missing from this listing.
 // Appends the alignments of `source` to `target`, replacing every disc
 // alignment by the alignments it contains (one level only). Alignments
 // without segments are skipped.
1004  if (source.IsSet() && source.CanGet()) {
1005 
1006  for(CSeq_align_set::Tdata::const_iterator iter = source.Get().begin();
1007  iter != source.Get().end(); iter++) {
1008  if((*iter)->IsSetSegs()){
1009  const CSeq_align::TSegs& seg = (*iter)->GetSegs();
1010  if(seg.IsDisc()){
1011  const CSeq_align_set& set = seg.GetDisc();
1012  for(CSeq_align_set::Tdata::const_iterator iter2 =
1013  set.Get().begin(); iter2 != set.Get().end();
1014  iter2 ++) {
1015  target.Set().push_back(*iter2);
1016  }
1017  } else {
1018  target.Set().push_back(*iter);
1019  }
1020  }
1021  }
1022  }
1023 }
1024 
1027 {
 // NOTE(review): the signature lines are missing from this listing.
 // Builds a new Seq-align with a dense-seg from a dense-diag Seq-align;
 // throws CException (eUnknown) when the input is not dense-diag.
1028  CRef<CSeq_align> sa(new CSeq_align);
1029  if ( !aln.GetSegs().IsDendiag()) {
1030  NCBI_THROW(CException, eUnknown, "Input Seq-align should be Dendiag!");
1031  }
1032 
 // Copy the alignment-level fields that are set on the input.
1033  if(aln.IsSetType()){
1034  sa->SetType(aln.GetType());
1035  }
1036  if(aln.IsSetDim()){
1037  sa->SetDim(aln.GetDim());
1038  }
1039  if(aln.IsSetScore()){
1040  sa->SetScore() = aln.GetScore();
1041  }
1042  if(aln.IsSetBounds()){
1043  sa->SetBounds() = aln.GetBounds();
1044  }
1045 
1046  CDense_seg& ds = sa->SetSegs().SetDenseg();
1047 
1048  int counter = 0;
1049  ds.SetNumseg() = 0;
 // NOTE(review): the loop header that declares `iter` is missing from this
 // listing; the statements below run once per dense-diag.
1051 
1052  if(counter == 0){//assume all dendiag segments have same dim and ids
1053  if((*iter)->IsSetDim()){
1054  ds.SetDim((*iter)->GetDim());
1055  }
1056  if((*iter)->IsSetIds()){
1057  ds.SetIds() = (*iter)->GetIds();
1058  }
1059  }
 // Each dense-diag becomes one segment: its starts, length, strands and
 // scores are appended to the dense-seg.
1060  ds.SetNumseg() ++;
1061  if((*iter)->IsSetStarts()){
1062  ITERATE(CDense_diag::TStarts, iterStarts, (*iter)->GetStarts()){
1063  ds.SetStarts().push_back(*iterStarts);
1064  }
1065  }
1066  if((*iter)->IsSetLen()){
1067  ds.SetLens().push_back((*iter)->GetLen());
1068  }
1069  if((*iter)->IsSetStrands()){
1070  ITERATE(CDense_diag::TStrands, iterStrands, (*iter)->GetStrands()){
1071  ds.SetStrands().push_back(*iterStrands);
1072  }
1073  }
1074  if((*iter)->IsSetScores()){
1075  ITERATE(CDense_diag::TScores, iterScores, (*iter)->GetScores()){
1076  ds.SetScores().push_back(*iterScores); //this might not have
1077  //right meaning
1078  }
1079  }
1080  counter ++;
1081  }
1082 
1083  return sa;
1084 }
1085 
1087 {
 // NOTE(review): the signature line and the initializer of `bdlRef` are
 // missing from this listing.
 // Returns the taxid of the first defline whose id of the same type matches
 // `id`, or ZERO_TAX_ID when none matches or a CException is thrown.
1088  TTaxId taxid = ZERO_TAX_ID;
1089  try{
1090  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
1091  const CRef<CBlast_def_line_set> bdlRef =
 // An empty reference is treated as an empty defline list.
1093  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
1094  ITERATE(list<CRef<CBlast_def_line> >, iter_bdl, bdl) {
1095  CConstRef<CSeq_id> bdl_id =
1096  GetSeq_idByType((*iter_bdl)->GetSeqid(), id.Which());
1097  if(bdl_id && bdl_id->Match(id) &&
1098  (*iter_bdl)->IsSetTaxid() && (*iter_bdl)->CanGetTaxid()){
1099  taxid = (*iter_bdl)->GetTaxid();
1100  break;
1101  }
1102  }
1103  } catch (CException&) {
 // Deliberately ignored: the default taxid is returned instead.
1104 
1105  }
1106  return taxid;
1107 }
1108 
1110  const CBioseq_Handle& handle)
1111 {
 // NOTE(review): the signature line is missing from this listing.
 // Plus strand: (start % 3) + 1. Minus strand: the same formula applied to
 // the offset from the sequence end, negated. Any other strand gives 0.
1112  int frame = 0;
1113  if (strand == eNa_strand_plus) {
1114  frame = (start % 3) + 1;
1115  } else if (strand == eNa_strand_minus) {
1116  frame = -(((int)handle.GetBioseqLength() - start - 1)
1117  % 3 + 1);
1118 
1119  }
1120  return frame;
1121 }
1122 
1123 
1126  seqalign_hit_list,
1127  bool do_translation
1128  )
1129 {
 // NOTE(review): the signature lines are missing from this listing.
 // `kTranslation` is declared outside this block; it is set here and read
 // by SortHitByPercentIdentityDescendingEx during the sort.
1130 
1131  kTranslation = do_translation;
1132  seqalign_hit_list.sort(SortHitByPercentIdentityDescendingEx);
1133 }
1134 
1135 
1138  const CRef<CSeq_align>& info2)
1139 {
 // NOTE(review): the signature line is missing from this listing.
 // Comparator: true when `info1` should come before `info2`, i.e. it has
 // the higher identity ratio, with the lower e-value breaking ties.
1140 
1141  int score1, sum_n1, num_ident1;
1142  double bits1, evalue1;
1143  list<TGi> use_this_gi1;
1144 
1145  int score2, sum_n2, num_ident2;
1146  double bits2, evalue2;
1147  list<TGi> use_this_gi2;
1148 
1149 
1150  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1151  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1152 
1153  int length1 = GetAlignmentLength(*info1, kTranslation);
1154  int length2 = GetAlignmentLength(*info2, kTranslation);
1155  bool retval = false;
1156 
1157 
 // Ratios are only compared when both lengths and both identity counts are
 // positive; otherwise the e-values decide.
1158  if(length1 > 0 && length2 > 0 && num_ident1 > 0 &&num_ident2 > 0 ) {
1159  if (((double)num_ident1)/length1 == ((double)num_ident2)/length2) {
1160 
1161  retval = evalue1 < evalue2;
1162 
1163  } else {
 // The ratios differ in this branch, so `>=` behaves like `>` here.
1164  retval = ((double)num_ident1)/length1 >= ((double)num_ident2)/length2;
1165 
1166  }
1167  } else {
1168  retval = evalue1 < evalue2;
1169  }
1170  return retval;
1171 }
1172 
1175  const CRef<CSeq_align_set>& info2)
1176 {
 // Comparator for two hits (sets of HSPs): returns true when the first HSP of
 // info1 has a higher bit score than the first HSP of info2.
 // Each set is sorted with SortHspByScoreDescending through the local CRef
 // copies i1/i2 before front() is read via info1/info2 (presumably the same
 // underlying objects -- confirm CRef copy semantics).
 // NOTE(review): front() is used without an emptiness check.
 // NOTE(review): the first signature line is missing from this listing.
1177  CRef<CSeq_align_set> i1(info1), i2(info2);
1178 
1179  i1->Set().sort(SortHspByScoreDescending);
1180  i2->Set().sort(SortHspByScoreDescending);
1181 
1182 
1183  int score1, sum_n1, num_ident1;
1184  double bits1, evalue1;
1185  list<TGi> use_this_gi1;
1186 
1187  int score2, sum_n2, num_ident2;
1188  double bits2, evalue2;
1189  list<TGi> use_this_gi2;
1190 
1191  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1192  GetAlnScores(*(info2->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1193  return bits1 > bits2;
1194 }
1195 
1198  CRef<CSeq_align_set> const& info2)
1199 {
 // Comparator for two hits: larger GetMasterCoverage() first. When the
 // coverages are equal, the hit whose first HSP has the smaller e-value comes
 // first. Returns false when cov1 < cov2.
 // NOTE(review): front() is used without an emptiness check.
 // NOTE(review): the first signature line is missing from this listing.
1200  int cov1 = GetMasterCoverage(*info1);
1201  int cov2 = GetMasterCoverage(*info2);
1202  bool retval = false;
1203 
1204  if (cov1 > cov2) {
 // Always true inside this branch.
1205  retval = cov1 > cov2;
1206  } else if (cov1 == cov2) {
1207  int score1, sum_n1, num_ident1;
1208  double bits1, evalue1;
1209  list<TGi> use_this_gi1;
1210 
1211  int score2, sum_n2, num_ident2;
1212  double bits2, evalue2;
1213  list<TGi> use_this_gi2;
1214  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1215  GetAlnScores(*(info2->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1216  retval = evalue1 < evalue2;
1217  }
1218 
1219  return retval;
1220 }
1221 
1223  CRef<CSeq_align_set>& info2)
1224 {
 // Comparator for two hits (sets of HSPs): the hit whose left-most HSP starts
 // earlier on row 0 comes first; hits with the same start are ordered by the
 // e-value of that left-most HSP (smaller first).
 // Side effect: both sets are sorted in place with
 // SortHspByMasterStartAscending so that front() is the left-most HSP.
 // NOTE(review): front() is used without an emptiness check.
 // NOTE(review): the first signature line is missing from this listing.
1225  int start1 = 0, start2 = 0;
1226 
1227 
1228  info1->Set().sort(SortHspByMasterStartAscending);
1229  info2->Set().sort(SortHspByMasterStartAscending);
1230 
1231 
 // min() covers alignments whose start/stop are reversed.
1232  start1 = min(info1->Get().front()->GetSeqStart(0),
1233  info1->Get().front()->GetSeqStop(0));
1234  start2 = min(info2->Get().front()->GetSeqStart(0),
1235  info2->Get().front()->GetSeqStop(0));
1236 
1237  if (start1 == start2) {
1238  //same start: order by e-value, smaller first
1239  int score1, sum_n1, num_ident1;
1240  double bits1, evalue1;
1241  list<TGi> use_this_gi1;
1242 
1243  int score2, sum_n2, num_ident2;
1244  double bits2, evalue2;
1245  list<TGi> use_this_gi2;
1246 
1247 
1248  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
 // Fix: the second set of scores must come from info2. Reading info1 twice
 // made evalue1 == evalue2, so this tie-break always returned false.
1249  GetAlnScores(*(info2->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1250  return evalue1 < evalue2;
1251 
1252  } else {
1253  return start1 < start2;
1254  }
1255 
1256 }
1257 
1260  const CRef<CSeq_align>& info2)
1261 {
 // Comparator for two alignments: returns true when info1 has the higher bit
 // score as reported by GetAlnScores. The other score fields are fetched but
 // not used.
 // NOTE(review): the first signature line is missing from this listing.
1262 
1263  int score1, sum_n1, num_ident1;
1264  double bits1, evalue1;
1265  list<TGi> use_this_gi1;
1266 
1267  int score2, sum_n2, num_ident2;
1268  double bits2, evalue2;
1269  list<TGi> use_this_gi2;
1270 
1271 
1272  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1273  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1274  return bits1 > bits2;
1275 
1276 }
1277 
1280  const CRef<CSeq_align>& info2)
1281 {
 // Comparator for two alignments: the one that starts earlier on row 0 comes
 // first; equal starts are ordered by e-value, smaller first.
 // NOTE(review): the first signature line is missing from this listing.
1282  int start1 = 0, start2 = 0;
1283 
 // min() covers alignments whose start/stop are reversed.
1284  start1 = min(info1->GetSeqStart(0), info1->GetSeqStop(0));
1285  start2 = min(info2->GetSeqStart(0), info2->GetSeqStop(0)) ;
1286 
1287  if (start1 == start2) {
1288  //same start: order by e-value, smaller first
1289  int score1, sum_n1, num_ident1;
1290  double bits1, evalue1;
1291  list<TGi> use_this_gi1;
1292 
1293  int score2, sum_n2, num_ident2;
1294  double bits2, evalue2;
1295  list<TGi> use_this_gi2;
1296 
1297 
1298  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1299  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1300  return evalue1 < evalue2;
1301 
1302  } else {
1303 
1304  return start1 < start2;
1305  }
1306 }
1307 
1310  const CRef<CSeq_align>& info2)
1311 {
 // Comparator for two alignments: the one that starts earlier on row 1 comes
 // first; equal starts are ordered by e-value, smaller first.
 // NOTE(review): the first signature line is missing from this listing.
1312  int start1 = 0, start2 = 0;
1313 
 // min() covers alignments whose start/stop are reversed.
1314  start1 = min(info1->GetSeqStart(1), info1->GetSeqStop(1));
1315  start2 = min(info2->GetSeqStart(1), info2->GetSeqStop(1)) ;
1316 
1317  if (start1 == start2) {
1318  //same start: order by e-value, smaller first
1319  int score1, sum_n1, num_ident1;
1320  double bits1, evalue1;
1321  list<TGi> use_this_gi1;
1322 
1323  int score2, sum_n2, num_ident2;
1324  double bits2, evalue2;
1325  list<TGi> use_this_gi2;
1326 
1327 
1328  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1329  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1330  return evalue1 < evalue2;
1331 
1332  } else {
1333 
1334  return start1 < start2;
1335  }
1336 }
1337 
1338 int CAlignFormatUtil::GetAlignmentLength(const CSeq_align& aln, bool do_translation)
1339 {
1340 
1341  CRef<CSeq_align> final_aln;
1342 
1343  // Convert Std-seg and Dense-diag alignments to Dense-seg.
1344  // Std-segs are produced only for translated searches; Dense-diags only for
1345  // ungapped, not translated searches.
1346 
1347  if (aln.GetSegs().IsStd()) {
1348  CRef<CSeq_align> denseg_aln = aln.CreateDensegFromStdseg();
1349  // When both query and subject are translated, i.e. tblastx, convert
1350  // to a special type of Dense-seg.
1351  if (do_translation) {
1352  final_aln = denseg_aln->CreateTranslatedDensegFromNADenseg();
1353  } else {
1354  final_aln = denseg_aln;
1355 
1356  }
1357  } else if (aln.GetSegs().IsDendiag()) {
1358  final_aln = CreateDensegFromDendiag(aln);
1359  }
1360 
1361  const CDense_seg& ds = (final_aln ? final_aln->GetSegs().GetDenseg() :
1362  aln.GetSegs().GetDenseg());
1363 
1364  CAlnMap alnmap(ds);
1365  return alnmap.GetAlnStop() + 1;
1366 }
1367 
1369  CScope& scope,
1370  bool do_translation) {
 // Returns the fraction (0..1) of alignment columns in which the query row
 // and the subject row hold the same character, or 0 when the aligned strings
 // are empty. Despite the "percent" wording the value is not multiplied by 100.
 // NOTE(review): the first signature line (return type, name, `aln`
 // parameter) is missing from this listing.
1371  double identity = 0;
1372  CRef<CSeq_align> final_aln;
1373 
1374  // Convert Std-seg and Dense-diag alignments to Dense-seg.
1375  // Std-segs are produced only for translated searches; Dense-diags only for
1376  // ungapped, not translated searches.
1377 
1378  if (aln.GetSegs().IsStd()) {
1379  CRef<CSeq_align> denseg_aln = aln.CreateDensegFromStdseg();
1380  // When both query and subject are translated, i.e. tblastx, convert
1381  // to a special type of Dense-seg.
1382  if (do_translation) {
1383  final_aln = denseg_aln->CreateTranslatedDensegFromNADenseg();
1384  } else {
1385  final_aln = denseg_aln;
1386 
1387  }
1388  } else if (aln.GetSegs().IsDendiag()) {
1389  final_aln = CreateDensegFromDendiag(aln);
1390  }
1391 
 // Use the converted alignment when there is one, otherwise aln's own Dense-seg.
1392  const CDense_seg& ds = (final_aln ? final_aln->GetSegs().GetDenseg() :
1393  aln.GetSegs().GetDenseg());
1394 
1395  CAlnVec alnvec(ds, scope);
1396  string query, subject;
1397 
1398  alnvec.SetAaCoding(CSeq_data::e_Ncbieaa);
1399  alnvec.GetWholeAlnSeqString(0, query);
1400  alnvec.GetWholeAlnSeqString(1, subject);
1401 
1402  int num_ident = 0;
 // Compare only the common prefix in case the two rows differ in length.
1403  int length = (int)min(query.size(), subject.size());
1404 
1405  for (int i = 0; i < length; ++i) {
1406  if (query[i] == subject[i]) {
1407  ++num_ident;
1408  }
1409  }
1410 
1411  if (length > 0) {
1412  identity = ((double)num_ident)/length;
1413  }
1414 
1415  return identity;
1416 }
1417 
1418 
1420  const CRef<CSeq_align_set>& info2,
1421  double &percentIdent1,
1422  double &percentIdent2)
1423 {
 // Output parameters percentIdent1/percentIdent2 are reset to -1 below.
 // NOTE(review): the first signature line and the statements between the
 // reset and the return (original lines 1429-1430 and 1432-1433) are missing
 // from this listing; presumably they compute the two values from
 // info1/info2 -- confirm against the full file.
1424 
1425  CRef<CSeq_align_set> i1(info1), i2(info2);
1426  percentIdent1 = -1;
1427  percentIdent2 = -1;
1428 
1431 
1434  return;
1435 }
1436 
1437 
1440  const CRef<CSeq_align_set>& info2)
1441 {
 // Comparator for two hits: larger percent identity first; equal identities,
 // or a non-positive identity on either side, fall back to "smaller e-value
 // first". Both values come from GetSeqAlignSetCalcParamsFromASN; if either
 // identity is negative, both are recomputed via s_CalcAlnPercentIdent.
 // NOTE(review): i1/i2 are only referenced by the commented-out sort calls.
 // NOTE(review): the first signature line is missing from this listing.
1442 
1443  CRef<CSeq_align_set> i1(info1), i2(info2);
1444 
1445  //i1->Set().sort(SortHspByPercentIdentityDescending);
1446  //i2->Set().sort(SortHspByPercentIdentityDescending);
1447 
1448 
1449  unique_ptr<CAlignFormatUtil::SSeqAlignSetCalcParams> seqSetInfo1( CAlignFormatUtil::GetSeqAlignSetCalcParamsFromASN(*info1));
1450  unique_ptr<CAlignFormatUtil::SSeqAlignSetCalcParams> seqSetInfo2( CAlignFormatUtil::GetSeqAlignSetCalcParamsFromASN(*info2));
1451  double evalue1 = seqSetInfo1->evalue;
1452  double evalue2 = seqSetInfo2->evalue;
1453  double percentIdent1 = seqSetInfo1->percent_identity;
1454  double percentIdent2 = seqSetInfo2->percent_identity;
1455 
1456  bool retval = false;
1457  if(percentIdent1 < 0 || percentIdent2 < 0) {
1458  s_CalcAlnPercentIdent(info1, info2,percentIdent1,percentIdent2);
1459  }
1460  if(percentIdent1 > 0 &&percentIdent2 > 0) {
1461  if (percentIdent1 == percentIdent2) {
1462  retval = evalue1 < evalue2;
1463 
1464  } else {
 // The equal case was handled above, so >= acts as a strict > here.
1465  retval = percentIdent1 >= percentIdent2;
1466  }
1467  } else {
1468  retval = evalue1 < evalue2;
1469  }
1470  return retval;
1471 }
1472 
1474  CRef<CSeq_align_set> const& info2)
1475 {
 // Comparator for two hits (sets of HSPs): returns true when the sum of the
 // bit scores of all HSPs in info1 is strictly greater than the sum for
 // info2, i.e. hits are ordered by total bit score, descending.
 // NOTE(review): the first signature line is missing from this listing.
1476  int score1, score2, sum_n, num_ident;
1477  double bits, evalue;
1478  list<TGi> use_this_gi;
1479  double total_bits1 = 0, total_bits2 = 0;
1480 
1481  ITERATE(CSeq_align_set::Tdata, iter, info1->Get()) {
1482  CAlignFormatUtil::GetAlnScores(**iter, score1, bits, evalue,
1483  sum_n, num_ident, use_this_gi);
1484  total_bits1 += bits;
1485  }
1486 
1487  ITERATE(CSeq_align_set::Tdata, iter, info2->Get()) {
1488  CAlignFormatUtil::GetAlnScores(**iter, score2, bits, evalue,
1489  sum_n, num_ident, use_this_gi);
1490  total_bits2 += bits;
1491  }
1492 
1493 
 // Fix: a sort comparator must be a strict weak ordering, so it has to
 // return false for equal totals. The previous ">=" returned true for
 // comp(a, a), which list::sort does not allow and which reordered ties.
1494  return total_bits1 > total_bits2;
1495 
1496 }
1497 
1498 #ifndef NCBI_COMPILER_WORKSHOP
1499 /** Class to sort by linkout bit
1500  * @note this code doesn't compile under the Solaris' WorkShop, and because
1501  * this feature is only used inside NCBI (LinkoutDB), we disable this code.
1502  */
 // NOTE(review): the class-head line, the first constructor line, the two
 // expressions that fill linkout1/linkout2 and the data-member declarations
 // are missing from this listing.
1504 {
1505 public:
1507  const string& mv_build_name)
1508  : m_LinkoutDB(linkoutdb), m_MapViewerBuildName(mv_build_name) {}
1509 
 // Compares two hits by the eGenomicSeq bit of the linkout value looked up
 // for the row-1 id of each hit's first alignment; the value is 0 when
 // m_LinkoutDB is null.
1510  bool operator() (const CRef<CSeq_align_set>& info1, const CRef<CSeq_align_set>& info2)
1511  {
1512  CConstRef<CSeq_id> id1, id2;
1513  id1 = &(info1->Get().front()->GetSeq_id(1));
1514  id2 = &(info2->Get().front()->GetSeq_id(1));
1515 
1516  int linkout1 = 0, linkout2 = 0;
1517  linkout1 = m_LinkoutDB
1519  : 0;
1520  linkout2 = m_LinkoutDB
1522  : 0;
1523 
 // NOTE(review): "<=" returns true for equal elements, which is not a
 // strict weak ordering as list::sort requires; consider "<" after
 // confirming that callers do not rely on the current tie order.
1524  return (linkout1 & eGenomicSeq) <= (linkout2 & eGenomicSeq);
1525  }
1526 private:
1529 };
1530 #endif /* NCBI_COMPILER_WORKSHOP */
1531 
1533 SortHitByMolecularType(list< CRef<CSeq_align_set> >& seqalign_hit_list,
1534  CScope& scope, ILinkoutDB* linkoutdb,
1535  const string& mv_build_name)
1536 {
 // Stores &scope in the file-level kScope variable and sorts the hit list
 // with CSortHitByMolecularTypeEx. When NCBI_COMPILER_WORKSHOP is defined the
 // sort is compiled out and the list is left in its current order.
 // NOTE(review): the line holding the return type and class qualifier is
 // missing from this listing.
1537 
1538  kScope = &scope;
1539 #ifndef NCBI_COMPILER_WORKSHOP
1540  seqalign_hit_list.sort(CSortHitByMolecularTypeEx(linkoutdb, mv_build_name));
1541 #endif /* NCBI_COMPILER_WORKSHOP */
1542 }
1543 
1544 void CAlignFormatUtil::SortHit(list< CRef<CSeq_align_set> >& seqalign_hit_list,
1545  bool do_translation, CScope& scope, int
1546  sort_method, ILinkoutDB* linkoutdb,
1547  const string& mv_build_name)
1548 {
1549  kScope = &scope;
1550  kTranslation = do_translation;
1551 
1552  if (sort_method == 1) {
1553 #ifndef NCBI_COMPILER_WORKSHOP
1554  seqalign_hit_list.sort(CSortHitByMolecularTypeEx(linkoutdb,
1555  mv_build_name));
1556 #endif /* NCBI_COMPILER_WORKSHOP */
1557  } else if (sort_method == 2) {
1558  seqalign_hit_list.sort(SortHitByTotalScoreDescending);
1559  } else if (sort_method == 3) {
1560  seqalign_hit_list.sort(SortHitByPercentIdentityDescendingEx);
1561  }
1562 }
1563 
1566  target,
1567  int sort_method,
1568  const CSeq_align_set& source,
1569  CScope& scope,
1570  ILinkoutDB* linkoutdb,
1571  const string& mv_build_name)
1572 {
 // Distributes the alignments of `source` over target[0] and target[1]
 // according to the eGenomicSeq linkout bit of each alignment's row-1 id:
 //   sort_method == 2 : genomic -> target[0], everything else -> target[1]
 //   otherwise        : genomic -> target[1], everything else -> target[0]
 // Alignments whose Bioseq handle is invalid, or for which a CException is
 // thrown, go to target[0].
 // NOTE(review): the first signature line is missing from this listing.
 // NOTE(review): `count` is incremented but never read in the visible code.
1573  CConstRef<CSeq_id> prevSubjectId;
1574  int count = 0;
1575  int linkoutPrev = 0;
1576  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1577 
1578  const CSeq_id& id = (*iter)->GetSeq_id(1);
1579  try {
1580  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
1581  if (handle) {
1582  int linkout;
 // Look the linkout up once per run of consecutive alignments with the
 // same subject id; reuse the cached value for the rest of the run.
1583  if(prevSubjectId.Empty() || !id.Match(*prevSubjectId)){
1584  prevSubjectId = &id;
1585  linkout = linkoutdb ? linkoutdb->GetLinkout(id, mv_build_name): 0;
1586  linkoutPrev = linkout;
1587  count++;
1588  }
1589  else {
1590  linkout = linkoutPrev;
1591  }
1592  if (linkout & eGenomicSeq) {
1593  if (sort_method == 1) {
1594  target[1]->Set().push_back(*iter);
1595  } else if (sort_method == 2){
1596  target[0]->Set().push_back(*iter);
1597  } else {
1598  target[1]->Set().push_back(*iter);
1599  }
1600  } else {
1601  if (sort_method == 1) {
1602  target[0]->Set().push_back(*iter);
1603  } else if (sort_method == 2) {
1604  target[1]->Set().push_back(*iter);
1605  } else {
1606  target[0]->Set().push_back(*iter);
1607  }
1608  }
1609  } else {
1610  target[0]->Set().push_back(*iter);
1611  }
1612 
1613  } catch (const CException&){
1614  target[0]->Set().push_back(*iter); //no bioseq found, leave untouched
1615  }
1616  }
1617 }
1618 
1620  const CSeq_align_set& source)
1621 {
 // Groups the alignments of `source` into hits: each run of consecutive
 // alignments whose row-1 id matches the previous one is collected into one
 // CSeq_align_set, and every new set is appended to `target`. Alignments of
 // the same subject that are not adjacent in `source` end up in separate sets.
 // NOTE(review): the first signature line is missing from this listing.
1622  CConstRef<CSeq_id> previous_id;
1623  CRef<CSeq_align_set> temp;
1624 
1625  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1626  const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1627  if(previous_id.Empty()) {
 // First alignment: open the first group.
1628  temp = new CSeq_align_set;
1629  temp->Set().push_back(*iter);
1630  target.push_back(temp);
1631  } else if (cur_id.Match(*previous_id)){
 // Same subject as before: extend the current group.
1632  temp->Set().push_back(*iter);
1633 
1634  } else {
 // New subject: open a new group.
1635  temp = new CSeq_align_set;
1636  temp->Set().push_back(*iter);
1637  target.push_back(temp);
1638  }
1639  previous_id = &cur_id;
1640  }
1641 
1642 }
1643 
1646 {
 // Flattens `source` (a list of CSeq_align_set) into one new CSeq_align_set
 // that holds every alignment in list order, and returns it.
 // NOTE(review): the signature lines are missing from this listing.
 // NOTE(review): previous_id and temp are declared but never used.
1647  CRef<CSeq_align_set> align_set (new CSeq_align_set);
1648  CConstRef<CSeq_id> previous_id;
1649  CRef<CSeq_align_set> temp;
1650  // list<CRef<CSeq_align_set> >::iterator iter;
1651 
1652  for (list<CRef<CSeq_align_set> >::iterator iter = source.begin(); iter != source.end(); iter ++) {
1653  ITERATE(CSeq_align_set::Tdata, iter2, (*iter)->Get()) {
1654  align_set->Set().push_back(*iter2);
1655  }
1656  }
1657  return align_set;
1658 }
1659 
1661  const CSeq_align_set& source)
1662 {
 // Builds and returns hitsMap, keyed by the id strings of seqIdList. Every key
 // starts with an empty CSeq_align_set; a key whose id occurs in `source` is
 // replaced by a set holding that run of consecutive alignments.
 // NOTE(review): the first signature line and the declaration of hitsMap
 // (original line 1666) are missing from this listing.
1663  CConstRef<CSeq_id> previous_id;
1664  CRef<CSeq_align_set> temp;
1665 
1667 
1668  for(size_t i = 0; i < seqIdList.size();i++) {
1669  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
1670  hitsMap.insert(map<string, CRef<CSeq_align_set> >::value_type(seqIdList[i],new_aln));
1671  }
1672  size_t count = 0;
1673  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1674  const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1675  if(previous_id.Empty() || !cur_id.Match(*previous_id)) {
 // A new subject starts here. Stop once as many subjects have been
 // collected as there are requested ids.
1676  if(count >= seqIdList.size()) {
1677  break;
1678  }
1679  string idString = NStr::TruncateSpaces(cur_id.AsFastaString());
1680  if(hitsMap.find(idString) != hitsMap.end()) {
1681  temp = new CSeq_align_set;
1682  temp->Set().push_back(*iter);
1683  hitsMap[idString] = temp;
1684  count++;
1685  }
1686  else {
 // Not a requested id: clear temp so the rest of this run is skipped.
1687  temp.Reset();
1688  }
1689  }
1690  else if (cur_id.Match(*previous_id)){
1691  if(!temp.Empty()) {
1692  temp->Set().push_back(*iter);
1693  }
1694  }
1695  previous_id = &cur_id;
1696  }
1697  return hitsMap;
1698 }
1699 
1701 {
 // Replaces all_aln_set with a set that contains only the alignments whose
 // ids appear in the comma-separated alignSeqList, ordered as in that list.
 // NOTE(review): the signature line and the declaration of hitsMap (original
 // line 1707) are missing from this listing.
1702  vector <string> seqIds;
1703  NStr::Split(alignSeqList,",",seqIds);
1704 
1705  //SEQ_ALN_SET from ALIGNDB contains seq_aligns in random order
1706  //The following will create a map that contains seq-aln_set per gi from ALIGN_SEQ_LIST
1708 
1709  map < string, CRef<CSeq_align_set> >::iterator it;
1710  list< CRef<CSeq_align_set> > orderedSet;
1711  //orderedSet will have seq aligns in the order of gi list
1712  for(size_t i = 0; i < seqIds.size(); i++) {
1713  if(hitsMap.find(seqIds[i]) != hitsMap.end()) {
1714  orderedSet.push_back(hitsMap[seqIds[i]]);
1715  }
1716  }
1717  //This should contain seq align set in the order of gis in the list
1718  all_aln_set = CAlignFormatUtil::HitListToHspList(orderedSet);
1719 }
1720 
 // Splits the string tag of a general Seq-id on '.' into exactly three
 // fields and stores them in strRun, strSpotId and strReadIndex.
 // Returns true only when all three fields were extracted; on false the
 // output strings are left untouched.
 // NOTE(review): the declaration of seqId (original line 1725) is missing
 // from this listing; presumably it is selected from `ids`.
 // NOTE(review): `link` is declared but never used.
1721 static bool s_GetSRASeqMetadata(const CBioseq::TId& ids,string &strRun, string &strSpotId,string &strReadIndex)
1722 {
1723  bool success = false;
1724  string link = NcbiEmptyString;
1726 
1727  if (!seqId.Empty())
1728  {
1729  // Get the SRA tag from seqId
1730  if (seqId->GetGeneral().CanGetDb() &&
1731  seqId->GetGeneral().CanGetTag() &&
1732  seqId->GetGeneral().GetTag().IsStr())
1733  {
1734  // Decode the tag to collect the SRA-specific indices
1735  string strTag = seqId->GetGeneral().GetTag().GetStr();
1736  if (!strTag.empty())
1737  {
1738  vector<string> vecInfo;
1739  try
1740  {
1741  NStr::Split(strTag, ".", vecInfo);
1742  }
1743  catch (...)
1744  {
1745  return false;
1746  }
1747 
 // Exactly three dot-separated fields are required.
1748  if (vecInfo.size() != 3)
1749  {
1750  return false;
1751  }
1752 
1753  strRun = vecInfo[0];
1754  strSpotId = vecInfo[1];
1755  strReadIndex = vecInfo[2];
1756  success = true;
1757  }
1758  }
1759  }
1760  return success;
1761 }
1762 
1763 string CAlignFormatUtil::BuildSRAUrl(const CBioseq::TId& ids, string user_url)
1764 {
1765  string strRun, strSpotId,strReadIndex;
1766  string link = NcbiEmptyString;
1767 
1768  if(s_GetSRASeqMetadata(ids,strRun,strSpotId,strReadIndex))
1769  {
1770  // Generate the SRA link to the identified spot
1771  link += user_url;
1772  link += "?run=" + strRun;
1773  link += "." + strSpotId;
1774  link += "." + strReadIndex;
1775  }
1776  return link;
1777 }
1778 
1780 {
 // Returns the URL-encoded FASTA string of the id chosen for use in a URL, or
 // an empty string when the general id contains "gnl|BL_ORD_ID" or when the
 // chosen id is a GI (or no id is available).
 // Preference order: id_general, then id_other, then id_accession.
 // NOTE(review): the signature line and the declarations of id_general and
 // id_other (original lines 1783-1784) are missing from this listing.
1781  string gnl;
1782 
1785  const CRef<CSeq_id> id_accession = FindBestChoice(ids, CSeq_id::WorstRank);
1786 
1787  if(!id_general.Empty() && id_general->AsFastaString().find("gnl|BL_ORD_ID") != string::npos){
1788  return gnl;
1789  }
1790 
1791  const CSeq_id* bestid = NULL;
1792  if (id_general.Empty()){
1793  bestid = id_other;
1794  if (id_other.Empty()){
1795  bestid = id_accession;
1796  }
1797  } else {
1798  bestid = id_general;
1799  }
1800 
1801  if (bestid && bestid->Which() != CSeq_id::e_Gi){
1802  gnl = NStr::URLEncode(bestid->AsFastaString());
1803  }
1804  return gnl;
1805 }
1806 
1808  string user_url, string database,
1809  bool db_is_na, string rid, int query_number,
1810  bool for_alignment) {
1811 
1812  string link = NcbiEmptyString;
1814 
1815  if(!id_general.Empty()
1816  && id_general->AsFastaString().find("gnl|BL_ORD_ID") != string::npos){
1817  /* We do need to make security protected link to BLAST gnl */
1818  return NcbiEmptyString;
1819  }
1820  TGi gi = FindGi(ids);
1821  string bestID = s_GetBestIDForURL((CBioseq::TId &)ids);
1822 
1823 
1824  bool nodb_path = false;
1825  /* dumpgnl.cgi need to use path */
1826  if (user_url.find("dumpgnl.cgi") ==string::npos){
1827  nodb_path = true;
1828  }
1829  int length = (int)database.size();
1830  string str;
1831  char *chptr, *dbtmp;
1832  char tmpbuff[256];
1833  char* dbname = new char[sizeof(char)*length + 2];
1834  strcpy(dbname, database.c_str());
1835  if(nodb_path) {
1836  int i, j;
1837  dbtmp = new char[sizeof(char)*length + 2]; /* aditional space and NULL */
1838  memset(dbtmp, '\0', sizeof(char)*length + 2);
1839  for(i = 0; i < length; i++) {
1840  if(i > 0) {
1841  strcat(dbtmp, " "); //space between db
1842  }
1843  if(isspace((unsigned char) dbname[i]) || dbname[i] == ',') {/* Rolling spaces */
1844  continue;
1845  }
1846  j = 0;
1847  while (!isspace((unsigned char) dbname[i]) && j < 256 && i < length) {
1848  tmpbuff[j] = dbname[i];
1849  j++; i++;
1850  if(dbname[i] == ',') { /* Comma is valid delimiter */
1851  break;
1852  }
1853  }
1854  tmpbuff[j] = '\0';
1855  if((chptr = strrchr(tmpbuff, '/')) != NULL) {
1856  strcat(dbtmp, (char*)(chptr+1));
1857  } else {
1858  strcat(dbtmp, tmpbuff);
1859  }
1860 
1861  }
1862  } else {
1863  dbtmp = dbname;
1864  }
1865 
1866  char gnl[256];
1867  if (!bestID.empty()){
1868  strcpy(gnl, bestID.c_str());
1869 
1870  } else {
1871  gnl[0] = '\0';
1872  }
1873 
1874  str = NStr::URLEncode(dbtmp == NULL ? (char*) "nr" : dbtmp);
1875 
1876  if (user_url.find("?") == string::npos){
1877  link += user_url + "?" + "db=" + str + "&na=" + (db_is_na? "1" : "0");
1878  } else {
1879  if (user_url.find("=") != string::npos) {
1880  user_url += "&";
1881  }
1882  link += user_url + "db=" + str + "&na=" + (db_is_na? "1" : "0");
1883  }
1884 
1885  if (gnl[0] != '\0'){
1886  str = gnl;
1887  link += "&gnl=";
1888  link += str;
1889  }
1890  if (gi > ZERO_GI){
1891  link += "&gi=" + NStr::NumericToString(gi);
1892  link += "&term=" + NStr::NumericToString(gi) + NStr::URLEncode("[gi]");
1893  }
1894  if(taxid > ZERO_TAX_ID){
1895  link += "&taxid=" + NStr::NumericToString(taxid);
1896  }
1897  if (rid != NcbiEmptyString){
1898  link += "&RID=" + rid;
1899  }
1900 
1901  if (query_number > 0){
1902  link += "&QUERY_NUMBER=" + NStr::IntToString(query_number);
1903  }
1904 
1905  if (user_url.find("dumpgnl.cgi") ==string::npos){
1906  if (for_alignment)
1907  link += "&log$=nuclalign";
1908  else
1909  link += "&log$=nucltop";
1910  }
1911 
1912  if(nodb_path){
1913  delete [] dbtmp;
1914  }
1915  delete [] dbname;
1916  return link;
1917 }
1920  map< string, string>& parameters_to_change,
1921  string& cgi_query)
1922 {
 // Rebuilds cgi_query as "name=value&..." from the CGI entries of ctx.
 // Entries whose lower- or upper-cased name is a key of parameters_to_change
 // are either dropped (empty replacement value) or written with a replacement
 // value; all other entries are copied unchanged. A fixed set of names
 // ("service", "address", ...) is first added to parameters_to_change with an
 // empty value, so the caller's map is modified.
 // NOTE(review): the first signature line is missing from this listing.
 // NOTE(review): the replacement is looked up with it->first (original
 // case) while the test uses the lower/upper-cased name, and operator[] adds
 // missing keys to the map -- confirm this is intended.
1923 
1924  //add parameters to exclude
1925  parameters_to_change.insert(map<string, string>::
1926  value_type("service", ""));
1927  parameters_to_change.insert(map<string, string>::
1928  value_type("address", ""));
1929  parameters_to_change.insert(map<string, string>::
1930  value_type("platform", ""));
1931  parameters_to_change.insert(map<string, string>::
1932  value_type("_pgr", ""));
1933  parameters_to_change.insert(map<string, string>::
1934  value_type("client", ""));
1935  parameters_to_change.insert(map<string, string>::
1936  value_type("composition_based_statistics", ""));
1937 
1938  parameters_to_change.insert(map<string, string>::
1939  value_type("auto_format", ""));
1940  cgi_query = NcbiEmptyString;
1941  TCgiEntries& cgi_entry = ctx.GetRequest().GetEntries();
 // is_first suppresses the '&' separator before the first emitted pair.
1942  bool is_first = true;
1943 
1944  for(TCgiEntriesI it=cgi_entry.begin(); it!=cgi_entry.end(); ++it) {
1945  string parameter = it->first;
1946  if (parameter != NcbiEmptyString) {
1947  if (parameters_to_change.count(NStr::ToLower(parameter)) > 0 ||
1948  parameters_to_change.count(NStr::ToUpper(parameter)) > 0) {
1949  if(parameters_to_change[NStr::ToLower(parameter)] !=
1950  NcbiEmptyString &&
1951  parameters_to_change[NStr::ToUpper(parameter)] !=
1952  NcbiEmptyString) {
1953  if (!is_first) {
1954  cgi_query += "&";
1955  }
1956  cgi_query +=
1957  it->first + "=" + parameters_to_change[it->first];
1958  is_first = false;
1959  }
1960  } else {
1961  if (!is_first) {
1962  cgi_query += "&";
1963  }
1964  cgi_query += it->first + "=" + it->second;
1965  is_first = false;
1966  }
1967 
1968  }
1969  }
1970 }
1971 
1973 
 // Appends a fixed sequence of "NAME=value" pairs to cgi_query, with the
 // values read from the request in ctx. The first pair (RID) is appended
 // without a leading '&'. The three FORMAT_EQ_* pairs are added only when the
 // request value is non-empty.
 // NOTE(review): the signature line and the first line of each FORMAT_EQ_*
 // value expression (original lines 2008, 2015, 2022) are missing from this
 // listing.
1974  string format_type = ctx.GetRequestValue("FORMAT_TYPE").GetValue();
1975  string ridstr = ctx.GetRequestValue("RID").GetValue();
1976  string align_view = ctx.GetRequestValue("ALIGNMENT_VIEW").GetValue();
1977 
1978  cgi_query += "RID=" + ridstr;
1979  cgi_query += "&FORMAT_TYPE=" + format_type;
1980  cgi_query += "&ALIGNMENT_VIEW=" + align_view;
1981 
1982  cgi_query += "&QUERY_NUMBER=" + ctx.GetRequestValue("QUERY_NUMBER").GetValue();
1983  cgi_query += "&FORMAT_OBJECT=" + ctx.GetRequestValue("FORMAT_OBJECT").GetValue();
1984  cgi_query += "&RUN_PSIBLAST=" + ctx.GetRequestValue("RUN_PSIBLAST").GetValue();
1985  cgi_query += "&I_THRESH=" + ctx.GetRequestValue("I_THRESH").GetValue();
1986 
1987  cgi_query += "&DESCRIPTIONS=" + ctx.GetRequestValue("DESCRIPTIONS").GetValue();
1988 
1989  cgi_query += "&ALIGNMENTS=" + ctx.GetRequestValue("ALIGNMENTS").GetValue();
1990 
1991  cgi_query += "&NUM_OVERVIEW=" + ctx.GetRequestValue("NUM_OVERVIEW").GetValue();
1992 
1993  cgi_query += "&NCBI_GI=" + ctx.GetRequestValue("NCBI_GI").GetValue();
1994 
1995  cgi_query += "&SHOW_OVERVIEW=" + ctx.GetRequestValue("SHOW_OVERVIEW").GetValue();
1996 
1997  cgi_query += "&SHOW_LINKOUT=" + ctx.GetRequestValue("SHOW_LINKOUT").GetValue();
1998 
1999  cgi_query += "&GET_SEQUENCE=" + ctx.GetRequestValue("GET_SEQUENCE").GetValue();
2000 
2001  cgi_query += "&MASK_CHAR=" + ctx.GetRequestValue("MASK_CHAR").GetValue();
2002  cgi_query += "&MASK_COLOR=" + ctx.GetRequestValue("MASK_COLOR").GetValue();
2003 
2004  cgi_query += "&SHOW_CDS_FEATURE=" + ctx.GetRequestValue("SHOW_CDS_FEATURE").GetValue();
2005 
2006  if (ctx.GetRequestValue("FORMAT_EQ_TEXT").GetValue() != NcbiEmptyString) {
2007  cgi_query += "&FORMAT_EQ_TEXT=" +
2009  GetRequestValue("FORMAT_EQ_TEXT").
2010  GetValue()));
2011  }
2012 
2013  if (ctx.GetRequestValue("FORMAT_EQ_OP").GetValue() != NcbiEmptyString) {
2014  cgi_query += "&FORMAT_EQ_OP=" +
2016  GetRequestValue("FORMAT_EQ_OP").
2017  GetValue()));
2018  }
2019 
2020  if (ctx.GetRequestValue("FORMAT_EQ_MENU").GetValue() != NcbiEmptyString) {
2021  cgi_query += "&FORMAT_EQ_MENU=" +
2023  GetRequestValue("FORMAT_EQ_MENU").
2024  GetValue()));
2025  }
2026 
2027  cgi_query += "&EXPECT_LOW=" + ctx.GetRequestValue("EXPECT_LOW").GetValue();
2028  cgi_query += "&EXPECT_HIGH=" + ctx.GetRequestValue("EXPECT_HIGH").GetValue();
2029 
2030  cgi_query += "&BL2SEQ_LINK=" + ctx.GetRequestValue("BL2SEQ_LINK").GetValue();
2031 
2032 }
2033 
2034 
2036  CScope& scope, ILinkoutDB* linkoutdb,
2037  const string& mv_build_name)
2038 {
 // Returns true when the alignments in alnset do not all agree on the
 // eGenomicSeq linkout bit of their row-1 id, i.e. as soon as one alignment
 // differs from its predecessor. With a null linkoutdb every linkout is 0, so
 // the result is false.
 // NOTE(review): the first signature line is missing from this listing.
 // NOTE(review): `scope` is not used in the visible body.
2039  bool is_mixed = false;
2040  bool is_first = true;
2041  int prev_database = 0;
2042 
2043  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2044 
2045  const CSeq_id& id = (*iter)->GetSeq_id(1);
2046  int linkout = linkoutdb
2047  ? linkoutdb->GetLinkout(id, mv_build_name)
2048  : 0;
2049  int cur_database = (linkout & eGenomicSeq);
2050  if (!is_first && cur_database != prev_database) {
2051  is_mixed = true;
2052  break;
2053  }
2054  prev_database = cur_database;
2055  is_first = false;
2056  }
2057 
2058  return is_mixed;
2059 
2060 }
2061 
2062 
2064 {
 // Returns true when the MIXED_DATABASE request value of ctx is, compared
 // case-insensitively, "on", "true" or "yes"; false otherwise, including when
 // the value is empty.
 // NOTE(review): the signature line is missing from this listing.
2065  bool formatAsMixedDbs = false;
2066  string mixedDbs = ctx.GetRequestValue("MIXED_DATABASE").GetValue();
2067  if(!mixedDbs.empty()) {
2068  mixedDbs = NStr::ToLower(mixedDbs);
2069  formatAsMixedDbs = (mixedDbs == "on" || mixedDbs == "true" || mixedDbs == "yes") ? true : false;
2070  }
2071  return formatAsMixedDbs;
2072 }
2073 
2074 static string s_MapLinkoutGenParam(string &url_link_tmpl,
2075  const string& rid,
2076  string giList,
2077  bool for_alignment,
2078  int cur_align,
2079  string &label,
2080  string &lnk_displ,
2081  string lnk_tl_info = "",
2082  string lnk_title = "")
2083 {
2084  const string kLinkTitle=" title=\"View <@lnk_tl_info@> for <@label@>\" ";
2085  const string kLinkTarget="target=\"lnk" + rid + "\"";
2086  string lnkTitle = (lnk_title.empty()) ? kLinkTitle : lnk_title;
2087  string url_link = CAlignFormatUtil::MapTemplate(url_link_tmpl,"gi",giList);
2088  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",rid);
2089  url_link = CAlignFormatUtil::MapTemplate(url_link,"log",for_alignment? "align" : "top");
2090  url_link = CAlignFormatUtil::MapTemplate(url_link,"blast_rank",NStr::IntToString(cur_align));
2091  lnkTitle = NStr::StartsWith(lnk_displ,"<img") ? "" : lnkTitle;
2092  string lnkTarget = NStr::StartsWith(lnk_displ,"<img") ? "" : kLinkTarget;
2093  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnkTitle",lnkTitle);
2094  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnkTarget",lnkTarget);
2095  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnk_displ",lnk_displ);
2096  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnk_tl_info",lnk_tl_info);
2097  url_link = CAlignFormatUtil::MapTemplate(url_link,"label",label);
2098  url_link = CAlignFormatUtil::MapProtocol(url_link);
2099  return url_link;
2100 }
2101 
2102 
2103 static list<string> s_GetLinkoutUrl(int linkout,
2104  string giList,
2105  string labelList,
2106  TGi first_gi,
2107  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2108  bool textLink = true)
2109 
2110 {
2111  list<string> linkout_list;
2112  string url_link,lnk_displ,lnk_title,lnkTitleInfo;
2113 
2114  vector<string> accs;
2115  NStr::Split(labelList,",",accs);
2116  string firstAcc = (accs.size() > 0)? accs[0] : labelList;
2117 
2118  if (linkout & eUnigene) {
2119  url_link = CAlignFormatUtil::GetURLFromRegistry("UNIGEN");
2120  lnk_displ = textLink ? "UniGene" : kUnigeneImg;
2121 
2122  string termParam = NStr::Find(labelList,",") == NPOS ? kGeneTerm : ""; //kGeneTerm if only one seqid
2123  url_link = CAlignFormatUtil::MapTemplate(url_link,"termParam",termParam);
2124 
2125  lnkTitleInfo = "UniGene cluster";
2126  string uid = !linkoutInfo.is_na ? "[Protein Accession]" : "[Nucleotide Accession]";
2127  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2128  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2129 
2130  if(textLink) {
2131  url_link = CAlignFormatUtil::MapTemplate(kUnigeneDispl,"lnk",url_link);
2132  }
2133  url_link = CAlignFormatUtil::MapProtocol(url_link);
2134  linkout_list.push_back(url_link);
2135  }
2136  if (linkout & eStructure){
2137  CSeq_id seqID(firstAcc);
2138  string struct_link = CAlignFormatUtil::GetURLFromRegistry(
2139  "STRUCTURE_URL");
2140 
2141  url_link = struct_link.empty() ? kStructureUrl : struct_link;
2142  string linkTitle;
2143  if(seqID.Which() == CSeq_id::e_Pdb) {
2144  lnk_displ = textLink ? "Structure" : kStructureImg;
2145  linkTitle = " title=\"View 3D structure <@label@>\"";
2146  }
2147  else {
2148  url_link = kStructureAlphaFoldUrl;
2149  lnk_displ = textLink ? "AlphaFold Structure" : kStructureImg;
2150  linkTitle = " title=\"View AlphaFold 3D structure <@label@>\"";
2151  }
2152 
2153 
2154 
2155  string molID,chainID;
2156  NStr::SplitInTwo(firstAcc,"_",molID,chainID);
2157  url_link = CAlignFormatUtil::MapTemplate(url_link,"molid",molID);
2158  url_link = CAlignFormatUtil::MapTemplate(url_link,"queryID",linkoutInfo.queryID);
2159  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,firstAcc,lnk_displ,"",linkTitle);
2160  if(textLink) {
2161  url_link = CAlignFormatUtil::MapTemplate(kStructureDispl,"lnk",url_link);
2162  }
2163  url_link = CAlignFormatUtil::MapProtocol(url_link);
2164  linkout_list.push_back(url_link);
2165  }
2166  if (linkout & eGeo){
2167  url_link = CAlignFormatUtil::GetURLFromRegistry("GEO");
2168  lnk_displ = textLink ? "GEO Profiles" : kGeoImg;
2169 
2170  lnkTitleInfo = "Expression profiles";
2171  //gilist contains comma separated gis
2172  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2173 
2174 
2175  if(textLink) {
2176  url_link = CAlignFormatUtil::MapTemplate(kGeoDispl,"lnk",url_link);
2177  }
2178  url_link = CAlignFormatUtil::MapProtocol(url_link);
2179  linkout_list.push_back(url_link);
2180  }
2181  if(linkout & eGene){
2182  url_link = CAlignFormatUtil::GetURLFromRegistry("GENE");
2183  if(textLink) {
2184  string geneSym = CAlignFormatUtil::GetGeneInfo(first_gi);
2185  lnk_displ = "Gene";
2186  lnkTitleInfo = "gene " + geneSym;
2187  }
2188  else {
2189  lnk_displ = kGeneImg;
2190  }
2191  string termParam = NStr::Find(labelList,",") == NPOS ? kGeneTerm : ""; //kGeneTerm if only one seqid
2192  url_link = CAlignFormatUtil::MapTemplate(url_link,"termParam",termParam);
2193 
2194  string uid = !linkoutInfo.is_na ? "[Protein Accession]" : "[Nucleotide Accession]";
2195  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2196 
2197  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2198 
2199  if(textLink) {
2200  url_link = CAlignFormatUtil::MapTemplate(kGeneDispl,"lnk",url_link);
2201  }
2202  url_link = CAlignFormatUtil::MapProtocol(url_link);
2203  linkout_list.push_back(url_link);
2204  }
2205 
2206  if((linkout & eGenomicSeq) && first_gi != ZERO_GI){ //only for advanced view -> textlink = true
2207  if(textLink) {
2208  url_link = kMapviewBlastHitParams;
2209  lnk_displ = "Map Viewer";
2210 
2211  lnkTitleInfo = "BLAST hits on the " + linkoutInfo.taxName + " genome";
2212 
2213  url_link = CAlignFormatUtil::MapTemplate(url_link,"gnl",NStr::URLEncode(linkoutInfo.gnl));
2214  url_link = CAlignFormatUtil::MapTemplate(url_link,"db",linkoutInfo.database);
2215  url_link = CAlignFormatUtil::MapTemplate(url_link,"is_na",linkoutInfo.is_na? "1" : "0");
2216  string user_url = (linkoutInfo.user_url.empty()) ? kMapviewBlastHitUrl : linkoutInfo.user_url;
2217  url_link = CAlignFormatUtil::MapTemplate(url_link,"user_url",user_url);
2218 
2219  string taxIDStr = (linkoutInfo.taxid > ZERO_TAX_ID) ? NStr::NumericToString(linkoutInfo.taxid) : "";
2220  url_link = CAlignFormatUtil::MapTemplate(url_link,"taxid",taxIDStr);
2221 
2222  string queryNumStr = (linkoutInfo.query_number > 0) ? NStr::IntToString(linkoutInfo.query_number) : "";
2223  url_link = CAlignFormatUtil::MapTemplate(url_link,"query_number",queryNumStr); //gi,term
2224 
2225  string giStr = (first_gi > ZERO_GI)? NStr::NumericToString(first_gi) : "";
2226  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giStr,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2227 
2228  if(textLink) {
2229  url_link = CAlignFormatUtil::MapTemplate(kMapviwerDispl,"lnk",url_link);
2230  }
2231  url_link = CAlignFormatUtil::MapProtocol(url_link);
2232  linkout_list.push_back(url_link);
2233  }
2234  }
2235  else if((linkout & eMapviewer) && first_gi != ZERO_GI){
2236  url_link = kMapviwerUrl;
2237  lnk_displ = textLink ? "Map Viewer" : kMapviwerImg;
2238 
2239  string linkTitle = " title=\"View <@label@> aligned to the " + linkoutInfo.taxName + " genome\"";
2240  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2241 
2242  if(textLink) {
2243  url_link = CAlignFormatUtil::MapTemplate(kMapviwerDispl,"lnk",url_link);
2244  }
2245  url_link = CAlignFormatUtil::MapProtocol(url_link);
2246  linkout_list.push_back(url_link);
2247  }
2248  //View Bioassays involving <accession
2249  if(linkout & eBioAssay && linkoutInfo.is_na && first_gi != ZERO_GI){
2250  url_link = CAlignFormatUtil::GetURLFromRegistry("BIOASSAY_NUC");
2251  lnk_displ = textLink ? "PubChem BioAssay" : kBioAssayNucImg;
2252 
2253  string linkTitle = " title=\"View Bioassays involving <@label@>\"";
2254  //gilist contains comma separated gis, change it to the following
2255  giList = NStr::Replace(giList,",","[RNATargetGI] OR ");
2256  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2257 
2258  if(textLink) {
2259  url_link = CAlignFormatUtil::MapTemplate(kBioAssayDispl,"lnk",url_link);
2260  }
2261  url_link = CAlignFormatUtil::MapProtocol(url_link);
2262  linkout_list.push_back(url_link);
2263  }
2264  else if (linkout & eBioAssay && !linkoutInfo.is_na && first_gi != ZERO_GI) {
2265  url_link = CAlignFormatUtil::GetURLFromRegistry("BIOASSAY_PROT");
2266  lnk_displ = textLink ? "PubChem BioAssay" : kBioAssayProtImg;
2267 
2268  lnkTitleInfo ="Bioassay data";
2269  string linkTitle = " title=\"View Bioassays involving <@label@>\"";
2270  //gilist contains comma separated gis, change it to the following
2271  giList = NStr::Replace(giList,",","[PigGI] OR ");
2272  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2273 
2274  if(textLink) {
2275  url_link = CAlignFormatUtil::MapTemplate(kBioAssayDispl,"lnk",url_link);
2276  }
2277  url_link = CAlignFormatUtil::MapProtocol(url_link);
2278  linkout_list.push_back(url_link);
2279  }
2280  if(linkout & eReprMicrobialGenomes){
2281  url_link = CAlignFormatUtil::GetURLFromRegistry("REPR_MICROBIAL_GENOMES");
2282  lnk_displ = textLink ? "Genome" : kReprMicrobialGenomesImg;
2283 
2284  lnkTitleInfo = "genomic information";
2285  //gilist contains comma separated gis
2286  string uid = !linkoutInfo.is_na ? "Protein Accession" : "Nucleotide Accession";
2287  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2288  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2289 
2290  if(textLink) {
2291  url_link = CAlignFormatUtil::MapTemplate(kReprMicrobialGenomesDispl,"lnk",url_link);
2292  }
2293  url_link = CAlignFormatUtil::MapProtocol(url_link);
2294  linkout_list.push_back(url_link);
2295  }
2296  if((linkout & eGenomeDataViewer) || (linkout & eTranscript)){
2297  string urlTag;
2298  lnk_displ = textLink ? "Genome Data Viewer" : kGenomeDataViewerImg;
2299  if(linkout & eTranscript) {
2300  urlTag = "GENOME_DATA_VIEWER_TRANSCR";
2301  lnkTitleInfo = "title=\"View the annotation of the transcript <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2302  }
2303  else {
2304  urlTag = linkoutInfo.is_na ? "GENOME_DATA_VIEWER_NUC" : "GENOME_DATA_VIEWER_PROT";
2305  lnkTitleInfo = linkoutInfo.is_na ?
2306  "title=\"View BLAST hits for <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as hits and browse to other regions.\""
2307  :
2308  "title=\"View the annotation of the protein <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2309  }
2310  url_link = CAlignFormatUtil::GetURLFromRegistry(urlTag);
2311  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,firstAcc,lnk_displ,"",lnkTitleInfo);
2312 
2313  url_link = CAlignFormatUtil::MapTemplate(url_link,"queryID",linkoutInfo.queryID);
2314 
2315  TSeqPos seqFrom = linkoutInfo.subjRange.GetFrom();
2316  seqFrom = (seqFrom == 0) ? seqFrom : seqFrom - 1;
2317 
2318  TSeqPos seqTo = linkoutInfo.subjRange.GetTo();
2319  seqTo = (seqTo == 0) ? seqTo : seqTo - 1;
2320 
2321  url_link = CAlignFormatUtil::MapTemplate(url_link,"from",seqFrom);//-1
2322  url_link = CAlignFormatUtil::MapTemplate(url_link,"to",seqTo);//-1
2323 
2324  if(textLink) {
2325  url_link = CAlignFormatUtil::MapTemplate(kGenomeDataViewerDispl,"lnk",url_link);
2326  }
2327  url_link = CAlignFormatUtil::MapProtocol(url_link);
2328  linkout_list.push_back(url_link);
2329  }
2330  return linkout_list;
2331 }
2332 
2333  ///Get list of linkouts for one sequence
/// Packs the arguments into an SLinkoutInfo and delegates to s_GetLinkoutUrl()
/// with the text-link mode switched off.
/// @param linkout  linkout bit mask of the sequence
/// @param ids  seq-id list of the sequence; the gi found in it (FindGi) becomes the gi string
/// @param rid, cdd_rid, entrez_term, is_na, preComputedResID,
///        structure_linkout_as_group, for_alignment  forwarded to SLinkoutInfo::Init()
/// @param first_gi  gi used as "first gi"; ZERO_GI means "use the gi found in ids"
/// @param cur_align  stored in linkoutInfo.cur_align
/// @return the list produced by s_GetLinkoutUrl()
2334  list<string> CAlignFormatUtil::GetLinkoutUrl(int linkout, const CBioseq::TId& ids,
2335  const string& rid,
2336  const string& cdd_rid,
2337  const string& entrez_term,
2338  bool is_na,
2339  TGi first_gi,
2340  bool structure_linkout_as_group,
2341  bool for_alignment, int cur_align,
2342  string preComputedResID)
2343 
2344  {
2345  list<string> linkout_list;
2346  TGi gi = FindGi(ids);
// NOTE(review): in the lines visible here `label` is never assigned before it
// is passed to s_GetLinkoutUrl(); two lines of the listing are absent around
// this declaration - confirm against the repository copy.
2348  string label;
2350  string giString = NStr::NumericToString(gi);
// Fall back to the gi of this sequence when the caller did not supply one.
2351  first_gi = (first_gi == ZERO_GI) ? gi : first_gi;
2352 
2353 
2354 
2355  SLinkoutInfo linkoutInfo;
2356  linkoutInfo.Init(rid,
2357  cdd_rid,
2358  entrez_term,
2359  is_na,
2360  "", //database
2361  0, //query_number
2362  "", //user_url
2363  preComputedResID,
2364  "", //linkoutOrder
2365  structure_linkout_as_group,
2366  for_alignment);
2367 
2368  linkoutInfo.cur_align = cur_align;
// No taxonomy id is available through this entry point.
2369  linkoutInfo.taxid = ZERO_TAX_ID;
2370 
2371  linkout_list = s_GetLinkoutUrl(linkout,
2372  giString,
2373  label,
2374  first_gi,
2375  linkoutInfo,
2376  false); //textlink
2377 
2378  return linkout_list;
2379  }
2380 
2381 static int s_LinkLetterToType(string linkLetter)
2382 {
2383  int linkType = 0;
2384  if(linkLetter == "U") {
2385  linkType = eUnigene;
2386  }
2387  else if(linkLetter == "S") {
2388  linkType = eStructure;
2389  }
2390  else if(linkLetter == "E") {
2391  linkType = eGeo;
2392  }
2393  else if(linkLetter == "G") {
2394  linkType = eGene;
2395  }
2396  else if(linkLetter == "M") {
2397  linkType = eMapviewer | eGenomicSeq;
2398  }
2399  else if(linkLetter == "N") {
2400  linkType = eGenomicSeq;
2401  }
2402  else if(linkLetter == "B") {
2403  linkType = eBioAssay;
2404  }
2405  else if(linkLetter == "R") {
2406  linkType = eReprMicrobialGenomes;
2407  }
2408  else if(linkLetter == "V") {
2409  linkType = eGenomeDataViewer;
2410  }
2411  else if(linkLetter == "T") {
2412  linkType = eTranscript;
2413  }
2414 
2415  return linkType;
2416 }
2417 
2418 
2419 static void s_AddLinkoutInfo(map<int, vector < CBioseq::TId > > &linkout_map,int linkout,CBioseq::TId &cur_id)
2420 {
2421  if(linkout_map.count(linkout) > 0){
2422  linkout_map[linkout].push_back(cur_id);
2423  }
2424  else {
2425  vector <CBioseq::TId > idList;
2426  idList.push_back(cur_id);
2427  linkout_map.insert(map<int, vector <CBioseq::TId > >::value_type(linkout,idList));
2428  }
2429 }
2430 
/// Look up the linkout bit mask of one sequence in the linkout database.
/// NOTE(review): the first line of this definition (return type, name and the
/// cur_id parameter) is absent from this listing; callers in this file invoke
/// it as GetSeqLinkoutInfo(cur_id, &linkoutdb, mv_build_name).
/// @param linkoutdb  address of the linkout DB pointer; the pointer is reset to
///                   NULL when a CException is caught during lookup [in/out]
/// @param mv_build_name  passed through to ILinkoutDB::GetLinkout()
/// @param gi  gi to look up; INVALID_GI means "take the gi from cur_id"
/// @return linkout bits; 0 when *linkoutdb is NULL
2432  ILinkoutDB **linkoutdb,
2433  const string& mv_build_name,
2434  TGi gi)
2435  {
2436  int linkout = 0;
2437 
2438  if(*linkoutdb) {
2439  if(gi == INVALID_GI) {
2440  gi = FindGi(cur_id);
2441  }
2442  try {
// Prefer the gi based lookup; use a text seq-id only when there is no usable gi.
2443  if(gi > ZERO_GI) {
2444  linkout = (*linkoutdb)->GetLinkout(gi, mv_build_name);
2445  }
2446  else if(GetTextSeqID(cur_id)){
// NOTE(review): the declaration of seqID is on a line absent from this listing.
2448  linkout = (*linkoutdb)->GetLinkout(*seqID, mv_build_name);
// Second lookup with an id rebuilt from GetSeqIdString(false) - presumably the
// accession without its version; confirm.
2449  string str_id = seqID->GetSeqIdString(false);
2450  CRef<CSeq_id> seqIDNew(new CSeq_id(str_id));
2451  int linkoutWithoutVersion = (*linkoutdb)->GetLinkout(*seqIDNew, mv_build_name);
// NOTE(review): (linkoutWithoutVersion | eStructure) is always non-zero, so this
// condition reduces to "linkoutWithoutVersion != 0"; '&' may have been intended.
2452  if(linkoutWithoutVersion && (linkoutWithoutVersion | eStructure)) {
2453  linkout = linkout | linkoutWithoutVersion;
2454  }
2455  }
2456  }
2457  catch (const CException & e) {
2458  ERR_POST("Problem with linkoutdb: " + e.GetMsg());
2459  cerr << "[BLAST FORMATTER EXCEPTION] Problem with linkoutdb: " << e.GetMsg() << endl;
// Clear the caller's pointer so that later calls skip the lookup.
2460  *linkoutdb = NULL;
2461  }
2462  }
2463  return linkout;
2464  }
2465 
/// Register one seq-id list in linkout_map under every linkout type that the
/// linkout database reports for it.
/// NOTE(review): the line carrying the function name and the cur_id parameter
/// is absent from this listing; a caller below invokes it as
/// GetBdlLinkoutInfo(cur_id, linkout_map, linkoutdb, mv_build_name).
/// @param linkout_map  linkout type -> id lists, extended in place [in/out]
/// @param linkoutdb  linkout database; nothing is done when it is NULL
/// @param mv_build_name  passed through to GetSeqLinkoutInfo()
2466  void
2468  map<int, vector <CBioseq::TId > > &linkout_map,
2469  ILinkoutDB* linkoutdb,
2470  const string& mv_build_name)
2471  {
2472  if(!linkoutdb) return;
2473 
// &linkoutdb is the address of the local parameter copy: if the lookup resets
// it to NULL the caller's pointer is not affected.
2474  int linkout = GetSeqLinkoutInfo(cur_id,
2475  &linkoutdb,
2476  mv_build_name);
2477 
2478  if(linkout & eGene){
2479  s_AddLinkoutInfo(linkout_map,eGene,cur_id);
2480  }
2481  if (linkout & eUnigene) {
2482  s_AddLinkoutInfo(linkout_map,eUnigene,cur_id);
2483  }
2484  if (linkout & eGeo){
2485  s_AddLinkoutInfo(linkout_map,eGeo,cur_id);
2486  }
2487  if (linkout & eStructure){
2488  s_AddLinkoutInfo(linkout_map,eStructure,cur_id);
2489  }
2490  //eGenomicSeq and eMapviewer cannot combine together
// When both bits are set the id is filed under eGenomicSeq only; eGenomicSeq
// without eMapviewer is not filed at all.
2491  if((linkout & eGenomicSeq) && (linkout & eMapviewer)){
2492  s_AddLinkoutInfo(linkout_map,eGenomicSeq,cur_id);
2493  }
2494  else if(linkout & eMapviewer){
2495  s_AddLinkoutInfo(linkout_map,eMapviewer,cur_id);
2496  }
2497  if(linkout & eBioAssay){
2498  s_AddLinkoutInfo(linkout_map,eBioAssay,cur_id);
2499  }
2500  if(linkout & eReprMicrobialGenomes){
2501  s_AddLinkoutInfo(linkout_map,eReprMicrobialGenomes,cur_id);
2502  }
2503 
2504  if(linkout & eGenomeDataViewer){
2505  s_AddLinkoutInfo(linkout_map,eGenomeDataViewer,cur_id);
2506  }
2507  if(linkout & eTranscript){
2508  s_AddLinkoutInfo(linkout_map,eTranscript,cur_id);
2509  }
2510 
2511  }
2512 
/// Collect linkout information for the leading deflines of a defline list.
/// NOTE(review): the line carrying the function name and the bdl parameter is
/// absent from this listing; callers below pass a
/// const list< CRef< CBlast_def_line > >& as the first argument.
/// @param linkout_map  linkout type -> id lists, extended in place [in/out]
/// @param linkoutdb  linkout database forwarded to the per-id overload
/// @param mv_build_name  forwarded to the per-id overload
2513  void
2515  map<int, vector <CBioseq::TId > > &linkout_map,
2516  ILinkoutDB* linkoutdb,
2517  const string& mv_build_name)
2518  {
2519  const int kMaxDeflineNum = 10;
2520  int num = 0;
2521  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2522  iter != bdl.end(); iter++){
// The cast drops const because the per-id overload takes a non-const reference.
2523  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2524 
2525  GetBdlLinkoutInfo(cur_id,
2526  linkout_map,
2527  linkoutdb,
2528  mv_build_name);
2529  num++;
// NOTE(review): the test runs after the increment and uses '>', so up to
// kMaxDeflineNum + 1 deflines are processed before the loop stops.
2530  if(num > kMaxDeflineNum) break;
2531  }
2532  }
2533 
/// Return the common name recorded for a taxonomy id.
/// @param taxid  taxonomy id; ZERO_TAX_ID yields an empty string
/// @return info.common_name as filled by CSeqDB::GetTaxInfo(), or an empty
///         string when taxid is ZERO_TAX_ID or a CException was thrown
2534  static string s_GetTaxName(TTaxId taxid)
2535  {
2536  string taxName;
2537  try {
2538  if(taxid != ZERO_TAX_ID) {
// NOTE(review): the declaration of `info` is on a line absent from this listing.
2540  CSeqDB::GetTaxInfo(taxid, info);
2541  taxName = info.common_name;
2542  }
2543  }
2544  catch (CException&) {
// Deliberately ignored: a failed lookup just leaves the name empty.
2545 
2546  }
2547  return taxName;
2548  }
2549 
/// Append the "Identical Proteins" link for a sequence to linkout_list.
/// NOTE(review): the first line of this definition (return type, name and the
/// cur_id parameter) is absent from this listing; a caller below invokes it as
/// s_AddOtherRelatedInfoLinks(cur_id, rid, is_na, for_alignment, cur_align, linkout_list).
/// @param rid, for_alignment, cur_align  forwarded to s_MapLinkoutGenParam()
/// @param is_na  not used in the lines visible here
/// @param linkout_list  list the generated link is appended to [in/out]
2551  const string& rid,
2552  bool is_na,
2553  bool for_alignment,
2554  int cur_align,
2555  list<string> &linkout_list)
2556 
2557  {
2558  //Identical Proteins
2559 
// NOTE(review): the declaration of `wid` and the statement that presumably
// fills `label` are on lines absent from this listing - confirm.
2561  if (CAlignFormatUtil::GetTextSeqID(wid)) {
2562  string label;
2564  string url_link = kIdenticalProteinsUrl;
2565  string lnk_displ = "Identical Proteins";
// The gi slot of the template is filled with ZERO_GI.
2566  url_link = s_MapLinkoutGenParam(url_link,rid,NStr::NumericToString(ZERO_GI),for_alignment, cur_align,label,lnk_displ);
// Wrap the link in the display template, then substitute the label.
2567  url_link = CAlignFormatUtil::MapTemplate(kIdenticalProteinsDispl,"lnk",url_link);
2568  url_link = CAlignFormatUtil::MapTemplate(url_link,"label",label);
2569  linkout_list.push_back(url_link);
2570  }
2571  }
2572 
2573 
2574 
2575  //reset:taxname,gnl
/// Build the linkout URL list for one sequence group, following the order of
/// the letters in linkoutInfo.linkoutOrder.
/// @param cur_id  seq-id list used for the gnl id and the identical-proteins link
/// @param linkoutInfo  linkout settings; taxName and gnl are cleared and
///                     re-filled for every linkout letter [in/out]
/// @param linkout_map  linkout type -> id lists; operator[] on it may insert an
///                     empty entry for eGenomicSeq [in/out]
/// @param getIdentProteins  when true, s_AddOtherRelatedInfoLinks() is called at the end
/// @return at most one URL per linkout letter, plus whatever
///         s_AddOtherRelatedInfoLinks() appends
2576  static list<string> s_GetFullLinkoutUrl(CBioseq::TId& cur_id,
2577  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2578  map<int, vector < CBioseq::TId > > &linkout_map,
2579  bool getIdentProteins)
2580 
2581  {
2582  list<string> linkout_list;
2583 
2584  vector<string> linkLetters;
2585  NStr::Split(linkoutInfo.linkoutOrder,",",linkLetters); //linkoutOrder = "G,U,M,V,E,S,B,R,T"
2586  for(size_t i = 0; i < linkLetters.size(); i++) {
2587  TGi first_gi = ZERO_GI;
2588  vector < CBioseq::TId > idList;
2589  int linkout = s_LinkLetterToType(linkLetters[i]);
2590  linkoutInfo.taxName.clear();
// Map viewer letters resolve to exactly one type: eGenomicSeq when ids were
// collected for it, otherwise eMapviewer.
2591  if(linkout & (eMapviewer | eGenomicSeq)) {
2592  linkout = (linkout_map[eGenomicSeq].size() != 0) ? eGenomicSeq : eMapviewer;
2593  linkoutInfo.taxName = s_GetTaxName(linkoutInfo.taxid);
2594  }
2595  if(linkout_map.find(linkout) != linkout_map.end()) {
2596  idList = linkout_map[linkout];
2597  }
// A link is skipped for an unknown letter, when no ids were collected for the
// type, or for structure links without a usable cdd_rid.
2598  bool disableLink = (linkout == 0 || idList.size() == 0 || ( (linkout & eStructure) && (linkoutInfo.cdd_rid == "" || linkoutInfo.cdd_rid == "0")));
2599 
2600  string giList,labelList;
// NOTE(review): seqVersion is an int holding a bool value; it is passed as the
// second argument of GetLabel() below.
2601  int seqVersion = ((linkout & eGenomeDataViewer) || (linkout & eTranscript)) ? true : false;
// NOTE(review): this loop index shadows the outer loop's `i`.
2602  for (size_t i = 0; i < idList.size(); i++) {
2603  const CBioseq::TId& ids = idList[i];
2604  TGi gi = FindGi(ids);
2605  if (first_gi == ZERO_GI) first_gi = gi;
2606 
2607 
// NOTE(review): the declaration of `wid` is on a line absent from this listing.
2609  string label = CAlignFormatUtil::GetLabel(wid,seqVersion);
2610  if(!labelList.empty()) labelList += ",";
2611  labelList += label;
2612 
2613  //use only first gi for bioAssay protein
2614  if(!giList.empty() && (linkout & eBioAssay) && !linkoutInfo.is_na) continue;
2615  if(!giList.empty()) giList += ",";
2616  giList += NStr::NumericToString(gi);
2617  }
2618 
2619  linkoutInfo.gnl.clear();
2620  if(!disableLink && linkout == eGenomicSeq) {
2621  linkoutInfo.gnl = s_GetBestIDForURL(cur_id);
2622  }
2623 
2624  if(!disableLink) {//
2625  //The following list will contain only one entry for single linkout value
2626  list<string> one_linkout = s_GetLinkoutUrl(linkout,
2627  giList,
2628  labelList,
2629  first_gi,
2630  linkoutInfo);
// Only the first URL of the returned list is kept.
2631  if(one_linkout.size() > 0) {
2632  list<string>::iterator iter = one_linkout.begin();
2633  linkout_list.push_back(*iter);
2634  }
2635  }
2636  }
2637  if(getIdentProteins) {
2638  s_AddOtherRelatedInfoLinks(cur_id,linkoutInfo.rid,linkoutInfo.is_na,linkoutInfo.for_alignment,linkoutInfo.cur_align,linkout_list);
2639  }
2640  return linkout_list;
2641  }
2642 
2643 list<string> CAlignFormatUtil::GetFullLinkoutUrl(const list< CRef< CBlast_def_line > > &bdl,
2644  CAlignFormatUtil::SLinkoutInfo &linkoutInfo)
2645 {
2646  list<string> linkout_list;
2647  map<int, vector < CBioseq::TId > > linkout_map;
2648  if(bdl.size() > 0) {
2649  GetBdlLinkoutInfo(bdl,linkout_map, linkoutInfo.linkoutdb, linkoutInfo.mv_build_name);
2650  list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2651  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2652  linkout_list = s_GetFullLinkoutUrl(cur_id,
2653  linkoutInfo,
2654  linkout_map,
2655  !linkoutInfo.is_na && bdl.size() > 1);
2656  }
2657  return linkout_list;
2658 }
2659 
2660 
2661 list<string> CAlignFormatUtil::GetFullLinkoutUrl(const list< CRef< CBlast_def_line > > &bdl,
2662  const string& rid,
2663  const string& cdd_rid,
2664  const string& entrez_term,
2665  bool is_na,
2666  bool structure_linkout_as_group,
2667  bool for_alignment,
2668  int cur_align,
2669  string& linkoutOrder,
2670  TTaxId taxid,
2671  string &database,
2672  int query_number,
2673  string &user_url,
2674  string &preComputedResID,
2675  ILinkoutDB* linkoutdb,
2676  const string& mv_build_name)
2677 
2678 {
2679  list<string> linkout_list;
2680  map<int, vector < CBioseq::TId > > linkout_map;
2681  if(bdl.size() > 0) {
2682  GetBdlLinkoutInfo(bdl,linkout_map, linkoutdb, mv_build_name);
2683  list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2684  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2685 
2686  SLinkoutInfo linkoutInfo;
2687  linkoutInfo.Init(rid,
2688  cdd_rid,
2689  entrez_term,
2690  is_na,
2691  database,
2692  query_number,
2693  user_url,
2694  preComputedResID,
2695  linkoutOrder,
2696  structure_linkout_as_group,
2697  for_alignment);
2698 
2699  linkoutInfo.cur_align = cur_align;
2700  linkoutInfo.taxid = taxid;
2701 
2702  linkout_list = s_GetFullLinkoutUrl(cur_id,
2703  linkoutInfo,
2704  linkout_map,
2705  !is_na && bdl.size() > 1);
2706  }
2707  return linkout_list;
2708 }
2709 
2710 
/// Build the linkout URL list for a single seq-id list using a prepared
/// SLinkoutInfo.
/// NOTE(review): the first line of this definition (return type, name and the
/// cur_id parameter) is absent from this listing.
/// @param linkoutInfo  linkout settings; linkoutdb and mv_build_name are read
///                     here, the whole object is passed to s_GetFullLinkoutUrl()
/// @param getIdentProteins  forwarded to s_GetFullLinkoutUrl()
/// @return the list produced by s_GetFullLinkoutUrl()
2712  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2713  bool getIdentProteins)
2714  {
2715  list<string> linkout_list;
2716  map<int, vector < CBioseq::TId > > linkout_map;
2717 
// Collect the linkout types available for this id before building the URLs.
2718  GetBdlLinkoutInfo(cur_id,linkout_map, linkoutInfo.linkoutdb, linkoutInfo.mv_build_name);
2719  linkout_list = s_GetFullLinkoutUrl(cur_id,
2720  linkoutInfo,
2721  linkout_map,
2722  getIdentProteins);
2723  return linkout_list;
2724  }
2725 
/// Build the linkout URL list for a single seq-id list from individual
/// settings, which are packed into an SLinkoutInfo first.
/// NOTE(review): the first line of this definition (return type, name and the
/// cur_id parameter) is absent from this listing.
/// @param rid ... preComputedResID  forwarded to SLinkoutInfo::Init(), except
///        cur_align and taxid which are assigned to the struct directly
/// @param linkoutdb, mv_build_name  used to collect the linkout types for cur_id
/// @param getIdentProteins  forwarded to s_GetFullLinkoutUrl()
/// @return the list produced by s_GetFullLinkoutUrl()
2727  const string& rid,
2728  const string& cdd_rid,
2729  const string& entrez_term,
2730  bool is_na,
2731  bool structure_linkout_as_group,
2732  bool for_alignment,
2733  int cur_align,
2734  string& linkoutOrder,
2735  TTaxId taxid,
2736  string &database,
2737  int query_number,
2738  string &user_url,
2739  string &preComputedResID,
2740  ILinkoutDB* linkoutdb,
2741  const string& mv_build_name,
2742  bool getIdentProteins)
2743 
2744  {
2745  list<string> linkout_list;
2746 
2747  map<int, vector < CBioseq::TId > > linkout_map;
// Collect the linkout types available for this id before building the URLs.
2748  GetBdlLinkoutInfo(cur_id,linkout_map, linkoutdb, mv_build_name);
2749  SLinkoutInfo linkoutInfo;
2750  linkoutInfo.Init(rid,
2751  cdd_rid,
2752  entrez_term,
2753  is_na,
2754  database,
2755  query_number,
2756  user_url,
2757  preComputedResID,
2758  linkoutOrder,
2759  structure_linkout_as_group,
2760  for_alignment);
2761 
2762  linkoutInfo.cur_align = cur_align;
2763  linkoutInfo.taxid = taxid;
2764 
2765  linkout_list = s_GetFullLinkoutUrl(cur_id,
2766  linkoutInfo,
2767  linkout_map,
2768  getIdentProteins);
2769  return linkout_list;
2770  }
2771 
2772 
2773 static bool FromRangeAscendingSort(CRange<TSeqPos> const& info1,
2774  CRange<TSeqPos> const& info2)
2775 {
2776  return info1.GetFrom() < info2.GetFrom();
2777 }
2778 
2779 //0 for query, 1 for subject
2780 //Gets query and subject range lists,oppositeStrands param
2781 static bool s_ProcessAlignSet(const CSeq_align_set& alnset,
2782  list<CRange<TSeqPos> > &query_list,
2783  list<CRange<TSeqPos> > &subject_list)
2784 {
2785  bool oppositeStrands = false;
2786  bool isFirst = false;
2787  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2788  CRange<TSeqPos> query_range = (*iter)->GetSeqRange(0);
2789  //for minus strand
2790  if(query_range.GetFrom() > query_range.GetTo()){
2791  query_range.Set(query_range.GetTo(), query_range.GetFrom());
2792  }
2793  query_list.push_back(query_range);
2794 
2795  CRange<TSeqPos> subject_range = (*iter)->GetSeqRange(1);
2796  //for minus strand
2797  if(subject_range.GetFrom() > subject_range.GetTo()){
2798  subject_range.Set(subject_range.GetTo(), subject_range.GetFrom());
2799  }
2800  subject_list.push_back(subject_range);
2801 
2802  oppositeStrands = (!isFirst) ? (*iter)->GetSeqStrand(0) != (*iter)->GetSeqStrand(1) : oppositeStrands;
2803  isFirst = true;
2804  }
2805 
2806  query_list.sort(FromRangeAscendingSort);
2807  subject_list.sort(FromRangeAscendingSort);
2808  return oppositeStrands;
2809 }
2810 
2811 
2812 
2813 //0 for query, 1 for subject
2814 static list<CRange<TSeqPos> > s_MergeRangeList(list<CRange<TSeqPos> > &source)
2815 {
2816 
2817  list<CRange<TSeqPos> > merge_list;
2818 
2819  bool is_first = true;
2820  CRange<TSeqPos> prev_range (0, 0);
2821  ITERATE(list<CRange<TSeqPos> >, iter, source) {
2822 
2823  if (is_first) {
2824  merge_list.push_back(*iter);
2825  is_first= false;
2826  prev_range = *iter;
2827  } else {
2828  if (prev_range.IntersectingWith(*iter)) {
2829  merge_list.pop_back();
2830  CRange<TSeqPos> temp_range = prev_range.CombinationWith(*iter);
2831  merge_list.push_back(temp_range);
2832  prev_range = temp_range;
2833  } else {
2834  merge_list.push_back(*iter);
2835  prev_range = *iter;
2836  }
2837  }
2838 
2839  }
2840  return merge_list;
2841 }
2842 
/// Compute the total length of row 0 covered by the alignments of a set,
/// counting intersecting ranges once.
/// NOTE(review): the signature line is absent from this listing; the body
/// reads a parameter named alnset and returns an int.
2844  {
2845 
2846  list<CRange<TSeqPos> > merge_list;
2847 
2848  list<CRange<TSeqPos> > temp;
2849  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2850  CRange<TSeqPos> seq_range = (*iter)->GetSeqRange(0);
2851  //for minus strand
2852  if(seq_range.GetFrom() > seq_range.GetTo()){
2853  seq_range.Set(seq_range.GetTo(), seq_range.GetFrom());
2854  }
2855  temp.push_back(seq_range);
2856  }
2857 
// s_MergeRangeList() only compares neighbours, hence the sort by start first.
2858  temp.sort(FromRangeAscendingSort);
2859 
2860  merge_list = s_MergeRangeList(temp);
2861 
2862  int master_covered_lenghth = 0;
2863  ITERATE(list<CRange<TSeqPos> >, iter, merge_list) {
2864  master_covered_lenghth += iter->GetLength();
2865  }
2866  return master_covered_lenghth;
2867  }
2868 
2869 
2870 
2871 CRange<TSeqPos> CAlignFormatUtil::GetSeqAlignCoverageParams(const CSeq_align_set& alnset,int *master_covered_lenghth,bool *flip)
2872 
2873 {
2874 
2875  list<CRange<TSeqPos> > query_list;
2876  list<CRange<TSeqPos> > subject_list;
2877 
2878  *flip = s_ProcessAlignSet(alnset,query_list,subject_list);
2879  query_list = s_MergeRangeList(query_list);
2880  subject_list = s_MergeRangeList(subject_list);
2881 
2882 
2883  *master_covered_lenghth = 0;
2884  ITERATE(list<CRange<TSeqPos> >, iter, query_list) {
2885  *master_covered_lenghth += iter->GetLength();
2886  }
2887 
2888  TSeqPos from = 0,to = 0;
2889  ITERATE(list<CRange<TSeqPos> >, iter, subject_list) {
2890  from = (from == 0) ? iter->GetFrom() : min(from,iter->GetFrom());
2891  to = max(to,iter->GetTo());
2892  }
2893  //cerr << "from,to = " << from << "," << to << endl;
2894  CRange<TSeqPos> subjectRange(from + 1, to + 1);
2895  return subjectRange;
2896 }
2897 
/// Re-order a seq-align set according to the requested database, hit and HSP
/// sort keys.
/// NOTE(review): the first lines of this definition (return type, name and the
/// first parameter, referred to as `ctx` in the body) are absent from this listing.
/// @param scope, linkoutdb, mv_build_name  forwarded to SplitSeqalignByMolecularType()
/// @param aln_set  alignments to sort
/// @param nuc_to_nuc_translation, hit_sort, hsp_sort  forwarded to
///        SortOneSeqalignForSortableFormat()
/// @param db_sort  forwarded to SplitSeqalignByMolecularType()
/// @return a CRef to aln_set itself when no sorting is requested, otherwise the
///         result of HitListToHspList() on the sorted hit lists
2900  CScope& scope,
2901  CSeq_align_set& aln_set,
2902  bool nuc_to_nuc_translation,
2903  int db_sort,
2904  int hit_sort,
2905  int hsp_sort,
2906  ILinkoutDB* linkoutdb,
2907  const string& mv_build_name) {
2908 
2909 
// Nothing to do: hand back the input set wrapped in a CRef.
2910  if (db_sort == 0 && hit_sort < 1 && hsp_sort < 1)
2911  return (CRef<CSeq_align_set>) &aln_set;
2912 
2913  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2914  vector< CRef<CSeq_align_set> > seqalign_vec(2);
2915  seqalign_vec[0] = new CSeq_align_set;
2916  seqalign_vec[1] = new CSeq_align_set;
2917 
// For a mixed database the alignments are split into the two sets; otherwise
// slot 0 is pointed at the input set and slot 1 stays empty.
2918  if(IsMixedDatabase(ctx)) {
2919  SplitSeqalignByMolecularType(seqalign_vec, db_sort, aln_set, scope,
2920  linkoutdb, mv_build_name);
2921  }else {
2922  seqalign_vec[0] = const_cast<CSeq_align_set*>(&aln_set);
2923  }
2924 
2925 
// Sort each set on its own and concatenate the results in slot order.
2926  ITERATE(vector< CRef<CSeq_align_set> >, iter, seqalign_vec){
2927  list< CRef<CSeq_align_set> > one_seqalign_hit_total_list = SortOneSeqalignForSortableFormat(**iter,
2928  nuc_to_nuc_translation,
2929  hit_sort,
2930  hsp_sort);
2931 
2932  seqalign_hit_total_list.splice(seqalign_hit_total_list.end(),one_seqalign_hit_total_list);
2933 
2934  }
2935 
2936  return HitListToHspList(seqalign_hit_total_list);
2937  }
/// Split a seq-align set into per-hit sets, sort the hits by hit_sort and the
/// HSPs inside every hit by hsp_sort.
/// NOTE(review): the line carrying the function name and the first parameter
/// (referred to as `source` in the body) is absent from this listing.
/// @param nuc_to_nuc_translation  forwarded to SortHitByPercentIdentityDescending()
/// @param hit_sort  hit ordering; values other than eTotalScore, eHighestScore,
///                  ePercentIdentity and eQueryCoverage leave the hit order as is
/// @param hsp_sort  HSP ordering inside each hit; unhandled values leave it as is
/// @return one CSeq_align_set per hit, in sorted order
2938  list< CRef<CSeq_align_set> >
2940  bool nuc_to_nuc_translation,
2941  int hit_sort,
2942  int hsp_sort)
2943  {
2944  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2945  list< CRef<CSeq_align_set> > seqalign_hit_list;
2946  HspListToHitList(seqalign_hit_list, source);
2947 
2948  if (hit_sort == eTotalScore) {
2949  seqalign_hit_list.sort(SortHitByTotalScoreDescending);
2950  } else if (hit_sort == eHighestScore) {
2951  seqalign_hit_list.sort(CAlignFormatUtil::SortHitByScoreDescending);
2952  } else if (hit_sort == ePercentIdentity) {
2953  SortHitByPercentIdentityDescending(seqalign_hit_list,
2954  nuc_to_nuc_translation);
2955  } else if (hit_sort == eQueryCoverage) {
2956  seqalign_hit_list.sort(SortHitByMasterCoverageDescending);
2957  }
2958 
2959  ITERATE(list< CRef<CSeq_align_set> >, iter2, seqalign_hit_list) {
2960  CRef<CSeq_align_set> temp(*iter2);
2961  if (hsp_sort == eQueryStart) {
2962  temp->Set().sort(SortHspByMasterStartAscending);
2963  } else if (hsp_sort == eHspPercentIdentity) {
// NOTE(review): the statement for this branch is on a line absent from this
// listing - presumably a sort by percent identity; confirm.
2965  } else if (hsp_sort == eScore) {
2966  temp->Set().sort(SortHspByScoreDescending);
2967  } else if (hsp_sort == eSubjectStart) {
2968  temp->Set().sort(SortHspBySubjectStartAscending);
2969 
2970  }
2971  seqalign_hit_total_list.push_back(temp);
2972  }
2973  return seqalign_hit_total_list;
2974  }
2975 
/// Re-order a single seq-align set according to the hit and HSP sort keys.
/// NOTE(review): the first lines of this definition (return type, name and the
/// first parameter, referred to as `aln_set` in the body) are absent from this listing.
/// @param nuc_to_nuc_translation, hit_sort, hsp_sort  forwarded to
///        SortOneSeqalignForSortableFormat()
/// @return a CRef to aln_set itself when neither key asks for more than the
///         e-value order, otherwise the re-ordered set
2978  bool nuc_to_nuc_translation,
2979  int hit_sort,
2980  int hsp_sort) {
2981 
// Keys at or below the e-value enumerators need no re-sorting here.
2982  if (hit_sort <= eEvalue && hsp_sort <= eHspEvalue) {
2983  return (CRef<CSeq_align_set>) &aln_set;
2984  }
2985 
2986  // seqalign_vec[0] = const_cast<CSeq_align_set*>(&aln_set);
2987  list< CRef<CSeq_align_set> > seqalign_hit_total_list = SortOneSeqalignForSortableFormat(aln_set,
2988  nuc_to_nuc_translation,
2989  hit_sort,
2990  hsp_sort);
2991  return HitListToHspList(seqalign_hit_total_list);
2992  }
2993 
2994 
/// Keep only the alignments whose e-value lies in [evalueLow, evalueHigh].
/// NOTE(review): the first line of this definition (return type, name and the
/// first parameter, referred to as `source_aln` in the body) is absent from this listing.
/// @param evalueLow   inclusive lower e-value bound
/// @param evalueHigh  inclusive upper e-value bound
/// @return new CSeq_align_set referencing the alignments that passed, in input order
2996  double evalueLow,
2997  double evalueHigh)
2998  {
2999  int score, sum_n, num_ident;
3000  double bits, evalue;
3001  list<TGi> use_this_gi;
3002 
3003  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3004 
3005  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
// Only evalue is used below; the other outputs are required by the call.
3006  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3007  sum_n, num_ident, use_this_gi);
3008  //Add the next three lines to re-calculate the seq-align evalue to the one that is displayed on the screen
3009  //string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3010  //CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3011  //evalue = NStr::StringToDouble(evalue_buf);
3012  if(evalue >= evalueLow && evalue <= evalueHigh) {
3013  new_aln->Set().push_back(*iter);
3014  }
3015  }
3016  return new_aln;
3017 
3018  }
3019 
3020 /// Returns percent match for an alignment.
3021 /// Normally we round up the value, unless that means that an
3022 /// alignment with mismatches would be 100%. In that case
3023 /// it becomes 99%.
3024 ///@param numerator: numerator in percent identity calculation.
3025 ///@param denominator: denominator in percent identity calculation.
3026 int CAlignFormatUtil::GetPercentMatch(int numerator, int denominator)
3027 {
3028  if (numerator == denominator)
3029  return 100;
3030  else {
3031  int retval =(int) (0.5 + 100.0*((double)numerator)/((double)denominator));
3032  retval = min(99, retval);
3033  return retval;
3034  }
3035 }
3036 
3037 double CAlignFormatUtil::GetPercentIdentity(int numerator, int denominator)
3038 {
3039  if (numerator == denominator)
3040  return 100;
3041  else {
3042  double retval =100*(double)numerator/(double)denominator;
3043  return retval;
3044  }
3045 }
3046 
/// Keep only the alignments whose percent identity lies in
/// [percentIdentLow, percentIdentHigh].
/// NOTE(review): the first line of this definition (return type, name and the
/// first parameter, referred to as `source_aln` in the body) is absent from this listing.
/// @param percentIdentLow   inclusive lower bound
/// @param percentIdentHigh  inclusive upper bound
/// @return new CSeq_align_set referencing the alignments that passed, in input order
3048  double percentIdentLow,
3049  double percentIdentHigh)
3050  {
3051  int score, sum_n, num_ident;
3052  double bits, evalue;
3053  list<TGi> use_this_gi;
3054 
3055  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3056 
3057  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3058  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3059  sum_n, num_ident, use_this_gi);
3060  int seqAlnLength = GetAlignmentLength(**iter, kTranslation);
// Alignments with no length or no identities are dropped regardless of the limits.
3061  if(seqAlnLength > 0 && num_ident > 0) {
// Unrounded percent identity is used for the comparison.
3062  double alnPercentIdent = GetPercentIdentity(num_ident, seqAlnLength);
3063  if(alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh) {
3064  new_aln->Set().push_back(*iter);
3065  }
3066  }
3067  }
3068  return new_aln;
3069  }
3070 
/// Keep only the alignments that satisfy both an e-value range and a percent
/// identity range.
/// NOTE(review): the first line of this definition (return type, name and the
/// first parameter, referred to as `source_aln` in the body) is absent from this listing.
/// @param evalueLow, evalueHigh  inclusive e-value bounds
/// @param percentIdentLow, percentIdentHigh  inclusive percent identity bounds
/// @return new CSeq_align_set referencing the alignments that passed, in input order
3072  double evalueLow,
3073  double evalueHigh,
3074  double percentIdentLow,
3075  double percentIdentHigh)
3076  {
3077  int score, sum_n, num_ident;
3078  double bits, evalue;
3079  list<TGi> use_this_gi;
3080 
3081  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3082 
3083  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3084  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3085  sum_n, num_ident, use_this_gi);
3086  //Add the next three lines to re-calculate the seq-align evalue to the one that is displayed on the screen
3087  //string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3088  //CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3089  //evalue = NStr::StringToDouble(evalue_buf);
3090  int seqAlnLength = GetAlignmentLength(**iter, kTranslation);
// Alignments with no length or no identities are dropped regardless of the limits.
3091  if(seqAlnLength > 0 && num_ident > 0) {
// NOTE(review): the integer GetPercentMatch() value (rounded, capped at 99 for
// imperfect matches) is compared here, whereas the percent-identity-only filter
// in this file compares the unrounded GetPercentIdentity() value.
3092  int alnPercentIdent = GetPercentMatch(num_ident, seqAlnLength);
3093  if( (evalue >= evalueLow && evalue <= evalueHigh) &&
3094  (alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh)) {
3095  new_aln->Set().push_back(*iter);
3096  }
3097  }
3098  }
3099  return new_aln;
3100  }
3101 
/// Round a value to two decimal places by formatting it with "%.2f" and
/// parsing the text back into a double.
/// NOTE(review): the signature line is absent from this listing; the body reads
/// a parameter named `value` and the caller in this file passes and receives a double.
3103  {
3104  char buffer[512];
// "%.*f" with precision 2: fixed notation, two digits after the decimal point.
3105  sprintf(buffer, "%.*f", 2, value);
3106  double newVal = NStr::StringToDouble(buffer);
3107  return newVal;
3108  }
3109 
3110 static bool s_isAlnInFilteringRange(double evalue,
3111  double percentIdent,
3112  int queryCover,
3113  double evalueLow,
3114  double evalueHigh,
3115  double percentIdentLow,
3116  double percentIdentHigh,
3117  int queryCoverLow,
3118  int queryCoverHigh)
3119 {
3120 
3121 
3122  bool isInRange = false;
3123  //Adjust percent identity and evalue to display values
3124  percentIdent = adjustPercentIdentToDisplayValue(percentIdent);
3125  string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3126  double bits = 0;
3127  CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3128  evalue = NStr::StringToDouble(evalue_buf);
3129 
3130  if(evalueLow >= 0 && percentIdentLow >= 0 && queryCoverLow >= 0) {
3131  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3132  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh) &&
3133  (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3134  }
3135  else if(evalueLow >= 0 && percentIdentLow >= 0) {
3136  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3137  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3138  }
3139  else if(evalueLow >= 0 && queryCoverLow >= 0) {
3140  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3141  (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3142  }
3143  else if(queryCoverLow >= 0 && percentIdentLow >= 0) {
3144  isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh) &&
3145  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3146  }
3147  else if(evalueLow >= 0) {
3148  isInRange = (evalue >= evalueLow && evalue <= evalueHigh);
3149  }
3150  else if(percentIdentLow >= 0) {
3151  isInRange = (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3152  }
3153  else if(queryCoverLow >= 0) {
3154  isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3155  }
3156  return isInRange;
3157 }
3158 
3160  double evalueLow,
3161  double evalueHigh,
3162  double percentIdentLow,
3163  double percentIdentHigh,
3164  int queryCoverLow,
3165  int queryCoverHigh)
3166 {
    // Groups the HSPs of source_aln into per-hit alignment sets, keeps the
    // hits for which s_isAlnInFilteringRange() returns true for the given
    // bounds, and returns the kept hits converted back to an HSP list.
    // NOTE(review): the first line of the signature and the declaration of
    // seqSetInfo are not visible in this listing - confirm against the header.
3167  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
3168  list< CRef<CSeq_align_set> > seqalign_hit_list;
3169 
3170  HspListToHitList(seqalign_hit_list, source_aln);
3171 
3172  ITERATE(list< CRef<CSeq_align_set> >, iter, seqalign_hit_list) {
3173  CRef<CSeq_align_set> temp(*iter);
3175 
    // seqSetInfo presumably holds the score parameters calculated for `temp`
    // (its declaration is on a line missing from this listing) - verify.
3176  if(s_isAlnInFilteringRange(seqSetInfo->evalue,
3177  seqSetInfo->percent_identity,
3178  seqSetInfo->percent_coverage,
3179  evalueLow,
3180  evalueHigh,
3181  percentIdentLow,
3182  percentIdentHigh,
3183  queryCoverLow,
3184  queryCoverHigh)) {
3185  seqalign_hit_total_list.push_back(temp);
3186  }
3187  }
3188  return HitListToHspList(seqalign_hit_total_list);
3189 }
3190 
3192  int maxAligns,
3193  int maxHsps)
3194 {
    // Copies alignments from source_aln into a newly allocated set, limited
    // by maxAligns (subjects per query) and maxHsps (total copied HSPs).
    // NOTE(review): the first line of the signature is not visible in this
    // listing - confirm the return type and first parameter in the header.
3195  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3196 
3197  CConstRef<CSeq_id> prevQueryId,prevSubjectId;
3198  int alignCount = 0,hspCount = 0;
3199  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3200  const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
    // A change of query id starts a new query: the HSP limit is tested only
    // here, and the per-query alignment counter is reset.
3201  if(prevQueryId.Empty() || !newQueryId.Match(*prevQueryId)){
3202  if (hspCount >= maxHsps) {
3203  break;
3204  }
3205  alignCount = 0;
3206  prevQueryId = &newQueryId;
3207  }
    // Once alignCount reaches maxAligns nothing more is copied for this
    // query, including further HSPs of the last counted subject.
3208  if (alignCount < maxAligns) {
3209  const CSeq_id& newSubjectId = (*iter)->GetSeq_id(1);
3210  // Increment alignments count if subject sequence is different
3211  if(prevSubjectId.Empty() || !newSubjectId.Match(*prevSubjectId)){
3212  ++alignCount;
3213  prevSubjectId = &newSubjectId;
3214  }
3215  // Increment HSP count if the alignments limit is not reached
3216  ++hspCount;
3217  new_aln->Set().push_back(*iter);
3218  }
3219 
3220  }
3221  return new_aln;
3222 }
3223 
3224 
3226  int queryNumber)
3227 {
    // Returns the alignments that belong to the queryNumber-th query found in
    // source_aln, counting from 1 in order of first appearance.
    // queryNumber == 0 returns source_aln itself. If no alignment matches,
    // the returned CRef is left empty (never allocated).
    // NOTE(review): the first line of the signature is not visible in this
    // listing - confirm the return type and first parameter in the header.
3228  if(queryNumber == 0) {
3229  return source_aln;
3230  }
3231  CRef<CSeq_align_set> new_aln;
3232 
3233  CConstRef<CSeq_id> prevQueryId;
3234  int currQueryNum = 0;
3235 
3236  ITERATE(CSeq_align_set::Tdata, iter, source_aln->Get()){
3237  const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
    // Each change of query id advances the running query number.
3238  if(prevQueryId.Empty() || !newQueryId.Match(*prevQueryId)){
3239  currQueryNum++;
3240  prevQueryId = &newQueryId;
3241  }
3242  //Record seq aligns corresponding to queryNumber
3243  if(currQueryNum == queryNumber) {
3244  if(new_aln.Empty()) {
3245  new_aln.Reset(new CSeq_align_set);
3246  }
3247  new_aln->Set().push_back(*iter);
3248  }
    // Past the requested query: nothing further is examined.
3249  else if(currQueryNum > queryNumber) {
3250  break;
3251  }
3252  }
3253  return new_aln;
3254 }
3255 
3256 
3258 {
    // Loads the formatter registry into m_Reg if it is not loaded yet.
    // The file name is the value of FMTCFG, or ".ncbirc" when FMTCFG is unset
    // or empty. It is looked up as given first and, if not found, under the
    // directory named by the NCBI environment variable. m_Reg stays unset
    // when no file is found.
    // NOTE(review): the signature and the declaration of l_dbg are on lines
    // missing from this listing - confirm against the full source.
3259  string l_cfg_file_name;
    // Setting GETURL_DEBUG in the environment turns on debug output.
3261  if( getenv("GETURL_DEBUG") ) CAlignFormatUtil::m_geturl_debug_flag = l_dbg = true;
3262  if( !m_Reg ) {
3263  bool cfgExists = true;
3264  string l_ncbi_env;
3265  string l_fmtcfg_env;
3266  if( NULL != getenv("NCBI") ) l_ncbi_env = getenv("NCBI");
3267  if( NULL != getenv("FMTCFG") ) l_fmtcfg_env = getenv("FMTCFG");
3268  // config file name: value of FMTCFG or default ( .ncbirc )
3269  if( l_fmtcfg_env.empty() )
3270  l_cfg_file_name = ".ncbirc";
3271  else
3272  l_cfg_file_name = l_fmtcfg_env;
3273  // checking existence of configuration file
3274  CFile l_fchecker( l_cfg_file_name );
3275  cfgExists = l_fchecker.Exists();
    // Fall back to $NCBI/<name>, adding a trailing '/' to $NCBI if needed.
3276  if( (!cfgExists) && (!l_ncbi_env.empty()) ) {
3277  if( l_ncbi_env.rfind("/") != (l_ncbi_env.length() -1 ))
3278  l_ncbi_env.append("/");
3279  l_cfg_file_name = l_ncbi_env + l_cfg_file_name;
3280  CFile l_fchecker2( l_cfg_file_name );
3281  cfgExists = l_fchecker2.Exists();
3282  }
3283  if(cfgExists) {
3284  CNcbiIfstream l_ConfigFile(l_cfg_file_name.c_str() );
3285  m_Reg.reset(new CNcbiRegistry(l_ConfigFile));
3286  if( l_dbg ) fprintf(stderr,"REGISTRY: %s\n",l_cfg_file_name.c_str());
3287  }
3288  }
3289  return;
3290 }
3291 
3292 //
3293 // get given url from registry file or return corresponding kNAME
3294 // value as default to preserve compatibility.
3295 //
3296 // algoritm:
3297 // 1) config file name is ".ncbirc" unless FMTCFG specifies another name
3298 // 2) try to read local configuration file before
3299 // checking location specified by the NCBI environment.
3300 // 3) if index != -1, use it as trailing version number for a key name,
3301 // ABCD_V0. try to read ABCD key if version variant doesn't exist.
3302 // 4) use INCLUDE_BASE_DIR key to specify base for all include files.
3303 // 5) treat "_FORMAT" key as filename first and string in second.
3304 // in case of existances of filename, read it starting from
3305 // location specified by INCLUDE_BASE_DIR key
3306 string CAlignFormatUtil::GetURLFromRegistry( const string url_name, int index){
3307  string result_url;
3308  string l_key, l_host_port, l_format;
3309  string l_secion_name = "BLASTFMTUTIL";
3310  string l_fmt_suffix = "_FORMAT";
3311  string l_host_port_suffix = "_HOST_PORT";
3312  string l_subst_pattern;
3313 
3314  if( !m_Reg ) {
3315  InitConfig();
3316  }
3317  if( !m_Reg ) return GetURLDefault(url_name,index); // can't read .ncbrc file
3318  string l_base_dir = m_Reg->Get(l_secion_name, "INCLUDE_BASE_DIR");
3319  if( !l_base_dir.empty() && ( l_base_dir.rfind("/") != (l_base_dir.length()-1)) ) {
3320  l_base_dir.append("/");
3321  }
3322 
3323 
3324  string default_host_port;
3325  string l_key_ndx;
3326  if( index >=0) {
3327  l_key_ndx = url_name + l_host_port_suffix + "_" + NStr::IntToString( index );
3328  l_subst_pattern="<@"+l_key_ndx+"@>";
3329  l_host_port = m_Reg->Get(l_secion_name, l_key_ndx); // try indexed
3330  }
3331  // next is initialization for non version/array type of settings
3332  if( l_host_port.empty()){ // not indexed or index wasn't found
3333  l_key = url_name + l_host_port_suffix; l_subst_pattern="<@"+l_key+"@>";
3334  l_host_port = m_Reg->Get(l_secion_name, l_key);
3335  }
3336  if( l_host_port.empty()) return GetURLDefault(url_name,index);
3337 
3338  // get format part
3339  l_key = url_name + l_fmt_suffix ; //"_FORMAT";
3340  l_key_ndx = l_key + "_" + NStr::IntToString( index );
3341  if( index >= 0 ){
3342  l_format = m_Reg->Get(l_secion_name, l_key_ndx);
3343  }
3344 
3345  if( l_format.empty() ) l_format = m_Reg->Get(l_secion_name, l_key);
3346  if( l_format.empty()) return GetURLDefault(url_name,index);
3347  // format found check wether this string or file name
3348  string l_format_file = l_base_dir + l_format;
3349  CFile l_fchecker( l_format_file );
3350  bool file_name_mode = l_fchecker.Exists();
3351  if( file_name_mode ) { // read whole content of the file to string buffer
3352  string l_inc_file_name = l_format_file;
3353  CNcbiIfstream l_file (l_inc_file_name.c_str(), ios::in|ios::binary|ios::ate);
3354  CT_POS_TYPE l_inc_size = l_file.tellg();
3355  // size_t l_buf_sz = (size_t) l_inc_size;
3356  char *l_mem = new char [ (size_t) l_inc_size + 1];
3357  memset( l_mem,0, (size_t) l_inc_size + 1 ) ;
3358  l_file.seekg( 0, ios::beg );
3359  l_file.read(l_mem, l_inc_size);
3360  l_file.close();
3361  l_format.erase(); l_format.reserve( (size_t)l_inc_size + 1 );
3362  l_format = l_mem;
3363  delete [] l_mem;
3364  }
3365 
3366  result_url = NStr::Replace(l_format,l_subst_pattern,l_host_port);
3367 
3368  if( result_url.empty()) return GetURLDefault(url_name,index);
3369  return result_url;
3370 }
3371 //
3372 // return default URL value for the given key.
3373 //
// The lookup name is url_name, or url_name + "_" + index when index >= 0.
// If the name is found in sm_TagUrlMap its value is returned after
// MapProtocol(); otherwise a diagnostic string (not a URL) is returned.
3374 string CAlignFormatUtil::GetURLDefault( const string url_name, int index) {
3375 
3376  string search_name = url_name;
    // NOTE(review): the declaration of url_it is on a line missing from this
    // listing - confirm against the full source.
3378  if( index >= 0 ) search_name += "_" + NStr::IntToString( index); // actual name for index value is NAME_{index}
3379 
3380  if( (url_it = sm_TagUrlMap.find( search_name ) ) != sm_TagUrlMap.end()) {
3381  string url_link = CAlignFormatUtil::MapProtocol(url_it->second);
3382  return url_link;
3383  }
3384 
    // Not found: build the diagnostic text. The index suffix is added for
    // any index other than -1, whereas the lookup above uses index >= 0.
3385  string error_msg = "CAlignFormatUtil::GetURLDefault:no_defualt_for"+url_name;
3386  if( index != -1 ) error_msg += "_index_"+ NStr::IntToString( index );
3387  return error_msg;
3388 }
3389 
3390 void
3392  CNcbiMatrix<int>& retval)
3393 {
    // Fills retval with the standard scoring matrix named matrix_name,
    // indexed by the character values taken from k_PSymbol.
    // retval is left as an empty 0x0 matrix when matrix_name is NULL or
    // blank, or when NCBISM_GetStandardMatrix() does not know the name.
    // NOTE(review): the line naming the function and its first parameter, and
    // the declaration of mtx, are missing from this listing - confirm against
    // the full source.
3394  retval.Resize(0, 0, -1);
3395  if (matrix_name == NULL ||
3396  NStr::TruncateSpaces(string(matrix_name)).empty()) {
3397  return;
3398  }
3399 
3400  const SNCBIPackedScoreMatrix* packed_mtx =
3401  NCBISM_GetStandardMatrix(matrix_name);
3402  if (packed_mtx == NULL) {
3403  return;
3404  }
    // Every cell starts at -1000; only symbol pairs from k_PSymbol, '*' and
    // 'U' are overwritten below.
3405  retval.Resize(k_NumAsciiChar, k_NumAsciiChar, -1000);
3406 
3408  NCBISM_Unpack(packed_mtx, &mtx);
3409 
3410  for(int i = 0; i < ePMatrixSize; ++i){
3411  for(int j = 0; j < ePMatrixSize; ++j){
3412  retval((size_t)k_PSymbol[i], (size_t)k_PSymbol[j]) =
3413  mtx.s[(size_t)k_PSymbol[i]][(size_t)k_PSymbol[j]];
3414  }
3415  }
    // '*' scores -4 against every symbol in k_PSymbol and 1 against itself.
3416  for(int i = 0; i < ePMatrixSize; ++i) {
3417  retval((size_t)k_PSymbol[i], '*') = retval('*',(size_t)k_PSymbol[i]) = -4;
3418  }
3419  retval('*', '*') = 1;
3420  // this is to count Selenocysteine to Cysteine matches as positive
3421  retval('U', 'U') = retval('C', 'C');
3422  retval('U', 'C') = retval('C', 'C');
3423  retval('C', 'U') = retval('C', 'C');
3424 }
3425 
3426 
3427 string CAlignFormatUtil::MapTemplate(string inpString,string tmplParamName,Int8 templParamVal)
3428 {
3429  string outString;
3430  string tmplParam = "<@" + tmplParamName + "@>";
3431  NStr::Replace(inpString,tmplParam,NStr::NumericToString(templParamVal),outString);
3432  return outString;
3433 }
3434 
3435 string CAlignFormatUtil::MapTemplate(string inpString,string tmplParamName,string templParamVal)
3436 {
3437  string outString;
3438  string tmplParam = "<@" + tmplParamName + "@>";
3439  NStr::Replace(inpString,tmplParam,templParamVal,outString);
3440  return outString;
3441 }
3442 
3443 string CAlignFormatUtil::MapSpaceTemplate(string inpString,string tmplParamName,string templParamVal, unsigned int maxParamValLength, int spacesFormatFlag)
3444 {
3445  templParamVal = AddSpaces(templParamVal, maxParamValLength, spacesFormatFlag);
3446  string outString = MapTemplate(inpString,tmplParamName,templParamVal);
3447 
3448  return outString;
3449 }
3450 
3451 
3452 string CAlignFormatUtil::AddSpaces(string paramVal, size_t maxParamValLength, int spacesFormatFlag)
3453 {
3454  //if(!spacePos.empty()) {
3455  string spaceString;
3456  if(maxParamValLength >= paramVal.size()) {
3457  size_t numSpaces = maxParamValLength - paramVal.size() + 1;
3458  if(spacesFormatFlag & eSpacePosToCenter) {
3459  numSpaces = numSpaces/2;
3460  }
3461  spaceString.assign(numSpaces,' ');
3462  }
3463  else {
3464  paramVal = paramVal.substr(0, maxParamValLength - 3) + "...";
3465  spaceString += " ";
3466  }
3467  if(spacesFormatFlag & eSpacePosAtLineEnd) {
3468  paramVal = paramVal + spaceString;
3469  }
3470  else if(spacesFormatFlag & eSpacePosToCenter) {
3471  paramVal = spaceString + paramVal + spaceString;
3472  }
3473  else {
3474  paramVal = spaceString + paramVal;
3475  }
3476  if(spacesFormatFlag & eAddEOLAtLineStart) paramVal = "\n" + paramVal;
3477  if(spacesFormatFlag & eAddEOLAtLineEnd) paramVal = paramVal + "\n";
3478  //}
3479 
3480  return paramVal;
3481 }
3482 
3483 
3484 
3486 {
    // Reads ".ncbirc" from the current directory on every call and returns
    // the BLASTFMTUTIL/PROTOCOL entry if present, otherwise "https:".
    // NOTE(review): the signature is on a line missing from this listing -
    // confirm against the full source.
3487  CNcbiIfstream config_file(".ncbirc");
3488  CNcbiRegistry config_reg(config_file);
3489  string httpProt = "https:";
3490  if(!config_reg.Empty()) {
3491  if(config_reg.HasEntry("BLASTFMTUTIL","PROTOCOL")) {
3492  httpProt = config_reg.Get("BLASTFMTUTIL","PROTOCOL");
3493  }
3494  }
3495  return httpProt;
3496 }
3497 
3498 /*
3499 if(no config file) protocol = "https:"
3500 if(no "BLASTFMTUTIL","PROTOCOL" entry in config file) protocol = "https:"
3501 if(there is entry in config) protocol = entry which could be blank = ""
3502 */
3503 string CAlignFormatUtil::MapProtocol(string url_link)
3504 {
3505  if(m_Protocol.empty()){
3506  if(!m_Reg) {
3507  InitConfig();
3508  }
3509  m_Protocol = (m_Reg && m_Reg->HasEntry("BLASTFMTUTIL","PROTOCOL")) ? m_Protocol = m_Reg->Get("BLASTFMTUTIL","PROTOCOL") : "https:";
3510  }
3511  url_link = CAlignFormatUtil::MapTemplate(url_link,"protocol",m_Protocol);
3512  return url_link;
3513 }
3514 
3515 static string s_MapCommonUrlParams(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo)
3516 {
3517  string db,logstr_moltype;
3518  if(seqUrlInfo->isDbNa) {
3519  db = "nucleotide";
3520  logstr_moltype = "nucl";
3521  } else {
3522  db = "protein";
3523  logstr_moltype ="prot";
3524  }
3525  string logstr_location = (seqUrlInfo->isAlignLink) ? "align" : "top";
3526  string url_link = CAlignFormatUtil::MapTemplate(urlTemplate,"db",db);
3527  url_link = CAlignFormatUtil::MapTemplate(url_link,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3528  url_link = CAlignFormatUtil::MapTemplate(url_link,"log",logstr_moltype + logstr_location);
3529  url_link = CAlignFormatUtil::MapTemplate(url_link,"blast_rank",seqUrlInfo->blast_rank);
3530  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",seqUrlInfo->rid);
3531  url_link = CAlignFormatUtil::MapTemplate(url_link,"acc",seqUrlInfo->accession);
3532  url_link = CAlignFormatUtil::MapProtocol(url_link);
3533  return url_link;
3534 }
3535 
3536 static string s_MapURLLink(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo, const CBioseq::TId& ids)
3537 {
3538  //Add specific blasttype/user_url template mapping here
3539  string url_link = urlTemplate;
3540  if (seqUrlInfo->user_url.find("sra.cgi") != string::npos) {
3541  string strRun, strSpotId,strReadIndex;
3542  if(s_GetSRASeqMetadata(ids,strRun,strSpotId,strReadIndex)) {
3543  url_link = CAlignFormatUtil::MapTemplate(url_link,"run",strRun);
3544  url_link = CAlignFormatUtil::MapTemplate(url_link,"spotid",strSpotId);
3545  url_link = CAlignFormatUtil::MapTemplate(url_link,"readindex",strReadIndex);
3546  }
3547  }
3548  //This maps generic params like log, blast_rank, rid
3549  url_link = s_MapCommonUrlParams(url_link, seqUrlInfo);
3550  return url_link;
3551 }
3552 
3553 
3554 
3555 bool CAlignFormatUtil::IsWGSPattern(string &wgsAccession)
3556 {
3557  //const string kWgsAccessionPattern = "^[A-Z]{4}[0-9]{8,10}(\.[0-9]+){0,1}$"; //example AUXO013124042 or AUXO013124042.1
3558  const unsigned int kWgsProjLength = 4;
3559  const unsigned int kWgsProjIDLengthMin = 8;
3560  const unsigned int kWgsProjIDLengthMax = 10;
3561  bool isWGS = true;
3562 
3563  if (wgsAccession.size() < 6) {
3564  return false;
3565  }
3566 
3567  if(NStr::Find(wgsAccession, ".") != NPOS) { //Accession has version AUXO013124042.1
3568  string version;
3569  NStr::SplitInTwo(wgsAccession,".",wgsAccession,version);
3570  }
3571 
3572  string wgsProj = wgsAccession.substr(0,kWgsProjLength);
3573  for (size_t i = 0; i < wgsProj.length(); i ++){
3574  if(!isalpha(wgsProj[i]&0xff)) {
3575  isWGS = false;
3576  break;
3577  }
3578  }
3579  if(isWGS) {
3580  string wgsId = wgsAccession.substr(kWgsProjLength);
3581  if(wgsId.length() >= kWgsProjIDLengthMin && wgsId.length() <= kWgsProjIDLengthMax) {
3582  for (size_t i = 0; i < wgsId.length(); i ++){
3583  if(!isdigit(wgsId[i]&0xff)) {
3584  isWGS = false;
3585  break;
3586  }
3587  }
3588  }
3589  else {
3590  isWGS = false;
3591  }
3592  }
3593  return isWGS;
3594 }
3595 
3596 
3597 bool CAlignFormatUtil::IsWGSAccession(string &wgsAccession, string &wgsProjName)
3598 {
3599  const unsigned int kWgsProgNameLength = 6;
3600  bool isWGS = IsWGSPattern(wgsAccession);
3601  if(isWGS) {
3602  wgsProjName = wgsAccession.substr(0,kWgsProgNameLength);
3603  }
3604  return isWGS;
3605 }
3606 
3607 
// Builds the sequence URL for the given ids, stores it in seqUrlInfo->seqUrl
// and returns it. Three cases are handled in order: WGS accessions (template
// mode only), ids with a text seq-id (Entrez URL), and general "TI" / local
// ids. The result may be empty when none of the cases applies.
3608 string CAlignFormatUtil::GetIDUrlGen(SSeqURLInfo *seqUrlInfo,const CBioseq::TId* ids)
3609 {
3610  string url_link = NcbiEmptyString;
    // NOTE(review): the declaration of `wid` is on a line missing from this
    // listing; presumably it is the id selected from *ids - verify.
3612 
3613  bool hasTextSeqID = GetTextSeqID(*ids);
3614  string title = "title=\"Show report for " + seqUrlInfo->accession + "\" ";
3615 
3616  string temp_class_info = kClassInfo; temp_class_info += " ";
3617  string wgsProj;
3618  string wgsAccession = seqUrlInfo->accession;
3619  bool isWGS = false;
    // Local and general ids are never treated as WGS accessions.
3620  if (!(wid->Which() == CSeq_id::e_Local || wid->Which() == CSeq_id::e_General)){
3621  isWGS = CAlignFormatUtil::IsWGSAccession(wgsAccession, wgsProj);
3622  }
3623  if(isWGS && seqUrlInfo->useTemplates) {
3624  string wgsUrl = CAlignFormatUtil::GetURLFromRegistry("WGS");
3625  url_link = s_MapCommonUrlParams(wgsUrl, seqUrlInfo);
3626  url_link = CAlignFormatUtil::MapTemplate(url_link,"wgsproj",wgsProj);
3627  url_link = CAlignFormatUtil::MapTemplate(url_link,"wgsacc", wgsAccession);
3628  }
3629  else if (hasTextSeqID) {
    // Entrez link: "ENTREZ_TM" in template mode, "ENTREZ" otherwise.
3630  string entrezTag = (seqUrlInfo->useTemplates) ? "ENTREZ_TM" : "ENTREZ";
3631  string l_EntrezUrl = CAlignFormatUtil::GetURLFromRegistry(entrezTag);
3632  url_link = s_MapCommonUrlParams(l_EntrezUrl, seqUrlInfo);
3633 
3634  if(!seqUrlInfo->useTemplates) {
3635  url_link = CAlignFormatUtil::MapTemplate(url_link,"acc",seqUrlInfo->accession);
3636  temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",NStr::JavaScriptEncode(seqUrlInfo->defline)):temp_class_info;
3637  url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3638  url_link = CAlignFormatUtil::MapTemplate(url_link,"target",seqUrlInfo->new_win ? "TARGET=\"EntrezView\"" : "");
3639  }
3640 
3641  } else {//seqid general, dbtag specified
3642  if(wid->Which() == CSeq_id::e_General){
3643  const CDbtag& dtg = wid->GetGeneral();
3644  const string& dbname = dtg.GetDb();
    // Only the "TI" database (compared case-insensitively) gets a URL here.
3645  if(NStr::CompareNocase(dbname, "TI") == 0){
3646  string actual_id = CAlignFormatUtil::GetGnlID(dtg);
3647  if(seqUrlInfo->useTemplates) {
3648  string l_TraceUrl = CAlignFormatUtil::GetURLFromRegistry("TRACE_CGI");
3649  url_link = l_TraceUrl + (string)"?cmd=retrieve&dopt=fasta&val=" + actual_id + "&RID=" + seqUrlInfo->rid;
3650  }
3651  else {
3652  url_link = CAlignFormatUtil::MapTemplate(kTraceUrl,"val",actual_id);
3653  temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",seqUrlInfo->defline):temp_class_info;
3654  url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3655  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",seqUrlInfo->rid);
3656  }
3657  }
3658  } else if (wid->Which() == CSeq_id::e_Local){
3659 
    // NOTE(review): url_holder is fetched but not used below.
3660  string url_holder = CAlignFormatUtil::GetURLFromRegistry("LOCAL_ID");
3661 
3662  string user_url;
    // The template comes from the LOCAL_ID registry section; it stays empty
    // when no registry is loaded.
3663  if (m_Reg) {
3664  user_url = (seqUrlInfo->addCssInfo) ? m_Reg->Get("LOCAL_ID","TOOL_URL_ALIGN") : m_Reg->Get("LOCAL_ID","TOOL_URL");
3665  }
3666  string id_string;
3667  wid->GetLabel(&id_string, CSeq_id::eContent);
3668  url_link = CAlignFormatUtil::MapTemplate(user_url,"seq_id", NStr::URLEncode(id_string));
3669  url_link = CAlignFormatUtil::MapTemplate(url_link,"db_name", NStr::URLEncode(seqUrlInfo->database));
3670  url_link = CAlignFormatUtil::MapTemplate(url_link,"taxid", TAX_ID_TO(int, seqUrlInfo->taxid));
3671  temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",seqUrlInfo->defline):temp_class_info;
3672  url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3673  url_link = CAlignFormatUtil::MapTemplate(url_link,"title", id_string);
3674  url_link = CAlignFormatUtil::MapTemplate(url_link,"target",seqUrlInfo->new_win ? "TARGET=\"EntrezView\"" : "");
3675  }
3676  }
3677  url_link = CAlignFormatUtil::MapProtocol(url_link);
3678  seqUrlInfo->seqUrl = url_link;
3679  return url_link;
3680 }
3681 
3682 string CAlignFormatUtil::GetIDUrlGen(SSeqURLInfo *seqUrlInfo,const CSeq_id& id,objects::CScope &scope)
3683 {
3684  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3685  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3686 
3687  string url_link = GetIDUrlGen(seqUrlInfo,ids);
3688  return url_link;
3689 }
3690 
// Builds the URL (or, outside template mode, the opening <a ...> tag) for a
// sequence, stores it in seqUrlInfo->seqUrl and returns it. A user-supplied
// tool URL is used when present, except for dumpgnl.cgi with a gi and for
// maps.cgi; otherwise GetIDUrlGen() builds the link.
3691 string CAlignFormatUtil::GetIDUrl(SSeqURLInfo *seqUrlInfo,const CBioseq::TId* ids)
3692 {
3693  string url_link = NcbiEmptyString;
    // NOTE(review): one source line is missing from this listing here.
3695 
3696  string title = "title=\"Show report for " + seqUrlInfo->accession + "\" ";
3697 
3698  if (seqUrlInfo->user_url != NcbiEmptyString &&
3699  !((seqUrlInfo->user_url.find("dumpgnl.cgi") != string::npos && seqUrlInfo->gi > ZERO_GI) ||
3700  (seqUrlInfo->user_url.find("maps.cgi") != string::npos))) {
3701 
3702  string url_with_parameters,toolURLParams;
    // Extra URL parameters come from the registry section named after the
    // blast type, key TOOL_URL_PARAMS ("newblast" is excluded).
3703  if(m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType != "newblast") {
3704  toolURLParams = m_Reg->Get(seqUrlInfo->blastType, "TOOL_URL_PARAMS");
3705  }
3706  if(!toolURLParams.empty()) {
3707  string urlLinkTemplate = seqUrlInfo->user_url + toolURLParams;
3708  url_with_parameters = s_MapURLLink(urlLinkTemplate, seqUrlInfo, *ids);
3709  }
3710  else {
    // No registry parameters: build the URL with the SRA or generic builder.
3711  if (seqUrlInfo->user_url.find("sra.cgi") != string::npos) {
3712  url_with_parameters = CAlignFormatUtil::BuildSRAUrl(*ids, seqUrlInfo->user_url);
3713  }
3714  else {
3715  url_with_parameters = CAlignFormatUtil::BuildUserUrl(*ids, seqUrlInfo->taxid, seqUrlInfo->user_url,
3716  seqUrlInfo->database,
3717  seqUrlInfo->isDbNa, seqUrlInfo->rid,
3718  seqUrlInfo->queryNumber,
3719  seqUrlInfo->isAlignLink);
3720  }
3721  }
3722  if (url_with_parameters != NcbiEmptyString) {
    // Outside template mode the URL is wrapped as <a title=... href="URL">.
3723  if (!seqUrlInfo->useTemplates) {
3724  string deflineInfo;
3725  if(seqUrlInfo->addCssInfo) {
3726  deflineInfo = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(kClassInfo,"defline",seqUrlInfo->defline):kClassInfo;
3727  }
3728  url_link += "<a " + title + deflineInfo + "href=\"";
3729  }
3730  url_link += url_with_parameters;
3731  if (!seqUrlInfo->useTemplates) url_link += "\">";
3732  }
3733  }
3734  else {
3735  //use entrez or dbtag specified
3736  url_link = GetIDUrlGen(seqUrlInfo,ids);
3737  }
3738  seqUrlInfo->seqUrl = url_link;
3739  return url_link;
3740 }
3741 
3742 
3743 string CAlignFormatUtil::GetIDUrl(SSeqURLInfo *seqUrlInfo,const CSeq_id& id,objects::CScope &scope)
3744 {
3745  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3746  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3747 
3748 
3749  seqUrlInfo->blastType = NStr::TruncateSpaces(NStr::ToLower(seqUrlInfo->blastType));
3750 
3751  if(seqUrlInfo->taxid == INVALID_TAX_ID) { //taxid is not set
3752  seqUrlInfo->taxid = ZERO_TAX_ID;
3753  if ((seqUrlInfo->advancedView || seqUrlInfo->blastType == "mapview" || seqUrlInfo->blastType == "mapview_prev") ||
3754  seqUrlInfo->blastType == "gsfasta" || seqUrlInfo->blastType == "gsfasta_prev") {
3755  seqUrlInfo->taxid = GetTaxidForSeqid(id, scope);
3756  }
3757  }
3758  string url_link = GetIDUrl(seqUrlInfo,ids);
3759  return url_link;
3760 }
3761 
3762 //static const char kGenericLinkTemplate[] = "<a href=\"<@url@>\" target=\"lnk<@rid@>\" title=\"Show report for <@seqid@>\"><@gi@><@seqid@></a>";
3764 {
    // Builds a complete link for the sequence: the URL from GetIDUrl() is
    // placed into kGenericLinkMouseoverTmpl (when addCssInfo is set) or
    // kGenericLinkTemplate, and the rid, seqid, gi, target and (optionally)
    // defline placeholders are filled in. Returns an empty string when no
    // URL could be built.
    // NOTE(review): the signature is on a line missing from this listing -
    // confirm against the full source.
3765  string seqLink;
3766  string linkURL = GetIDUrl(seqUrlInfo,ids);
3767  if(!linkURL.empty()) {
3768  string linkTmpl = (seqUrlInfo->addCssInfo) ? kGenericLinkMouseoverTmpl : kGenericLinkTemplate;
3769  seqLink = CAlignFormatUtil::MapTemplate(linkTmpl,"url",linkURL);
3770  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"rid",seqUrlInfo->rid);
3771  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"seqid",seqUrlInfo->accession);
3772  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3773  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"target","EntrezView");
3774  if(seqUrlInfo->addCssInfo) {
    // The defline is JavaScript-encoded before being substituted.
3775  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"defline",NStr::JavaScriptEncode(seqUrlInfo->defline));
3776  }
3777  }
3778  return seqLink;
3779 }
3780 
3781 static string s_MapCustomLink(string linkUrl,string reportType,string accession, string linkText, string linktrg, string linkTitle = kCustomLinkTitle,string linkCls = "")
3782 {
3783  string link = CAlignFormatUtil::MapTemplate(kCustomLinkTemplate,"custom_url",linkUrl);
3784  link = CAlignFormatUtil::MapProtocol(link);
3785  link = CAlignFormatUtil::MapTemplate(link,"custom_title",linkTitle);
3786  link = CAlignFormatUtil::MapTemplate(link,"custom_report_type",reportType);
3787  link = CAlignFormatUtil::MapTemplate(link,"seqid",accession);
3788  link = CAlignFormatUtil::MapTemplate(link,"custom_lnk_displ",linkText);
3789  link = CAlignFormatUtil::MapTemplate(link,"custom_cls",linkCls);
3790  link = CAlignFormatUtil::MapTemplate(link,"custom_trg",linktrg);
3791  return link;
3792 }
3793 
3794 
3795 
3797  bool hspRange)
3798 {
    // Returns a list holding one GenBank (nucleotide) or GenPept (protein)
    // link when seqUrlInfo->hasTextSeqID is set; otherwise an empty list.
    // With hspRange set, from/to placeholders are appended to the URL and
    // used in the link title.
    // NOTE(review): the first line of the signature is not visible in this
    // listing - confirm against the header.
3799  list<string> customLinksList;
3800  if (seqUrlInfo->hasTextSeqID) {
3801  //First show links to GenBank and FASTA
3802  string linkUrl,link,linkTiltle = kCustomLinkTitle;
3803 
3804  linkUrl = seqUrlInfo->seqUrl;
    // When the stored URL is not a GenBank report URL, rebuild it from
    // kEntrezTMUrl.
3805  if(NStr::Find(linkUrl, "report=genbank") == NPOS) { //Geo case
3806  linkUrl = s_MapCommonUrlParams(kEntrezTMUrl, seqUrlInfo);
3807  }
3808  string linkText = (seqUrlInfo->isDbNa) ? "GenBank" : "GenPept";
3809  if(hspRange) {
3810  linkUrl += "&from=<@fromHSP@>&to=<@toHSP@>";
3811  linkTiltle = "Aligned region spanning positions <@fromHSP@> to <@toHSP@> on <@seqid@>";
3812  }
3813  link = s_MapCustomLink(linkUrl,"genbank",seqUrlInfo->accession,linkText,"lnk" + seqUrlInfo->rid,linkTiltle);
3814  customLinksList.push_back(link);
3815  }
3816  return customLinksList;
3817 }
3818 
3820  bool hspRange)
3821 {
    // Builds the "Graphics" (sequence viewer) link for the sequence described
    // by seqUrlInfo and returns it.
    // NOTE(review): the first line of the signature is not visible in this
    // listing - confirm against the header.
3822  //seqviewer
3823  string dbtype = (seqUrlInfo->isDbNa) ? "nuccore" : "protein";
    // A different URL template is used when the sequence has no gi.
3824  string seqViewUrl = (seqUrlInfo->gi > ZERO_GI)?kSeqViewerUrl:kSeqViewerUrlNonGi;
3825 
3826  string linkUrl = CAlignFormatUtil::MapTemplate(seqViewUrl,"rid",seqUrlInfo->rid);
3827 
    // Viewer parameters come from the registry section named after the blast
    // type (key SEQVIEW_PARAMS), falling back to kSeqViewerParams.
3828  string seqViewerParams;
3829  if(m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType != "newblast") {
3830  seqViewerParams = m_Reg->Get(seqUrlInfo->blastType, "SEQVIEW_PARAMS");
3831  }
3832  seqViewerParams = seqViewerParams.empty() ? kSeqViewerParams : seqViewerParams;
3833  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"seqViewerParams",seqViewerParams);
3834 
3835  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"dbtype",dbtype);
3836  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3837  string linkTitle = "Show alignment to <@seqid@> in <@custom_report_type@>";
3838  string link_loc;
    // Without an HSP range the whole seqRange is shown, widened by 5% on each
    // side with the start clamped at 0; with an HSP range the from/to
    // placeholders are left in place and only the title is extended.
3839  if(!hspRange) {
3840  int addToRange = (int) ((seqUrlInfo->seqRange.GetTo() - seqUrlInfo->seqRange.GetFrom()) * 0.05);//add 5% to each side
3841  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"from",max(0,(int)seqUrlInfo->seqRange.GetFrom() - addToRange));
3842  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"to",seqUrlInfo->seqRange.GetTo() + addToRange);
3843  link_loc = "fromSubj";
3844  //linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"flip",NStr::BoolToString(seqUrlInfo->flip));
3845  }
3846  else {
3847  link_loc = "fromHSP";
3848  linkTitle += " for <@fromHSP@> to <@toHSP@> range";
3849  }
3850  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"link_loc",link_loc);
3851 
3852  string title = (seqUrlInfo->isDbNa) ? "Nucleotide Graphics" : "Protein Graphics";
3853 
3854  string link = s_MapCustomLink(linkUrl,title,seqUrlInfo->accession, "Graphics","lnk" + seqUrlInfo->rid,linkTitle,"spr");
3855 
3856  return link;
3857 }
3858 
3860  bool hspRange)
3861 {
    // Returns the links from GetGiLinksList() followed by the graphics link
    // from GetGraphiscLink(), when that link is not empty.
    // NOTE(review): the first line of the signature is not visible in this
    // listing - confirm against the header.
3862  list<string> customLinksList = GetGiLinksList(seqUrlInfo,hspRange); //ONLY FOR genBank seqUrlInfo->seqUrl has "report=genbank"
3863  string graphicLink = GetGraphiscLink(seqUrlInfo,hspRange);
3864  if(!graphicLink.empty()) {
3865  customLinksList.push_back(graphicLink);
3866  }
3867  return customLinksList;
3868 }
3869 
3870 int CAlignFormatUtil::SetCustomLinksTypes(SSeqURLInfo *seqUrlInfo, int customLinkTypesInp)
3871 {
3872  int customLinkTypes = customLinkTypesInp;
3873  if ( seqUrlInfo->gi > ZERO_GI) {
3874  customLinkTypes +=eLinkTypeGenLinks;
3875  }
3876  //else if(NStr::StartsWith(seqUrlInfo->accession,"ti:")) {//seqUrlInfo->seqUrl has "trace.cgi"
3877  else if(NStr::Find(seqUrlInfo->seqUrl,"trace.cgi") != NPOS ){
3878  customLinkTypes +=eLinkTypeTraceLinks;
3879  }
3880  else if(seqUrlInfo->blastType == "sra") {//seqUrlInfo->seqUrl has sra.cgi
3881  customLinkTypes +=eLinkTypeSRALinks;
3882  }
3883  else if(seqUrlInfo->blastType == "snp") {//seqUrlInfo->seqUrl has snp_ref.cgi
3884  customLinkTypes +=eLinkTypeSNPLinks;
3885  }
3886  else if(seqUrlInfo->blastType == "gsfasta") {//seqUrlInfo->seqUrl has GSfasta.cgi
3887  customLinkTypes +=eLinkTypeGSFastaLinks;
3888  }
3889  return customLinkTypes;
3890 }
3891 
3892 
3893 //kCustomLinkTemplate:
3894 //<a href="<@custom_url@>" class="<@custom_cls@>" title="Show <@custom_report_type@> report for <@seqid@>"><@custom_lnk_displ@></a>
3896  const CSeq_id& id,
3897  objects::CScope &scope,
3898  int customLinkTypes)
3899 {
    // Returns the list of custom links for a sequence: the links from
    // GetSeqLinksList() followed by the links for the first matching type
    // among Trace, SRA, SNP and GSFASTA.
    // NOTE(review): the first line of the signature is not visible in this
    // listing; `id` and `scope` are not used in the visible body.
3900  list<string> customLinksList;
3901  string linkUrl,link;
3902 
3903  customLinkTypes = SetCustomLinksTypes(seqUrlInfo, customLinkTypes);
3904  //First show links to GenBank and FASTA, then to Graphics
3905  customLinksList = GetSeqLinksList(seqUrlInfo);
3906  if(customLinkTypes & eLinkTypeTraceLinks) {
    // The FASTA, Trace, Quality and Info links are all derived from seqUrl
    // by replacing "fasta" in it.
3907  linkUrl = seqUrlInfo->seqUrl;
3908  link = s_MapCustomLink(linkUrl,"Trace Archive FASTA",seqUrlInfo->accession, "FASTA","lnk" + seqUrlInfo->rid);
3909  customLinksList.push_back(link);
3910 
3911  linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","trace");
3912  link = s_MapCustomLink(linkUrl,"Trace Archive Trace",seqUrlInfo->accession, "Trace","lnk" + seqUrlInfo->rid);
3913  customLinksList.push_back(link);
3914 
3915  linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","quality");
3916  link = s_MapCustomLink(linkUrl,"Trace Archive Quality",seqUrlInfo->accession, "Quality","lnk" + seqUrlInfo->rid);
3917  customLinksList.push_back(link);
3918 
3919  linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","info");
3920  link = s_MapCustomLink(linkUrl,"Trace Archive Info",seqUrlInfo->accession, "Info","lnk" + seqUrlInfo->rid);
3921  customLinksList.push_back(link);
3922  }
3923  else if(customLinkTypes & eLinkTypeSRALinks) {
3924  linkUrl = seqUrlInfo->seqUrl;
3925  link = s_MapCustomLink(linkUrl,"SRA",seqUrlInfo->accession, "SRA","lnk" + seqUrlInfo->rid);
3926  customLinksList.push_back(link);
3927  }
3928  else if(customLinkTypes & eLinkTypeSNPLinks) {
3929  linkUrl = seqUrlInfo->seqUrl;
3930  link = s_MapCustomLink(linkUrl,"SNP",seqUrlInfo->accession, "SNP","lnk" + seqUrlInfo->rid);
3931  customLinksList.push_back(link);
3932 
3933 
3934  //SNP accession=rs35885954
    // Every "rs" in the accession is removed to get the numeric id; the
    // later links reuse the same URL with the report type replaced.
3935  string rs = NStr::Replace(seqUrlInfo->accession,"rs","");
3936  linkUrl = seqUrlInfo->resourcesUrl + rs + "?report=FLT";
3937 
3938 
3939  link = s_MapCustomLink(linkUrl,"Flatfile",seqUrlInfo->accession, "Flatfile","lnk" + seqUrlInfo->rid);
3940  customLinksList.push_back(link);
3941 
3942  linkUrl = NStr::Replace(linkUrl,"FLT","fasta");
3943  link = s_MapCustomLink(linkUrl,"FASTA",seqUrlInfo->accession, "FASTA","lnk" + seqUrlInfo->rid);
3944  customLinksList.push_back(link);
3945 
3946  linkUrl = NStr::Replace(linkUrl,"fasta","docsum");
3947  link = s_MapCustomLink(linkUrl,"Graphic summary ",seqUrlInfo->accession, "Graphic summary ","lnk" + seqUrlInfo->rid);
3948  customLinksList.push_back(link);
3949  }
3950  else if(customLinkTypes & eLinkTypeGSFastaLinks) {
3951  linkUrl = seqUrlInfo->seqUrl;
3952  link = s_MapCustomLink(linkUrl,"GSFASTA",seqUrlInfo->accession, "GSFASTA","lnk" + seqUrlInfo->rid);
3953  customLinksList.push_back(link);
3954  }
3955  return customLinksList;
3956 }
3957 
3958 
3960  const CSeq_id& id,
3961  objects::CScope &scope)
3962 {
    // Builds the download URL for the aligned regions of the sequence `id`:
    // BuildUserUrl() is called with kDownloadUrl and ZERO_TAX_ID, and
    // "&segs=" plus seqUrlInfo->segs is appended when the URL is not empty.
    // NOTE(review): the first line of the signature is not visible in this
    // listing; the local `link` is declared but not used.
3963  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3964  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3965  string linkUrl,link;
3966 
3967 
3968  linkUrl = CAlignFormatUtil::BuildUserUrl(*ids,
3969  ZERO_TAX_ID,
3970  kDownloadUrl,
3971  seqUrlInfo->database,
3972  seqUrlInfo->isDbNa,
3973  seqUrlInfo->rid,
3974  seqUrlInfo->queryNumber,
3975  true);
3976  if(!linkUrl.empty()) {
3977  linkUrl += "&segs="+ seqUrlInfo->segs;
3978  }
3979 
3980  return linkUrl;
3981 }
3982 
3983 
3984 
3986  const CSeq_id& id,
3987  objects::CScope &scope)
3988 
3989 {
    // Returns the FASTA report URL for the sequence, or an empty string when
    // the link type is neither Gen/Trace nor SNP.
    // NOTE(review): the first line of the signature is not visible in this
    // listing; `id` and `scope` are not used in the visible body.
3990  string linkUrl;
3991 
3992  int customLinkTypes = SetCustomLinksTypes(seqUrlInfo, CAlignFormatUtil::eLinkTypeDefault);
3993 
    // Gen and Trace links: reuse seqUrl with "genbank" replaced by "fasta".
3994  if( (customLinkTypes & eLinkTypeGenLinks) || (customLinkTypes & eLinkTypeTraceLinks)){
3995  linkUrl = seqUrlInfo->seqUrl;
3996  linkUrl = NStr::Replace(linkUrl,"genbank","fasta");
3997  }
3998  else if(customLinkTypes & eLinkTypeSNPLinks) {
    // NOTE(review): this assignment is overwritten below before being used.
3999  linkUrl = seqUrlInfo->seqUrl;
4000  vector<string> parts;
4001  //SNP accession=dbSNP:rs35885954
    // The second piece of the split accession is used as the rs number; it
    // stays empty when the split yields a single piece.
4002  NStr::Split(seqUrlInfo->accession,":rs",parts,NStr::fSplit_MergeDelimiters);
4003  string rs;
4004  if(parts.size() > 1) {
4005  rs = parts[1];
4006  }
4007  linkUrl = seqUrlInfo->resourcesUrl + rs + "?report=fasta";
4008  }
4009  return linkUrl;
4010 }
4011 
/// Look up the gene symbol associated with a GI.
///
/// Lazily creates m_GeneInfoReader on first use, queries it for the GI and
/// returns the symbol of the first entry in the result list.
/// Exceptions are not propagated: they are reported on cerr and turned into a
/// parenthesized error message that is returned in place of the symbol.
///
/// @param giForGeneLookup  GI to query
/// @return the gene symbol, an empty string if no info was found, or an
///         error message string if the lookup threw
/// NOTE(review): listing lines 4017-4018 and 4027 are missing from this
/// excerpt (presumably the condition guarding the inner block and the
/// declaration of infoList) -- confirm against the full file.
4012 string CAlignFormatUtil::GetGeneInfo(TGi giForGeneLookup)
4013 {
4014  string geneSym;
4015  try
4016  {
4019  {
4020 
      // Create the reader only once; later calls reuse it.
4021  if (m_GeneInfoReader.get() == 0)
4022  {
4023  m_GeneInfoReader.reset(new CGeneInfoFileReader(false));
4024  }
4025 
4026 
4028  m_GeneInfoReader->GetGeneInfoForGi(giForGeneLookup,infoList);
4029 
4030  CGeneInfoFileReader::TGeneInfoList::const_iterator itInfo = infoList.begin();
      // Only the first entry is used: the loop breaks after one iteration.
4031  for (; itInfo != infoList.end(); itInfo++)
4032  {
4033  CRef<CGeneInfo> info = *itInfo;
4034  geneSym = info->GetSymbol();
4035  break;//???
4036  }
4037  }
4038  }
4039  catch (CException& e)
4040  {
4041  geneSym = "(Gene info extraction error: " + e.GetMsg() + ")";
4042  cerr << "[BLAST FORMATTER EXCEPTION] Gene info extraction error: " << e.GetMsg() << endl;
4043  }
4044  catch (...)
4045  {
4046  geneSym = "(Gene info extraction error)";
4047  cerr << "[BLAST FORMATTER EXCEPTION] Gene info extraction error " << endl;
4048  }
4049  return geneSym;
4050 }
4051 
4052 
4054 {
      // Classifies the database from the subject id of the FIRST alignment in
      // actual_aln_list: eDbGi when the Bioseq has a GI or the subject id is a
      // text seq-id, eDbGeneral when it is a general id whose db is "TI"
      // (case-insensitive); otherwise `type` keeps its initial value.
      // NOTE(review): the signature and the declaration/initial value of `type`
      // (listing lines 4053 and 4057) are not visible in this excerpt.
      // NOTE(review): front() is called without checking that the list is
      // non-empty -- confirm callers guarantee at least one alignment.
4055  //determine if the database has gi by looking at the 1st hit.
4056  //Could be wrong but simple for now
4058  CRef<CSeq_align> first_aln = actual_aln_list.Get().front();
4059  const CSeq_id& subject_id = first_aln->GetSeq_id(1);
4060 
      // Local ids are never looked up in the scope.
4061  if (subject_id.Which() != CSeq_id::e_Local){
4062  const CBioseq_Handle& handleTemp = scope.GetBioseqHandle(subject_id);
4063  if(handleTemp){
4064  TGi giTemp = FindGi(handleTemp.GetBioseqCore()->GetId());
4065  if (giTemp > ZERO_GI || GetTextSeqID((CConstRef<CSeq_id>)&subject_id)) {
4066  type = eDbGi;
4067  } else if (subject_id.Which() == CSeq_id::e_General){
4068  const CDbtag& dtg = subject_id.GetGeneral();
4069  const string& dbName = dtg.GetDb();
4070  if(NStr::CompareNocase(dbName, "TI") == 0){
4071  type = eDbGeneral;
4072  }
4073  }
4074  }
4075  }
4076  return type;
4077 }
4078 
4081 {
      // Allocates an SSeqAlignSetCalcParams and fills it from the scores stored
      // on the single alignment `aln` (via GetAlnScores). Ownership of the
      // returned raw pointer passes to the caller (unique_ptr::release()).
      // NOTE(review): the signature (listing lines 4079-4080) is not visible in
      // this excerpt.
4082  int score = 0;
4083  double bits = 0;
4084  double evalue = 0;
4085  int sum_n = 0;
4086  int num_ident = 0;
4087  list<TGi> use_this_gi;
4088 
4089  use_this_gi.clear();
4090  //Gets scores directly from seq align
4091  GetAlnScores(aln, score, bits, evalue, sum_n,
4092  num_ident, use_this_gi);
4093 
4094  unique_ptr<SSeqAlignSetCalcParams> seqSetInfo(new SSeqAlignSetCalcParams);
      // A sum_n of -1 is stored as 1.
4095  seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4096  seqSetInfo->id = &(aln.GetSeq_id(1));
4097  seqSetInfo->use_this_gi = use_this_gi;
4098  seqSetInfo->bit_score = bits;
4099  seqSetInfo->raw_score = score;
4100  seqSetInfo->evalue = evalue;
4101  seqSetInfo->match = num_ident;
      // NOTE(review): `id` was already assigned above; this repeats the same value.
4102  seqSetInfo->id = &(aln.GetSeq_id(1));
4103  seqSetInfo->subjRange = CRange<TSeqPos>(0,0);
4104  seqSetInfo->flip = false;
4105 
4106  return seqSetInfo.release();
4107 }
4108 
4109 
4110 
// Computes summary parameters for a set of alignments (HSPs).
//
// Returns NULL for an empty set. Otherwise starts from
// GetSeqAlignCalcParams() on the first alignment and then overrides:
// coverage (from GetSeqAlignCoverageParams), total bit score (sum over the
// set), best bit score with its e-value, match count and alignment length of
// the best-scoring alignment, percent identity, HSP count and the summed
// alignment length. The caller receives a raw pointer to the result.
// NOTE(review): the return-type line (listing line 4111) is not visible in
// this excerpt.
4112 CAlignFormatUtil::GetSeqAlignSetCalcParams(const CSeq_align_set& aln,int queryLength, bool do_translation)
4113 {
4114  int score = 0;
4115  double bits = 0;
4116  double evalue = 0;
4117  int sum_n = 0;
4118  int num_ident = 0;
4119  SSeqAlignSetCalcParams* seqSetInfo = NULL;
4120 
4121  if(aln.Get().empty())
4122  return seqSetInfo;
4123 
4124  seqSetInfo = GetSeqAlignCalcParams(*(aln.Get().front()));
4125 
4126  double total_bits = 0;
4127  double highest_bits = 0;
4128  double lowest_evalue = 0;
4129  int highest_length = 1;
4130  int highest_ident = 0;
4131  //int highest_identity = 0;
4132  double totalLen = 0;
4133 
4134  list<TGi> use_this_gi; // Not used here, but needed for GetAlnScores.
4135 
4136  seqSetInfo->subjRange = CAlignFormatUtil::GetSeqAlignCoverageParams(aln,&seqSetInfo->master_covered_length,&seqSetInfo->flip);
      // NOTE(review): divides by queryLength without a zero check -- confirm
      // that callers never pass 0.
4137  seqSetInfo->percent_coverage = 100*seqSetInfo->master_covered_length/queryLength;
4138 
4139  ITERATE(CSeq_align_set::Tdata, iter, aln.Get()) {
4140  int align_length = CAlignFormatUtil::GetAlignmentLength(**iter, do_translation);
4141  totalLen += align_length;
4142 
4143  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue, sum_n,
4144  num_ident, use_this_gi);
4145  use_this_gi.clear();
4146 
4147  total_bits += bits;
4148 
4149 /// IMPORTANT: based on WB-1175, the trigger for setting the highest identity
4150 /// is not the highest identity value, but the identity value of
4151 /// the alignment with the highest score!
4152 ///
4153 /// if (100*num_ident/align_length > highest_identity) { -- this condition is disabled
4154 
4155  if (bits > highest_bits) { // this is the replacement condition (WB-1175)
4156  highest_length = align_length;
4157  highest_ident = num_ident;
4158 /// highest_identity = 100*num_ident/align_length;
4159  }
4160 
      // Same test as above; highest_bits is only updated here, so both blocks
      // fire for the same alignment.
4161  if (bits > highest_bits) {
4162  highest_bits = bits;
4163  lowest_evalue = evalue;
4164  }
4165  }
4166  seqSetInfo->match = highest_ident;
4167  seqSetInfo->align_length = highest_length;
4168  seqSetInfo->percent_identity = CAlignFormatUtil::GetPercentIdentity(seqSetInfo->match, seqSetInfo->align_length);
4169 
4170  seqSetInfo->total_bit_score = total_bits;
4171  seqSetInfo->bit_score = highest_bits;
4172  seqSetInfo->evalue = lowest_evalue;
4173  seqSetInfo->hspNum = static_cast<int>(aln.Size());
4174  seqSetInfo->totalLen = (Int8)totalLen;
4175 
4176  return seqSetInfo;
4177 }
4178 
4180 {
      // Returns the percent identity of the alignment with the highest bit
      // score in `aln` (via GetPercentIdentity), or -1 when the set is empty.
      // NOTE(review): the signature (listing line 4179) is not visible in this
      // excerpt; `aln` and `do_translation` are presumably its parameters.
4181  int score = 0;
4182  double bits = 0;
4183  double evalue = 0;
4184  int sum_n = 0;
4185  int num_ident = 0;
4186 
4187  if(aln.Get().empty())
4188  return -1;
4189 
4190  double highest_bits = 0;
4191  int highest_length = 1;
4192  int highest_ident = 0;
4193 
4194  list<TGi> use_this_gi; // Not used here, but needed for GetAlnScores.
4195 
4196  ITERATE(CSeq_align_set::Tdata, iter, aln.Get()) {
4197  int align_length = CAlignFormatUtil::GetAlignmentLength(**iter, do_translation);
4198 
4199  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue, sum_n,
4200  num_ident, use_this_gi);
4201 
4202 
4203 /// IMPORTANT: based on WB-1175, the trigger for setting the highest identity
4204 /// is not the highest identity value, but the identity value of
4205 /// the alignment with the highest score!
4206 ///
4207 /// if (100*num_ident/align_length > highest_identity) { -- this condition is disabled
4208 
4209  if (bits > highest_bits) { // this is the replacement condition (WB-1175)
4210  highest_length = align_length;
4211  highest_ident = num_ident;
4212 /// highest_identity = 100*num_ident/align_length;
4213  highest_bits = bits;
4214  }
4215  }
4216 
      // If no alignment has bits > 0, the initial values (0 identities over
      // length 1) are passed on.
4217  double percent_identity = CAlignFormatUtil::GetPercentIdentity(highest_ident, highest_length);
4218  return percent_identity;
4219 }
4220 
4221 
4222 template<class container> bool
4223 s_GetBlastScore(const container& scoreList,
4224  double& evalue,
4225  double& bitScore,
4226  double& totalBitScore,
4227  int& percentCoverage,
4228  double& percentIdent,
4229  int& hspNum,
4230  double& totalLen,
4231  int &rawScore,
4232  int& sum_n,
4233  list<TGi>& use_this_gi)
4234 {
4235  const string k_GiPrefix = "gi:";
4236  bool hasScore = false;
4237 
4238 
4239  ITERATE (typename container, iter, scoreList) {
4240  const CObject_id& id=(*iter)->GetId();
4241  if (id.IsStr()) {
4242  hasScore = true;
4243  if (id.GetStr()=="seq_evalue") {
4244  evalue = (*iter)->GetValue().GetReal();
4245  } else if (id.GetStr()=="seq_bit_score"){
4246  bitScore = (*iter)->GetValue().GetReal();
4247  } else if (id.GetStr()=="seq_total_bit_score"){
4248  totalBitScore = (*iter)->GetValue().GetReal();
4249  } else if (id.GetStr()=="seq_percent_coverage"){
4250  percentCoverage = (*iter)->GetValue().GetInt();
4251  } else if (id.GetStr()=="seq_percent_identity" && (*iter)->GetValue().IsInt()){
4252  percentIdent = (*iter)->GetValue().GetInt();
4253  } else if (id.GetStr()=="seq_percent_identity" && (*iter)->GetValue().IsReal()){
4254  percentIdent = (*iter)->GetValue().GetReal();
4255  } else if (id.GetStr()=="seq_hspnum"){
4256  hspNum = (*iter)->GetValue().GetInt();
4257  } else if (id.GetStr()=="seq_align_totlen"){
4258  totalLen = (*iter)->GetValue().GetReal();
4259  } else if (id.GetStr()=="score"){
4260  rawScore = (*iter)->GetValue().GetInt();
4261  } else if (id.GetStr()=="use_this_gi"){
4262  Uint4 gi_v = (Uint4) ((*iter)->GetValue().GetInt());
4263  use_this_gi.push_back(GI_FROM(Uint4, gi_v));
4264  } else if (id.GetStr()=="sum_n"){
4265  sum_n = (*iter)->GetValue().GetInt();
4266  }
4267  else if(NStr::StartsWith(id.GetStr(),k_GiPrefix)) { //will be used when switch to 64bit GIs
4268  string strGi = NStr::Replace(id.GetStr(),k_GiPrefix,"");
4269  TGi gi = NStr::StringToNumeric<TGi>(strGi);
4270  use_this_gi.push_back(gi);
4271  }
4272  }
4273  }
4274  return hasScore;
4275 }
4276 
4277 
4278 void CAlignFormatUtil::GetUseThisSequence(const CSeq_align& aln,list<TGi>& use_this_gi)
4279 
4280 {
4281  const string k_GiPrefix = "gi:";
4282 
4283  if(!aln.CanGetExt() || aln.GetExt().size() == 0) return;
4284  const CUser_object &user = *(aln.GetExt().front());
4285 
4286  if (user.IsSetType() && user.GetType().IsStr() && user.GetType().GetStr() == "use_this_seqid" && user.IsSetData()) {
4287  const CUser_object::TData& fields = user.GetData();
4288  for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4289  const CUser_field& field = **fit;
4290 
4291  if (field.IsSetLabel() && field.GetLabel().IsStr() && field.GetLabel().GetStr() == "SEQIDS" &&
4292  field.IsSetData() && field.GetData().IsStrs()) {
4293  const CUser_field::C_Data::TStrs& strs = field.GetData().GetStrs();
4294  ITERATE(CUser_field::TData::TStrs, acc_iter, strs) {
4295  if(NStr::StartsWith(*acc_iter,k_GiPrefix)) { //will be used when switch to 64bit GIs
4296  string strGi = NStr::Replace(*acc_iter,k_GiPrefix,"");
4297  TGi gi = NStr::StringToNumeric<TGi>(strGi);
4298  use_this_gi.push_back(gi);
4299  }
4300  }
4301  }
4302  }
4303  }
4304 }
4305 
4306 
4307 /*use_this_seq will contain gi:nnnnnn or seqid:ssssss string list*/
4308 void CAlignFormatUtil::GetUseThisSequence(const CSeq_align& aln,list<string>& use_this_seq)
4309 
4310 {
4311  if(!aln.CanGetExt() || aln.GetExt().size() == 0) return;
4312  const CUser_object &user = *(aln.GetExt().front());
4313 
4314  if (user.IsSetType() && user.GetType().IsStr() && user.GetType().GetStr() == "use_this_seqid" && user.IsSetData()) {
4315  const CUser_object::TData& fields = user.GetData();
4316  for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4317  const CUser_field& field = **fit;
4318 
4319  if (field.IsSetLabel() && field.GetLabel().IsStr() && field.GetLabel().GetStr() == "SEQIDS" &&
4320  field.IsSetData() && field.GetData().IsStrs()) {
4321  const CUser_field::C_Data::TStrs& strs = field.GetData().GetStrs();
4322  ITERATE(CUser_field::TData::TStrs, acc_iter, strs) {
4323  use_this_seq.push_back(*acc_iter);
4324  }
4325  }
4326  }
4327  }
4328 }
4329 
4330 
4331 
4334 {
      // Builds an SSeqAlignSetCalcParams from summary scores stored on the
      // FIRST alignment of alnSet: first from the alignment-level score list,
      // and if that holds no string-named score, from the scores of the first
      // segment (Std / Dendiag / Denseg). Ownership of the returned raw
      // pointer passes to the caller (unique_ptr::release()).
      // NOTE(review): the signature (listing lines 4332-4333) and listing line
      // 4361 (presumably the start of the Denseg s_GetBlastScore call) are not
      // visible in this excerpt.
      // NOTE(review): front() is called without checking that alnSet is
      // non-empty -- confirm callers guarantee at least one alignment.
4335  bool hasScore = false;
4336  double evalue = -1;
4337  double bitScore = -1;
4338  double totalBitScore = -1;
4339  int percentCoverage = -1;
4340  double percentIdent = -1;
4341  int hspNum = 0;
4342  double totalLen = 0;
4343  int rawScore = -1;
4344  int sum_n = -1;
4345  list<TGi> use_this_gi;
4346  list<string> use_this_seq;
4347 
4348  const CSeq_align& aln = *(alnSet.Get().front());
4349 
4350  hasScore = s_GetBlastScore(aln.GetScore(),evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4351 
4352  if(!hasScore){
4353  const CSeq_align::TSegs& seg = aln.GetSegs();
4354  if(seg.Which() == CSeq_align::C_Segs::e_Std){
4355  s_GetBlastScore(seg.GetStd().front()->GetScores(),
4356  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4357  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
4358  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
4359  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4360  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
4362  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4363  }
4364  }
4365 
      // With no GIs among the scores, use the "use_this_seqid" extension;
      // otherwise convert the numeric GIs to their string form.
4366  if(use_this_gi.size() == 0) {
4367  GetUseThisSequence(aln,use_this_seq);
4368  }
4369  else {
4370  use_this_seq = s_NumGiToStringGiList(use_this_gi);//for backward compatibility
4371  }
4372 
4373 
4374  unique_ptr<SSeqAlignSetCalcParams> seqSetInfo(new SSeqAlignSetCalcParams);
4375  seqSetInfo->evalue = evalue;
4376  seqSetInfo->bit_score = bitScore;
4377  seqSetInfo->total_bit_score = totalBitScore;
4378  seqSetInfo->percent_coverage = percentCoverage;
4379  seqSetInfo->percent_identity = percentIdent;
4380  seqSetInfo->hspNum = hspNum;
4381  seqSetInfo->totalLen = (Int8)totalLen;
4382 
      // A sum_n of -1 (the initial value here) is stored as 1.
4383  seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4384  seqSetInfo->id = &(aln.GetSeq_id(1));
4385  seqSetInfo->use_this_gi = StringGiToNumGiList(use_this_seq);//for backward compatibility
4386  seqSetInfo->use_this_seq = use_this_seq;
4387  seqSetInfo->raw_score = rawScore;//not used
4388 
4389  seqSetInfo->subjRange = CRange<TSeqPos>(0,0);
4390  seqSetInfo->flip = false;
4391 
4392  return seqSetInfo.release();
4393 }
4394 
4396  const CSeq_id& aln_id,
4397  list<TGi>& use_this_gi,
4398  TGi& gi)
4399 
4400 {
      // Convenience overload: forwards to the GetDisplayIds() variant that also
      // reports a taxid, and discards that taxid.
      // NOTE(review): the first signature line (with `handle`) is not visible
      // in this excerpt.
4401  TTaxId taxid = ZERO_TAX_ID;
4402  CRef<CSeq_id> wid = CAlignFormatUtil::GetDisplayIds(handle, aln_id, use_this_gi, gi, taxid);
4403  return wid;
4404 }
4405 
4407  const CSeq_id& aln_id,
4408  list<TGi>& use_this_gi,
4409  TGi& gi,
4410  TTaxId& taxid)
4411 
4412 {
      // Selects the seq-id to display for a subject sequence and reports its
      // GI and taxid through `gi` and `taxid` (both reset to zero first).
      //  - No deflines: best id of the Bioseq (FindBestChoice) and its GI.
      //  - With deflines: walk them in order; a defline matches when its GI is
      //    in use_this_gi (if that list is non-empty), or otherwise when one of
      //    its ids matches aln_id or shares aln_id's general-db name. The walk
      //    stops at the first match.
      // NOTE(review): the first signature line and listing line 4413
      // (presumably the declaration of bdlRef) are not visible in this excerpt.
      // NOTE(review): if no defline matches, `wid` and `taxid` keep the values
      // from the last defline visited while `gi` stays ZERO_GI -- confirm this
      // fallback is intended.
4414  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
4415 
4416  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
4417  CRef<CSeq_id> wid;
4418 
4419  gi = ZERO_GI;
4420  taxid = ZERO_TAX_ID;
4421  if(bdl.empty()){
4422  wid = FindBestChoice(*ids, CSeq_id::WorstRank);
4423  gi = FindGi(*ids);
4424  } else {
4425  bool found = false;
4426  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4427  iter != bdl.end(); iter++){
4428  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4429  TGi cur_gi = FindGi(*cur_id);
4430  wid = FindBestChoice(*cur_id, CSeq_id::WorstRank);
4431  if ((*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4432  taxid = (*iter)->GetTaxid();
4433  }
4434  if (!use_this_gi.empty()) {
4435  ITERATE(list<TGi>, iter_gi, use_this_gi){
4436  if(cur_gi == *iter_gi){
4437  found = true;
4438  break;
4439  }
4440  }
4441  } else {
4442  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4443  if ((*iter_id)->Match(aln_id)
4444  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4445  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4446  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4447  found = true;
4448  }
4449  }
4450  }
4451  if(found){
4452  gi = cur_gi;
4453  break;
4454  }
4455  }
4456  }
4457  return wid;
4458 }
4459 
4460 
4461 
4462 //removes "gi:" or "seqid:" prefix from gi:nnnnnnn or seqid:nnnnn
4463 static string s_UseThisSeqToTextSeqID(string use_this_seqid, bool &isGi)
4464 {
4465  const string k_GiPrefix = "gi:";
4466  const string k_SeqIDPrefix = "seqid:";
4467  isGi = false;
4468  string textSeqid;
4469  if(NStr::StartsWith(use_this_seqid,k_GiPrefix)) {
4470  textSeqid = NStr::Replace(use_this_seqid,k_GiPrefix,"");
4471  isGi = true;
4472  }
4473  else if(NStr::StartsWith(use_this_seqid,k_SeqIDPrefix)) {
4474  textSeqid = NStr::Replace(use_this_seqid,k_SeqIDPrefix,"");
4475  }
4476  else {//assume no prefix - gi
4477  if(NStr::StringToInt8(use_this_seqid,NStr::fConvErr_NoThrow)) {
4478  isGi = true;
4479  }
4480  }
4481  return textSeqid;
4482 }
4483 
4484 
4485 
4486 //assume that we have EITHER gi: OR seqid: in the list
4487 bool CAlignFormatUtil::IsGiList(list<string> &use_this_seq)
4488 {
4489  bool isGi = false;
4490  ITERATE(list<string>, iter_seq, use_this_seq){
4491  s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4492  break;
4493  }
4494  return isGi;
4495 }
4496 
4497 list<TGi> CAlignFormatUtil::StringGiToNumGiList(list<string> &use_this_seq)
4498 {
4499  list<TGi> use_this_gi;
4500  ITERATE(list<string>, iter_seq, use_this_seq){
4501  bool isGi = false;
4502  string strGI = s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4503  if(isGi) use_this_gi.push_back(NStr::StringToNumeric<TGi>(strGI));
4504  }
4505  return use_this_gi;
4506 }
4507 
4508 
4509 
4510 bool CAlignFormatUtil::MatchSeqInSeqList(TGi cur_gi, CRef<CSeq_id> &seqID, list<string> &use_this_seq,bool *isGiList)
4511 {
4512  bool found = false;
4513  bool isGi = false;
4514 
4515  string curSeqID = CAlignFormatUtil::GetLabel(seqID,true); //uses GetSeqIdString(true)
4516  ITERATE(list<string>, iter_seq, use_this_seq){
4517  isGi = false;
4518  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4519  if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4520  found = true;
4521  break;
4522  }
4523  }
4524  if(isGiList) *isGiList = isGi;
4525  return found;
4526 }
4527 
4528 
4529 bool CAlignFormatUtil::MatchSeqInSeqList(CConstRef<CSeq_id> &alnSeqID, list<string> &use_this_seq,vector <string> &seqList)
4530 {
4531  bool isGi = false;
4532  string curSeqID;
4533  if(alnSeqID->IsGi()) {
4534  curSeqID = NStr::NumericToString(alnSeqID->GetGi());
4535  }
4536  else {
4537  curSeqID = CAlignFormatUtil::GetLabel(alnSeqID,true); //uses GetSeqIdString(true)
4538  }
4539  //match with seqid in seq_align
4540  bool found = std::find(seqList.begin(), seqList.end(), curSeqID) != seqList.end();
4541  if(!found) {
4542  //match in use_this_seq list
4543  ITERATE(list<string>, iter_seq, use_this_seq){
4544  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4545  found = std::find(seqList.begin(), seqList.end(), useThisSeq) != seqList.end();
4546  if(found){
4547  break;
4548  }
4549  }
4550  }
4551  return found;
4552 }
4553 
4554 bool CAlignFormatUtil::MatchSeqInUseThisSeqList(list<string> &use_this_seq, string textSeqIDToMatch)
4555 {
4556  bool has_match = false;
4557 
4558  ITERATE(list<string>, iter_seq, use_this_seq) {
4559  bool isGi;
4560  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4561  if(useThisSeq == textSeqIDToMatch) {
4562  has_match = true;
4563  break;
4564  }
4565  }
4566  return has_match;
4567 }
4568 
4570 {
      // Rebuilds use_this_seq without the entries whose accession type (per
      // CSeq_id::IdentifyAccession) equals accessionType. Returns true if at
      // least one such entry was dropped.
      // The kept entries are stored WITHOUT their "gi:"/"seqid:" prefix,
      // because the prefix-stripped text is what is pushed to the new list.
      // NOTE(review): the signature (listing line 4569) is not visible in this
      // excerpt; use_this_seq and accessionType are presumably its parameters.
4571  list<string> new_use_this_seq;
4572  bool hasAccType = false;
4573  bool isGI = false;
4574 
4575  ITERATE(list<string>, iter_seq, use_this_seq) {
4576  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGI);
4577  CSeq_id::EAccessionInfo useThisSeqAccType = CSeq_id::IdentifyAccession (useThisSeq);
4578  if(useThisSeqAccType != accessionType) {
4579  new_use_this_seq.push_back(useThisSeq);
4580  }
4581  else {
4582  hasAccType = true;
4583  }
4584  }
4585  use_this_seq = new_use_this_seq;
4586  return hasAccType;
4587 }
4588 
4590  const CSeq_id& aln_id,
4591  list<string>& use_this_seq,
4592  TGi *gi,
4593  TTaxId *taxid,
4594  string *textSeqID)
4595 
4596 {
      // Selects the seq-id to display for a subject sequence. The optional
      // outputs gi, taxid and textSeqID are written only when non-null; gi and
      // taxid are reset to zero first.
      //  - No deflines: best id of the Bioseq, its GI and its label.
      //  - With deflines: walk them in order; a defline matches when
      //    use_this_seq (if non-empty) names its GI or its best-id label, or
      //    otherwise when one of its ids matches aln_id or shares aln_id's
      //    general-db name. The walk stops at the first match.
      // NOTE(review): the first signature line and listing line 4597
      // (presumably the declaration of bdlRef) are not visible in this excerpt.
      // NOTE(review): if no defline matches, `wid` and *taxid keep the values
      // from the last defline visited -- confirm this fallback is intended.
4598  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
4599 
4600  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
4601  CRef<CSeq_id> wid;
4602 
4603  if(gi) *gi = ZERO_GI;
4604  if(taxid) *taxid = ZERO_TAX_ID;
4605  if(bdl.empty()){
4606  wid = FindBestChoice(*ids, CSeq_id::WorstRank);
4607  if(gi) *gi = FindGi(*ids);
4608  if(textSeqID) *textSeqID = GetLabel(wid,true);//uses GetSeqIdString(true)
4609  } else {
4610  bool found = false;
4611  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4612  iter != bdl.end(); iter++){
4613  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4614  TGi cur_gi = FindGi(*cur_id);
4615  wid = FindBestChoice(*cur_id, CSeq_id::WorstRank);
4616  string curSeqID = GetLabel(wid,true);//uses GetSeqIdString(true)
4617  if (taxid && (*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4618  *taxid = (*iter)->GetTaxid();
4619  }
4620  if (!use_this_seq.empty()) {
4621  ITERATE(list<string>, iter_seq, use_this_seq){
4622  bool isGi = false;
4623  string useThisSeq = s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4624  if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4625  found = true;
4626  break;
4627  }
4628  }
4629  } else {
4630  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4631  if ((*iter_id)->Match(aln_id)
4632  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4633  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4634  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4635  found = true;
4636  }
4637  }
4638  }
4639  if(found){
4640  if(gi) *gi = cur_gi;
4641  if(textSeqID) *textSeqID = curSeqID;
4642  break;
4643  }
4644  }
4645  }
4646 
4647  return wid;
4648 }
4649 
4650 
4652  const CSeq_id& aln_id,
4653  list<TGi>& use_this_gi)
4654 
4655 
4656 {
      // Returns the GI of the first defline in `bdl` that matches, or ZERO_GI
      // when bdl is empty or nothing matches. A defline matches when its GI is
      // in use_this_gi (if that list is non-empty), or otherwise when one of
      // its ids matches aln_id or shares aln_id's general-db name.
      // NOTE(review): the first signature line (presumably declaring `bdl`) is
      // not visible in this excerpt.
4657  TGi gi = ZERO_GI;
4658 
4659  if(!bdl.empty()){
4660  bool found = false;
4661  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4662  iter != bdl.end(); iter++){
4663  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4664  TGi cur_gi = FindGi(*cur_id);
4665  if (!use_this_gi.empty()) {
4666  ITERATE(list<TGi>, iter_gi, use_this_gi){
4667  if(cur_gi == *iter_gi){
4668  found = true;
4669  break;
4670  }
4671  }
4672  } else {
4673  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4674  if ((*iter_id)->Match(aln_id)
4675  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4676  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4677  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4678  found = true;
4679  }
4680  }
4681  }
4682  if(found){
4683  gi = cur_gi;
4684  break;
4685  }
4686  }
4687  }
4688  return gi;
4689 }
4690 
4692 {
      // Normalizes `rng` in place so that From <= To by swapping the two ends
      // when they are reversed, then returns the same range.
      // NOTE(review): the signature (listing line 4691) is not visible in this
      // excerpt.
4693  if(rng.GetFrom() > rng.GetTo()){
4694  rng.Set(rng.GetTo(), rng.GetFrom());
4695  }
4696  //cerr << "Query Rng: " << rng.GetFrom() << "-" << rng.GetTo() << endl;
4697  return rng;
4698 }
4699 
4701 {
      // Returns the number of query (row 0) positions covered by the
      // alignments in alnset, or 0 for an empty set.
      // Alignments are processed in list order. For each one, only the part of
      // its subject (row 1) range not already covered by earlier alignments
      // contributes: if the whole subject range is new, the whole query range
      // is added; otherwise each uncovered subject sub-range is mapped to the
      // query through a CAlnMap and that query range is added.
      // NOTE(review): the signature (listing line 4700) is not visible in this
      // excerpt.
4702  if(alnset.IsEmpty())
4703  return 0;
4704 
      // Dendiag alignments are converted to Denseg before being mapped below.
4705  bool isDenDiag = (alnset.Get().front()->GetSegs().Which() == CSeq_align::C_Segs::e_Dendiag) ?
4706  true : false;
4707 
4708  list<CRef<CSeq_align> >::iterator mItr=alnset.Set().begin();
4709  CRangeCollection<TSeqPos> subj_rng_coll((*mItr)->GetSeqRange(1));
4710  CRange<TSeqPos> q_rng((*mItr)->GetSeqRange(0));
4711  /*
4712  cerr << MSerial_AsnText << **mItr;
4713  cerr << (*mItr)->GetSeqRange(0).GetFrom() << endl;
4714  cerr << (*mItr)->GetSeqRange(0).GetTo() << endl;
4715  cerr << (*mItr)->GetSeqRange(0).GetToOpen() << endl;
4716  cerr << (*mItr)->GetSeqRange(1).GetFrom() << endl;
4717  cerr << (*mItr)->GetSeqRange(1).GetTo() << endl;
4718  cerr << (*mItr)->GetSeqRange(1).GetToOpen() << endl;
4719  */
4720  CRangeCollection<TSeqPos> query_rng_coll(s_FixMinusStrandRange(q_rng));
4721  ++mItr;
4722  for(;mItr != alnset.Set().end(); ++mItr) {
4723  const CRange<TSeqPos> align_subj_rng((*mItr)->GetSeqRange(1));
4724  // subject range should always be on the positive strand
4725  ASSERT(align_subj_rng.GetTo() > align_subj_rng.GetFrom());
4726  CRangeCollection<TSeqPos> coll(align_subj_rng);
      // Keep only the subject positions not yet covered.
4727  coll.Subtract(subj_rng_coll);
4728 
4729  if (coll.empty())
4730  continue;
4731 
      // Nothing was subtracted: the whole alignment is new.
4732  if(coll[0] == align_subj_rng) {
4733  CRange<TSeqPos> query_rng ((*mItr)->GetSeqRange(0));
4734  //cerr << "Subj Rng :" << align_subj_rng.GetFrom() << "-" << align_subj_rng.GetTo() << endl;
4735  query_rng_coll += s_FixMinusStrandRange(query_rng);
4736  subj_rng_coll += align_subj_rng;
4737  }
4738  else {
4739  ITERATE (CRangeCollection<TSeqPos>, uItr, coll) {
4740  CRange<TSeqPos> query_rng;
4741  const CRange<TSeqPos> & subj_rng = (*uItr);
4742  CRef<CSeq_align> densegAln
4743  = isDenDiag ? CAlignFormatUtil::CreateDensegFromDendiag(**mItr) : (*mItr);
4744 
      // Subject position -> alignment position -> query position.
4745  CAlnMap map(densegAln->GetSegs().GetDenseg());
4746  TSignedSeqPos subj_aln_start = map.GetAlnPosFromSeqPos(1,subj_rng.GetFrom());
4747  TSignedSeqPos subj_aln_end = map.GetAlnPosFromSeqPos(1,subj_rng.GetTo());
4748  query_rng.SetFrom(map.GetSeqPosFromAlnPos(0,subj_aln_start));
4749  query_rng.SetTo(map.GetSeqPosFromAlnPos(0,subj_aln_end));
4750 
4751  //cerr << "Subj Rng :" << subj_rng.GetFrom() << "-" << subj_rng.GetTo() << endl;
4752  query_rng_coll += s_FixMinusStrandRange(query_rng);
4753  subj_rng_coll += subj_rng;
4754  }
4755  }
4756  }
4757 
4758  return query_rng_coll.GetCoveredLength();
4759 }
4760 
4761 /// Return the first id of the specified type, or a null ref if none matches.
4762 ///@param ids: the input ids
4763 ///@param choice: the id type to look for
4764 ///@return: the first id whose type equals choice; an empty CRef otherwise
4765 ///
4767  CSeq_id::E_Choice choice)
4768 {
      // NOTE(review): the first signature line (listing line 4766) is not
      // visible in this excerpt.
4769  CRef<CSeq_id> cid;
4770 
4771  for (CBioseq::TId::const_iterator iter = ids.begin(); iter != ids.end();
4772  iter ++){
4773  if ((*iter)->Which() == choice){
4774  cid = *iter;
4775  break;
4776  }
4777  }
4778 
4779  return cid;
4780 }
4781 
4782 /// Return the GI from an id list.
4783 ///@param ids: the input ids
4784 ///@return: the gi if found, ZERO_GI otherwise
4785 ///
4787 {
      // NOTE(review): the signature (listing line 4786) and listing line 4789
      // (presumably the lookup that initializes `id`) are not visible in this
      // excerpt.
4788  TGi gi = ZERO_GI;
4790  if (!(id.Empty())){
4791  return id->GetGi();
4792  }
4793  return gi;
4794 }
4795 
4797 {
      // Concatenates every title descriptor found on `bh`, each followed by a
      // single space (so a non-empty result ends with a trailing space).
      // Returns an empty string when there is no title descriptor.
      // NOTE(review): the signature (listing line 4796) is not visible in this
      // excerpt.
4798  CSeqdesc_CI desc_t(bh, CSeqdesc::e_Title);
4799  string t = kEmptyStr;
4800  for (;desc_t; ++desc_t) {
4801  t += desc_t->GetTitle() + " ";
4802  }
4803  return t;
4804 }
4805 
4807 {
      // Returns a text form of `id`: the FASTA-style string for GI, PRF and PIR
      // ids, and GetSeqIdString(true) for every other id type.
      // NOTE(review): the signature (listing line 4806) is not visible in this
      // excerpt.
4808  string retval;
4809 
4810  if (id.IsGi() || id.IsPrf() || id.IsPir()) {
4811  retval = id.AsFastaString();
4812  }
4813  else {
4814  retval = id.GetSeqIdString(true);
4815  }
4816 
4817  return retval;
4818 }
4819 
4820 
4821 bool CAlignFormatUtil::GetTextSeqID(CConstRef<CSeq_id> seqID, string *textSeqID)
4822 {
4823  bool hasTextSeqID = true;
4824 
4825  const CTextseq_id* text_id = seqID->GetTextseq_Id();
4826  //returns non zero if e_Genbank,e_Embl,e_Ddbj,e_Pir,e_Swissprot,case e_Other,e_Prf,case e_Tpg,e_Tpe,case e_Tpd,case e_Gpipe, e_Named_annot_track
4827  if(!text_id) { //check for pdb and pat
4828  if(!(seqID->Which() == CSeq_id::e_Pdb) && !(seqID->Which() == CSeq_id::e_Patent) && !(seqID->Which() == CSeq_id::e_Gi)) {
4829  hasTextSeqID = false;
4830  }
4831  }
4832 
4833  if(hasTextSeqID && textSeqID) {
4834  seqID->GetLabel(textSeqID, CSeq_id::eContent);
4835  }
4836  return hasTextSeqID;
4837 }
4838 
4839 
4840 
4841 bool CAlignFormatUtil::GetTextSeqID(const list<CRef<CSeq_id> > & ids, string *textSeqID)
4842 {
4843  bool hasTextSeqID = false;
4844 
4845  CConstRef<CSeq_id> seqID = FindTextseq_id(ids);
4846  //returns non zero if e_Genbank,e_Embl,e_Ddbj,e_Pir,e_Swissprot,case e_Other,e_Prf,case e_Tpg,e_Tpe,case e_Tpd,case e_Gpipe, e_Named_annot_track
4847  if(seqID.Empty()) {
4848  seqID = GetSeq_idByType(ids, CSeq_id::e_Pdb);
4849  }
4850  if(seqID.Empty()) {
4851  seqID = GetSeq_idByType(ids, CSeq_id::e_Patent);
4852  }
4853  if(!seqID.Empty()) {
4854  hasTextSeqID = true;
4855  if(textSeqID) seqID->GetLabel(textSeqID, CSeq_id::eContent);
4856  }
4857  return hasTextSeqID;
4858 }
4859 
4861  vector <string> &seqList)
4862 {
      // Returns a new alignment set holding only the alignments of source_aln
      // whose subject (row 1) is selected by seqList, as decided by
      // MatchSeqInSeqList(). The match is recomputed only when the subject id
      // differs from the previous alignment's, so consecutive alignments to
      // the same subject share one decision.
      // NOTE(review): the first signature line (listing line 4860) is not
      // visible in this excerpt.
4863  CConstRef<CSeq_id> previous_id, subid;
4864  list<string> use_this_seq;
4865  bool match = false;
4866 
4867  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
4868  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
4869  subid = &((*iter)->GetSeq_id(1));
4870  if(previous_id.Empty() || !subid->Match(*previous_id)){
4871  use_this_seq.clear();
4872  CAlignFormatUtil::GetUseThisSequence(**iter,use_this_seq);
4873  match = MatchSeqInSeqList(subid, use_this_seq,seqList);
4874  }
4875 
4876  previous_id = subid;
4877  if(match) {
4878  new_aln->Set().push_back(*iter);
4879  }
4880  }
4881  return new_aln;
4882 }
4883 
4884 
4885 END_SCOPE(align_format)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void s_CalcAlnPercentIdent(const CRef< CSeq_align_set > &info1, const CRef< CSeq_align_set > &info2, double &percentIdent1, double &percentIdent2)
static string s_GetTaxName(TTaxId taxid)
static bool s_ProcessAlignSet(const CSeq_align_set &alnset, list< CRange< TSeqPos > > &query_list, list< CRange< TSeqPos > > &subject_list)
static CRef< CSeq_id > s_GetSeqIdByType(const list< CRef< CSeq_id > > &ids, CSeq_id::E_Choice choice)
return id type specified or null ref
static string s_MapURLLink(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo, const CBioseq::TId &ids)
static list< CRange< TSeqPos > > s_MergeRangeList(list< CRange< TSeqPos > > &source)
static void s_AddLinkoutInfo(map< int, vector< CBioseq::TId > > &linkout_map, int linkout, CBioseq::TId &cur_id)
static bool s_GetSRASeqMetadata(const CBioseq::TId &ids, string &strRun, string &strSpotId, string &strReadIndex)
void s_AddOtherRelatedInfoLinks(CBioseq::TId &cur_id, const string &rid, bool is_na, bool for_alignment, int cur_align, list< string > &linkout_list)
bool kTranslation
static list< string > s_NumGiToStringGiList(list< TGi > use_this_gi)
static CRange< TSeqPos > & s_FixMinusStrandRange(CRange< TSeqPos > &rng)
static bool s_FillDbInfoLocally(const string &dbname, CAlignFormatUtil::SDbInfo &info, int dbfilt_algorithm)
Initialize database statistics with data obtained from local BLAST databases.
CRef< CScope > kScope
bool s_GetBlastScore(const container &scoreList, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi, int &comp_adj_method)
Get blast score information.
static list< string > s_GetLinkoutUrl(int linkout, string giList, string labelList, TGi first_gi, CAlignFormatUtil::SLinkoutInfo &linkoutInfo, bool textLink=true)
string s_GetBestIDForURL(CBioseq::TId &ids)
static bool s_isAlnInFilteringRange(double evalue, double percentIdent, int queryCover, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh, int queryCoverLow, int queryCoverHigh)
USING_SCOPE(ncbi)
static string s_MapCommonUrlParams(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo)
static int s_LinkLetterToType(string linkLetter)
static double adjustPercentIdentToDisplayValue(double value)
static string s_MapCustomLink(string linkUrl, string reportType, string accession, string linkText, string linktrg, string linkTitle=kCustomLinkTitle, string linkCls="")
const char k_PSymbol[ePMatrixSize+1]
Residues.
static string s_UseThisSeqToTextSeqID(string use_this_seqid, bool &isGi)
static string s_MapLinkoutGenParam(string &url_link_tmpl, const string &rid, string giList, bool for_alignment, int cur_align, string &label, string &lnk_displ, string lnk_tl_info="", string lnk_title="")
static list< string > s_GetFullLinkoutUrl(CBioseq::TId &cur_id, CAlignFormatUtil::SLinkoutInfo &linkoutInfo, map< int, vector< CBioseq::TId > > &linkout_map, bool getIdentProteins)
static bool FromRangeAscendingSort(CRange< TSeqPos > const &info1, CRange< TSeqPos > const &info2)
static const char kBioAssayProtImg[]
static const char kSeqViewerUrl[]
static const char kStructureImg[]
const int k_NumAsciiChar
Number of ASCII characters for populating matrix columns.
static const string kMapviwerDispl
static const char kGenericLinkMouseoverTmpl[]
static const string kSeqViewerParams
static const char kEntrezTMUrl[]
static const char kMapviwerUrl[]
mapviewer linkout
static const char kCustomLinkTemplate[]
static const string kGenomeDataViewerDispl
static const char kStructureAlphaFoldUrl[]
static const char kDownloadUrl[]
dumpgnl
static const string kGeneDispl
static const string kUnigeneDispl
static const string kGeoDispl
static const char kClassInfo[]
blast related url
@ ePMatrixSize
static const char kGeoImg[]
static const char kGeneTerm[]
static const char kIdenticalProteinsUrl[]
static const string kIdenticalProteinsDispl
static const char kGeneImg[]
static const string kReprMicrobialGenomesDispl
static const string kBioAssayDispl
static const char kReprMicrobialGenomesImg[]
static const char kGenomeDataViewerImg[]
static const string kMapviewBlastHitParams
static const string kMapviewBlastHitUrl
mapviewer linkout
static const char kUnigeneImg[]
static const char kStructureUrl[]
structure
static const char kBioAssayNucImg[]
static const char kGenericLinkTemplate[]
static const char kMapviwerImg[]
static const char kTraceUrl[]
trace db
static const string kStructureDispl
static const char kCustomLinkTitle[]
static const char kSeqViewerUrlNonGi[]
Declares the CBlastServices class.
static string GetProtocol(void)
static CRef< objects::CSeq_align_set > FilterSeqalignBySeqList(objects::CSeq_align_set &source_aln, vector< string > &seqList)
function for Filtering seqalign by specific subjects
static void PrintPhiInfo(int num_patterns, const string &pattern, double prob, vector< int > &offsets, CNcbiOstream &out)
Prints out PHI-BLAST info for header (or footer)
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignSetCalcParamsFromASN(const objects::CSeq_align_set &alnSet)
static string GetIDUrl(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid.
static int GetAlignmentLength(const objects::CSeq_align &aln, bool do_translation)
get the alignment length
static bool IsWGSAccession(string &accession, string &wgsProj)
Check if accession is WGS.
static void PruneSeqalign(const objects::CSeq_align_set &source_aln, objects::CSeq_align_set &new_aln, unsigned int num=static_cast< unsigned int >(kDfltArgNumAlignments))
Fill new alignset containing the specified number of alignments with unique slave seqids.
static int GetUniqSeqCoverage(objects::CSeq_align_set &alnset)
Calculate the unique subject query coverage range (blastn only)
static void InitConfig()
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignSetCalcParams(const objects::CSeq_align_set &aln, int queryLength, bool do_translation)
static void SortHitByMolecularType(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
sort a list of seqalign set by molecular type
static void AcknowledgeBlastQuery(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false, const string &rid=kEmptyStr)
Print out blast query info.
static bool IsMixedDatabase(const objects::CSeq_align_set &alnset, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
static list< CRef< objects::CSeq_align_set > > SortOneSeqalignForSortableFormat(const objects::CSeq_align_set &source, bool nuc_to_nuc_translation, int hit_sort, int hsp_sort)
static const char kNoHitsFound[]
The string containing the message that no hits were found.
static void GetAsciiProteinMatrix(const char *matrix_name, CNcbiMatrix< int > &retval)
Retrieve a scoring matrix for the provided matrix name.
static list< string > GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, bool structure_linkout_as_group, bool for_alignment, int cur_align, string &linkoutOrder, TTaxId taxid, string &database, int query_number, string &user_url, string &preComputedResID, ILinkoutDB *linkoutdb, const string &mv_build_name)
Get linkout membership for the list of blast deflines.
static void x_AcknowledgeBlastSequence(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, const string &label, bool tabular, const string &rid)
static void PrintDbReport(const vector< SDbInfo > &dbinfo_list, size_t line_length, CNcbiOstream &out, bool top=false)
Print out blast database information.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alignment.
static void BuildFormatQueryString(CCgiContext &ctx, string &cgi_query)
static string GetSeqDescrString(const objects::CBioseq &cbs)
Returns a full description for a Bioseq, concatenating all available titles.
@ eAddEOLAtLineEnd
add EOL at the end of the string
@ eAddEOLAtLineStart
add EOL at the beginning of the string
@ eSpacePosAtLineEnd
add spaces at the end of the string
static list< TGi > StringGiToNumGiList(list< string > &use_this_seq)
Convert a string gi list to a TGi list.
static string AddSpaces(string paramVal, size_t maxParamLength, int spacesFormatFlag=eSpacePosToCenter)
Calculate the number of spaces and add them to paramVal.
static CRef< objects::CSeq_align_set > FilterSeqalignByPercentIdent(objects::CSeq_align_set &source_aln, double percentIdentLow, double percentIdentHigh)
function for Filtering seqalign by percent identity
static bool RemoveSeqsOfAccessionTypeFromSeqInUse(list< string > &use_this_seq, objects::CSeq_id::EAccessionInfo accesionType)
function to remove sequences of accesionType from use_this_seq list
static bool SortHitByMasterStartAscending(CRef< objects::CSeq_align_set > &info1, CRef< objects::CSeq_align_set > &info2)
sorting function for sorting a list of seqalign set by ascending master start position
static void GetScoreString(double evalue, double bit_score, double total_bit_score, int raw_score, string &evalue_str, string &bit_score_str, string &total_bit_score_str, string &raw_score_str)
format evalue and bit_score
static map< string, CRef< objects::CSeq_align_set > > HspListToHitMap(vector< string > seqIdList, const objects::CSeq_align_set &source)
static string GetBareId(const objects::CSeq_id &id)
Get sequence id with no database source (bare accession)
static string GetGnlID(const objects::CDbtag &dtg)
Return ID for GNL label.
static bool m_geturl_debug_flag
static void SortHit(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, bool do_translation, objects::CScope &scope, int sort_method, ILinkoutDB *linkoutdb, const string &mv_build_name)
actual sorting function for SortHitByMolecularType
static void FillScanModeBlastDbInfo(vector< SDbInfo > &retval, bool is_protein, int numSeqs, Int8 numLetters, string &tag)
Fills one BLAST dbinfo structure.
static string GetGeneInfo(TGi giForGeneLookup)
Get Gene symbol for gi.
static bool SortHitByTotalScoreDescending(CRef< objects::CSeq_align_set > const &info1, CRef< objects::CSeq_align_set > const &info2)
return the comparison result: 1st >= 2nd => true, false otherwise
static string GetIDUrlGen(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid that goes to entrez or trace.
static bool SortHspBySubjectStartAscending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
static CAlignFormatUtil::DbType GetDbType(const objects::CSeq_align_set &actual_aln_list, objects::CScope &scope)
Set the database as gi type.
static void PruneSeqalignAll(const objects::CSeq_align_set &source_aln, objects::CSeq_align_set &new_aln, unsigned int number)
Fill new alignset containing the specified number of alignments plus the rest of alignments for the l...
static void PrintTildeSepLines(string str, size_t line_len, CNcbiOstream &out)
Print out misc information separated by "~".
static string BuildUserUrl(const objects::CBioseq::TId &ids, TTaxId taxid, string user_url, string database, bool db_is_na, string rid, int query_number, bool for_alignment)
return the custom url (such as mapview)
static string MapTemplate(string inpString, string tmplParamName, Int8 templParamVal)
Replace template tags by real data.
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignCalcParams(const objects::CSeq_align &aln)
static string GetURLFromRegistry(const string url_name, int index=-1)
retrieve URL from .ncbirc file combining host/port and format strings values.
static bool IsGiList(list< string > &use_this_seq)
Check if use_this_seq contains a gi list.
static double GetSeqAlignSetCalcPercentIdent(const objects::CSeq_align_set &aln, bool do_translation)
static string GetGraphiscLink(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static bool MatchSeqInSeqList(TGi cur_gi, CRef< objects::CSeq_id > &seqID, list< string > &use_this_seq, bool *isGiList=NULL)
Matches text seqID or gi with the list of seqIds or gis.
static int GetSeqLinkoutInfo(objects::CBioseq::TId &cur_id, ILinkoutDB **linkoutdb, const string &mv_build_name, TGi gi=INVALID_GI)
static CRef< objects::CSeq_id > GetDisplayIds(const objects::CBioseq_Handle &handle, const objects::CSeq_id &aln_id, list< TGi > &use_this_gi, TGi &gi, TTaxId &taxid)
Scan the list of blast deflines and find the seqID to be used in display.
static list< string > GetLinkoutUrl(int linkout, const objects::CBioseq::TId &ids, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, TGi first_gi, bool structure_linkout_as_group, bool for_alignment, int cur_align, string preComputedResID)
Get the list of urls for linkouts.
static void PrintKAParameters(double lambda, double k, double h, size_t line_len, CNcbiOstream &out, bool gapped, const Blast_GumbelBlk *gbp=NULL)
Print out kappa, lambda blast parameters.
static CRef< objects::CSeq_align > CreateDensegFromDendiag(const objects::CSeq_align &aln)
Create denseseg representation for densediag seqalign.
static list< string > GetCustomLinksList(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope, int customLinkTypes=eLinkTypeDefault)
Create the list of string links for seqid that go.
static string GetURLDefault(const string url_name, int index=-1)
settings are not complete.
static CRef< objects::CSeq_align_set > FilterSeqalignByEval(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh)
function for Filtering seqalign by expect value
static string GetFASTALinkURL(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL to FASTA info.
static bool GetTextSeqID(const list< CRef< objects::CSeq_id > > &ids, string *textSeqID=NULL)
static void GetBlastDbInfo(vector< SDbInfo > &retval, const string &blastdb_names, bool is_protein, int dbfilt_algorithm, bool is_remote=false)
Retrieve BLAST database information for presentation in BLAST report.
static void GetUseThisSequence(const objects::CSeq_align &aln, list< TGi > &use_this_gi)
Extract use_this_gi info from blast alignment.
static bool SortHitByPercentIdentityDescendingEx(const CRef< objects::CSeq_align_set > &info1, const CRef< objects::CSeq_align_set > &info2)
sorting function for sorting a list of seqalign set by descending identity
static void ExtractSeqAlignForSeqList(CRef< objects::CSeq_align_set > &all_aln_set, string alignSeqList)
extract seq_align_set corresponding to seqid list
static int GetPercentMatch(int numerator, int denominator)
function for calculating percent match for an alignment.
static string GetSeqIdString(const objects::CBioseq &cbs, bool believe_local_id=true)
Returns a full '|'-delimited Seq-id string for a Bioseq.
static bool MatchSeqInUseThisSeqList(list< string > &use_this_seq, string textSeqIDToMatch)
static list< string > GetSeqLinksList(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static CRef< objects::CSeq_align_set > SortSeqalignForSortableFormat(CCgiContext &ctx, objects::CScope &scope, objects::CSeq_align_set &aln_set, bool nuc_to_nuc_translation, int db_order, int hit_order, int hsp_order, ILinkoutDB *linkoutdb, const string &mv_build_name)
static double GetPercentIdentity(const objects::CSeq_align &aln, objects::CScope &scope, bool do_translation)
calculate the percent identity for a seqalign
static void ExtractSeqalignSetFromDiscSegs(objects::CSeq_align_set &target, const objects::CSeq_align_set &source)
If a Seq-align-set contains Seq-aligns with discontinuous type segments, extract the underlying Seq-a...
static bool IsWGSPattern(string &wgsAccession)
Check if accession is WGS.
static bool SortHitByScoreDescending(const CRef< objects::CSeq_align_set > &info1, const CRef< objects::CSeq_align_set > &info2)
static CRef< objects::CSeq_align_set > FilterSeqalignByScoreParams(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh)
function for Filtering seqalign by expect value and percent identity
static void GetAlignLengths(objects::CAlnVec &salv, int &align_length, int &num_gaps, int &num_gap_opens)
Count alignment length, number of gap openings and total number of gaps in a single alignment.
static string BuildSRAUrl(const objects::CBioseq::TId &ids, string user_url)
return the SRA (Short Read Archive) URL
static bool SortHspByMasterStartAscending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
sorting function for sorting a list of seqalign by ascending master start position
static int SetCustomLinksTypes(SSeqURLInfo *seqUrlInfo, int customLinkTypesInp)
Create info indicating what kind of links to display.
static int GetMasterCoverage(const objects::CSeq_align_set &alnset)
static unique_ptr< CNcbiRegistry > m_Reg
static int GetFrame(int start, objects::ENa_strand strand, const objects::CBioseq_Handle &handle)
return the frame for a given strand. Note that start is zero-based.
static void GetBdlLinkoutInfo(const list< CRef< objects::CBlast_def_line > > &bdl, map< int, vector< objects::CBioseq::TId > > &linkout_map, ILinkoutDB *linkoutdb, const string &mv_build_name)
Create map that holds all linkouts for the list of blast deflines and corresponding seqIDs.
static bool SortHspByPercentIdentityDescending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
sorting function for sorting a list of seqalign by descending identity
static CRef< objects::CSeq_align_set > HitListToHspList(list< CRef< objects::CSeq_align_set > > &source)
extract all nested hsp's into a list
static string GetTitle(const objects::CBioseq_Handle &bh)
static TTaxId GetTaxidForSeqid(const objects::CSeq_id &id, objects::CScope &scope)
return the tax id for a seqid
static string GetAlignedRegionsURL(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL showing aligned regions info.
static void AddSpace(CNcbiOstream &out, int number)
Add the specified white space.
static string m_Protocol
static list< string > GetGiLinksList(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static void AcknowledgeBlastSubject(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false)
Print out blast subject info.
static unique_ptr< CGeneInfoFileReader > m_GeneInfoReader
static TGi GetGiForSeqIdList(const list< CRef< objects::CSeq_id > > &ids)
return gi from id list
static void SplitSeqalignByMolecularType(vector< CRef< objects::CSeq_align_set > > &target, int sort_method, const objects::CSeq_align_set &source, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
static bool SortHitByMasterCoverageDescending(CRef< objects::CSeq_align_set > const &info1, CRef< objects::CSeq_align_set > const &info2)
static void BlastPrintError(list< SBlastError > &error_return, bool error_post, CNcbiOstream &out)
Output blast errors.
static string MapSpaceTemplate(string inpString, string tmplParamName, string templParamVal, unsigned int maxParamLength, int spacesFormatFlag=eSpacePosAtLineEnd)
Replace template tags by real data and calculate and add spaces dependent on maxParamLength and space...
static void HspListToHitList(list< CRef< objects::CSeq_align_set > > &target, const objects::CSeq_align_set &source)
group hsp's with the same id together
static void SortHitByPercentIdentityDescending(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, bool do_translation)
sort a list of seqalign set by alignment identity
static string MapProtocol(string url_link)
static string GetFullIDLink(SSeqURLInfo *seqUrlInfo, const objects::CBioseq::TId *ids)
static bool SortHspByScoreDescending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
static CRef< objects::CSeq_align_set > LimitSeqalignByHsps(objects::CSeq_align_set &source_aln, int maxAligns, int maxHsps)
function for limiting seqalign by hsps number (by default results are not cut off within the query)
static CRef< objects::CSeq_align_set > ExtractQuerySeqAlign(CRef< objects::CSeq_align_set > &source_aln, int queryNumber)
function for extracting seqalign for the query
static string GetLabel(CConstRef< objects::CSeq_id > id, bool with_version=false)
Return a label for an ID Tries to recreate behavior of GetLabel before a change that prepends "ti|" t...
static void x_WrapOutputLine(string str, size_t line_len, CNcbiOstream &out, bool html=false)
Wrap a string to specified length.
static unsigned int GetSubjectsNumber(const objects::CSeq_align_set &source_aln, unsigned int num)
Calculate number of subject sequences in alignment limited by num.
static CRange< TSeqPos > GetSeqAlignCoverageParams(const objects::CSeq_align_set &alnset, int *masterCoverage, bool *flip)
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
TSeqPos GetAlnStop(TNumseg seg) const
Definition: alnmap.hpp:488