NCBI C++ ToolKit
align_format_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: align_format_util.cpp 102916 2024-08-06 15:09:32Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  * 12/2004
28  * File Description:
29  * blast formatter utilities
30  *
31  */
32 #include <ncbi_pch.hpp>
33 
34 #include <math.h> // For use of ceil
35 
37 
38 #include <corelib/ncbireg.hpp>
39 #include <corelib/ncbidiag.hpp>
40 #include <corelib/ncbistre.hpp>
41 #include <corelib/ncbiutil.hpp>
42 #include <corelib/ncbiobj.hpp>
43 #include <corelib/ncbifile.hpp>
44 #include <corelib/metareg.hpp>
45 #include <html/htmlhelper.hpp>
46 #include <cgi/cgictx.hpp>
48 
49 
57 #include <objects/seq/Seq_inst.hpp>
59 #include <objects/seq/Seqdesc.hpp>
60 #include <objmgr/seqdesc_ci.hpp>
63 
64 #include <objtools/blast/services/blast_services.hpp> // for CBlastServices
65 #include <objtools/blast/seqdb_reader/seqdb.hpp> // for CSeqDB
66 #include <objtools/blast/seqdb_reader/seqdbcommon.hpp> // for CSeqDBException
67 
72 
73 #include <stdio.h>
74 #include <sstream>
75 #include <iomanip>
76 
80 BEGIN_SCOPE(align_format)
81 
82 const char* CAlignFormatUtil::kNoHitsFound = "No hits found";
83 
84 static bool kTranslation;
86 
87 const char k_PSymbol[ePMatrixSize + 1] = "ARNDCQEGHILKMFPSTWYVBZX";
88 
89 unique_ptr<CNcbiRegistry> CAlignFormatUtil::m_Reg;
92 
93 ///Get blast score information
94 ///@param scoreList: score container to extract score info from
95 ///@param score: place to extract the raw score to
96 ///@param bits: place to extract the bit score to
97 ///@param evalue: place to extract the e value to
98 ///@param sum_n: place to extract the sum_n to
99 ///@param num_ident: place to extract the num_ident to
100 ///@param use_this_gi: place to extract use_this_gi to
101 ///@return true if found score, false otherwise
102 ///
103 template<class container> bool
104 s_GetBlastScore(const container& scoreList,
105  int& score,
106  double& bits,
107  double& evalue,
108  int& sum_n,
109  int& num_ident,
110  list<TGi>& use_this_gi,
111  int& comp_adj_method)
112 {
113  const string k_GiPrefix = "gi:";
114  bool hasScore = false;
115  ITERATE (typename container, iter, scoreList) {
116  const CObject_id& id=(*iter)->GetId();
117  if (id.IsStr()) {
118  if (id.GetStr()=="score"){
119  score = (*iter)->GetValue().GetInt();
120  } else if (id.GetStr()=="bit_score"){
121  bits = (*iter)->GetValue().GetReal();
122  } else if (id.GetStr()=="e_value" || id.GetStr()=="sum_e") {
123  evalue = (*iter)->GetValue().GetReal();
124  hasScore = true;
125  } else if (id.GetStr()=="use_this_gi"){
126  Uint4 gi_v = (Uint4)((*iter)->GetValue().GetInt());
127  use_this_gi.push_back(GI_FROM(Uint4, gi_v));
128  } else if (id.GetStr()=="sum_n"){
129  sum_n = (*iter)->GetValue().GetInt();
130  } else if (id.GetStr()=="num_ident"){
131  num_ident = (*iter)->GetValue().GetInt();
132  } else if (id.GetStr()=="comp_adjustment_method") {
133  comp_adj_method = (*iter)->GetValue().GetInt();
134  }
135  else if(NStr::StartsWith(id.GetStr(),k_GiPrefix)) { //will be used when switch to 64bit GIs
136  string strGi = NStr::Replace(id.GetStr(),k_GiPrefix,"");
137  TGi gi = NStr::StringToNumeric<TGi>(strGi);
138  use_this_gi.push_back(gi);
139  }
140  }
141  }
142 
143  return hasScore;
144 }
145 
146 
147 ///Wrap a string to the specified length. If a break happens to fall in
148 /// a word, the line length is extended until the end of that word
149 ///@param str: input string
150 ///@param line_len: length of each line desired
151 ///@param out: stream to output to
 ///@param html: presumably adjusts the wrap flags for HTML output; the
 /// statements that declare and set `flags` are missing from this listing
 /// -- TODO confirm against the full source
152 ///
153 void CAlignFormatUtil::x_WrapOutputLine(string str, size_t line_len,
154  CNcbiOstream& out, bool html)
155 {
156  list<string> string_l;
158  if (html) {
161  }
 // Split str into wrapped lines, then write each one followed by "\n".
162  NStr::Wrap(str, line_len, string_l, flags);
163  list<string>::iterator iter = string_l.begin();
164  while(iter != string_l.end())
165  {
166  out << *iter;
167  out << "\n";
168  iter++;
169  }
170 }
171 
172 void CAlignFormatUtil::BlastPrintError(list<SBlastError>&
173  error_return,
174  bool error_post, CNcbiOstream& out)
175 {
176 
177  string errsevmsg[] = { "UNKNOWN","INFO","WARNING","ERROR",
178  "FATAL"};
179 
180  NON_CONST_ITERATE(list<SBlastError>, iter, error_return) {
181 
182  if(iter->level > 5){
183  iter->level = eDiag_Info;
184  }
185 
186  if(iter->level == 4){
187  iter->level = eDiag_Fatal;
188  } else{
189  iter->level = iter->level;
190  }
191 
192  if (error_post){
193  ERR_POST_EX(iter->level, 0, iter->message);
194  }
195  out << errsevmsg[iter->level] << ": " << iter->message << "\n";
196 
197  }
198 
199 }
200 
201 void CAlignFormatUtil::PrintTildeSepLines(string str, size_t line_len,
202  CNcbiOstream& out) {
203 
204  vector<string> split_line;
205  NStr::Split(str, "~", split_line);
206  ITERATE(vector<string>, iter, split_line) {
207  x_WrapOutputLine(*iter, line_len, out);
208  }
209 }
210 #ifdef DO_UNUSED
 // This helper is compiled only when DO_UNUSED is defined.
 // NOTE(review): several lines of this definition (the second parameter and
 // the declarations of `blastdb` and `dbinfo`) are missing from this listing.
211 /// Initialize database statistics with data from BLAST servers
212 /// @param dbname name of a single BLAST database [in]
213 /// @param info structure to fill [in|out]
214 /// @return true if successfully filled, false if no database information
215 /// was obtained (info.name is still set to dbname in that case)
216 static bool s_FillDbInfoRemotely(const string& dbname,
218 {
219  static CBlastServices rmt_blast_services;
221  blastdb->SetName(dbname);
222  blastdb->SetType() = info.is_protein
225  rmt_blast_services.GetDatabaseInfo(blastdb);
226 
227  info.name = dbname;
228  if ( !dbinfo ) {
229  return false;
230  }
 // Fall back to the database name when the server supplies no description.
231  info.definition = dbinfo->GetDescription();
232  if (info.definition.empty())
233  info.definition = info.name;
234  CTimeFormat tf("b d, Y H:m P", CTimeFormat::fFormat_Simple);
235  info.date = CTime(dbinfo->GetLast_updated()).AsString(tf);
236  info.total_length = dbinfo->GetTotal_length();
237  info.number_seqs = static_cast<int>(dbinfo->GetNum_sequences());
238  return true;
239 }
240 #endif
241 /// Initialize database statistics with data obtained from local BLAST
242 /// databases
243 /// @param dbname name of a single BLAST database [in]
244 /// @param info structure to fill [in|out]
245 /// @param dbfilt_algorithm filtering algorithm ID used for this search;
246 /// -1 means no filtering details are looked up [in]
247 /// @return true if successfully filled, false if the database object
248 /// could not be obtained
 /// NOTE(review): the line carrying the function name and first parameters
 /// is missing from this listing; callers in this file invoke it as
 /// s_FillDbInfoLocally(name, info, dbfilt_algorithm).
249 static bool
252  int dbfilt_algorithm)
253 {
254  CRef<CSeqDB> seqdb(new CSeqDB(dbname, info.is_protein
256  if ( !seqdb ) {
257  return false;
258  }
259  info.name = seqdb->GetDBNameList();
260  info.definition = seqdb->GetTitle();
 // Use the database name when the database has no title.
261  if (info.definition.empty())
262  info.definition = info.name;
263  info.date = seqdb->GetDate();
264  info.total_length = seqdb->GetTotalLength();
265  info.number_seqs = seqdb->GetNumSeqs();
266 
267  // Process the filtering algorithm IDs
268  info.filt_algorithm_name.clear();
269  info.filt_algorithm_options.clear();
270  if (dbfilt_algorithm == -1) {
271  return true;
272  }
273 
 // The mask-algorithm lookup is compiled out for the compilers excluded below.
274 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
275  (!defined(NCBI_COMPILER_MIPSPRO)) )
276  string filtering_algorithm;
277  seqdb->GetMaskAlgorithmDetails(dbfilt_algorithm,
278  filtering_algorithm,
279  info.filt_algorithm_name,
280  info.filt_algorithm_options);
281 #endif
282  return true;
283 }
284 
 /// Replace the contents of retval with a single entry describing a
 /// user-specified sequence set.
 /// @param retval cleared, then filled with exactly one entry [out]
 /// @param is_protein stored in the entry's is_protein field
 /// @param numSeqs stored as the entry's number of sequences
 /// @param numLetters stored as the entry's total length
 /// @param tag when non-empty, it is embedded in the entry's definition as
 /// "(Input: <tag>)"
 /// NOTE(review): the declaration of the local `info` is missing from this
 /// listing.
285 void
286 CAlignFormatUtil::FillScanModeBlastDbInfo(vector<CAlignFormatUtil::SDbInfo>& retval,
287  bool is_protein, int numSeqs, Int8 numLetters, string& tag)
288 {
289  retval.clear();
291  info.is_protein = is_protein;
292  if (tag == "")
293  info.definition = string("User specified sequence set.");
294  else
295  {
296  info.definition = string("User specified sequence set ") +
297  string("(Input: ") + tag + string(").");
298  }
299  info.number_seqs = numSeqs;
300  info.total_length = numLetters;
301  retval.push_back(info);
302 }
303 
 /// Fill retval with one statistics entry per BLAST database.
 /// @param retval cleared, then filled with the database entries [out]
 /// @param blastdb_names database name(s); in the local branch the string is
 /// split with SeqDB_SplitQuoted
 /// @param is_protein whether the databases are protein databases
 /// @param dbfilt_algorithm filtering algorithm ID forwarded to
 /// s_FillDbInfoLocally
 /// @param is_remote when true the information is requested through
 /// CBlastServices, otherwise it is read with s_FillDbInfoLocally
 /// Throws CSeqDBException (eFileErr) when a database is not found, or when
 /// a remote entry reports a negative total length and the local fallback
 /// fails.
 /// NOTE(review): the declarations of the local `info` in both loops are
 /// missing from this listing.
304 void
305 CAlignFormatUtil::GetBlastDbInfo(vector<CAlignFormatUtil::SDbInfo>& retval,
306  const string& blastdb_names, bool is_protein,
307  int dbfilt_algorithm /* = -1 */,
308  bool is_remote /* = false */)
309 {
310  retval.clear();
311  if( is_remote ){
312  bool found_all = false;
313  static CBlastServices rmt_blast_services;
314  vector<string> missing_names;
315  vector< CRef<objects::CBlast4_database_info> > all_db_info =
316  rmt_blast_services.GetDatabaseInfo(blastdb_names,is_protein,&found_all,&missing_names);
317  if( !missing_names.empty() ){
 // NOTE(review): the missing names are concatenated with no separator
 // between them.
318  string msg("'");
319  for(size_t ndx=0 ; ndx < missing_names.size(); ndx++){
320  msg += missing_names[ndx];
321  }
322  msg += string("' not found on NCBI servers.\n");
323  NCBI_THROW(CSeqDBException, eFileErr, msg);
324  }
325  for(size_t ndx=0 ; ndx < all_db_info.size(); ndx++){
327  objects::CBlast4_database_info &dbinfo = *all_db_info[ndx];
328  info.name = dbinfo.GetDatabase().GetName();
329  info.definition = dbinfo.GetDescription();
330  if (info.definition.empty())
331  info.definition = info.name;
332  CTimeFormat tf("b d, Y H:m P", CTimeFormat::fFormat_Simple);
333  info.date = CTime(dbinfo.GetLast_updated()).AsString(tf);
334  info.total_length = dbinfo.GetTotal_length();
335  info.number_seqs = static_cast<int>(dbinfo.GetNum_sequences());
 // A negative total length from the server triggers a local lookup.
336  if (info.total_length < 0) {
337  const string kDbName = NStr::TruncateSpaces(info.name);
338  if( ! s_FillDbInfoLocally(kDbName, info, dbfilt_algorithm) ){
339  string msg("'");
340  msg += kDbName;
341  msg += string("' has bad total length on NCBI servers.\n");
342  NCBI_THROW(CSeqDBException, eFileErr, msg);
343  }
344  }
345  retval.push_back(info);
346  }
347  return;
348  }
349  else{
350  vector<CTempString> dbs;
351  SeqDB_SplitQuoted(blastdb_names, dbs, true);
352  retval.reserve(dbs.size());
353 
354  ITERATE(vector<CTempString>, i, dbs) {
356  info.is_protein = is_protein;
357  bool success = false;
358  // Unsafe OK as kDbName only used in this loop.
359  const string kDbName = NStr::TruncateSpaces_Unsafe(*i);
360  if (kDbName.empty())
361  continue;
362 
363  success = s_FillDbInfoLocally(kDbName, info, dbfilt_algorithm);
364 
365  if (success) {
366  retval.push_back(info);
367  } else {
368  string msg("'");
369  msg += kDbName;
 // NOTE(review): is_remote is always false in this branch, so the
 // "NCBI servers" wording below cannot be reached.
370  if (is_remote)
371  msg += string("' not found on NCBI servers.\n");
372  else
373  msg += string("' not found.\n");
374  NCBI_THROW(CSeqDBException, eFileErr, msg);
375  }
376  }
377  }
378 }
379 
 /// Print a report of database statistics.
 /// @param dbinfo_list database entries to report
 /// @param line_length wrap length passed to x_WrapOutputLine
 /// @param out stream to write to
 /// @param top when true, print one combined summary (all titles joined
 /// with "; ", summed sequence counts and lengths, and the masking info of
 /// the first entry) and return; otherwise print one section per entry
 /// NOTE(review): the `top` branch reads dbinfo_list.front(), so it assumes
 /// a non-empty list -- confirm with callers. One line of that branch is
 /// missing from this listing.
380 void CAlignFormatUtil::PrintDbReport(const vector<SDbInfo>& dbinfo_list,
381  size_t line_length,
382  CNcbiOstream& out,
383  bool top)
384 {
385  if (top) {
386  const CAlignFormatUtil::SDbInfo* dbinfo = &(dbinfo_list.front());
387  out << "Database: ";
388 
389  string db_titles = dbinfo->definition;
390  Int8 tot_num_seqs = static_cast<Int8>(dbinfo->number_seqs);
391  Int8 tot_length = dbinfo->total_length;
392 
 // Accumulate titles and totals over the remaining entries.
393  for (size_t i = 1; i < dbinfo_list.size(); i++) {
394  db_titles += "; " + dbinfo_list[i].definition;
395  tot_num_seqs += static_cast<Int8>(dbinfo_list[i].number_seqs);
396  tot_length += dbinfo_list[i].total_length;
397  }
398 
399  x_WrapOutputLine(db_titles, line_length, out);
400  if ( !dbinfo->filt_algorithm_name.empty() ) {
401  out << "Masked using: '" << dbinfo->filt_algorithm_name << "'";
402  if ( !dbinfo->filt_algorithm_options.empty() ) {
403  out << ", options: '" << dbinfo->filt_algorithm_options << "'";
404  }
405  out << endl;
406  }
408  out << NStr::Int8ToString(tot_num_seqs, NStr::fWithCommas) <<
409  " sequences; " <<
410  NStr::Int8ToString(tot_length, NStr::fWithCommas) <<
411  " total letters\n\n";
412  return;
413  }
414 
415  ITERATE(vector<SDbInfo>, dbinfo, dbinfo_list) {
 // Entries flagged as subsets get a shorter section.
416  if (dbinfo->subset == false) {
417  out << " Database: ";
418  x_WrapOutputLine(dbinfo->definition, line_length, out);
419 
420  if ( !dbinfo->filt_algorithm_name.empty() ) {
421  out << " Masked using: '" << dbinfo->filt_algorithm_name << "'";
422  if ( !dbinfo->filt_algorithm_options.empty() ) {
423  out << ", options: '" << dbinfo->filt_algorithm_options << "'";
424  }
425  out << endl;
426  }
427 
428  out << " Posted date: ";
429  out << dbinfo->date << "\n";
430 
431  out << " Number of letters in database: ";
432  out << NStr::Int8ToString(dbinfo->total_length,
433  NStr::fWithCommas) << "\n";
434  out << " Number of sequences in database: ";
435  out << NStr::IntToString(dbinfo->number_seqs,
436  NStr::fWithCommas) << "\n";
437 
438  } else {
439  out << " Subset of the database(s) listed below" << "\n";
440  out << " Number of letters searched: ";
441  out << NStr::Int8ToString(dbinfo->total_length,
442  NStr::fWithCommas) << "\n";
443  out << " Number of sequences searched: ";
444  out << NStr::IntToString(dbinfo->number_seqs,
445  NStr::fWithCommas) << "\n";
446  }
447  out << "\n";
448  }
449 
450 }
451 
452 void CAlignFormatUtil::PrintKAParameters(double lambda, double k, double h,
453  size_t line_len,
454  CNcbiOstream& out, bool gapped,
455  const Blast_GumbelBlk *gbp)
456 {
457 
458  char buffer[256];
459  if (gapped) {
460  out << "Gapped" << "\n";
461  }
462  out << "Lambda K H";
463  if (gbp) {
464  if (gapped) {
465  out << " a alpha sigma";
466  } else {
467  out << " a alpha";
468  }
469  }
470  out << "\n";
471  sprintf(buffer, "%#8.3g ", lambda);
472  out << buffer;
473  sprintf(buffer, "%#8.3g ", k);
474  out << buffer;
475  sprintf(buffer, "%#8.3g ", h);
476  out << buffer;
477  if (gbp) {
478  if (gapped) {
479  sprintf(buffer, "%#8.3g ", gbp->a);
480  out << buffer;
481  sprintf(buffer, "%#8.3g ", gbp->Alpha);
482  out << buffer;
483  sprintf(buffer, "%#8.3g ", gbp->Sigma);
484  out << buffer;
485  } else {
486  sprintf(buffer, "%#8.3g ", gbp->a_un);
487  out << buffer;
488  sprintf(buffer, "%#8.3g ", gbp->Alpha_un);
489  out << buffer;
490  }
491  //x_WrapOutputLine(buffer, line_len, out);
492  }
493  out << "\n";
494 }
495 
496 string
497 CAlignFormatUtil::GetSeqIdString(const CBioseq& cbs, bool believe_local_id)
498 {
499  const CBioseq::TId& ids = cbs.GetId();
500  return CAlignFormatUtil::GetSeqIdString(ids, believe_local_id);
501 }
502 
 /// Build a display id string from a list of Seq-ids.
 /// @param ids Seq-ids of the sequence
 /// @param believe_local_id when false, a chosen id of type e_Local yields
 /// an empty result
 /// @return the id string, or an empty string when no id is chosen or a
 /// local id is not believed
 /// NOTE(review): the declarations of `wid` (the chosen id) and `app` are
 /// missing from this listing.
503 string
504 CAlignFormatUtil::GetSeqIdString(const list<CRef<CSeq_id> > & ids, bool believe_local_id)
505 {
506  string all_id_str = NcbiEmptyString;
508 
509  if (wid && (wid->Which()!= CSeq_id::e_Local || believe_local_id)){
510  TGi gi = FindGi(ids);
511 
 // Long ids are used only when the registry entry BLAST/LONG_SEQID is "1".
512  bool use_long_seqids = false;
514  if (app) {
515  const CNcbiRegistry& registry = app->GetConfig();
516  use_long_seqids = (registry.Get("BLAST", "LONG_SEQID") == "1");
517  }
518  if (!use_long_seqids) {
519 
520  all_id_str = GetBareId(*wid);
521  }
 // Long form: drop a leading "lcl|" and prepend "gi|<gi>|" when a GI exists.
522  else if (strncmp(wid->AsFastaString().c_str(), "lcl|", 4) == 0) {
523  if(gi == ZERO_GI){
524  all_id_str = wid->AsFastaString().substr(4);
525  } else {
526  all_id_str = "gi|" + NStr::NumericToString(gi) +
527  "|" + wid->AsFastaString().substr(4);
528  }
529  } else {
530  if(gi == ZERO_GI){
531  all_id_str = wid->AsFastaString();
532  } else {
533  all_id_str = "gi|" + NStr::NumericToString(gi) + "|" +
534  wid->AsFastaString();
535  }
536  }
537  }
538 
539  return all_id_str;
540 }
541 
 /// Concatenate the title descriptors of a Bioseq into one string.
 /// Returns an empty string when the Bioseq has no descriptors.
 /// NOTE(review): the signature line and the loop header over `data` are
 /// missing from this listing; elsewhere in this file the function is called
 /// as GetSeqDescrString(cbs).
542 string
544 {
545  string all_descr_str = NcbiEmptyString;
546 
547  if (cbs.IsSetDescr()) {
548  const CBioseq::TDescr& descr = cbs.GetDescr();
549  const CBioseq::TDescr::Tdata& data = descr.Get();
 // Only title descriptors contribute; they are appended with no separator.
551  if((*iter)->IsTitle()) {
552  all_descr_str += (*iter)->GetTitle();
553  }
554  }
555  }
556  return all_descr_str;
557 }
558 
560  size_t line_len,
561  CNcbiOstream& out,
562  bool believe_query,
563  bool html,
564  bool tabular /* = false */,
565  const string& rid /* = kEmptyStr*/)
566 {
 // Forwards the arguments together with the fixed label "Query".
 // NOTE(review): the first signature line and the start of the forwarded
 // call are missing from this listing.
567  const string label("Query");
569  believe_query, html,
570  label, tabular, rid);
571 }
572 
573 void
575  size_t line_len,
576  CNcbiOstream& out,
577  bool believe_query,
578  bool html,
579  bool tabular /* = false */)
580 {
 // Forwards the arguments together with the fixed label "Subject" and an
 // empty RID.
 // NOTE(review): the signature line carrying the function name and the start
 // of the forwarded call are missing from this listing.
581  const string label("Subject");
583  believe_query, html,
584  label, tabular, kEmptyStr);
585 }
586 
 // Prints a labelled sequence acknowledgement: the label, the sequence id
 // and description, then (non-tabular only) the length, then the RID if one
 // is given.
 // NOTE(review): the signature line carrying the function name and the
 // `cbs` parameter is missing from this listing.
587 void
589  size_t line_len,
590  CNcbiOstream& out,
591  bool believe_query,
592  bool html,
593  const string& label,
594  bool tabular /* = false */,
595  const string& rid /* = kEmptyStr*/)
596 {
597 
 // Label prefix: bold for HTML, "# " comment style for tabular output.
598  if (html) {
599  out << "<b>" << label << "=</b> ";
600  } else if (tabular) {
601  out << "# " << label << ": ";
602  } else {
603  out << label << "= ";
604  }
605 
 // Id and description separated by a space; surrounding spaces are trimmed.
606  string all_id_str = GetSeqIdString(cbs, believe_query);
607  all_id_str += " ";
608  all_id_str = NStr::TruncateSpaces(all_id_str + GetSeqDescrString(cbs));
609 
610  // For tabular output, there is no limit on the line length.
611  // There is also no extra line with the sequence length.
612  if (tabular) {
613  out << all_id_str;
614  } else {
615  x_WrapOutputLine(all_id_str, line_len, out, html);
616  if(cbs.IsSetInst() && cbs.GetInst().CanGetLength()){
617  out << "\nLength=";
618  out << cbs.GetInst().GetLength() <<"\n";
619  }
620  }
621 
622  if (rid != kEmptyStr) {
623  if (tabular) {
624  out << "\n" << "# RID: " << rid;
625  } else {
626  out << "\n" << "RID: " << rid << "\n";
627  }
628  }
629 }
630 
631 void CAlignFormatUtil::PrintPhiInfo(int num_patterns,
632  const string& pattern,
633  double prob,
634  vector<int>& offsets,
635  CNcbiOstream& out)
636 {
637  out << num_patterns << " occurrence(s) of pattern: " << "\n"
638  << pattern << " at position(s) ";
639 
640  bool first = true;
641  for (vector<int>::iterator it = offsets.begin();
642  it != offsets.end(); it++)
643  {
644  if (!first)
645  out << ", ";
646 
647  out << 1 + *it ;
648 
649  first = false;
650  }
651  out << " of query sequence" << "\n";
652  out << "pattern probability=" << prob << "\n";
653 
654 }
655 
657  int& score,
658  double& bits,
659  double& evalue,
660  int& sum_n,
661  int& num_ident,
662  list<TGi>& use_this_gi)
663 {
 // Convenience overload: forwards to the GetAlnScores overload that also
 // reports comp_adj_method and discards that value.
 // NOTE(review): the first signature line (function name and `aln`
 // parameter) is missing from this listing.
664  int comp_adj_method = 0; // dummy variable
665 
666  CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n,
667  num_ident, use_this_gi, comp_adj_method);
668 }
669 
671  int& score,
672  double& bits,
673  double& evalue,
674  int& sum_n,
675  int& num_ident,
676  list<string>& use_this_seq)
677 {
 // Convenience overload for the string-list form: forwards to the
 // GetAlnScores overload that also reports comp_adj_method and discards
 // that value.
 // NOTE(review): the first signature line (function name and `aln`
 // parameter) is missing from this listing.
678  int comp_adj_method = 0; // dummy variable
679 
680  CAlignFormatUtil::GetAlnScores(aln, score, bits, evalue, sum_n,
681  num_ident, use_this_seq, comp_adj_method);
682 }
683 
684 
686  int& score,
687  double& bits,
688  double& evalue,
689  int& sum_n,
690  int& num_ident,
691  list<TGi>& use_this_gi,
692  int& comp_adj_method)
693 {
 // Extracts score values from an alignment. All numeric outputs are reset
 // first (-1, and 0 for comp_adj_method), so a value that is not found keeps
 // that default.
 // NOTE(review): the first signature line and the first line of the Denseg
 // call are missing from this listing.
694  bool hasScore = false;
695  score = -1;
696  bits = -1;
697  evalue = -1;
698  sum_n = -1;
699  num_ident = -1;
700  comp_adj_method = 0;
701 
702  //look for scores at seqalign level first
703  hasScore = s_GetBlastScore(aln.GetScore(), score, bits, evalue,
704  sum_n, num_ident, use_this_gi, comp_adj_method);
705 
706  //look at the seg level
 // Only the first segment is consulted for Std and Dendiag alignments.
707  if(!hasScore){
708  const CSeq_align::TSegs& seg = aln.GetSegs();
709  if(seg.Which() == CSeq_align::C_Segs::e_Std){
710  s_GetBlastScore(seg.GetStd().front()->GetScores(),
711  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
712  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
713  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
714  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
715  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
717  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
718  }
719  }
 // If the scores supplied no GIs, fall back to GetUseThisSequence.
720  if(use_this_gi.size() == 0) {
721  GetUseThisSequence(aln,use_this_gi);
722  }
723 }
724 
725 //converts gi list to the list of gi:XXXXXXXX strings
726 static list<string> s_NumGiToStringGiList(list<TGi> use_this_gi)//for backward compatability
727 {
728  const string k_GiPrefix = "gi:";
729  list<string> use_this_seq;
730  ITERATE(list<TGi>, iter_gi, use_this_gi){
731  string strSeq = k_GiPrefix + NStr::NumericToString(*iter_gi);
732  use_this_seq.push_back(strSeq);
733  }
734  return use_this_seq;
735 }
736 
738  int& score,
739  double& bits,
740  double& evalue,
741  int& sum_n,
742  int& num_ident,
743  list<string>& use_this_seq,
744  int& comp_adj_method)
745 {
 // String-list variant of the score extraction. All numeric outputs are
 // reset first (-1, and 0 for comp_adj_method). GIs found among the scores
 // are converted to "gi:<number>" strings; when none are found,
 // GetUseThisSequence fills use_this_seq instead.
 // NOTE(review): the first signature line and the first line of the Denseg
 // call are missing from this listing.
746  bool hasScore = false;
747  score = -1;
748  bits = -1;
749  evalue = -1;
750  sum_n = -1;
751  num_ident = -1;
752  comp_adj_method = 0;
753 
754  list<TGi> use_this_gi;
755  //look for scores at seqalign level first
756  hasScore = s_GetBlastScore(aln.GetScore(), score, bits, evalue,
757  sum_n, num_ident, use_this_gi, comp_adj_method);
758 
759  //look at the seg level
760  if(!hasScore){
761  const CSeq_align::TSegs& seg = aln.GetSegs();
762  if(seg.Which() == CSeq_align::C_Segs::e_Std){
763  s_GetBlastScore(seg.GetStd().front()->GetScores(),
764  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
765  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
766  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
767  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
768  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
770  score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
771  }
772  }
773  if(use_this_gi.size() == 0) {
774  GetUseThisSequence(aln,use_this_seq);
775  }
776  else {
777  use_this_seq = s_NumGiToStringGiList(use_this_gi);//for backward compatibility
778  }
779 }
780 
782 {
 // Returns the tag of a general-db id as a string: the integer id converted
 // to text when the tag is numeric, otherwise the tag's string value.
 // NOTE(review): the signature line is missing from this listing; GetLabel
 // in this file calls it as CAlignFormatUtil::GetGnlID(dtg).
783  string retval = NcbiEmptyString;
784 
785  if(dtg.GetTag().IsId())
786  retval = NStr::IntToString(dtg.GetTag().GetId());
787  else
788  retval = dtg.GetTag().GetStr();
789 
790  return retval;
791 }
792 
793 string CAlignFormatUtil::GetLabel(CConstRef<CSeq_id> id,bool with_version)
794 {
795  string retval = "";
796  if (id->Which() == CSeq_id::e_General){
797  const CDbtag& dtg = id->GetGeneral();
798  retval = CAlignFormatUtil::GetGnlID(dtg);
799  }
800  if (retval == "")
801  retval = id->GetSeqIdString(with_version);
802 
803  return retval;
804 }
805 
807 
808 {
 // Writes `number` single-space strings to `out`.
 // NOTE(review): the signature line is missing from this listing.
809  for(auto i=0; i<number; i++){
810  out<<" ";
811  }
812 }
813 
815  double bit_score,
816  double total_bit_score,
817  int raw_score,
818  string& evalue_str,
819  string& bit_score_str,
820  string& total_bit_score_str,
821  string& raw_score_str)
822 {
 // Formats the e-value, bit score, total bit score and raw score as display
 // strings. The printf format is selected by the magnitude of each value.
 // NOTE(review): the first signature line (function name and the `evalue`
 // parameter) is missing from this listing.
823  char evalue_buf[100], bit_score_buf[100], total_bit_score_buf[100];
824 
825  /* Facilitates comparing formatted output using diff */
826  static string kBitScoreFormat("%4.1lf");
827 #ifdef CTOOLKIT_COMPATIBLE
 // Checked once: the CTOOLKIT_COMPATIBLE environment variable switches the
 // bit-score format to zero decimals.
828  static bool ctoolkit_compatible = false;
829  static bool value_set = false;
830  if ( !value_set ) {
831  if (getenv("CTOOLKIT_COMPATIBLE")) {
832  kBitScoreFormat.assign("%4.0lf");
833  ctoolkit_compatible = true;
834  }
835  value_set = true;
836  }
837 #endif /* CTOOLKIT_COMPATIBLE */
838 
 // E-values below 1.0e-180 are shown as "0.0".
839  if (evalue < 1.0e-180) {
840  snprintf(evalue_buf, sizeof(evalue_buf), "0.0");
841  } else if (evalue < 1.0e-99) {
842  snprintf(evalue_buf, sizeof(evalue_buf), "%2.0le", evalue);
843 #ifdef CTOOLKIT_COMPATIBLE
 // NOTE(review): sizeof(evalue_buf-1) is the size of a pointer, not the
 // buffer size minus one, and strncpy is given overlapping source and
 // destination ranges -- looks unintended; confirm before relying on it.
844  if (ctoolkit_compatible) {
845  strncpy(evalue_buf, evalue_buf+1, sizeof(evalue_buf-1));
846  }
847 #endif /* CTOOLKIT_COMPATIBLE */
848  } else if (evalue < 0.0009) {
849  snprintf(evalue_buf, sizeof(evalue_buf), "%3.0le", evalue);
850  } else if (evalue < 0.1) {
851  snprintf(evalue_buf, sizeof(evalue_buf), "%4.3lf", evalue);
852  } else if (evalue < 1.0) {
853  snprintf(evalue_buf, sizeof(evalue_buf), "%3.2lf", evalue);
854  } else if (evalue < 10.0) {
855  snprintf(evalue_buf, sizeof(evalue_buf), "%2.1lf", evalue);
856  } else {
857  snprintf(evalue_buf, sizeof(evalue_buf), "%2.0lf", evalue);
858  }
859 
 // Bit scores: exponent form above 99999, integer form above 99.9,
 // otherwise kBitScoreFormat.
860  if (bit_score > 99999){
861  snprintf(bit_score_buf, sizeof(bit_score_buf), "%5.3le", bit_score);
862  } else if (bit_score > 99.9){
863  snprintf(bit_score_buf, sizeof(bit_score_buf), "%3.0ld",
864  (long)bit_score);
865  } else {
866  snprintf(bit_score_buf, sizeof(bit_score_buf), kBitScoreFormat.c_str(),
867  bit_score);
868  }
 // The total bit score uses the same thresholds but a fixed "%2.1lf" for
 // small values.
869  if (total_bit_score > 99999){
870  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%5.3le",
871  total_bit_score);
872  } else if (total_bit_score > 99.9){
873  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%3.0ld",
874  (long)total_bit_score);
875  } else {
876  snprintf(total_bit_score_buf, sizeof(total_bit_score_buf), "%2.1lf",
877  total_bit_score);
878  }
879  evalue_str = evalue_buf;
880  bit_score_str = bit_score_buf;
881  total_bit_score_str = total_bit_score_buf;
 // Non-positive raw scores are reported as -1.
882  if (raw_score <= 0)
883  raw_score = -1;
884  NStr::IntToString(raw_score_str, raw_score);
885 }
886 
887 
889  CSeq_align_set& new_aln,
890  unsigned int number)
891 {
 // Copies alignments from source_aln into new_aln until more than `number`
 // subjects have been seen. A new subject is counted whenever the id of row 1
 // differs from that of the previous non-Disc alignment; every Disc
 // alignment counts as one and is copied without the limit check.
 // NOTE(review): the first signature line is missing from this listing.
892  CConstRef<CSeq_id> previous_id, subid;
893  bool is_first_aln = true;
894  unsigned int num_align = 0;
895  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
896 
897  if ((*iter)->GetSegs().IsDisc()) {
898  ++num_align;
899  } else {
900  subid = &((*iter)->GetSeq_id(1));
901  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
902  ++num_align;
903  }
904 
905  if(num_align > number) {
906  break;
907  }
908 
909  is_first_aln = false;
910  previous_id = subid;
911  }
912  new_aln.Set().push_back(*iter);
913  }
914 }
915 
916 
918  unsigned int number)
919 {
 // Counts subjects in source_aln with the same rule as the pruning loops in
 // this file (a change of the row-1 id starts a new subject; each Disc
 // alignment counts as one). Counting stops once the count reaches `number`;
 // the count is returned.
 // NOTE(review): the first signature line, including the function name, is
 // missing from this listing.
920  CConstRef<CSeq_id> previous_id, subid;
921  bool is_first_aln = true;
922  unsigned int num_align = 0;
923  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
924 
925  if ((*iter)->GetSegs().IsDisc()) {
926  ++num_align;
927  } else {
928  subid = &((*iter)->GetSeq_id(1));
929  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
930  ++num_align;
931  }
932 
933  if(num_align >= number) {
934  break;
935  }
936 
937  is_first_aln = false;
938  previous_id = subid;
939  }
940  }
941  return num_align;
942 }
943 
944 
946  CSeq_align_set& new_aln,
947  unsigned int number)
948 {
 // Copies alignments from source_aln into new_aln, counting subjects by
 // changes of the row-1 id (each Disc alignment counts as one). Copying
 // stops when the count exceeds `number` and finishCurrent is false.
 // NOTE(review): the first signature line is missing from this listing.
949  CConstRef<CSeq_id> previous_id, subid;
950  bool is_first_aln = true;
951  unsigned int num_align = 0;
952  bool finishCurrent = false;
953  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
954  if ((*iter)->GetSegs().IsDisc()) {
955  ++num_align;
956  } else {
957  subid = &((*iter)->GetSeq_id(1));
958  if(is_first_aln || (!is_first_aln && !subid->Match(*previous_id))){
 // finishCurrent is true exactly when the subject just started is
 // the `number`-th one.
959  finishCurrent = (num_align + 1 == number) ? true : false;
960  ++num_align;
961  }
962  is_first_aln = false;
963  previous_id = subid;
964  }
965  if(num_align > number && !finishCurrent) {
966  break;
967  }
968  new_aln.Set().push_back(*iter);
969  }
970 }
971 
972 
973 void
975  int& num_gaps, int& num_gap_opens)
976 {
 // Computes the alignment length, the total gap length and the number of
 // gap openings by walking the chunks of every row of `salv`. All three
 // outputs are reset to 0 first.
 // NOTE(review): the signature line with the function name and the
 // declaration of `chunk_vec` are missing from this listing.
977  num_gaps = num_gap_opens = align_length = 0;
978 
979  for (int row = 0; row < salv.GetNumRows(); row++) {
981  = salv.GetAlnChunks(row, salv.GetSeqAlnRange(0));
982  for (int i=0; i<chunk_vec->size(); i++) {
983  CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];
984  int chunk_length = chunk->GetAlnRange().GetLength();
985  // Gaps are counted on all rows: gap can only be in one of the rows
986  // for any given segment.
987  if (chunk->IsGap()) {
988  ++num_gap_opens;
989  num_gaps += chunk_length;
990  }
991  // To calculate alignment length, only one row is needed.
992  if (row == 0)
993  align_length += chunk_length;
994  }
995  }
996 }
997 
998 void
1000  const CSeq_align_set& source)
1001 {
 // Appends the alignments of `source` to `target`, replacing every Disc
 // alignment by the alignments it contains (one level deep). Alignments
 // without segments are skipped.
 // NOTE(review): the signature line with the function name and the `target`
 // parameter is missing from this listing.
1002  if (source.IsSet() && source.CanGet()) {
1003 
1004  for(CSeq_align_set::Tdata::const_iterator iter = source.Get().begin();
1005  iter != source.Get().end(); iter++) {
1006  if((*iter)->IsSetSegs()){
1007  const CSeq_align::TSegs& seg = (*iter)->GetSegs();
1008  if(seg.IsDisc()){
1009  const CSeq_align_set& set = seg.GetDisc();
1010  for(CSeq_align_set::Tdata::const_iterator iter2 =
1011  set.Get().begin(); iter2 != set.Get().end();
1012  iter2 ++) {
1013  target.Set().push_back(*iter2);
1014  }
1015  } else {
1016  target.Set().push_back(*iter);
1017  }
1018  }
1019  }
1020  }
1021 }
1022 
1025 {
 // Builds a new Seq-align with Denseg segments from a Dendiag Seq-align.
 // Type, dim, score and bounds are copied when set; starts, lengths, strands
 // and scores of every dendiag segment are appended to the Denseg. Throws
 // CException (eUnknown) if the input is not Dendiag.
 // NOTE(review): the signature lines and the header of the loop over the
 // dendiag segments are missing from this listing.
1026  CRef<CSeq_align> sa(new CSeq_align);
1027  if ( !aln.GetSegs().IsDendiag()) {
1028  NCBI_THROW(CException, eUnknown, "Input Seq-align should be Dendiag!");
1029  }
1030 
1031  if(aln.IsSetType()){
1032  sa->SetType(aln.GetType());
1033  }
1034  if(aln.IsSetDim()){
1035  sa->SetDim(aln.GetDim());
1036  }
1037  if(aln.IsSetScore()){
1038  sa->SetScore() = aln.GetScore();
1039  }
1040  if(aln.IsSetBounds()){
1041  sa->SetBounds() = aln.GetBounds();
1042  }
1043 
1044  CDense_seg& ds = sa->SetSegs().SetDenseg();
1045 
1046  int counter = 0;
1047  ds.SetNumseg() = 0;
1049 
1050  if(counter == 0){//assume all dendiag segments have same dim and ids
1051  if((*iter)->IsSetDim()){
1052  ds.SetDim((*iter)->GetDim());
1053  }
1054  if((*iter)->IsSetIds()){
1055  ds.SetIds() = (*iter)->GetIds();
1056  }
1057  }
 // One Denseg segment is added per dendiag segment.
1058  ds.SetNumseg() ++;
1059  if((*iter)->IsSetStarts()){
1060  ITERATE(CDense_diag::TStarts, iterStarts, (*iter)->GetStarts()){
1061  ds.SetStarts().push_back(*iterStarts);
1062  }
1063  }
1064  if((*iter)->IsSetLen()){
1065  ds.SetLens().push_back((*iter)->GetLen());
1066  }
1067  if((*iter)->IsSetStrands()){
1068  ITERATE(CDense_diag::TStrands, iterStrands, (*iter)->GetStrands()){
1069  ds.SetStrands().push_back(*iterStrands);
1070  }
1071  }
1072  if((*iter)->IsSetScores()){
1073  ITERATE(CDense_diag::TScores, iterScores, (*iter)->GetScores()){
1074  ds.SetScores().push_back(*iterScores); //this might not have
1075  //right meaning
1076  }
1077  }
1078  counter ++;
1079  }
1080 
1081  return sa;
1082 }
1083 
1085 {
 // Looks up the taxonomy id of `id` among the BLAST deflines of its Bioseq.
 // Returns ZERO_TAX_ID when no defline matches or when a CException is
 // thrown during the lookup (the exception is deliberately swallowed).
 // NOTE(review): the signature line and the expression that initialises
 // `bdlRef` are missing from this listing.
1086  TTaxId taxid = ZERO_TAX_ID;
1087  try{
1088  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
1089  const CRef<CBlast_def_line_set> bdlRef =
1091  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
1092  ITERATE(list<CRef<CBlast_def_line> >, iter_bdl, bdl) {
 // Compare using the defline id of the same type as `id`.
1093  CConstRef<CSeq_id> bdl_id =
1094  GetSeq_idByType((*iter_bdl)->GetSeqid(), id.Which());
1095  if(bdl_id && bdl_id->Match(id) &&
1096  (*iter_bdl)->IsSetTaxid() && (*iter_bdl)->CanGetTaxid()){
1097  taxid = (*iter_bdl)->GetTaxid();
1098  break;
1099  }
1100  }
1101  } catch (CException&) {
1102 
1103  }
1104  return taxid;
1105 }
1106 
1108  const CBioseq_Handle& handle)
1109 {
 // Computes a reading frame from a start position and strand:
 // plus strand -> (start % 3) + 1; minus strand -> the negated value computed
 // from the distance to the end of the sequence; any other strand -> 0.
 // NOTE(review): the first signature line (function name, `start` and
 // `strand` parameters) is missing from this listing.
1110  int frame = 0;
1111  if (strand == eNa_strand_plus) {
1112  frame = (start % 3) + 1;
1113  } else if (strand == eNa_strand_minus) {
1114  frame = -(((int)handle.GetBioseqLength() - start - 1)
1115  % 3 + 1);
1116 
1117  }
1118  return frame;
1119 }
1120 
1121 
1124  seqalign_hit_list,
1125  bool do_translation
1126  )
1127 {
1128 
 // Stores do_translation in the file-level kTranslation flag, which the
 // comparator reads, then sorts the list with
 // SortHitByPercentIdentityDescendingEx.
 // NOTE(review): the first signature lines are missing from this listing.
1129  kTranslation = do_translation;
1130  seqalign_hit_list.sort(SortHitByPercentIdentityDescendingEx);
1131 }
1132 
1133 
1136  const CRef<CSeq_align>& info2)
1137 {
1138 
 // Comparator: orders by identity fraction (num_ident / alignment length),
 // higher first; equal fractions, or missing length/identity data, fall back
 // to the lower e-value first.
 // NOTE(review): the first signature lines are missing from this listing.
1139  int score1, sum_n1, num_ident1;
1140  double bits1, evalue1;
1141  list<TGi> use_this_gi1;
1142 
1143  int score2, sum_n2, num_ident2;
1144  double bits2, evalue2;
1145  list<TGi> use_this_gi2;
1146 
1147 
1148  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1149  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1150 
 // Lengths depend on the kTranslation flag set by the sorting entry point.
1151  int length1 = GetAlignmentLength(*info1, kTranslation);
1152  int length2 = GetAlignmentLength(*info2, kTranslation);
1153  bool retval = false;
1154 
1155 
1156  if(length1 > 0 && length2 > 0 && num_ident1 > 0 &&num_ident2 > 0 ) {
1157  if (((double)num_ident1)/length1 == ((double)num_ident2)/length2) {
1158 
1159  retval = evalue1 < evalue2;
1160 
1161  } else {
 // The two fractions differ here, so >= acts as a strict comparison.
1162  retval = ((double)num_ident1)/length1 >= ((double)num_ident2)/length2;
1163 
1164  }
1165  } else {
1166  retval = evalue1 < evalue2;
1167  }
1168  return retval;
1169 }
1170 
1173  const CRef<CSeq_align_set>& info2)
1174 {
1175  CRef<CSeq_align_set> i1(info1), i2(info2);
1176 
1177  i1->Set().sort(SortHspByScoreDescending);
1178  i2->Set().sort(SortHspByScoreDescending);
1179 
1180 
1181  int score1, sum_n1, num_ident1;
1182  double bits1, evalue1;
1183  list<TGi> use_this_gi1;
1184 
1185  int score2, sum_n2, num_ident2;
1186  double bits2, evalue2;
1187  list<TGi> use_this_gi2;
1188 
1189  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1190  GetAlnScores(*(info2->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1191  return bits1 > bits2;
1192 }
1193 
1196  CRef<CSeq_align_set> const& info2)
1197 {
1198  int cov1 = GetMasterCoverage(*info1);
1199  int cov2 = GetMasterCoverage(*info2);
1200  bool retval = false;
1201 
1202  if (cov1 > cov2) {
1203  retval = cov1 > cov2;
1204  } else if (cov1 == cov2) {
1205  int score1, sum_n1, num_ident1;
1206  double bits1, evalue1;
1207  list<TGi> use_this_gi1;
1208 
1209  int score2, sum_n2, num_ident2;
1210  double bits2, evalue2;
1211  list<TGi> use_this_gi2;
1212  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1213  GetAlnScores(*(info2->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1214  retval = evalue1 < evalue2;
1215  }
1216 
1217  return retval;
1218 }
1219 
1221  CRef<CSeq_align_set>& info2)
1222 {
1223  int start1 = 0, start2 = 0;
1224 
1225 
1226  info1->Set().sort(SortHspByMasterStartAscending);
1227  info2->Set().sort(SortHspByMasterStartAscending);
1228 
1229 
1230  start1 = min(info1->Get().front()->GetSeqStart(0),
1231  info1->Get().front()->GetSeqStop(0));
1232  start2 = min(info2->Get().front()->GetSeqStart(0),
1233  info2->Get().front()->GetSeqStop(0));
1234 
1235  if (start1 == start2) {
1236  //same start then arrange by bits score
1237  int score1, sum_n1, num_ident1;
1238  double bits1, evalue1;
1239  list<TGi> use_this_gi1;
1240 
1241  int score2, sum_n2, num_ident2;
1242  double bits2, evalue2;
1243  list<TGi> use_this_gi2;
1244 
1245 
1246  GetAlnScores(*(info1->Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1247  GetAlnScores(*(info1->Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1248  return evalue1 < evalue2;
1249 
1250  } else {
1251  return start1 < start2;
1252  }
1253 
1254 }
1255 
1258  const CRef<CSeq_align>& info2)
1259 {
1260 
1261  int score1, sum_n1, num_ident1;
1262  double bits1, evalue1;
1263  list<TGi> use_this_gi1;
1264 
1265  int score2, sum_n2, num_ident2;
1266  double bits2, evalue2;
1267  list<TGi> use_this_gi2;
1268 
1269 
1270  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1271  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1272  return bits1 > bits2;
1273 
1274 }
1275 
1278  const CRef<CSeq_align>& info2)
1279 {
1280  int start1 = 0, start2 = 0;
1281 
1282  start1 = min(info1->GetSeqStart(0), info1->GetSeqStop(0));
1283  start2 = min(info2->GetSeqStart(0), info2->GetSeqStop(0)) ;
1284 
1285  if (start1 == start2) {
1286  //same start then arrange by bits score
1287  int score1, sum_n1, num_ident1;
1288  double bits1, evalue1;
1289  list<TGi> use_this_gi1;
1290 
1291  int score2, sum_n2, num_ident2;
1292  double bits2, evalue2;
1293  list<TGi> use_this_gi2;
1294 
1295 
1296  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1297  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1298  return evalue1 < evalue2;
1299 
1300  } else {
1301 
1302  return start1 < start2;
1303  }
1304 }
1305 
1308  const CRef<CSeq_align>& info2)
1309 {
1310  int start1 = 0, start2 = 0;
1311 
1312  start1 = min(info1->GetSeqStart(1), info1->GetSeqStop(1));
1313  start2 = min(info2->GetSeqStart(1), info2->GetSeqStop(1)) ;
1314 
1315  if (start1 == start2) {
1316  //same start then arrange by bits score
1317  int score1, sum_n1, num_ident1;
1318  double bits1, evalue1;
1319  list<TGi> use_this_gi1;
1320 
1321  int score2, sum_n2, num_ident2;
1322  double bits2, evalue2;
1323  list<TGi> use_this_gi2;
1324 
1325 
1326  GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1327  GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1328  return evalue1 < evalue2;
1329 
1330  } else {
1331 
1332  return start1 < start2;
1333  }
1334 }
1335 
1336 int CAlignFormatUtil::GetAlignmentLength(const CSeq_align& aln, bool do_translation)
1337 {
1338 
1339  CRef<CSeq_align> final_aln;
1340 
1341  // Convert Std-seg and Dense-diag alignments to Dense-seg.
1342  // Std-segs are produced only for translated searches; Dense-diags only for
1343  // ungapped, not translated searches.
1344 
1345  if (aln.GetSegs().IsStd()) {
1346  CRef<CSeq_align> denseg_aln = aln.CreateDensegFromStdseg();
1347  // When both query and subject are translated, i.e. tblastx, convert
1348  // to a special type of Dense-seg.
1349  if (do_translation) {
1350  final_aln = denseg_aln->CreateTranslatedDensegFromNADenseg();
1351  } else {
1352  final_aln = denseg_aln;
1353 
1354  }
1355  } else if (aln.GetSegs().IsDendiag()) {
1356  final_aln = CreateDensegFromDendiag(aln);
1357  }
1358 
1359  const CDense_seg& ds = (final_aln ? final_aln->GetSegs().GetDenseg() :
1360  aln.GetSegs().GetDenseg());
1361 
1362  CAlnMap alnmap(ds);
1363  return alnmap.GetAlnStop() + 1;
1364 }
1365 
1367  CScope& scope,
1368  bool do_translation) {
// Returns the fraction of alignment columns in which the query-row and
// subject-row characters are identical (identities / compared columns), or 0
// when there is nothing to compare.
// NOTE(review): the first line of this signature is absent from the listing
// (listing line 1366); the body also uses a parameter named `aln`.
1369  double identity = 0;
1370  CRef<CSeq_align> final_aln;
1371 
1372  // Convert Std-seg and Dense-diag alignments to Dense-seg.
1373  // Std-segs are produced only for translated searches; Dense-diags only for
1374  // ungapped, not translated searches.
1375 
1376  if (aln.GetSegs().IsStd()) {
1377  CRef<CSeq_align> denseg_aln = aln.CreateDensegFromStdseg();
1378  // When both query and subject are translated, i.e. tblastx, convert
1379  // to a special type of Dense-seg.
1380  if (do_translation) {
1381  final_aln = denseg_aln->CreateTranslatedDensegFromNADenseg();
1382  } else {
1383  final_aln = denseg_aln;
1384 
1385  }
1386  } else if (aln.GetSegs().IsDendiag()) {
1387  final_aln = CreateDensegFromDendiag(aln);
1388  }
1389 
// Use the converted alignment when one was made, otherwise `aln` itself.
1390  const CDense_seg& ds = (final_aln ? final_aln->GetSegs().GetDenseg() :
1391  aln.GetSegs().GetDenseg());
1392 
1393  CAlnVec alnvec(ds, scope);
1394  string query, subject;
1395 
// Fetch the full aligned strings for row 0 (query) and row 1 (subject).
1396  alnvec.SetAaCoding(CSeq_data::e_Ncbieaa);
1397  alnvec.GetWholeAlnSeqString(0, query);
1398  alnvec.GetWholeAlnSeqString(1, subject);
1399 
1400  int num_ident = 0;
// Only the common prefix is compared if the two strings differ in length.
1401  int length = (int)min(query.size(), subject.size());
1402 
1403  for (int i = 0; i < length; ++i) {
1404  if (query[i] == subject[i]) {
1405  ++num_ident;
1406  }
1407  }
1408 
// Guard against division by zero for empty alignment strings.
1409  if (length > 0) {
1410  identity = ((double)num_ident)/length;
1411  }
1412 
1413  return identity;
1414 }
1415 
1416 
1418  const CRef<CSeq_align_set>& info2,
1419  double &percentIdent1,
1420  double &percentIdent2)
1421 {
// As visible here, the body only takes non-const handles to both hits and
// resets both output values to -1 before returning.
// NOTE(review): listing lines 1427-1428 and 1430-1431 are absent; the
// statements that compute the two percent-identity values presumably live
// there. Restore them from the repository before relying on this function.
1422 
1423  CRef<CSeq_align_set> i1(info1), i2(info2);
1424  percentIdent1 = -1;
1425  percentIdent2 = -1;
1426 
1429 
1432  return;
1433 }
1434 
1435 
1438  const CRef<CSeq_align_set>& info2)
1439 {
1440 
1441  CRef<CSeq_align_set> i1(info1), i2(info2);
1442 
1443  //i1->Set().sort(SortHspByPercentIdentityDescending);
1444  //i2->Set().sort(SortHspByPercentIdentityDescending);
1445 
1446 
1447  unique_ptr<CAlignFormatUtil::SSeqAlignSetCalcParams> seqSetInfo1( CAlignFormatUtil::GetSeqAlignSetCalcParamsFromASN(*info1));
1448  unique_ptr<CAlignFormatUtil::SSeqAlignSetCalcParams> seqSetInfo2( CAlignFormatUtil::GetSeqAlignSetCalcParamsFromASN(*info2));
1449  double evalue1 = seqSetInfo1->evalue;
1450  double evalue2 = seqSetInfo2->evalue;
1451  double percentIdent1 = seqSetInfo1->percent_identity;
1452  double percentIdent2 = seqSetInfo2->percent_identity;
1453 
1454  bool retval = false;
1455  if(percentIdent1 < 0 || percentIdent2 < 0) {
1456  s_CalcAlnPercentIdent(info1, info2,percentIdent1,percentIdent2);
1457  }
1458  if(percentIdent1 > 0 &&percentIdent2 > 0) {
1459  if (percentIdent1 == percentIdent2) {
1460  retval = evalue1 < evalue2;
1461 
1462  } else {
1463  retval = percentIdent1 >= percentIdent2;
1464  }
1465  } else {
1466  retval = evalue1 < evalue2;
1467  }
1468  return retval;
1469 }
1470 
1472  CRef<CSeq_align_set> const& info2)
1473 {
1474  int score1, score2, sum_n, num_ident;
1475  double bits, evalue;
1476  list<TGi> use_this_gi;
1477  double total_bits1 = 0, total_bits2 = 0;
1478 
1479  ITERATE(CSeq_align_set::Tdata, iter, info1->Get()) {
1480  CAlignFormatUtil::GetAlnScores(**iter, score1, bits, evalue,
1481  sum_n, num_ident, use_this_gi);
1482  total_bits1 += bits;
1483  }
1484 
1485  ITERATE(CSeq_align_set::Tdata, iter, info2->Get()) {
1486  CAlignFormatUtil::GetAlnScores(**iter, score2, bits, evalue,
1487  sum_n, num_ident, use_this_gi);
1488  total_bits2 += bits;
1489  }
1490 
1491 
1492  return total_bits1 >= total_bits2;
1493 
1494 }
1495 
1496 #ifndef NCBI_COMPILER_WORKSHOP
1497 /** Class to sort by linkout bit
1498  * @note this code doesn't compile under the Solaris' WorkShop, and because
1499  * this feature is only used inside NCBI (LinkoutDB), we disable this code.
1500  */
// NOTE(review): this listing lacks the class header (line 1501), the first
// line of the constructor (1504), the middle operand of both linkout
// expressions (1516, 1519) and the data member declarations (1525-1526).
// Restore them from the repository before building.
1502 {
1503 public:
1505  const string& mv_build_name)
1506  : m_LinkoutDB(linkoutdb), m_MapViewerBuildName(mv_build_name) {}
1507 
// Compares two hits by the eGenomicSeq bit of the linkout value obtained for
// the row-1 Seq-id of each hit's first alignment; the value is 0 when
// m_LinkoutDB is null.
1508  bool operator() (const CRef<CSeq_align_set>& info1, const CRef<CSeq_align_set>& info2)
1509  {
1510  CConstRef<CSeq_id> id1, id2;
1511  id1 = &(info1->Get().front()->GetSeq_id(1));
1512  id2 = &(info2->Get().front()->GetSeq_id(1));
1513 
1514  int linkout1 = 0, linkout2 = 0;
1515  linkout1 = m_LinkoutDB
1517  : 0;
1518  linkout2 = m_LinkoutDB
1520  : 0;
1521 
// NOTE(review): "<=" returns true for equal keys, which is not a strict weak
// ordering as sort predicates require; consider "<" once the class is whole.
1522  return (linkout1 & eGenomicSeq) <= (linkout2 & eGenomicSeq);
1523  }
1524 private:
1527 };
1528 #endif /* NCBI_COMPILER_WORKSHOP */
1529 
1531 SortHitByMolecularType(list< CRef<CSeq_align_set> >& seqalign_hit_list,
1532  CScope& scope, ILinkoutDB* linkoutdb,
1533  const string& mv_build_name)
1534 {
1535 
1536  kScope = &scope;
1537 #ifndef NCBI_COMPILER_WORKSHOP
1538  seqalign_hit_list.sort(CSortHitByMolecularTypeEx(linkoutdb, mv_build_name));
1539 #endif /* NCBI_COMPILER_WORKSHOP */
1540 }
1541 
1542 void CAlignFormatUtil::SortHit(list< CRef<CSeq_align_set> >& seqalign_hit_list,
1543  bool do_translation, CScope& scope, int
1544  sort_method, ILinkoutDB* linkoutdb,
1545  const string& mv_build_name)
1546 {
1547  kScope = &scope;
1548  kTranslation = do_translation;
1549 
1550  if (sort_method == 1) {
1551 #ifndef NCBI_COMPILER_WORKSHOP
1552  seqalign_hit_list.sort(CSortHitByMolecularTypeEx(linkoutdb,
1553  mv_build_name));
1554 #endif /* NCBI_COMPILER_WORKSHOP */
1555  } else if (sort_method == 2) {
1556  seqalign_hit_list.sort(SortHitByTotalScoreDescending);
1557  } else if (sort_method == 3) {
1558  seqalign_hit_list.sort(SortHitByPercentIdentityDescendingEx);
1559  }
1560 }
1561 
1564  target,
1565  int sort_method,
1566  const CSeq_align_set& source,
1567  CScope& scope,
1568  ILinkoutDB* linkoutdb,
1569  const string& mv_build_name)
1570 {
1571  CConstRef<CSeq_id> prevSubjectId;
1572  int count = 0;
1573  int linkoutPrev = 0;
1574  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1575 
1576  const CSeq_id& id = (*iter)->GetSeq_id(1);
1577  try {
1578  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
1579  if (handle) {
1580  int linkout;
1581  if(prevSubjectId.Empty() || !id.Match(*prevSubjectId)){
1582  prevSubjectId = &id;
1583  linkout = linkoutdb ? linkoutdb->GetLinkout(id, mv_build_name): 0;
1584  linkoutPrev = linkout;
1585  count++;
1586  }
1587  else {
1588  linkout = linkoutPrev;
1589  }
1590  if (linkout & eGenomicSeq) {
1591  if (sort_method == 1) {
1592  target[1]->Set().push_back(*iter);
1593  } else if (sort_method == 2){
1594  target[0]->Set().push_back(*iter);
1595  } else {
1596  target[1]->Set().push_back(*iter);
1597  }
1598  } else {
1599  if (sort_method == 1) {
1600  target[0]->Set().push_back(*iter);
1601  } else if (sort_method == 2) {
1602  target[1]->Set().push_back(*iter);
1603  } else {
1604  target[0]->Set().push_back(*iter);
1605  }
1606  }
1607  } else {
1608  target[0]->Set().push_back(*iter);
1609  }
1610 
1611  } catch (const CException&){
1612  target[0]->Set().push_back(*iter); //no bioseq found, leave untouched
1613  }
1614  }
1615 }
1616 
1618  const CSeq_align_set& source)
1619 {
1620  CConstRef<CSeq_id> previous_id;
1621  CRef<CSeq_align_set> temp;
1622 
1623  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1624  const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1625  if(previous_id.Empty()) {
1626  temp = new CSeq_align_set;
1627  temp->Set().push_back(*iter);
1628  target.push_back(temp);
1629  } else if (cur_id.Match(*previous_id)){
1630  temp->Set().push_back(*iter);
1631 
1632  } else {
1633  temp = new CSeq_align_set;
1634  temp->Set().push_back(*iter);
1635  target.push_back(temp);
1636  }
1637  previous_id = &cur_id;
1638  }
1639 
1640 }
1641 
1644 {
1645  CRef<CSeq_align_set> align_set (new CSeq_align_set);
1646  CConstRef<CSeq_id> previous_id;
1647  CRef<CSeq_align_set> temp;
1648  // list<CRef<CSeq_align_set> >::iterator iter;
1649 
1650  for (list<CRef<CSeq_align_set> >::iterator iter = source.begin(); iter != source.end(); iter ++) {
1651  ITERATE(CSeq_align_set::Tdata, iter2, (*iter)->Get()) {
1652  align_set->Set().push_back(*iter2);
1653  }
1654  }
1655  return align_set;
1656 }
1657 
1659  const CSeq_align_set& source)
1660 {
// Builds a map from Seq-id string to the alignments of `source` whose row-1
// Seq-id matches an entry of `seqIdList`.
// NOTE(review): the declaration of `hitsMap` (listing line 1664) and the first
// line of the signature are absent from this listing; restore them from the
// repository before building.
1661  CConstRef<CSeq_id> previous_id;
1662  CRef<CSeq_align_set> temp;
1663 
1665 
// Seed the map with an empty alignment set for every requested id.
1666  for(size_t i = 0; i < seqIdList.size();i++) {
1667  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
1668  hitsMap.insert(map<string, CRef<CSeq_align_set> >::value_type(seqIdList[i],new_aln));
1669  }
// Number of requested ids for which a run of alignments has been stored.
1670  size_t count = 0;
1671  ITERATE(CSeq_align_set::Tdata, iter, source.Get()) {
1672  const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1673  if(previous_id.Empty() || !cur_id.Match(*previous_id)) {
// A new subject starts here; stop once as many runs as requested ids are stored.
1674  if(count >= seqIdList.size()) {
1675  break;
1676  }
1677  string idString = NStr::TruncateSpaces(cur_id.AsFastaString());
1678  if(hitsMap.find(idString) != hitsMap.end()) {
// Requested id: replace its map entry with a fresh set holding this alignment.
1679  temp = new CSeq_align_set;
1680  temp->Set().push_back(*iter);
1681  hitsMap[idString] = temp;
1682  count++;
1683  }
1684  else {
// Not requested: an empty `temp` makes the following alignments of this
// subject be skipped.
1685  temp.Reset();
1686  }
1687  }
1688  else if (cur_id.Match(*previous_id)){
// Same subject as the previous alignment: extend its set if it is being kept.
1689  if(!temp.Empty()) {
1690  temp->Set().push_back(*iter);
1691  }
1692  }
1693  previous_id = &cur_id;
1694  }
1695  return hitsMap;
1696 }
1697 
1699 {
// Replaces `all_aln_set` with the alignments of the ids named in the
// comma-separated `alignSeqList`, arranged in the order of that list.
// NOTE(review): the signature and the statement that initializes `hitsMap`
// (listing lines 1698 and 1705) are absent from this listing; restore them
// from the repository before building.
1700  vector <string> seqIds;
1701  NStr::Split(alignSeqList,",",seqIds);
1702 
1703  //SEQ_ALN_SET from ALIGNDB contains seq_aligns in random order
1704  //The following will create a map that contains seq-aln_set per gi from ALIGN_SEQ_LIST
1706 
1707  map < string, CRef<CSeq_align_set> >::iterator it;
1708  list< CRef<CSeq_align_set> > orderedSet;
1709  //orderedSet will have seq aligns in the order of gi list
1710  for(size_t i = 0; i < seqIds.size(); i++) {
1711  if(hitsMap.find(seqIds[i]) != hitsMap.end()) {
1712  orderedSet.push_back(hitsMap[seqIds[i]]);
1713  }
1714  }
1715  //This should contain seq align set in the order of gis in the list
1716  all_aln_set = CAlignFormatUtil::HitListToHspList(orderedSet);
1717 }
1718 
// Splits the string tag of a general Seq-id into its three dot-separated
// parts and stores them in strRun, strSpotId and strReadIndex.
// Returns true only when the tag is a non-empty string with exactly three
// parts; the output strings are left unmodified otherwise.
// NOTE(review): the declaration of `seqId` (listing line 1723) is absent from
// this listing; restore it from the repository before building.
1719 static bool s_GetSRASeqMetadata(const CBioseq::TId& ids,string &strRun, string &strSpotId,string &strReadIndex)
1720 {
1721  bool success = false;
// NOTE(review): `link` is not used in the visible part of this function.
1722  string link = NcbiEmptyString;
1724 
1725  if (!seqId.Empty())
1726  {
1727  // Get the SRA tag from seqId
1728  if (seqId->GetGeneral().CanGetDb() &&
1729  seqId->GetGeneral().CanGetTag() &&
1730  seqId->GetGeneral().GetTag().IsStr())
1731  {
1732  // Decode the tag to collect the SRA-specific indices
1733  string strTag = seqId->GetGeneral().GetTag().GetStr();
1734  if (!strTag.empty())
1735  {
1736  vector<string> vecInfo;
1737  try
1738  {
1739  NStr::Split(strTag, ".", vecInfo);
1740  }
1741  catch (...)
1742  {
// Any failure while splitting is reported as "no metadata".
1743  return false;
1744  }
1745 
// Exactly three parts are expected: run, spot id and read index.
1746  if (vecInfo.size() != 3)
1747  {
1748  return false;
1749  }
1750 
1751  strRun = vecInfo[0];
1752  strSpotId = vecInfo[1];
1753  strReadIndex = vecInfo[2];
1754  success = true;
1755  }
1756  }
1757  }
1758  return success;
1759 }
1760 
1761 string CAlignFormatUtil::BuildSRAUrl(const CBioseq::TId& ids, string user_url)
1762 {
1763  string strRun, strSpotId,strReadIndex;
1764  string link = NcbiEmptyString;
1765 
1766  if(s_GetSRASeqMetadata(ids,strRun,strSpotId,strReadIndex))
1767  {
1768  // Generate the SRA link to the identified spot
1769  link += user_url;
1770  link += "?run=" + strRun;
1771  link += "." + strSpotId;
1772  link += "." + strReadIndex;
1773  }
1774  return link;
1775 }
1776 
1778 {
// Chooses one Seq-id from `ids` and returns its URL-encoded FASTA string.
// Preference order: `id_general`, then `id_other`, then the FindBestChoice()
// result. The returned string is empty for a "gnl|BL_ORD_ID" general id, when
// no id is available, or when the chosen id is a GI.
// NOTE(review): the signature and the declarations of `id_general` and
// `id_other` (listing lines 1777 and 1781-1782) are absent from this listing;
// restore them from the repository before building.
1779  string gnl;
1780 
1783  const CRef<CSeq_id> id_accession = FindBestChoice(ids, CSeq_id::WorstRank);
1784 
1785  if(!id_general.Empty() && id_general->AsFastaString().find("gnl|BL_ORD_ID") != string::npos){
1786  return gnl;
1787  }
1788 
1789  const CSeq_id* bestid = NULL;
1790  if (id_general.Empty()){
1791  bestid = id_other;
1792  if (id_other.Empty()){
1793  bestid = id_accession;
1794  }
1795  } else {
1796  bestid = id_general;
1797  }
1798 
// GI ids are excluded from the result.
1799  if (bestid && bestid->Which() != CSeq_id::e_Gi){
1800  gnl = NStr::URLEncode(bestid->AsFastaString());
1801  }
1802  return gnl;
1803 }
1804 
1806  string user_url, string database,
1807  bool db_is_na, string rid, int query_number,
1808  bool for_alignment) {
// Assembles a link from `user_url` plus the query parameters db, na, gnl, gi,
// term, taxid, RID, QUERY_NUMBER and log$, each added only when its value is
// available. Returns an empty string for "gnl|BL_ORD_ID" general ids.
// NOTE(review): the first line of the signature and the declaration of
// `id_general` (listing lines 1805 and 1811) are absent from this listing;
// the body also uses names `ids` and `taxid` that are not declared in the
// visible lines. Restore the missing lines before building.
1809 
1810  string link = NcbiEmptyString;
1812 
1813  if(!id_general.Empty()
1814  && id_general->AsFastaString().find("gnl|BL_ORD_ID") != string::npos){
1815  /* We do need to make security protected link to BLAST gnl */
1816  return NcbiEmptyString;
1817  }
1818  TGi gi = FindGi(ids);
1819  string bestID = s_GetBestIDForURL((CBioseq::TId &)ids);
1820 
1821 
// True for every URL except dumpgnl.cgi: directory parts are then stripped
// from the database names below.
1822  bool nodb_path = false;
1823  /* dumpgnl.cgi need to use path */
1824  if (user_url.find("dumpgnl.cgi") ==string::npos){
1825  nodb_path = true;
1826  }
1827  int length = (int)database.size();
1828  string str;
1829  char *chptr, *dbtmp;
1830  char tmpbuff[256];
// NOTE(review): dbname and dbtmp are raw new[] buffers released only at the
// end of the function; an exception thrown in between would leak them.
1831  char* dbname = new char[sizeof(char)*length + 2];
1832  strcpy(dbname, database.c_str());
1833  if(nodb_path) {
1834  int i, j;
1835  dbtmp = new char[sizeof(char)*length + 2]; /* additional space and NUL */
1836  memset(dbtmp, '\0', sizeof(char)*length + 2);
// Walk the database string token by token (tokens are separated by
// whitespace or commas) and append each token's base name to dbtmp.
1837  for(i = 0; i < length; i++) {
1838  if(i > 0) {
1839  strcat(dbtmp, " "); //space between db
1840  }
1841  if(isspace((unsigned char) dbname[i]) || dbname[i] == ',') {/* Rolling spaces */
1842  continue;
1843  }
1844  j = 0;
1845  while (!isspace((unsigned char) dbname[i]) && j < 256 && i < length) {
1846  tmpbuff[j] = dbname[i];
1847  j++; i++;
1848  if(dbname[i] == ',') { /* Comma is valid delimiter */
1849  break;
1850  }
1851  }
// NOTE(review): the loop above can leave j == 256, in which case this write
// lands one element past the end of tmpbuff[256].
1852  tmpbuff[j] = '\0';
// Keep only what follows the last '/' of the token, if there is one.
1853  if((chptr = strrchr(tmpbuff, '/')) != NULL) {
1854  strcat(dbtmp, (char*)(chptr+1));
1855  } else {
1856  strcat(dbtmp, tmpbuff);
1857  }
1858 
1859  }
1860  } else {
1861  dbtmp = dbname;
1862  }
1863 
// NOTE(review): strcpy() into the fixed 256-byte buffer is unbounded; a
// bestID of 256 characters or more would overflow it.
1864  char gnl[256];
1865  if (!bestID.empty()){
1866  strcpy(gnl, bestID.c_str());
1867 
1868  } else {
1869  gnl[0] = '\0';
1870  }
1871 
1872  str = NStr::URLEncode(dbtmp == NULL ? (char*) "nr" : dbtmp);
1873 
// Start the query string with '?' unless user_url already contains one; in
// that case add '&' only when user_url already carries a key=value pair.
1874  if (user_url.find("?") == string::npos){
1875  link += user_url + "?" + "db=" + str + "&na=" + (db_is_na? "1" : "0");
1876  } else {
1877  if (user_url.find("=") != string::npos) {
1878  user_url += "&";
1879  }
1880  link += user_url + "db=" + str + "&na=" + (db_is_na? "1" : "0");
1881  }
1882 
1883  if (gnl[0] != '\0'){
1884  str = gnl;
1885  link += "&gnl=";
1886  link += str;
1887  }
1888  if (gi > ZERO_GI){
1889  link += "&gi=" + NStr::NumericToString(gi);
1890  link += "&term=" + NStr::NumericToString(gi) + NStr::URLEncode("[gi]");
1891  }
1892  if(taxid > ZERO_TAX_ID){
1893  link += "&taxid=" + NStr::NumericToString(taxid);
1894  }
1895  if (rid != NcbiEmptyString){
1896  link += "&RID=" + rid;
1897  }
1898 
1899  if (query_number > 0){
1900  link += "&QUERY_NUMBER=" + NStr::IntToString(query_number);
1901  }
1902 
// The log$ parameter is added for every URL except dumpgnl.cgi.
1903  if (user_url.find("dumpgnl.cgi") ==string::npos){
1904  if (for_alignment)
1905  link += "&log$=nuclalign";
1906  else
1907  link += "&log$=nucltop";
1908  }
1909 
// dbtmp is a separate allocation only when nodb_path is set; otherwise it
// aliases dbname and must not be freed twice.
1910  if(nodb_path){
1911  delete [] dbtmp;
1912  }
1913  delete [] dbname;
1914  return link;
1915 }
1918  map< string, string>& parameters_to_change,
1919  string& cgi_query)
1920 {
1921 
1922  //add parameters to exclude
1923  parameters_to_change.insert(map<string, string>::
1924  value_type("service", ""));
1925  parameters_to_change.insert(map<string, string>::
1926  value_type("address", ""));
1927  parameters_to_change.insert(map<string, string>::
1928  value_type("platform", ""));
1929  parameters_to_change.insert(map<string, string>::
1930  value_type("_pgr", ""));
1931  parameters_to_change.insert(map<string, string>::
1932  value_type("client", ""));
1933  parameters_to_change.insert(map<string, string>::
1934  value_type("composition_based_statistics", ""));
1935 
1936  parameters_to_change.insert(map<string, string>::
1937  value_type("auto_format", ""));
1938  cgi_query = NcbiEmptyString;
1939  TCgiEntries& cgi_entry = ctx.GetRequest().GetEntries();
1940  bool is_first = true;
1941 
1942  for(TCgiEntriesI it=cgi_entry.begin(); it!=cgi_entry.end(); ++it) {
1943  string parameter = it->first;
1944  if (parameter != NcbiEmptyString) {
1945  if (parameters_to_change.count(NStr::ToLower(parameter)) > 0 ||
1946  parameters_to_change.count(NStr::ToUpper(parameter)) > 0) {
1947  if(parameters_to_change[NStr::ToLower(parameter)] !=
1948  NcbiEmptyString &&
1949  parameters_to_change[NStr::ToUpper(parameter)] !=
1950  NcbiEmptyString) {
1951  if (!is_first) {
1952  cgi_query += "&";
1953  }
1954  cgi_query +=
1955  it->first + "=" + parameters_to_change[it->first];
1956  is_first = false;
1957  }
1958  } else {
1959  if (!is_first) {
1960  cgi_query += "&";
1961  }
1962  cgi_query += it->first + "=" + it->second;
1963  is_first = false;
1964  }
1965 
1966  }
1967  }
1968 }
1969 
1971 
// Appends a fixed series of "NAME=value" pairs to `cgi_query`, each value
// read from the request held by `ctx`. The FORMAT_EQ_* pairs are added only
// when their request value is non-empty.
// NOTE(review): the opening line of this function (listing line 1970) and one
// line inside each FORMAT_EQ_* statement (2006, 2013, 2020) are absent from
// this listing; restore them from the repository before building.
1972  string format_type = ctx.GetRequestValue("FORMAT_TYPE").GetValue();
1973  string ridstr = ctx.GetRequestValue("RID").GetValue();
1974  string align_view = ctx.GetRequestValue("ALIGNMENT_VIEW").GetValue();
1975 
// RID comes first and carries no leading '&'.
1976  cgi_query += "RID=" + ridstr;
1977  cgi_query += "&FORMAT_TYPE=" + format_type;
1978  cgi_query += "&ALIGNMENT_VIEW=" + align_view;
1979 
1980  cgi_query += "&QUERY_NUMBER=" + ctx.GetRequestValue("QUERY_NUMBER").GetValue();
1981  cgi_query += "&FORMAT_OBJECT=" + ctx.GetRequestValue("FORMAT_OBJECT").GetValue();
1982  cgi_query += "&RUN_PSIBLAST=" + ctx.GetRequestValue("RUN_PSIBLAST").GetValue();
1983  cgi_query += "&I_THRESH=" + ctx.GetRequestValue("I_THRESH").GetValue();
1984 
1985  cgi_query += "&DESCRIPTIONS=" + ctx.GetRequestValue("DESCRIPTIONS").GetValue();
1986 
1987  cgi_query += "&ALIGNMENTS=" + ctx.GetRequestValue("ALIGNMENTS").GetValue();
1988 
1989  cgi_query += "&NUM_OVERVIEW=" + ctx.GetRequestValue("NUM_OVERVIEW").GetValue();
1990 
1991  cgi_query += "&NCBI_GI=" + ctx.GetRequestValue("NCBI_GI").GetValue();
1992 
1993  cgi_query += "&SHOW_OVERVIEW=" + ctx.GetRequestValue("SHOW_OVERVIEW").GetValue();
1994 
1995  cgi_query += "&SHOW_LINKOUT=" + ctx.GetRequestValue("SHOW_LINKOUT").GetValue();
1996 
1997  cgi_query += "&GET_SEQUENCE=" + ctx.GetRequestValue("GET_SEQUENCE").GetValue();
1998 
1999  cgi_query += "&MASK_CHAR=" + ctx.GetRequestValue("MASK_CHAR").GetValue();
2000  cgi_query += "&MASK_COLOR=" + ctx.GetRequestValue("MASK_COLOR").GetValue();
2001 
2002  cgi_query += "&SHOW_CDS_FEATURE=" + ctx.GetRequestValue("SHOW_CDS_FEATURE").GetValue();
2003 
// Optional pairs: written only when the request supplies a value.
2004  if (ctx.GetRequestValue("FORMAT_EQ_TEXT").GetValue() != NcbiEmptyString) {
2005  cgi_query += "&FORMAT_EQ_TEXT=" +
2007  GetRequestValue("FORMAT_EQ_TEXT").
2008  GetValue()));
2009  }
2010 
2011  if (ctx.GetRequestValue("FORMAT_EQ_OP").GetValue() != NcbiEmptyString) {
2012  cgi_query += "&FORMAT_EQ_OP=" +
2014  GetRequestValue("FORMAT_EQ_OP").
2015  GetValue()));
2016  }
2017 
2018  if (ctx.GetRequestValue("FORMAT_EQ_MENU").GetValue() != NcbiEmptyString) {
2019  cgi_query += "&FORMAT_EQ_MENU=" +
2021  GetRequestValue("FORMAT_EQ_MENU").
2022  GetValue()));
2023  }
2024 
2025  cgi_query += "&EXPECT_LOW=" + ctx.GetRequestValue("EXPECT_LOW").GetValue();
2026  cgi_query += "&EXPECT_HIGH=" + ctx.GetRequestValue("EXPECT_HIGH").GetValue();
2027 
2028  cgi_query += "&BL2SEQ_LINK=" + ctx.GetRequestValue("BL2SEQ_LINK").GetValue();
2029 
2030 }
2031 
2032 
2034  CScope& scope, ILinkoutDB* linkoutdb,
2035  const string& mv_build_name)
2036 {
2037  bool is_mixed = false;
2038  bool is_first = true;
2039  int prev_database = 0;
2040 
2041  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2042 
2043  const CSeq_id& id = (*iter)->GetSeq_id(1);
2044  int linkout = linkoutdb
2045  ? linkoutdb->GetLinkout(id, mv_build_name)
2046  : 0;
2047  int cur_database = (linkout & eGenomicSeq);
2048  if (!is_first && cur_database != prev_database) {
2049  is_mixed = true;
2050  break;
2051  }
2052  prev_database = cur_database;
2053  is_first = false;
2054  }
2055 
2056  return is_mixed;
2057 
2058 }
2059 
2060 
2062 {
2063  bool formatAsMixedDbs = false;
2064  string mixedDbs = ctx.GetRequestValue("MIXED_DATABASE").GetValue();
2065  if(!mixedDbs.empty()) {
2066  mixedDbs = NStr::ToLower(mixedDbs);
2067  formatAsMixedDbs = (mixedDbs == "on" || mixedDbs == "true" || mixedDbs == "yes") ? true : false;
2068  }
2069  return formatAsMixedDbs;
2070 }
2071 
2072 static string s_MapLinkoutGenParam(string &url_link_tmpl,
2073  const string& rid,
2074  string giList,
2075  bool for_alignment,
2076  int cur_align,
2077  string &label,
2078  string &lnk_displ,
2079  string lnk_tl_info = "",
2080  string lnk_title = "")
2081 {
2082  const string kLinkTitle=" title=\"View <@lnk_tl_info@> for <@label@>\" ";
2083  const string kLinkTarget="target=\"lnk" + rid + "\"";
2084  string lnkTitle = (lnk_title.empty()) ? kLinkTitle : lnk_title;
2085  string url_link = CAlignFormatUtil::MapTemplate(url_link_tmpl,"gi",giList);
2086  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",rid);
2087  url_link = CAlignFormatUtil::MapTemplate(url_link,"log",for_alignment? "align" : "top");
2088  url_link = CAlignFormatUtil::MapTemplate(url_link,"blast_rank",NStr::IntToString(cur_align));
2089  lnkTitle = NStr::StartsWith(lnk_displ,"<img") ? "" : lnkTitle;
2090  string lnkTarget = NStr::StartsWith(lnk_displ,"<img") ? "" : kLinkTarget;
2091  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnkTitle",lnkTitle);
2092  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnkTarget",lnkTarget);
2093  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnk_displ",lnk_displ);
2094  url_link = CAlignFormatUtil::MapTemplate(url_link,"lnk_tl_info",lnk_tl_info);
2095  url_link = CAlignFormatUtil::MapTemplate(url_link,"label",label);
2096  url_link = CAlignFormatUtil::MapProtocol(url_link);
2097  return url_link;
2098 }
2099 
2100 
2101 static list<string> s_GetLinkoutUrl(int linkout,
2102  string giList,
2103  string labelList,
2104  TGi first_gi,
2105  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2106  bool textLink = true)
2107 
2108 {
2109  list<string> linkout_list;
2110  string url_link,lnk_displ,lnk_title,lnkTitleInfo;
2111 
2112  vector<string> accs;
2113  NStr::Split(labelList,",",accs);
2114  string firstAcc = (accs.size() > 0)? accs[0] : labelList;
2115 
2116  if (linkout & eUnigene) {
2117  url_link = CAlignFormatUtil::GetURLFromRegistry("UNIGEN");
2118  lnk_displ = textLink ? "UniGene" : kUnigeneImg;
2119 
2120  string termParam = NStr::Find(labelList,",") == NPOS ? kGeneTerm : ""; //kGeneTerm if only one seqid
2121  url_link = CAlignFormatUtil::MapTemplate(url_link,"termParam",termParam);
2122 
2123  lnkTitleInfo = "UniGene cluster";
2124  string uid = !linkoutInfo.is_na ? "[Protein Accession]" : "[Nucleotide Accession]";
2125  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2126  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2127 
2128  if(textLink) {
2129  url_link = CAlignFormatUtil::MapTemplate(kUnigeneDispl,"lnk",url_link);
2130  }
2131  url_link = CAlignFormatUtil::MapProtocol(url_link);
2132  linkout_list.push_back(url_link);
2133  }
2134  if (linkout & eStructure){
2135  CSeq_id seqID(firstAcc);
2136  string struct_link = CAlignFormatUtil::GetURLFromRegistry(
2137  "STRUCTURE_URL");
2138 
2139  url_link = struct_link.empty() ? kStructureUrl : struct_link;
2140  string linkTitle;
2141  if(seqID.Which() == CSeq_id::e_Pdb) {
2142  lnk_displ = textLink ? "Structure" : kStructureImg;
2143  linkTitle = " title=\"View 3D structure <@label@>\"";
2144  }
2145  else {
2146  url_link = kStructureAlphaFoldUrl;
2147  lnk_displ = textLink ? "AlphaFold Structure" : kStructureImg;
2148  linkTitle = " title=\"View AlphaFold 3D structure <@label@>\"";
2149  }
2150 
2151 
2152 
2153  string molID,chainID;
2154  NStr::SplitInTwo(firstAcc,"_",molID,chainID);
2155  url_link = CAlignFormatUtil::MapTemplate(url_link,"molid",molID);
2156  url_link = CAlignFormatUtil::MapTemplate(url_link,"queryID",linkoutInfo.queryID);
2157  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,firstAcc,lnk_displ,"",linkTitle);
2158  if(textLink) {
2159  url_link = CAlignFormatUtil::MapTemplate(kStructureDispl,"lnk",url_link);
2160  }
2161  url_link = CAlignFormatUtil::MapProtocol(url_link);
2162  linkout_list.push_back(url_link);
2163  }
2164  if (linkout & eGeo){
2165  url_link = CAlignFormatUtil::GetURLFromRegistry("GEO");
2166  lnk_displ = textLink ? "GEO Profiles" : kGeoImg;
2167 
2168  lnkTitleInfo = "Expression profiles";
2169  //gilist contains comma separated gis
2170  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2171 
2172 
2173  if(textLink) {
2174  url_link = CAlignFormatUtil::MapTemplate(kGeoDispl,"lnk",url_link);
2175  }
2176  url_link = CAlignFormatUtil::MapProtocol(url_link);
2177  linkout_list.push_back(url_link);
2178  }
2179  if(linkout & eGene){
2180  url_link = CAlignFormatUtil::GetURLFromRegistry("GENE");
2181  if(textLink) {
2182  lnk_displ = "Gene";
2183  lnkTitleInfo = "gene information";
2184  }
2185  else {
2186  lnk_displ = kGeneImg;
2187  }
2188  string termParam = NStr::Find(labelList,",") == NPOS ? kGeneTerm : ""; //kGeneTerm if only one seqid
2189  url_link = CAlignFormatUtil::MapTemplate(url_link,"termParam",termParam);
2190 
2191  string uid = !linkoutInfo.is_na ? "[Protein Accession]" : "[Nucleotide Accession]";
2192  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2193 
2194  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2195 
2196  if(textLink) {
2197  url_link = CAlignFormatUtil::MapTemplate(kGeneDispl,"lnk",url_link);
2198  }
2199  url_link = CAlignFormatUtil::MapProtocol(url_link);
2200  linkout_list.push_back(url_link);
2201  }
2202 
2203  if((linkout & eGenomicSeq) && first_gi != ZERO_GI){ //only for advanced view -> textlink = true
2204  if(textLink) {
2205  url_link = kMapviewBlastHitParams;
2206  lnk_displ = "Map Viewer";
2207 
2208  lnkTitleInfo = "BLAST hits on the " + linkoutInfo.taxName + " genome";
2209 
2210  url_link = CAlignFormatUtil::MapTemplate(url_link,"gnl",NStr::URLEncode(linkoutInfo.gnl));
2211  url_link = CAlignFormatUtil::MapTemplate(url_link,"db",linkoutInfo.database);
2212  url_link = CAlignFormatUtil::MapTemplate(url_link,"is_na",linkoutInfo.is_na? "1" : "0");
2213  string user_url = (linkoutInfo.user_url.empty()) ? kMapviewBlastHitUrl : linkoutInfo.user_url;
2214  url_link = CAlignFormatUtil::MapTemplate(url_link,"user_url",user_url);
2215 
2216  string taxIDStr = (linkoutInfo.taxid > ZERO_TAX_ID) ? NStr::NumericToString(linkoutInfo.taxid) : "";
2217  url_link = CAlignFormatUtil::MapTemplate(url_link,"taxid",taxIDStr);
2218 
2219  string queryNumStr = (linkoutInfo.query_number > 0) ? NStr::IntToString(linkoutInfo.query_number) : "";
2220  url_link = CAlignFormatUtil::MapTemplate(url_link,"query_number",queryNumStr); //gi,term
2221 
2222  string giStr = (first_gi > ZERO_GI)? NStr::NumericToString(first_gi) : "";
2223  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giStr,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2224 
2225  if(textLink) {
2226  url_link = CAlignFormatUtil::MapTemplate(kMapviwerDispl,"lnk",url_link);
2227  }
2228  url_link = CAlignFormatUtil::MapProtocol(url_link);
2229  linkout_list.push_back(url_link);
2230  }
2231  }
2232  else if((linkout & eMapviewer) && first_gi != ZERO_GI){
2233  url_link = kMapviwerUrl;
2234  lnk_displ = textLink ? "Map Viewer" : kMapviwerImg;
2235 
2236  string linkTitle = " title=\"View <@label@> aligned to the " + linkoutInfo.taxName + " genome\"";
2237  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2238 
2239  if(textLink) {
2240  url_link = CAlignFormatUtil::MapTemplate(kMapviwerDispl,"lnk",url_link);
2241  }
2242  url_link = CAlignFormatUtil::MapProtocol(url_link);
2243  linkout_list.push_back(url_link);
2244  }
2245  //View Bioassays involving <accession
2246  if(linkout & eBioAssay && linkoutInfo.is_na && first_gi != ZERO_GI){
2247  url_link = CAlignFormatUtil::GetURLFromRegistry("BIOASSAY_NUC");
2248  lnk_displ = textLink ? "PubChem BioAssay" : kBioAssayNucImg;
2249 
2250  string linkTitle = " title=\"View Bioassays involving <@label@>\"";
2251  //gilist contains comma separated gis, change it to the following
2252  giList = NStr::Replace(giList,",","[RNATargetGI] OR ");
2253  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2254 
2255  if(textLink) {
2256  url_link = CAlignFormatUtil::MapTemplate(kBioAssayDispl,"lnk",url_link);
2257  }
2258  url_link = CAlignFormatUtil::MapProtocol(url_link);
2259  linkout_list.push_back(url_link);
2260  }
2261  else if (linkout & eBioAssay && !linkoutInfo.is_na && first_gi != ZERO_GI) {
2262  url_link = CAlignFormatUtil::GetURLFromRegistry("BIOASSAY_PROT");
2263  lnk_displ = textLink ? "PubChem BioAssay" : kBioAssayProtImg;
2264 
2265  lnkTitleInfo ="Bioassay data";
2266  string linkTitle = " title=\"View Bioassays involving <@label@>\"";
2267  //gilist contains comma separated gis, change it to the following
2268  giList = NStr::Replace(giList,",","[PigGI] OR ");
2269  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,"",linkTitle);
2270 
2271  if(textLink) {
2272  url_link = CAlignFormatUtil::MapTemplate(kBioAssayDispl,"lnk",url_link);
2273  }
2274  url_link = CAlignFormatUtil::MapProtocol(url_link);
2275  linkout_list.push_back(url_link);
2276  }
2277  if(linkout & eReprMicrobialGenomes){
2278  url_link = CAlignFormatUtil::GetURLFromRegistry("REPR_MICROBIAL_GENOMES");
2279  lnk_displ = textLink ? "Genome" : kReprMicrobialGenomesImg;
2280 
2281  lnkTitleInfo = "genomic information";
2282  //gilist contains comma separated gis
2283  string uid = !linkoutInfo.is_na ? "Protein Accession" : "Nucleotide Accession";
2284  url_link = CAlignFormatUtil::MapTemplate(url_link,"uid",uid);
2285  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,labelList,lnk_displ,lnkTitleInfo);
2286 
2287  if(textLink) {
2288  url_link = CAlignFormatUtil::MapTemplate(kReprMicrobialGenomesDispl,"lnk",url_link);
2289  }
2290  url_link = CAlignFormatUtil::MapProtocol(url_link);
2291  linkout_list.push_back(url_link);
2292  }
2293  if((linkout & eGenomeDataViewer) || (linkout & eTranscript)){
2294  string urlTag;
2295  lnk_displ = textLink ? "Genome Data Viewer" : kGenomeDataViewerImg;
2296  if(linkout & eTranscript) {
2297  urlTag = "GENOME_DATA_VIEWER_TRANSCR";
2298  lnkTitleInfo = "title=\"View the annotation of the transcript <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2299  }
2300  else {
2301  urlTag = linkoutInfo.is_na ? "GENOME_DATA_VIEWER_NUC" : "GENOME_DATA_VIEWER_PROT";
2302  lnkTitleInfo = linkoutInfo.is_na ?
2303  "title=\"View BLAST hits for <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as hits and browse to other regions.\""
2304  :
2305  "title=\"View the annotation of the protein <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2306  }
2307  url_link = CAlignFormatUtil::GetURLFromRegistry(urlTag);
2308  url_link = s_MapLinkoutGenParam(url_link,linkoutInfo.rid,giList,linkoutInfo.for_alignment, linkoutInfo.cur_align,firstAcc,lnk_displ,"",lnkTitleInfo);
2309 
2310  url_link = CAlignFormatUtil::MapTemplate(url_link,"queryID",linkoutInfo.queryID);
2311 
2312  TSeqPos seqFrom = linkoutInfo.subjRange.GetFrom();
2313  seqFrom = (seqFrom == 0) ? seqFrom : seqFrom - 1;
2314 
2315  TSeqPos seqTo = linkoutInfo.subjRange.GetTo();
2316  seqTo = (seqTo == 0) ? seqTo : seqTo - 1;
2317 
2318  url_link = CAlignFormatUtil::MapTemplate(url_link,"from",seqFrom);//-1
2319  url_link = CAlignFormatUtil::MapTemplate(url_link,"to",seqTo);//-1
2320 
2321  if(textLink) {
2322  url_link = CAlignFormatUtil::MapTemplate(kGenomeDataViewerDispl,"lnk",url_link);
2323  }
2324  url_link = CAlignFormatUtil::MapProtocol(url_link);
2325  linkout_list.push_back(url_link);
2326  }
2327  return linkout_list;
2328 }
2329 
2330 ///Get list of linkouts for one sequence
/// Packs the scalar arguments into an SLinkoutInfo and delegates to
/// s_GetLinkoutUrl with textLink = false.
/// @param linkout  linkout bit mask, forwarded unchanged to s_GetLinkoutUrl
/// @param ids  seq-ids of the sequence; FindGi(ids) supplies the gi string
/// @param rid, cdd_rid, entrez_term, is_na, preComputedResID,
///        structure_linkout_as_group, for_alignment  stored via SLinkoutInfo::Init
/// @param first_gi  when equal to ZERO_GI it is replaced by the gi found in ids
/// @param cur_align  stored in linkoutInfo.cur_align
/// @return the list produced by s_GetLinkoutUrl
2331 list<string> CAlignFormatUtil::GetLinkoutUrl(int linkout, const CBioseq::TId& ids,
2332  const string& rid,
2333  const string& cdd_rid,
2334  const string& entrez_term,
2335  bool is_na,
2336  TGi first_gi,
2337  bool structure_linkout_as_group,
2338  bool for_alignment, int cur_align,
2339  string preComputedResID)
2340 
2341 {
2342  list<string> linkout_list;
2343  TGi gi = FindGi(ids);
// NOTE(review): this listing skips original lines 2344 and 2346; `label` is
// passed to s_GetLinkoutUrl below, so it is presumably filled on one of the
// missing lines - confirm against the full file.
2345  string label;
2347  string giString = NStr::NumericToString(gi);
// Fall back to this sequence's own gi when the caller did not supply one.
2348  first_gi = (first_gi == ZERO_GI) ? gi : first_gi;
2349 
2350 
2351 
// database, query_number, user_url and linkoutOrder are not available through
// this entry point, so empty/zero values are passed to Init.
2352  SLinkoutInfo linkoutInfo;
2353  linkoutInfo.Init(rid,
2354  cdd_rid,
2355  entrez_term,
2356  is_na,
2357  "", //database
2358  0, //query_number
2359  "", //user_url
2360  preComputedResID,
2361  "", //linkoutOrder
2362  structure_linkout_as_group,
2363  for_alignment);
2364 
2365  linkoutInfo.cur_align = cur_align;
2366  linkoutInfo.taxid = ZERO_TAX_ID;
2367 
2368  linkout_list = s_GetLinkoutUrl(linkout,
2369  giString,
2370  label,
2371  first_gi,
2372  linkoutInfo,
2373  false); //textlink
2374 
2375  return linkout_list;
2376 }
2377 
2378 static int s_LinkLetterToType(string linkLetter)
2379 {
2380  int linkType = 0;
2381  if(linkLetter == "U") {
2382  linkType = eUnigene;
2383  }
2384  else if(linkLetter == "S") {
2385  linkType = eStructure;
2386  }
2387  else if(linkLetter == "E") {
2388  linkType = eGeo;
2389  }
2390  else if(linkLetter == "G") {
2391  linkType = eGene;
2392  }
2393  else if(linkLetter == "M") {
2394  linkType = eMapviewer | eGenomicSeq;
2395  }
2396  else if(linkLetter == "N") {
2397  linkType = eGenomicSeq;
2398  }
2399  else if(linkLetter == "B") {
2400  linkType = eBioAssay;
2401  }
2402  else if(linkLetter == "R") {
2403  linkType = eReprMicrobialGenomes;
2404  }
2405  else if(linkLetter == "V") {
2406  linkType = eGenomeDataViewer;
2407  }
2408  else if(linkLetter == "T") {
2409  linkType = eTranscript;
2410  }
2411 
2412  return linkType;
2413 }
2414 
2415 
2416 static void s_AddLinkoutInfo(map<int, vector < CBioseq::TId > > &linkout_map,int linkout,CBioseq::TId &cur_id)
2417 {
2418  if(linkout_map.count(linkout) > 0){
2419  linkout_map[linkout].push_back(cur_id);
2420  }
2421  else {
2422  vector <CBioseq::TId > idList;
2423  idList.push_back(cur_id);
2424  linkout_map.insert(map<int, vector <CBioseq::TId > >::value_type(linkout,idList));
2425  }
2426 }
2427 
// NOTE(review): the line carrying this function's return type, name and first
// parameter is absent from this listing (the embedded numbering jumps from 2427
// to 2429). The body uses `cur_id`, and a later function in this file calls
// GetSeqLinkoutInfo(cur_id, &linkoutdb, mv_build_name) - presumably this
// function; confirm against the full file.
//
// Looks up the linkout value for one sequence in the linkout database and
// returns it. Returns 0 when *linkoutdb is null, or when a CException is
// caught before any value was assigned.
//   linkoutdb      in/out: pointer to the database handle; set to NULL after a CException
//   mv_build_name  passed through to every GetLinkout call
//   gi             gi to look up; INVALID_GI means "derive it with FindGi(cur_id)"
2429  ILinkoutDB **linkoutdb,
2430  const string& mv_build_name,
2431  TGi gi)
2432 {
2433  int linkout = 0;
2434 
2435  if(*linkoutdb) {
2436  if(gi == INVALID_GI) {
2437  gi = FindGi(cur_id);
2438  }
2439  try {
// Prefer the gi-based lookup; fall back to a seq-id lookup only when there is
// no positive gi and GetTextSeqID(cur_id) succeeds.
2440  if(gi > ZERO_GI) {
2441  linkout = (*linkoutdb)->GetLinkout(gi, mv_build_name);
2442  }
2443  else if(GetTextSeqID(cur_id)){
// NOTE(review): `seqID` is declared on original line 2444, which is missing
// from this listing.
2445  linkout = (*linkoutdb)->GetLinkout(*seqID, mv_build_name);
// Second lookup with an id rebuilt from GetSeqIdString(false); the variable
// name below suggests this is the id without its version - TODO confirm.
2446  string str_id = seqID->GetSeqIdString(false);
2447  CRef<CSeq_id> seqIDNew(new CSeq_id(str_id));
2448  int linkoutWithoutVersion = (*linkoutdb)->GetLinkout(*seqIDNew, mv_build_name);
// NOTE(review): `linkoutWithoutVersion | eStructure` is non-zero whenever
// eStructure is non-zero, so this test reduces to `linkoutWithoutVersion != 0`.
// A bitwise AND may have been intended - confirm before changing.
2449  if(linkoutWithoutVersion && (linkoutWithoutVersion | eStructure)) {
2450  linkout = linkout | linkoutWithoutVersion;
2451  }
2452  }
2453  }
2454  catch (const CException & e) {
// The failure is reported on both the diagnostic stream and stderr, and the
// caller's handle is cleared.
2455  ERR_POST("Problem with linkoutdb: " + e.GetMsg());
2456  cerr << "[BLAST FORMATTER EXCEPTION] Problem with linkoutdb: " << e.GetMsg() << endl;
2457  *linkoutdb = NULL;
2458  }
2459  }
2460  return linkout;
2461 }
2462 
// Files one sequence's id list under every linkout type that is set for it.
// NOTE(review): the line with the function name and first parameter (original
// line 2464) is absent from this listing; the body uses `cur_id` and other
// functions in this file call GetBdlLinkoutInfo(cur_id, linkout_map, linkoutdb,
// mv_build_name) - presumably this overload; confirm.
//   linkout_map    in/out: linkout type -> ids carrying that linkout
//   linkoutdb      database handle; the function does nothing when it is null
//   mv_build_name  forwarded to GetSeqLinkoutInfo
2463 void
2465  map<int, vector <CBioseq::TId > > &linkout_map,
2466  ILinkoutDB* linkoutdb,
2467  const string& mv_build_name)
2468 {
2469  if(!linkoutdb) return;
2470 
// The address of the local copy of the pointer is passed, so if
// GetSeqLinkoutInfo clears it only this function's copy is affected.
2471  int linkout = GetSeqLinkoutInfo(cur_id,
2472  &linkoutdb,
2473  mv_build_name);
2474 
2475  if(linkout & eGene){
2476  s_AddLinkoutInfo(linkout_map,eGene,cur_id);
2477  }
2478  if (linkout & eUnigene) {
2479  s_AddLinkoutInfo(linkout_map,eUnigene,cur_id);
2480  }
2481  if (linkout & eGeo){
2482  s_AddLinkoutInfo(linkout_map,eGeo,cur_id);
2483  }
2484  if (linkout & eStructure){
2485  s_AddLinkoutInfo(linkout_map,eStructure,cur_id);
2486  }
2487  //eGenomicSeq and eMapviewer cannot combine together
// When both bits are set the id is filed under eGenomicSeq only; eGenomicSeq
// without eMapviewer is not filed at all by this chain.
2488  if((linkout & eGenomicSeq) && (linkout & eMapviewer)){
2489  s_AddLinkoutInfo(linkout_map,eGenomicSeq,cur_id);
2490  }
2491  else if(linkout & eMapviewer){
2492  s_AddLinkoutInfo(linkout_map,eMapviewer,cur_id);
2493  }
2494  if(linkout & eBioAssay){
2495  s_AddLinkoutInfo(linkout_map,eBioAssay,cur_id);
2496  }
2497  if(linkout & eReprMicrobialGenomes){
2498  s_AddLinkoutInfo(linkout_map,eReprMicrobialGenomes,cur_id);
2499  }
2500 
2501  if(linkout & eGenomeDataViewer){
2502  s_AddLinkoutInfo(linkout_map,eGenomeDataViewer,cur_id);
2503  }
2504  if(linkout & eTranscript){
2505  s_AddLinkoutInfo(linkout_map,eTranscript,cur_id);
2506  }
2507 
2508 }
2509 
// Collects linkout information for the leading deflines of a defline list by
// calling the single-id GetBdlLinkoutInfo on each one.
// NOTE(review): the line with the function name and first parameter (original
// line 2511) is absent from this listing; the body iterates `bdl` as a
// list< CRef< CBlast_def_line > >.
//   linkout_map    in/out: linkout type -> ids carrying that linkout
//   linkoutdb, mv_build_name  forwarded unchanged to the per-id overload
2510 void
2512  map<int, vector <CBioseq::TId > > &linkout_map,
2513  ILinkoutDB* linkoutdb,
2514  const string& mv_build_name)
2515 {
2516  const int kMaxDeflineNum = 10;
2517  int num = 0;
2518  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2519  iter != bdl.end(); iter++){
// The C-style cast strips const from the defline's id list so it can be passed
// to the per-id overload, which takes a non-const reference.
2520  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2521 
2522  GetBdlLinkoutInfo(cur_id,
2523  linkout_map,
2524  linkoutdb,
2525  mv_build_name);
// NOTE(review): the counter is incremented after processing and tested with
// `>`, so the loop stops after kMaxDeflineNum + 1 (11) deflines, not 10.
2526  num++;
2527  if(num > kMaxDeflineNum) break;
2528  }
2529 }
2530 
// Returns the common name recorded for a taxonomy id.
// The result is an empty string when taxid is ZERO_TAX_ID or when the lookup
// throws a CException (the exception is deliberately swallowed).
2531 static string s_GetTaxName(TTaxId taxid)
2532 {
2533  string taxName;
2534  try {
2535  if(taxid != ZERO_TAX_ID) {
// NOTE(review): `info` is declared on original line 2536, which is missing
// from this listing.
2537  CSeqDB::GetTaxInfo(taxid, info);
2538  taxName = info.common_name;
2539  }
2540  }
2541  catch (CException&) {
2542 
2543  }
2544  return taxName;
2545 }
2546 
// Appends an "Identical Proteins" link to linkout_list when the sequence has a
// text seq-id.
// NOTE(review): original lines 2547 (function name and first parameter), 2557
// and 2560 are absent from this listing. `wid` and the value of `label` come
// from those lines. A later call s_AddOtherRelatedInfoLinks(cur_id, rid, is_na,
// for_alignment, cur_align, linkout_list) matches this parameter list -
// presumably this function; confirm.
//   rid, for_alignment, cur_align  forwarded to s_MapLinkoutGenParam
//   is_na         not referenced in the visible lines
//   linkout_list  in/out: receives the generated link
2548  const string& rid,
2549  bool is_na,
2550  bool for_alignment,
2551  int cur_align,
2552  list<string> &linkout_list)
2553 
2554 {
2555  //Identical Proteins
2556 
2558  if (CAlignFormatUtil::GetTextSeqID(wid)) {
2559  string label;
2561  string url_link = kIdenticalProteinsUrl;
2562  string lnk_displ = "Identical Proteins";
// The gi argument is the string form of ZERO_GI.
2563  url_link = s_MapLinkoutGenParam(url_link,rid,NStr::NumericToString(ZERO_GI),for_alignment, cur_align,label,lnk_displ);
// Wrap the URL in the display template, then fill in the label.
2564  url_link = CAlignFormatUtil::MapTemplate(kIdenticalProteinsDispl,"lnk",url_link);
2565  url_link = CAlignFormatUtil::MapTemplate(url_link,"label",label);
2566  linkout_list.push_back(url_link);
2567  }
2568 }
2569 
2570 
2571 
2572 //reset:taxname,gnl
// Builds the ordered list of linkout URLs for one hit.
// linkoutInfo.linkoutOrder is split on commas; each letter is mapped to a
// linkout type, the ids filed under that type in linkout_map are gathered into
// comma-separated gi and label lists, and s_GetLinkoutUrl produces the link.
// linkoutInfo.taxName and linkoutInfo.gnl are cleared and recomputed on every
// iteration.
//   cur_id            ids of the current sequence (used for the genomic "gnl" value
//                     and for the optional identical-proteins link)
//   linkoutInfo       in/out: request parameters; taxName and gnl are overwritten
//   linkout_map       linkout type -> ids; see the operator[] note below
//   getIdentProteins  when true, s_AddOtherRelatedInfoLinks is called at the end
2573 static list<string> s_GetFullLinkoutUrl(CBioseq::TId& cur_id,
2574  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2575  map<int, vector < CBioseq::TId > > &linkout_map,
2576  bool getIdentProteins)
2577 
2578 {
2579  list<string> linkout_list;
2580 
2581  vector<string> linkLetters;
2582  NStr::Split(linkoutInfo.linkoutOrder,",",linkLetters); //linkoutOrder = "G,U,M,V,E,S,B,R,T"
2583  for(size_t i = 0; i < linkLetters.size(); i++) {
2584  TGi first_gi = ZERO_GI;
2585  vector < CBioseq::TId > idList;
2586  int linkout = s_LinkLetterToType(linkLetters[i]);
2587  linkoutInfo.taxName.clear();
// For "M"/"N" letters pick eGenomicSeq when any ids were filed under it,
// otherwise eMapviewer. NOTE(review): map::operator[] inserts an empty entry
// for eGenomicSeq when the key is absent.
2588  if(linkout & (eMapviewer | eGenomicSeq)) {
2589  linkout = (linkout_map[eGenomicSeq].size() != 0) ? eGenomicSeq : eMapviewer;
2590  linkoutInfo.taxName = s_GetTaxName(linkoutInfo.taxid);
2591  }
2592  if(linkout_map.find(linkout) != linkout_map.end()) {
2593  idList = linkout_map[linkout];
2594  }
// A link is skipped for an unknown letter, when no ids carry this linkout, or
// for structure links when cdd_rid is empty or "0".
2595  bool disableLink = (linkout == 0 || idList.size() == 0 || ( (linkout & eStructure) && (linkoutInfo.cdd_rid == "" || linkoutInfo.cdd_rid == "0")));
2596 
2597  string giList,labelList;
// NOTE(review): a bool value is stored in an int here.
2598  int seqVersion = ((linkout & eGenomeDataViewer) || (linkout & eTranscript)) ? true : false;
// NOTE(review): this inner `i` shadows the outer loop index.
2599  for (size_t i = 0; i < idList.size(); i++) {
2600  const CBioseq::TId& ids = idList[i];
2601  TGi gi = FindGi(ids);
2602  if (first_gi == ZERO_GI) first_gi = gi;
2603 
2604 
// NOTE(review): `wid` is declared on original line 2605, which is missing from
// this listing.
2606  string label = CAlignFormatUtil::GetLabel(wid,seqVersion);
2607  if(!labelList.empty()) labelList += ",";
2608  labelList += label;
2609 
2610  //use only first gi for bioAssay protein
2611  if(!giList.empty() && (linkout & eBioAssay) && !linkoutInfo.is_na) continue;
2612  if(!giList.empty()) giList += ",";
2613  giList += NStr::NumericToString(gi);
2614  }
2615 
2616  linkoutInfo.gnl.clear();
2617  if(!disableLink && linkout == eGenomicSeq) {
2618  linkoutInfo.gnl = s_GetBestIDForURL(cur_id);
2619  }
2620 
2621  if(!disableLink) {//
2622  //The following list will contain only one entry for single linkout value
2623  list<string> one_linkout = s_GetLinkoutUrl(linkout,
2624  giList,
2625  labelList,
2626  first_gi,
2627  linkoutInfo);
// Only the first returned entry is kept.
2628  if(one_linkout.size() > 0) {
2629  list<string>::iterator iter = one_linkout.begin();
2630  linkout_list.push_back(*iter);
2631  }
2632  }
2633  }
2634  if(getIdentProteins) {
2635  s_AddOtherRelatedInfoLinks(cur_id,linkoutInfo.rid,linkoutInfo.is_na,linkoutInfo.for_alignment,linkoutInfo.cur_align,linkout_list);
2636  }
2637  return linkout_list;
2638 }
2639 
2640 list<string> CAlignFormatUtil::GetFullLinkoutUrl(const list< CRef< CBlast_def_line > > &bdl,
2641  CAlignFormatUtil::SLinkoutInfo &linkoutInfo)
2642 {
2643  list<string> linkout_list;
2644  map<int, vector < CBioseq::TId > > linkout_map;
2645  if(bdl.size() > 0) {
2646  GetBdlLinkoutInfo(bdl,linkout_map, linkoutInfo.linkoutdb, linkoutInfo.mv_build_name);
2647  list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2648  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2649  linkout_list = s_GetFullLinkoutUrl(cur_id,
2650  linkoutInfo,
2651  linkout_map,
2652  !linkoutInfo.is_na && bdl.size() > 1);
2653  }
2654  return linkout_list;
2655 }
2656 
2657 
2658 list<string> CAlignFormatUtil::GetFullLinkoutUrl(const list< CRef< CBlast_def_line > > &bdl,
2659  const string& rid,
2660  const string& cdd_rid,
2661  const string& entrez_term,
2662  bool is_na,
2663  bool structure_linkout_as_group,
2664  bool for_alignment,
2665  int cur_align,
2666  string& linkoutOrder,
2667  TTaxId taxid,
2668  string &database,
2669  int query_number,
2670  string &user_url,
2671  string &preComputedResID,
2672  ILinkoutDB* linkoutdb,
2673  const string& mv_build_name)
2674 
2675 {
2676  list<string> linkout_list;
2677  map<int, vector < CBioseq::TId > > linkout_map;
2678  if(bdl.size() > 0) {
2679  GetBdlLinkoutInfo(bdl,linkout_map, linkoutdb, mv_build_name);
2680  list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2681  CBioseq::TId& cur_id = (CBioseq::TId &)(*iter)->GetSeqid();
2682 
2683  SLinkoutInfo linkoutInfo;
2684  linkoutInfo.Init(rid,
2685  cdd_rid,
2686  entrez_term,
2687  is_na,
2688  database,
2689  query_number,
2690  user_url,
2691  preComputedResID,
2692  linkoutOrder,
2693  structure_linkout_as_group,
2694  for_alignment);
2695 
2696  linkoutInfo.cur_align = cur_align;
2697  linkoutInfo.taxid = taxid;
2698 
2699  linkout_list = s_GetFullLinkoutUrl(cur_id,
2700  linkoutInfo,
2701  linkout_map,
2702  !is_na && bdl.size() > 1);
2703  }
2704  return linkout_list;
2705 }
2706 
2707 
// Builds the full linkout URL list for a single id list, using settings already
// held in linkoutInfo.
// NOTE(review): the line with the return type, function name and first
// parameter (original line 2708) is absent from this listing; the body uses
// `cur_id` and returns a list<string>.
//   linkoutInfo       linkout settings; linkoutdb and mv_build_name are read here
//   getIdentProteins  forwarded unchanged to s_GetFullLinkoutUrl
2709  CAlignFormatUtil::SLinkoutInfo &linkoutInfo,
2710  bool getIdentProteins)
2711 {
2712  list<string> linkout_list;
2713  map<int, vector < CBioseq::TId > > linkout_map;
2714 
// First collect which linkout types apply to cur_id, then render them.
2715  GetBdlLinkoutInfo(cur_id,linkout_map, linkoutInfo.linkoutdb, linkoutInfo.mv_build_name);
2716  linkout_list = s_GetFullLinkoutUrl(cur_id,
2717  linkoutInfo,
2718  linkout_map,
2719  getIdentProteins);
2720  return linkout_list;
2721 }
2722 
// Builds the full linkout URL list for a single id list from individual
// settings.
// NOTE(review): the line with the return type, function name and first
// parameter (original line 2723) is absent from this listing; the body uses
// `cur_id` and returns a list<string>.
// All scalar arguments are stored in a local SLinkoutInfo (Init, cur_align,
// taxid); linkoutdb and mv_build_name are used only to collect the per-type id
// map; getIdentProteins is forwarded to s_GetFullLinkoutUrl.
2724  const string& rid,
2725  const string& cdd_rid,
2726  const string& entrez_term,
2727  bool is_na,
2728  bool structure_linkout_as_group,
2729  bool for_alignment,
2730  int cur_align,
2731  string& linkoutOrder,
2732  TTaxId taxid,
2733  string &database,
2734  int query_number,
2735  string &user_url,
2736  string &preComputedResID,
2737  ILinkoutDB* linkoutdb,
2738  const string& mv_build_name,
2739  bool getIdentProteins)
2740 
2741 {
2742  list<string> linkout_list;
2743 
2744  map<int, vector < CBioseq::TId > > linkout_map;
// Collect which linkout types apply to cur_id.
2745  GetBdlLinkoutInfo(cur_id,linkout_map, linkoutdb, mv_build_name);
2746  SLinkoutInfo linkoutInfo;
2747  linkoutInfo.Init(rid,
2748  cdd_rid,
2749  entrez_term,
2750  is_na,
2751  database,
2752  query_number,
2753  user_url,
2754  preComputedResID,
2755  linkoutOrder,
2756  structure_linkout_as_group,
2757  for_alignment);
2758 
// cur_align and taxid are assigned directly rather than through Init.
2759  linkoutInfo.cur_align = cur_align;
2760  linkoutInfo.taxid = taxid;
2761 
2762  linkout_list = s_GetFullLinkoutUrl(cur_id,
2763  linkoutInfo,
2764  linkout_map,
2765  getIdentProteins);
2766  return linkout_list;
2767 }
2768 
2769 
2770 static bool FromRangeAscendingSort(CRange<TSeqPos> const& info1,
2771  CRange<TSeqPos> const& info2)
2772 {
2773  return info1.GetFrom() < info2.GetFrom();
2774 }
2775 
2776 //0 for query, 1 for subject
2777 //Gets query and subject range lists,oppositeStrands param
2778 static bool s_ProcessAlignSet(const CSeq_align_set& alnset,
2779  list<CRange<TSeqPos> > &query_list,
2780  list<CRange<TSeqPos> > &subject_list)
2781 {
2782  bool oppositeStrands = false;
2783  bool isFirst = false;
2784  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2785  CRange<TSeqPos> query_range = (*iter)->GetSeqRange(0);
2786  //for minus strand
2787  if(query_range.GetFrom() > query_range.GetTo()){
2788  query_range.Set(query_range.GetTo(), query_range.GetFrom());
2789  }
2790  query_list.push_back(query_range);
2791 
2792  CRange<TSeqPos> subject_range = (*iter)->GetSeqRange(1);
2793  //for minus strand
2794  if(subject_range.GetFrom() > subject_range.GetTo()){
2795  subject_range.Set(subject_range.GetTo(), subject_range.GetFrom());
2796  }
2797  subject_list.push_back(subject_range);
2798 
2799  oppositeStrands = (!isFirst) ? (*iter)->GetSeqStrand(0) != (*iter)->GetSeqStrand(1) : oppositeStrands;
2800  isFirst = true;
2801  }
2802 
2803  query_list.sort(FromRangeAscendingSort);
2804  subject_list.sort(FromRangeAscendingSort);
2805  return oppositeStrands;
2806 }
2807 
2808 
2809 
2810 //0 for query, 1 for subject
2811 static list<CRange<TSeqPos> > s_MergeRangeList(list<CRange<TSeqPos> > &source)
2812 {
2813 
2814  list<CRange<TSeqPos> > merge_list;
2815 
2816  bool is_first = true;
2817  CRange<TSeqPos> prev_range (0, 0);
2818  ITERATE(list<CRange<TSeqPos> >, iter, source) {
2819 
2820  if (is_first) {
2821  merge_list.push_back(*iter);
2822  is_first= false;
2823  prev_range = *iter;
2824  } else {
2825  if (prev_range.IntersectingWith(*iter)) {
2826  merge_list.pop_back();
2827  CRange<TSeqPos> temp_range = prev_range.CombinationWith(*iter);
2828  merge_list.push_back(temp_range);
2829  prev_range = temp_range;
2830  } else {
2831  merge_list.push_back(*iter);
2832  prev_range = *iter;
2833  }
2834  }
2835 
2836  }
2837  return merge_list;
2838 }
2839 
// NOTE(review): the signature line of this function (original line 2840) is
// absent from this listing; the body reads a CSeq_align_set named `alnset` and
// returns an int.
//
// Computes how many positions of row 0 are covered by the alignments in alnset:
// the row-0 ranges are collected, sorted by start, merged where they intersect,
// and the lengths of the merged ranges are summed.
2841 {
2842 
2843  list<CRange<TSeqPos> > merge_list;
2844 
2845  list<CRange<TSeqPos> > temp;
2846  ITERATE(CSeq_align_set::Tdata, iter, alnset.Get()) {
2847  CRange<TSeqPos> seq_range = (*iter)->GetSeqRange(0);
2848  //for minus strand
2849  if(seq_range.GetFrom() > seq_range.GetTo()){
2850  seq_range.Set(seq_range.GetTo(), seq_range.GetFrom());
2851  }
2852  temp.push_back(seq_range);
2853  }
2854 
// s_MergeRangeList compares each range only with the previously emitted one,
// hence the sort first.
2855  temp.sort(FromRangeAscendingSort);
2856 
2857  merge_list = s_MergeRangeList(temp);
2858 
2859  int master_covered_lenghth = 0;
2860  ITERATE(list<CRange<TSeqPos> >, iter, merge_list) {
2861  master_covered_lenghth += iter->GetLength();
2862  }
2863  return master_covered_lenghth;
2864 }
2865 
2866 
2867 
2868 CRange<TSeqPos> CAlignFormatUtil::GetSeqAlignCoverageParams(const CSeq_align_set& alnset,int *master_covered_lenghth,bool *flip)
2869 
2870 {
2871 
2872  list<CRange<TSeqPos> > query_list;
2873  list<CRange<TSeqPos> > subject_list;
2874 
2875  *flip = s_ProcessAlignSet(alnset,query_list,subject_list);
2876  query_list = s_MergeRangeList(query_list);
2877  subject_list = s_MergeRangeList(subject_list);
2878 
2879 
2880  *master_covered_lenghth = 0;
2881  ITERATE(list<CRange<TSeqPos> >, iter, query_list) {
2882  *master_covered_lenghth += iter->GetLength();
2883  }
2884 
2885  TSeqPos from = 0,to = 0;
2886  ITERATE(list<CRange<TSeqPos> >, iter, subject_list) {
2887  from = (from == 0) ? iter->GetFrom() : min(from,iter->GetFrom());
2888  to = max(to,iter->GetTo());
2889  }
2890  //cerr << "from,to = " << from << "," << to << endl;
2891  CRange<TSeqPos> subjectRange(from + 1, to + 1);
2892  return subjectRange;
2893 }
2894 
// NOTE(review): the lines with the return type, function name and first
// parameter (original lines 2895-2896) are absent from this listing; the body
// uses `ctx` and returns a CRef<CSeq_align_set>.
//
// Re-sorts an alignment set according to the requested hit and HSP sort
// orders. When the database is mixed (IsMixedDatabase(ctx)) the set is first
// split with SplitSeqalignByMolecularType and each part is sorted separately.
//   aln_set    alignments to sort; returned as-is when no sorting is requested
//   nuc_to_nuc_translation, hit_sort, hsp_sort
//              forwarded to SortOneSeqalignForSortableFormat
//   db_sort, scope, linkoutdb, mv_build_name
//              used only for the mixed-database split
2897  CScope& scope,
2898  CSeq_align_set& aln_set,
2899  bool nuc_to_nuc_translation,
2900  int db_sort,
2901  int hit_sort,
2902  int hsp_sort,
2903  ILinkoutDB* linkoutdb,
2904  const string& mv_build_name) {
2905 
2906 
// Nothing to sort: hand back a reference to the caller's own set.
2907  if (db_sort == 0 && hit_sort < 1 && hsp_sort < 1)
2908  return (CRef<CSeq_align_set>) &aln_set;
2909 
2910  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2911  vector< CRef<CSeq_align_set> > seqalign_vec(2);
2912  seqalign_vec[0] = new CSeq_align_set;
2913  seqalign_vec[1] = new CSeq_align_set;
2914 
2915  if(IsMixedDatabase(ctx)) {
2916  SplitSeqalignByMolecularType(seqalign_vec, db_sort, aln_set, scope,
2917  linkoutdb, mv_build_name);
2918  }else {
// Non-mixed database: slot 0 refers to the input set and slot 1 keeps the
// empty set created above.
2919  seqalign_vec[0] = const_cast<CSeq_align_set*>(&aln_set);
2920  }
2921 
2922 
// Sort each part and concatenate the per-hit results in slot order.
2923  ITERATE(vector< CRef<CSeq_align_set> >, iter, seqalign_vec){
2924  list< CRef<CSeq_align_set> > one_seqalign_hit_total_list = SortOneSeqalignForSortableFormat(**iter,
2925  nuc_to_nuc_translation,
2926  hit_sort,
2927  hsp_sort);
2928 
2929  seqalign_hit_total_list.splice(seqalign_hit_total_list.end(),one_seqalign_hit_total_list);
2930 
2931  }
2932 
2933  return HitListToHspList(seqalign_hit_total_list);
2934 }
// Groups the HSPs of `source` into per-hit sets, orders the hits by hit_sort,
// orders the HSPs inside each hit by hsp_sort, and returns the per-hit sets.
// NOTE(review): the line with the function name and first parameter (original
// line 2936) is absent from this listing; callers in this file invoke
// SortOneSeqalignForSortableFormat(set, nuc_to_nuc_translation, hit_sort,
// hsp_sort) - presumably this function; confirm.
// A hit_sort or hsp_sort value not handled below leaves that ordering as
// produced by HspListToHitList.
2935 list< CRef<CSeq_align_set> >
2937  bool nuc_to_nuc_translation,
2938  int hit_sort,
2939  int hsp_sort)
2940 {
2941  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2942  list< CRef<CSeq_align_set> > seqalign_hit_list;
2943  HspListToHitList(seqalign_hit_list, source);
2944 
// Order the hits.
2945  if (hit_sort == eTotalScore) {
2946  seqalign_hit_list.sort(SortHitByTotalScoreDescending);
2947  } else if (hit_sort == eHighestScore) {
2948  seqalign_hit_list.sort(CAlignFormatUtil::SortHitByScoreDescending);
2949  } else if (hit_sort == ePercentIdentity) {
2950  SortHitByPercentIdentityDescending(seqalign_hit_list,
2951  nuc_to_nuc_translation);
2952  } else if (hit_sort == eQueryCoverage) {
2953  seqalign_hit_list.sort(SortHitByMasterCoverageDescending);
2954  }
2955 
// Order the HSPs within each hit.
2956  ITERATE(list< CRef<CSeq_align_set> >, iter2, seqalign_hit_list) {
2957  CRef<CSeq_align_set> temp(*iter2);
2958  if (hsp_sort == eQueryStart) {
2959  temp->Set().sort(SortHspByMasterStartAscending);
2960  } else if (hsp_sort == eHspPercentIdentity) {
// NOTE(review): the body of this branch (original line 2961) is missing from
// this listing.
2962  } else if (hsp_sort == eScore) {
2963  temp->Set().sort(SortHspByScoreDescending);
2964  } else if (hsp_sort == eSubjectStart) {
2965  temp->Set().sort(SortHspBySubjectStartAscending);
2966 
2967  }
2968  seqalign_hit_total_list.push_back(temp);
2969  }
2970  return seqalign_hit_total_list;
2971 }
2972 
// NOTE(review): the lines with the return type, function name and first
// parameter (original lines 2973-2974) are absent from this listing; the body
// uses `aln_set` and returns a CRef<CSeq_align_set>.
//
// Sorts one alignment set by the requested hit and HSP orders and flattens the
// result back into a single set.
2975  bool nuc_to_nuc_translation,
2976  int hit_sort,
2977  int hsp_sort) {
2978 
// No re-sorting is done when both sort keys are at or below their e-value
// enumerators; the caller's set is returned as-is.
2979  if (hit_sort <= eEvalue && hsp_sort <= eHspEvalue) {
2980  return (CRef<CSeq_align_set>) &aln_set;
2981  }
2982 
2983 // seqalign_vec[0] = const_cast<CSeq_align_set*>(&aln_set);
2984  list< CRef<CSeq_align_set> > seqalign_hit_total_list = SortOneSeqalignForSortableFormat(aln_set,
2985  nuc_to_nuc_translation,
2986  hit_sort,
2987  hsp_sort);
2988  return HitListToHspList(seqalign_hit_total_list);
2989 }
2990 
2991 
// NOTE(review): the line with the return type, function name and first
// parameter (original line 2992) is absent from this listing; the body iterates
// `source_aln` and returns a CRef<CSeq_align_set>.
//
// Returns a new alignment set holding the alignments of source_aln whose
// e-value (as reported by GetAlnScores) lies in [evalueLow, evalueHigh],
// bounds inclusive.
2993  double evalueLow,
2994  double evalueHigh)
2995 {
2996  int score, sum_n, num_ident;
2997  double bits, evalue;
2998  list<TGi> use_this_gi;
2999 
3000  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3001 
3002  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
// Only `evalue` is used below; the other outputs are required by the call.
3003  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3004  sum_n, num_ident, use_this_gi);
3005  //Add the next three lines to re-calculate the seq align evalue to the one that is displayed on the screen
3006  //string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3007  //CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3008  //evalue = NStr::StringToDouble(evalue_buf);
3009  if(evalue >= evalueLow && evalue <= evalueHigh) {
3010  new_aln->Set().push_back(*iter);
3011  }
3012  }
3013  return new_aln;
3014 
3015 }
3016 
3017 /// Returns percent match for an alignment.
3018 /// Normally we round up the value, unless that means that an
3019 /// alignment with mismatches would be 100%. In that case
3020 /// it becomes 99%.
3021 ///@param numerator: numerator in percent identity calculation.
3022 ///@param denominator: denominator in percent identity calculation.
3023 int CAlignFormatUtil::GetPercentMatch(int numerator, int denominator)
3024 {
3025  if (numerator == denominator)
3026  return 100;
3027  else {
3028  int retval =(int) (0.5 + 100.0*((double)numerator)/((double)denominator));
3029  retval = min(99, retval);
3030  return retval;
3031  }
3032 }
3033 
3034 double CAlignFormatUtil::GetPercentIdentity(int numerator, int denominator)
3035 {
3036  if (numerator == denominator)
3037  return 100;
3038  else {
3039  double retval =100*(double)numerator/(double)denominator;
3040  return retval;
3041  }
3042 }
3043 
// NOTE(review): the line with the return type, function name and first
// parameter (original line 3044) is absent from this listing; the body iterates
// `source_aln` and returns a CRef<CSeq_align_set>.
//
// Returns a new alignment set holding the alignments of source_aln whose
// percent identity (GetPercentIdentity, unrounded) lies in
// [percentIdentLow, percentIdentHigh], bounds inclusive.
3045  double percentIdentLow,
3046  double percentIdentHigh)
3047 {
3048  int score, sum_n, num_ident;
3049  double bits, evalue;
3050  list<TGi> use_this_gi;
3051 
3052  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3053 
3054  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
// Only `num_ident` is used below; the other outputs are required by the call.
3055  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3056  sum_n, num_ident, use_this_gi);
3057  int seqAlnLength = GetAlignmentLength(**iter, kTranslation);
// Alignments with zero length or no identities are always dropped; this also
// keeps the denominator passed to GetPercentIdentity positive.
3058  if(seqAlnLength > 0 && num_ident > 0) {
3059  double alnPercentIdent = GetPercentIdentity(num_ident, seqAlnLength);
3060  if(alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh) {
3061  new_aln->Set().push_back(*iter);
3062  }
3063  }
3064  }
3065  return new_aln;
3066 }
3067 
// NOTE(review): the line with the return type, function name and first
// parameter (original line 3068) is absent from this listing; the body iterates
// `source_aln` and returns a CRef<CSeq_align_set>.
//
// Returns a new alignment set holding the alignments of source_aln that pass
// BOTH an e-value window [evalueLow, evalueHigh] and a percent-identity window
// [percentIdentLow, percentIdentHigh] (all bounds inclusive).
3069  double evalueLow,
3070  double evalueHigh,
3071  double percentIdentLow,
3072  double percentIdentHigh)
3073 {
3074  int score, sum_n, num_ident;
3075  double bits, evalue;
3076  list<TGi> use_this_gi;
3077 
3078  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3079 
3080  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3081  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue,
3082  sum_n, num_ident, use_this_gi);
3083  //Add the next three lines to re-calculate the seq align evalue to the one that is displayed on the screen
3084  //string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3085  //CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3086  //evalue = NStr::StringToDouble(evalue_buf);
3087  int seqAlnLength = GetAlignmentLength(**iter, kTranslation);
// Alignments with zero length or no identities are always dropped.
3088  if(seqAlnLength > 0 && num_ident > 0) {
// NOTE(review): this uses the integer GetPercentMatch (rounded, capped at 99
// for imperfect matches) while the percent-identity-only filter in this file
// uses the unrounded GetPercentIdentity - confirm the difference is intended.
3089  int alnPercentIdent = GetPercentMatch(num_ident, seqAlnLength);
3090  if( (evalue >= evalueLow && evalue <= evalueHigh) &&
3091  (alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh)) {
3092  new_aln->Set().push_back(*iter);
3093  }
3094  }
3095  }
3096  return new_aln;
3097 }
3098 
// NOTE(review): the signature line (original line 3099) is absent from this
// listing; the body reads `value`, and s_isAlnInFilteringRange in this file
// calls adjustPercentIdentToDisplayValue(percentIdent) - presumably this
// function; confirm.
//
// Rounds `value` to two decimal places by formatting it with "%.2f" and
// parsing the text back into a double.
3100 {
3101  char buffer[512];
3102  sprintf(buffer, "%.*f", 2, value);
3103  double newVal = NStr::StringToDouble(buffer);
3104  return newVal;
3105 }
3106 
3107 static bool s_isAlnInFilteringRange(double evalue,
3108  double percentIdent,
3109  int queryCover,
3110  double evalueLow,
3111  double evalueHigh,
3112  double percentIdentLow,
3113  double percentIdentHigh,
3114  int queryCoverLow,
3115  int queryCoverHigh)
3116 {
3117 
3118 
3119  bool isInRange = false;
3120  //Adjust percent identity and evalue to display values
3121  percentIdent = adjustPercentIdentToDisplayValue(percentIdent);
3122  string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3123  double bits = 0;
3124  CAlignFormatUtil::GetScoreString(evalue, bits, 0, 0, evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf);
3125  evalue = NStr::StringToDouble(evalue_buf);
3126 
3127  if(evalueLow >= 0 && percentIdentLow >= 0 && queryCoverLow >= 0) {
3128  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3129  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh) &&
3130  (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3131  }
3132  else if(evalueLow >= 0 && percentIdentLow >= 0) {
3133  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3134  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3135  }
3136  else if(evalueLow >= 0 && queryCoverLow >= 0) {
3137  isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3138  (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3139  }
3140  else if(queryCoverLow >= 0 && percentIdentLow >= 0) {
3141  isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh) &&
3142  (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3143  }
3144  else if(evalueLow >= 0) {
3145  isInRange = (evalue >= evalueLow && evalue <= evalueHigh);
3146  }
3147  else if(percentIdentLow >= 0) {
3148  isInRange = (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3149  }
3150  else if(queryCoverLow >= 0) {
3151  isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3152  }
3153  return isInRange;
3154 }
3155 
3157  double evalueLow,
3158  double evalueHigh,
3159  double percentIdentLow,
3160  double percentIdentHigh,
3161  int queryCoverLow,
3162  int queryCoverHigh)
3163 {
      // Returns the subset of source_aln whose alignment sets pass
      // s_isAlnInFilteringRange() for the given e-value, percent identity
      // and query coverage bounds.
3164  list< CRef<CSeq_align_set> > seqalign_hit_total_list;
3165  list< CRef<CSeq_align_set> > seqalign_hit_list;
3166 
      // Split source_aln into a list of alignment sets
      // (presumably one set per hit - confirm against HspListToHitList).
3167  HspListToHitList(seqalign_hit_list, source_aln);
3168 
3169  ITERATE(list< CRef<CSeq_align_set> >, iter, seqalign_hit_list) {
3170  CRef<CSeq_align_set> temp(*iter);
3172 
      // NOTE(review): seqSetInfo is not declared in the visible lines;
      // presumably it holds the summary values calculated for `temp` - confirm.
3173  if(s_isAlnInFilteringRange(seqSetInfo->evalue,
3174  seqSetInfo->percent_identity,
3175  seqSetInfo->percent_coverage,
3176  evalueLow,
3177  evalueHigh,
3178  percentIdentLow,
3179  percentIdentHigh,
3180  queryCoverLow,
3181  queryCoverHigh)) {
      // The whole set is kept or dropped as a unit.
3182  seqalign_hit_total_list.push_back(temp);
3183  }
3184  }
      // Merge the surviving sets back into a single alignment set.
3185  return HitListToHspList(seqalign_hit_total_list);
3186 }
3187 
3189  int maxAligns,
3190  int maxHsps)
3191 {
      // Builds a new alignment set from source_aln, walking the alignments
      // in order and keeping at most maxAligns distinct consecutive subjects
      // per query. maxHsps is only checked when the query changes: once the
      // number of kept alignments has reached maxHsps, the walk stops at the
      // next query boundary.
3192  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
3193 
3194  CConstRef<CSeq_id> prevQueryId,prevSubjectId;
3195  int alignCount = 0,hspCount = 0;
3196  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
3197  const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
      // A new query starts: enforce the HSP limit and restart subject counting.
3198  if(prevQueryId.Empty() || !newQueryId.Match(*prevQueryId)){
3199  if (hspCount >= maxHsps) {
3200  break;
3201  }
3202  alignCount = 0;
3203  prevQueryId = &newQueryId;
3204  }
      // NOTE(review): once alignCount reaches maxAligns this test fails, so
      // any further alignments of the last counted subject are skipped too;
      // prevSubjectId is not reset when the query changes - confirm both are
      // intended.
3205  if (alignCount < maxAligns) {
3206  const CSeq_id& newSubjectId = (*iter)->GetSeq_id(1);
3207  // Increment alignments count if subject sequence is different
3208  if(prevSubjectId.Empty() || !newSubjectId.Match(*prevSubjectId)){
3209  ++alignCount;
3210  prevSubjectId = &newSubjectId;
3211  }
3212  // Increment HSP count if the alignments limit is not reached
3213  ++hspCount;
3214  new_aln->Set().push_back(*iter);
3215  }
3216 
3217  }
3218  return new_aln;
3219 }
3220 
3221 
3223  int queryNumber)
3224 {
      // Returns the alignments that belong to the queryNumber-th query
      // (1-based, counted by changes of the query id while walking the set).
      // queryNumber == 0 returns source_aln itself; if no alignment matches,
      // the returned reference is empty.
3225  if(queryNumber == 0) {
3226  return source_aln;
3227  }
3228  CRef<CSeq_align_set> new_aln;
3229 
3230  CConstRef<CSeq_id> prevQueryId;
3231  int currQueryNum = 0;
3232 
3233  ITERATE(CSeq_align_set::Tdata, iter, source_aln->Get()){
3234  const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
      // Every change of the query id starts the next query.
3235  if(prevQueryId.Empty() || !newQueryId.Match(*prevQueryId)){
3236  currQueryNum++;
3237  prevQueryId = &newQueryId;
3238  }
3239  //Record seq aligns corresponding to queryNumber
3240  if(currQueryNum == queryNumber) {
      // The result set is created lazily, on the first match.
3241  if(new_aln.Empty()) {
3242  new_aln.Reset(new CSeq_align_set);
3243  }
3244  new_aln->Set().push_back(*iter);
3245  }
      // Past the requested query: nothing more to collect.
3246  else if(currQueryNum > queryNumber) {
3247  break;
3248  }
3249  }
3250  return new_aln;
3251 }
3252 
3253 
3255 {
      // Loads the formatter configuration registry into m_Reg if it is not
      // loaded yet. The file name comes from the FMTCFG environment variable
      // (default ".ncbirc"); it is tried as given first and, if it does not
      // exist, under the directory named by the NCBI environment variable.
      // m_Reg is left unset when no file is found.
3256  string l_cfg_file_name;
      // NOTE(review): l_dbg is not declared in the visible lines - confirm
      // its declaration and initial value.
      // GETURL_DEBUG in the environment turns on debug output.
3258  if( getenv("GETURL_DEBUG") ) CAlignFormatUtil::m_geturl_debug_flag = l_dbg = true;
3259  if( !m_Reg ) {
3260  bool cfgExists = true;
3261  string l_ncbi_env;
3262  string l_fmtcfg_env;
3263  if( NULL != getenv("NCBI") ) l_ncbi_env = getenv("NCBI");
3264  if( NULL != getenv("FMTCFG") ) l_fmtcfg_env = getenv("FMTCFG");
3265  // config file name: value of FMTCFG or default ( .ncbirc )
3266  if( l_fmtcfg_env.empty() )
3267  l_cfg_file_name = ".ncbirc";
3268  else
3269  l_cfg_file_name = l_fmtcfg_env;
3270  // checking existence of the configuration file
3271  CFile l_fchecker( l_cfg_file_name );
3272  cfgExists = l_fchecker.Exists();
      // Not found as given: retry under $NCBI, adding a trailing '/' if needed.
3273  if( (!cfgExists) && (!l_ncbi_env.empty()) ) {
3274  if( l_ncbi_env.rfind("/") != (l_ncbi_env.length() -1 ))
3275  l_ncbi_env.append("/");
3276  l_cfg_file_name = l_ncbi_env + l_cfg_file_name;
3277  CFile l_fchecker2( l_cfg_file_name );
3278  cfgExists = l_fchecker2.Exists();
3279  }
3280  if(cfgExists) {
3281  CNcbiIfstream l_ConfigFile(l_cfg_file_name.c_str() );
3282  m_Reg.reset(new CNcbiRegistry(l_ConfigFile));
3283  if( l_dbg ) fprintf(stderr,"REGISTRY: %s\n",l_cfg_file_name.c_str());
3284  }
3285  }
3286  return;
3287 }
3288 
3289 //
3290 // get given url from registry file or return corresponding kNAME
3291 // value as default to preserve compatibility.
3292 //
3293 // algoritm:
3294 // 1) config file name is ".ncbirc" unless FMTCFG specifies another name
3295 // 2) try to read local configuration file before
3296 // checking location specified by the NCBI environment.
3297 // 3) if index != -1, use it as trailing version number for a key name,
3298 // ABCD_V0. try to read ABCD key if version variant doesn't exist.
3299 // 4) use INCLUDE_BASE_DIR key to specify base for all include files.
3300 // 5) treat "_FORMAT" key as filename first and string in second.
3301 // in case of existances of filename, read it starting from
3302 // location specified by INCLUDE_BASE_DIR key
3303 string CAlignFormatUtil::GetURLFromRegistry( const string url_name, int index){
3304  string result_url;
3305  string l_key, l_host_port, l_format;
3306  string l_secion_name = "BLASTFMTUTIL";
3307  string l_fmt_suffix = "_FORMAT";
3308  string l_host_port_suffix = "_HOST_PORT";
3309  string l_subst_pattern;
3310 
3311  if( !m_Reg ) {
3312  InitConfig();
3313  }
3314  if( !m_Reg ) return GetURLDefault(url_name,index); // can't read .ncbrc file
3315  string l_base_dir = m_Reg->Get(l_secion_name, "INCLUDE_BASE_DIR");
3316  if( !l_base_dir.empty() && ( l_base_dir.rfind("/") != (l_base_dir.length()-1)) ) {
3317  l_base_dir.append("/");
3318  }
3319 
3320 
3321  string default_host_port;
3322  string l_key_ndx;
3323  if( index >=0) {
3324  l_key_ndx = url_name + l_host_port_suffix + "_" + NStr::IntToString( index );
3325  l_subst_pattern="<@"+l_key_ndx+"@>";
3326  l_host_port = m_Reg->Get(l_secion_name, l_key_ndx); // try indexed
3327  }
3328  // next is initialization for non version/array type of settings
3329  if( l_host_port.empty()){ // not indexed or index wasn't found
3330  l_key = url_name + l_host_port_suffix; l_subst_pattern="<@"+l_key+"@>";
3331  l_host_port = m_Reg->Get(l_secion_name, l_key);
3332  }
3333  if( l_host_port.empty()) return GetURLDefault(url_name,index);
3334 
3335  // get format part
3336  l_key = url_name + l_fmt_suffix ; //"_FORMAT";
3337  l_key_ndx = l_key + "_" + NStr::IntToString( index );
3338  if( index >= 0 ){
3339  l_format = m_Reg->Get(l_secion_name, l_key_ndx);
3340  }
3341 
3342  if( l_format.empty() ) l_format = m_Reg->Get(l_secion_name, l_key);
3343  if( l_format.empty()) return GetURLDefault(url_name,index);
3344  // format found check wether this string or file name
3345  string l_format_file = l_base_dir + l_format;
3346  CFile l_fchecker( l_format_file );
3347  bool file_name_mode = l_fchecker.Exists();
3348  if( file_name_mode ) { // read whole content of the file to string buffer
3349  string l_inc_file_name = l_format_file;
3350  CNcbiIfstream l_file (l_inc_file_name.c_str(), ios::in|ios::binary|ios::ate);
3351  CT_POS_TYPE l_inc_size = l_file.tellg();
3352  // size_t l_buf_sz = (size_t) l_inc_size;
3353  char *l_mem = new char [ (size_t) l_inc_size + 1];
3354  memset( l_mem,0, (size_t) l_inc_size + 1 ) ;
3355  l_file.seekg( 0, ios::beg );
3356  l_file.read(l_mem, l_inc_size);
3357  l_file.close();
3358  l_format.erase(); l_format.reserve( (size_t)l_inc_size + 1 );
3359  l_format = l_mem;
3360  delete [] l_mem;
3361  }
3362 
3363  result_url = NStr::Replace(l_format,l_subst_pattern,l_host_port);
3364 
3365  if( result_url.empty()) return GetURLDefault(url_name,index);
3366  return result_url;
3367 }
3368 //
3369 // Return the default URL value for the given key.
// The key looked up in sm_TagUrlMap is url_name, or url_name + "_" + index
// when index >= 0. When the key is found, the stored URL is returned after
// MapProtocol() has been applied to it. When it is not found, a diagnostic
// string (not a URL) naming the missing key is returned instead.
3370 //
3371 string CAlignFormatUtil::GetURLDefault( const string url_name, int index) {
3372 
3373  string search_name = url_name;
      // NOTE(review): url_it is not declared in the visible lines;
      // presumably a const_iterator of sm_TagUrlMap - confirm.
3375  if( index >= 0 ) search_name += "_" + NStr::IntToString( index); // actual name for index value is NAME_{index}
3376 
3377  if( (url_it = sm_TagUrlMap.find( search_name ) ) != sm_TagUrlMap.end()) {
3378  string url_link = CAlignFormatUtil::MapProtocol(url_it->second);
3379  return url_link;
3380  }
3381 
      // NOTE(review): the message text is returned to the caller as if it
      // were a URL; the index test here (!= -1) differs from the one used
      // for the lookup above (>= 0).
3382  string error_msg = "CAlignFormatUtil::GetURLDefault:no_defualt_for"+url_name;
3383  if( index != -1 ) error_msg += "_index_"+ NStr::IntToString( index );
3384  return error_msg;
3385 }
3386 
// Fills retval with the scores of the standard scoring matrix called
// matrix_name, indexed by residue character code.
// retval is left empty (0 x 0) when matrix_name is NULL or blank, or when
// NCBISM_GetStandardMatrix() does not know the name.
3387 void
3389  CNcbiMatrix<int>& retval)
3390 {
      // Start from an empty matrix so that every early return yields 0 x 0.
3391  retval.Resize(0, 0, -1);
3392  if (matrix_name == NULL ||
3393  NStr::TruncateSpaces(string(matrix_name)).empty()) {
3394  return;
3395  }
3396 
3397  const SNCBIPackedScoreMatrix* packed_mtx =
3398  NCBISM_GetStandardMatrix(matrix_name);
3399  if (packed_mtx == NULL) {
3400  return;
3401  }
      // Cells that are not filled in below keep the value -1000.
3402  retval.Resize(k_NumAsciiChar, k_NumAsciiChar, -1000);
3403 
      // NOTE(review): mtx is not declared in the visible lines; presumably
      // the unpacked full score matrix - confirm.
3405  NCBISM_Unpack(packed_mtx, &mtx);
3406 
      // Copy the scores for every pair of symbols listed in k_PSymbol.
3407  for(int i = 0; i < ePMatrixSize; ++i){
3408  for(int j = 0; j < ePMatrixSize; ++j){
3409  retval((size_t)k_PSymbol[i], (size_t)k_PSymbol[j]) =
3410  mtx.s[(size_t)k_PSymbol[i]][(size_t)k_PSymbol[j]];
3411  }
3412  }
      // '*' against any listed symbol scores -4 (both directions);
      // '*' against '*' scores 1.
3413  for(int i = 0; i < ePMatrixSize; ++i) {
3414  retval((size_t)k_PSymbol[i], '*') = retval('*',(size_t)k_PSymbol[i]) = -4;
3415  }
3416  retval('*', '*') = 1;
3417  // this is to count Selenocysteine to Cysteine matches as positive
3418  retval('U', 'U') = retval('C', 'C');
3419  retval('U', 'C') = retval('C', 'C');
3420  retval('C', 'U') = retval('C', 'C');
3421 }
3422 
3423 
3424 string CAlignFormatUtil::MapTemplate(string inpString,string tmplParamName,Int8 templParamVal)
3425 {
3426  string outString;
3427  string tmplParam = "<@" + tmplParamName + "@>";
3428  NStr::Replace(inpString,tmplParam,NStr::NumericToString(templParamVal),outString);
3429  return outString;
3430 }
3431 
3432 string CAlignFormatUtil::MapTemplate(string inpString,string tmplParamName,string templParamVal)
3433 {
3434  string outString;
3435  string tmplParam = "<@" + tmplParamName + "@>";
3436  NStr::Replace(inpString,tmplParam,templParamVal,outString);
3437  return outString;
3438 }
3439 
3440 string CAlignFormatUtil::MapSpaceTemplate(string inpString,string tmplParamName,string templParamVal, unsigned int maxParamValLength, int spacesFormatFlag)
3441 {
3442  templParamVal = AddSpaces(templParamVal, maxParamValLength, spacesFormatFlag);
3443  string outString = MapTemplate(inpString,tmplParamName,templParamVal);
3444 
3445  return outString;
3446 }
3447 
3448 
3449 string CAlignFormatUtil::AddSpaces(string paramVal, size_t maxParamValLength, int spacesFormatFlag)
3450 {
3451  //if(!spacePos.empty()) {
3452  string spaceString;
3453  if(maxParamValLength >= paramVal.size()) {
3454  size_t numSpaces = maxParamValLength - paramVal.size() + 1;
3455  if(spacesFormatFlag & eSpacePosToCenter) {
3456  numSpaces = numSpaces/2;
3457  }
3458  spaceString.assign(numSpaces,' ');
3459  }
3460  else {
3461  paramVal = paramVal.substr(0, maxParamValLength - 3) + "...";
3462  spaceString += " ";
3463  }
3464  if(spacesFormatFlag & eSpacePosAtLineEnd) {
3465  paramVal = paramVal + spaceString;
3466  }
3467  else if(spacesFormatFlag & eSpacePosToCenter) {
3468  paramVal = spaceString + paramVal + spaceString;
3469  }
3470  else {
3471  paramVal = spaceString + paramVal;
3472  }
3473  if(spacesFormatFlag & eAddEOLAtLineStart) paramVal = "\n" + paramVal;
3474  if(spacesFormatFlag & eAddEOLAtLineEnd) paramVal = paramVal + "\n";
3475  //}
3476 
3477  return paramVal;
3478 }
3479 
3480 
3481 
3483 {
      // Returns the PROTOCOL entry of the BLASTFMTUTIL section read from the
      // file ".ncbirc" (opened by that relative name), or "https:" when the
      // registry is empty or has no such entry.
3484  CNcbiIfstream config_file(".ncbirc");
3485  CNcbiRegistry config_reg(config_file);
3486  string httpProt = "https:";
3487  if(!config_reg.Empty()) {
3488  if(config_reg.HasEntry("BLASTFMTUTIL","PROTOCOL")) {
3489  httpProt = config_reg.Get("BLASTFMTUTIL","PROTOCOL");
3490  }
3491  }
3492  return httpProt;
3493 }
3494 
3495 /*
3496 if(no config file) protocol = "https:"
3497 if(no "BLASTFMTUTIL","PROTOCOL" entry in config file) protocol = "https:"
3498 if(there is entry in config) protocol = entry which could be blank = ""
3499 */
3500 string CAlignFormatUtil::MapProtocol(string url_link)
3501 {
3502  if(m_Protocol.empty()){
3503  if(!m_Reg) {
3504  InitConfig();
3505  }
3506  m_Protocol = (m_Reg && m_Reg->HasEntry("BLASTFMTUTIL","PROTOCOL")) ? m_Protocol = m_Reg->Get("BLASTFMTUTIL","PROTOCOL") : "https:";
3507  }
3508  url_link = CAlignFormatUtil::MapTemplate(url_link,"protocol",m_Protocol);
3509  return url_link;
3510 }
3511 
3512 static string s_MapCommonUrlParams(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo)
3513 {
3514  string db,logstr_moltype;
3515  if(seqUrlInfo->isDbNa) {
3516  db = "nucleotide";
3517  logstr_moltype = "nucl";
3518  } else {
3519  db = "protein";
3520  logstr_moltype ="prot";
3521  }
3522  string logstr_location = (seqUrlInfo->isAlignLink) ? "align" : "top";
3523  string url_link = CAlignFormatUtil::MapTemplate(urlTemplate,"db",db);
3524  url_link = CAlignFormatUtil::MapTemplate(url_link,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3525  url_link = CAlignFormatUtil::MapTemplate(url_link,"log",logstr_moltype + logstr_location);
3526  url_link = CAlignFormatUtil::MapTemplate(url_link,"blast_rank",seqUrlInfo->blast_rank);
3527  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",seqUrlInfo->rid);
3528  url_link = CAlignFormatUtil::MapTemplate(url_link,"acc",seqUrlInfo->accession);
3529  url_link = CAlignFormatUtil::MapProtocol(url_link);
3530  return url_link;
3531 }
3532 
3533 static string s_MapURLLink(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo, const CBioseq::TId& ids)
3534 {
3535  //Add specific blasttype/user_url template mapping here
3536  string url_link = urlTemplate;
3537  if (seqUrlInfo->user_url.find("sra.cgi") != string::npos) {
3538  string strRun, strSpotId,strReadIndex;
3539  if(s_GetSRASeqMetadata(ids,strRun,strSpotId,strReadIndex)) {
3540  url_link = CAlignFormatUtil::MapTemplate(url_link,"run",strRun);
3541  url_link = CAlignFormatUtil::MapTemplate(url_link,"spotid",strSpotId);
3542  url_link = CAlignFormatUtil::MapTemplate(url_link,"readindex",strReadIndex);
3543  }
3544  }
3545  //This maps generic params like log, blast_rank, rid
3546  url_link = s_MapCommonUrlParams(url_link, seqUrlInfo);
3547  return url_link;
3548 }
3549 
3550 
3551 
3552 bool CAlignFormatUtil::IsWGSPattern(string &wgsAccession)
3553 {
3554  //const string kWgsAccessionPattern = "^[A-Z]{4}[0-9]{8,10}(\.[0-9]+){0,1}$"; //example AUXO013124042 or AUXO013124042.1
3555  const unsigned int kWgsProjLength = 4;
3556  const unsigned int kWgsProjIDLengthMin = 8;
3557  const unsigned int kWgsProjIDLengthMax = 10;
3558  bool isWGS = true;
3559 
3560  if (wgsAccession.size() < 6) {
3561  return false;
3562  }
3563 
3564  if(NStr::Find(wgsAccession, ".") != NPOS) { //Accession has version AUXO013124042.1
3565  string version;
3566  NStr::SplitInTwo(wgsAccession,".",wgsAccession,version);
3567  }
3568 
3569  string wgsProj = wgsAccession.substr(0,kWgsProjLength);
3570  for (size_t i = 0; i < wgsProj.length(); i ++){
3571  if(!isalpha(wgsProj[i]&0xff)) {
3572  isWGS = false;
3573  break;
3574  }
3575  }
3576  if(isWGS) {
3577  string wgsId = wgsAccession.substr(kWgsProjLength);
3578  if(wgsId.length() >= kWgsProjIDLengthMin && wgsId.length() <= kWgsProjIDLengthMax) {
3579  for (size_t i = 0; i < wgsId.length(); i ++){
3580  if(!isdigit(wgsId[i]&0xff)) {
3581  isWGS = false;
3582  break;
3583  }
3584  }
3585  }
3586  else {
3587  isWGS = false;
3588  }
3589  }
3590  return isWGS;
3591 }
3592 
3593 
3594 bool CAlignFormatUtil::IsWGSAccession(string &wgsAccession, string &wgsProjName)
3595 {
3596  const unsigned int kWgsProgNameLength = 6;
3597  bool isWGS = IsWGSPattern(wgsAccession);
3598  if(isWGS) {
3599  wgsProjName = wgsAccession.substr(0,kWgsProgNameLength);
3600  }
3601  return isWGS;
3602 }
3603 
3604 
// Builds the URL for a sequence from the built-in / registry URL templates
// (WGS, Entrez, Trace or local-id tool URL), stores it in
// seqUrlInfo->seqUrl and returns it. The result is empty when none of the
// branches below applies.
3605 string CAlignFormatUtil::GetIDUrlGen(SSeqURLInfo *seqUrlInfo,const CBioseq::TId* ids)
3606 {
3607  string url_link = NcbiEmptyString;
      // NOTE(review): wid is not declared in the visible lines; presumably
      // the sequence id selected from *ids - confirm.
3609 
3610  bool hasTextSeqID = GetTextSeqID(*ids);
      // NOTE(review): title is not used in the visible part of this function.
3611  string title = "title=\"Show report for " + seqUrlInfo->accession + "\" ";
3612 
3613  string temp_class_info = kClassInfo; temp_class_info += " ";
3614  string wgsProj;
3615  string wgsAccession = seqUrlInfo->accession;
3616  bool isWGS = false;
      // Local and general ids are never treated as WGS accessions.
3617  if (!(wid->Which() == CSeq_id::e_Local || wid->Which() == CSeq_id::e_General)){
3618  isWGS = CAlignFormatUtil::IsWGSAccession(wgsAccession, wgsProj);
3619  }
      // WGS accession (template mode only): use the "WGS" URL template.
3620  if(isWGS && seqUrlInfo->useTemplates) {
3621  string wgsUrl = CAlignFormatUtil::GetURLFromRegistry("WGS");
3622  url_link = s_MapCommonUrlParams(wgsUrl, seqUrlInfo);
3623  url_link = CAlignFormatUtil::MapTemplate(url_link,"wgsproj",wgsProj);
3624  url_link = CAlignFormatUtil::MapTemplate(url_link,"wgsacc", wgsAccession);
3625  }
      // Text seq-id: use the Entrez URL template.
3626  else if (hasTextSeqID) {
3627  string entrezTag = (seqUrlInfo->useTemplates) ? "ENTREZ_TM" : "ENTREZ";
3628  string l_EntrezUrl = CAlignFormatUtil::GetURLFromRegistry(entrezTag);
3629  url_link = s_MapCommonUrlParams(l_EntrezUrl, seqUrlInfo);
3630 
      // Non-template mode additionally fills the anchor attributes
      // (css info with the JavaScript-encoded defline, target).
3631  if(!seqUrlInfo->useTemplates) {
3632  url_link = CAlignFormatUtil::MapTemplate(url_link,"acc",seqUrlInfo->accession);
3633  temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",NStr::JavaScriptEncode(seqUrlInfo->defline)):temp_class_info;
3634  url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3635  url_link = CAlignFormatUtil::MapTemplate(url_link,"target",seqUrlInfo->new_win ? "TARGET=\"EntrezView\"" : "");
3636  }
3637 
3638  } else {//seqid general, dbtag specified
3639  if(wid->Which() == CSeq_id::e_General){
3640  const CDbtag& dtg = wid->GetGeneral();
3641  const string& dbname = dtg.GetDb();
      // Only the "TI" database (compared case-insensitively) gets a URL here.
3642  if(NStr::CompareNocase(dbname, "TI") == 0){
3643  string actual_id = CAlignFormatUtil::GetGnlID(dtg);
3644  if(seqUrlInfo->useTemplates) {
3645  string l_TraceUrl = CAlignFormatUtil::GetURLFromRegistry("TRACE_CGI");
3646  url_link = l_TraceUrl + (string)"?cmd=retrieve&dopt=fasta&val=" + actual_id + "&RID=" + seqUrlInfo->rid;
3647  }
3648  else {
3649  url_link = CAlignFormatUtil::MapTemplate(kTraceUrl,"val",actual_id);
      // NOTE(review): unlike the Entrez branch, the defline is inserted
      // here without NStr::JavaScriptEncode - confirm this is intended.
3650  temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",seqUrlInfo->defline):temp_class_info;
3651  url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3652  url_link = CAlignFormatUtil::MapTemplate(url_link,"rid",seqUrlInfo->rid);
3653  }
3654  }
3655  } else if (wid->Which() == CSeq_id::e_Local){
3656 
      // NOTE(review): url_holder is not used after this call.
3657  string url_holder = CAlignFormatUtil::GetURLFromRegistry("LOCAL_ID");
3658 
      // The tool URL comes from the LOCAL_ID registry section; it stays
      // empty when no registry is loaded.
3659  string user_url;
3660  if (m_Reg) {
3661  user_url = (seqUrlInfo->addCssInfo) ? m_Reg->Get("LOCAL_ID","TOOL_URL_ALIGN") : m_Reg->Get("LOCAL_ID","TOOL_URL");
3662  }
3663  string id_string;
3664  wid->GetLabel(&id_string, CSeq_id::eContent);
3665  url_link = CAlignFormatUtil::MapTemplate(user_url,"seq_id", NStr::URLEncode(id_string));
3666  url_link = CAlignFormatUtil::MapTemplate(url_link,"db_name", NStr::URLEncode(seqUrlInfo->database));
3667  url_link = CAlignFormatUtil::MapTemplate(url_link,"taxid", TAX_ID_TO(int, seqUrlInfo->taxid));
3668  temp_class_info = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(temp_class_info,"defline",seqUrlInfo->defline):temp_class_info;
3669  url_link = CAlignFormatUtil::MapTemplate(url_link,"cssInf",(seqUrlInfo->addCssInfo) ? temp_class_info.c_str() : "");
3670  url_link = CAlignFormatUtil::MapTemplate(url_link,"title", id_string);
3671  url_link = CAlignFormatUtil::MapTemplate(url_link,"target",seqUrlInfo->new_win ? "TARGET=\"EntrezView\"" : "");
3672  }
3673  }
      // Resolve the protocol tag and remember the URL for later link building.
3674  url_link = CAlignFormatUtil::MapProtocol(url_link);
3675  seqUrlInfo->seqUrl = url_link;
3676  return url_link;
3677 }
3678 
3679 string CAlignFormatUtil::GetIDUrlGen(SSeqURLInfo *seqUrlInfo,const CSeq_id& id,objects::CScope &scope)
3680 {
3681  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3682  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3683 
3684  string url_link = GetIDUrlGen(seqUrlInfo,ids);
3685  return url_link;
3686 }
3687 
// Builds the URL (or, in non-template mode, the opening <a ...> tag) for a
// sequence, stores it in seqUrlInfo->seqUrl and returns it.
// A user-supplied URL is used when present, except for "dumpgnl.cgi" URLs of
// sequences with a gi and for "maps.cgi" URLs; all other cases are delegated
// to GetIDUrlGen().
3688 string CAlignFormatUtil::GetIDUrl(SSeqURLInfo *seqUrlInfo,const CBioseq::TId* ids)
3689 {
3690  string url_link = NcbiEmptyString;
3692 
3693  string title = "title=\"Show report for " + seqUrlInfo->accession + "\" ";
3694 
3695  if (seqUrlInfo->user_url != NcbiEmptyString &&
3696  !((seqUrlInfo->user_url.find("dumpgnl.cgi") != string::npos && seqUrlInfo->gi > ZERO_GI) ||
3697  (seqUrlInfo->user_url.find("maps.cgi") != string::npos))) {
3698 
3699  string url_with_parameters,toolURLParams;
      // Per-blast-type URL parameters come from the registry section named
      // after blastType (not consulted for an empty type or "newblast").
3700  if(m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType != "newblast") {
3701  toolURLParams = m_Reg->Get(seqUrlInfo->blastType, "TOOL_URL_PARAMS");
3702  }
      // Registry parameters present: treat user_url + parameters as a template.
3703  if(!toolURLParams.empty()) {
3704  string urlLinkTemplate = seqUrlInfo->user_url + toolURLParams;
3705  url_with_parameters = s_MapURLLink(urlLinkTemplate, seqUrlInfo, *ids);
3706  }
      // Otherwise build the URL with the SRA or the generic user-URL builder.
3707  else {
3708  if (seqUrlInfo->user_url.find("sra.cgi") != string::npos) {
3709  url_with_parameters = CAlignFormatUtil::BuildSRAUrl(*ids, seqUrlInfo->user_url);
3710  }
3711  else {
3712  url_with_parameters = CAlignFormatUtil::BuildUserUrl(*ids, seqUrlInfo->taxid, seqUrlInfo->user_url,
3713  seqUrlInfo->database,
3714  seqUrlInfo->isDbNa, seqUrlInfo->rid,
3715  seqUrlInfo->queryNumber,
3716  seqUrlInfo->isAlignLink);
3717  }
3718  }
3719  if (url_with_parameters != NcbiEmptyString) {
      // Non-template mode wraps the URL into an opening anchor tag:
      // <a title=... [class info] href="URL">
3720  if (!seqUrlInfo->useTemplates) {
3721  string deflineInfo;
3722  if(seqUrlInfo->addCssInfo) {
3723  deflineInfo = (!seqUrlInfo->defline.empty())? CAlignFormatUtil::MapTemplate(kClassInfo,"defline",seqUrlInfo->defline):kClassInfo;
3724  }
3725  url_link += "<a " + title + deflineInfo + "href=\"";
3726  }
3727  url_link += url_with_parameters;
3728  if (!seqUrlInfo->useTemplates) url_link += "\">";
3729  }
3730  }
3731  else {
3732  //use entrez or dbtag specified
3733  url_link = GetIDUrlGen(seqUrlInfo,ids);
3734  }
3735  seqUrlInfo->seqUrl = url_link;
3736  return url_link;
3737 }
3738 
3739 
3740 string CAlignFormatUtil::GetIDUrl(SSeqURLInfo *seqUrlInfo,const CSeq_id& id,objects::CScope &scope)
3741 {
3742  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3743  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3744 
3745 
3746  seqUrlInfo->blastType = NStr::TruncateSpaces(NStr::ToLower(seqUrlInfo->blastType));
3747 
3748  if(seqUrlInfo->taxid == INVALID_TAX_ID) { //taxid is not set
3749  seqUrlInfo->taxid = ZERO_TAX_ID;
3750  if ((seqUrlInfo->advancedView || seqUrlInfo->blastType == "mapview" || seqUrlInfo->blastType == "mapview_prev") ||
3751  seqUrlInfo->blastType == "gsfasta" || seqUrlInfo->blastType == "gsfasta_prev") {
3752  seqUrlInfo->taxid = GetTaxidForSeqid(id, scope);
3753  }
3754  }
3755  string url_link = GetIDUrl(seqUrlInfo,ids);
3756  return url_link;
3757 }
3758 
3759 //static const char kGenericLinkTemplate[] = "<a href=\"<@url@>\" target=\"lnk<@rid@>\" title=\"Show report for <@seqid@>\"><@gi@><@seqid@></a>";
3761 {
      // Builds the complete link for a sequence: the URL from GetIDUrl() is
      // inserted into kGenericLinkTemplate, or kGenericLinkMouseoverTmpl
      // when seqUrlInfo->addCssInfo is set. An empty string is returned when
      // no URL could be built.
3762  string seqLink;
3763  string linkURL = GetIDUrl(seqUrlInfo,ids);
3764  if(!linkURL.empty()) {
3765  string linkTmpl = (seqUrlInfo->addCssInfo) ? kGenericLinkMouseoverTmpl : kGenericLinkTemplate;
3766  seqLink = CAlignFormatUtil::MapTemplate(linkTmpl,"url",linkURL);
3767  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"rid",seqUrlInfo->rid);
3768  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"seqid",seqUrlInfo->accession);
3769  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3770  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"target","EntrezView");
      // The defline tag is only mapped for the mouse-over variant; the
      // defline is JavaScript-encoded before insertion.
3771  if(seqUrlInfo->addCssInfo) {
3772  seqLink = CAlignFormatUtil::MapTemplate(seqLink,"defline",NStr::JavaScriptEncode(seqUrlInfo->defline));
3773  }
3774  }
3775  return seqLink;
3776 }
3777 
3778 static string s_MapCustomLink(string linkUrl,string reportType,string accession, string linkText, string linktrg, string linkTitle = kCustomLinkTitle,string linkCls = "")
3779 {
3780  string link = CAlignFormatUtil::MapTemplate(kCustomLinkTemplate,"custom_url",linkUrl);
3781  link = CAlignFormatUtil::MapProtocol(link);
3782  link = CAlignFormatUtil::MapTemplate(link,"custom_title",linkTitle);
3783  link = CAlignFormatUtil::MapTemplate(link,"custom_report_type",reportType);
3784  link = CAlignFormatUtil::MapTemplate(link,"seqid",accession);
3785  link = CAlignFormatUtil::MapTemplate(link,"custom_lnk_displ",linkText);
3786  link = CAlignFormatUtil::MapTemplate(link,"custom_cls",linkCls);
3787  link = CAlignFormatUtil::MapTemplate(link,"custom_trg",linktrg);
3788  return link;
3789 }
3790 
3791 
3792 
3794  bool hspRange)
3795 {
      // Returns the GenBank / GenPept link for a sequence with a text
      // seq-id; the list is empty when seqUrlInfo->hasTextSeqID is false.
      // With hspRange set, the URL and the title keep the <@fromHSP@> /
      // <@toHSP@> tags, which are not resolved here.
3796  list<string> customLinksList;
3797  if (seqUrlInfo->hasTextSeqID) {
3798  //First show links to GenBank and FASTA
3799  string linkUrl,link,linkTiltle = kCustomLinkTitle;
3800 
3801  linkUrl = seqUrlInfo->seqUrl;
      // The stored URL is not a GenBank report URL: rebuild it from kEntrezTMUrl.
3802  if(NStr::Find(linkUrl, "report=genbank") == NPOS) { //Geo case
3803  linkUrl = s_MapCommonUrlParams(kEntrezTMUrl, seqUrlInfo);
3804  }
3805  string linkText = (seqUrlInfo->isDbNa) ? "GenBank" : "GenPept";
3806  if(hspRange) {
3807  linkUrl += "&from=<@fromHSP@>&to=<@toHSP@>";
3808  linkTiltle = "Aligned region spanning positions <@fromHSP@> to <@toHSP@> on <@seqid@>";
3809  }
3810  link = s_MapCustomLink(linkUrl,"genbank",seqUrlInfo->accession,linkText,"lnk" + seqUrlInfo->rid,linkTiltle);
3811  customLinksList.push_back(link);
3812  }
3813  return customLinksList;
3814 }
3815 
3817  bool hspRange)
3818 {
      // Builds the "Graphics" (sequence viewer) link for a sequence.
      // Without hspRange the viewer range is the subject range widened by 5%
      // on each side; with hspRange the <@fromHSP@> / <@toHSP@> tags are
      // left in the title for a later substitution.
3819  //seqviewer
3820  string dbtype = (seqUrlInfo->isDbNa) ? "nuccore" : "protein";
      // Different URL templates are used for sequences with and without a gi.
3821  string seqViewUrl = (seqUrlInfo->gi > ZERO_GI)?kSeqViewerUrl:kSeqViewerUrlNonGi;
3822 
3823  string linkUrl = CAlignFormatUtil::MapTemplate(seqViewUrl,"rid",seqUrlInfo->rid);
3824 
      // Viewer parameters: registry section named after blastType,
      // falling back to kSeqViewerParams.
3825  string seqViewerParams;
3826  if(m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType != "newblast") {
3827  seqViewerParams = m_Reg->Get(seqUrlInfo->blastType, "SEQVIEW_PARAMS");
3828  }
3829  seqViewerParams = seqViewerParams.empty() ? kSeqViewerParams : seqViewerParams;
3830  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"seqViewerParams",seqViewerParams);
3831 
3832  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"dbtype",dbtype);
3833  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"gi", GI_TO(TIntId, seqUrlInfo->gi));
3834  string linkTitle = "Show alignment to <@seqid@> in <@custom_report_type@>";
3835  string link_loc;
3836  if(!hspRange) {
3837  int addToRange = (int) ((seqUrlInfo->seqRange.GetTo() - seqUrlInfo->seqRange.GetFrom()) * 0.05);//add 5% to each side
      // The lower bound is clamped at 0.
3838  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"from",max(0,(int)seqUrlInfo->seqRange.GetFrom() - addToRange));
3839  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"to",seqUrlInfo->seqRange.GetTo() + addToRange);
3840  link_loc = "fromSubj";
3841  //linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"flip",NStr::BoolToString(seqUrlInfo->flip));
3842  }
3843  else {
3844  link_loc = "fromHSP";
3845  linkTitle += " for <@fromHSP@> to <@toHSP@> range";
3846  }
3847  linkUrl = CAlignFormatUtil::MapTemplate(linkUrl,"link_loc",link_loc);
3848 
3849  string title = (seqUrlInfo->isDbNa) ? "Nucleotide Graphics" : "Protein Graphics";
3850 
3851  string link = s_MapCustomLink(linkUrl,title,seqUrlInfo->accession, "Graphics","lnk" + seqUrlInfo->rid,linkTitle,"spr");
3852 
3853  return link;
3854 }
3855 
3857  bool hspRange)
3858 {
      // Returns the links of GetGiLinksList() followed by the Graphics link
      // of GetGraphiscLink(), if that link is not empty.
3859  list<string> customLinksList = GetGiLinksList(seqUrlInfo,hspRange); //ONLY FOR genBank seqUrlInfo->seqUrl has "report=genbank"
3860  string graphicLink = GetGraphiscLink(seqUrlInfo,hspRange);
3861  if(!graphicLink.empty()) {
3862  customLinksList.push_back(graphicLink);
3863  }
3864  return customLinksList;
3865 }
3866 
3867 int CAlignFormatUtil::SetCustomLinksTypes(SSeqURLInfo *seqUrlInfo, int customLinkTypesInp)
3868 {
3869  int customLinkTypes = customLinkTypesInp;
3870  if ( seqUrlInfo->gi > ZERO_GI) {
3871  customLinkTypes +=eLinkTypeGenLinks;
3872  }
3873  //else if(NStr::StartsWith(seqUrlInfo->accession,"ti:")) {//seqUrlInfo->seqUrl has "trace.cgi"
3874  else if(NStr::Find(seqUrlInfo->seqUrl,"trace.cgi") != NPOS ){
3875  customLinkTypes +=eLinkTypeTraceLinks;
3876  }
3877  else if(seqUrlInfo->blastType == "sra") {//seqUrlInfo->seqUrl has sra.cgi
3878  customLinkTypes +=eLinkTypeSRALinks;
3879  }
3880  else if(seqUrlInfo->blastType == "snp") {//seqUrlInfo->seqUrl has snp_ref.cgi
3881  customLinkTypes +=eLinkTypeSNPLinks;
3882  }
3883  else if(seqUrlInfo->blastType == "gsfasta") {//seqUrlInfo->seqUrl has GSfasta.cgi
3884  customLinkTypes +=eLinkTypeGSFastaLinks;
3885  }
3886  return customLinkTypes;
3887 }
3888 
3889 
3890 //kCustomLinkTemplate:
3891 //<a href="<@custom_url@>" class="<@custom_cls@>" title="Show <@custom_report_type@> report for <@seqid@>"><@custom_lnk_displ@></a>
3893  const CSeq_id& id,
3894  objects::CScope &scope,
3895  int customLinkTypes)
3896 {
      // Returns the list of custom links for a sequence: the links from
      // GetSeqLinksList() first, followed by the links for the link type
      // selected by SetCustomLinksTypes() (Trace, SRA, SNP or GSFASTA).
      // NOTE(review): id and scope are not used in the visible body.
3897  list<string> customLinksList;
3898  string linkUrl,link;
3899 
3900  customLinkTypes = SetCustomLinksTypes(seqUrlInfo, customLinkTypes);
3901  //First show links to GenBank and FASTA, then to Graphics
3902  customLinksList = GetSeqLinksList(seqUrlInfo);
      // Trace archive: FASTA link plus Trace / Quality / Info variants,
      // obtained by replacing "fasta" in the sequence URL.
3903  if(customLinkTypes & eLinkTypeTraceLinks) {
3904  linkUrl = seqUrlInfo->seqUrl;
3905  link = s_MapCustomLink(linkUrl,"Trace Archive FASTA",seqUrlInfo->accession, "FASTA","lnk" + seqUrlInfo->rid);
3906  customLinksList.push_back(link);
3907 
3908  linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","trace");
3909  link = s_MapCustomLink(linkUrl,"Trace Archive Trace",seqUrlInfo->accession, "Trace","lnk" + seqUrlInfo->rid);
3910  customLinksList.push_back(link);
3911 
3912  linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","quality");
3913  link = s_MapCustomLink(linkUrl,"Trace Archive Quality",seqUrlInfo->accession, "Quality","lnk" + seqUrlInfo->rid);
3914  customLinksList.push_back(link);
3915 
3916  linkUrl = NStr::Replace(seqUrlInfo->seqUrl,"fasta","info");
3917  link = s_MapCustomLink(linkUrl,"Trace Archive Info",seqUrlInfo->accession, "Info","lnk" + seqUrlInfo->rid);
3918  customLinksList.push_back(link);
3919  }
3920  else if(customLinkTypes & eLinkTypeSRALinks) {
3921  linkUrl = seqUrlInfo->seqUrl;
3922  link = s_MapCustomLink(linkUrl,"SRA",seqUrlInfo->accession, "SRA","lnk" + seqUrlInfo->rid);
3923  customLinksList.push_back(link);
3924  }
      // SNP: the SNP link itself plus Flatfile / FASTA / Graphic summary
      // report links built on seqUrlInfo->resourcesUrl.
3925  else if(customLinkTypes & eLinkTypeSNPLinks) {
3926  linkUrl = seqUrlInfo->seqUrl;
3927  link = s_MapCustomLink(linkUrl,"SNP",seqUrlInfo->accession, "SNP","lnk" + seqUrlInfo->rid);
3928  customLinksList.push_back(link);
3929 
3930 
3931  //SNP accession=rs35885954
      // "rs" is removed from the accession by plain string replacement.
3932  string rs = NStr::Replace(seqUrlInfo->accession,"rs","");
3933  linkUrl = seqUrlInfo->resourcesUrl + rs + "?report=FLT";
3934 
3935 
3936  link = s_MapCustomLink(linkUrl,"Flatfile",seqUrlInfo->accession, "Flatfile","lnk" + seqUrlInfo->rid);
3937  customLinksList.push_back(link);
3938 
      // The following URLs are derived from the previous one by replacing
      // the report name.
3939  linkUrl = NStr::Replace(linkUrl,"FLT","fasta");
3940  link = s_MapCustomLink(linkUrl,"FASTA",seqUrlInfo->accession, "FASTA","lnk" + seqUrlInfo->rid);
3941  customLinksList.push_back(link);
3942 
3943  linkUrl = NStr::Replace(linkUrl,"fasta","docsum");
3944  link = s_MapCustomLink(linkUrl,"Graphic summary ",seqUrlInfo->accession, "Graphic summary ","lnk" + seqUrlInfo->rid);
3945  customLinksList.push_back(link);
3946  }
3947  else if(customLinkTypes & eLinkTypeGSFastaLinks) {
3948  linkUrl = seqUrlInfo->seqUrl;
3949  link = s_MapCustomLink(linkUrl,"GSFASTA",seqUrlInfo->accession, "GSFASTA","lnk" + seqUrlInfo->rid);
3950  customLinksList.push_back(link);
3951  }
3952  return customLinksList;
3953 }
3954 
3955 
3957  const CSeq_id& id,
3958  objects::CScope &scope)
3959 {
      // Builds a URL from kDownloadUrl for the bioseq found for `id` and
      // appends "&segs=" + seqUrlInfo->segs to it. An empty string is
      // returned when BuildUserUrl() produces no URL.
3960  const CBioseq_Handle& handle = scope.GetBioseqHandle(id);
3961  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
3962  string linkUrl,link;
3963 
3964 
      // ZERO_TAX_ID and `true` are passed in the positions where GetIDUrl()
      // passes seqUrlInfo->taxid and seqUrlInfo->isAlignLink.
3965  linkUrl = CAlignFormatUtil::BuildUserUrl(*ids,
3966  ZERO_TAX_ID,
3967  kDownloadUrl,
3968  seqUrlInfo->database,
3969  seqUrlInfo->isDbNa,
3970  seqUrlInfo->rid,
3971  seqUrlInfo->queryNumber,
3972  true);
3973  if(!linkUrl.empty()) {
3974  linkUrl += "&segs="+ seqUrlInfo->segs;
3975  }
3976 
3977  return linkUrl;
3978 }
3979 
3980 
3981 
3983  const CSeq_id& id,
3984  objects::CScope &scope)
3985 
3986 {
      // Returns the URL of the FASTA report for a sequence, or an empty
      // string when the link type is neither GenBank/Trace nor SNP.
      // NOTE(review): id and scope are not used in the visible body.
3987  string linkUrl;
3988 
3989  int customLinkTypes = SetCustomLinksTypes(seqUrlInfo, CAlignFormatUtil::eLinkTypeDefault);
3990 
      // GenBank / Trace: the sequence URL with "genbank" replaced by "fasta".
3991  if( (customLinkTypes & eLinkTypeGenLinks) || (customLinkTypes & eLinkTypeTraceLinks)){
3992  linkUrl = seqUrlInfo->seqUrl;
3993  linkUrl = NStr::Replace(linkUrl,"genbank","fasta");
3994  }
3995  else if(customLinkTypes & eLinkTypeSNPLinks) {
      // NOTE(review): this assignment is overwritten at the end of the branch.
3996  linkUrl = seqUrlInfo->seqUrl;
3997  vector<string> parts;
3998  //SNP accession=dbSNP:rs35885954
3999  NStr::Split(seqUrlInfo->accession,":rs",parts,NStr::fSplit_MergeDelimiters);
4000  string rs;
      // rs stays empty when the split yields fewer than two parts.
4001  if(parts.size() > 1) {
4002  rs = parts[1];
4003  }
4004  linkUrl = seqUrlInfo->resourcesUrl + rs + "?report=fasta";
4005  }
4006  return linkUrl;
4007 }
4008 
4009 
4011 {
4012  //determine if the database has gi by looking at the 1st hit.
4013  //Could be wrong but simple for now
4015  CRef<CSeq_align> first_aln = actual_aln_list.Get().front();
4016  const CSeq_id& subject_id = first_aln->GetSeq_id(1);
4017 
4018  if (subject_id.Which() != CSeq_id::e_Local){
4019  const CBioseq_Handle& handleTemp = scope.GetBioseqHandle(subject_id);
4020  if(handleTemp){
4021  TGi giTemp = FindGi(handleTemp.GetBioseqCore()->GetId());
4022  if (giTemp > ZERO_GI || GetTextSeqID((CConstRef<CSeq_id>)&subject_id)) {
4023  type = eDbGi;
4024  } else if (subject_id.Which() == CSeq_id::e_General){
4025  const CDbtag& dtg = subject_id.GetGeneral();
4026  const string& dbName = dtg.GetDb();
4027  if(NStr::CompareNocase(dbName, "TI") == 0){
4028  type = eDbGeneral;
4029  }
4030  }
4031  }
4032  }
4033  return type;
4034 }
4035 
4038 {
4039  int score = 0;
4040  double bits = 0;
4041  double evalue = 0;
4042  int sum_n = 0;
4043  int num_ident = 0;
4044  list<TGi> use_this_gi;
4045 
4046  use_this_gi.clear();
4047  //Gets scores directly from seq align
4048  GetAlnScores(aln, score, bits, evalue, sum_n,
4049  num_ident, use_this_gi);
4050 
4051  unique_ptr<SSeqAlignSetCalcParams> seqSetInfo(new SSeqAlignSetCalcParams);
4052  seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4053  seqSetInfo->id = &(aln.GetSeq_id(1));
4054  seqSetInfo->use_this_gi = use_this_gi;
4055  seqSetInfo->bit_score = bits;
4056  seqSetInfo->raw_score = score;
4057  seqSetInfo->evalue = evalue;
4058  seqSetInfo->match = num_ident;
4059  seqSetInfo->id = &(aln.GetSeq_id(1));
4060  seqSetInfo->subjRange = CRange<TSeqPos>(0,0);
4061  seqSetInfo->flip = false;
4062 
4063  return seqSetInfo.release();
4064 }
4065 
4066 
4067 
4069 CAlignFormatUtil::GetSeqAlignSetCalcParams(const CSeq_align_set& aln,int queryLength, bool do_translation)
4070 {
4071  int score = 0;
4072  double bits = 0;
4073  double evalue = 0;
4074  int sum_n = 0;
4075  int num_ident = 0;
4076  SSeqAlignSetCalcParams* seqSetInfo = NULL;
4077 
4078  if(aln.Get().empty())
4079  return seqSetInfo;
4080 
4081  seqSetInfo = GetSeqAlignCalcParams(*(aln.Get().front()));
4082 
4083  double total_bits = 0;
4084  double highest_bits = 0;
4085  double lowest_evalue = 0;
4086  int highest_length = 1;
4087  int highest_ident = 0;
4088  //int highest_identity = 0;
4089  double totalLen = 0;
4090 
4091  list<TGi> use_this_gi; // Not used here, but needed for GetAlnScores.
4092 
4093  seqSetInfo->subjRange = CAlignFormatUtil::GetSeqAlignCoverageParams(aln,&seqSetInfo->master_covered_length,&seqSetInfo->flip);
4094  seqSetInfo->percent_coverage = 100*seqSetInfo->master_covered_length/queryLength;
4095 
4096  ITERATE(CSeq_align_set::Tdata, iter, aln.Get()) {
4097  int align_length = CAlignFormatUtil::GetAlignmentLength(**iter, do_translation);
4098  totalLen += align_length;
4099 
4100  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue, sum_n,
4101  num_ident, use_this_gi);
4102  use_this_gi.clear();
4103 
4104  total_bits += bits;
4105 
4106 /// IMPORTANT: based on WB-1175, the trigger for setting the highest identity
4107 /// is not the highest identity value, but the identity value of
4108 /// the alignment with the highest score!
4109 ///
4110 /// if (100*num_ident/align_length > highest_identity) { -- this condition is disabled
4111 
4112  if (bits > highest_bits) { // this is the replacement condition (WB-1175)
4113  highest_length = align_length;
4114  highest_ident = num_ident;
4115 /// highest_identity = 100*num_ident/align_length;
4116  }
4117 
4118  if (bits > highest_bits) {
4119  highest_bits = bits;
4120  lowest_evalue = evalue;
4121  }
4122  }
4123  seqSetInfo->match = highest_ident;
4124  seqSetInfo->align_length = highest_length;
4125  seqSetInfo->percent_identity = CAlignFormatUtil::GetPercentIdentity(seqSetInfo->match, seqSetInfo->align_length);
4126 
4127  seqSetInfo->total_bit_score = total_bits;
4128  seqSetInfo->bit_score = highest_bits;
4129  seqSetInfo->evalue = lowest_evalue;
4130  seqSetInfo->hspNum = static_cast<int>(aln.Size());
4131  seqSetInfo->totalLen = (Int8)totalLen;
4132 
4133  return seqSetInfo;
4134 }
4135 
4137 {
4138  int score = 0;
4139  double bits = 0;
4140  double evalue = 0;
4141  int sum_n = 0;
4142  int num_ident = 0;
4143 
4144  if(aln.Get().empty())
4145  return -1;
4146 
4147  double highest_bits = 0;
4148  int highest_length = 1;
4149  int highest_ident = 0;
4150 
4151  list<TGi> use_this_gi; // Not used here, but needed for GetAlnScores.
4152 
4153  ITERATE(CSeq_align_set::Tdata, iter, aln.Get()) {
4154  int align_length = CAlignFormatUtil::GetAlignmentLength(**iter, do_translation);
4155 
4156  CAlignFormatUtil::GetAlnScores(**iter, score, bits, evalue, sum_n,
4157  num_ident, use_this_gi);
4158 
4159 
4160 /// IMPORTANT: based on WB-1175, the trigger for setting the highest identity
4161 /// is not the highest identity value, but the identity value of
4162 /// the alignment with the highest score!
4163 ///
4164 /// if (100*num_ident/align_length > highest_identity) { -- this condition is disabled
4165 
4166  if (bits > highest_bits) { // this is the replacement condition (WB-1175)
4167  highest_length = align_length;
4168  highest_ident = num_ident;
4169 /// highest_identity = 100*num_ident/align_length;
4170  highest_bits = bits;
4171  }
4172  }
4173 
4174  double percent_identity = CAlignFormatUtil::GetPercentIdentity(highest_ident, highest_length);
4175  return percent_identity;
4176 }
4177 
4178 
4179 template<class container> bool
4180 s_GetBlastScore(const container& scoreList,
4181  double& evalue,
4182  double& bitScore,
4183  double& totalBitScore,
4184  int& percentCoverage,
4185  double& percentIdent,
4186  int& hspNum,
4187  double& totalLen,
4188  int &rawScore,
4189  int& sum_n,
4190  list<TGi>& use_this_gi)
4191 {
4192  const string k_GiPrefix = "gi:";
4193  bool hasScore = false;
4194 
4195 
4196  ITERATE (typename container, iter, scoreList) {
4197  const CObject_id& id=(*iter)->GetId();
4198  if (id.IsStr()) {
4199  hasScore = true;
4200  if (id.GetStr()=="seq_evalue") {
4201  evalue = (*iter)->GetValue().GetReal();
4202  } else if (id.GetStr()=="seq_bit_score"){
4203  bitScore = (*iter)->GetValue().GetReal();
4204  } else if (id.GetStr()=="seq_total_bit_score"){
4205  totalBitScore = (*iter)->GetValue().GetReal();
4206  } else if (id.GetStr()=="seq_percent_coverage"){
4207  percentCoverage = (*iter)->GetValue().GetInt();
4208  } else if (id.GetStr()=="seq_percent_identity" && (*iter)->GetValue().IsInt()){
4209  percentIdent = (*iter)->GetValue().GetInt();
4210  } else if (id.GetStr()=="seq_percent_identity" && (*iter)->GetValue().IsReal()){
4211  percentIdent = (*iter)->GetValue().GetReal();
4212  } else if (id.GetStr()=="seq_hspnum"){
4213  hspNum = (*iter)->GetValue().GetInt();
4214  } else if (id.GetStr()=="seq_align_totlen"){
4215  totalLen = (*iter)->GetValue().GetReal();
4216  } else if (id.GetStr()=="score"){
4217  rawScore = (*iter)->GetValue().GetInt();
4218  } else if (id.GetStr()=="use_this_gi"){
4219  Uint4 gi_v = (Uint4) ((*iter)->GetValue().GetInt());
4220  use_this_gi.push_back(GI_FROM(Uint4, gi_v));
4221  } else if (id.GetStr()=="sum_n"){
4222  sum_n = (*iter)->GetValue().GetInt();
4223  }
4224  else if(NStr::StartsWith(id.GetStr(),k_GiPrefix)) { //will be used when switch to 64bit GIs
4225  string strGi = NStr::Replace(id.GetStr(),k_GiPrefix,"");
4226  TGi gi = NStr::StringToNumeric<TGi>(strGi);
4227  use_this_gi.push_back(gi);
4228  }
4229  }
4230  }
4231  return hasScore;
4232 }
4233 
4234 
4235 void CAlignFormatUtil::GetUseThisSequence(const CSeq_align& aln,list<TGi>& use_this_gi)
4236 
4237 {
4238  const string k_GiPrefix = "gi:";
4239 
4240  if(!aln.CanGetExt() || aln.GetExt().size() == 0) return;
4241  const CUser_object &user = *(aln.GetExt().front());
4242 
4243  if (user.IsSetType() && user.GetType().IsStr() && user.GetType().GetStr() == "use_this_seqid" && user.IsSetData()) {
4244  const CUser_object::TData& fields = user.GetData();
4245  for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4246  const CUser_field& field = **fit;
4247 
4248  if (field.IsSetLabel() && field.GetLabel().IsStr() && field.GetLabel().GetStr() == "SEQIDS" &&
4249  field.IsSetData() && field.GetData().IsStrs()) {
4250  const CUser_field::C_Data::TStrs& strs = field.GetData().GetStrs();
4251  ITERATE(CUser_field::TData::TStrs, acc_iter, strs) {
4252  if(NStr::StartsWith(*acc_iter,k_GiPrefix)) { //will be used when switch to 64bit GIs
4253  string strGi = NStr::Replace(*acc_iter,k_GiPrefix,"");
4254  TGi gi = NStr::StringToNumeric<TGi>(strGi);
4255  use_this_gi.push_back(gi);
4256  }
4257  }
4258  }
4259  }
4260  }
4261 }
4262 
4263 
4264 /*use_this_seq will contain gi:nnnnnn or seqid:ssssss string list*/
4265 void CAlignFormatUtil::GetUseThisSequence(const CSeq_align& aln,list<string>& use_this_seq)
4266 
4267 {
4268  if(!aln.CanGetExt() || aln.GetExt().size() == 0) return;
4269  const CUser_object &user = *(aln.GetExt().front());
4270 
4271  if (user.IsSetType() && user.GetType().IsStr() && user.GetType().GetStr() == "use_this_seqid" && user.IsSetData()) {
4272  const CUser_object::TData& fields = user.GetData();
4273  for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4274  const CUser_field& field = **fit;
4275 
4276  if (field.IsSetLabel() && field.GetLabel().IsStr() && field.GetLabel().GetStr() == "SEQIDS" &&
4277  field.IsSetData() && field.GetData().IsStrs()) {
4278  const CUser_field::C_Data::TStrs& strs = field.GetData().GetStrs();
4279  ITERATE(CUser_field::TData::TStrs, acc_iter, strs) {
4280  use_this_seq.push_back(*acc_iter);
4281  }
4282  }
4283  }
4284  }
4285 }
4286 
4287 
4288 
4291 {
4292  bool hasScore = false;
4293  double evalue = -1;
4294  double bitScore = -1;
4295  double totalBitScore = -1;
4296  int percentCoverage = -1;
4297  double percentIdent = -1;
4298  int hspNum = 0;
4299  double totalLen = 0;
4300  int rawScore = -1;
4301  int sum_n = -1;
4302  list<TGi> use_this_gi;
4303  list<string> use_this_seq;
4304 
4305  const CSeq_align& aln = *(alnSet.Get().front());
4306 
4307  hasScore = s_GetBlastScore(aln.GetScore(),evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4308 
4309  if(!hasScore){
4310  const CSeq_align::TSegs& seg = aln.GetSegs();
4311  if(seg.Which() == CSeq_align::C_Segs::e_Std){
4312  s_GetBlastScore(seg.GetStd().front()->GetScores(),
4313  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4314  } else if (seg.Which() == CSeq_align::C_Segs::e_Dendiag){
4315  s_GetBlastScore(seg.GetDendiag().front()->GetScores(),
4316  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4317  } else if (seg.Which() == CSeq_align::C_Segs::e_Denseg){
4319  evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4320  }
4321  }
4322 
4323  if(use_this_gi.size() == 0) {
4324  GetUseThisSequence(aln,use_this_seq);
4325  }
4326  else {
4327  use_this_seq = s_NumGiToStringGiList(use_this_gi);//for backward compatability
4328  }
4329 
4330 
4331  unique_ptr<SSeqAlignSetCalcParams> seqSetInfo(new SSeqAlignSetCalcParams);
4332  seqSetInfo->evalue = evalue;
4333  seqSetInfo->bit_score = bitScore;
4334  seqSetInfo->total_bit_score = totalBitScore;
4335  seqSetInfo->percent_coverage = percentCoverage;
4336  seqSetInfo->percent_identity = percentIdent;
4337  seqSetInfo->hspNum = hspNum;
4338  seqSetInfo->totalLen = (Int8)totalLen;
4339 
4340  seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4341  seqSetInfo->id = &(aln.GetSeq_id(1));
4342  seqSetInfo->use_this_gi = StringGiToNumGiList(use_this_seq);//for backward compatability
4343  seqSetInfo->use_this_seq = use_this_seq;
4344  seqSetInfo->raw_score = rawScore;//not used
4345 
4346  seqSetInfo->subjRange = CRange<TSeqPos>(0,0);
4347  seqSetInfo->flip = false;
4348 
4349  return seqSetInfo.release();
4350 }
4351 
4353  const CSeq_id& aln_id,
4354  list<TGi>& use_this_gi,
4355  TGi& gi)
4356 
4357 {
4358  TTaxId taxid = ZERO_TAX_ID;
4359  CRef<CSeq_id> wid = CAlignFormatUtil::GetDisplayIds(handle, aln_id, use_this_gi, gi, taxid);
4360  return wid;
4361 }
4362 
4364  const CSeq_id& aln_id,
4365  list<TGi>& use_this_gi,
4366  TGi& gi,
4367  TTaxId& taxid)
4368 
4369 {
4371  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
4372 
4373  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
4374  CRef<CSeq_id> wid;
4375 
4376  gi = ZERO_GI;
4377  taxid = ZERO_TAX_ID;
4378  if(bdl.empty()){
4379  wid = FindBestChoice(*ids, CSeq_id::WorstRank);
4380  gi = FindGi(*ids);
4381  } else {
4382  bool found = false;
4383  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4384  iter != bdl.end(); iter++){
4385  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4386  TGi cur_gi = FindGi(*cur_id);
4387  wid = FindBestChoice(*cur_id, CSeq_id::WorstRank);
4388  if ((*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4389  taxid = (*iter)->GetTaxid();
4390  }
4391  if (!use_this_gi.empty()) {
4392  ITERATE(list<TGi>, iter_gi, use_this_gi){
4393  if(cur_gi == *iter_gi){
4394  found = true;
4395  break;
4396  }
4397  }
4398  } else {
4399  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4400  if ((*iter_id)->Match(aln_id)
4401  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4402  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4403  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4404  found = true;
4405  }
4406  }
4407  }
4408  if(found){
4409  gi = cur_gi;
4410  break;
4411  }
4412  }
4413  }
4414  return wid;
4415 }
4416 
4417 
4418 
4419 //removes "gi:" or "seqid:" prefix from gi:nnnnnnn or seqid:nnnnn
4420 static string s_UseThisSeqToTextSeqID(string use_this_seqid, bool &isGi)
4421 {
4422  const string k_GiPrefix = "gi:";
4423  const string k_SeqIDPrefix = "seqid:";
4424  isGi = false;
4425  string textSeqid;
4426  if(NStr::StartsWith(use_this_seqid,k_GiPrefix)) {
4427  textSeqid = NStr::Replace(use_this_seqid,k_GiPrefix,"");
4428  isGi = true;
4429  }
4430  else if(NStr::StartsWith(use_this_seqid,k_SeqIDPrefix)) {
4431  textSeqid = NStr::Replace(use_this_seqid,k_SeqIDPrefix,"");
4432  }
4433  else {//assume no prefix - gi
4434  if(NStr::StringToInt8(use_this_seqid,NStr::fConvErr_NoThrow)) {
4435  isGi = true;
4436  }
4437  }
4438  return textSeqid;
4439 }
4440 
4441 
4442 
4443 //assume that we have EITHER gi: OR seqid: in the list
4444 bool CAlignFormatUtil::IsGiList(list<string> &use_this_seq)
4445 {
4446  bool isGi = false;
4447  ITERATE(list<string>, iter_seq, use_this_seq){
4448  s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4449  break;
4450  }
4451  return isGi;
4452 }
4453 
4454 list<TGi> CAlignFormatUtil::StringGiToNumGiList(list<string> &use_this_seq)
4455 {
4456  list<TGi> use_this_gi;
4457  ITERATE(list<string>, iter_seq, use_this_seq){
4458  bool isGi = false;
4459  string strGI = s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4460  if(isGi) use_this_gi.push_back(NStr::StringToNumeric<TGi>(strGI));
4461  }
4462  return use_this_gi;
4463 }
4464 
4465 
4466 
4467 bool CAlignFormatUtil::MatchSeqInSeqList(TGi cur_gi, CRef<CSeq_id> &seqID, list<string> &use_this_seq,bool *isGiList)
4468 {
4469  bool found = false;
4470  bool isGi = false;
4471 
4472  string curSeqID = CAlignFormatUtil::GetLabel(seqID,true); //uses GetSeqIdString(true)
4473  ITERATE(list<string>, iter_seq, use_this_seq){
4474  isGi = false;
4475  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4476  if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4477  found = true;
4478  break;
4479  }
4480  }
4481  if(isGiList) *isGiList = isGi;
4482  return found;
4483 }
4484 
4485 
4486 bool CAlignFormatUtil::MatchSeqInSeqList(CConstRef<CSeq_id> &alnSeqID, list<string> &use_this_seq,vector <string> &seqList)
4487 {
4488  bool isGi = false;
4489  string curSeqID;
4490  if(alnSeqID->IsGi()) {
4491  curSeqID = NStr::NumericToString(alnSeqID->GetGi());
4492  }
4493  else {
4494  curSeqID = CAlignFormatUtil::GetLabel(alnSeqID,true); //uses GetSeqIdString(true)
4495  }
4496  //match with seqid in seq_align
4497  bool found = std::find(seqList.begin(), seqList.end(), curSeqID) != seqList.end();
4498  if(!found) {
4499  //match in use_this_seq list
4500  ITERATE(list<string>, iter_seq, use_this_seq){
4501  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4502  found = std::find(seqList.begin(), seqList.end(), useThisSeq) != seqList.end();
4503  if(found){
4504  break;
4505  }
4506  }
4507  }
4508  return found;
4509 }
4510 
4511 bool CAlignFormatUtil::MatchSeqInUseThisSeqList(list<string> &use_this_seq, string textSeqIDToMatch)
4512 {
4513  bool has_match = false;
4514 
4515  ITERATE(list<string>, iter_seq, use_this_seq) {
4516  bool isGi;
4517  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGi);
4518  if(useThisSeq == textSeqIDToMatch) {
4519  has_match = true;
4520  break;
4521  }
4522  }
4523  return has_match;
4524 }
4525 
4527 {
4528  list<string> new_use_this_seq;
4529  bool hasAccType = false;
4530  bool isGI = false;
4531 
4532  ITERATE(list<string>, iter_seq, use_this_seq) {
4533  string useThisSeq = s_UseThisSeqToTextSeqID(*iter_seq, isGI);
4534  CSeq_id::EAccessionInfo useThisSeqAccType = CSeq_id::IdentifyAccession (useThisSeq);
4535  if(useThisSeqAccType != accessionType) {
4536  new_use_this_seq.push_back(useThisSeq);
4537  }
4538  else {
4539  hasAccType = true;
4540  }
4541  }
4542  use_this_seq = new_use_this_seq;
4543  return hasAccType;
4544 }
4545 
4547  const CSeq_id& aln_id,
4548  list<string>& use_this_seq,
4549  TGi *gi,
4550  TTaxId *taxid,
4551  string *textSeqID)
4552 
4553 {
4555  const list< CRef< CBlast_def_line > > &bdl = (bdlRef.Empty()) ? list< CRef< CBlast_def_line > >() : bdlRef->Get();
4556 
4557  const CBioseq::TId* ids = &handle.GetBioseqCore()->GetId();
4558  CRef<CSeq_id> wid;
4559 
4560  if(gi) *gi = ZERO_GI;
4561  if(taxid) *taxid = ZERO_TAX_ID;
4562  if(bdl.empty()){
4563  wid = FindBestChoice(*ids, CSeq_id::WorstRank);
4564  if(gi) *gi = FindGi(*ids);
4565  if(textSeqID) *textSeqID = GetLabel(wid,true);//uses GetSeqIdString(true)
4566  } else {
4567  bool found = false;
4568  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4569  iter != bdl.end(); iter++){
4570  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4571  TGi cur_gi = FindGi(*cur_id);
4572  wid = FindBestChoice(*cur_id, CSeq_id::WorstRank);
4573  string curSeqID = GetLabel(wid,true);//uses GetSeqIdString(true)
4574  if (taxid && (*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4575  *taxid = (*iter)->GetTaxid();
4576  }
4577  if (!use_this_seq.empty()) {
4578  ITERATE(list<string>, iter_seq, use_this_seq){
4579  bool isGi = false;
4580  string useThisSeq = s_UseThisSeqToTextSeqID( *iter_seq, isGi);
4581  if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4582  found = true;
4583  break;
4584  }
4585  }
4586  } else {
4587  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4588  if ((*iter_id)->Match(aln_id)
4589  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4590  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4591  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4592  found = true;
4593  }
4594  }
4595  }
4596  if(found){
4597  if(gi) *gi = cur_gi;
4598  if(textSeqID) *textSeqID = curSeqID;
4599  break;
4600  }
4601  }
4602  }
4603 
4604  return wid;
4605 }
4606 
4607 
4609  const CSeq_id& aln_id,
4610  list<TGi>& use_this_gi)
4611 
4612 
4613 {
4614  TGi gi = ZERO_GI;
4615 
4616  if(!bdl.empty()){
4617  bool found = false;
4618  for(list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
4619  iter != bdl.end(); iter++){
4620  const CBioseq::TId* cur_id = &((*iter)->GetSeqid());
4621  TGi cur_gi = FindGi(*cur_id);
4622  if (!use_this_gi.empty()) {
4623  ITERATE(list<TGi>, iter_gi, use_this_gi){
4624  if(cur_gi == *iter_gi){
4625  found = true;
4626  break;
4627  }
4628  }
4629  } else {
4630  ITERATE(CBioseq::TId, iter_id, *cur_id) {
4631  if ((*iter_id)->Match(aln_id)
4632  || (aln_id.IsGeneral() && aln_id.GetGeneral().CanGetDb() &&
4633  (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4634  aln_id.GetGeneral().GetDb() == (*iter_id)->GetGeneral().GetDb())) {
4635  found = true;
4636  }
4637  }
4638  }
4639  if(found){
4640  gi = cur_gi;
4641  break;
4642  }
4643  }
4644  }
4645  return gi;
4646 }
4647 
4649 {
4650  if(rng.GetFrom() > rng.GetTo()){
4651  rng.Set(rng.GetTo(), rng.GetFrom());
4652  }
4653  //cerr << "Query Rng: " << rng.GetFrom() << "-" << rng.GetTo() << endl;
4654  return rng;
4655 }
4656 
4658 {
4659  if(alnset.IsEmpty())
4660  return 0;
4661 
4662  bool isDenDiag = (alnset.Get().front()->GetSegs().Which() == CSeq_align::C_Segs::e_Dendiag) ?
4663  true : false;
4664 
4665  list<CRef<CSeq_align> >::iterator mItr=alnset.Set().begin();
4666  CRangeCollection<TSeqPos> subj_rng_coll((*mItr)->GetSeqRange(1));
4667  CRange<TSeqPos> q_rng((*mItr)->GetSeqRange(0));
4668  /*
4669  cerr << MSerial_AsnText << **mItr;
4670  cerr << (*mItr)->GetSeqRange(0).GetFrom() << endl;
4671  cerr << (*mItr)->GetSeqRange(0).GetTo() << endl;
4672  cerr << (*mItr)->GetSeqRange(0).GetToOpen() << endl;
4673  cerr << (*mItr)->GetSeqRange(1).GetFrom() << endl;
4674  cerr << (*mItr)->GetSeqRange(1).GetTo() << endl;
4675  cerr << (*mItr)->GetSeqRange(1).GetToOpen() << endl;
4676  */
4677  CRangeCollection<TSeqPos> query_rng_coll(s_FixMinusStrandRange(q_rng));
4678  ++mItr;
4679  for(;mItr != alnset.Set().end(); ++mItr) {
4680  const CRange<TSeqPos> align_subj_rng((*mItr)->GetSeqRange(1));
4681  // subject range should always be on the positive strand
4682  ASSERT(align_subj_rng.GetTo() > align_subj_rng.GetFrom());
4683  CRangeCollection<TSeqPos> coll(align_subj_rng);
4684  coll.Subtract(subj_rng_coll);
4685 
4686  if (coll.empty())
4687  continue;
4688 
4689  if(coll[0] == align_subj_rng) {
4690  CRange<TSeqPos> query_rng ((*mItr)->GetSeqRange(0));
4691  //cerr << "Subj Rng :" << align_subj_rng.GetFrom() << "-" << align_subj_rng.GetTo() << endl;
4692  query_rng_coll += s_FixMinusStrandRange(query_rng);
4693  subj_rng_coll += align_subj_rng;
4694  }
4695  else {
4696  ITERATE (CRangeCollection<TSeqPos>, uItr, coll) {
4697  CRange<TSeqPos> query_rng;
4698  const CRange<TSeqPos> & subj_rng = (*uItr);
4699  CRef<CSeq_align> densegAln
4700  = isDenDiag ? CAlignFormatUtil::CreateDensegFromDendiag(**mItr) : (*mItr);
4701 
4702  CAlnMap map(densegAln->GetSegs().GetDenseg());
4703  TSignedSeqPos subj_aln_start = map.GetAlnPosFromSeqPos(1,subj_rng.GetFrom());
4704  TSignedSeqPos subj_aln_end = map.GetAlnPosFromSeqPos(1,subj_rng.GetTo());
4705  query_rng.SetFrom(map.GetSeqPosFromAlnPos(0,subj_aln_start));
4706  query_rng.SetTo(map.GetSeqPosFromAlnPos(0,subj_aln_end));
4707 
4708  //cerr << "Subj Rng :" << subj_rng.GetFrom() << "-" << subj_rng.GetTo() << endl;
4709  query_rng_coll += s_FixMinusStrandRange(query_rng);
4710  subj_rng_coll += subj_rng;
4711  }
4712  }
4713  }
4714 
4715  return query_rng_coll.GetCoveredLength();
4716 }
4717 
4718 ///return id type specified or null ref
4719 ///@param ids: the input ids
4720 ///@param choice: id of choice
4721 ///@return: the id with specified type
4722 ///
4724  CSeq_id::E_Choice choice)
4725 {
4726  CRef<CSeq_id> cid;
4727 
4728  for (CBioseq::TId::const_iterator iter = ids.begin(); iter != ids.end();
4729  iter ++){
4730  if ((*iter)->Which() == choice){
4731  cid = *iter;
4732  break;
4733  }
4734  }
4735 
4736  return cid;
4737 }
4738 
4739 ///return gi from id list
4740 ///@param ids: the input ids
4741 ///@return: the gi if found
4742 ///
4744 {
4745  TGi gi = ZERO_GI;
4747  if (!(id.Empty())){
4748  return id->GetGi();
4749  }
4750  return gi;
4751 }
4752 
4754 {
4755  CSeqdesc_CI desc_t(bh, CSeqdesc::e_Title);
4756  string t = kEmptyStr;
4757  for (;desc_t; ++desc_t) {
4758  t += desc_t->GetTitle() + " ";
4759  }
4760  return t;
4761 }
4762 
4764 {
4765  string retval;
4766 
4767  if (id.IsGi() || id.IsPrf() || id.IsPir()) {
4768  retval = id.AsFastaString();
4769  }
4770  else {
4771  retval = id.GetSeqIdString(true);
4772  }
4773 
4774  return retval;
4775 }
4776 
4777 
4778 bool CAlignFormatUtil::GetTextSeqID(CConstRef<CSeq_id> seqID, string *textSeqID)
4779 {
4780  bool hasTextSeqID = true;
4781 
4782  const CTextseq_id* text_id = seqID->GetTextseq_Id();
4783  //returns non zero if e_Genbank,e_Embl,e_Ddbj,e_Pir,e_Swissprot,case e_Other,e_Prf,case e_Tpg,e_Tpe,case e_Tpd,case e_Gpipe, e_Named_annot_track
4784  if(!text_id) { //check for pdb and pat
4785  if(!(seqID->Which() == CSeq_id::e_Pdb) && !(seqID->Which() == CSeq_id::e_Patent) && !(seqID->Which() == CSeq_id::e_Gi)) {
4786  hasTextSeqID = false;
4787  }
4788  }
4789 
4790  if(hasTextSeqID && textSeqID) {
4791  seqID->GetLabel(textSeqID, CSeq_id::eContent);
4792  }
4793  return hasTextSeqID;
4794 }
4795 
4796 
4797 
4798 bool CAlignFormatUtil::GetTextSeqID(const list<CRef<CSeq_id> > & ids, string *textSeqID)
4799 {
4800  bool hasTextSeqID = false;
4801 
4802  CConstRef<CSeq_id> seqID = FindTextseq_id(ids);
4803  //returns non zero if e_Genbank,e_Embl,e_Ddbj,e_Pir,e_Swissprot,case e_Other,e_Prf,case e_Tpg,e_Tpe,case e_Tpd,case e_Gpipe, e_Named_annot_track
4804  if(seqID.Empty()) {
4805  seqID = GetSeq_idByType(ids, CSeq_id::e_Pdb);
4806  }
4807  if(seqID.Empty()) {
4808  seqID = GetSeq_idByType(ids, CSeq_id::e_Patent);
4809  }
4810  if(!seqID.Empty()) {
4811  hasTextSeqID = true;
4812  if(textSeqID) seqID->GetLabel(textSeqID, CSeq_id::eContent);
4813  }
4814  return hasTextSeqID;
4815 }
4816 
4818  vector <string> &seqList)
4819 {
4820  CConstRef<CSeq_id> previous_id, subid;
4821  list<string> use_this_seq;
4822  bool match = false;
4823 
4824  CRef<CSeq_align_set> new_aln(new CSeq_align_set);
4825  ITERATE(CSeq_align_set::Tdata, iter, source_aln.Get()){
4826  subid = &((*iter)->GetSeq_id(1));
4827  if(previous_id.Empty() || !subid->Match(*previous_id)){
4828  use_this_seq.clear();
4829  CAlignFormatUtil::GetUseThisSequence(**iter,use_this_seq);
4830  match = MatchSeqInSeqList(subid, use_this_seq,seqList);
4831  }
4832 
4833  previous_id = subid;
4834  if(match) {
4835  new_aln->Set().push_back(*iter);
4836  }
4837  }
4838  return new_aln;
4839 }
4840 
4841 
4842 END_SCOPE(align_format)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void s_CalcAlnPercentIdent(const CRef< CSeq_align_set > &info1, const CRef< CSeq_align_set > &info2, double &percentIdent1, double &percentIdent2)
static string s_GetTaxName(TTaxId taxid)
static bool s_ProcessAlignSet(const CSeq_align_set &alnset, list< CRange< TSeqPos > > &query_list, list< CRange< TSeqPos > > &subject_list)
static CRef< CSeq_id > s_GetSeqIdByType(const list< CRef< CSeq_id > > &ids, CSeq_id::E_Choice choice)
return id type specified or null ref
static string s_MapURLLink(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo, const CBioseq::TId &ids)
static list< CRange< TSeqPos > > s_MergeRangeList(list< CRange< TSeqPos > > &source)
static void s_AddLinkoutInfo(map< int, vector< CBioseq::TId > > &linkout_map, int linkout, CBioseq::TId &cur_id)
static bool s_GetSRASeqMetadata(const CBioseq::TId &ids, string &strRun, string &strSpotId, string &strReadIndex)
void s_AddOtherRelatedInfoLinks(CBioseq::TId &cur_id, const string &rid, bool is_na, bool for_alignment, int cur_align, list< string > &linkout_list)
static bool kTranslation
static list< string > s_NumGiToStringGiList(list< TGi > use_this_gi)
static CRange< TSeqPos > & s_FixMinusStrandRange(CRange< TSeqPos > &rng)
static bool s_FillDbInfoLocally(const string &dbname, CAlignFormatUtil::SDbInfo &info, int dbfilt_algorithm)
Initialize database statistics with data obtained from local BLAST databases.
static CRef< CScope > kScope
bool s_GetBlastScore(const container &scoreList, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi, int &comp_adj_method)
Get blast score information.
static list< string > s_GetLinkoutUrl(int linkout, string giList, string labelList, TGi first_gi, CAlignFormatUtil::SLinkoutInfo &linkoutInfo, bool textLink=true)
string s_GetBestIDForURL(CBioseq::TId &ids)
static bool s_isAlnInFilteringRange(double evalue, double percentIdent, int queryCover, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh, int queryCoverLow, int queryCoverHigh)
USING_SCOPE(ncbi)
static string s_MapCommonUrlParams(string urlTemplate, CAlignFormatUtil::SSeqURLInfo *seqUrlInfo)
static int s_LinkLetterToType(string linkLetter)
static double adjustPercentIdentToDisplayValue(double value)
static string s_MapCustomLink(string linkUrl, string reportType, string accession, string linkText, string linktrg, string linkTitle=kCustomLinkTitle, string linkCls="")
const char k_PSymbol[ePMatrixSize+1]
Residues.
static string s_UseThisSeqToTextSeqID(string use_this_seqid, bool &isGi)
static string s_MapLinkoutGenParam(string &url_link_tmpl, const string &rid, string giList, bool for_alignment, int cur_align, string &label, string &lnk_displ, string lnk_tl_info="", string lnk_title="")
static list< string > s_GetFullLinkoutUrl(CBioseq::TId &cur_id, CAlignFormatUtil::SLinkoutInfo &linkoutInfo, map< int, vector< CBioseq::TId > > &linkout_map, bool getIdentProteins)
static bool FromRangeAscendingSort(CRange< TSeqPos > const &info1, CRange< TSeqPos > const &info2)
static const char kGeneDispl[]
static const char kBioAssayProtImg[]
static const char kSeqViewerParams[]
static const char kSeqViewerUrl[]
static const char kStructureImg[]
const int k_NumAsciiChar
Number of ASCII characters for populating matrix columns.
static const char kGenericLinkMouseoverTmpl[]
static const char kEntrezTMUrl[]
static const char kMapviwerUrl[]
mapviewer linkout
static const char kStructureDispl[]
static const char kCustomLinkTemplate[]
static const char kStructureAlphaFoldUrl[]
static const char kDownloadUrl[]
dumpgnl
static const char kGeoDispl[]
@ ePMatrixSize
static const char kClassInfo[]
blast related url
static const char kGeoImg[]
static const char kGeneTerm[]
static const char kIdenticalProteinsUrl[]
static const char kGeneImg[]
static const char kBioAssayDispl[]
static const char kIdenticalProteinsDispl[]
static const char kGenomeDataViewerDispl[]
static const char kReprMicrobialGenomesImg[]
static const char kMapviwerDispl[]
static const char kGenomeDataViewerImg[]
static const char kUnigeneImg[]
static const char kUnigeneDispl[]
static const char kStructureUrl[]
structure
static const char kBioAssayNucImg[]
static const char kGenericLinkTemplate[]
static const char kMapviwerImg[]
static const char kTraceUrl[]
trace db
static const char kMapviewBlastHitUrl[]
mapviewer linkout
static const char kReprMicrobialGenomesDispl[]
static const char kMapviewBlastHitParams[]
static const char kCustomLinkTitle[]
static const char kSeqViewerUrlNonGi[]
Declares the CBlastServices class.
static string GetProtocol(void)
static const char * kNoHitsFound
The string containing the message that no hits were found.
static CRef< objects::CSeq_align_set > FilterSeqalignBySeqList(objects::CSeq_align_set &source_aln, vector< string > &seqList)
function for Filtering seqalign by specific subjects
static void PrintPhiInfo(int num_patterns, const string &pattern, double prob, vector< int > &offsets, CNcbiOstream &out)
Prints out PHI-BLAST info for header (or footer)
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignSetCalcParamsFromASN(const objects::CSeq_align_set &alnSet)
static string GetIDUrl(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid.
static int GetAlignmentLength(const objects::CSeq_align &aln, bool do_translation)
get the alignment length
static bool IsWGSAccession(string &accession, string &wgsProj)
Check if accession is WGS.
static void PruneSeqalign(const objects::CSeq_align_set &source_aln, objects::CSeq_align_set &new_aln, unsigned int num=static_cast< unsigned int >(kDfltArgNumAlignments))
Fill new alignset containing the specified number of alignments with unique slave seqids.
static int GetUniqSeqCoverage(objects::CSeq_align_set &alnset)
Calculate the unique subject query coverage range (blastn only)
static void InitConfig()
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignSetCalcParams(const objects::CSeq_align_set &aln, int queryLength, bool do_translation)
static void SortHitByMolecularType(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
sort a list of seqalign set by molecular type
static void AcknowledgeBlastQuery(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false, const string &rid=kEmptyStr)
Print out blast query info.
static bool IsMixedDatabase(const objects::CSeq_align_set &alnset, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
static list< CRef< objects::CSeq_align_set > > SortOneSeqalignForSortableFormat(const objects::CSeq_align_set &source, bool nuc_to_nuc_translation, int hit_sort, int hsp_sort)
static void GetAsciiProteinMatrix(const char *matrix_name, CNcbiMatrix< int > &retval)
Retrieve a scoring matrix for the provided matrix name.
static list< string > GetFullLinkoutUrl(const list< CRef< objects::CBlast_def_line > > &bdl, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, bool structure_linkout_as_group, bool for_alignment, int cur_align, string &linkoutOrder, TTaxId taxid, string &database, int query_number, string &user_url, string &preComputedResID, ILinkoutDB *linkoutdb, const string &mv_build_name)
Get linkout membership for the list of blast deflines.
static void x_AcknowledgeBlastSequence(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, const string &label, bool tabular, const string &rid)
static void PrintDbReport(const vector< SDbInfo > &dbinfo_list, size_t line_length, CNcbiOstream &out, bool top=false)
Print out blast database information.
static void GetAlnScores(const objects::CSeq_align &aln, int &score, double &bits, double &evalue, int &sum_n, int &num_ident, list< TGi > &use_this_gi)
Extract score info from blast alignment.
static void BuildFormatQueryString(CCgiContext &ctx, string &cgi_query)
static string GetSeqDescrString(const objects::CBioseq &cbs)
Returns a full description for a Bioseq, concatenating all available titles.
@ eAddEOLAtLineEnd
add EOL at the end of the string
@ eAddEOLAtLineStart
add EOL at the beginning of the string
@ eSpacePosAtLineEnd
add spaces at the end of the string
static list< TGi > StringGiToNumGiList(list< string > &use_this_seq)
Convert a string gi list to a TGi list.
static string AddSpaces(string paramVal, size_t maxParamLength, int spacesFormatFlag=eSpacePosToCenter)
Calculate the number of spaces and add them to paramVal.
static CRef< objects::CSeq_align_set > FilterSeqalignByPercentIdent(objects::CSeq_align_set &source_aln, double percentIdentLow, double percentIdentHigh)
function for Filtering seqalign by percent identity
static bool RemoveSeqsOfAccessionTypeFromSeqInUse(list< string > &use_this_seq, objects::CSeq_id::EAccessionInfo accesionType)
function to remove sequences of accesionType from use_this_seq list
static bool SortHitByMasterStartAscending(CRef< objects::CSeq_align_set > &info1, CRef< objects::CSeq_align_set > &info2)
sorting function for sorting a list of seqalign set by ascending master start position
static void GetScoreString(double evalue, double bit_score, double total_bit_score, int raw_score, string &evalue_str, string &bit_score_str, string &total_bit_score_str, string &raw_score_str)
format evalue and bit_score
static map< string, CRef< objects::CSeq_align_set > > HspListToHitMap(vector< string > seqIdList, const objects::CSeq_align_set &source)
static string GetBareId(const objects::CSeq_id &id)
Get sequence id with no database source (bare accession)
static string GetGnlID(const objects::CDbtag &dtg)
Return ID for GNL label.
static bool m_geturl_debug_flag
static void SortHit(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, bool do_translation, objects::CScope &scope, int sort_method, ILinkoutDB *linkoutdb, const string &mv_build_name)
actual sorting function for SortHitByMolecularType
static void FillScanModeBlastDbInfo(vector< SDbInfo > &retval, bool is_protein, int numSeqs, Int8 numLetters, string &tag)
Fills one BLAST dbinfo structure.
static bool SortHitByTotalScoreDescending(CRef< objects::CSeq_align_set > const &info1, CRef< objects::CSeq_align_set > const &info2)
return the comparison result: 1st >= 2nd => true, false otherwise
static string GetIDUrlGen(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL for seqid that goes to entrez or trace.
static bool SortHspBySubjectStartAscending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
static CAlignFormatUtil::DbType GetDbType(const objects::CSeq_align_set &actual_aln_list, objects::CScope &scope)
Set the database as gi type.
static void PruneSeqalignAll(const objects::CSeq_align_set &source_aln, objects::CSeq_align_set &new_aln, unsigned int number)
Fill new alignset containing the specified number of alignments plus the rest of alignments for the l...
static void PrintTildeSepLines(string str, size_t line_len, CNcbiOstream &out)
Print out misc information separated by "~".
static string BuildUserUrl(const objects::CBioseq::TId &ids, TTaxId taxid, string user_url, string database, bool db_is_na, string rid, int query_number, bool for_alignment)
return the custom url (such as mapview)
static string MapTemplate(string inpString, string tmplParamName, Int8 templParamVal)
Replace template tags by real data.
static CAlignFormatUtil::SSeqAlignSetCalcParams * GetSeqAlignCalcParams(const objects::CSeq_align &aln)
static string GetURLFromRegistry(const string url_name, int index=-1)
retrieve URL from .ncbirc file combining host/port and format strings values.
static bool IsGiList(list< string > &use_this_seq)
Check if use_this_seq contains gi list.
static double GetSeqAlignSetCalcPercentIdent(const objects::CSeq_align_set &aln, bool do_translation)
static string GetGraphiscLink(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static bool MatchSeqInSeqList(TGi cur_gi, CRef< objects::CSeq_id > &seqID, list< string > &use_this_seq, bool *isGiList=NULL)
Matches text seqID or gi with the list of seqIds or gis.
static int GetSeqLinkoutInfo(objects::CBioseq::TId &cur_id, ILinkoutDB **linkoutdb, const string &mv_build_name, TGi gi=INVALID_GI)
static CRef< objects::CSeq_id > GetDisplayIds(const objects::CBioseq_Handle &handle, const objects::CSeq_id &aln_id, list< TGi > &use_this_gi, TGi &gi, TTaxId &taxid)
Scan the list of blast deflines and find seqID to be used in display.
static list< string > GetLinkoutUrl(int linkout, const objects::CBioseq::TId &ids, const string &rid, const string &cdd_rid, const string &entrez_term, bool is_na, TGi first_gi, bool structure_linkout_as_group, bool for_alignment, int cur_align, string preComputedResID)
Get the list of urls for linkouts.
static void PrintKAParameters(double lambda, double k, double h, size_t line_len, CNcbiOstream &out, bool gapped, const Blast_GumbelBlk *gbp=NULL)
Print out kappa, lambda blast parameters.
static CRef< objects::CSeq_align > CreateDensegFromDendiag(const objects::CSeq_align &aln)
Create denseseg representation for densediag seqalign.
static list< string > GetCustomLinksList(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope, int customLinkTypes=eLinkTypeDefault)
Create the list of string links for seqid that go.
static string GetURLDefault(const string url_name, int index=-1)
settings are not complete.
static CRef< objects::CSeq_align_set > FilterSeqalignByEval(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh)
function for Filtering seqalign by expect value
static string GetFASTALinkURL(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL to FASTA info.
static bool GetTextSeqID(const list< CRef< objects::CSeq_id > > &ids, string *textSeqID=NULL)
static void GetBlastDbInfo(vector< SDbInfo > &retval, const string &blastdb_names, bool is_protein, int dbfilt_algorithm, bool is_remote=false)
Retrieve BLAST database information for presentation in BLAST report.
static void GetUseThisSequence(const objects::CSeq_align &aln, list< TGi > &use_this_gi)
Extract use_this_gi info from blast alignment.
static bool SortHitByPercentIdentityDescendingEx(const CRef< objects::CSeq_align_set > &info1, const CRef< objects::CSeq_align_set > &info2)
sorting function for sorting a list of seqalign set by descending identity
static void ExtractSeqAlignForSeqList(CRef< objects::CSeq_align_set > &all_aln_set, string alignSeqList)
extract seq_align_set corresponding to seqid list
static int GetPercentMatch(int numerator, int denominator)
function for calculating percent match for an alignment.
static string GetSeqIdString(const objects::CBioseq &cbs, bool believe_local_id=true)
Returns a full '|'-delimited Seq-id string for a Bioseq.
static bool MatchSeqInUseThisSeqList(list< string > &use_this_seq, string textSeqIDToMatch)
static list< string > GetSeqLinksList(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static CRef< objects::CSeq_align_set > SortSeqalignForSortableFormat(CCgiContext &ctx, objects::CScope &scope, objects::CSeq_align_set &aln_set, bool nuc_to_nuc_translation, int db_order, int hit_order, int hsp_order, ILinkoutDB *linkoutdb, const string &mv_build_name)
static double GetPercentIdentity(const objects::CSeq_align &aln, objects::CScope &scope, bool do_translation)
calculate the percent identity for a seqalign
static void ExtractSeqalignSetFromDiscSegs(objects::CSeq_align_set &target, const objects::CSeq_align_set &source)
If a Seq-align-set contains Seq-aligns with discontinuous type segments, extract the underlying Seq-a...
static bool IsWGSPattern(string &wgsAccession)
Check if accession is WGS.
static bool SortHitByScoreDescending(const CRef< objects::CSeq_align_set > &info1, const CRef< objects::CSeq_align_set > &info2)
static CRef< objects::CSeq_align_set > FilterSeqalignByScoreParams(objects::CSeq_align_set &source_aln, double evalueLow, double evalueHigh, double percentIdentLow, double percentIdentHigh)
function for Filtering seqalign by expect value and percent identity
static void GetAlignLengths(objects::CAlnVec &salv, int &align_length, int &num_gaps, int &num_gap_opens)
Count alignment length, number of gap openings and total number of gaps in a single alignment.
static string BuildSRAUrl(const objects::CBioseq::TId &ids, string user_url)
return the SRA (Short Read Archive) URL
static bool SortHspByMasterStartAscending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
sorting function for sorting a list of seqalign by ascending master start position
static int SetCustomLinksTypes(SSeqURLInfo *seqUrlInfo, int customLinkTypesInp)
Create info indicating what kind of links to display.
static int GetMasterCoverage(const objects::CSeq_align_set &alnset)
static unique_ptr< CNcbiRegistry > m_Reg
static int GetFrame(int start, objects::ENa_strand strand, const objects::CBioseq_Handle &handle)
return the frame for a given strand. Note that start is zero-based.
static void GetBdlLinkoutInfo(const list< CRef< objects::CBlast_def_line > > &bdl, map< int, vector< objects::CBioseq::TId > > &linkout_map, ILinkoutDB *linkoutdb, const string &mv_build_name)
Create map that holds all linkouts for the list of blast deflines and corresponding seqIDs.
static bool SortHspByPercentIdentityDescending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
sorting function for sorting a list of seqalign by descending identity
static CRef< objects::CSeq_align_set > HitListToHspList(list< CRef< objects::CSeq_align_set > > &source)
extract all nested hsp's into a list
static string GetTitle(const objects::CBioseq_Handle &bh)
static TTaxId GetTaxidForSeqid(const objects::CSeq_id &id, objects::CScope &scope)
return the tax id for a seqid
static void AddSpace(CNcbiOstream &out, size_t number)
Add the specified white space.
static string GetAlignedRegionsURL(SSeqURLInfo *seqUrlInfo, const objects::CSeq_id &id, objects::CScope &scope)
Create URL showing aligned regions info.
static string m_Protocol
static list< string > GetGiLinksList(SSeqURLInfo *seqUrlInfo, bool hspRange=false)
static void AcknowledgeBlastSubject(const objects::CBioseq &cbs, size_t line_len, CNcbiOstream &out, bool believe_query, bool html, bool tabular=false)
Print out blast subject info.
static TGi GetGiForSeqIdList(const list< CRef< objects::CSeq_id > > &ids)
return gi from id list
static void SplitSeqalignByMolecularType(vector< CRef< objects::CSeq_align_set > > &target, int sort_method, const objects::CSeq_align_set &source, objects::CScope &scope, ILinkoutDB *linkoutdb, const string &mv_build_name)
static bool SortHitByMasterCoverageDescending(CRef< objects::CSeq_align_set > const &info1, CRef< objects::CSeq_align_set > const &info2)
static void BlastPrintError(list< SBlastError > &error_return, bool error_post, CNcbiOstream &out)
Output blast errors.
static string MapSpaceTemplate(string inpString, string tmplParamName, string templParamVal, unsigned int maxParamLength, int spacesFormatFlag=eSpacePosAtLineEnd)
Replace template tags by real data and calculate and add spaces dependent on maxParamLength and space...
static void HspListToHitList(list< CRef< objects::CSeq_align_set > > &target, const objects::CSeq_align_set &source)
group hsp's with the same id together
static void SortHitByPercentIdentityDescending(list< CRef< objects::CSeq_align_set > > &seqalign_hit_list, bool do_translation)
sort a list of seqalign set by alignment identity
static string MapProtocol(string url_link)
static string GetFullIDLink(SSeqURLInfo *seqUrlInfo, const objects::CBioseq::TId *ids)
static bool SortHspByScoreDescending(const CRef< objects::CSeq_align > &info1, const CRef< objects::CSeq_align > &info2)
static CRef< objects::CSeq_align_set > LimitSeqalignByHsps(objects::CSeq_align_set &source_aln, int maxAligns, int maxHsps)
function for limiting seqalign by hsps number (by default results are not cut off within the query)
static CRef< objects::CSeq_align_set > ExtractQuerySeqAlign(CRef< objects::CSeq_align_set > &source_aln, int queryNumber)
function for extracting seqalign for the query
static string GetLabel(CConstRef< objects::CSeq_id > id, bool with_version=false)
Return a label for an ID Tries to recreate behavior of GetLabel before a change that prepends "ti|" t...
static void x_WrapOutputLine(string str, size_t line_len, CNcbiOstream &out, bool html=false)
Wrap a string to specified length.
static unsigned int GetSubjectsNumber(const objects::CSeq_align_set &source_aln, unsigned int num)
Calculate number of subject sequences in alignment limited by num.
static CRange< TSeqPos > GetSeqAlignCoverageParams(const objects::CSeq_align_set &alnset, int *masterCoverage, bool *flip)
TDim GetNumRows(void) const
Definition: alnmap.hpp:517
TSeqPos GetAlnStop(TNumseg seg) const
Definition: alnmap.hpp:488
TSignedRange GetSeqAlnRange(TNumrow row) const
Definition: alnmap.hpp:691
CRef< CAlnChunkVec > GetAlnChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
Definition: alnmap.cpp:1002
CBioseq_Handle –.
API for Remote Blast Services.
Definition: Dbtag.hpp:53
CFile –.
Definition: ncbifile.hpp:1605
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264