NCBI C++ ToolKit
rps.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's offical duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: rps.cpp
29 
30 Author: Jason Papadopoulos
31 
32 Contents: Use RPS blast to find domain hits
33 
34 ******************************************************************************/
35 
36 #include <ncbi_pch.hpp>
43 #include <algo/cobalt/cobalt.hpp>
44 
50 
51 #include <serial/iterator.hpp>
52 
53 #include <algorithm>
54 
55 /// @file rps.cpp
56 /// Use RPS blast to find domain hits
57 
59 BEGIN_SCOPE(cobalt)
60 
61 USING_SCOPE(blast);
63 
64 /// Given an RPS blast database, load a list of block offsets
65 /// for each database sequence. The list is resident in a text
66 /// file, where each line is as follows
67 /// <pre>
68 /// [seq ID] [oid of block] [start block offset] [end block offset]
69 /// </pre>
70 /// Note that block offsets are zero-based
71 /// @param blockfile Name of file containing list of offsets [in]
72 /// @param blocklist the list of offsets read from file [out]
73 ///
74 void
75 CMultiAligner::x_LoadBlockBoundaries(string blockfile,
76  vector<SSegmentLoc>& blocklist)
77 {
78  CNcbiIfstream blockstream(blockfile.c_str());
79  if (blockstream.bad() || blockstream.fail())
80  NCBI_THROW(CBlastException, eInvalidArgument,
81  "Cannot open RPS blockfile");
82 
83  char buf[64];
85  int oid = 0;
86  int block_idx;
87  int start, end;
88 
89  blockstream >> buf;
90  blockstream >> block_idx;
91  blockstream >> start;
92  blockstream >> end;
93  blocklist.push_back(SSegmentLoc(oid, start, end));
94 
95  while (!blockstream.eof()) {
96  blockstream >> buf;
97  // This allows for new line at the end of block file
98  if (blockstream.eof()) {
99  break;
100  }
101  blockstream >> block_idx;
102  blockstream >> start;
103  blockstream >> end;
104 
105  if (block_idx == 0)
106  oid++;
107 
108  blocklist.push_back(SSegmentLoc(oid, start, end));
109  }
110 }
111 
112 
// NOTE(review): the line holding the function name and first parameter
// (original line 114) is missing from this listing. Judging by the call
// x_RealignBlocks(m_DomainHits, blocklist, profile_data) near the end of
// the file, this is presumably CMultiAligner::x_RealignBlocks with a
// first parameter named rps_hits -- confirm against the real source.
//
// For every hit in rps_hits, walks the entries of blocklist that overlap
// the hit's range on the database sequence, realigns the query against
// the PSSM rows of each such block with m_Aligner, and stores each
// accepted block alignment as a subhit of the hit. Hits that are too
// short relative to both query and database sequence, or that end up
// with no subhits, are flagged with SetKeepHit(i, false) and removed by
// PurgeUnwantedHits() at the end.
113 void
115  vector<SSegmentLoc>& blocklist,
116  CProfileData& profile_data)
117 {
118  // scale up the gap penalties used by the aligner, to match
119  // the scaling used by the RPS PSSMs
120 
121  /// @todo FIXME the scale factor should be chosen dynamically
122 
// NOTE(review): original lines 123-128 are missing from this listing;
// presumably they apply the scaling described above -- confirm.
129  m_Aligner.SetEndSpaceFree(false, false, false, false);
130 
131  // for each RPS hit
132 
133  for (int i = 0; i < rps_hits.Size(); i++) {
134 
135  CHit *hit = rps_hits.GetHit(i);
// NOTE(review): original line 136 is missing; it presumably declares
// 'query', which is used below -- confirm.
137  int db_seq = hit->m_SeqIndex2;
138  int *db_seq_offsets = profile_data.GetSeqOffsets();
// pssm points at the first PSSM row of database sequence db_seq
139  int **pssm = profile_data.GetPssm() + db_seq_offsets[db_seq];
140  int db_seq_length = db_seq_offsets[db_seq + 1] - db_seq_offsets[db_seq];
// right-hand fudge used for the previous block of this hit
141  int last_fudge = 0;
142 
143  _ASSERT(!(hit->HasSubHits()));
144 
145  // ignore this alignment if its extent is less than
146  // 60% of the extent of query and DB sequence
147 
148  if ((hit->m_SeqRange1.GetLength() < 0.6 * query.GetLength()) &&
149  (hit->m_SeqRange2.GetLength() < 0.6 * db_seq_length)) {
150  rps_hits.SetKeepHit(i, false);
151  continue;
152  }
153 
154  SSegmentLoc target(db_seq, hit->m_SeqRange2.GetFrom(),
155  hit->m_SeqRange2.GetTo());
156 
157  // locate the first block in the subject sequence
158  // that contains a piece of the HSP
159 
160  vector<SSegmentLoc>::iterator
161  itr = lower_bound(blocklist.begin(), blocklist.end(),
162  target, compare_sseg_db_idx());
163 
164  _ASSERT(itr != blocklist.end() &&
165  target.seq_index == itr->seq_index);
166 
167  // walk up to the first block that is not
168  // in front of the alignment
169 
170  while (itr != blocklist.end() &&
171  itr->seq_index == target.seq_index &&
172  itr->GetTo() < target.GetFrom()) {
173  itr++;
174  }
175 
// prev_itr / next_itr track the neighbours of the current block
176  vector<SSegmentLoc>::iterator prev_itr(itr);
177  vector<SSegmentLoc>::iterator next_itr(itr);
178  if (itr != blocklist.begin()) {
179  prev_itr--;
180  }
// NOTE(review): next_itr is advanced without checking
// itr != blocklist.end(); the walk above can leave itr at end() -- verify.
181  next_itr++;
182 
183  // for each block that contains a portion of the
184  // original alignment
185 
186  while (itr != blocklist.end() && itr->seq_index == db_seq
187  && itr->GetFrom() < target.GetTo()) {
188 
189  const int kMaxFudge = 6;
190  TRange q_range, new_s_range;
191  TRange tback_range;
192 
193  // calculate the offsets into the subject sequence
194  // that correspond to the current block
195 
196  TRange s_range(itr->range.IntersectionWith(target.range));
197  _ASSERT(!s_range.Empty() && itr->range.Contains(s_range));
198 
199  int left_fudge, right_fudge;
200 
201  // calculate how much extra room on the query sequence
202  // to allow for realignment. The size of the 'fudge'
203  // to the left is the difference between the length of
204  // the loop region to the left and the length of the
205  // previous fudge, up to a limit of kMaxFudge
206 
207  if (itr == blocklist.begin() ||
208  prev_itr == blocklist.begin() ||
209  prev_itr->seq_index != db_seq) {
210  left_fudge = 0;
211  }
212  else {
213  left_fudge = s_range.GetFrom() -
214  prev_itr->GetTo() - last_fudge - 1;
215  left_fudge = min(left_fudge, kMaxFudge);
216  }
217 
218  // The extra room on the right is half the length
219  // of the loop region to the right, up to the same limit
220 
221  if (itr == blocklist.end() ||
222  next_itr == blocklist.end() ||
223  next_itr->seq_index != db_seq) {
224  right_fudge = 0;
225  }
226  else {
227  right_fudge = (next_itr->GetFrom() - s_range.GetTo() - 1) / 2;
228  right_fudge = min(right_fudge, kMaxFudge);
229  }
230 
231  last_fudge = right_fudge;
232 
233  // compute the start and stop offsets into the
234  // query sequence that correspond to the subject range
235  // specified by the current block.
236 
237  hit->GetRangeFromSeq2(s_range, q_range, new_s_range, tback_range);
238 
239  // pre-advance the iterators
// (done here so that the 'continue' statements below
// still move on to the next block)
240 
241  if (prev_itr != itr) {
242  prev_itr++;
243  }
244  itr++;
245  if (next_itr != blocklist.end()) {
246  next_itr++;
247  }
248 
249  // Throw away alignments whose query range is too small
250 
251  if (q_range.GetLength() <= CHit::kMinHitSize)
252  continue;
253 
254  // or for which the difference between query and database
255  // regions is too large (i.e. query sequence has a big gap)
256 
257  if (s_range.GetLength() > 3 * q_range.GetLength() / 2) {
258  if (m_Options->GetVerbose()) {
259  printf("ignore aligning query %d %d-%d db %d block %d-%d\n",
260  hit->m_SeqIndex1, q_range.GetFrom(), q_range.GetTo(),
261  db_seq, s_range.GetFrom(), s_range.GetTo());
262  }
263  continue;
264  }
265 
// widen the query range by the fudge amounts, clamped to the
// query extent of the original hit
266  q_range.SetFrom(max(hit->m_SeqRange1.GetFrom(),
267  q_range.GetFrom() - left_fudge));
268  q_range.SetTo(min(hit->m_SeqRange1.GetTo(),
269  q_range.GetTo() + right_fudge));
270 
271  // Now realign the block to the query sequence
272 
273  m_Aligner.SetSequences((const int **)(pssm + s_range.GetFrom()),
274  s_range.GetLength(),
275  (const char *)query.GetSequence() + q_range.GetFrom(),
276  q_range.GetLength());
277 
278  int score = m_Aligner.Run();
279  const CNWAligner::TTranscript tback(m_Aligner.GetTranscript(false));
280  int tback_size = tback.size();
281  CEditScript final_script;
282 
// NOTE(review): tback[0] is read without checking tback_size > 0;
// presumably the aligner never returns an empty transcript -- confirm.
283  if ((tback[0] == CNWAligner::eTS_Delete &&
284  tback[tback_size-1] == CNWAligner::eTS_Insert) ||
285  (tback[0] == CNWAligner::eTS_Insert &&
286  tback[tback_size-1] == CNWAligner::eTS_Delete)) {
287 
288  // The query region falls outside the DB region.
289  // Throw away the alignment and reuse the original one.
290 
291  hit->GetRangeFromSeq2(s_range, q_range, s_range, tback_range);
292 
293  // throw away alignments that are too small
294 
295  if (q_range.GetLength() <= CHit::kMinHitSize ||
296  s_range.GetLength() <= CHit::kMinHitSize)
297  continue;
// NOTE(review): original lines 300 and 303 are missing from this
// listing, so the GetScore() argument list below is incomplete.
298  score = hit->GetEditScript().GetScore(
299  tback_range,
301  hit->m_SeqRange2.GetFrom()),
302  query, pssm,
304  final_script = hit->GetEditScript().MakeEditScript(tback_range);
305  }
306  else {
307 
308  // strip off leading and trailing gaps in the
309  // database sequence. Modify the alignment score
310  // accordingly
311 
312  int first_tback = 0;
313  int last_tback = tback_size - 1;
314  int q_start = q_range.GetFrom();
315  int q_stop = q_range.GetTo();
316  int s_start = s_range.GetFrom();
317  int s_stop = s_range.GetTo();
318 
// leading non-match operations: advance the start offsets and
// subtract the gap terms (GetWs, GetEndWg/GetWg) from the score
319  for (int k = 0; k < tback_size &&
320  tback[k] != CNWAligner::eTS_Match; k++) {
321  first_tback++;
322  if (tback[k] == CNWAligner::eTS_Delete)
323  s_start++;
324  else if (tback[k] == CNWAligner::eTS_Insert)
325  q_start++;
326 
327  score -= m_Aligner.GetWs();
328  if (k == 0)
329  score -= m_Aligner.GetEndWg();
330  else if (tback[k] != tback[k-1])
331  score -= m_Aligner.GetWg();
332  }
333 
// trailing non-match operations: same treatment from the right
334  for (int k = tback_size - 1; k >= 0 &&
335  tback[k] != CNWAligner::eTS_Match; k--) {
336  last_tback--;
337  if (tback[k] == CNWAligner::eTS_Delete)
338  s_stop--;
339  else if (tback[k] == CNWAligner::eTS_Insert)
340  q_stop--;
341 
342  score -= m_Aligner.GetWs();
343  if (k == tback_size - 1)
344  score -= m_Aligner.GetEndWg();
345  else if (tback[k] != tback[k+1])
346  score -= m_Aligner.GetWg();
347  }
348 
349  // throw away alignments that are too small
350 
351  q_range.Set(q_start, q_stop);
352  s_range.Set(s_start, s_stop);
353  if (q_range.GetLength() <= CHit::kMinHitSize ||
354  s_range.GetLength() <= CHit::kMinHitSize)
355  continue;
356 
357  _ASSERT(tback[first_tback] == CNWAligner::eTS_Match);
358  _ASSERT(tback[last_tback] == CNWAligner::eTS_Match);
359 
360  final_script = CEditScript::MakeEditScript(tback,
361  TRange(first_tback, last_tback));
362  }
363 
364  // save the new block alignment if the rounded-down
365  // version of its score is positive
366 
367  if (score > kRpsScaleFactor / 2) {
368  hit->InsertSubHit(new CHit(hit->m_SeqIndex1,
369  hit->m_SeqIndex2,
370  q_range, s_range,
371  score, final_script));
372  }
373  }
374 
375  // finish processing hit i
376 
377  if (hit->HasSubHits()) {
378  hit->ResolveSubHitConflicts(query, pssm,
379  m_Aligner.GetWg(),
380  m_Aligner.GetWs());
381  hit->AddUpSubHits();
382  }
383  else {
384  rps_hits.SetKeepHit(i, false);
385  }
386 
387  // check for interrupt
// NOTE(review): original lines 388-389 (the interrupt test and the
// start of the throw statement) are missing from this listing.
390  "Alignment interrupted");
391  }
392  }
393 
394  // remove RPS hits that do not have block alignments,
395  // or were deleted for some other reason
396 
397  rps_hits.PurgeUnwantedHits();
398 
399  // restore the original gap penalties
400 
// NOTE(review): original lines 401-406 are missing from this listing;
// presumably they restore the penalties as the comment above says.
407 }
408 
409 
// NOTE(review): the line holding the function name and first parameter
// (original line 411) is missing from this listing. Judging by the calls
// x_FindRPSHits(queries, indices, m_DomainHits) later in the file, this
// is presumably CMultiAligner::x_FindRPSHits with a first parameter
// named queries -- confirm against the real source.
//
// Runs a local BLAST search of 'queries' against the database named by
// m_Options->GetRpsDb() and appends one CHit to rps_hits for every
// returned alignment whose e-value does not exceed
// m_Options->GetRpsEvalue(). indices[i] is stored in each hit as the
// query identifier for queries[i].
410 void
412  const vector<int>& indices,
413  CHitList& rps_hits)
414 {
415  _ASSERT(queries.size() == indices.size());
416 
417  int num_queries = queries.size();
418 
// NOTE(review): original line 419 is missing; it presumably creates
// the options handle 'opts' used below -- confirm.
420 
421  // deliberately set the cutoff e-value too high, to
422  // account for alignments where the gapped score is
423  // very different from the ungapped score
424 
425  opts->SetEvalueThreshold(max(m_Options->GetRpsEvalue(), 10.0));
426  opts->SetFilterString("F");
// NOTE(review): original line 427 is missing from this listing.
428  (dynamic_cast<CBlastRPSOptionsHandle*>(opts.GetNonNullPointer()))
429  ->SetCompositionBasedStats(false);
430 
431  // run RPS blast
432 
// NOTE(review): original line 434 (the remaining constructor
// argument(s) of search_database) is missing from this listing.
433  CSearchDatabase search_database(m_Options->GetRpsDb(),
435  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
436 
437  CLocalBlast blaster(query_factory, opts, search_database);
438  CSearchResultSet results = *blaster.Run();
439 
440  // convert the results to the internal format used by
441  // the rest of CMultiAligner
442 
// NOTE(review): original line 443 is missing; it presumably declares
// 'seqdb', which is used below to map Seq-ids to OIDs -- confirm.
444 
445  // iterate over queries
446 
447  for (int i = 0; i < num_queries; i++) {
448 
449  // iterate over hitlists
450 
451  ITERATE(CSeq_align_set::Tdata, itr, results[i].GetSeqAlign()->Get()) {
452 
453  // iterate over hits
454 
455  const CSeq_align& s = **itr;
456  const CDense_seg& denseg = s.GetSegs().GetDenseg();
457  int align_score = 0;
458  double evalue = 0;
459 
460  // compute the score of the hit
// (read the "score" and "e_value" entries of the alignment;
// both stay at 0 when the entry is absent)
461 
462  ITERATE(CSeq_align::TScore, score_itr, s.GetScore()) {
463  const CScore& curr_score = **score_itr;
464  if (curr_score.GetId().GetStr() == "score")
465  align_score = curr_score.GetValue().GetInt();
466  else if (curr_score.GetId().GetStr() == "e_value")
467  evalue = curr_score.GetValue().GetReal();
468  }
469 
470  // check if the hit is worth saving
// (the search itself used a looser threshold, see above)
471  if (evalue > m_Options->GetRpsEvalue())
472  continue;
473 
474  // locate the ID of the database sequence that
475  // produced the hit, and save the hit
476 
// NOTE(review): unlike the pre-computed-hits code later in this file,
// db_oid is not checked for a failed lookup here -- verify.
477  int db_oid;
478  seqdb.SeqidToOid(*denseg.GetIds()[1], db_oid);
479  rps_hits.AddToHitList(new CHit(indices[i], db_oid,
480  align_score, denseg));
481  }
482 
483  // check for interrupt
// NOTE(review): original lines 484-485 (the interrupt test and the
// start of the throw statement) are missing from this listing.
486  "Alignment interrupted");
487  }
488  }
489 
490 
491  //-------------------------------------------------------
// verbose diagnostics: dump every hit currently in rps_hits
492  if (m_Options->GetVerbose()) {
493  printf("RPS hits:\n");
494  for (int i = 0; i < rps_hits.Size(); i++) {
495  CHit *hit = rps_hits.GetHit(i);
496  printf("query %d %4d - %4d db %d %4d - %4d score %d\n",
497  hit->m_SeqIndex1,
498  hit->m_SeqRange1.GetFrom(),
499  hit->m_SeqRange1.GetTo(),
500  hit->m_SeqIndex2,
501  hit->m_SeqRange2.GetFrom(),
502  hit->m_SeqRange2.GetTo(),
503  hit->m_Score);
504  }
505  printf("\n\n");
506  }
507  //-------------------------------------------------------
508 }
509 
510 
// NOTE(review): the line holding the function name and first parameter
// (original line 512) is missing from this listing. Judging by the call
// x_AssignRPSResFreqs(m_DomainHits, profile_data) later in the file,
// this is presumably CMultiAligner::x_AssignRPSResFreqs with a first
// parameter named rps_hits -- confirm against the real source.
//
// Sorts rps_hits by score and, for each hit that does not overlap on the
// query with an earlier kept hit of the same query, overwrites the query's
// residue-frequency columns in every match region of every subhit with
// values derived from the database residue frequencies in profile_data.
// The covered query ranges are recorded in m_RPSLocs.
511 void
513  CProfileData& profile_data)
514 {
515  if (rps_hits.Empty()) {
516  return;
517  }
518 
519  rps_hits.SortByScore();
520 
521  // for each hit
522 
523  for (int i = 0; i < rps_hits.Size(); i++) {
524  CHit *hit = rps_hits.GetHit(i);
525 
526  _ASSERT(hit->HasSubHits());
527 
528  // skip hit i if it overlaps on the query sequence
529  // with a higher-scoring HSP.
530 
531  int j;
532  for (j = 0; j < i; j++) {
533  CHit *better_hit = rps_hits.GetHit(j);
534 
535  if (better_hit->m_SeqIndex1 != hit->m_SeqIndex1)
536  continue;
537 
538  if (rps_hits.GetKeepHit(j) == true &&
539  better_hit->m_SeqRange1.IntersectingWith(hit->m_SeqRange1))
540  break;
541  }
// j < i means the loop above stopped early on a conflicting hit
542  if (j < i) {
543  continue;
544  }
545 
546  // The hit does not conflict; use the traceback of each block
547  // to locate each position where a substitution occurs,
548  // and assign the appropriate column of residue frequencies
549  // at that position
550 
// NOTE(review): original line 551 is missing; it presumably declares
// 'query' for sequence hit->m_SeqIndex1 -- confirm.
552  CSequence::TFreqMatrix& matrix = query.GetFreqs();
553  _ASSERT(hit->m_SeqIndex1 < (int)m_RPSLocs.size());
554  m_RPSLocs[hit->m_SeqIndex1].clear();
555 
// first residue-frequency row of database sequence hit->m_SeqIndex2
556  Int4** ref_freqs = profile_data.GetResFreqs() +
557  (profile_data.GetSeqOffsets())[hit->m_SeqIndex2];
558 
559  double domain_res_freq_boost = m_Options->GetDomainResFreqBoost();
560 
561  NON_CONST_ITERATE(vector<CHit *>, itr, hit->GetSubHit()) {
562  CHit *subhit = *itr;
// NOTE(review): original line 564 is missing; per the Doxygen
// cross-reference it is presumably a ListMatchRegions() call -- confirm.
563  vector<TOffsetPair> sub_list(
565  TOffsetPair(subhit->m_SeqRange1.GetFrom(),
566  subhit->m_SeqRange2.GetFrom()) ));
567 
// sub_list is consumed two entries at a time: (start, stop) pairs
568  for (j = 0; j < (int)sub_list.size(); j += 2) {
569  TOffsetPair& start_pair(sub_list[j]);
570  TOffsetPair& stop_pair(sub_list[j+1]);
571  int q = start_pair.first;
572  int s = start_pair.second;
573 
574  _ASSERT(stop_pair.second - stop_pair.first ==
575  start_pair.second - start_pair.first);
576  _ASSERT(stop_pair.first-1 < query.GetLength());
577 
// each column becomes (1 - boost) * scaled database frequency,
// then the query's own letter receives the boost
578  for (int k = 0; k < stop_pair.first - start_pair.first; k++) {
579  for (int m = 0; m < kAlphabetSize; m++) {
580  matrix(q+k, m) =
581  (1 - domain_res_freq_boost) *
582  ((double)ref_freqs[s+k][m] / FREQ_RATIO_SCALE);
583 
584  }
585  matrix(q+k, query.GetLetter(q+k)) += domain_res_freq_boost;
586  }
587  // mark range as RPS-identified conserved domain
588  m_RPSLocs[hit->m_SeqIndex1].push_back(TRange(start_pair.first,
589  stop_pair.first));
590  }
591  }
592 
593  // check for interrupt
// NOTE(review): original lines 594-595 (the interrupt test and the
// start of the throw statement) are missing from this listing.
596  "Alignment interrupted");
597  }
598  }
599 }
600 
601 
// NOTE(review): the line holding the function name (original line 603)
// is missing from this listing, so the name cannot be confirmed here.
//
// Fills the residue-frequency matrix of every sequence in m_QueryData,
// and in the visible second loop every sequence in m_AllQueryData, with
// (1 - boost) * std_freqs->prob[k], then adds the boost
// (m_Options->GetLocalResFreqBoost()) to the column entry of the residue
// actually present at each position.
602 void
604 {
605  // Assign background residue frequencies to otherwise
606  // unassigned columns. The actual residue at a given
607  // position is upweighted by a specified amount, and
608  // all other frequencies are downweighted
609 
// NOTE(review): original line 610 is missing; it presumably creates the
// score block 'sbp' that is used and freed below -- confirm.
611  Blast_ResFreq *std_freqs = Blast_ResFreqNew(sbp);
612  Blast_ResFreqStdComp(sbp, std_freqs);
613  double local_res_freq_boost = m_Options->GetLocalResFreqBoost();
614 
615  for (size_t i = 0; i < m_QueryData.size(); i++) {
// NOTE(review): original line 616 is missing; it presumably binds
// 'query' to m_QueryData[i] -- confirm.
617  CSequence::TFreqMatrix& matrix = query.GetFreqs();
618 
619  for (int j = 0; j < query.GetLength(); j++) {
620  for (int k = 0; k < kAlphabetSize; k++) {
621  matrix(j, k) = (1 - local_res_freq_boost) *
622  std_freqs->prob[k];
623  }
624  matrix(j, query.GetLetter(j)) += local_res_freq_boost;
625  }
626 
627  // check for interrupt
// NOTE(review): original lines 628-629 (the interrupt test and the
// start of the throw statement) are missing. If that throw fires,
// std_freqs and sbp are presumably not freed -- verify.
630  "Alignment interrupted");
631  }
632  }
633 
// NOTE(review): original line 634 is missing; the closing brace at
// original line 649 suggests it opens a conditional around the loop
// below -- confirm.
635 
636  for (size_t i = 0; i < m_AllQueryData.size(); i++) {
// NOTE(review): original line 637 is missing; it presumably binds
// 'query' to m_AllQueryData[i] -- confirm.
638  CSequence::TFreqMatrix& matrix = query.GetFreqs();
639  for (int j = 0; j < query.GetLength(); j++) {
640  for (int k = 0; k < kAlphabetSize; k++) {
641  matrix(j, k) = (1 - local_res_freq_boost) *
642  std_freqs->prob[k];
643  }
644  matrix(j, query.GetLetter(j)) += local_res_freq_boost;
645  }
646  }
647 
// NOTE(review): original line 648 is missing from this listing.
649  }
650 
// release the C structures allocated above
651  Blast_ResFreqFree(std_freqs);
652  BlastScoreBlkFree(sbp);
653 }
654 
655 
656 bool compare_seqids(const pair<const CSeq_id*, int>& a,
657  const pair<const CSeq_id*, int>& b)
658 {
659  _ASSERT(a.first && b.first);
660  return a.first->CompareOrdered(*b.first) > 0;
661 }
662 
// NOTE(review): the line holding the function name and first parameter
// (original line 664) is missing from this listing. Judging by the call
// x_SetDomainHits(queries, indices, *m_Options->GetDomainHits()) later in
// the file, this is presumably CMultiAligner::x_SetDomainHits with a
// first parameter named pre_queries -- confirm against the real source.
//
// Imports pre-computed conserved-domain results from a BLAST 4 archive:
//  1. marks in m_IsDomainSearched every aligner query whose Seq-id
//     appears among the archive's queries;
//  2. converts each archived alignment whose query is one of pre_queries
//     and whose e-value does not exceed m_Options->GetRpsEvalue() into a
//     CHit appended to m_DomainHits.
// m_IsDomainSearched is cleared again when no archive query matched or
// no hit was imported. Throws CMultiAlignerException (eInvalidInput) for
// an unsupported archive query format or an unknown subject domain.
663 void
665  const vector<int>& indices,
666  const CBlast4_archive& archive)
667 {
668  // This function sets pre-computed alignments of queries
669  // with conserved domains. Note that the results need not include all
670  // cobalt queries and not all domain queries need to be cobalt sequences
671 
672  _ASSERT(!pre_queries.empty());
673  _ASSERT(pre_queries.size() == indices.size());
674 
675  // initialize all queries as not searched for conserved domains
676  m_IsDomainSearched.resize(m_tQueries.size(), false);
// NOTE(review): original line 677 is missing from this listing.
678 
679  // create a sorted list query seq_ids
// (sorted with compare_seqids so it can be searched with lower_bound)
680  vector< pair<const CSeq_id*, int> > queries;
681  queries.reserve(pre_queries.size());
682  for (size_t i=0;i < pre_queries.size();i++) {
683  _ASSERT(pre_queries[i].seqloc->GetId());
684  queries.push_back(make_pair(pre_queries[i].seqloc->GetId(), indices[i]));
685  }
686  sort(queries.begin(), queries.end(), compare_seqids);
687 
688  // mark queries for which domain search was done,
689  // we use query list here in case the search returned no results
690  const CBlast4_queries& b4_queries
691  = archive.GetRequest().GetBody().GetQueue_search().GetQueries();
692 
693  if (!b4_queries.IsSeq_loc_list() && !b4_queries.IsBioseq_set()) {
694  NCBI_THROW(CMultiAlignerException, eInvalidInput, "Unsupported BLAST"
695  " 4 archive format");
696  }
697 
698  // if domain queries are seq_locs
699  if (b4_queries.IsSeq_loc_list()) {
700  ITERATE (list< CRef<CSeq_loc> >, it, b4_queries.GetSeq_loc_list()) {
701 
702  // iterate over domain queries
703 
704  // search for the query in the list of sequence to align
705  pair<const CSeq_id*, int> p((*it)->GetId(), -1);
// NOTE(review): original line 708 (the comparator argument closing
// this lower_bound call, presumably compare_seqids) is missing.
706  vector< pair<const CSeq_id*, int> >::iterator id_itr
707  = lower_bound(queries.begin(), queries.end(), p,
709 
710  // if query was found, then mark it as searched
711  if (id_itr != queries.end()
712  && id_itr->first->CompareOrdered(*p.first) == 0) {
713  m_IsDomainSearched[id_itr->second] = true;
714  }
715  }
716  }
717 
718  // if domain queries are bioseqs
719  if (b4_queries.IsBioseq_set()) {
// NOTE(review): original line 720 is missing; it presumably declares
// the iterator 'itr' over the Bioseq-set -- confirm.
721  eDetectLoops));
722  // iterate over domain queries
723  for (; itr; ++itr) {
724 
725  // search for the query in the list of sequences to align
726  pair<const CSeq_id*, int> p(itr->GetFirstId(), -1);
// NOTE(review): original line 729 (end of the lower_bound call) is
// missing from this listing.
727  vector< pair<const CSeq_id*, int> >::iterator id_itr
728  = lower_bound(queries.begin(), queries.end(), p,
730 
731  // if query was found, then mark it as searched
732  if (id_itr != queries.end()
733  && id_itr->first->CompareOrdered(*p.first) == 0) {
734  m_IsDomainSearched[id_itr->second] = true;
735  }
736  }
737  }
738  //-------------------------------------------------------
// verbose diagnostics: list the queries marked as already searched
739  if (m_Options->GetVerbose()) {
740  printf("Pre-computed RPS queries:\n");
741  for (size_t i=0;i < pre_queries.size();i++) {
742  _ASSERT(indices[i] < (int)m_IsDomainSearched.size());
743  if (m_IsDomainSearched[indices[i]]) {
744  printf("query: %d\n", indices[i]);
745  }
746  }
747  printf("\n");
748  }
749  //-------------------------------------------------------
750 
751  // check if at least one domain query matched cobalt query
752  bool is_presearched = false;
753  ITERATE (vector<bool>, it, m_IsDomainSearched) {
754  if (*it) {
755  is_presearched = true;
756  }
757  }
758  // if not, there is no need to analyze domain hits
759  if (!is_presearched) {
760  // empty array indicates that pre-computed domain hits are not set
761  m_IsDomainSearched.clear();
762  return;
763  }
764 
// NOTE(review): original line 765 is missing; it presumably declares
// 'seqdb', which is used below to map Seq-ids to OIDs -- confirm.
// From here on is_presearched records whether any hit was imported.
766  is_presearched = false;
767 
768  // get domain hits
769  const CSeq_align_set& aligns = archive.GetResults().GetAlignments();
// query_idx / last_query_id cache the lookup for runs of alignments
// that share the same query Seq-id
770  int query_idx = -1;
771  const CSeq_id* last_query_id = NULL;
772  ITERATE (CSeq_align_set::Tdata, it, aligns.Get()) {
773 
774  // iterate over hits
775 
776  const CSeq_align& s = **it;
777  const CDense_seg& denseg = s.GetSegs().GetDenseg();
778  int align_score = 0;
779  double evalue = 0;
780 
781  // find query index in m_tQueries
782  const CSeq_id& query_id = s.GetSeq_id(0);
783 
784  // search for query in sequences to align only if the current hit
785  // query is different from the previous one
786  if (!last_query_id || query_id.CompareOrdered(*last_query_id) != 0) {
787 
788  // find query seq id
789  pair<const CSeq_id*, int> p(&query_id, -1);
// NOTE(review): original line 792 (end of the lower_bound call) is
// missing from this listing.
790  vector< pair<const CSeq_id*, int> >::iterator id_itr
791  = lower_bound(queries.begin(), queries.end(), p,
793 
794  // if the hit query is not to be aligned, then skip processing
795  // this Seq_align
796  if (id_itr == queries.end()
797  || id_itr->first->CompareOrdered(*p.first) != 0) {
798 
799  query_idx = -1;
800  continue;
801  }
802  query_idx = id_itr->second;
803  last_query_id = id_itr->first;
804  }
805  if (query_idx < 0) {
806  continue;
807  }
808 
809  // compute the score of the hit
// (read the "score" and "e_value" entries of the alignment;
// both stay at 0 when the entry is absent)
810 
811  ITERATE(CSeq_align::TScore, score_itr, s.GetScore()) {
812  const CScore& curr_score = **score_itr;
813  if (curr_score.GetId().GetStr() == "score")
814  align_score = curr_score.GetValue().GetInt();
815  else if (curr_score.GetId().GetStr() == "e_value")
816  evalue = curr_score.GetValue().GetReal();
817  }
818 
819  // check if the hit is worth saving
820  if (evalue > m_Options->GetRpsEvalue())
821  continue;
822 
823  // locate the ID of the database sequence that
824  // produced the hit, and save the hit
825 
826  int db_oid;
827  seqdb.SeqidToOid(*denseg.GetIds()[1], db_oid);
// a negative OID is treated as "subject not present in the database"
828  if (db_oid < 0) {
829  NCBI_THROW(CMultiAlignerException, eInvalidInput, "The pre-computed"
830  " subject domain " + denseg.GetIds()[1]->AsFastaString()
831  + " does not exist in the domain database "
832  + m_Options->GetRpsDb());
833  }
834  m_DomainHits.AddToHitList(new CHit(query_idx, db_oid,
835  align_score, denseg));
836 
837  is_presearched = true;
838  }
839 
// no usable hit was imported: report "no pre-computed hits"
840  if (!is_presearched) {
841  m_IsDomainSearched.clear();
842  }
843 
844  //-------------------------------------------------------
// verbose diagnostics: dump the imported hits
845  if (m_Options->GetVerbose()) {
846  printf("Pre-computed RPS hits:\n");
847  for (int i = 0; i < m_DomainHits.Size(); i++) {
848  CHit *hit = m_DomainHits.GetHit(i);
849  printf("query %d %4d - %4d db %d %4d - %4d score %d\n",
850  hit->m_SeqIndex1,
851  hit->m_SeqRange1.GetFrom(),
852  hit->m_SeqRange1.GetTo(),
853  hit->m_SeqIndex2,
854  hit->m_SeqRange2.GetFrom(),
855  hit->m_SeqRange2.GetTo(),
856  hit->m_Score);
857  }
858  printf("\n\n");
859  }
860  //-------------------------------------------------------
861 
862 
863 }
864 
// NOTE(review): the line holding the function name and first parameter
// (original line 866) is missing from this listing, so the name cannot
// be confirmed here; the body uses a first parameter named queries.
//
// Top-level driver for conserved-domain processing. Does nothing when
// no RPS database is configured. Otherwise it:
//  1. imports pre-computed domain hits if the options provide them;
//  2. runs the RPS search for all queries, or only for those not covered
//     by the pre-computed results;
//  3. loads the block boundaries (<db>.blocks) and PSSMs and realigns
//     the hits block by block;
//  4. loads residue frequencies (<db>.freq) and assigns them to the
//     queries;
//  5. removes the score scaling from m_CombinedHits and their subhits.
865 void
867  const vector<int>& indices)
868 {
869  string rps_db = m_Options->GetRpsDb();
870  string blockfile = rps_db + ".blocks";
871  string freqfile = rps_db + ".freq";
872 
873  if (rps_db.empty()) {
874  return;
875  }
876 
877  // set pre-computed domain hits if available
878  if (m_Options->CanGetDomainHits()) {
879  x_SetDomainHits(queries, indices, *m_Options->GetDomainHits());
880  }
881 
// NOTE(review): original lines 882 and 885 are missing from this
// listing; the comment below suggests a hit list is emptied here.
883 
884  // empty previously found hits
886 
887 
888  // if there are no pre-computed domain hits search for domain in all
889  // queries
890  if (m_IsDomainSearched.empty()) {
// NOTE(review): original line 891 is missing from this listing.
892 
893  // run RPS blast
894 
895  x_FindRPSHits(queries, indices, m_DomainHits);
896  }
897  else {
898  // otherwise, search only queries that were not searched for
899  // pre-computed results
900 
901  _ASSERT(m_IsDomainSearched.size() == m_tQueries.size());
902 
903  // find if there is at least one query that was not pre-searched
904  bool do_search = false;
905  for (size_t i=0;i < indices.size();i++) {
906  _ASSERT(indices[i] < (int)m_IsDomainSearched.size());
907  if (!m_IsDomainSearched[indices[i]]) {
908  do_search = true;
909  break;
910  }
911  }
912 
913  // search for domains
914  if (do_search) {
// collect the queries (and their indices) that still need a search
915  TSeqLocVector queries_not_searched;
916  vector<int> indices_not_searched;
917  for (size_t i=0;i < queries.size();i++) {
918  if (!m_IsDomainSearched[indices[i]]) {
919  queries_not_searched.push_back(queries[i]);
920  indices_not_searched.push_back(indices[i]);
921  }
922  }
923  // run RPS blast
924  x_FindRPSHits(queries_not_searched, indices_not_searched,
925  m_DomainHits);
926  }
927  }
928 
929  // check for interrupt
// NOTE(review): original lines 930-931 (the interrupt test and the
// start of the throw statement) are missing from this listing.
932  "Alignment interrupted");
933  }
934 
935  vector<SSegmentLoc> blocklist;
936  x_LoadBlockBoundaries(blockfile, blocklist);
937 
938  // Load the RPS PSSMs and perform block realignment
939 
940  CProfileData profile_data;
941  profile_data.Load(CProfileData::eGetPssm, rps_db);
942  x_RealignBlocks(m_DomainHits, blocklist, profile_data);
// the block list and PSSMs are not used again below; release them
943  blocklist.clear();
944  profile_data.Clear();
945 
946  //-------------------------------------------------------
// verbose diagnostics: dump every subhit left after realignment
947  if (m_Options->GetVerbose()) {
948  printf("\n\nBlock alignments with conflicts resolved:\n");
949  for (int i = 0; i < m_DomainHits.Size(); i++) {
950  CHit *hit = m_DomainHits.GetHit(i);
951  NON_CONST_ITERATE(vector<CHit *>, itr, hit->GetSubHit()) {
952  CHit *subhit = *itr;
953  printf("query %d %4d - %4d db %d %4d - %4d score %d ",
954  subhit->m_SeqIndex1,
955  subhit->m_SeqRange1.GetFrom(),
956  subhit->m_SeqRange1.GetTo(),
957  subhit->m_SeqIndex2,
958  subhit->m_SeqRange2.GetFrom(),
959  subhit->m_SeqRange2.GetTo(),
960  subhit->m_Score);
961 
962  printf("\n");
963  }
964  }
965  printf("\n\n");
966  }
967  //-------------------------------------------------------
968 
969  if (m_DomainHits.Empty())
970  return;
971 
// NOTE(review): original line 972 is missing from this listing.
973 
974  // propagate the residue frequencies of the best
975  // RPS hits onto the query sequences
976 
977  m_RPSLocs.resize(m_tQueries.size());
978  profile_data.Load(CProfileData::eGetResFreqs, rps_db, freqfile);
979  x_AssignRPSResFreqs(m_DomainHits, profile_data);
980  profile_data.Clear();
981 
982  // Connect together RPS hits to the same region of the
983  // same database sequence
984 
// NOTE(review): original line 985 is missing; per the Doxygen
// cross-reference it presumably calls MatchOverlappingSubHits() to
// fill m_CombinedHits -- confirm.
986 
987  // Remove the scaling on the scores
// (integer division by the scale factor, rounded to nearest by
// adding half the scale factor first)
988 
989  const int kRpsScale = CMultiAligner::kRpsScaleFactor;
990  for (int i = 0; i < m_CombinedHits.Size(); i++) {
991  CHit *hit = m_CombinedHits.GetHit(i);
992  hit->m_Score = (hit->m_Score + kRpsScale/2) / kRpsScale;
993  NON_CONST_ITERATE(CHit::TSubHit, subitr, hit->GetSubHit()) {
994  CHit *subhit = *subitr;
995  subhit->m_Score = (subhit->m_Score + kRpsScale/2) / kRpsScale;
996  }
997  }
998 
999  //-------------------------------------------------------
// verbose diagnostics: dump the subhits of the combined hits
1000  if (m_Options->GetVerbose()) {
1001  printf("\n\nMatched block alignments:\n");
1002  for (int i = 0; i < m_CombinedHits.Size(); i++) {
1003  CHit *hit = m_CombinedHits.GetHit(i);
1004  NON_CONST_ITERATE(vector<CHit *>, itr, hit->GetSubHit()) {
1005  CHit *subhit = *itr;
1006  printf("query %d %4d - %4d query %d %4d - %4d score %d\n",
1007  subhit->m_SeqIndex1,
1008  subhit->m_SeqRange1.GetFrom(),
1009  subhit->m_SeqRange1.GetTo(),
1010  subhit->m_SeqIndex2,
1011  subhit->m_SeqRange2.GetFrom(),
1012  subhit->m_SeqRange2.GetTo(),
1013  subhit->m_Score);
1014  }
1015  }
1016  printf("\n\n");
1017  }
1018  //-------------------------------------------------------
1019 }
1020 
1021 END_SCOPE(cobalt)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static const int kAlphabetSize
The aligner internally works only with the ncbistdaa alphabet.
Definition: base.hpp:119
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
Definition: base.hpp:115
pair< TOffset, TOffset > TOffsetPair
Basic type specifying a range on a sequence.
Definition: base.hpp:52
RPS BLAST structure definitions.
#define FREQ_RATIO_SCALE
header for RPS blast frequency ratios ('.freq') file
Definition: blast_rps.h:83
Declares the CBlastRPSOptionsHandle class.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
Blast_ResFreq * Blast_ResFreqFree(Blast_ResFreq *rfp)
Deallocates Blast_ResFreq and prob0 element.
Definition: blast_stat.c:1689
Int2 Blast_ResFreqStdComp(const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
Calculates residues frequencies given a standard distribution.
Definition: blast_stat.c:1887
Blast_ResFreq * Blast_ResFreqNew(const BlastScoreBlk *sbp)
Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.
Definition: blast_stat.c:1708
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definition: blast_stat.c:884
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
Definition: blast_types.hpp:63
CBlast4_archive –.
Defines BLAST error codes (user errors included)
Handle to the rpsblast options to the BLAST algorithm.
Interface for the traceback from blast hits.
Definition: traceback.hpp:55
int GetScore(TRange tback_range, TOffsetPair start_offsets, CSequence &seq1, int **seq2_pssm, int gap_open, int gap_extend)
Compute the score associated with (a portion of) an alignment Assumes that seq1 is a sequence and tha...
Definition: traceback.cpp:343
CEditScript MakeEditScript(TRange tback_range)
Return an edit script corresponding to a subset of the complete traceback available.
Definition: traceback.cpp:153
vector< TOffsetPair > ListMatchRegions(TOffsetPair start_offsets)
Compile a list of regions in the current edit script that contain substitutions.
Definition: traceback.cpp:444
An ordered collection of CHit objects.
Definition: hitlist.hpp:50
int Size() const
Retrieve number of hits in list.
Definition: hitlist.hpp:75
void SetKeepHit(int index, bool keep)
Set whether a hit in the hitlist will be scheduled for deletion.
Definition: hitlist.hpp:126
void PurgeAllHits()
Delete all hits unconditionally.
Definition: hitlist.hpp:148
bool Empty()
Determine whether a list contains no hits.
Definition: hitlist.hpp:79
CHit * GetHit(int index)
Retrieve a hit from the hitlist.
Definition: hitlist.hpp:93
void PurgeUnwantedHits()
Delete all hits scheduled to be deleted.
Definition: hitlist.hpp:134
bool GetKeepHit(int index)
Determine whether a hit in the hitlist has been scheduled for deletion.
Definition: hitlist.hpp:115
void SortByScore()
Sort the hits in the hitlist in order of decreasing score.
Definition: hitlist.cpp:349
void MatchOverlappingSubHits(CHitList &matched_list)
For each pair of hits with the same sequence2, produce a list of hits between sequence1 of the first ...
Definition: hitlist.cpp:221
void AddToHitList(CHit *hit)
Append a hit to the hitlist.
Definition: hitlist.hpp:84
A generalized representation of a pairwise alignment.
Definition: hit.hpp:86
TSubHit & GetSubHit()
Retrieve a list of subhits.
Definition: hit.hpp:185
void ResolveSubHitConflicts(CSequence &seq1, int **seq2_pssm, CNWAligner::TScore gap_open, CNWAligner::TScore gap_extend)
If pairs of subhits have overlapping ranges, either delete one or change one so that the overlap is a...
Definition: hit.cpp:285
void InsertSubHit(CHit *hit)
Add a hit to a CHit's list of subhits.
Definition: hit.hpp:180
void GetRangeFromSeq2(TRange seq_range2, TRange &seq_range1, TRange &new_seq_range2, TRange &traceback_range)
Retrieve the seq1 range corresponding to a specified seq2 range.
Definition: hit.cpp:109
void AddUpSubHits()
Sum the score of all subhits, and make the sequence ranges the union of the ranges of all subhits.
Definition: hit.cpp:87
int m_Score
Score of alignment.
Definition: hit.hpp:104
CEditScript & GetEditScript()
Retrieve the traceback associated with a CHit.
Definition: hit.hpp:190
int m_SeqIndex1
Numerical identifier for first sequence in alignment.
Definition: hit.hpp:97
int m_SeqIndex2
Numerical identifier for second sequence in alignment.
Definition: hit.hpp:101
TRange m_SeqRange1
The range of offsets on the first sequence.
Definition: hit.hpp:107
static const int kMinHitSize
Not always used, but useful to avoid extremely small hits.
Definition: hit.hpp:90
TRange m_SeqRange2
The range of offsets on the second sequence.
Definition: hit.hpp:110
bool HasSubHits()
Query if a CHit has a hierarchy of subhits available.
Definition: hit.hpp:195
vector< CHit * > TSubHit
Hits can be grouped hierarchically.
Definition: hit.hpp:93
Class to perform a BLAST search on local BLAST databases Note that PHI-BLAST can be run using this cl...
Definition: local_blast.hpp:62
TScore GetEndGapExtendPenalty(void) const
Get gap extension penalty for end gaps in pairwise global alignment of profiles.
Definition: options.hpp:658
string GetRpsDb(void) const
Get RPS Blast database name.
Definition: options.hpp:362
double GetLocalResFreqBoost(void) const
Get frequency boost for a letter that appears in query sequence in given position.
Definition: options.hpp:592
int GetDomainHitlistSize(void) const
Get hitlist size (per sequence) for domain searches.
Definition: options.hpp:501
CConstRef< objects::CBlast4_archive > GetDomainHits(void) const
Get pre-computed domain hits.
Definition: options.hpp:708
TScore GetGapExtendPenalty(void) const
Get gap extension penalty for middle gaps in pairwise global alignment of profiles.
Definition: options.hpp:632
bool CanGetDomainHits(void) const
Are pre-computed domain hits set.
Definition: options.hpp:714
TScore GetGapOpenPenalty(void) const
Get gap opening penalty for middle gaps in pairwise global alignment of profiles.
Definition: options.hpp:619
TScore GetEndGapOpenPenalty(void) const
Get gap opening penalty for end gaps in pairwise global alignment of profiles.
Definition: options.hpp:645
double GetRpsEvalue(void) const
Get e-value threshold for accepting RPS Blast hits.
Definition: options.hpp:490
@ eToPrototype
All cluster elements are aligned to the cluster prototype.
Definition: options.hpp:266
bool GetVerbose(void) const
Get verbose mode.
Definition: options.hpp:691
double GetDomainResFreqBoost(void) const
Get boost for residue frequencies in conserved domains from RPS database.
Definition: options.hpp:514
Simultaneously align multiple protein sequences.
Definition: cobalt.hpp:69
vector< CSequence > m_AllQueryData
Definition: cobalt.hpp:728
CMultiAlignerOptions::EInClustAlnMethod m_ClustAlnMethod
Definition: cobalt.hpp:744
SProgress m_ProgressMonitor
Definition: cobalt.hpp:737
vector< CRef< objects::CSeq_loc > > m_tQueries
Definition: cobalt.hpp:688
void x_MakeClusterResidueFrequencies()
Compute profile residue frequencies for clusters.
Definition: cobalt.cpp:1377
CHitList m_CombinedHits
Definition: cobalt.hpp:718
void x_AssignRPSResFreqs(CHitList &rps_hits, CProfileData &profile_data)
Definition: rps.cpp:512
@ eInterrupt
Alignment interrupted through callback function.
Definition: cobalt.hpp:83
vector< CSequence > m_QueryData
Definition: cobalt.hpp:691
vector< bool > m_IsDomainSearched
Marks sequences with pre-computed domain hits.
Definition: cobalt.hpp:724
void x_AssignDefaultResFreqs()
Definition: rps.cpp:603
vector< vector< TRange > > m_RPSLocs
Definition: cobalt.hpp:731
void x_LoadBlockBoundaries(string blockfile, vector< SSegmentLoc > &blocklist)
Given an RPS blast database, load a list of block offsets for each database sequence.
Definition: rps.cpp:75
void x_FindRPSHits(blast::TSeqLocVector &queries, const vector< int > &indices, CHitList &rps_hits)
Definition: rps.cpp:411
CHitList m_DomainHits
Definition: cobalt.hpp:716
CConstRef< CMultiAlignerOptions > m_Options
Definition: cobalt.hpp:686
void x_SetDomainHits(const blast::TSeqLocVector &queruies, const vector< int > &indices, const objects::CBlast4_archive &archive)
Set pre-computed domain hits using BLAST archive format.
Definition: rps.cpp:664
void x_FindDomainHits(blast::TSeqLocVector &queries, const vector< int > &indices)
Run RPS blast on selected input sequences and postprocess the results.
Definition: rps.cpp:866
void x_RealignBlocks(CHitList &rps_hits, vector< SSegmentLoc > &blocklist, CProfileData &profile_data)
Definition: rps.cpp:114
static const int kRpsScaleFactor
Definition: cobalt.hpp:500
CPSSMAligner m_Aligner
Definition: cobalt.hpp:710
@ eDomainHitsSearch
Definition: cobalt.hpp:93
FInterruptFn m_Interrupt
Definition: cobalt.hpp:736
NCBI C++ Object Manager dependant implementation of IQueryFactory.
Represent databases of PSSM data and residue frequencies.
Definition: resfreq.hpp:50
Int4 ** GetResFreqs() const
Assuming the database is a list of columns of profiles, frequencies, retrieve a list of all of the pr...
Definition: resfreq.hpp:89
@ eGetResFreqs
Retrieve residue frequencies.
Definition: resfreq.hpp:54
@ eGetPssm
Retrieve PSSMs.
Definition: resfreq.hpp:55
void Clear()
Free previously loaded PSSM or profile data.
Definition: resfreq.cpp:114
void Load(EMapChoice choice, string dbname, string resfreq_file="")
Load information from a given database.
Definition: resfreq.cpp:48
Int4 * GetSeqOffsets() const
Retrieve a list of offsets where database sequences begin.
Definition: resfreq.hpp:69
Int4 ** GetPssm() const
Assuming the database is a list of PSSM columns, retrieve a list of all of the PSSMs in the database ...
Definition: resfreq.hpp:79
Definition: Score.hpp:57
Blast Search Subject.
Search Results for All Queries.
CSeqDB.
Definition: seqdb.hpp:161
@ eProtein
Definition: seqdb.hpp:174
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
Definition: seqdb.cpp:903
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
Class for representing protein sequences.
Definition: seq.hpp:54
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Interface for CMultiAligner.
static char tmp[3200]
Definition: utf8.c:42
void SetStartWg(TScore value)
TTranscript GetTranscript(bool reversed=true) const
Definition: nw_aligner.cpp:909
void SetEndWs(TScore value)
virtual CNWAligner::TScore Run(void)
TScore GetWs(void) const
Definition: nw_aligner.hpp:167
TScore GetWg(void) const
Definition: nw_aligner.hpp:166
void SetEndWg(TScore value)
TScore GetEndWg() const
vector< ETranscriptSymbol > TTranscript
Definition: nw_aligner.hpp:199
void SetWs(TScore value)
void SetSequences(const char *seq1, size_t len1, const char *seq2, size_t len2, bool verify=true)
void SetWg(TScore value)
void SetEndSpaceFree(bool Left1, bool Right1, bool Left2, bool Right2)
Definition: nw_aligner.cpp:192
void SetStartWs(TScore value)
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
CRef< CSearchResultSet > Run()
Executes the search.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
void SetHitlistSize(int s)
Sets HitlistSize.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
@ eBlastDbIsProtein
protein
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
int CompareOrdered(const CSeq_id &sid2) const
Definition: Seq_id.cpp:486
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
@ eDetectLoops
Definition: iterator.hpp:998
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:968
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
position_type GetLength(void) const
Definition: range.hpp:158
bool IntersectingWith(const TThisType &r) const
Definition: range.hpp:331
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
bool Empty(void) const
Definition: range.hpp:148
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
bool IsSeq_loc_list(void) const
Check if variant Seq_loc_list is selected.
const TRequest & GetRequest(void) const
Get the Request member data.
const TQueue_search & GetQueue_search(void) const
Get the variant data.
const TBioseq_set & GetBioseq_set(void) const
Get the variant data.
bool IsBioseq_set(void) const
Check if variant Bioseq_set is selected.
const TSeq_loc_list & GetSeq_loc_list(void) const
Get the variant data.
const TQueries & GetQueries(void) const
Get the Queries member data.
const TAlignments & GetAlignments(void) const
Get the Alignments member data.
const TResults & GetResults(void) const
Get the Results member data.
const TBody & GetBody(void) const
Get the Body member data.
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
TInt GetInt(void) const
Get the variant data.
Definition: Score_.hpp:411
const TValue & GetValue(void) const
Get the Value member data.
Definition: Score_.hpp:465
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
list< CRef< CSeq_align > > Tdata
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
TReal GetReal(void) const
Get the variant data.
Definition: Score_.hpp:384
const TId & GetId(void) const
Get the Id member data.
Definition: Score_.hpp:444
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
char * buf
int i
Main class to perform a BLAST search on the local machine.
const TYPE & Get(const CNamedParameterList *param)
constexpr auto sort(_Init &&init)
unsigned int a
Definition: ncbi_localip.c:102
T max(T x_, T y_)
T min(T x_, T y_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
USING_SCOPE(blast)
bool compare_seqids(const pair< const CSeq_id *, int > &a, const pair< const CSeq_id *, int > &b)
Definition: rps.cpp:656
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure used for scoring calculations.
Definition: blast_stat.h:177
Stores the letter frequency of a sequence or database.
Definition: blast_stat.h:273
double * prob
letter probs, (possible) non-zero offset.
Definition: blast_stat.h:275
EAlignmentStage stage
Definition: cobalt.hpp:103
TOffset GetFrom() const
Definition: cobalt.hpp:494
TOffset GetTo() const
Definition: cobalt.hpp:495
static string query
#define _ASSERT
Modified on Tue May 21 10:59:54 2024 by modify_doxy.py rev. 669887