NCBI C++ ToolKit
phiblast_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: phiblast_unit_test.cpp 100942 2023-10-03 17:36:50Z ucko $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 * File Description:
29 * Unit test module to test the PHI BLAST functions.
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
42 #include <blast_setup.hpp>
43 
45 
46 using namespace std;
48 USING_SCOPE(blast);
49 
51 
52 public:
57 
58  void x_SetupSequenceBlk(const string& seq, BLAST_SequenceBlk** seq_blk) {
59  BlastSeqBlkNew(seq_blk);
60  Uint1* buffer = (Uint1*) malloc(seq.size() + 2);
61  buffer[0] = buffer[seq.size()+1] = 0;
62  memcpy(buffer+1, seq.c_str(), seq.size());
63  // Convert to ncbistdaa encoding
64  for (unsigned int index = 1; index <= seq.size(); ++index)
65  buffer[index] = AMINOACID_TO_NCBISTDAA[buffer[index]];
66 
67  BlastSeqBlkSetSequence(*seq_blk, buffer, seq.size());
68  }
69 
70  /// After the initial set-up is done, finds pattern occurrences in query and
71  /// fills the pattern information in the BlastQueryInfo structure.
73  const string kQuerySeq("GPLRQIFVEFLERSCTAEFSGFLLYKELGRRLKKTNPVVAEIFSLMSR"
74  "DEARHAGFLNKGLSDFNLALDLGFLTKARKYTFFKPKFIFYATYLSEK"
75  "IGYWRYITIFRHLKANPEYQVYPIFKYFENWCQDENRHGDFFSALL");
76  SPHIPatternSearchBlk* pattern_blk = (SPHIPatternSearchBlk*) m_Lookup->lut;
77  CBLAST_SequenceBlk query_blk;
78  x_SetupSequenceBlk(kQuerySeq, &query_blk);
80  CBlast_Message blast_msg;
81  BlastSeqLocNew(&location, 0, kQuerySeq.size()-1);
82  Blast_SetPHIPatternInfo(m_Program, pattern_blk, query_blk,
83  location, m_QueryInfo, &blast_msg);
85  BOOST_REQUIRE(location == NULL);
86  }
87 
89  BOOST_REQUIRE_EQUAL(8, gap_align->score);
90  BOOST_REQUIRE_EQUAL(94, gap_align->query_start);
91  BOOST_REQUIRE_EQUAL(142, gap_align->query_stop);
92  BOOST_REQUIRE_EQUAL(8, gap_align->subject_start);
93  BOOST_REQUIRE_EQUAL(61, gap_align->subject_stop);
94  /* Check several values in the edit script. */
95  BOOST_REQUIRE_EQUAL(3, gap_align->edit_script->num[0]);
96  BOOST_REQUIRE_EQUAL(5, gap_align->edit_script->num[1]);
97  GapEditScript* esp = gap_align->edit_script;
98  BOOST_REQUIRE_EQUAL(3, esp->size);
99  BOOST_REQUIRE_EQUAL(45, esp->num[2]);
100  }
101 
102  /// Set up: initializes the PHI "lookup table", aka the SPHIPatternSearchBlk
103  /// structure, the score block and the query information structure.
105  m_Program = eBlastTypePhiBlastp;
106  CBlastScoringOptions score_options;
107  BlastScoringOptionsNew(m_Program, &score_options);
109  // Nothing is needed from BlastQueryInfo except that it's allocated,
110  // and last_context is set to 0.
111  m_QueryInfo.Reset(BlastQueryInfoNew(m_Program, 1));
112  // In PHI BLAST, query block is not needed neither for score block setup,
113  // nor for lookup table set up!
114  BlastSetup_ScoreBlkInit(NULL, m_QueryInfo, score_options, m_Program,
115  &m_ScoreBlk, 1.0, &msg, &BlastFindMatrixPath);
116 
117  }
118 
119  void setUpLookupTable(string pattern)
120  {
121  CLookupTableOptions lookup_options;
122  LookupTableOptionsNew(m_Program, &lookup_options);
123  lookup_options->phi_pattern = strdup(pattern.c_str());
124  // Lookup segments and rps info arguments are irrelevant and passed as
125  // NULL.
126  LookupTableWrapInit(NULL, lookup_options, NULL, NULL, m_ScoreBlk, &m_Lookup, NULL, NULL, NULL);
127  }
128 
130  m_ScoreBlk.Reset();
131  m_Lookup.Reset();
132  }
133 
135  const int kNumPatterns = 4;
136  const SPHIPatternInfo kPatOccurrences[kNumPatterns] =
137  { {100,20}, {200,18}, {300,22}, {400, 21} };
138  SPHIQueryInfo* pat_info = SPHIQueryInfoNew();
139  pat_info->num_patterns = pat_info->allocated_size = kNumPatterns;
140  // Occurrences array has already been allocated to size 8, so memcpy
141  // is safe here.
142  memcpy(pat_info->occurrences, kPatOccurrences,
143  kNumPatterns*sizeof(SPHIPatternInfo));
144 
145  return pat_info;
146  }
147 
148  static BlastHSPList* x_SetupHSPList(int index) {
149  const int kHspMax = 10;
150  const int kNumRepetitions = 4;
151  BlastHSPList* hsp_list = Blast_HSPListNew(kHspMax);
152  hsp_list->oid = index;
153  for (int hsp_index = 0; hsp_index < kHspMax; ++hsp_index) {
154  BlastHSP* hsp = Blast_HSPNew();
155  hsp->score = 200 - 2*index - 5*hsp_index;
156  hsp->evalue = ((double)1)/hsp->score;
157  hsp->pat_info = (SPHIHspInfo*) calloc(1, sizeof(SPHIHspInfo));
158  hsp->pat_info->index = hsp_index % kNumRepetitions;
159  Blast_HSPListSaveHSP(hsp_list, hsp);
160  }
161  return hsp_list;
162  }
163 
164  static BlastHSPResults* x_SetupResults(const int kHitlistSize) {
166  for (int index = 0; index < kHitlistSize; ++index) {
167  BlastHSPList* hsp_list = x_SetupHSPList(index);
168  Blast_HSPResultsInsertHSPList(results, hsp_list, kHitlistSize);
169  }
170  return results;
171  }
172 
173  static bool
175  int index;
176  for (index = 0; index < hitlist->hsplist_count - 1; ++index) {
177  if (hitlist->hsplist_array[index]->best_evalue >
178  hitlist->hsplist_array[index+1]->best_evalue)
179  break;
180  }
181  return (index == hitlist->hsplist_count - 1);
182  }
183 
184  static void
185  x_CheckSplitResults(BlastHSPResults** results_array, int num_results)
186  {
187  const int kNumHspLists = 20;
188  for (int hitlist_index = 0; hitlist_index < num_results;
189  ++hitlist_index) {
190  BOOST_REQUIRE(results_array[hitlist_index] != NULL);
191  BlastHitList* hitlist =
192  results_array[hitlist_index]->hitlist_array[0];
193  BOOST_REQUIRE_EQUAL(kNumHspLists, hitlist->hsplist_count);
194  BOOST_REQUIRE(x_CheckIncreasingBestEvalues(hitlist));
195  const int kHspCnt = (13-hitlist_index)/num_results;
196  for (int hsplist_index = 0; hsplist_index < kNumHspLists;
197  ++hsplist_index) {
198  BlastHSPList* hsplist = hitlist->hsplist_array[hsplist_index];
199  BOOST_REQUIRE_EQUAL(kHspCnt, hsplist->hspcnt);
200  BOOST_REQUIRE_EQUAL(hsplist_index, hsplist->oid);
201  BOOST_REQUIRE(Blast_HSPListIsSortedByScore(hsplist) == TRUE);
202  for (int hsp_index = 0; hsp_index < kHspCnt; ++hsp_index) {
203  BlastHSP* hsp = hsplist->hsp_array[hsp_index];
204  BOOST_REQUIRE_EQUAL(hitlist_index,
205  hsp->pat_info->index);
206  }
207  }
208  results_array[hitlist_index] =
209  Blast_HSPResultsFree(results_array[hitlist_index]);
210  }
211  sfree(results_array);
212  }
213 
214 };
215 
216 BOOST_FIXTURE_TEST_SUITE(phiblast, CPhiblastTestFixture)
217 
218 /// Tests the values in the PHI BLAST lookup table.
219 BOOST_AUTO_TEST_CASE(testPHILookupTableLong) {
220  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
221  // Test score block contents
222  BOOST_REQUIRE(m_ScoreBlk->kbp_gap == m_ScoreBlk->kbp_gap_std);
223  BOOST_REQUIRE(m_ScoreBlk->kbp == m_ScoreBlk->kbp_std);
224  BOOST_REQUIRE_EQUAL(0.5, m_ScoreBlk->kbp_gap[0]->paramC);
225  BOOST_REQUIRE(m_ScoreBlk->kbp_gap[0]->H != 0);
226  BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->Lambda,
227  m_ScoreBlk->kbp_gap[0]->Lambda);
228  BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->K, m_ScoreBlk->kbp_gap[0]->K);
229 
230  // Test pattern items structure contents
231  SPHIPatternSearchBlk* pattern_blk = (SPHIPatternSearchBlk*) m_Lookup->lut;
232 
233  BOOST_REQUIRE(pattern_blk->flagPatternLength == eVeryLong);
234  BOOST_REQUIRE_EQUAL(37, pattern_blk->minPatternMatchLength);
235  BOOST_REQUIRE_CLOSE(0.0013, pattern_blk->patternProbability, 1);
236  BOOST_REQUIRE_EQUAL(3, pattern_blk->multi_word_items->numWords);
237  BOOST_REQUIRE(pattern_blk->multi_word_items->extra_long_items != NULL);
238 }
239 
240 /// Tests the values in the PHI BLAST lookup table.
241 BOOST_AUTO_TEST_CASE(testPHILookupTableShort) {
242  setUpLookupTable("LLY");
243  // Test score block contents
244  BOOST_REQUIRE(m_ScoreBlk->kbp_gap == m_ScoreBlk->kbp_gap_std);
245  BOOST_REQUIRE(m_ScoreBlk->kbp == m_ScoreBlk->kbp_std);
246  BOOST_REQUIRE_EQUAL(0.5, m_ScoreBlk->kbp_gap[0]->paramC);
247  BOOST_REQUIRE(m_ScoreBlk->kbp_gap[0]->H != 0);
248  BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->Lambda,
249  m_ScoreBlk->kbp_gap[0]->Lambda);
250  BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->K, m_ScoreBlk->kbp_gap[0]->K);
251 
252  // Test pattern items structure contents
253  SPHIPatternSearchBlk* pattern_blk = (SPHIPatternSearchBlk*) m_Lookup->lut;
254 
255  BOOST_REQUIRE(pattern_blk->flagPatternLength == eOneWord);
256  BOOST_REQUIRE_EQUAL(3, pattern_blk->minPatternMatchLength);
257  BOOST_REQUIRE_CLOSE(0.000262, pattern_blk->patternProbability, 1);
258  BOOST_REQUIRE_EQUAL(0, pattern_blk->multi_word_items->numWords);
259  BOOST_REQUIRE(pattern_blk->multi_word_items->extra_long_items == NULL);
260  BOOST_REQUIRE_EQUAL(4, pattern_blk->one_word_items->match_mask);
261  BOOST_REQUIRE(pattern_blk->one_word_items->whichPositionPtr != NULL);
262 }
263 
264 /// Tests the finding of pattern occurrences in query.
265 BOOST_AUTO_TEST_CASE(testFindQueryOccurrencesLong) {
266  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
267  x_FindQueryOccurrences();
268  SPHIQueryInfo* pattern_info = m_QueryInfo->pattern_info;
269  BOOST_REQUIRE(pattern_info != NULL);
270  BOOST_REQUIRE_EQUAL(3, pattern_info->num_patterns);
271  BOOST_REQUIRE_CLOSE(0.0013, pattern_info->probability, 1);
272  // Check that minimal pattern length has been saved in the length
273  // adjustment field.
274  BOOST_REQUIRE_EQUAL(37, m_QueryInfo->contexts[0].length_adjustment);
275 }
276 
277 /// Tests the finding of pattern occurrences in query.
278 BOOST_AUTO_TEST_CASE(testFindQueryOccurrencesShort) {
279  setUpLookupTable("LLY");
280  x_FindQueryOccurrences();
281  SPHIQueryInfo* pattern_info = m_QueryInfo->pattern_info;
282  BOOST_REQUIRE(pattern_info != NULL);
283  BOOST_REQUIRE_EQUAL(1, pattern_info->num_patterns);
284  BOOST_REQUIRE_CLOSE(0.000262, pattern_info->probability, 1);
285  // Check that minimal pattern length has been saved in the length
286  // adjustment field.
287  BOOST_REQUIRE_EQUAL(3, m_QueryInfo->contexts[0].length_adjustment);
288 }
289 
290 /// Tests PHI BLAST calculation of e-values
291 BOOST_AUTO_TEST_CASE(testPHICalcEvalues) {
292  const int kNumDbHits = 33;
293  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
294  x_FindQueryOccurrences();
295 
296  SPHIPatternSearchBlk pattern_blk;
297  pattern_blk.num_patterns_db = kNumDbHits;
298 
299  BlastHSPList* hsp_list = Blast_HSPListNew(0);
300  hsp_list->hspcnt = 1;
301 
302  BlastHSP* hsp = hsp_list->hsp_array[0] = Blast_HSPNew();
303  hsp->score = 527;
304 
305  Blast_HSPListPHIGetEvalues(hsp_list, m_ScoreBlk, m_QueryInfo, &pattern_blk);
306 
307  BOOST_REQUIRE_CLOSE(7.568e-59, hsp->evalue, 1);
308 
309  hsp_list = Blast_HSPListFree(hsp_list);
310  BOOST_REQUIRE(hsp_list == NULL);
311 }
312 
313 /// Tests finding of pattern occurrences in subject.
314 BOOST_AUTO_TEST_CASE(testPHIScanSubject) {
315  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
316  const string
317  kSubjectSeq("GETRKLFVEFLERSCTAEFSGFLLYKELGRRLKGKSPVLAECFNLMSRDEARHAG"
318  "FLNKALSDFNLSLDLGFLTKSRNYTFFKPKFIFYATYLSEKIGYWRYITIYRHLE"
319  "AHPEDRVYPIFRFFENWCQDENRHGDFFDAIMKSQPQILNDWKARLWSRF");
320  const int kNumHits = 3;
321  const int kStarts[kNumHits] = { 8, 11, 94 };
322  const int kEnds[kNumHits] = { 52, 52, 133 };
323 
324  Int4 start_offset = 0;
325  CBLAST_SequenceBlk subject_blk;
326  x_SetupSequenceBlk(kSubjectSeq, &subject_blk);
327  BlastOffsetPair* offset_pairs = (BlastOffsetPair*)
328  calloc(GetOffsetArraySize(m_Lookup), sizeof(BlastOffsetPair));
329  // Query block and array size arguments are not used when scanning
330  // subject for pattern hits, so pass NULL and 0 for respective arguments.
331  Int4 hit_count =
332  PHIBlastScanSubject(m_Lookup, NULL, subject_blk, &start_offset,
333  offset_pairs, 0);
334  BOOST_REQUIRE_EQUAL(kNumHits, hit_count);
335  for (int index = 0; index < kNumHits; ++index) {
336  BOOST_REQUIRE_EQUAL(kStarts[index],
337  (int) offset_pairs[index].phi_offsets.s_start);
338  BOOST_REQUIRE_EQUAL(kEnds[index],
339  (int) offset_pairs[index].phi_offsets.s_end);
340  }
341  sfree(offset_pairs);
342 }
343 
344 BOOST_AUTO_TEST_CASE(testPHIGappedAlignmentWithTraceback) {
345  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
346  const string
347  kQuerySeq("GPLRQIFVEFLERSCTAEFSGFLLYKELGRRLKKTNPVVAEIFSLMSRDEARHAGFL"
348  "NKGLSDFNLALDLGFLTKARKYTFFKPKFIFYATYLSEKIGYWRYITIFRHLKANPE"
349  "YQVYPIFKYFENWCQDENRHGDFFSALL");
350  const string
351  kSubjectSeq("GETRKLFVEFLERSCTAEFSGFLLYKELGRRLKGKSPVLAECFNLMSRDEARHAG"
352  "FLNKALSDFNLSLDLGFLTKSRNYTFFKPKFIFYATYLSEKIGYWRYITIYRHLE"
353  "AHPEDRVYPIFRFFENWCQDENRHGDFFDAIMKSQPQILNDWKARLWSRF");
354  const int kQueryPatLength = 40;
355  const int kQueryStart = 94;
356  CBLAST_SequenceBlk query_blk;
357  x_SetupSequenceBlk(kQuerySeq, &query_blk);
358  CBLAST_SequenceBlk subject_blk;
359  x_SetupSequenceBlk(kSubjectSeq, &subject_blk);
360  const int kSubjectPatLength = 45;
361  const int kSubjectStart = 8;
362 
363  CBlastScoringOptions score_opts;
364  BlastScoringOptionsNew(m_Program, &score_opts);
365  CBlastScoringParameters score_params;
366  BlastScoringParametersNew(score_opts, m_ScoreBlk, &score_params);
367  CBlastExtensionOptions ext_opts;
368  BlastExtensionOptionsNew(m_Program, &ext_opts, score_opts->gapped_calculation);
369  CBlastExtensionParameters ext_params;
370  BlastExtensionParametersNew(m_Program, ext_opts, m_ScoreBlk,
371  m_QueryInfo, &ext_params);
372  CBlastGapAlignStruct gap_align;
373 
374  BLAST_GapAlignStructNew(score_params, ext_params,
375  subject_blk->length, m_ScoreBlk,
376  &gap_align);
377 
378  SPHIPatternSearchBlk* pattern_blk =
379  (SPHIPatternSearchBlk*) m_Lookup->lut;
381  subject_blk->sequence, gap_align,
382  score_params, kQueryStart, kSubjectStart,
383  query_blk->length, subject_blk->length,
384  kQueryPatLength, kSubjectPatLength,
385  pattern_blk);
386 
387  x_CheckGappedAlignmentResults(gap_align);
388 }
389 
390 BOOST_AUTO_TEST_CASE(testPHIBlastHSPResultsSplit) {
391  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
392  SPHIQueryInfo* pattern_info = x_SetupPatternInfo();
393  BlastHSPResults* results = x_SetupResults(20);
394 
395  BlastHSPResults** results_array =
397 
398  x_CheckSplitResults(results_array, pattern_info->num_patterns);
400  BOOST_REQUIRE(results == NULL);
402  BOOST_REQUIRE(pattern_info == NULL);
403 }
404 
405 BOOST_AUTO_TEST_CASE(testPHIBlastHSPResultsSplitNoHits) {
406  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
407  SPHIQueryInfo* pattern_info = x_SetupPatternInfo();
408  BlastHSPResults* results = x_SetupResults(0);
409 
410  BlastHSPResults** results_array =
412 
413  BOOST_REQUIRE(results_array != NULL);
414  BOOST_REQUIRE(results_array[0] == NULL);
415 
416  sfree(results_array);
418  BOOST_REQUIRE(results == NULL);
420  BOOST_REQUIRE(pattern_info == NULL);
421 }
422 
423 // mainly tests cutoff score. Would more logically belong in blastoptions-cppunit.cpp,
424 // but set up functions are here.
425 BOOST_AUTO_TEST_CASE(testPHIBlastHitSavingParameters) {
426  const EBlastProgramType kBlastProgram = eBlastTypePhiBlastp;
427  const bool kIsGapped = true;
428  setUpLookupTable("[ED]-x(32,40)-E-x(2)-H");
429  x_FindQueryOccurrences();
430 
431  BlastExtensionOptions* ext_options = NULL;
432  BlastExtensionOptionsNew(kBlastProgram, &ext_options, kIsGapped);
433 
434  BlastHitSavingOptions* hit_options;
435  BlastHitSavingOptionsNew(kBlastProgram, &hit_options, kIsGapped);
436 
437  m_QueryInfo->contexts[0].eff_searchsp = 10000000;
438  const int k_avg_subject_length=343;
439  BlastHitSavingParameters* hit_params;
440  BlastHitSavingParametersNew(kBlastProgram, hit_options, m_ScoreBlk, m_QueryInfo, k_avg_subject_length, 0, &hit_params);
441 
442  BOOST_REQUIRE_EQUAL(28, hit_params->cutoffs[0].cutoff_score);
443  BOOST_REQUIRE_EQUAL(28, hit_params->cutoff_score_min);
444 
445  ext_options = BlastExtensionOptionsFree(ext_options);
446  BOOST_REQUIRE(ext_options == NULL);
447  hit_params = BlastHitSavingParametersFree(hit_params);
448  BOOST_REQUIRE(hit_params == NULL);
449  hit_options = BlastHitSavingOptionsFree(hit_options);
450  BOOST_REQUIRE(hit_options == NULL);
451 }
452 
453 
455 
456 /*
457 * ===========================================================================
458 *
459 * $Log: phiblast-cppunit.cpp,v $
460 * Revision 1.14 2008/01/31 22:07:00 madden
461 * Change call to LookupTableWrapInit as part of fix for SB-44
462 *
463 * Revision 1.13 2007/10/22 19:16:10 madden
464 * BlastExtensionOptionsNew has Boolean gapped arg
465 *
466 * Revision 1.12 2006/11/16 15:17:28 madden
467 * Add testPHIBlastHSPResultsSplitNoHits
468 *
469 * Revision 1.11 2006/09/15 13:12:05 madden
470 * Change to LookupTableWrapInit prototype
471 *
472 * Revision 1.10 2006/09/01 15:12:10 papadopo
473 * change name of cutoff values to check
474 *
475 * Revision 1.9 2006/07/19 13:30:36 madden
476 * Refactored setup to allow different patterns.
477 * Added tests for short pattern.
478 * Added tearDown method
479 *
480 * Revision 1.8 2006/06/29 16:25:24 camacho
481 * Changed BlastHitSavingOptionsNew signature
482 *
483 * Revision 1.7 2006/06/05 13:34:05 madden
484 * Changes to remove [GS]etMatrixPath and use callback instead
485 *
486 * Revision 1.6 2006/05/22 13:34:00 madden
487 * Add testPHIBlastHitSavingParameters
488 *
489 * Revision 1.5 2006/02/15 15:09:43 madden
490 * Changes for GapEditScript structure change
491 *
492 * Revision 1.4 2006/01/12 20:42:51 camacho
493 * Fix calls to BLAST_MainSetUp to include Blast_Message argument, use BlastQueryInfoNew
494 *
495 * Revision 1.3 2005/05/26 14:43:51 dondosha
496 * Added testPHIBlastHSPResultsSplit to check splitting of PHI BLAST results into an array of results corresponding to different pattern occurrences
497 *
498 * Revision 1.2 2005/05/04 16:15:00 papadopo
499 * modify expected traceback to account for bugfixes in engine
500 *
501 * Revision 1.1 2005/04/27 20:09:56 dondosha
502 * PHI BLAST unit tests
503 *
504 *
505 * ===========================================================================
506 */
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
union BlastOffsetPair BlastOffsetPair
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Definition: blast_filter.c:608
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
BlastHSP * Blast_HSPNew(void)
Allocate and zeros out memory for an HSP structure.
Definition: blast_hits.c:141
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Definition: blast_hits.c:3364
void Blast_HSPListPHIGetEvalues(BlastHSPList *hsp_list, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk)
Calculate e-values for a PHI BLAST HSP list.
Definition: blast_hits.c:1955
Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size)
Blast_HSPResultsInsertHSPList Insert an HSP list to the appropriate place in the results structure.
Definition: blast_hits.c:3552
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Definition: blast_hits.c:3344
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
Definition: blast_hits.c:1358
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Definition: blast_hits.c:1754
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
Definition: blast_hits.c:3570
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingParameters.
Int2 BlastExtensionParametersNew(EBlastProgramType blast_program, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
Int2 BlastScoringParametersNew(const BlastScoringOptions *options, BlastScoreBlk *sbp, BlastScoringParameters **parameters)
Calculate scaled cutoff scores and gap penalties.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypePhiBlastp
Definition: blast_program.h:86
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
Utilities initialize/setup BLAST.
Int2 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastScoringOptions *scoring_options, EBlastProgramType program_number, BlastScoreBlk **sbpp, double scale_factor, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
Initializes the score block structure.
Definition: blast_setup.c:456
Int2 Blast_SetPHIPatternInfo(EBlastProgramType program, const SPHIPatternSearchBlk *pattern_blk, const BLAST_SequenceBlk *query, const BlastSeqLoc *lookup_segments, BlastQueryInfo *query_info, Blast_Message **blast_message)
In a PHI BLAST search, adds pattern information to the BlastQueryInfo structure.
Definition: blast_setup.c:1065
Internal auxiliary setup classes/functions for C++ BLAST APIs.
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
Definition: blast_util.c:147
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Definition: blast_util.c:133
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Wrapper class for BLAST_SequenceBlk .
Definition: blast_aux.hpp:309
Wrapper class for BlastExtensionOptions .
Definition: blast_aux.hpp:323
Wrapper class for BlastExtensionParameters .
Definition: blast_aux.hpp:324
Wrapper class for BlastGapAlignStruct .
Definition: blast_aux.hpp:342
Wrapper class for BlastQueryInfo .
Definition: blast_aux.hpp:311
Wrapper class for BlastScoreBlk .
Definition: blast_aux.hpp:333
Wrapper class for BlastScoringOptions .
Definition: blast_aux.hpp:334
Wrapper class for BlastScoringParameters .
Definition: blast_aux.hpp:335
Wrapper class for Blast_Message .
Definition: blast_aux.hpp:352
Wrapper class for LookupTableOptions .
Definition: blast_aux.hpp:314
Wrapper class for LookupTableWrap .
Definition: blast_aux.hpp:315
static bool x_CheckIncreasingBestEvalues(BlastHitList *hitlist)
void x_SetupSequenceBlk(const string &seq, BLAST_SequenceBlk **seq_blk)
EBlastProgramType m_Program
CBlastQueryInfo m_QueryInfo
static BlastHSPList * x_SetupHSPList(int index)
CPhiblastTestFixture()
Set up: initializes the PHI "lookup table", aka the SPHIPatternSearchBlk structure,...
void setUpLookupTable(string pattern)
void x_FindQueryOccurrences(void)
After the initial set-up is done, finds pattern occurrences in query and fills the pattern informatio...
static SPHIQueryInfo * x_SetupPatternInfo(void)
static void x_CheckSplitResults(BlastHSPResults **results_array, int num_results)
void x_CheckGappedAlignmentResults(BlastGapAlignStruct *gap_align)
static BlastHSPResults * x_SetupResults(const int kHitlistSize)
CLookupTableWrap m_Lookup
Ensure direct dependencies on enough of the core xncbi library to satisfy shared libraries that depen...
static const char location[]
Definition: config.c:97
void Reset(LookupTableWrap *p=NULL)
Definition: blast_aux.hpp:315
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
void Reset(BlastScoreBlk *p=NULL)
Definition: blast_aux.hpp:333
void Reset(BlastQueryInfo *p=NULL)
Definition: blast_aux.hpp:311
#define NULL
Definition: ncbistd.hpp:225
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
Wrapper for all lookup tables used in BLAST.
Int4 GetOffsetArraySize(LookupTableWrap *lookup)
Determine the size of the offsets arrays to be filled by the ScanSubject function.
Definition: lookup_wrap.c:255
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Definition: lookup_wrap.c:47
#define strdup
Definition: ncbi_ansi_ext.h:70
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
@ eVeryLong
Is pattern too long for a simple multi-word processing?
Definition: pattern.h:76
@ eOneWord
Does pattern consist of a single word?
Definition: pattern.h:74
SPHIQueryInfo * SPHIQueryInfoFree(SPHIQueryInfo *pat_info)
Frees the pattern information structure.
Definition: pattern.c:496
SPHIQueryInfo * SPHIQueryInfoNew(void)
Allocates the pattern occurrences structure.
Definition: pattern.c:478
static int * results[]
static int pattern_info(int what, void *where, BOOL unsetok)
Definition: pcre2test.c:4156
static uint8_t * buffer
Definition: pcre2test.c:1016
Function prototypes used for PHI BLAST gapped extension and gapped extension with traceback.
Int2 PHIGappedAlignmentWithTraceback(Uint1 *query, Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Int4 q_pat_length, Int4 s_pat_length, SPHIPatternSearchBlk *pattern_blk)
Perform a gapped alignment with traceback for PHI BLAST.
Definition: phi_gapalign.c:837
Pseudo lookup table structure and database scanning functions used in PHI-BLAST.
Int4 PHIBlastScanSubject(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *query_blk, const BLAST_SequenceBlk *subject, Int4 *offset, BlastOffsetPair *offset_pairs, Int4 array_size)
Scans the subject sequence from "offset" to the end of the sequence.
Definition: phi_lookup.c:725
USING_SCOPE(blast)
BOOST_AUTO_TEST_CASE(testPHILookupTableLong)
Tests the values in the PHI BLAST lookup table.
USING_NCBI_SCOPE
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to hold a sequence.
Definition: blast_def.h:242
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Options used for gapped extension These include: a.
Structure supporting the gapped alignment.
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset current alignment
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Int4 cutoff_score
Raw cutoff score corresponding to the e-value provided by the user if no sum stats,...
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Definition: blast_hits.h:154
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
double best_evalue
Smallest e-value for HSPs in this list.
Definition: blast_hits.h:162
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Structure holding all information about an HSP.
Definition: blast_hits.h:126
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
Definition: blast_hits.h:142
double evalue
This HSP's e-value.
Definition: blast_hits.h:130
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
The structure to contain all BLAST results for one query sequence.
Definition: blast_hits.h:169
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Definition: blast_hits.h:176
Int4 hsplist_count
Filled size of the HSP lists array.
Definition: blast_hits.h:170
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastGappedCutoffs * cutoffs
per-context gapped cutoff information
Int4 cutoff_score_min
smallest cutoff score across all contexts
Boolean gapped_calculation
gap-free search if FALSE
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Edit script: linked list of correspondences between two sequences.
Definition: gapinfo.h:57
Int4 * num
Array of number of operations.
Definition: gapinfo.h:59
Int4 size
Size of above arrays.
Definition: gapinfo.h:60
char * phi_pattern
PHI-BLAST pattern.
void * lut
Pointer to the actual lookup table structure.
Definition: lookup_wrap.h:52
SExtraLongPatternItems * extra_long_items
Additional items necessary if pattern contains pieces longer than a word.
Definition: pattern.h:147
Int4 numWords
Number of words need to hold bit representation of pattern.
Definition: pattern.h:132
In PHI BLAST: information about pattern match in a given HSP.
Definition: blast_hits.h:104
Int4 index
Index of query pattern occurrence for this HSP.
Definition: blast_hits.h:105
Information about a single pattern occurrence in the query.
Definition: blast_def.h:292
Structure containing all auxiliary information needed in a pattern search.
Definition: pattern.h:155
SShortPatternItems * one_word_items
Items necessary when pattern fits in one word.
Definition: pattern.h:163
EPatternType flagPatternLength
Indicates if the whole pattern fits in 1 word, each of several parts of the pattern fit in a word,...
Definition: pattern.h:159
double patternProbability
Probability of this letter combination.
Definition: pattern.h:160
Int4 minPatternMatchLength
Minimum length of string to match this pattern.
Definition: pattern.h:161
Int4 num_patterns_db
Number of patterns actually found during the database search.
Definition: pattern.h:168
SLongPatternItems * multi_word_items
Additional items, when pattern requires multiple words.
Definition: pattern.h:166
In PHI BLAST, structure containing information about all pattern occurrences in query.
Definition: blast_def.h:300
Int4 allocated_size
Allocated size of the occurrences array.
Definition: blast_def.h:304
Int4 num_patterns
Number of pattern occurrences in query.
Definition: blast_def.h:301
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Definition: blast_def.h:302
Int4 * whichPositionPtr
Array of positions where pattern letters should match, for a single word of the pattern.
Definition: pattern.h:97
Int4 match_mask
Bit mask representation of input pattern for patterns that fit in a word.
Definition: pattern.h:95
Utility stuff for more convenient using of Boost.Test library.
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
Definition: blast_def.h:141
Uint4 s_start
Start offset of pattern in subject.
Definition: blast_def.h:147
Uint4 s_end
End offset of pattern in subject.
Definition: blast_def.h:148
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Fri Sep 20 14:57:48 2024 by modify_doxy.py rev. 669887