NCBI C++ ToolKit
rmblast_blasthits_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: rmblast_blasthits_unit_test.cpp 81515 2018-03-09 13:57:58Z camacho $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Robert Hubley ( boiler plate - Ilya Dondoshansky )
27 *
28 * File Description:
29 * Unit test module to test hit saving procedures in RMBlast
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 #include "blast_setup.hpp"
37 #include "blast_objmgr_priv.hpp"
38 #include "test_objmgr.hpp"
39 
46 #include "blast_hits_priv.h"
47 
48 extern "C" int h_score_compare_hsps(const void* v1, const void* v2)
49 {
50  BlastHSP* h1,* h2;
51 
52  h1 = *((BlastHSP**) v1);
53  h2 = *((BlastHSP**) v2);
54 
55  if (h1->score > h2->score)
56  return -1;
57  else if (h1->score < h2->score)
58  return 1;
59  return 0;
60 }
61 
62 using namespace ncbi;
63 using namespace ncbi::objects;
64 using namespace ncbi::blast;
65 
66 BOOST_AUTO_TEST_SUITE(blasthits)
67 
68  static void
70  bool gapped, bool is_prot)
71  {
72  BlastScoringOptions* options =
74  if (gapped) {
75  options->gapped_calculation = TRUE;
76  options->gap_open = 1;
77  options->gap_extend = 1;
78  }
79  if (is_prot) {
80  options->matrix = strdup("BLOSUM62");
81  } else {
82  options->reward = 1;
83  options->penalty = -2;
84  }
85  *options_ptr = options;
86  }
87 
88  static void
90  {
91  *hsplist_ptr = Blast_HSPListNew(4);
92  for(unsigned int i = 0; i < 4; i++ ){
93  unsigned int factor = i+1;
94  BlastHSP* hsp = Blast_HSPNew();
95 
96  hsp->query.offset = 0;
97  hsp->query.end = 12/factor;
98  hsp->subject.offset = 0;
99  hsp->subject.frame = 1;
100  hsp->subject.end = 12/factor;
101  hsp->score = 45/factor;
102  (*hsplist_ptr)->hsp_array[i] = hsp;
103  (*hsplist_ptr)->hspcnt ++;
104  }
105  }
106 
107  /** RMBlast function to filter HSPs with a raw score < HitSavingOptions->cutoff_score
108  * -RMH-
109  */
110  BOOST_AUTO_TEST_CASE(testHSPListReapByRawScore)
111  {
112  BlastHSPList* hsp_list = NULL;
113  EBlastProgramType program_number = eBlastTypeBlastn;
114  BlastScoringOptions* scoring_options = NULL;
115  BlastHitSavingOptions* hit_options = NULL;
116 
117  s_SetupScoringOptionsForReevaluateHSP(&scoring_options, false, true);
118  BlastHitSavingOptionsNew(program_number, &hit_options,
119  scoring_options->gapped_calculation);
120 
121  s_SetupHSPListTransl(&hsp_list);
122 
123  BOOST_REQUIRE_EQUAL(4, (int) hsp_list->hspcnt);
124  hit_options->cutoff_score = 15;
125  Blast_HSPListReapByRawScore(hsp_list, hit_options);
126  BOOST_REQUIRE_EQUAL(3, (int) hsp_list->hspcnt);
127  hit_options->cutoff_score = 20;
128  Blast_HSPListReapByRawScore(hsp_list, hit_options);
129  BOOST_REQUIRE_EQUAL(2, (int) hsp_list->hspcnt);
130 
131  Blast_HSPListFree(hsp_list);
132  BlastHitSavingOptionsFree(hit_options);
133  BlastScoringOptionsFree(scoring_options);
134  }
135 
136  /** RMBlast function to filter HSPs based on the percentage over overlap
137  * along the query ranges.
138  * -RMH-
139  */
140  BOOST_AUTO_TEST_CASE(testHSPResultsApplyMasklevel)
141  {
142  EBlastProgramType program_number = eBlastTypeBlastn;
143  BlastHSPList* hsp_list = Blast_HSPListNew(0);
144 
145  const int kMaxHspCount = 26;
146  const int kSubjectOffsets[kMaxHspCount] =
147  { 10,382,14,83,4,382,1000,203,54,32,64,382,89,183,813,132,14,1344,321,224,34,8341,1344,254,861,834 };
148  const int kQueryOffsets[kMaxHspCount] =
149  { 1,4,14,21,24,34,41,44,54,61,64,74,81,84,94,101,104,114,121,124,134,141,144,154,161,164 };
150  const int kLengths[kMaxHspCount] =
151  { 17,9,9,17,9,9,17,9,9,17,9,9,17,9,9,17,9,9,17,9,9,17,9,9,17,9 };
152  const int kScores[kMaxHspCount] =
153  { 93,85,85,93,85,85,93,85,85,93,85,85,93,85,85,93,85,85,93,85,85,93,85,85,93,85 };
154 
155  int index;
156  hsp_list->hspcnt = kMaxHspCount;
157  for (index = 0; index < kMaxHspCount; ++index) {
158  hsp_list->hsp_array[index] =
159  (BlastHSP*) calloc(1, sizeof(BlastHSP));
160  hsp_list->hsp_array[index]->query.offset = kQueryOffsets[index];
161  hsp_list->hsp_array[index]->subject.offset = kSubjectOffsets[index];
162  hsp_list->hsp_array[index]->query.end =
163  kQueryOffsets[index] + kLengths[index];
164  hsp_list->hsp_array[index]->subject.end =
165  kSubjectOffsets[index] + kLengths[index];
166  hsp_list->hsp_array[index]->score = kScores[index];
167  hsp_list->hsp_array[index]->context = 0;
168  }
169 
170  BlastHSPResults* results = Blast_HSPResultsNew(1);
171  Blast_HSPResultsInsertHSPList( results, hsp_list, 1 );
172 
173  BlastQueryInfo* query_info = BlastQueryInfoNew(program_number, 1);
174  query_info->contexts[0].query_length = 750;
175  query_info->contexts[0].frame = 1;
176  query_info->contexts[0].is_valid = true;
177  for (int i=1; i<=query_info->last_context; i++)
178  query_info->contexts[i].is_valid = false;
179 
180  // masklevel 80 [ 17 remaining ]
181  // 1,14,21,34,41,54,61,74,81,94,101,114,121,134,141,154,161
182  Blast_HSPResultsApplyMasklevel( results, query_info, 80, 750 );
183  BOOST_REQUIRE_EQUAL(17, (int) results->hitlist_array[0]->hsplist_array[0]->hspcnt);
184  // masklevel 25 [ 9 remaining ]
185  // 1,21,41,61,81,101,121,141,161
186  Blast_HSPResultsApplyMasklevel( results, query_info, 25, 750 );
187  BOOST_REQUIRE_EQUAL(9, (int) results->hitlist_array[0]->hsplist_array[0]->hspcnt);
188 
189  BlastQueryInfoFree(query_info);
190  Blast_HSPResultsFree(results);
191  }
192 
Definitions used throughout BLAST.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Structures and API used for saving BLAST hits.
Int2 Blast_HSPListReapByRawScore(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs whose raw score is below the cutoff threshold from the HSP list.
Definition: blast_hits.c:2070
BlastHSP * Blast_HSPNew(void)
Allocate and zeros out memory for an HSP structure.
Definition: blast_hits.c:141
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Definition: blast_hits.c:3358
Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size)
Blast_HSPResultsInsertHSPList Insert an HSP list to the appropriate place in the results structure.
Definition: blast_hits.c:3546
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Definition: blast_hits.c:3338
Int2 Blast_HSPResultsApplyMasklevel(BlastHSPResults *results, const BlastQueryInfo *query_info, Int4 masklevel, Int4 query_length)
Apply Cross_match like masklevel to HSP list.
Definition: blast_hits.c:3459
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
Utilities for dealing with BLAST HSPs in the core of BLAST.
Definitions which are dependent on the NCBI C++ Object Manager.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
BlastQueryInfo * BlastQueryInfoFree(BlastQueryInfo *query_info)
Deallocate memory for query information structure.
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
Utilities initialize/setup BLAST.
Internal auxiliary setup classes/functions for C++ BLAST APIs.
Various auxiliary BLAST utility functions.
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Defines the interface to interact with the genetic code singleton object.
#define NULL
Definition: ncbistd.hpp:225
const CVect2< U > & v2
Definition: globals.hpp:440
int i
Magic spell ;-) needed for some weird compilers... very empiric.
#define strdup
Definition: ncbi_ansi_ext.h:70
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
int h_score_compare_hsps(const void *v1, const void *v2)
static void s_SetupHSPListTransl(BlastHSPList **hsplist_ptr)
BOOST_AUTO_TEST_CASE(testHSPListReapByRawScore)
RMBlast function to filter HSPs with a raw score < HitSavingOptions->cutoff_score -RMH-.
static void s_SetupScoringOptionsForReevaluateHSP(BlastScoringOptions **options_ptr, bool gapped, bool is_prot)
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Structure holding all information about an HSP.
Definition: blast_hits.h:126
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
Int4 context
Context number of query.
Definition: blast_hits.h:133
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Definition: blast_hits.h:176
Options used when evaluating and saving hits These include: a.
Int4 cutoff_score
The (raw) score cut-off threshold.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 last_context
Index of the last element of the context array.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int2 penalty
Penalty for a mismatch.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Boolean gapped_calculation
gap-free search if FALSE
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Utilities for more convenient use of the Boost.Test library.
voidp calloc(uInt items, uInt size)
Modified on Fri Apr 12 17:16:25 2024 by modify_doxy.py rev. 669887