NCBI C++ ToolKit
showalign_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: showalign_unit_test.cpp 99914 2023-05-19 19:23:15Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jian Ye
27  *
28  * File Description:
29  * Unit tests for showalign
30  *
31  * ===========================================================================
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbifile.hpp>
36 
37 #include <corelib/ncbistl.hpp>
38 #include <serial/serial.hpp>
39 #include <serial/objistr.hpp>
40 #include <serial/objostr.hpp>
42 
44 
45 
46 #include "blast_test_util.hpp"
47 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
48 #include <corelib/test_boost.hpp>
49 
// Bring the align_format scope and the TestUtil namespace into scope for the
// tests below.
52 USING_SCOPE(align_format);
53 using namespace TestUtil;
54 
// Opens the Boost.Test suite named "showalign"; the test cases that follow
// belong to it.
55 BOOST_AUTO_TEST_SUITE(showalign)
56 
// Formats the alignments stored in data/in_showalign_aln with
// CDisplaySeqalign and throws the formatted text away. No assertion is
// visible in this body; the test only exercises DisplaySeqalign().
// NOTE(review): the declarations of `san` and `kDbType` used below are not
// visible here -- confirm them against the complete source file.
57 BOOST_AUTO_TEST_CASE(TestPerformance)
58 {
59  const string seqAlignFileName_in = "data/in_showalign_aln";
61 
 // Read the input file into *san using the MSerial_AsnText manipulator.
62  ifstream in(seqAlignFileName_in.c_str());
63  in >> MSerial_AsnText >> *san;
64 
 // Copy the alignments held by the annotation into a fresh CSeq_align_set.
65  CRef<CSeq_align_set> fileSeqAlignSet(new CSeq_align_set);
66  fileSeqAlignSet->Set() = san->GetData().GetAlign();
67 
 // Create a scope from a CBlastOM built for database "nucl_dbs" with
 // CBlastOM::eLocal.
68  const string kDbName("nucl_dbs");
70  TestUtil::CBlastOM tmp_data_loader(kDbName, kDbType, CBlastOM::eLocal);
71  CRef<CScope> scope = tmp_data_loader.NewScope();
72  CDisplaySeqalign ds(*fileSeqAlignSet, *scope);
 // NOTE(review): "/dev/null" is a POSIX path; confirm this test is not
 // expected to run on platforms without it.
73  CNcbiOfstream dumpster("/dev/null"); // we don't care about the output
74  ds.DisplaySeqalign(dumpster);
76 }
// Timeout value for the performance test: 120 when _DEBUG is defined,
// 30 otherwise.
// NOTE(review): presumably expressed in seconds and consumed by a
// BOOST_AUTO_TEST_CASE_TIMEOUT for TestPerformance -- confirm against the
// complete source file.
77 #ifdef _DEBUG
78 const int kPerformanceTimeout = 120;
79 #else
80 const int kPerformanceTimeout = 30;
81 #endif
83 
// Formats the alignments from data/in_showalign_use_this_gi.asn against
// database "nr" and requires that three specific accession strings appear in
// the formatted output.
// NOTE(review): the declarations of `san`, `kDbType` and `flags` used below
// are not visible here -- confirm them against the complete source file.
84 BOOST_AUTO_TEST_CASE(TestAccConversion)
85 {
86  const string seqAlignFileName_in = "data/in_showalign_use_this_gi.asn";
88 
 // Read the input file into *san using the MSerial_AsnText manipulator.
89  ifstream in(seqAlignFileName_in.c_str());
90  in >> MSerial_AsnText >> *san;
91  in.close();
92 
 // Copy the alignments held by the annotation into a fresh CSeq_align_set.
93  CRef<CSeq_align_set> fileSeqAlignSet(new CSeq_align_set);
94  fileSeqAlignSet->Set() = san->GetData().GetAlign();
95 
96  const string kDbName("nr");
98  TestUtil::CBlastOM tmp_data_loader(kDbName, kDbType, CBlastOM::eLocal);
99  CRef<CScope> scope = tmp_data_loader.NewScope();
100 
101  CDisplaySeqalign ds(*fileSeqAlignSet, *scope);
102  ds.SetDbName(kDbName);
 // NOTE(review): SetDbType() is declared as SetDbType(bool is_na), yet the
 // argument here is true when kDbType is eProtein -- confirm the intended
 // polarity.
103  ds.SetDbType((kDbType == CBlastDbDataLoader::eProtein));
106  ds.SetAlignOption(flags);
 // Capture the formatted alignment text in memory so it can be searched.
108  CNcbiOstrstream output_stream;
109  ds.DisplaySeqalign(output_stream);
110  string output = CNcbiOstrstreamToString(output_stream);
 // Each expected accession string must occur somewhere in the output.
111  BOOST_REQUIRE(output.find("XP_011512980.1") != NPOS);
112  BOOST_REQUIRE(output.find("NP_004146.1") != NPOS);
113  BOOST_REQUIRE(output.find("AAH96218.1") != NPOS);
115 }
116 
// Formats the alignments from data/in_showalign_use_this_gi_ext.asn against
// database "nr" and requires that five specific accession strings appear in
// the formatted output.
// NOTE(review): the declarations of `kDbType` and `flags` used below are not
// visible here -- confirm them against the complete source file.
117 BOOST_AUTO_TEST_CASE(TestAccConversionExt)
118 {
119  const string seqAlignFileName_in = "data/in_showalign_use_this_gi_ext.asn";
120  CRef<CSeq_annot> san(new CSeq_annot);
121 
 // Read the input file into *san using the MSerial_AsnText manipulator.
122  ifstream in(seqAlignFileName_in.c_str());
123  in >> MSerial_AsnText >> *san;
124  in.close();
125 
 // Copy the alignments held by the annotation into a fresh CSeq_align_set.
126  CRef<CSeq_align_set> fileSeqAlignSet(new CSeq_align_set);
127  fileSeqAlignSet->Set() = san->GetData().GetAlign();
128 
129  const string kDbName("nr");
131  TestUtil::CBlastOM tmp_data_loader(kDbName, kDbType, CBlastOM::eLocal);
132  CRef<CScope> scope = tmp_data_loader.NewScope();
133 
134  CDisplaySeqalign ds(*fileSeqAlignSet, *scope);
135  ds.SetDbName(kDbName);
 // NOTE(review): SetDbType() is declared as SetDbType(bool is_na), yet the
 // argument here is true when kDbType is eProtein -- confirm the intended
 // polarity.
136  ds.SetDbType((kDbType == CBlastDbDataLoader::eProtein));
139  ds.SetAlignOption(flags);
 // Capture the formatted alignment text in memory so it can be searched.
141  CNcbiOstrstream output_stream;
142  ds.DisplaySeqalign(output_stream);
143  string output = CNcbiOstrstreamToString(output_stream);
 // Each expected accession string must occur somewhere in the output.
144  BOOST_REQUIRE(output.find("NP_001263315.1") != NPOS);
145  BOOST_REQUIRE(output.find("XP_015137661.1") != NPOS);
146  BOOST_REQUIRE(output.find("AUD54591.1") != NPOS);
147  BOOST_REQUIRE(output.find("BAM13279.1") != NPOS);
148  BOOST_REQUIRE(output.find("P01013.1") != NPOS);
150 }
151 
// Body of a helper that formats the alignments from
// data/blastn.vs.ecoli.asn against database "nt" and checks the formatted
// text. It uses `location` when constructing the CBlastOM and `long_seqids`
// to choose between short and long sequence id display, and returns true
// once every BOOST_REQUIRE below has passed.
// NOTE(review): the function header and the declarations of `kDbType` and
// `flags` are not visible here -- presumably this is
// bool TestSimpleAlignment(CBlastOM::ELocation location, bool long_seqids);
// confirm against the complete source file.
153 {
154  const string seqAlignFileName_in = "data/blastn.vs.ecoli.asn";
155  CRef<CSeq_annot> san(new CSeq_annot);
156 
 // Read the input file into *san using the MSerial_AsnText manipulator.
157  ifstream in(seqAlignFileName_in.c_str());
158  in >> MSerial_AsnText >> *san;
159  in.close();
160 
 // Copy the alignments held by the annotation into a fresh CSeq_align_set.
161  CRef<CSeq_align_set> fileSeqAlignSet(new CSeq_align_set);
162  fileSeqAlignSet->Set() = san->GetData().GetAlign();
163 
164  const string kDbName("nt");
166  TestUtil::CBlastOM tmp_data_loader(kDbName, kDbType, location);
 // The inner block ends before RevokeBlastDbDataLoader() is called below,
 // presumably so that `scope` and `ds` are destroyed before the data loader
 // is revoked.
167  {{ // to limit the scope of the objects declared in this block
168  CRef<CScope> scope = tmp_data_loader.NewScope();
169 
170  CDisplaySeqalign ds(*fileSeqAlignSet, *scope);
171  ds.SetDbName(kDbName);
 // Long sequence ids are requested only when the caller asks for them.
175  if (long_seqids) {
177  ds.UseLongSequenceIds();
178  }
179  ds.SetAlignOption(flags);
 // Capture the formatted alignment text in memory so it can be searched.
181  CNcbiOstrstream output_stream;
182  ds.DisplaySeqalign(output_stream);
183  string output = CNcbiOstrstreamToString(output_stream);
 // The expected defline prefix depends on the id style that was requested.
184  if (!long_seqids) {
185  BOOST_REQUIRE(output.find(">AE000304.1 ") != NPOS);
186  }
187  else {
188  BOOST_REQUIRE(output.find(">gi|1788470|gb|AE000304.1|AE000304 ") != NPOS);
189  }
 // Any one of three spellings of the organism/title text is accepted.
190  BOOST_REQUIRE(output.find("Escherichia coli K-12 MG1655 section 9 of 400 of ") != NPOS ||
191  output.find("Escherichia coli K12 MG1655 section 9 of 400 of ") != NPOS ||
192  output.find("Escherichia coli K12 substr. MG1655") != NPOS);
193  BOOST_REQUIRE(output.find("Sbjct 259 GCCTGATGCGACGCTGGCGCGTCTTATCAGGCCTAC 294") != NPOS);
194  BOOST_REQUIRE(output.find("Length=11852") != NPOS);
195  BOOST_REQUIRE(output.find("Query 5636 GTAGG-CAGGATAAGGCGTTCACGCCGCATCCGGCA 5670") != NPOS);
 // Only the leading part of the Expect value ("2e-0") is matched.
196  BOOST_REQUIRE(output.find(" Score = 54.7 bits (29), Expect = 2e-0")
197  != NPOS);
198  }}
199  tmp_data_loader.RevokeBlastDbDataLoader();
200  return true;
201 }
202 
203 
204 // Note: this essentially disables the performance tests for the automated
205 // toolkit builds, which implies that the BLAST team should be running these
206 // themselves (CVSROOT/individual/camacho/scripts/autobuild.pl should do this)
// More precisely: when the environment variable NCBI_AUTOMATED_BUILD equals
// "1", the code below installs a timeout of kMax_Int for TestPerformance.
// The test body itself is not skipped by this code.
// NOTE(review): the header line of this block is not visible here --
// presumably NCBITEST_AUTO_INIT(); confirm against the complete source file.
208 {
209  if (CNcbiApplication::Instance()->GetEnvironment()
210  .Get("NCBI_AUTOMATED_BUILD") == "1") {
211  // Suppress timeout
212  typedef SNcbiTestTCTimeout<BOOST_AUTO_TC_UNIQUE_ID(TestPerformance)> TTimeout;
 // Function-local static: the object is constructed the first time this
 // branch is taken and is not destroyed when the block exits.
213  static TTimeout new_timeout(kMax_Int);
214  }
215 }
216 
// Runs TestSimpleAlignment with CBlastOM::eLocal twice: first with
// long_seqids == false, then with long_seqids == true. Both calls must
// return true.
217 BOOST_AUTO_TEST_CASE(TestSimpleAlignment_LocalBlastDBLoader)
218 {
219  BOOST_REQUIRE(TestSimpleAlignment(CBlastOM::eLocal, false));
220  BOOST_REQUIRE(TestSimpleAlignment(CBlastOM::eLocal, true));
221 }
222 
// Runs TestSimpleAlignment with CBlastOM::eRemote twice: first with
// long_seqids == false, then with long_seqids == true. Both calls must
// return true.
223 BOOST_AUTO_TEST_CASE(TestSimpleAlignment_RmtBlastDBLoader)
224 {
225  BOOST_REQUIRE(TestSimpleAlignment(CBlastOM::eRemote, false));
226  BOOST_REQUIRE(TestSimpleAlignment(CBlastOM::eRemote, true));
227 }
228 
// Compiled only when NCBI_INT4_GI is not defined. Formats the alignments
// from data/large_gi.asn against database "data/nucl_32b_gi" and requires
// that four numeric strings appear in the output. Three of them
// (2271278971, 4294967295, 2966748774) exceed 2147483647, the largest value
// a signed 32-bit integer can hold.
// NOTE(review): presumably these numbers are GIs; the test-case header and
// the declarations of `kDbType` and `flags` are not visible here -- confirm
// against the complete source file.
229 #ifndef NCBI_INT4_GI
231 {
232  const string seqAlignFileName_in = "data/large_gi.asn";
233  CRef<CSeq_annot> san(new CSeq_annot);
234 
 // Read the input file into *san using the MSerial_AsnText manipulator.
235  ifstream in(seqAlignFileName_in.c_str());
236  in >> MSerial_AsnText >> *san;
237  in.close();
238 
 // Copy the alignments held by the annotation into a fresh CSeq_align_set.
239  CRef<CSeq_align_set> fileSeqAlignSet(new CSeq_align_set);
240  fileSeqAlignSet->Set() = san->GetData().GetAlign();
241 
242  const string kDbName("data/nucl_32b_gi");
244  TestUtil::CBlastOM tmp_data_loader(kDbName, kDbType, CBlastOM::eLocal);
245  CRef<CScope> scope = tmp_data_loader.NewScope();
246 
247  CDisplaySeqalign ds(*fileSeqAlignSet, *scope);
248  ds.SetDbName(kDbName);
253  ds.SetAlignOption(flags);
 // Capture the formatted alignment text in memory so it can be searched.
255  CNcbiOstrstream output_stream;
256  ds.DisplaySeqalign(output_stream);
257  string output = CNcbiOstrstreamToString(output_stream);
 // Each expected number must occur somewhere in the output.
258  BOOST_REQUIRE(output.find("2271278971") != NPOS);
259  BOOST_REQUIRE(output.find("698036805") != NPOS);
260  BOOST_REQUIRE(output.find("4294967295") != NPOS);
261  BOOST_REQUIRE(output.find("2966748774") != NPOS);
263 }
264 #endif
265 
266 
Sequence alignment display tool.
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
EDbType
Describes the type of blast database to use.
Definition: bdbloader.hpp:57
@ eNucleotide
nucleotide database
Definition: bdbloader.hpp:58
@ eProtein
protein database
Definition: bdbloader.hpp:59
void SetSeqLocChar(SeqLocCharOption option=eX)
character style for seqloc display such as masked region
Definition: showalign.hpp:292
void SetAlignOption(int option)
Set functions.
Definition: showalign.hpp:284
void DisplaySeqalign(CNcbiOstream &out)
call this to display seqalign
Definition: showalign.cpp:1906
void SetDbType(bool is_na)
database type.
Definition: showalign.hpp:361
void SetDbName(string name)
set blast database name
Definition: showalign.hpp:354
void UseLongSequenceIds(void)
Sets usage of long sequence ids (database|accession)
Definition: showalign.hpp:421
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:244
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
Class which registers the BLAST database and Genbank data loaders as non-default data loaders with ...
void RevokeBlastDbDataLoader()
Removes the BLAST database data loader from the object manager.
ncbi::CRef< ncbi::objects::CScope > NewScope()
Create a new scope with the default set to the BLAST database data loader for the BLAST database spec...
static const char location[]
Definition: config.c:97
static uch flags
#define MSerial_AsnText
I/O stream manipulators.
Definition: serialbase.hpp:696
void RevokeAllDataLoaders(void)
Revoke all registered data loaders, even if they were still used.
CObjectManager & GetObjectManager(void)
Get object manager controlling this scope.
Definition: scope.cpp:89
#define kMax_Int
Definition: ncbi_limits.h:184
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
#define NPOS
Definition: ncbistr.hpp:133
Tdata & Set(void)
Assign a value to data member.
const TAlign & GetAlign(void) const
Get the variant data.
Definition: Seq_annot_.hpp:641
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
const TYPE & Get(const CNamedParameterList *param)
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The NCBI C++/STL use hints.
std::istream & in(std::istream &in_, double &x_)
Utilities to develop and debug unit tests for BLAST.
static SQLCHAR output[256]
Definition: print.c:5
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
USING_SCOPE(objects)
const int kPerformanceTimeout
bool TestSimpleAlignment(CBlastOM::ELocation location, bool long_seqids)
BOOST_AUTO_TEST_CASE_TIMEOUT(TestPerformance, kPerformanceTimeout)
NCBITEST_AUTO_INIT()
USING_NCBI_SCOPE
BOOST_AUTO_TEST_CASE(TestPerformance)
Copy of auto_tc_exp_fail from Boost.Test to store the value of timeout for each test.
Utility stuff for more convenient using of Boost.Test library.
Modified on Sat Dec 02 09:20:35 2023 by modify_doxy.py rev. 669887