NCBI C++ ToolKit
seqmasks_out_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqmasks_out_unit_test.cpp 98976 2023-01-30 14:32:39Z madden $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 */
26 
27 #include <ncbi_pch.hpp>
28 
29 #include <corelib/test_boost.hpp>
30 #if BOOST_VERSION >= 105900
31 # include <boost/test/tools/output_test_stream.hpp>
32 #else
33 # include <boost/test/output_test_stream.hpp>
34 #endif
35 
36 #include <serial/iterator.hpp>
39 
41 #include <objmgr/scope.hpp>
42 #include <objmgr/bioseq_handle.hpp>
43 
45 
53 
56 
58 
62  }
63 
67 
68  size_t GetExpectedNumberOfMasks() const { return 1U; }
69  TSeqPos GetMaskStart() const { return 78U; }
70  TSeqPos GetMaskStop() const { return 89U; }
71 
72 private:
74  m_Masks.clear();
76  ++itr) {
78  itr->GetStop(eExtreme_Positional));
79  m_Masks.push_back(m);
80  }
81  }
82 
85  }
86 
89  }
90 
91  CBioseq_Handle x_LoadMaskedBioseq(bool parse_seqids) {
92  const char* kDataFile = "data/nt.555.mfsa";
93  CNcbiIfstream in(kDataFile);
94  _ASSERT(in);
95 
97  scope->AddDefaults();
98 
100  if (parse_seqids) {
102  } else {
104  }
105  CFastaReader fsa_reader(in, flags);
106  CRef<CSeq_loc> mask = fsa_reader.SaveMask();
107  CRef<CSeq_entry> se = fsa_reader.ReadOneSeq();
108  _ASSERT(se->IsSeq());
109  scope->AddTopLevelSeqEntry(*se);
110  const CSeq_id& best_id = fsa_reader.GetBestID();
112  return scope->GetBioseqHandle(best_id);
113  }
114 };
115 
116 BOOST_FIXTURE_TEST_SUITE(seqmasks_io_tests, seqmasks_io_fixture)
117 
118 BOOST_AUTO_TEST_CASE(WriteFastaParseSeqids)
119 {
120  boost::test_tools::output_test_stream out("data/sample_parse_seqids_fasta.out");
121  const bool kParseSeqids(true);
122  unique_ptr<CMaskWriter> writer(new CMaskWriterFasta(out));
123  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
124  BOOST_CHECK(out.match_pattern());
125 
126  //writer.reset(new CMaskWriterFasta(cout));
127  //writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
128 }
129 
130 BOOST_AUTO_TEST_CASE(WriteFastaNoParseSeqids)
131 {
132  boost::test_tools::output_test_stream out("data/sample_fasta.out");
133  const bool kParseSeqids(false);
134  unique_ptr<CMaskWriter> writer(new CMaskWriterFasta(out));
135  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
136  BOOST_CHECK(out.match_pattern());
137  //writer.reset(new CMaskWriterFasta(cout));
138  //writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
139 }
140 
141 BOOST_AUTO_TEST_CASE(WriteAcclistParseSeqids)
142 {
143  boost::test_tools::output_test_stream out("data/sample_parse_seqids_acclist.out");
144  const bool kParseSeqids(true);
145  unique_ptr<CMaskWriter> writer(new CMaskWriterTabular(out));
146  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
147  BOOST_CHECK(out.match_pattern());
148 
149  //writer.reset(new CMaskWriterTabular(cout));
150  //writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
151 }
152 
153 BOOST_AUTO_TEST_CASE(WriteAcclistNoParseSeqids)
154 {
155  boost::test_tools::output_test_stream out("data/sample_acclist.out");
156  const bool kParseSeqids(false);
157  unique_ptr<CMaskWriter> writer(new CMaskWriterTabular(out));
158  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
159  BOOST_CHECK(out.match_pattern());
160 
161  //writer.reset(new CMaskWriterTabular(cout));
162  //writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
163 }
164 
165 BOOST_AUTO_TEST_CASE(WriteIntervalParseSeqids)
166 {
167  boost::test_tools::output_test_stream out("data/sample_parse_seqids_interval.out");
168  const bool kParseSeqids(true);
169  unique_ptr<CMaskWriter> writer(new CMaskWriterInt(out));
170  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
171  BOOST_CHECK(out.match_pattern());
172 
173  //writer.reset(new CMaskWriterInt(cout));
174  //writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
175 }
176 
177 BOOST_AUTO_TEST_CASE(WriteIntervalNoParseSeqids)
178 {
179  boost::test_tools::output_test_stream out("data/sample_interval.out");
180  const bool kParseSeqids(false);
181  unique_ptr<CMaskWriter> writer(new CMaskWriterInt(out));
182  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
183  BOOST_CHECK(out.match_pattern());
184 
185  //writer.reset(new CMaskWriterInt(cout));
186  //writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
187 }
188 
189 static const int kAlgoId(2);
190 static const string kAlgoOptions("window=64; level=20; linker=1");
191 
192 BOOST_AUTO_TEST_CASE(WriteMaskInfoAsn1TextParseSeqids)
193 {
194  boost::test_tools::output_test_stream out("data/sample_parse_seqids_maskinfo_asn1_text.out");
195  const bool kParseSeqids(true);
196  unique_ptr<CMaskWriter> writer(new CMaskWriterBlastDbMaskInfo(out,
197  "maskinfo_asn1_text",
198  kAlgoId,
200  kAlgoOptions));
201  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
202  BOOST_CHECK(out.match_pattern());
203 }
204 
205 BOOST_AUTO_TEST_CASE(WriteMaskInfoAsn1TextNoParseSeqids)
206 {
207  boost::test_tools::output_test_stream out("data/sample_maskinfo_asn1_text.out");
208  const bool kParseSeqids(false);
209  unique_ptr<CMaskWriter> writer(new CMaskWriterBlastDbMaskInfo(out,
210  "maskinfo_asn1_text",
211  kAlgoId,
213  kAlgoOptions));
214  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
215  BOOST_CHECK(out.match_pattern());
216 }
217 
218 BOOST_AUTO_TEST_CASE(WriteMaskInfoAsn1BinaryParseSeqids)
219 {
220  const bool kMatchOrSave(true);
221  const bool kTextOrBin(false);
222  boost::test_tools::output_test_stream
223  out("data/sample_parse_seqids_maskinfo_asn1_bin.out",
224  kMatchOrSave, kTextOrBin);
225  const bool kParseSeqids(true);
226  unique_ptr<CMaskWriter> writer(new CMaskWriterBlastDbMaskInfo(out,
227  "maskinfo_asn1_bin",
228  kAlgoId,
230  kAlgoOptions));
231  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
232  BOOST_CHECK(out.match_pattern());
233 }
234 
235 BOOST_AUTO_TEST_CASE(WriteMaskInfoAsn1BinaryNoParseSeqids)
236 {
237  const bool kMatchOrSave(true);
238  const bool kTextOrBin(false);
239  boost::test_tools::output_test_stream out("data/sample_maskinfo_asn1_bin.out",
240  kMatchOrSave, kTextOrBin);
241  const bool kParseSeqids(false);
242  unique_ptr<CMaskWriter> writer(new CMaskWriterBlastDbMaskInfo(out,
243  "maskinfo_asn1_bin",
244  kAlgoId,
246  kAlgoOptions));
247  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
248  BOOST_CHECK(out.match_pattern());
249 }
250 
251 BOOST_AUTO_TEST_CASE(WriteMaskInfoXmlParseSeqids)
252 {
253  boost::test_tools::output_test_stream out("data/sample_parse_seqids_maskinfo_xml.out");
254  const bool kParseSeqids(true);
255  unique_ptr<CMaskWriter> writer(new CMaskWriterBlastDbMaskInfo(out,
256  "maskinfo_xml",
257  kAlgoId,
259  kAlgoOptions));
260  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
261  BOOST_CHECK(out.match_pattern());
262 }
263 
264 BOOST_AUTO_TEST_CASE(WriteMaskInfoXmlNoParseSeqids)
265 {
266  boost::test_tools::output_test_stream out("data/sample_maskinfo_xml.out");
267  const bool kParseSeqids(false);
268  unique_ptr<CMaskWriter> writer(new CMaskWriterBlastDbMaskInfo(out,
269  "maskinfo_xml",
270  kAlgoId,
272  kAlgoOptions));
273  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
274  BOOST_CHECK(out.match_pattern());
275 }
276 
277 BOOST_AUTO_TEST_CASE(WriteSeqLocAsn1TextParseSeqids)
278 {
279  boost::test_tools::output_test_stream out(
281  "data/sample_prefacc_parse_seqids_seqloc_asn1_text.out" :
282  "data/sample_parse_seqids_seqloc_asn1_text.out");
283  const bool kParseSeqids(true);
284  unique_ptr<CMaskWriter> writer(new CMaskWriterSeqLoc(out, "seqloc_asn1_text"));
285  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
286  BOOST_CHECK(out.match_pattern());
287 }
288 
289 BOOST_AUTO_TEST_CASE(WriteSeqLocAsn1TextNoParseSeqids)
290 {
291  boost::test_tools::output_test_stream out("data/sample_seqloc_asn1_text.out");
292  const bool kParseSeqids(false);
293  unique_ptr<CMaskWriter> writer(new CMaskWriterSeqLoc(out, "seqloc_asn1_text"));
294  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
295  BOOST_CHECK(out.match_pattern());
296  BOOST_REQUIRE_EQUAL(GetExpectedNumberOfMasks(), m_Masks.size());
297  BOOST_CHECK_EQUAL(GetMaskStart(), m_Masks[0].first);
298  BOOST_CHECK_EQUAL(GetMaskStop(), m_Masks[0].second);
299 
300  //writer.reset(new CMaskWriterSeqLoc(cout, "seqloc_asn1_text"));
301  //writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
302 }
303 
304 BOOST_AUTO_TEST_CASE(WriteSeqLocAsn1BinaryParseSeqids)
305 {
306  const bool kMatchOrSave(true);
307  const bool kTextOrBin(false);
308  boost::test_tools::output_test_stream
309  out(
311  "data/sample_prefacc_parse_seqids_seqloc_asn1_bin.out" :
312  "data/sample_parse_seqids_seqloc_asn1_bin.out",
313  kMatchOrSave,
314  kTextOrBin);
315  const bool kParseSeqids(true);
316  unique_ptr<CMaskWriter> writer(new CMaskWriterSeqLoc(out, "seqloc_asn1_bin"));
317  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
318  BOOST_CHECK(out.match_pattern());
319 }
320 
321 BOOST_AUTO_TEST_CASE(WriteSeqLocAsn1BinaryNoParseSeqids)
322 {
323  const bool kMatchOrSave(true);
324  const bool kTextOrBin(false);
325  boost::test_tools::output_test_stream
326  out("data/sample_seqloc_asn1_bin.out", kMatchOrSave, kTextOrBin);
327  const bool kParseSeqids(false);
328  unique_ptr<CMaskWriter> writer(new CMaskWriterSeqLoc(out, "seqloc_asn1_bin"));
329  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
330  BOOST_CHECK(out.match_pattern());
331 }
332 
333 BOOST_AUTO_TEST_CASE(WriteSeqLocXmlParseSeqids)
334 {
335  boost::test_tools::output_test_stream out(
337  "data/sample_prefacc_parse_seqids_seqloc_xml.out" :
338  "data/sample_parse_seqids_seqloc_xml.out");
339  const bool kParseSeqids(true);
340  unique_ptr<CMaskWriter> writer(new CMaskWriterSeqLoc(out, "seqloc_xml"));
341  writer->Print(m_BioseqHandleWithGi, m_Masks, kParseSeqids);
342  BOOST_CHECK(out.match_pattern());
343 }
344 
345 BOOST_AUTO_TEST_CASE(WriteSeqLocXmlNoParseSeqids)
346 {
347  boost::test_tools::output_test_stream out("data/sample_seqloc_xml.out");
348  const bool kParseSeqids(false);
349  unique_ptr<CMaskWriter> writer(new CMaskWriterSeqLoc(out, "seqloc_xml"));
350  writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
351  BOOST_CHECK(out.match_pattern());
352 
353  //writer.reset(new CMaskWriterSeqLoc(cout, "seqloc_xml"));
354  //writer->Print(m_BioseqHandleWithLocalId, m_Masks, kParseSeqids);
355 }
356 
357 
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
ncbi::TMaskedQueryRegions mask
CBioseq_Handle –.
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
Output filter to print masked sequence locations as Blast-db-mask-info objects.
Output filter to write masked data in fasta format.
Output filter to print masked sequences as sets of intervals.
Output filter to print masked sequence locations as NCBI Seq-loc objects.
Output filter to print masked sequences as sets of intervals one per line.
pair< TSeqPos, TSeqPos > TMaskedInterval
Type representing a masked interval within a sequence.
Definition: mask_writer.hpp:78
vector< TMaskedInterval > TMaskList
A type representing the total of masking information about a sequence.
Definition: mask_writer.hpp:85
CScope –.
Definition: scope.hpp:92
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
static uch flags
std::ofstream out("events_result.xml")
main entry point for tests
Operators to edit gaps in sequences.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
Definition: fasta.cpp:312
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
CRef< CSeq_loc > SaveMask(void)
Directs the *following* call to ReadOneSeq to note the locations of lowercase letters.
Definition: fasta.cpp:474
const CSeq_id & GetBestID(void) const
Definition: fasta.hpp:171
@ fNoParseID
Generate an ID (whole defline -> title)
Definition: fasta.hpp:90
@ fUniqueIDs
Forbid duplicate IDs.
Definition: fasta.hpp:101
@ fAssumeNuc
Assume nucs unless accns indicate otherwise.
Definition: fasta.hpp:87
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
Definition: Seq_id.cpp:3404
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
@ eBlast_filter_program_dust
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
std::istream & in(std::istream &in_, double &x_)
The Object manager core.
USING_SCOPE(objects)
static const int kAlgoId(2)
BOOST_AUTO_TEST_CASE(WriteFastaParseSeqids)
static const string kAlgoOptions("window=64; level=20; linker=1")
CBioseq_Handle m_BioseqHandleWithLocalId
CBioseq_Handle m_BioseqHandleWithGi
CMaskWriter::TMaskList m_Masks
CBioseq_Handle x_LoadMaskedBioseq(bool parse_seqids)
size_t GetExpectedNumberOfMasks() const
void x_ConvertMasks(CRef< CSeq_loc > mask)
#define _ASSERT
Utilities for more convenient use of the Boost.Test library.
Modified on Wed Sep 04 15:03:33 2024 by modify_doxy.py rev. 669887