NCBI C++ ToolKit
genomic_compart_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: genomic_compart_unit_test.cpp 102906 2024-08-05 14:36:21Z mozese2 $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eyal Mozes
27 *
28 * File Description:
29 * Unit tests for FindCompartments()
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 // This header must be included before all Boost.Test headers if there are any
37 #include <corelib/test_boost.hpp>
38 #include <corelib/ncbifile.hpp>
42 
43 #include <common/test_assert.h> /* This header must go last */
44 
47 
52 
54  SCompartmentData(const string &line);
// Summarize one compartment (an alignment set, as produced by
// FindCompartments() in the test case): store the number of alignments
// it holds and its overall range on row 0 (query_range) and row 1
// (subject_range).
55  SCompartmentData(const CSeq_align_set &compartment)
56  : num_alignments(compartment.Get().size())
57  {
// Attach the set to a temporary CSeq_align as a "disc" segment so that
// GetSeqRange() can be asked for each row of the whole compartment.
// NOTE(review): the const_cast is presumably needed because SetDisc()
// takes a non-const set -- confirm that the argument is not modified.
58  CSeq_align align;
59  align.SetSegs().SetDisc(const_cast<CSeq_align_set &>(compartment));
60  query_range = align.GetSeqRange(0);
61  subject_range = align.GetSeqRange(1);
62  }
63 
// Field-wise equality; this is what BOOST_CHECK_EQUAL uses when the test
// case compares an expected compartment against an actual one.
// NOTE(review): listing line 68 (the last operand of the && chain) is
// missing from this view of the file.
64  bool operator==(const SCompartmentData &o) const
65  {
66  return num_alignments == o.num_alignments &&
67  query_range == o.query_range &&
69  }
70 
// Ordering that lets SCompartmentData be stored in a set<>: the visible
// lines compare num_alignments first and then query_range.
// NOTE(review): listing lines 74 and 77 (the remaining parts of the
// comparison) are missing from this view of the file.
71  bool operator<(const SCompartmentData &o) const
72  {
73  return num_alignments < o.num_alignments ||
75  (query_range < o.query_range ||
76  (query_range == o.query_range &&
78  }
79 };
80 
// Body of the constructor that parses one text line of the form
//   <num_alignments> <from>..<to> <from>..<to>
// which is the same layout that operator<< writes.
// NOTE(review): the signature line and listing lines 88, 90 and 92 (the
// opening part of three range-setting calls) are missing from this view.
82 {
// The first/last blank and the first/last ".." delimit the five numbers.
83  size_t blank1 = line.find(' '),
84  blank2 = line.rfind(' ');
85  size_t dots1 = line.find(".."),
86  dots2 = line.rfind("..");
87  num_alignments = NStr::StringToUInt(line.substr(0,blank1));
// The "from" values have 1 subtracted after parsing, while the "to"
// values are used as parsed (the subject one via SetToOpen()); operator<<
// adds 1 to both ends when printing.
89  line.substr(blank1+1, dots1-blank1-1))-1);
91  line.substr(dots1+2, blank2-dots1-2)));
93  line.substr(blank2+1, dots2-blank2-1))-1);
94  subject_range.SetToOpen(NStr::StringToUInt(line.substr(dots2+2)));
95 }
96 
// Body of the stream output operator for SCompartmentData (the signature
// line is missing from this view). Writes
//   <num_alignments> <qfrom>..<qto> <sfrom>..<sto>
// adding 1 to each stored GetFrom()/GetTo() value, and returns the stream.
98 {
99  ostr << d.num_alignments << ' ' << d.query_range.GetFrom()+1 << ".."
100  << d.query_range.GetTo()+1 << ' ' << d.subject_range.GetFrom()+1
101  << ".." << d.subject_range.GetTo()+1;
102  return ostr;
103 }
104 
// Body of the command-line setup hook (its header line is missing from
// this view). Registers the two key arguments that the test case reads:
// "expected-results" and "input-dir".
// NOTE(review): listing lines 112 and 115 (the final argument of each
// AddKey() call) are missing from this view of the file.
106 {
107  // Here we make descriptions of command line parameters that we are
108  // going to use.
109 
110  arg_desc->AddKey("expected-results", "ResultsFile",
111  "File containing FindCompartments() results",
113  arg_desc->AddKey("input-dir", "InputDirectory",
114  "Directory containing input alignment sets",
116 }
117 
118 BOOST_AUTO_TEST_CASE(Test_Align_Filter)
119 {
120  const CArgs& args = CNcbiApplication::Instance()->GetArgs();
121 
122  CNcbiIstream& istr = args["expected-results"].AsInputFile();
123  string line;
124  while (NcbiGetlineEOL(istr, line)) {
125  string input_name, intersections;
126  NStr::SplitInTwo(line, " ", input_name, intersections);
127  int opts;
128  if (intersections == "none") {
129  opts = fCompart_Defaults;
130  } else if (intersections == "either") {
132  } else if (intersections == "both") {
134  } else if (intersections == "query") {
136  } else if (intersections == "subject") {
138  } else {
140  "Intersection option not recognized: " +
141  intersections);
142  }
144 
145  cerr << "input=" << input_name
146  << " intersections=" << intersections
147  << endl;
148 
149  set<SCompartmentData> expected_compartments;
150  while (NcbiGetlineEOL(istr, line) && !line.empty()) {
151  expected_compartments.insert(line);
152  }
153  list< CRef<CSeq_align> > alignments;
154  CNcbiIfstream align_istr(
155  CDirEntry::MakePath(args["input-dir"].AsString(),
156  input_name, "asn").c_str());
157  for (;;) {
158  try {
159  CRef<CSeq_align> alignment(new CSeq_align);
160  align_istr >> MSerial_AsnText >> *alignment;
161  alignments.push_back(alignment);
162  }
163  catch (CEofException&) {
164  break;
165  }
166  }
167 
168  list< CRef<CSeq_align_set> > comparts;
169  FindCompartments(alignments, comparts, opts);
170  set<SCompartmentData> actual_compartments;
171  ITERATE (list< CRef<CSeq_align_set> >, it, comparts) {
172  actual_compartments.insert(**it);
173  }
174 
175  ITERATE (set<SCompartmentData>, it, actual_compartments) {
176  cerr << *it << endl;
177  }
178 
179  BOOST_CHECK_EQUAL(expected_compartments.size(),
180  actual_compartments.size());
181  for(set<SCompartmentData>::const_iterator expected_it =
182  expected_compartments.begin(),
183  actual_it = actual_compartments.begin();
184  expected_it != expected_compartments.end();
185  ++expected_it, ++actual_it)
186  {
187  BOOST_CHECK_EQUAL(*expected_it, *actual_it);
188  }
189  }
190 }
CArgs –.
Definition: ncbiargs.hpp:379
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
size_type size() const
Definition: set.hpp:132
const_iterator end() const
Definition: set.hpp:136
parent_type::const_iterator const_iterator
Definition: set.hpp:79
void FindCompartments(const list< CRef< CSeq_align > > &aligns, list< CRef< CSeq_align_set > > &align_sets, TCompartOptions options=fCompart_Defaults, float diff_len_filter=3.0f)
@ fCompart_Defaults
@ fCompart_AllowIntersectionsSubject
@ fCompart_AllowIntersectionsBoth
@ fCompart_AllowIntersections
@ fCompart_AllowIntersectionsQuery
@ fCompart_AllowInconsistentIntersection
USING_SCOPE(objects)
CNcbiOstream & operator<<(CNcbiOstream &ostr, const SCompartmentData &d)
NCBITEST_INIT_CMDLINE(arg_desc)
BOOST_AUTO_TEST_CASE(Test_Align_Filter)
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
@ eUnknown
Definition: app_popup.hpp:72
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
TThisType & SetToOpen(position_type toOpen)
Definition: range.hpp:175
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3545
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
const TYPE & Get(const CNamedParameterList *param)
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
bool operator<(const SCompartmentData &o) const
SCompartmentData(const CSeq_align_set &compartment)
bool operator==(const SCompartmentData &o) const
Utility stuff for more convenient using of Boost.Test library.
Modified on Fri Sep 20 14:58:01 2024 by modify_doxy.py rev. 669887