NCBI C++ ToolKit
collection_scores_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: collection_scores_unit_test.cpp 70778 2016-01-20 14:23:14Z kotliaro $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Alex Kotliarov
27 *
28 * File Description:
29 * Unit tests for the CAlignmentCollectionScore.
30 *
31 *
32 * ===========================================================================
33 */
34 
35 #include <ncbi_pch.hpp>
36 
37 // This macro should be defined before including test_boost.hpp in every
38 // "*.cpp" file of the executable except one. Just as main() is defined in
39 // only one *.cpp file of a non-Boost.Test executable, exactly one file should
40 // leave it undefined: if NCBI_BOOST_NO_AUTO_TEST_MAIN is not defined, then
41 // test_boost.hpp defines the "main()" function for the tests.
42 //
43 // Usually if your unit tests contain only one *.cpp file you should not
44 // care about this macro at all.
45 //
46 //#undef NCBI_BOOST_NO_AUTO_TEST_MAIN
47 
48 
49 // This header must be included before all Boost.Test headers if there are any
50 #include <corelib/test_boost.hpp>
55 #include <objmgr/scope.hpp>
57 
58 #include <serial/serial.hpp>
59 #include <serial/objistr.hpp>
60 
61 #include <cmath>
62 #include <sstream>
63 
64 #include <common/test_assert.h> /* This header must go last */
65 
68 
69 static string MakeKey(CScoreValue const&);
70 static string MakeKey(CSeq_align const&, string const&);
71 
73 {
    // Registers the command-line arguments that Test_AlignmentColl reads
    // through CNcbiApplication::Instance()->GetArgs().
    // NOTE(review): the line that opens this block and the trailing
    // arguments of both AddKey() calls are not present in this listing.
74  // Here we make descriptions of command line parameters that we are
75  // going to use.
    // "seq-entry": file holding the sequences; the test case reads it as
    // ASN.1 binary.
76  arg_desc->AddKey("seq-entry",
77  "SeqEntryPath",
78  "Path to sequences.",
81 
    // "aligns": file holding the concatenated alignments the test case scores.
82  arg_desc->AddKey("aligns",
83  "InputData",
84  "Concatenated Seq-aligns used to generate gene models",
87 
    // "input-binary": when set, the test case reads "aligns" as ASN.1 binary
    // instead of ASN.1 text.
88  arg_desc->AddFlag("input-binary",
89  "Input data is ASN.1 Binary.");
90 
91 }
92 
// Test_AlignmentColl: loads a Seq-entry and a stream of alignment sets from
// the files named on the command line, then checks that the
// "seq_percent_coverage" and "uniq_seq_percent_coverage" scores agree whether
// they are retrieved one name at a time, retrieved together as a group, or
// stored on the alignments as named scores and read back.
// NOTE(review): the declarations of `om`, `aligns`, `tmp` and `score` do not
// appear in this listing; the code below uses them as already declared.
93 BOOST_AUTO_TEST_CASE(Test_AlignmentColl)
94 {
95  const CArgs& args = CNcbiApplication::Instance()->GetArgs();
96 
    // Scope created on the object manager `om`, with the default data
    // loaders added.
98  CRef<CScope> scope(new CScope(*om));
99  scope->AddDefaults();
100 
    // Selects ASN.1 binary (true) or ASN.1 text (false) for the "aligns" input.
101  bool ibinary = args["input-binary"];
102 
    // The Seq-entry is always read as ASN.1 binary, independent of `ibinary`,
    // and is then added to the scope.
    // NOTE(review): the doubled ';' on the next line is an empty statement.
103  CRef<CSeq_entry> entry(new CSeq_entry);;
104  args["seq-entry"].AsInputFile() >> MSerial_AsnBinary >> *entry;
105  scope->AddTopLevelSeqEntry(*entry);
106 
    // Read objects from the "aligns" stream one after another while the
    // stream is good; each object's contents are spliced onto the end of
    // `coll`. CEofException ends the loop before the splice, so nothing is
    // added for the read that hit end-of-file.
107  CNcbiIstream& is = args["aligns"].AsInputFile();
109  list<CRef<CSeq_align> > coll;
110  while ( is ) {
112  try {
113  if ( ibinary ) {
114  is >> MSerial_AsnBinary >> tmp;
115  }
116  else {
117  is >> MSerial_AsnText >> tmp;
118  }
119  }
    // NOTE(review): the exception is caught by value; catching by
    // const reference is the usual form and avoids a copy.
120  catch (CEofException) {
121  break;
122  }
123  coll.splice(coll.end(), tmp.Set());
124  }
125 
    // Copy the accumulated alignments into `aligns`.
126  aligns->Set() = coll;
127 
128 
    // The input is expected to hold exactly 120 alignments.
129  BOOST_CHECK( 120 == aligns->Get().size());
130 
132 
133  // Retrieve qcovs scores: seq_percent_coverage.
134  vector<CScoreValue> qcovs = score->Get("seq_percent_coverage", *aligns);
135 
    // Expected coverage of the first score value and the tolerance used for
    // every floating-point comparison below.
136  const double epsilon = 0.0001;
137  const double coverage = 99.8013;
138 
    // NOTE(review): BOOST_CHECK presumably lets the test continue after a
    // failure, so front() would be called on an empty vector if the size
    // check fails — confirm, and consider a fatal check for the size.
    // NOTE(review): abs() is unqualified here; this relies on a
    // floating-point overload being selected rather than the int one.
139  BOOST_CHECK( 120 == qcovs.size() );
140  BOOST_CHECK( qcovs.front().GetQueryId().AsString() == "lcl|Sequence_1" );
141  BOOST_CHECK( qcovs.front().GetSubjectId().AsString() == "gi|219857159" );
142  BOOST_CHECK( qcovs.front().GetName() == "seq_percent_coverage" );
143  BOOST_CHECK( abs(qcovs.front().GetValue() - coverage) < epsilon );
144 
145  // Retrieve qcovus scores: uniq_seq_percent_coverage.
146  vector<CScoreValue> qcovus = score->Get("uniq_seq_percent_coverage", *aligns);
147 
    // Same checks as for qcovs, against the same expected coverage value.
148  BOOST_CHECK( 120 == qcovus.size() );
149  BOOST_CHECK( qcovus.front().GetQueryId().AsString() == "lcl|Sequence_1" );
150  BOOST_CHECK( qcovus.front().GetSubjectId().AsString() == "gi|219857159" );
151  BOOST_CHECK( qcovus.front().GetName() == "uniq_seq_percent_coverage" );
152  BOOST_CHECK( abs(qcovus.front().GetValue() - coverage) < epsilon );
153 
154 
    // The two score names, converted from a C array into a vector<string>.
155  char const* names[] = {"seq_percent_coverage", "uniq_seq_percent_coverage"};
156  vector<string> score_names(names, names + sizeof(names) / sizeof(char*));
157 
158  // Retrieve scores as a group.
    // 240 = two score names for each of the 120 alignments.
159  vector<CScoreValue> all = score->Get("subjects-sequence-coverage-group", score_names, *aligns);
160  BOOST_CHECK( 240 == all.size() );
161 
    // Index the individually retrieved values by MakeKey() so that the group
    // results and the named scores can be looked up against them.
    // NOTE(review): in the two loops below the local `key` is computed but
    // never used; MakeKey(*i) is evaluated a second time inside insert().
162  map<string, double> score_table;
163  for ( vector<CScoreValue>::const_iterator i = qcovs.begin(); i != qcovs.end(); ++i ) {
164  string key = MakeKey(*i);
165  score_table.insert(make_pair(MakeKey(*i), i->GetValue()));
166  }
167 
168  for ( vector<CScoreValue>::const_iterator i = qcovus.begin(); i != qcovus.end(); ++i ) {
169  string key = MakeKey(*i);
170  score_table.insert(make_pair(MakeKey(*i), i->GetValue()));
171  }
172 
    // Every value from the group retrieval must have a table entry with the
    // same value, within epsilon.
    // NOTE(review): at() is still evaluated when the count() check fails;
    // presumably it throws for a missing key — confirm.
173  for ( vector<CScoreValue>::const_iterator i = all.begin(); i != all.end(); ++i ) {
174  string key = MakeKey(*i);
175  BOOST_CHECK ( score_table.count(key) > 0 );
176  BOOST_CHECK ( abs(score_table.at(key) - i->GetValue()) < epsilon );
177  }
178 
179  // Set named scores.
180  score->Set("subjects-sequence-coverage-group", score_names, *aligns);
181 
    // Read each named score back from every alignment and compare it with
    // the table entry for the same alignment and score name.
    // NOTE(review): these keys are built from AsFastaString() on Seq-id rows
    // 0 and 1, while the table keys were built from AsString() on the score
    // value's ids; the lookups assume both produce identical text.
    // NOTE(review): the result of GetNamedScore() is not checked; `value`
    // stays 0. if the call does not assign it.
182  for (list<CRef<CSeq_align> >::const_iterator i = aligns->Get().begin(); i != aligns->Get().end(); ++i ) {
183  {{
184  string key = MakeKey(**i, "seq_percent_coverage");
185 
186  BOOST_CHECK ( score_table.count(key) > 0 );
187  double value = 0.;
188  (*i)->GetNamedScore("seq_percent_coverage", value);
189  BOOST_CHECK ( abs(score_table.at(key) - value) < epsilon );
190  }}
191 
192  {{
193  string key = MakeKey(**i, "uniq_seq_percent_coverage");
194 
195  BOOST_CHECK ( score_table.count(key) > 0 );
196  double value = 0.;
197  (*i)->GetNamedScore("uniq_seq_percent_coverage", value);
198  BOOST_CHECK ( abs(score_table.at(key) - value) < epsilon );
199  }}
200  }
201 }
202 
203 string MakeKey(CScoreValue const& value)
204 {
205  ostringstream oss;
206  oss << value.GetQueryId().AsString()
207  << '\t'
208  << value.GetSubjectId().AsString()
209  << '\t'
210  << value.GetName();
211  return oss.str();
212 }
213 
214 string MakeKey(CSeq_align const& align, string const& score_name)
215 {
216  ostringstream oss;
217  oss << align.GetSeq_id(0).AsFastaString()
218  << '\t'
219  << align.GetSeq_id(1).AsFastaString()
220  << '\t'
221  << score_name;
222  return oss.str();
223 }
224 
CArgs –.
Definition: ncbiargs.hpp:379
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CScope –.
Definition: scope.hpp:92
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
Definition: Seq_entry.hpp:56
static CRef< IAlignmentCollectionScore > GetInstance()
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
USING_SCOPE(objects)
NCBITEST_INIT_CMDLINE(arg_desc)
BOOST_AUTO_TEST_CASE(Test_AlignmentColl)
static string MakeKey(CScoreValue const &)
static const struct name_t names[]
static char tmp[3200]
Definition: utf8.c:42
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
@ fBinary
Open as binary file; for eInputFile, eOutputFile, eIOFile.
Definition: ncbiargs.hpp:620
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
const float epsilon
Definition: math.hpp:61
#define MSerial_AsnBinary
Definition: serialbase.hpp:697
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
Tdata & Set(void)
Assign a value to data member.
const Tdata & Get(void) const
Get the member data.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
int i
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define abs(a)
Definition: ncbi_heapmgr.c:130
The Object manager core.
CRef< objects::CObjectManager > om
Utility stuff for more convenient using of Boost.Test library.
Modified on Tue Apr 30 06:40:44 2024 by modify_doxy.py rev. 669887