NCBI C++ ToolKit
unit_test_feature_table_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1  /* $Id: unit_test_feature_table_reader.cpp 99483 2023-04-04 17:43:43Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, Michael Kornbluh
27 *
28 * File Description:
29 *   Unit tests for the feature table reader (CFeature_table_reader).
30 * ===========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbi_system.hpp>
37 
38 
39 // This header must be included before all Boost.Test headers if there are any
40 #include <corelib/test_boost.hpp>
41 
61 #include <objmgr/util/sequence.hpp>
62 
67 
70 
71 namespace {
// Error listener used by the tests in this file.  Only errors whose
// severity is above eDiag_Info are stored; PutError() always returns true.
// NOTE(review): listing line 73 (the base-class clause) is absent from this
// excerpt; StoreError() is presumably inherited from that base -- confirm.
72  class CMessageListenerLenientIgnoreProgress:
74  {
75  public:
76  CMessageListenerLenientIgnoreProgress() {};
77  ~CMessageListenerLenientIgnoreProgress() {};
78 
// Stores err when it is more severe than eDiag_Info, otherwise drops it.
// Returns true unconditionally.
79  bool
80  PutError(
81  const ILineError& err )
82  {
83  if( err.Severity() > eDiag_Info ) {
84  StoreError(err);
85  }
86  return true;
87  };
88  };
89 
90  // convert a string of text ASN.1 into an object of class TObj
91  template<typename TObj>
92  CRef<TObj> s_StrToObject(CTempString asn_text)
93  {
94  CRef<TObj> new_obj(new TObj);
95  CNcbiIstrstream strmAsnText(asn_text.data());
96  BOOST_REQUIRE_NO_THROW(
97  strmAsnText >> MSerial_AsnText >> *new_obj );
98  return new_obj;
99  }
100 
101 }
102 
// Feature table text for sequence lcl|seq_1: one gene and one CDS, both
// spanning 1..336, each followed by its qualifier lines.
103 static const char * sc_Table1 = "\
104 >Feature lcl|seq_1\n\
105 1\t336\tgene\n\
106 \t\t\tgene\ta\n\
107 \t\t\tgene_desc\tb\n\
108 1\t336\tCDS\n\
109 \t\t\tproduct c\n\
110 \t\t\tprot_desc\td\n\
111 \t\t\tprotein_id\tlcl|seq_1_1\n\
112 \t\t\tnote\te\n\
113 ";
114 
115 
116 static bool s_IgnoreError(const ILineError& line_error)
117 {
118  if( line_error.Severity() <= eDiag_Info ) {
119  return true;
120  }
121  return false;
122 }
123 
// Collects into out_vecOfLinesThatMatch the 1-based numbers of the lines of
// str (split on "\n") that contain pattern.  The output vector is cleared
// first, so earlier contents are discarded.
// NOTE(review): listing line 124, which should hold this function's return
// type and name, is absent from this excerpt.
125  const CTempString & str,
126  const CTempString & pattern,
127  vector<size_t> & out_vecOfLinesThatMatch )
128 {
129  out_vecOfLinesThatMatch.clear();
130 
131  vector<CTempString> vecOfLineContents;
132  NStr::Split(str, "\n", vecOfLineContents, 0);
133  ITERATE_0_IDX(ii, vecOfLineContents.size()) {
134  if( NPOS != NStr::Find(vecOfLineContents[ii], pattern)) {
135  out_vecOfLinesThatMatch.push_back(ii + 1); // line nums are 1-based
136  }
137  }
138 }
139 
// Returns one less than the number of pieces stored in
// vecPiecesBetweenTheWordError, and requires (BOOST_REQUIRE) that this
// count be greater than zero.  Presumably the pieces come from splitting
// str on pattern, which makes the result the number of occurrences.
// NOTE(review): listing line 143 (the start of the call that receives str,
// pattern and the vector) is absent from this excerpt.
140 static size_t s_CountOccurrences(const CTempString & str, const CTempString & pattern )
141 {
142  vector<CTempString> vecPiecesBetweenTheWordError;
144  str, pattern, vecPiecesBetweenTheWordError );
// NOTE(review): size() is unsigned, so an empty vector would wrap around to
// a huge value here and still pass the check below -- confirm that the
// vector always receives at least one piece.
145  size_t iNumErrsExpected = ( vecPiecesBetweenTheWordError.size() - 1 );
146  BOOST_REQUIRE(iNumErrsExpected > 0);
147  return iNumErrsExpected;
148 }
149 
150 typedef list<ILineError::EProblem> TErrList;
151 
// Compares, in order, the errors held by pMessageListener with
// expected_errors.  Errors accepted by s_IgnoreError() are skipped; every
// other error must match the next expected problem type.  A Boost error is
// raised for each error beyond the expected ones, and a check fails if
// expected errors are left over at the end.
// NOTE(review): listing line 153 (the function name) is absent from this
// excerpt.
152 static void
154  ILineErrorListener * pMessageListener,
155  TErrList expected_errors // yes, *copy* the container so we can modify
156  )
157 {
158  for (size_t i = 0; i < pMessageListener->Count(); i++) {
159  const ILineError& line_error = pMessageListener->GetError(i);
160  string error_text = line_error.Message();
161  if( s_IgnoreError(line_error) ) {
162  // certain error types may be ignored
163  } else if( expected_errors.empty() ) {
164  BOOST_ERROR("More errors occurred than expected at " << error_text);
165  } else {
// The same "(error_text)" suffix is appended to both operands, so the
// comparison can only differ in the problem strings.
166  BOOST_CHECK_EQUAL(
167  line_error.ProblemStr() + "(" + error_text + ")",
168  ILineError::ProblemStr(expected_errors.front()) +
169  "(" + error_text + ")" );
170  expected_errors.pop_front();
171  }
172  }
173 
174  BOOST_CHECK_MESSAGE( expected_errors.empty(),
175  "There were " << expected_errors.size()
176  << " expected errors which did not occur." );
177 }
178 
// Parameter list of a forward declaration that supplies the defaults: no
// expected errors, no additional flags, and null listener and filter.
// NOTE(review): listing line 179 (return type and function name) is absent
// from this excerpt.
180  const char * str,
181  // no errors expected by default
182  const TErrList & expected_errors = TErrList(),
183  CFeature_table_reader::TFlags additional_flags = 0,
184  ILineErrorListener* pMessageListener = nullptr,
185  CSimpleTableFilter* p_tbl_filter = nullptr);
186 
// Reads a feature table from the text in str and returns the resulting
// annot.  Before returning it verifies that the reported errors match
// expected_errors, that the annot is non-null and holds a feature table,
// and that the annot can be written out as text ASN.1.
// A null p_tbl_filter is replaced by a temporary
// CSimpleTableFilter(ITableFilter::eAction_Okay); a null pMessageListener
// is replaced by a temporary CMessageListenerLenientIgnoreProgress.
// NOTE(review): listing lines 187 (return type and name), 202 (arguments of
// SetActionForFeat) and 211 (start of the statement that assigns annot) are
// absent from this excerpt.
188  const char * str,
189  const TErrList & expected_errors,
190  CFeature_table_reader::TFlags additional_flags,
191  ILineErrorListener * pMessageListener,
192  CSimpleTableFilter *p_tbl_filter)
193 {
194  CNcbiIstrstream istr(str);
195  CRef<ILineReader> reader = ILineReader::New(istr);
196 
// Owns the fallback filter so it is released when the function returns.
197  unique_ptr<CSimpleTableFilter> tbl_filter;
198  if( ! p_tbl_filter ) {
199  tbl_filter.reset(new CSimpleTableFilter(ITableFilter::eAction_Okay));
200  p_tbl_filter = tbl_filter.get();
201  p_tbl_filter->SetActionForFeat(
203  }
204 
// Owns the fallback listener so it is released when the function returns.
205  unique_ptr<ILineErrorListener> p_temp_err_container;
206  if( ! pMessageListener ) {
207  p_temp_err_container.reset( new CMessageListenerLenientIgnoreProgress );
208  pMessageListener = p_temp_err_container.get();
209  }
210 
212  (*reader, // of type ILineReader, which is like istream but line-oriented
213  additional_flags, // flags also available: fKeepBadKey and fTranslateBadKey (to /standard_name=...)
214  pMessageListener, // holds errors found during reading
215  p_tbl_filter // used to make it act certain ways on
216  // certain feats. In particular, in bankit we consider "source" and "REFERENCE" to be disallowed
217  );
218 
219  s_CheckErrorsVersusExpected( pMessageListener, expected_errors );
220 
221  BOOST_REQUIRE(annot);
222  BOOST_REQUIRE(annot->IsFtable());
223 
224  // Make sure that annot can be serialized out
225  CNcbiOstrstream asn_text_str;
226  asn_text_str << MSerial_AsnText << *annot;
227 
228  return annot;
229 }
230 
231 typedef list< CRef<CSeq_annot> > TAnnotRefList;
232 typedef unique_ptr<TAnnotRefList> TAnnotRefListPtr;
233 
// Forward declaration supplying the defaults: no expected errors, no
// additional flags and a null listener.
// NOTE(review): listing line 235 (the function name) is absent from this
// excerpt.
234 static TAnnotRefListPtr
236  const char * str,
237  // no errors expected by default
238  const TErrList & expected_errors = TErrList(),
239  CFeature_table_reader::TFlags additional_flags = 0,
240  ILineErrorListener* pMessageListener = nullptr);
241 
// Reads feature tables from the text in str and returns the annots
// collected in a newly allocated list.  Each annot must hold a feature
// table; an annot with an empty feature table ends the collection.  After
// reading, the reported errors are compared with expected_errors.
// "source" features are marked as disallowed in the table filter.  A null
// pMessageListener is replaced by a temporary
// CMessageListenerLenientIgnoreProgress.
// NOTE(review): listing lines 243 (function name), 254 (declaration of
// tbl_filter) and 264 (start of the statement that assigns annot and tests
// it against nullptr, presumably a loop header) are absent from this
// excerpt.
242 static TAnnotRefListPtr
244  const char * str,
245  const TErrList & expected_errors,
246  CFeature_table_reader::TFlags additional_flags,
247  ILineErrorListener * pMessageListener )
248 {
249  TAnnotRefListPtr pAnnotRefList( new TAnnotRefList );
250 
251  CNcbiIstrstream istr(str);
252  CRef<ILineReader> reader = ILineReader::New(istr);
253 
255  tbl_filter.SetActionForFeat("source", ITableFilter::eAction_Disallowed );
256 
// Owns the fallback listener so it is released when the function returns.
257  unique_ptr<ILineErrorListener> p_temp_err_container;
258  if( ! pMessageListener ) {
259  p_temp_err_container.reset( new CMessageListenerLenientIgnoreProgress );
260  pMessageListener = p_temp_err_container.get();
261  }
262 
263  CRef<CSeq_annot> annot;
265  (*reader,
266  additional_flags,
267  pMessageListener,
268  &tbl_filter
269  )) != nullptr)
270  {
271  BOOST_REQUIRE(annot->IsFtable());
// An annot without features is not stored and stops the reading.
272  if( annot->GetData().GetFtable().empty() ) {
273  break;
274  }
275  pAnnotRefList->push_back( CRef<CSeq_annot>(annot) );
276  }
277 
278  s_CheckErrorsVersusExpected( pMessageListener, expected_errors );
279 
280  return pAnnotRefList;
281 }
282 
// Compares the qualifier names present on feat with expected_quals and
// reports one failed BOOST_CHECK_EQUAL for every name that is found but not
// expected, and one for every name that is expected but not found.
// NOTE(review): listing line 283 (return type, name and the parameter that
// declares feat) is absent from this excerpt.
284  const set<string> & expected_quals)
285 {
// Names of all qualifiers on the feature; duplicates collapse in the set.
286  set<string> found_quals;
287  ITERATE (CSeq_feat::TQual, it, feat->GetQual()) {
288  found_quals.insert( (*it)->GetQual() );
289  }
290 
291  // print unexpected qualifiers
292  // (found, but not expected)
293  {{
294  set<string> unexpected_quals;
295  set_difference(found_quals.begin(), found_quals.end(),
296  expected_quals.begin(), expected_quals.end(),
297  inserter(unexpected_quals, unexpected_quals.begin() ) );
298  ITERATE(set<string>, unexpected_qual, unexpected_quals) {
// The two operands always differ, so this check fails on purpose and its
// output shows the offending qualifier name.
299  BOOST_CHECK_EQUAL("Unexpected qualifier", *unexpected_qual);
300  }
301  }}
302 
303  // print missing qualifiers
304  // (expected, but not found)
305  {{
306  set<string> missing_quals;
307  set_difference(expected_quals.begin(), expected_quals.end(),
308  found_quals.begin(), found_quals.end(),
309  inserter(missing_quals, missing_quals.begin() ) );
310  ITERATE(set<string>, missing_qual, missing_quals) {
311  BOOST_CHECK_EQUAL("Missing qualifier", *missing_qual);
312  }
313  }}
314 }
315 
// Registers the command-line flag "v" (verbose test output) on descrs.
// NOTE(review): listing line 316 (the header that introduces descrs) is
// absent from this excerpt.
317 {
318  // Add calls like this for each command-line argument to be used.
319  descrs->AddFlag("v", "Verbose: tests produce extra output");
320 }
321 
322 // convenience functions for accessing the above cmd line opts
323 namespace {
324  bool IsVerbose()
325  {
326  return CNcbiApplication::Instance()->GetArgs()["v"];
327  }
328 
329  void SerializeOutIfVerbose(const string & note, const CSerialObject & obj)
330  {
331  if( ! IsVerbose() ) {
332  return;
333  }
334 
335  cerr << "Verbose output: " << note << ": "
336  << MSerial_AsnText << obj << endl;
337  }
338 }
339 
340 ///
341 /// Test that a simple table yields exactly two features: a gene first
/// and a coding region last.
342 ///
343 BOOST_AUTO_TEST_CASE(Test_FeatureTableWithGeneAndCodingRegion)
344 {
// NOTE(review): listing lines 345-346 (where ftable is obtained) are absent
// from this excerpt.
347 
348  BOOST_REQUIRE(ftable.size() == 2);
349  BOOST_REQUIRE(ftable.front()->IsSetData());
350  BOOST_REQUIRE(ftable.front()->GetData().IsGene());
351  BOOST_REQUIRE(ftable.back()->IsSetData());
352  BOOST_REQUIRE(ftable.back()->GetData().IsCdregion());
353  // must be true AND not throw an exception
354 // NCBITEST_REQUIRE(
355 // ftable.back()->GetProduct().GetWhole().GetLocal().GetStr() == "seq_1_1" );
356 }
357 
358 
// Text holding two feature tables back to back, for lcl|seq_1 and
// lcl|seq_2.  Each table has one gene and one CDS spanning 1..336.
359 static const char * sc_Table2 = "\
360 >Feature lcl|seq_1\n\
361 1\t336\tgene\n\
362 \t\t\tgene\ta\n\
363 \t\t\tgene_desc\tb\n\
364 1\t336\tCDS\n\
365 \t\t\tproduct c\n\
366 \t\t\tprot_desc\td\n\
367 \t\t\tprotein_id\tlcl|seq_1_1\n\
368 \t\t\tnote\te\n\
369 >Feature lcl|seq_2\n\
370 1\t336\tgene\n\
371 \t\t\tgene\ta\n\
372 \t\t\tgene_desc\tb\n\
373 1\t336\tCDS\n\
374 \t\t\tproduct c\n\
375 \t\t\tprot_desc\td\n\
376 \t\t\tprotein_id\tlcl|seq_2_1\n\
377 \t\t\tnote\te\n\
378 ";
379 
380 
381 ///
382 /// Test reading multiple tables: two annots are expected, each holding
/// a gene followed by a coding region.
383 ///
384 BOOST_AUTO_TEST_CASE(Test_MultipleFeatureTables)
385 {
// NOTE(review): listing lines 386 (where pAnnotRefList is obtained) and 390
// (start of the ftable declaration) are absent from this excerpt.
387  BOOST_REQUIRE_EQUAL( pAnnotRefList->size(), 2 );
388 
389  ITERATE(TAnnotRefList, annot_ref_it, *pAnnotRefList ) {
391  (*annot_ref_it)->GetData().GetFtable();
392  BOOST_REQUIRE_EQUAL( ftable.size(), 2 );
393 
// NOTE(review): this repeats the size check made two lines above.
394  BOOST_REQUIRE_EQUAL(ftable.size(), 2);
395  BOOST_REQUIRE(ftable.front()->IsSetData());
396  BOOST_REQUIRE(ftable.front()->GetData().IsGene());
397  BOOST_REQUIRE(ftable.back()->IsSetData());
398  BOOST_REQUIRE(ftable.back()->GetData().IsCdregion());
399  }
400 }
401 
402 
// Feature table text for gnl|FlyBase|2R|gb|AE013599 with five features:
// a gene, then two mRNA/CDS pairs.  Coordinates are written with the start
// greater than the stop; both mRNAs and the last CDS have a second interval
// line, and the last CDS carries a transl_except qualifier.
403 static const char * sc_Table3 = "\
404 >Feature gnl|FlyBase|2R|gb|AE013599\n\
405 <8328778\t8328138 gene\n\
406 \t\t\tgene\tCG30334\n\
407 \t\t\tlocus_tag\tDmel_CG30334\n\
408 \t\t\tcyt_map 49A6-49A6\n\
409 \t\t\tgene_syn\tNEST:bs09a11\n\
410 \t\t\tgene_syn\tDmel\\CG30334\n\
411 \t\t\tdb_xref FLYBASE:FBgn0050334\n\
412 8328778\t8328693 mRNA\n\
413 8328642\t8328138\n\
414 \t\t\tgene\tCG30334\n\
415 \t\t\tlocus_tag\tDmel_CG30334\n\
416 \t\t\tproduct CG30334, transcript variant A\n\
417 \t\t\tnote\tCG30334-RA; Dmel\\CG30334-RA\n\
418 \t\t\tprotein_id\tgnl|FlyBase|CG30334-PA|gb|AAM68686\n\
419 \t\t\ttranscript_id\tgnl|FlyBase|CG30334-RA\n\
420 8328633\t8328241 CDS\n\
421 \t\t\tgene\tCG30334\n\
422 \t\t\tlocus_tag\tDmel_CG30334\n\
423 \t\t\ttranscript_id\tgnl|FlyBase|CG30334-RA\n\
424 \t\t\tprot_desc\tCG30334 gene product from transcript CG30334-RA\n\
425 \t\t\tproduct CG30334, isoform A\n\
426 \t\t\tproduct CG30334-PA\n\
427 \t\t\tprotein_id\tgnl|FlyBase|CG30334-PA|gb|AAM68686\n\
428 8328778\t8328693 mRNA\n\
429 8328625\t8328138\n\
430 \t\t\tgene\tCG30334\n\
431 \t\t\tlocus_tag\tDmel_CG30334\n\
432 \t\t\tproduct CG30334, transcript variant B\n\
433 \t\t\tnote\tCG30334-RB; Dmel\\CG30334-RB\n\
434 \t\t\tprotein_id\tgnl|FlyBase|CG30334-PB\n\
435 \t\t\ttranscript_id\tgnl|FlyBase|CG30334-RB\n\
436 8328694\t8328693 CDS\n\
437 8328625\t8328241\n\
438 \t\t\tgene\tCG30334\n\
439 \t\t\tlocus_tag\tDmel_CG30334\n\
440 \t\t\ttranscript_id\tgnl|FlyBase|CG30334-RB\n\
441 \t\t\tprot_desc\tCG30334 gene product from transcript CG30334-RB\n\
442 \t\t\tproduct CG30334, isoform B\n\
443 \t\t\tproduct CG30334-PB\n\
444 \t\t\tprotein_id\tgnl|FlyBase|CG30334-PB\n\
445 \t\t\ttransl_except\t(pos:8328694..8328693,8328625,aa:Met) \n\
446 \t\t\tnote\tnon-AUG (AUC) translation initiation\n\
447 ";
448 
449 
450 ///
451 /// Test a FlyBase table: five features are expected, and the last one
/// must be a coding region whose qualifiers are exactly transcript_id,
/// transl_except and protein_id.
452 ///
453 BOOST_AUTO_TEST_CASE(Test_FlybaseFeatureTableWithMultiIntervalTranslExcept)
454 {
// NOTE(review): listing lines 455-456 (where ftable is obtained) are absent
// from this excerpt.
457  BOOST_CHECK_EQUAL(ftable.size(), 5);
458  CConstRef<CSeq_feat> cds = ftable.back();
459  BOOST_REQUIRE(cds->IsSetData());
460  BOOST_REQUIRE(cds->GetData().IsCdregion());
461  BOOST_CHECK_EQUAL(cds->GetQual().size(), 3);
// The first cross-reference of the CDS must point at a gene.
462  NCBITEST_CHECK( cds->GetXref()[0]->GetData().IsGene() );
463  set<string> expected_quals;
464  expected_quals.insert("transcript_id");
465  expected_quals.insert("transl_except");
466  expected_quals.insert("protein_id");
467  CheckExpectedQuals (cds, expected_quals);
468 
469  // check protein_ids
470  CSeq_annot::TData::TFtable::const_iterator ftable_it = ftable.begin();
// The first feature in the table must not have a product set.
471  NCBITEST_CHECK( ! (*ftable_it++)->IsSetProduct() );
472 
473 /*
474  NCBITEST_CHECK_EQUAL( (*ftable_it++)->GetProduct().GetWhole().AsFastaString(), "gb|AAM68686|" );
475  NCBITEST_CHECK_EQUAL( (*ftable_it++)->GetProduct().GetWhole().AsFastaString(), "gb|AAM68686|" );
476  NCBITEST_CHECK_EQUAL( (*ftable_it++)->GetProduct().GetWhole().AsFastaString(), "gnl|FlyBase|CG30334-PB" );
477  NCBITEST_CHECK_EQUAL( (*ftable_it++)->GetProduct().GetWhole().AsFastaString(), "gnl|FlyBase|CG30334-PB" );
478 */
479 }
480 
481 
// Feature table text for ref|NC_019571.1| with 48 feature entries: gene/CDS
// pairs (each gene with a GeneID db_xref, each CDS with product,
// transl_table and protein_id), tRNAs and two rRNAs.  Two of the CDS
// entries also carry a transl_except qualifier.
482 static const char * sc_Table4 = "\
483 >Feature ref|NC_019571.1|\n\
484 1\t1578\tgene\n\
485 \t\t\tgene\tCOX1\n\
486 \t\t\tdb_xref\tGeneID:14048202\n\
487 1\t1578\tCDS\n\
488 \t\t\tproduct\tcytochrome c oxidase subunit I\n\
489 \t\t\ttransl_table\t5\n\
490 \t\t\tprotein_id\tref|YP_007024788.1|\n\
491 1577\t1634\ttRNA\n\
492 \t\t\tproduct\ttRNA-Cys\n\
493 1635\t1694\ttRNA\n\
494 \t\t\tproduct\ttRNA-Met\n\
495 1697\t1751\ttRNA\n\
496 \t\t\tproduct\ttRNA-Asp\n\
497 1759\t1814\ttRNA\n\
498 \t\t\tproduct\ttRNA-Gly\n\
499 1815\t2507\tgene\n\
500 \t\t\tgene\tCOX2\n\
501 \t\t\tdb_xref\tGeneID:14048191\n\
502 1815\t2507\tCDS\n\
503 \t\t\tproduct\tcytochrome c oxidase subunit II\n\
504 \t\t\ttransl_except\t(pos:1815..1817,aa:Met)\n\
505 \t\t\ttransl_table\t5\n\
506 \t\t\tprotein_id\tref|YP_007024789.1|\n\
507 2506\t2561\ttRNA\n\
508 \t\t\tproduct\ttRNA-His\n\
509 2560\t3521\trRNA\n\
510 \t\t\tproduct\t16S ribosomal RNA\n\
511 \t\t\tnote\tl-rRNA\n\
512 3517\t3855\tgene\n\
513 \t\t\tgene\tND3\n\
514 \t\t\tdb_xref\tGeneID:14048192\n\
515 3517\t3855\tCDS\n\
516 \t\t\tproduct\tNADH dehydrogenase subunit 3\n\
517 \t\t\ttransl_table\t5\n\
518 \t\t\tprotein_id\tref|YP_007024790.1|\n\
519 3858\t5459\tgene\n\
520 \t\t\tgene\tND5\n\
521 \t\t\tdb_xref\tGeneID:14048193\n\
522 3858\t5459\tCDS\n\
523 \t\t\tproduct\tNADH dehydrogenase subunit 5\n\
524 \t\t\ttransl_table\t5\n\
525 \t\t\tprotein_id\tref|YP_007024791.1|\n\
526 5843\t5897\ttRNA\n\
527 \t\t\tproduct\ttRNA-Ala\n\
528 6132\t6190\ttRNA\n\
529 \t\t\tproduct\ttRNA-Pro\n\
530 6200\t6253\ttRNA\n\
531 \t\t\tproduct\ttRNA-Val\n\
532 6257\t6682\tgene\n\
533 \t\t\tgene\tND6\n\
534 \t\t\tdb_xref\tGeneID:14048194\n\
535 6257\t6682\tCDS\n\
536 \t\t\tproduct\tNADH dehydrogenase subunit 6\n\
537 \t\t\ttransl_table\t5\n\
538 \t\t\tprotein_id\tref|YP_007024792.1|\n\
539 6744\t6980\tgene\n\
540 \t\t\tgene\tND4L\n\
541 \t\t\tdb_xref\tGeneID:14048195\n\
542 6744\t6980\tCDS\n\
543 \t\t\tproduct\tNADH dehydrogenase subunit 4L\n\
544 \t\t\ttransl_table\t5\n\
545 \t\t\tprotein_id\tref|YP_007024793.1|\n\
546 6986\t7041\ttRNA\n\
547 \t\t\tproduct\ttRNA-Trp\n\
548 7054\t7109\ttRNA\n\
549 \t\t\tproduct\ttRNA-Glu\n\
550 7084\t7780\trRNA\n\
551 \t\t\tproduct\t12S ribosomal RNA\n\
552 \t\t\tnote\ts-rRNA\n\
553 7779\t7834\ttRNA\n\
554 \t\t\tproduct\ttRNA-Ser\n\
555 \t\t\tcodon_recognized\tUCN\n\
556 7842\t7896\ttRNA\n\
557 \t\t\tproduct\ttRNA-Asn\n\
558 7907\t7962\ttRNA\n\
559 \t\t\tproduct\ttRNA-Tyr\n\
560 7962\t8828\tgene\n\
561 \t\t\tgene\tND1\n\
562 \t\t\tdb_xref\tGeneID:14048196\n\
563 7962\t8828\tCDS\n\
564 \t\t\tproduct\tNADH dehydrogenase subunit 1\n\
565 \t\t\ttransl_table\t5\n\
566 \t\t\tprotein_id\tref|YP_007024794.1|\n\
567 8840\t9439\tgene\n\
568 \t\t\tgene\tATP6\n\
569 \t\t\tdb_xref\tGeneID:14048197\n\
570 8840\t9439\tCDS\n\
571 \t\t\tproduct\tATP synthase F0 subunit 6\n\
572 \t\t\ttransl_table\t5\n\
573 \t\t\tprotein_id\tref|YP_007024795.1|\n\
574 9461\t9522\ttRNA\n\
575 \t\t\tproduct\ttRNA-Lys\n\
576 9523\t9578\ttRNA\n\
577 \t\t\tproduct\ttRNA-Leu\n\
578 \t\t\tcodon_recognized\tUUR\n\
579 9577\t9629\ttRNA\n\
580 \t\t\tproduct\ttRNA-Ser\n\
581 \t\t\tcodon_recognized\tAGN\n\
582 9632\t10486\tgene\n\
583 \t\t\tgene\tND2\n\
584 \t\t\tdb_xref\tGeneID:14048198\n\
585 9632\t10486\tCDS\n\
586 \t\t\tproduct\tNADH dehydrogenase subunit 2\n\
587 \t\t\ttransl_table\t5\n\
588 \t\t\tprotein_id\tref|YP_007024796.1|\n\
589 10486\t10541\ttRNA\n\
590 \t\t\tproduct\ttRNA-Ile\n\
591 10536\t10589\ttRNA\n\
592 \t\t\tproduct\ttRNA-Arg\n\
593 10589\t10646\ttRNA\n\
594 \t\t\tproduct\ttRNA-Gln\n\
595 10661\t10714\ttRNA\n\
596 \t\t\tproduct\ttRNA-Phe\n\
597 10706\t11809\tgene\n\
598 \t\t\tgene\tCYTB\n\
599 \t\t\tdb_xref\tGeneID:14048199\n\
600 10706\t11809\tCDS\n\
601 \t\t\tproduct\tcytochrome b\n\
602 \t\t\ttransl_table\t5\n\
603 \t\t\tprotein_id\tref|YP_007024797.1|\n\
604 11809\t11866\ttRNA\n\
605 \t\t\tproduct\ttRNA-Leu\n\
606 \t\t\tcodon_recognized\tCUN\n\
607 11859\t12630\tgene\n\
608 \t\t\tgene\tCOX3\n\
609 \t\t\tdb_xref\tGeneID:14048200\n\
610 11859\t12630\tCDS\n\
611 \t\t\tproduct\tcytochrome c oxidase subunit III\n\
612 \t\t\ttransl_except\t(pos:12630,aa:TERM)\n\
613 \t\t\ttransl_table\t5\n\
614 \t\t\tprotein_id\tref|YP_007024798.1|\n\
615 \t\t\tnote\tTAA stop codon is completed by the addition of 3' A residues to the mRNA\n\
616 12631\t12684\ttRNA\n\
617 \t\t\tproduct\ttRNA-Thr\n\
618 12684\t13913\tgene\n\
619 \t\t\tgene\tND4\n\
620 \t\t\tdb_xref\tGeneID:14048201\n\
621 12684\t13913\tCDS\n\
622 \t\t\tproduct\tNADH dehydrogenase subunit 4\n\
623 \t\t\ttransl_table\t5\n\
624 \t\t\tprotein_id\tref|YP_007024799.1|\n\
625 ";
626 
627 
628 ///
629 /// Test a table mixing gene, CDS, tRNA and rRNA features: 48 features
/// are expected and each one is checked according to its data type.
630 ///
631 BOOST_AUTO_TEST_CASE(Test_NCTableWithtRNAs)
632 {
// NOTE(review): listing lines 633-634 (where ftable is obtained) and 636
// (the loop header that declares feat) are absent from this excerpt.
635  BOOST_CHECK_EQUAL(ftable.size(), 48);
637  if ((*feat)->GetData().IsRna()) {
// RNA features: a tRNA needs a tRNA extension with an amino acid, an rRNA
// needs a non-blank name extension.
638  const CRNA_ref& rna = (*feat)->GetData().GetRna();
639  BOOST_REQUIRE (rna.IsSetExt());
640  if (rna.GetType() == CRNA_ref::eType_tRNA) {
641  BOOST_REQUIRE (rna.GetExt().IsTRNA());
642  BOOST_REQUIRE (rna.GetExt().GetTRNA().IsSetAa());
643  } else if (rna.GetType() == CRNA_ref::eType_rRNA) {
644  BOOST_REQUIRE (rna.GetExt().IsName());
645  BOOST_REQUIRE (!NStr::IsBlank(rna.GetExt().GetName()));
646  }
647  } else if ((*feat)->GetData().IsCdregion()) {
// Coding regions: the first cross-reference must be a protein with exactly
// one name; protein_id is the only qualifier expected, plus transl_except
// for the two products named below.
648  BOOST_REQUIRE ((*feat)->IsSetXref());
649  BOOST_REQUIRE ((*feat)->GetXref().front()->IsSetData());
650  BOOST_REQUIRE ((*feat)->GetXref().front()->GetData().IsProt());
651  const CProt_ref& prot = (*feat)->GetXref().front()->GetData().GetProt();
652  BOOST_REQUIRE (prot.IsSetName());
653  BOOST_REQUIRE (prot.GetName().size() == 1);
654  set<string> expected_quals;
655  expected_quals.insert("protein_id");
656  // NCBITEST_CHECK( NStr::StartsWith(
657  // (*feat)->GetProduct().GetWhole().GetOther().GetAccession(), "YP_0070247") );
658  if (NStr::Equal(prot.GetName().front(), "cytochrome c oxidase subunit II")
659  || NStr::Equal(prot.GetName().front(), "cytochrome c oxidase subunit III")) {
660  expected_quals.insert("transl_except");
661  }
662  CheckExpectedQuals (*feat, expected_quals);
663  } else if ((*feat)->GetData().IsGene()) {
// Genes: a locus must be set and there must be exactly one db_xref, in
// database "GeneID" with a numeric tag.
664  const CGene_ref& gene = (*feat)->GetData().GetGene();
665  BOOST_REQUIRE (gene.IsSetLocus());
666  BOOST_REQUIRE ((*feat)->IsSetDbxref());
667  BOOST_REQUIRE ((*feat)->GetDbxref().size() == 1);
668  CConstRef<CDbtag> tag = (*feat)->GetDbxref().front();
669  BOOST_REQUIRE (tag->IsSetDb());
670  BOOST_CHECK_EQUAL(tag->GetDb(), "GeneID");
671  BOOST_REQUIRE (tag->GetTag().IsId());
672  }
673  }
674 }
675 
676 
// Feature table text for ref|NC_019571.1| with two tRNA features whose
// product values carry a codon after the amino acid, written in two
// different ways ("tRNA-Asn-GTT" and "tRNA-Met (ATT)").
677 static const char * sc_TableTrnaWithCodon = "\
678 >Feature ref|NC_019571.1|\n\
679 1\t10\ttRNA\n\
680 \t\t\tproduct\ttRNA-Asn-GTT\n\
681 12\t20\ttRNA\n\
682 \t\t\tproduct\ttRNA-Met (ATT)\n\
683 ";
684 
685 ///
686 /// Test tRNA features: two features are expected, each a tRNA with an
/// amino acid set in its extension and with no qualifiers on the feature.
687 ///
688 BOOST_AUTO_TEST_CASE(Test_NCTableWithtRNAsWithCodons)
689 {
// NOTE(review): listing lines 690-691 (where ftable is obtained) and 693
// (the loop header that declares feat) are absent from this excerpt.
692  BOOST_CHECK_EQUAL(ftable.size(), 2);
694  BOOST_REQUIRE((*feat)->GetData().IsRna());
695  const CRNA_ref& rna = (*feat)->GetData().GetRna();
696  BOOST_REQUIRE(rna.IsSetExt());
697  BOOST_CHECK_EQUAL(rna.GetType(), CRNA_ref::eType_tRNA);
698  BOOST_REQUIRE(rna.GetExt().IsTRNA());
699  BOOST_REQUIRE(rna.GetExt().GetTRNA().IsSetAa());
700  BOOST_REQUIRE(!(*feat)->IsSetQual());
701  }
702 }
703 
704 
// Feature table text for gb|CP003382.1| with two features on
// 1982606..1982707: a gene and an ncRNA that has ncRNA_class and product
// qualifiers.
705 static const char * sc_Table5 = "\
706 >Feature gb|CP003382.1|\n\
707 1982606\t1982707\tgene\n\
708 \t\t\tlocus_tag\tDeipe_1981\n\
709 \t\t\tnote\tIMG reference gene:2509592968\n\
710 1982606\t1982707\tncRNA\n\
711 \t\t\tncRNA_class\tSRP_RNA\n\
712 \t\t\tproduct\tBacterial signal recognition particle RNA\n\
713 ";
714 
715 
716 ///
717 /// Test an ncRNA feature: two features are expected, and the last one
/// must be an ncRNA whose general extension holds the expected product
/// and class strings.
718 ///
719 BOOST_AUTO_TEST_CASE(Test_CPTableWithncRNAs)
720 {
// NOTE(review): listing lines 721-722 (where ftable is obtained) are absent
// from this excerpt.
723  BOOST_CHECK_EQUAL(ftable.size(), 2);
724  CConstRef<CSeq_feat> ncrna = ftable.back();
725  BOOST_REQUIRE(ncrna->IsSetData());
726  BOOST_REQUIRE(ncrna->GetData().IsRna());
727  const CRNA_ref& rna = ncrna->GetData().GetRna();
728  BOOST_CHECK_EQUAL(rna.GetType(), CRNA_ref::eType_ncRNA);
729  BOOST_REQUIRE(rna.IsSetExt());
730  BOOST_REQUIRE(rna.GetExt().IsGen());
731  BOOST_REQUIRE(rna.GetExt().GetGen().IsSetProduct());
732  BOOST_REQUIRE(rna.GetExt().GetGen().IsSetClass());
733  BOOST_CHECK_EQUAL(rna.GetExt().GetGen().GetProduct(), "Bacterial signal recognition particle RNA");
734  BOOST_CHECK_EQUAL(rna.GetExt().GetGen().GetClass(), "SRP_RNA");
735 }
736 
737 
738 static const char * sc_Table6 = "\
739 >Feature ref|NC_000008.9|NC_000008\n\
740 <1\t>13208\tgene\n\
741 \t\t\tgene\tFBXO25\n\
742 \t\t\tdb_xref\tGeneID:26260\n\
743 \t\t\tdb_xref\tHGNC:13596\n\
744 \t\t\tdb_xref\tMIM:609098\n\
745 <4920\t5023\tmRNA\n\
746 6465\t6514\n\
747 9194\t9286\n\
748 \t\t\tproduct\tF-box protein 25\n\
749 \t\t\ttranscript_id\tNM_183421.1\n\
750 \t\t\texception\tunclassified transcription discrepancy\n\
751 \t\t\tdb_xref\tGeneID:26260\n\
752 \t\t\tdb_xref\tMIM:609098\n\
753 <4920\t5023\tmRNA\n\
754 6465\t6514\n\
755 9194\t9286\n\
756 \t\t\tproduct\tF-box protein 25\n\
757 \t\t\ttranscript_id\tNM_183420.1\n\
758 \t\t\texception\tunclassified transcription discrepancy\n\
759 \t\t\tdb_xref\tGeneID:26260\n\
760 \t\t\tdb_xref\tMIM:609098\n\
761 <4920\t5023\tmRNA\n\
762 9194\t9286\n\
763 \t\t\tproduct\tF-box protein 25\n\
764 \t\t\ttranscript_id\tNM_012173.3\n\
765 \t\t\texception\tunclassified transcription discrepancy\n\
766 \t\t\tdb_xref\tGeneID:26260\n\
767 \t\t\tdb_xref\tMIM:609098\n\
768 <4920\t5023\tCDS\n\
769 6465\t6514\n\
770 9194\t9286\n\
771 \t\t\tproduct\tF-box only protein 25 isoform 1\n\
772 \t\t\tproduct\tF-box protein Fbx25\n\
773 \t\t\tproduct\tF-box only protein 25\n\
774 \t\t\tprotein_id\tNP_904357.1\n\
775 \t\t\tnote\tisoform 1 is encoded by transcript variant 1\n\
776 \t\t\tGO_function\tubiquitin-protein ligase activity|0004842|10531035|NAS\n\
777 \t\t\tGO_process\tprotein ubiquitination|0016567|10531035|NAS\n\
778 \t\t\tGO_component\tubiquitin ligase complex|0000151|10531035|NAS\n\
779 \t\t\tdb_xref\tCCDS:CCDS5953.1\n\
780 \t\t\tdb_xref\tGeneID:26260\n\
781 <4920\t5023\tCDS\n\
782 6465\t6514\n\
783 9194\t9286\n\
784 \t\t\tproduct\tF-box only protein 25 isoform 2\n\
785 \t\t\tproduct\tF-box protein Fbx25\n\
786 \t\t\tproduct\tF-box only protein 25\n\
787 \t\t\tprotein_id\tNP_904356.1\n\
788 \t\t\tnote\tisoform 2 is encoded by transcript variant 2\n\
789 \t\t\tGO_function\tubiquitin-protein ligase activity|0004842|10531035|NAS\n\
790 \t\t\tGO_process\tprotein ubiquitination|0016567|10531035|NAS\n\
791 \t\t\tGO_component\tubiquitin ligase complex|0000151|10531035|NAS\n\
792 \t\t\tdb_xref\tCCDS:CCDS5952.1\n\
793 \t\t\tdb_xref\tGeneID:26260\n\
794 1\t13208\tvariation\n\
795 150\t150\tvariation\n\
796 \t\t\treplace\tA\n\
797 \t\t\treplace\tG\n\
798 \t\t\tdb_xref\tdbSNP:55727401\n\
799 150\t150\tvariation\n\
800 \t\t\treplace\tA\n\
801 \t\t\treplace\tG\n\
802 \t\t\tdb_xref\tdbSNP:10793768\n\
803 257\t257\tvariation\n\
804 \t\t\treplace\tC\n\
805 \t\t\treplace\tT\n\
806 \t\t\tdb_xref\tdbSNP:12138618\n\
807 266\t266\tvariation\n\
808 \t\t\treplace\tC\n\
809 \t\t\treplace\tT\n\
810 \t\t\tdb_xref\tdbSNP:2427889\n\
811 269\t269\tvariation\n\
812 \t\t\treplace\tA\n\
813 \t\t\treplace\tG\n\
814 \t\t\tdb_xref\tdbSNP:7831204\n\
815 299\t299\tvariation\n\
816 \t\t\treplace\tA\n\
817 \t\t\treplace\tG\n\
818 \t\t\tdb_xref\tdbSNP:62483103\n\
819 302\t302\tvariation\n\
820 \t\t\treplace\tC\n\
821 \t\t\treplace\tG\n\
822 \t\t\tdb_xref\tdbSNP:2427890\n\
823 325\t325\tvariation\n\
824 \t\t\treplace\tA\n\
825 \t\t\treplace\tT\n\
826 \t\t\tdb_xref\tdbSNP:2977629\n\
827 408\t408\tvariation\n\
828 \t\t\treplace\tC\n\
829 \t\t\treplace\tT\n\
830 \t\t\tdb_xref\tdbSNP:62483104\n\
831 414\t414\tvariation\n\
832 \t\t\treplace\tC\n\
833 \t\t\treplace\tT\n\
834 \t\t\tdb_xref\tdbSNP:2905047\n\
835 438\t438\tvariation\n\
836 \t\t\treplace\tA\n\
837 \t\t\treplace\tG\n\
838 \t\t\tdb_xref\tdbSNP:11786745\n\
839 480\t480\tvariation\n\
840 \t\t\treplace\tA\n\
841 \t\t\treplace\tG\n\
842 \t\t\tdb_xref\tdbSNP:3115862\n\
843 496\t496\tvariation\n\
844 \t\t\treplace\tC\n\
845 \t\t\treplace\tT\n\
846 \t\t\tdb_xref\tdbSNP:3094316\n\
847 501\t501\tvariation\n\
848 \t\t\treplace\tG\n\
849 \t\t\treplace\tT\n\
850 \t\t\tdb_xref\tdbSNP:12547344\n\
851 503\t504\tvariation\n\
852 \t\t\treplace\tGAAAATAGGTTTCACATCTTTTTTTTAACTTATATAAAATTGACTGGACTTTCTCTTCTGTGTGTTGTGTTAGATATTTAGGAAGGAAT\n\
853 \t\t\tdb_xref\tdbSNP:71202620\n\
854 504\t504\tvariation\n\
855 \t\t\treplace\tA\n\
856 \t\t\treplace\tG\n\
857 \t\t\tdb_xref\tdbSNP:12550258\n\
858 504\t504\tvariation\n\
859 \t\t\treplace\tA\n\
860 \t\t\treplace\tG\n\
861 \t\t\tdb_xref\tdbSNP:3115863\n\
862 537\t537\tvariation\n\
863 \t\t\treplace\tA\n\
864 \t\t\treplace\tG\n\
865 \t\t\tdb_xref\tdbSNP:73525986\n\
866 561\t561\tvariation\n\
867 \t\t\treplace\tC\n\
868 \t\t\treplace\tG\n\
869 \t\t\tdb_xref\tdbSNP:11996480\n\
870 594\t594\tvariation\n\
871 \t\t\treplace\tC\n\
872 \t\t\treplace\tT\n\
873 \t\t\tdb_xref\tdbSNP:11985199\n\
874 620\t620\tvariation\n\
875 \t\t\treplace\tC\n\
876 \t\t\treplace\tG\n\
877 \t\t\tdb_xref\tdbSNP:11997205\n\
878 637\t637\tvariation\n\
879 \t\t\treplace\tA\n\
880 \t\t\treplace\tG\n\
881 \t\t\tdb_xref\tdbSNP:3115864\n\
882 733\t733\tvariation\n\
883 \t\t\treplace\tC\n\
884 \t\t\treplace\tG\n\
885 \t\t\tdb_xref\tdbSNP:61688116\n\
886 735\t735\tvariation\n\
887 \t\t\treplace\tA\n\
888 \t\t\treplace\tG\n\
889 \t\t\tdb_xref\tdbSNP:57970854\n\
890 786\t786\tvariation\n\
891 \t\t\treplace\tC\n\
892 \t\t\treplace\tT\n\
893 \t\t\tdb_xref\tdbSNP:12184338\n\
894 804\t804\tvariation\n\
895 \t\t\treplace\tA\n\
896 \t\t\treplace\tC\n\
897 \t\t\tdb_xref\tdbSNP:55678681\n\
898 810\t810\tvariation\n\
899 \t\t\treplace\tA\n\
900 \t\t\treplace\tG\n\
901 \t\t\tdb_xref\tdbSNP:12184332\n\
902 849\t849\tvariation\n\
903 \t\t\treplace\tA\n\
904 \t\t\treplace\tG\n\
905 \t\t\tdb_xref\tdbSNP:12550792\n\
906 852\t852\tvariation\n\
907 \t\t\treplace\tC\n\
908 \t\t\treplace\tG\n\
909 \t\t\tdb_xref\tdbSNP:11783529\n\
910 901\t901\tvariation\n\
911 \t\t\treplace\tC\n\
912 \t\t\treplace\tT\n\
913 \t\t\tdb_xref\tdbSNP:58688196\n\
914 901\t901\tvariation\n\
915 \t\t\treplace\tC\n\
916 \t\t\treplace\tT\n\
917 \t\t\tdb_xref\tdbSNP:56426218\n\
918 929\t929\tvariation\n\
919 \t\t\treplace\tG\n\
920 \t\t\treplace\tT\n\
921 \t\t\tdb_xref\tdbSNP:11136669\n\
922 976\t976\tvariation\n\
923 \t\t\treplace\tA\n\
924 \t\t\treplace\tG\n\
925 \t\t\tdb_xref\tdbSNP:55837473\n\
926 989\t989\tvariation\n\
927 \t\t\treplace\tG\n\
928 \t\t\treplace\tT\n\
929 \t\t\tdb_xref\tdbSNP:7834538\n\
930 1037\t1037\tvariation\n\
931 \t\t\treplace\tA\n\
932 \t\t\treplace\tG\n\
933 \t\t\tdb_xref\tdbSNP:7844307\n\
934 1135\t1135\tvariation\n\
935 \t\t\treplace\tA\n\
936 \t\t\treplace\tG\n\
937 \t\t\tdb_xref\tdbSNP:56115318\n\
938 1203\t1203\tvariation\n\
939 \t\t\treplace\tC\n\
940 \t\t\treplace\tT\n\
941 \t\t\tdb_xref\tdbSNP:10089646\n\
942 1226\t1226\tvariation\n\
943 \t\t\treplace\tA\n\
944 \t\t\treplace\tC\n\
945 \t\t\tdb_xref\tdbSNP:7823777\n\
946 1228\t1228\tvariation\n\
947 \t\t\treplace\tC\n\
948 \t\t\treplace\tT\n\
949 \t\t\tdb_xref\tdbSNP:56312035\n\
950 1425\t1425\tvariation\n\
951 \t\t\treplace\tC\n\
952 \t\t\treplace\tG\n\
953 \t\t\tdb_xref\tdbSNP:7813883\n\
954 1511\t1511\tvariation\n\
955 \t\t\treplace\tA\n\
956 \t\t\treplace\tC\n\
957 \t\t\tdb_xref\tdbSNP:73525988\n\
958 1569\t1569\tvariation\n\
959 \t\t\treplace\tA\n\
960 \t\t\treplace\tG\n\
961 \t\t\tdb_xref\tdbSNP:11783748\n\
962 1667\t1667\tvariation\n\
963 \t\t\treplace\tA\n\
964 \t\t\treplace\tC\n\
965 \t\t\tdb_xref\tdbSNP:35389027\n\
966 1721\t1721\tvariation\n\
967 \t\t\treplace\tA\n\
968 \t\t\treplace\tG\n\
969 \t\t\tdb_xref\tdbSNP:4495405\n\
970 1988\t1988\tvariation\n\
971 \t\t\treplace\tA\n\
972 \t\t\treplace\tG\n\
973 \t\t\tdb_xref\tdbSNP:73669377\n\
974 2471\t2471\tvariation\n\
975 \t\t\treplace\tA\n\
976 \t\t\treplace\tG\n\
977 \t\t\tdb_xref\tdbSNP:2954702\n\
978 2512\t2512\tvariation\n\
979 \t\t\treplace\tA\n\
980 \t\t\treplace\tG\n\
981 \t\t\tdb_xref\tdbSNP:7010178\n\
982 2941\t2942\tvariation\n\
983 \t\t\treplace\tC\n\
984 \t\t\treplace\tA\n\
985 \t\t\tdb_xref\tdbSNP:34708162\n\
986 3014\t3014\tvariation\n\
987 \t\t\treplace\tA\n\
988 \t\t\treplace\tG\n\
989 \t\t\tdb_xref\tdbSNP:73669378\n\
990 3233\t3233\tvariation\n\
991 \t\t\treplace\tA\n\
992 \t\t\treplace\tA\n\
993 \t\t\tdb_xref\tdbSNP:34374462\n\
994 3630\t3631\tvariation\n\
995 \t\t\treplace\tG\n\
996 \t\t\treplace\tG\n\
997 \t\t\tdb_xref\tdbSNP:36025637\n\
998 4035\t4035\tvariation\n\
999 \t\t\treplace\tA\n\
1000 \t\t\treplace\tG\n\
1001 \t\t\tdb_xref\tdbSNP:35310547\n\
1002 4349\t4349\tvariation\n\
1003 \t\t\treplace\tC\n\
1004 \t\t\treplace\tG\n\
1005 \t\t\tdb_xref\tdbSNP:17812912\n\
1006 4527\t4527\tvariation\n\
1007 \t\t\treplace\tG\n\
1008 \t\t\treplace\tT\n\
1009 \t\t\tdb_xref\tdbSNP:73669379\n\
1010 4790\t4790\tvariation\n\
1011 \t\t\treplace\tA\n\
1012 \t\t\treplace\tC\n\
1013 \t\t\tdb_xref\tdbSNP:9644342\n\
1014 4845\t4845\tvariation\n\
1015 \t\t\treplace\tC\n\
1016 \t\t\treplace\tT\n\
1017 \t\t\tdb_xref\tdbSNP:73525989\n\
1018 4923\t4923\tvariation\n\
1019 \t\t\treplace\tC\n\
1020 \t\t\treplace\tG\n\
1021 \t\t\tdb_xref\tdbSNP:28438773\n\
1022 4937\t5023\tCDS\n\
1023 9194\t9286\n\
1024 \t\t\tproduct\tF-box only protein 25 isoform 3\n\
1025 \t\t\tproduct\tF-box protein Fbx25\n\
1026 \t\t\tproduct\tF-box only protein 25\n\
1027 \t\t\tprotein_id\tNP_036305.2\n\
1028 \t\t\tnote\tisoform 3 is encoded by transcript variant 3\n\
1029 \t\t\tGO_function\tubiquitin-protein ligase activity|0004842|10531035|NAS\n\
1030 \t\t\tGO_process\tprotein ubiquitination|0016567|10531035|NAS\n\
1031 \t\t\tGO_component\tubiquitin ligase complex|0000151|10531035|NAS\n\
1032 \t\t\tdb_xref\tCCDS:CCDS5954.1\n\
1033 \t\t\tdb_xref\tGeneID:26260\n\
1034 5776\t5776\tvariation\n\
1035 \t\t\treplace\tC\n\
1036 \t\t\treplace\tT\n\
1037 \t\t\tdb_xref\tdbSNP:6981190\n\
1038 5977\t5977\tvariation\n\
1039 \t\t\treplace\tA\n\
1040 \t\t\treplace\tG\n\
1041 \t\t\tdb_xref\tdbSNP:56259539\n\
1042 6016\t6016\tvariation\n\
1043 \t\t\treplace\tA\n\
1044 \t\t\treplace\tG\n\
1045 \t\t\tdb_xref\tdbSNP:61012540\n\
1046 6130\t6130\tvariation\n\
1047 \t\t\treplace\tA\n\
1048 \t\t\treplace\tC\n\
1049 \t\t\tdb_xref\tdbSNP:2722516\n\
1050 6235\t6235\tvariation\n\
1051 \t\t\treplace\tA\n\
1052 \t\t\treplace\tC\n\
1053 \t\t\tdb_xref\tdbSNP:2722517\n\
1054 6290\t6290\tvariation\n\
1055 \t\t\treplace\tA\n\
1056 \t\t\treplace\tG\n\
1057 \t\t\tdb_xref\tdbSNP:9644272\n\
1058 6536\t6536\tvariation\n\
1059 \t\t\treplace\tC\n\
1060 \t\t\treplace\tT\n\
1061 \t\t\tdb_xref\tdbSNP:6998464\n\
1062 6842\t6842\tvariation\n\
1063 \t\t\treplace\tA\n\
1064 \t\t\treplace\tG\n\
1065 \t\t\tdb_xref\tdbSNP:73173380\n\
1066 7314\t7314\tvariation\n\
1067 \t\t\treplace\tA\n\
1068 \t\t\treplace\tG\n\
1069 \t\t\tdb_xref\tdbSNP:2798496\n\
1070 7316\t7316\tvariation\n\
1071 \t\t\treplace\tA\n\
1072 \t\t\treplace\tC\n\
1073 \t\t\tdb_xref\tdbSNP:2488924\n\
1074 7421\t7421\tvariation\n\
1075 \t\t\treplace\tA\n\
1076 \t\t\treplace\tT\n\
1077 \t\t\tdb_xref\tdbSNP:4973692\n\
1078 7424\t7424\tvariation\n\
1079 \t\t\treplace\tA\n\
1080 \t\t\treplace\tC\n\
1081 \t\t\tdb_xref\tdbSNP:4973650\n\
1082 7431\t7431\tvariation\n\
1083 \t\t\treplace\tG\n\
1084 \t\t\treplace\tT\n\
1085 \t\t\tdb_xref\tdbSNP:2722519\n\
1086 7447\t7447\tvariation\n\
1087 \t\t\treplace\tA\n\
1088 \t\t\treplace\tG\n\
1089 \t\t\tdb_xref\tdbSNP:2488925\n\
1090 7493\t7493\tvariation\n\
1091 \t\t\treplace\tC\n\
1092 \t\t\treplace\tT\n\
1093 \t\t\tdb_xref\tdbSNP:12550478\n\
1094 7545\t7545\tvariation\n\
1095 \t\t\treplace\tC\n\
1096 \t\t\treplace\tG\n\
1097 \t\t\tdb_xref\tdbSNP:12680761\n\
1098 7900\t7900\tvariation\n\
1099 \t\t\treplace\tA\n\
1100 \t\t\treplace\tG\n\
1101 \t\t\tdb_xref\tdbSNP:17064969\n\
1102 8491\t8491\tvariation\n\
1103 \t\t\treplace\tG\n\
1104 \t\t\treplace\tT\n\
1105 \t\t\tdb_xref\tdbSNP:71514143\n\
1106 8497\t8497\tvariation\n\
1107 \t\t\treplace\tA\n\
1108 \t\t\treplace\tG\n\
1109 \t\t\tdb_xref\tdbSNP:71514144\n\
1110 8559\t8559\tvariation\n\
1111 \t\t\treplace\tA\n\
1112 \t\t\treplace\tG\n\
1113 \t\t\tdb_xref\tdbSNP:71514145\n\
1114 8615\t8615\tvariation\n\
1115 \t\t\treplace\tA\n\
1116 \t\t\treplace\tG\n\
1117 \t\t\tdb_xref\tdbSNP:71514146\n\
1118 8637\t8637\tvariation\n\
1119 \t\t\treplace\tA\n\
1120 \t\t\treplace\tG\n\
1121 \t\t\tdb_xref\tdbSNP:71514147\n\
1122 8758\t8758\tvariation\n\
1123 \t\t\treplace\tA\n\
1124 \t\t\treplace\tG\n\
1125 \t\t\tdb_xref\tdbSNP:62483130\n\
1126 8785\t8785\tvariation\n\
1127 \t\t\treplace\tA\n\
1128 \t\t\treplace\tG\n\
1129 \t\t\tdb_xref\tdbSNP:71514148\n\
1130 8815\t8815\tvariation\n\
1131 \t\t\treplace\tG\n\
1132 \t\t\treplace\tT\n\
1133 \t\t\tdb_xref\tdbSNP:71514149\n\
1134 8819\t8819\tvariation\n\
1135 \t\t\treplace\tA\n\
1136 \t\t\treplace\tG\n\
1137 \t\t\tdb_xref\tdbSNP:13267767\n\
1138 8864\t8864\tvariation\n\
1139 \t\t\treplace\tC\n\
1140 \t\t\treplace\tT\n\
1141 \t\t\tdb_xref\tdbSNP:71514150\n\
1142 8924\t8924\tvariation\n\
1143 \t\t\treplace\tA\n\
1144 \t\t\treplace\tG\n\
1145 \t\t\tdb_xref\tdbSNP:71514151\n\
1146 9036\t9036\tvariation\n\
1147 \t\t\treplace\tC\n\
1148 \t\t\treplace\tT\n\
1149 \t\t\tdb_xref\tdbSNP:71514152\n\
1150 9067\t9067\tvariation\n\
1151 \t\t\treplace\tA\n\
1152 \t\t\treplace\tG\n\
1153 \t\t\tdb_xref\tdbSNP:71514153\n\
1154 9174\t9174\tvariation\n\
1155 \t\t\treplace\tA\n\
1156 \t\t\treplace\tC\n\
1157 \t\t\tdb_xref\tdbSNP:71219302\n\
1158 9187\t9187\tvariation\n\
1159 \t\t\treplace\tC\n\
1160 \t\t\treplace\tT\n\
1161 \t\t\tdb_xref\tdbSNP:71514154\n\
1162 9191\t9191\tvariation\n\
1163 \t\t\treplace\tC\n\
1164 \t\t\treplace\tT\n\
1165 \t\t\tdb_xref\tdbSNP:56016669\n\
1166 9191\t9191\tvariation\n\
1167 \t\t\treplace\tC\n\
1168 \t\t\treplace\tT\n\
1169 \t\t\tdb_xref\tdbSNP:17665428\n\
1170 9365\t9365\tvariation\n\
1171 \t\t\treplace\tG\n\
1172 \t\t\treplace\tT\n\
1173 \t\t\tdb_xref\tdbSNP:71514155\n\
1174 9405\t9405\tvariation\n\
1175 \t\t\treplace\tC\n\
1176 \t\t\treplace\tT\n\
1177 \t\t\tdb_xref\tdbSNP:71514156\n\
1178 9407\t9407\tvariation\n\
1179 \t\t\treplace\tA\n\
1180 \t\t\treplace\tT\n\
1181 \t\t\tdb_xref\tdbSNP:3965448\n\
1182 9415\t9415\tvariation\n\
1183 \t\t\treplace\tA\n\
1184 \t\t\treplace\tG\n\
1185 \t\t\tdb_xref\tdbSNP:71514157\n\
1186 9415\t9415\tvariation\n\
1187 \t\t\treplace\tA\n\
1188 \t\t\treplace\tG\n\
1189 \t\t\tdb_xref\tdbSNP:3873815\n\
1190 9512\t9512\tvariation\n\
1191 \t\t\treplace\tA\n\
1192 \t\t\treplace\tC\n\
1193 \t\t\tdb_xref\tdbSNP:7838909\n\
1194 10524\t10524\tvariation\n\
1195 \t\t\treplace\tC\n\
1196 \t\t\treplace\tG\n\
1197 \t\t\tdb_xref\tdbSNP:3936437\n\
1198 10667\t10667\tvariation\n\
1199 \t\t\treplace\tA\n\
1200 \t\t\treplace\tT\n\
1201 \t\t\tdb_xref\tdbSNP:3931132\n\
1202 10711\t10711\tvariation\n\
1203 \t\t\treplace\tC\n\
1204 \t\t\treplace\tT\n\
1205 \t\t\tdb_xref\tdbSNP:3936436\n\
1206 10780\t10780\tvariation\n\
1207 \t\t\treplace\tA\n\
1208 \t\t\treplace\tG\n\
1209 \t\t\tdb_xref\tdbSNP:2034353\n\
1210 10783\t10783\tvariation\n\
1211 \t\t\treplace\tA\n\
1212 \t\t\treplace\tG\n\
1213 \t\t\tdb_xref\tdbSNP:3936435\n\
1214 10792\t10792\tvariation\n\
1215 \t\t\treplace\tA\n\
1216 \t\t\treplace\tG\n\
1217 \t\t\tdb_xref\tdbSNP:1992879\n\
1218 11159\t11159\tvariation\n\
1219 \t\t\treplace\tC\n\
1220 \t\t\treplace\tT\n\
1221 \t\t\tdb_xref\tdbSNP:61708689\n\
1222 11319\t11320\tvariation\n\
1223 \t\t\treplace\tG\n\
1224 \t\t\treplace\tTT\n\
1225 \t\t\tdb_xref\tdbSNP:34339077\n\
1226 11332\t11332\tvariation\n\
1227 \t\t\treplace\tG\n\
1228 \t\t\treplace\tT\n\
1229 \t\t\tdb_xref\tdbSNP:11990180\n\
1230 11333\t11334\tvariation\n\
1231 \t\t\treplace\tC\n\
1232 \t\t\treplace\tTT\n\
1233 \t\t\tdb_xref\tdbSNP:56674698\n\
1234 11343\t11343\tvariation\n\
1235 \t\t\treplace\tA\n\
1236 \t\t\treplace\tG\n\
1237 \t\t\tdb_xref\tdbSNP:2335278\n\
1238 11406\t11406\tvariation\n\
1239 \t\t\treplace\tA\n\
1240 \t\t\treplace\tG\n\
1241 \t\t\tdb_xref\tdbSNP:2878381\n\
1242 11443\t11443\tvariation\n\
1243 \t\t\treplace\tC\n\
1244 \t\t\treplace\tT\n\
1245 \t\t\tdb_xref\tdbSNP:2335279\n\
1246 11455\t11455\tvariation\n\
1247 \t\t\treplace\tA\n\
1248 \t\t\treplace\tG\n\
1249 \t\t\tdb_xref\tdbSNP:2335280\n\
1250 11491\t11491\tvariation\n\
1251 \t\t\treplace\tA\n\
1252 \t\t\treplace\tG\n\
1253 \t\t\tdb_xref\tdbSNP:3965449\n\
1254 11501\t11501\tvariation\n\
1255 \t\t\treplace\tG\n\
1256 \t\t\treplace\tT\n\
1257 \t\t\tdb_xref\tdbSNP:3965450\n\
1258 11537\t11537\tvariation\n\
1259 \t\t\treplace\tA\n\
1260 \t\t\treplace\tG\n\
1261 \t\t\tdb_xref\tdbSNP:58291483\n\
1262 11653\t11653\tvariation\n\
1263 \t\t\treplace\tA\n\
1264 \t\t\treplace\tT\n\
1265 \t\t\tdb_xref\tdbSNP:35051996\n\
1266 11765\t11765\tvariation\n\
1267 \t\t\treplace\tC\n\
1268 \t\t\treplace\tG\n\
1269 \t\t\tdb_xref\tdbSNP:9644275\n\
1270 11906\t11906\tvariation\n\
1271 \t\t\treplace\tA\n\
1272 \t\t\treplace\tG\n\
1273 \t\t\tdb_xref\tdbSNP:9644276\n\
1274 12198\t12198\tvariation\n\
1275 \t\t\treplace\tC\n\
1276 \t\t\treplace\tT\n\
1277 \t\t\tdb_xref\tdbSNP:58937369\n\
1278 12492\t12492\tvariation\n\
1279 \t\t\treplace\tC\n\
1280 \t\t\treplace\tT\n\
1281 \t\t\tdb_xref\tdbSNP:73173382\n\
1282 12569\t12569\tvariation\n\
1283 \t\t\treplace\tC\n\
1284 \t\t\treplace\tG\n\
1285 \t\t\tdb_xref\tdbSNP:73173383\n\
1286 12684\t12684\tvariation\n\
1287 \t\t\treplace\tC\n\
1288 \t\t\treplace\tT\n\
1289 \t\t\tdb_xref\tdbSNP:12546248\n\
1290 12744\t12744\tvariation\n\
1291 \t\t\treplace\tG\n\
1292 \t\t\treplace\tT\n\
1293 \t\t\tdb_xref\tdbSNP:59983968\n\
1294 12864\t12864\tvariation\n\
1295 \t\t\treplace\tC\n\
1296 \t\t\treplace\tT\n\
1297 \t\t\tdb_xref\tdbSNP:4045703\n\
1298 12888\t12888\tvariation\n\
1299 \t\t\treplace\tC\n\
1300 \t\t\treplace\tG\n\
1301 \t\t\tdb_xref\tdbSNP:3857917\n\
1302 12982\t12983\tvariation\n\
1303 \t\t\treplace\tA\n\
1304 \t\t\treplace\tG\n\
1305 \t\t\tdb_xref\tdbSNP:35231189\n\
1306 13029\t13029\tvariation\n\
1307 \t\t\treplace\tA\n\
1308 \t\t\treplace\tG\n\
1309 \t\t\tdb_xref\tdbSNP:880926\n\
1310 13041\t13041\tvariation\n\
1311 \t\t\treplace\tA\n\
1312 \t\t\treplace\tG\n\
1313 \t\t\tdb_xref\tdbSNP:2335281\n\
1314 13057\t13058\tvariation\n\
1315 \t\t\treplace\tC\n\
1316 \t\t\treplace\tTTA\n\
1317 \t\t\tdb_xref\tdbSNP:4045704\n\
1318 13088\t13088\tvariation\n\
1319 \t\t\treplace\tC\n\
1320 \t\t\treplace\tG\n\
1321 \t\t\tdb_xref\tdbSNP:2003213\n\
1322 13208\t13208\tvariation\n\
1323 \t\t\treplace\tG\n\
1324 \t\t\treplace\tT\n\
1325 \t\t\tdb_xref\tdbSNP:7833133\n\
1326 \n\
1327 \n\
1328 ";
1329 
1330 
1331 ///
1332 /// Test a table that mixes "variation" features with a CDS carrying GO terms
1333 ///
// Checks, on the parsed feature table:
//  - 138 features in total, 131 of which are Imp features keyed "variation";
//  - every Cdregion feature has a "GeneOntology" user-object extension whose
//    top-level fields each wrap exactly one sub-field holding 4 fields.
// NOTE(review): listing lines 1336 and 1341 are absent from this view; they
// presumably declare `annot` and open the loop that defines `feat` -- confirm
// against the real source file.
1334 BOOST_AUTO_TEST_CASE(Test_TableWithVariationsAndGoTerms)
1335 {
1337  const CSeq_annot::TData::TFtable& ftable = annot->GetData().GetFtable();
1338  BOOST_CHECK_EQUAL(ftable.size(), 138);
1339 
1340  int num_variations = 0;
1342  if ((*feat)->GetData().IsImp()) {
1343  if (NStr::Equal((*feat)->GetData().GetImp().GetKey(), "variation")) {
1344  num_variations++;
1345  }
1346  } else if ((*feat)->GetData().IsCdregion()) {
// REQUIRE (fatal) because the GetExt() calls below need the extension to exist
1347  BOOST_REQUIRE((*feat)->IsSetExt());
1348  BOOST_CHECK_EQUAL((*feat)->GetExt().GetType().GetStr(), "GeneOntology");
1349  ITERATE(CUser_object::TData, it, (*feat)->GetExt().GetData()) {
1350  BOOST_REQUIRE ((*it)->GetData().IsFields());
1351  BOOST_CHECK_EQUAL((*it)->GetData().GetFields().size(), 1);
1352  ITERATE(CUser_object::TData, it_fields, (*it)->GetData().GetFields()) {
1353  BOOST_REQUIRE ((*it_fields)->GetData().IsFields());
1354  BOOST_CHECK_EQUAL((*it_fields)->GetData().GetFields().size(), 4);
1355  }
1356  }
1357 
1358  // since no FASTA tag, becomes a local ID even though it might be
1359  // parsed as an accession
1360  // NCBITEST_CHECK(
1361  // NStr::StartsWith(
1362  // (*feat)->GetProduct().GetWhole().GetLocal().GetStr(), "NP_") );
1363  }
1364  }
1365  BOOST_CHECK_EQUAL(num_variations, 131);
1366 }
1367 
1368 
// Feature table for gb|CP003382.1|: a gene, an ncRNA with ncRNA_class, product,
// EC_number and PCR_conditions qualifiers, and a misc_feature with an STS
// qualifier.
// NOTE(review): presumably the input of Test_CapitalizedQualifiers below,
// whose body is not visible in this listing -- confirm.
1369 static const char * sc_Table7 = "\
1370 >Feature gb|CP003382.1|\n\
1371 1982606\t1982707\tgene\n\
1372 \t\t\tlocus_tag\tDeipe_1981\n\
1373 \t\t\tnote\tIMG reference gene:2509592968\n\
1374 1982606\t1982707\tncRNA\n\
1375 \t\t\tncRNA_class\tSRP_RNA\n\
1376 \t\t\tproduct\tBacterial signal recognition particle RNA\n\
1377 \t\t\tEC_number\t1.2.3.4\n\
1378 \t\t\tPCR_conditions\tabc\n\
1379 1\t20\tmisc_feature\n\
1380 \t\t\tSTS\tabc\n\
1381 ";
1382 
1383 
1384 ///
1385 /// Test qualifiers whose names contain capital letters (per the test name)
1386 ///
// NOTE(review): listing line 1389, the only statement of this test, is absent
// from this view; it presumably reads sc_Table7 -- confirm against the real
// source file.
1387 BOOST_AUTO_TEST_CASE(Test_CapitalizedQualifiers)
1388 {
1390 }
1391 
// Feature table for lcl|seq1 with four two-interval tRNAs, each carrying an
// anticodon qualifier in a different location syntax: plain range,
// complement(), join() and complement(join()).
// The amino acids (His, Pro, Ala, Cys) and positions match the expectations
// hard-coded in Test_tRNAAnticodonQualifiers.
1392 static const char * sc_Table8 = "\
1393 >Feature lcl|seq1\n\
1394 1\t10\ttRNA\n\
1395 20\t30\n\
1396 \t\t\tanticodon\t(pos:21..23,aa:His)\n\
1397 101\t110\ttRNA\n\
1398 120\t130\n\
1399 \t\t\tanticodon\t(pos:complement(121..123),aa:Pro)\n\
1400 201\t210\ttRNA\n\
1401 220\t230\n\
1402 \t\t\tanticodon\t(pos:join(210,220..221),aa:Ala)\n\
1403 301\t310\ttRNA\n\
1404 320\t330\n\
1405 \t\t\tanticodon\t(pos:complement(join(310,320..321)),aa:Cys)\n\
1406 ";
1407 
// Checks that anticodon qualifiers are converted into the tRNA extension
// (amino acid + anticodon Seq-loc) and do not remain as generic qualifiers.
// NOTE(review): listing lines 1412 and 1435 are absent from this view; they
// presumably declare `annot` and open the loop that defines `feat` -- confirm
// against the real source file.
1408 BOOST_AUTO_TEST_CASE(Test_tRNAAnticodonQualifiers)
1409 {
1410  // test various conditions for anticodon qualifiers
1411 
1413  const CSeq_annot::TData::TFtable& ftable = annot->GetData().GetFtable();
// NOTE(review): this check is non-fatal, yet `pos` below indexes two
// 4-element arrays; if the loop walks ftable, a larger table would read past
// their ends -- consider BOOST_REQUIRE_EQUAL.
1414  BOOST_CHECK_EQUAL(ftable.size(), 4);
1415 
1416  // expect no quals
1417  set<string> expected_quals;
1418 
1419  // expected amino acids
1420  int expected_aas[] = {
1421  // implicit char to int casts
1422  'H', 'P', 'A', 'C'
1423  };
1424 
// Expected anticodon locations as ASN.1 text, one per feature, in table order.
// Coordinates are 0-based here (the table's 21..23 becomes from 20, to 22).
1425  const char *pchExpectedAnticodonLocations[] = {
1426  "Seq-loc ::= int { from 20, to 22, id local str \"seq1\" }",
1427  "Seq-loc ::= int { from 120, to 122, strand minus, id local str \"seq1\" }",
1428  "Seq-loc ::= mix { pnt { point 209, id local str \"seq1\" }, \
1429  int { from 219, to 220, id local str \"seq1\" } }",
1430  "Seq-loc ::= mix { int { from 319, to 320, strand minus, id local str \"seq1\" }, \
1431  pnt { point 309, strand minus, id local str \"seq1\" } }"
1432  };
1433 
// index of the current feature within the two expectation arrays above
1434  size_t pos = 0;
1436  const CRNA_ref & trna_ref = (*feat)->GetData().GetRna();
1437  BOOST_CHECK_EQUAL( trna_ref.GetType(), CRNA_ref::eType_tRNA );
1438  CheckExpectedQuals (*feat, expected_quals);
1439 
1440  const CTrna_ext & trna_ext = trna_ref.GetExt().GetTRNA();
1441 
1442  BOOST_CHECK_EQUAL(trna_ext.GetAa().GetNcbieaa(), expected_aas[pos] );
1443 
1444  CRef<CSeq_loc> pExpectedAnticodonLoc =
1445  s_StrToObject<CSeq_loc>(pchExpectedAnticodonLocations[pos]);
1446 
// on mismatch, print both locations as ASN.1 text to ease diagnosis
1447  if( ! trna_ext.GetAnticodon().Equals(*pExpectedAnticodonLoc) ) {
1448  BOOST_ERROR( "Anticodon mismatch: \n"
1449  << "Received: "
1450  << MSerial_AsnText << trna_ext.GetAnticodon() << "\n"
1451  << "\n"
1452  << "Expected: "
1453  << MSerial_AsnText << *pExpectedAnticodonLoc );
1454  }
1455 
1456  ++pos;
1457  }
1458 }
1459 
// Feature table for lcl|seq1 with one tRNA whose anticodon location joins a
// plus-strand point with a complement() range, i.e. mixes strands.
// Input of Test_ForbidMixedStrandAnticodonQualifier.
1460 static const char * sc_Table9 = "\
1461 >Feature lcl|seq1\n\
1462 1\t10\ttRNA\n\
1463 20\t30\n\
1464 \t\t\tanticodon\t(pos:join(10..10,complement(20..21)),aa:Pro)\n\
1465 ";
1466 
// Checks that a mixed-strand anticodon qualifier (sc_Table9) is rejected:
// one eProblem_QualifierBadValue error is expected, the tRNA feature is still
// created, and neither the amino acid nor the anticodon is set on it.
// NOTE(review): listing line 1472 is absent from this view; it presumably
// declares `annot` and opens the reader call whose arguments follow -- confirm
// against the real source file.
1467 BOOST_AUTO_TEST_CASE(Test_ForbidMixedStrandAnticodonQualifier)
1468 {
1469  TErrList expected_errors;
1470  expected_errors.push_back(ILineError::eProblem_QualifierBadValue);
1471 
1473  sc_Table9,
1474  expected_errors);
1475  const CSeq_annot::TData::TFtable& ftable = annot->GetData().GetFtable();
// fatal check: ftable.front() below is undefined on an empty table
1476  BOOST_REQUIRE_EQUAL(ftable.size(), 1);
1477 
1478  // expect no quals
1479  set<string> expected_quals;
1480  CheckExpectedQuals (ftable.front(), expected_quals);
1481 
1482  const CRNA_ref & trna_ref = ftable.front()->GetData().GetRna();
1483  BOOST_CHECK_EQUAL( trna_ref.GetType(), CRNA_ref::eType_tRNA );
1484 
1485  const CTrna_ext & trna_ext = trna_ref.GetExt().GetTRNA();
1486  BOOST_CHECK( ! trna_ext.IsSetAa() );
1487  BOOST_CHECK( ! trna_ext.IsSetAnticodon() );
1488 }
1489 
1490 // each CDS has a note on it indicating
1491 // whether or not it should have an
1492 // error if fCDSsMustBeInTheirGenes is set.
// Two overlapping genes (SOME_GENE 1..100, ANOTHER_GENE 50..200) and eight
// CDSs that xref one of them. TestCDSInGenesCheck searches this text for
// "error", so the wording of the notes is part of the test logic.
1493 static const char * sc_Table10 = "\
1494 >Feature lcl|seq1\n\
1495 1\t100\tgene\n\
1496 \t\t\tgene\tSOME_GENE\n\
1497 50\t200\tgene\n\
1498 \t\t\tgene\tANOTHER_GENE\n\
1499 1\t100\tCDS\n\
1500 \t\t\tgene\tSOME_GENE\n\
1501 \t\t\tnote\tshould be okay\n\
1502 20\t70\tCDS\n\
1503 \t\t\tgene\tSOME_GENE\n\
1504 \t\t\tnote\tshould be okay\n\
1505 70\t150\tCDS\n\
1506 \t\t\tgene\tSOME_GENE\n\
1507 \t\t\tnote\tshould have error\n\
1508 2\t100\tCDS\n\
1509 \t\t\tgene\tANOTHER_GENE\n\
1510 \t\t\tnote\tshould have error\n\
1511 21\t70\tCDS\n\
1512 \t\t\tgene\tANOTHER_GENE\n\
1513 \t\t\tnote\tshould have error\n\
1514 71\t150\tCDS\n\
1515 \t\t\tgene\tANOTHER_GENE\n\
1516 \t\t\tnote\tshould be okay\n\
1517 60\t80\tCDS\n\
1518 \t\t\tgene\tSOME_GENE\n\
1519 \t\t\tnote\tshould be okay\n\
1520 61\t80\tCDS\n\
1521 \t\t\tgene\tANOTHER_GENE\n\
1522 \t\t\tnote\tshould be okay\n\
1523 ";
1524 
// Checks that a CDS lying outside the gene it xrefs is reported with
// eProblem_FeatMustBeInXrefdGene, on the right line, while every feature is
// still created. The expected errors are derived from sc_Table10 itself.
// NOTE(review): listing lines 1539 and 1542 are absent from this view; they
// presumably declare `annot`, open the reader call and pass the reader flags
// -- confirm against the real source file.
1525 BOOST_AUTO_TEST_CASE(TestCDSInGenesCheck)
1526 {
1527  // count how many times the word "error" appears in sc_Table10
1528  // to determine how many errors we expect
1529  vector<size_t> linesWithError;
1530  s_ListLinesWithPattern(sc_Table10, "error", linesWithError);
// guards against the table being edited so that no error case remains
1531  BOOST_REQUIRE( ! linesWithError.empty() );
1532 
// one expected error per matching line
1533  TErrList expected_errors;
1534  fill_n( back_inserter(expected_errors),
1535  linesWithError.size(), ILineError::eProblem_FeatMustBeInXrefdGene);
1536 
1537  CMessageListenerLenientIgnoreProgress err_container;
1538 
1540  sc_Table10,
1541  expected_errors,
1543  &err_container );
1544  const CSeq_annot::TData::TFtable& ftable = annot->GetData().GetFtable();
// "gene\n" and "CDS\n" match only feature-key lines: in qualifier lines the
// word "gene" is followed by a tab, not by a newline
1545  BOOST_CHECK_EQUAL(ftable.size(),
1546  s_CountOccurrences(sc_Table10, "gene\n") +
1547  s_CountOccurrences(sc_Table10, "CDS\n") );
1548 
// fatal check: linesWithError[ii] below is indexed up to err_container.Count()
1549  BOOST_REQUIRE_EQUAL(err_container.Count(), linesWithError.size());
1550  ITERATE_0_IDX(ii, err_container.Count()) {
1551  const ILineError& line_error = err_container.GetError(ii);
1552  // (The "2" is because the error line is 2 lines down from the CDS's start line)
1553  BOOST_CHECK_EQUAL( line_error.Line(), linesWithError[ii] - 2 );
1554  BOOST_CHECK( ! line_error.OtherLines().empty());
1555  }
1556 }
1557 
// One explicit gene (SOME_GENE 1..100) followed by eight CDSs: two without a
// gene qualifier, two xref'ing SOME_GENE and four xref'ing genes that do not
// appear in the table (CREATED_GENE_1, CREATED_GENE_2).
// The CDSs noted "error_if_checking_bounds" start on table lines 11 and 23,
// the line numbers hard-coded in TestCreateGenesFromCDSs.
1558 static const char * sc_Table11 = "\
1559 >Feature lcl|seq1\n\
1560 1\t100\tgene\n\
1561 \t\t\tgene\tSOME_GENE\n\
1562 20\t70\tCDS\n\
1563 \t\t\tnote\tokay\n\
1564 150\t200\tCDS\n\
1565 \t\t\tnote\tokay\n\
1566 30\t80\tCDS\n\
1567 \t\t\tgene\tSOME_GENE\n\
1568 \t\t\tnote\tokay\n\
1569 75\t400\tCDS\n\
1570 \t\t\tgene\tSOME_GENE\n\
1571 \t\t\tnote\terror_if_checking_bounds\n\
1572 40\t90\tCDS\n\
1573 \t\t\tgene\tCREATED_GENE_1\n\
1574 \t\t\tnote\tokay\n\
1575 80\t300\tCDS\n\
1576 \t\t\tgene\tCREATED_GENE_2\n\
1577 \t\t\tnote\tokay\n\
1578 200\t250\tCDS\n\
1579 \t\t\tgene\tCREATED_GENE_2\n\
1580 \t\t\tnote\tokay\n\
1581 50\t300\tCDS\n\
1582 \t\t\tgene\tCREATED_GENE_2\n\
1583 \t\t\tnote\terror_if_checking_bounds\n\
1584 ";
1585 
// Reads sc_Table11 twice (bCheckIfCDSInItsGene = false, then true) and checks:
//  - the genes in the result are exactly SOME_GENE, CREATED_GENE_1 and
//    CREATED_GENE_2, with no duplicates;
//  - each CDS keeps the gene xref written in the table (or none);
//  - with the bounds check on, errors are reported at table lines 11 and 23.
// NOTE(review): listing lines 1606, 1635, 1639, 1642, 1647, 1653, 1673, 1686
// and 1687 are absent from this view, so the flag values, the error codes that
// are pushed and part of the gene checks cannot be read here -- confirm
// against the real source file.
1586 BOOST_AUTO_TEST_CASE(TestCreateGenesFromCDSs)
1587 {
1588  set<string> geneNamesExpected;
1589  geneNamesExpected.insert("SOME_GENE");
1590  geneNamesExpected.insert("CREATED_GENE_1");
1591  geneNamesExpected.insert("CREATED_GENE_2");
1592 
1593  vector<string> geneXrefExpectedOnEachCDS; //empty str if no xref
1594  // the code in these braces just sets geneXrefExpectedOnEachCDS
1595  {{
// split on "CDS": piece N (N >= 1) is the text that follows the N-th CDS key
1596  vector<CTempString> cdsSplitPieces;
1597  NStr::SplitByPattern(sc_Table11, "CDS", cdsSplitPieces);
1598 
1599  CTempString kStartOfGene("gene\t");
1600 
1601  ITERATE_0_IDX(ii, cdsSplitPieces.size() ) {
1602  if( 0 == ii ) {
1603  continue; // the first part is not CDS info
1604  }
1605  CTempString sCDSInfo = cdsSplitPieces[ii];
1607 
1608  // extract sGeneLocus (if any) from the
1609  // sCDSInfo
// stays empty when the CDS has no leading gene qualifier
1610  CTempString sGeneLocus;
1611  if( NStr::StartsWith(sCDSInfo, kStartOfGene) ) {
1612  sGeneLocus = sCDSInfo.substr(
1613  kStartOfGene.length());
// keep only the rest of the qualifier line
1614  SIZE_TYPE sz1stEndlinePos = sGeneLocus.find_first_of("\r\n");
1615  if( sz1stEndlinePos != NPOS ) {
1616  sGeneLocus = sGeneLocus.substr(0, sz1stEndlinePos);
1617  }
1618  NStr::TruncateSpacesInPlace(sGeneLocus);
1619  BOOST_CHECK( ! sGeneLocus.empty() );
1620  // make sure no spaces
1621  BOOST_CHECK( sGeneLocus.end() ==
1622  find_if(sGeneLocus.begin(), sGeneLocus.end(), ::isspace ) );
1623  }
1624  geneXrefExpectedOnEachCDS.push_back(sGeneLocus);
1625  }
1626  }}
1627 
1628  ITERATE_BOTH_BOOL_VALUES(bCheckIfCDSInItsGene) {
1629  cout << "Testing with bCheckIfCDSInItsGene = "
1630  << NStr::BoolToString(bCheckIfCDSInItsGene) << endl;
1631 
1632  TErrList expected_errors;
1633  vector<size_t> linesWithError;
1634  CFeature_table_reader::TFlags readfeat_flags =
1636 
// table line 11 is the "75 400 CDS" feature noted error_if_checking_bounds
1637  if( bCheckIfCDSInItsGene ) {
1638  expected_errors.push_back(
1640  linesWithError.push_back(11);
1641 
1643  }
1644 
// table line 23 is the "50 300 CDS" feature noted error_if_checking_bounds
1645  if( bCheckIfCDSInItsGene ) {
1646  expected_errors.push_back(
1648  linesWithError.push_back(23);
1649  }
1650 
1651  CMessageListenerLenientIgnoreProgress err_container;
1652 
1654  sc_Table11,
1655  expected_errors,
1656  readfeat_flags,
1657  &err_container );
1658  typedef CSeq_annot::TData::TFtable TFtable;
1659  const TFtable& ftable = annot->GetData().GetFtable();
// one feature per expected gene plus one per "CDS\n" feature-key line
1660  BOOST_CHECK_EQUAL(ftable.size(),
1661  geneNamesExpected.size() +
1662  s_CountOccurrences(sc_Table11, "CDS\n") );
1663 
// fatal check: linesWithError[ii] below is indexed up to err_container.Count()
1664  BOOST_REQUIRE_EQUAL(err_container.Count(), linesWithError.size());
1665  ITERATE_0_IDX(ii, err_container.Count()) {
1666  const ILineError& line_error = err_container.GetError(ii);
1667  // (No offset here: linesWithError already holds the CDS start lines, 11 and 23)
1668  BOOST_CHECK_EQUAL( line_error.Line(), linesWithError[ii] );
1669  // Other lines expected only for "CDS not in xref'd gene" error
1670  const size_t iNumOtherLinesExpected = (
1671  line_error.Line() != 23 &&
1672  line_error.Problem() ==
1674  BOOST_CHECK_EQUAL( line_error.OtherLines().size(),
1675  iNumOtherLinesExpected );
1676  }
1677 
1678  // check that all genes were created
1679  vector<string> vecOfGenesInResult; // use a vector so we err on dupes
1680  vector<string> vecOfCDSXrefsInResult;
1681  ITERATE(TFtable, feat_it, ftable) {
1682  const CSeq_feat & feat = **feat_it;
1683  if( FIELD_IS_SET_AND_IS(feat, Data, Gene) ) {
1684  BOOST_CHECK_NO_THROW( vecOfGenesInResult.push_back(
1685  feat.GetData().GetGene().GetLocus() ) );
1688  feat.GetData().GetGene(), Locus_tag));
1689  } else if( FIELD_IS_SET_AND_IS(feat, Data, Cdregion) ) {
1690  const CGene_ref * pCDSGeneXref = feat.GetGeneXref();
1691  if( pCDSGeneXref ) {
1692  BOOST_CHECK_NO_THROW( vecOfCDSXrefsInResult.push_back(
1693  pCDSGeneXref->GetLocus() ) );
1694  } else {
// keep a placeholder so positions still line up with the expected vector
1695  vecOfCDSXrefsInResult.push_back(kEmptyStr);
1696  }
1697  }
1698  }
1699  // sort, but don't remove dupes so we can detect them
1700  sort( vecOfGenesInResult.begin(), vecOfGenesInResult.end() );
1701  BOOST_CHECK_EQUAL_COLLECTIONS(
1702  vecOfGenesInResult.begin(), vecOfGenesInResult.end(),
1703  geneNamesExpected.begin(), geneNamesExpected.end() );
1704 
1705  // check that each CDS references the correct gene
1706  // (do NOT sort or unique, because order matters)
1707  BOOST_CHECK_EQUAL_COLLECTIONS(
1708  vecOfCDSXrefsInResult.begin(), vecOfCDSXrefsInResult.end(),
1709  geneXrefExpectedOnEachCDS.begin(),geneXrefExpectedOnEachCDS.end());
1710  }
1711 }
1712 
// Two feature tables (lcl|Seq1, lcl|Seq2) with eight genes interleaved with
// bracketed commands: valid offsets (7, 0, -30), a non-numeric offset
// ([offset=abc]) and an unknown command ([nonsense=foo]).
// The resulting gene extremes are listed in TestOffsetCommand.
1713 static const char * sc_Table12 = "\
1714 >Feature lcl|Seq1\n\
1715 1\t20\tgene\n\
1716 \t\t\tgene g0\n\
1717 [offset=7]\n\
1718 1\t20\tgene\n\
1719 31\t41\n\
1720 \t\t\tgene g1\n\
1721 >Feature lcl|Seq2\n\
1722 1\t20\tgene\n\
1723 \t\t\tgene g2\n\
1724 30\t40\tgene\n\
1725 \t\t\tgene g3\n\
1726 [offset=0]\n\
1727 40\t50\tgene\n\
1728 \t\t\tgene g4\n\
1729 [offset=-30]\n\
1730 40\t50\tgene\n\
1731 \t\t\tgene g5\n\
1732 [offset=abc]\n\
1733 55\t45\tgene\n\
1734 \t\t\tgene g6\n\
1735 [nonsense=foo]\n\
1736 55\t65\tgene\n\
1737 \t\t\tgene g7\n\
1738 ";
1739 
// Checks the [offset=N] command of the feature-table format: reads sc_Table12
// (two annots expected), merges their feature tables and compares every
// gene's biological start/stop with the hard-coded, offset-adjusted values.
// NOTE(review): listing lines 1744 and 1747 are absent from this view; they
// presumably give the count and kind of expected errors and open the reader
// call -- confirm against the real source file.
1740 BOOST_AUTO_TEST_CASE(TestOffsetCommand)
1741 {
1742  TErrList expected_errors;
1743  fill_n( back_inserter(expected_errors),
1745 
1746  TAnnotRefListPtr pAnnotRefList =
1748  sc_Table12,
1749  expected_errors );
// one annot per ">Feature" header in sc_Table12
1750  BOOST_REQUIRE_EQUAL(pAnnotRefList->size(), 2);
1751 
1752  // merge ftables to simplify logic below
1753  CSeq_annot::TData::TFtable merged_ftables;
1754  ITERATE( TAnnotRefList, annot_ref_it, *pAnnotRefList ) {
1755  const CSeq_annot::TData::TFtable& an_ftable =
1756  (*annot_ref_it)->GetData().GetFtable();
1757  copy( an_ftable.begin(), an_ftable.end(),
1758  back_inserter(merged_ftables) );
1759  }
1760 
1761  // check that gene offsets are correct
// Rows follow the table order g0..g7; each value is the table coordinate plus
// the offset expected to be in force (e.g. g1: 1+7 .. 41+7, g5: 40-30 .. 50-30).
// g6 and g7 equal the table values minus 30, i.e. the expectation is that
// [offset=abc] and [nonsense=foo] leave the -30 offset unchanged.
1762  typedef SStaticPair<TSeqPos, TSeqPos> TGeneExtremes;
1763  TGeneExtremes gene_extremes_arr[] = { // 1-based, biological extremes
1764  {1, 20},
1765  {8, 48}, // Note: multi-interval
1766  {1, 20},
1767  {30, 40},
1768  {40, 50},
1769  {10, 20},
1770  {25, 15}, // Note: complement
1771  {25, 35}
1772  };
// fatal check: the loop below indexes gene_extremes_arr by feature position
1773  BOOST_REQUIRE_EQUAL(
1774  ArraySize(gene_extremes_arr), merged_ftables.size() );
1775 
1776  CSeq_annot::TData::TFtable::const_iterator merged_ftables_it =
1777  merged_ftables.begin();
1778  ITERATE_0_IDX(ii, merged_ftables.size() ) {
1779  BOOST_CHECK( FIELD_IS_SET_AND_IS(**merged_ftables_it, Data, Gene) );
1780 
// expectations are 1-based, hence the "- 1" in both comparisons
1781  const CSeq_loc & gene_loc = (*merged_ftables_it)->GetLocation();
1782  BOOST_CHECK_EQUAL(
1783  gene_loc.GetStart(eExtreme_Biological),
1784  (gene_extremes_arr[ii].first - 1) );
1785  BOOST_CHECK_EQUAL(
1786  gene_loc.GetStop(eExtreme_Biological),
1787  (gene_extremes_arr[ii].second - 1) );
1788 
1789  ++merged_ftables_it;
1790  }
1791 }
1792 
1793 // note the "END" buried in the string,
1794 // which should be replaced in code that uses sc_Table13
// A gene followed by two "variation" features whose start carries the '^'
// (between-bases) marker: "17^ .. END" and "22^ .. 21".
// TestBetweenBaseIntervals substitutes "18" or "19" for END.
1795 static const char * sc_Table13 = "\
1796 >Feature lcl|Seq1\n\
1797 1\t20\tgene\n\
1798 \t\t\tgene g0\n\
1799 17^\tEND\tvariation\n\
1800 \t\t\treplace\tCCT\n\
1801 22^\t21\tvariation\n\
1802 \t\t\treplace\tTAA\n\
1803 ";
1804 
// Checks parsing of between-base ("N^") intervals using sc_Table13.
// Runs twice: with END = "18" (adjacent to 17, no error expected) and with
// END = "19" (eProblem_BadFeatureInterval expected). In both runs three
// features must be produced: the gene and the two variations.
// NOTE(review): listing lines 1818 and 1821 are absent from this view; they
// presumably open the reader call and declare `ftable` -- confirm against the
// real source file.
1805 BOOST_AUTO_TEST_CASE(TestBetweenBaseIntervals)
1806 {
1807  ITERATE_BOTH_BOOL_VALUES(bGoodLoc) {
1808  cerr << "Testing with bGoodLoc = " << NStr::BoolToString(bGoodLoc) << endl;
1809 
// replacement for the "END" placeholder in sc_Table13
1810  const char * pchEndVal = (bGoodLoc ? "18" : "19");
1811 
1812  TErrList errList;
1813  if( ! bGoodLoc ) {
1814  errList.push_back(ILineError::eProblem_BadFeatureInterval);
1815  }
1816 
1817  CRef<CSeq_annot> pSeqAnnot =
1819  NStr::Replace(sc_Table13, "END", pchEndVal).c_str(),
1820  errList );
1822  pSeqAnnot->GetData().GetFtable();
// fatal check: the iterator below is advanced twice without bounds checks
1823  BOOST_REQUIRE(ftable.size() == 3);
1824 
1825  // first feature: a gene
1826  auto ftable_iter = ftable.cbegin();
1827  CRef<CSeq_feat> gene(*ftable_iter);
1828  BOOST_CHECK( FIELD_IS_SET_AND_IS(*gene, Data, Gene) );
1829 
1830  // second feature: a variation on plus strand
1831  CRef<CSeq_feat> variation_plus(*++ftable_iter);
1832  BOOST_CHECK_EQUAL("variation",
1833  variation_plus->GetData().GetImp().GetKey());
// the location is only inspected for the well-formed interval
1834  if( bGoodLoc ) {
1835  const CSeq_loc & variation_plus_loc = variation_plus->GetLocation();
// "17^" in the table is expected as 0-based point 16 with fuzz lim "tr"
1836  BOOST_CHECK_EQUAL(16u, variation_plus_loc.GetPnt().GetPoint());
1837  BOOST_CHECK_EQUAL(false, variation_plus_loc.IsReverseStrand());
1838  BOOST_CHECK_EQUAL(CInt_fuzz::eLim_tr,
1839  variation_plus_loc.GetPnt().GetFuzz().GetLim());
1840  }
1841 
1842  // third feature: a variation on minus strand
1843  CRef<CSeq_feat> variation_minus(*++ftable_iter);
1844  BOOST_CHECK_EQUAL("variation",
1845  variation_minus->GetData().GetImp().GetKey());
1846  const CSeq_loc & variation_minus_loc = variation_minus->GetLocation();
// "22^ 21" in the table is expected as 0-based point 21, reverse strand,
// with fuzz lim "tl"
1847  BOOST_CHECK_EQUAL(21u, variation_minus_loc.GetPnt().GetPoint());
1848  BOOST_CHECK_EQUAL(true, variation_minus_loc.IsReverseStrand());
1849  BOOST_CHECK_EQUAL(CInt_fuzz::eLim_tl,
1850  variation_minus_loc.GetPnt().GetFuzz().GetLim());
1851 
1852  // there should be no more feats
1853  BOOST_CHECK(ftable.cend() == ++ftable_iter);
1854  }
1855 }
1856 
1857 // For example, this should accept "> Feature abc"
// Builds a one-gene table with 0, 1 and 2 spaces between '>' and "Feature"
// and requires each variant to yield exactly one feature with no errors.
// NOTE(review): listing lines 1867 and 1870 are absent from this view; they
// presumably open the reader call and declare `ftable` -- confirm against the
// real source file.
1858 BOOST_AUTO_TEST_CASE(TestSpacesBeforeFeature)
1859 {
1860  ITERATE_0_IDX(num_spaces, 3) {
1861  string sTable =
1862  ">" + string(num_spaces, ' ') + "Feature lcl|Seq1\n"
1863  "1\t20\tgene\n"
1864  "\t\t\tgene g0\n";
1865 
1866  CRef<CSeq_annot> pSeqAnnot =
1868  sTable.c_str(),
// empty list: no errors are expected
1869  TErrList() );
1871  pSeqAnnot->GetData().GetFtable();
1872  BOOST_REQUIRE(ftable.size() == 1);
1873  }
1874 }
1875 
// A CDS whose second interval line ("50 200") comes after a qualifier line
// instead of directly after the first interval.
// Input of TestErrorIfRangeAfterQuals.
1876 static const char * sc_Table14 = "\
1877 >Feature lcl|Seq1\n\
1878 1\t1008\tCDS\n\
1879 \t\t\tgene THE_GENE_NAME\n\
1880 50\t200\n\
1881 \t\t\tproduct THE_GENE_PRODUCT\n\
1882 \n\
1883 ";
1884 
// Checks that an interval line placed after qualifiers (sc_Table14) is
// reported as eProblem_NoFeatureProvidedOnIntervals.
// NOTE(review): listing line 1890 is absent from this view; it presumably
// opens the reader call whose arguments follow -- confirm against the real
// source file.
1885 BOOST_AUTO_TEST_CASE(TestErrorIfRangeAfterQuals)
1886 {
1887  TErrList expected_errors;
1888  expected_errors.push_back(ILineError::eProblem_NoFeatureProvidedOnIntervals );
1889 
1891  sc_Table14,
1892  expected_errors );
1893 }
1894 
// A table whose header reads ">Features_blah_blah" rather than ">Feature",
// i.e. has extra text fused onto the keyword.
// Input of TestIfHandlesWhenJunkAfterFeature.
1895 static const char * sc_Table15 = "\
1896 >Features_blah_blah lcl|Seq1\n\
1897 1\t1008\tCDS\n\
1898 \t\t\tgene THE_GENE_NAME\n\
1899 \n\
1900 ";
1901 
// Reads sc_Table15, whose header has extra text after ">Feature".
// NOTE(review): listing line 1904 is absent from this view; it presumably
// opens the reader call that takes sc_Table15, so which outcome the test
// expects cannot be read here -- confirm against the real source file.
1902 BOOST_AUTO_TEST_CASE(TestIfHandlesWhenJunkAfterFeature)
1903 {
1905  sc_Table15 );
1906 }
1907 
1908 
// An rRNA and a misc_RNA whose qualifier is spelled "Product" (capital P).
// Input of TestCaseInsensitivity.
1909 static const char * sc_Table16 = "\
1910 >Feature lcl|seq1\n\
1911 <1\t32\trRNA\n\
1912 \t\tProduct\t18S ribosomal RNA\n\
1913 33\t170\tmisc_RNA\n\
1914 \t\tProduct\tinternal transcribed spacer 1\n\
1915 \n\
1916 ";
1917 
// Checks that qualifier names are matched case-insensitively: the "Product"
// qualifiers must end up as the rRNA's name and as a lower-case "product"
// qualifier on the misc_RNA.
// NOTE(review): listing line 1920 is absent from this view; it presumably
// declares `annot` from sc_Table16 -- confirm against the real source file.
1918 BOOST_AUTO_TEST_CASE(TestCaseInsensitivity)
1919 {
1921  SerializeOutIfVerbose("TestCaseInsensitivity initial annot", *annot);
1922 
1923  const CSeq_annot::TData::TFtable& ftable = annot->GetData().GetFtable();
1924 
// fatal checks: front()/back() and GetData() are used right below
1925  BOOST_REQUIRE_EQUAL(ftable.size(), 2);
1926  BOOST_REQUIRE(ftable.front()->IsSetData());
1927  BOOST_REQUIRE(ftable.back()->IsSetData());
1928 
1929  // make sure rRNA products are processed right, just in case
1930  const CSeqFeatData & feat_data_0 = ftable.front()->GetData();
1931  BOOST_CHECK_EQUAL(
1932  feat_data_0.GetSubtype(), CSeqFeatData::eSubtype_rRNA);
1933  BOOST_CHECK_EQUAL(
1934  feat_data_0.GetRna().GetExt().GetName(), "18S ribosomal RNA");
1935 
1936  const CSeqFeatData & feat_data_1 = ftable.back()->GetData();
1937  // make sure it's a miscRNA
1938  BOOST_CHECK_EQUAL(
1939  feat_data_1.GetSubtype(),
1940  CSeqFeatData::eSubtype_otherRNA // subtype otherRNA is used for miscRNA
1941  );
1942  // The "Product" in the input table should have become "product" in the
1943  // output
1944  BOOST_CHECK_EQUAL(
1945  ftable.back()->GetNamedQual("Product"), "");
1946  BOOST_CHECK_EQUAL(
1947  ftable.back()->GetNamedQual("product"),
1948  "internal transcribed spacer 1");
1949 }
1950 
1951 
// A snoRNA feature with a note and a mobile_element feature with a
// "transposon" qualifier.
// Input of TestDiscouragedKeys.
1952 static const char * sc_Table17 = "\
1953 >Feature lcl|seq1\n\
1954 <1\t32\tsnoRNA\n\
1955 \t\t\tnote\tHello, this is a note.\n\
1956 33\t170\tmobile_element\n\
1957 \t\ttransposon\tSomeTransposon\n\
1958 \n\
1959 ";
1960 
1961 
// Reads sc_Table17 and expects two errors from it.
// NOTE(review): listing lines 1966, 1968, 1970 and 1973 are absent from this
// view, so the two error codes, the reader call and its flags cannot be read
// here; per the test name they presumably relate to discouraged feature keys
// -- confirm against the real source file.
1962 BOOST_AUTO_TEST_CASE(TestDiscouragedKeys)
1963 {
1964  TErrList expected_errors;
1965  expected_errors.push_back(
1967  expected_errors.push_back(
1969 
1971  sc_Table17,
1972  expected_errors,
1974  // &err_container);
1975 }
1976 
1980 };
1983  // This is a special case which has a special string translation
1985  // Not a special case; just uses eSubtype_10_signal
1987 };
1988 
1989 
// For every entry of subtype_cases, and for both a valid and an invalid
// ("foo") regulatory_class value, builds a one-feature table on the fly and
// reads it, expecting:
//  - one error when the subtype is not the regulatory subtype itself;
//  - one more error when the regulatory_class value is invalid.
// NOTE(review): listing lines 1997-1998, 2010, 2021, 2030, 2039 and 2041 are
// absent from this view, so the error codes, the source of
// kValidRegulatoryClass and subtype_name, and the reader call cannot be read
// here -- confirm against the real source file.
1990 BOOST_AUTO_TEST_CASE(TestRegulatoryFeat)
1991 {
1992  // set what constitutes a valid vs. invalid regulatory_class
1993  // (note that at this time we're not testing the case of a
1994  // mismatch between the subtype and qual value)
1995  const string kInvalidRegulatoryClass("foo");
1996  const string & kValidRegulatoryClass =
// an empty "valid" value would make the valid/invalid distinction meaningless
1999  BOOST_REQUIRE( ! kValidRegulatoryClass.empty() );
2000 
2001  ITERATE_0_IDX(subtype_case_idx, ArraySize(subtype_cases)) {
2002  ITERATE_BOTH_BOOL_VALUES(bUseValidRegulatoryClass) {
2003  TErrList expected_errors;
2004 
2005  const SRegulatoryFeatSubtypeCases & subtype_case =
2006  subtype_cases[subtype_case_idx];
2007 
2008  const CSeqFeatData::ESubtype subtype = subtype_case.subtype;
2009  string subtype_name =
2011 
2012  cout << "Test case: use subtype: "
2013  << subtype_name
2014  << ", use valid regulatory_class: "
2015  << NStr::BoolToString(bUseValidRegulatoryClass) << endl;
2016 
2017  const bool bUseRegulatorySubtypeItself =
2018  subtype_case.is_regulatory;
2019  if( ! bUseRegulatorySubtypeItself ) {
2020  expected_errors.push_back(
2022  }
2023 
2024  const string & qual_val = (
2025  bUseValidRegulatoryClass ?
2026  kValidRegulatoryClass :
2027  kInvalidRegulatoryClass );
2028  if( ! bUseValidRegulatoryClass ) {
2029  expected_errors.push_back(
2031  }
2032 
2033  // build the feat table
// the feature key is the subtype name; its only qualifier is regulatory_class
2034  string feat_table;
2035  feat_table += ">Feature lcl|seq1\n";
2036  feat_table += "<1\t32\t" + subtype_name + '\n';
2037  feat_table += "\t\t\tregulatory_class\t" + qual_val + '\n';
2038 
2040  feat_table.c_str(), expected_errors,
2042  // &err_container);
2043 
2044  BOOST_CHECK( annot );
2045  }
2046  }
2047 }
2048 
// A mat_peptide and a propeptide feature, each with a note.
// Input of TestPeptideFeats.
2049 static const char * sc_Table18 = "\
2050 >Feature lcl|seq1\n\
2051 10\t60\tmat_peptide\n\
2052 \t\t\tnote\tyo\n\
2053 100\t170\tpropeptide\n\
2054 \t\t\tnote\thello\n\
2055 ";
2056 
// Checks that peptide feature keys become Prot features: two features are
// required, and the "processed" field of each is compared with an expected
// value.
// NOTE(review): listing lines 2059, 2070 and 2074 are absent from this view;
// they presumably declare `annot` from sc_Table18 and supply the expected
// "processed" values -- confirm against the real source file.
2057 BOOST_AUTO_TEST_CASE(TestPeptideFeats)
2058 {
// label names this test (it was copy-pasted from TestCaseInsensitivity)
2060  SerializeOutIfVerbose("TestPeptideFeats initial annot", *annot);
2061 
2062  const CSeq_annot::TData::TFtable& ftable = annot->GetData().GetFtable();
2063 
// fatal checks: front()/back() and GetData() are used right below
2064  BOOST_REQUIRE_EQUAL(ftable.size(), 2);
2065  BOOST_REQUIRE(ftable.front()->IsSetData());
2066  BOOST_REQUIRE(ftable.back()->IsSetData());
2067 
2068  const CSeqFeatData & feat_data_0 = ftable.front()->GetData();
2069  BOOST_REQUIRE_EQUAL(feat_data_0.GetProt().GetProcessed(),
2071 
2072  const CSeqFeatData & feat_data_1 = ftable.back()->GetData();
2073  BOOST_REQUIRE_EQUAL(feat_data_1.GetProt().GetProcessed(),
2075 }
2076 
// A CDS with a product and a transl_table qualifier whose value is 1111.
// Input of TestBadTranslTable, which treats that value as bad.
2077 static const char * sc_Table19 = "\
2078 >Feature lcl|seq1\n\
2079 1\t1578\tCDS\n\
2080 \t\t\tproduct\tcytochrome c oxidase subunit I\n\
2081 \t\t\ttransl_table\t1111\n\
2082 ";
2083 
2084 // when transl_table is given a bad value, it should cause an error
// The CDS from sc_Table19 must still be created, and no generic qualifiers
// may remain on it.
// NOTE(review): listing lines 2088 and 2090 are absent from this view; they
// presumably hold the expected error code and open the reader call that
// declares `annot` -- confirm against the real source file.
2085 BOOST_AUTO_TEST_CASE(TestBadTranslTable)
2086 {
2087  TErrList expected_errors = {
2089 
2091  sc_Table19,
2092  expected_errors);
2093  const CSeq_annot::TData::TFtable& ftable = annot->GetData().GetFtable();
// fatal check: ftable.front() below is undefined on an empty table
2094  BOOST_REQUIRE_EQUAL(ftable.size(), 1);
2095 
2096  // expect no quals
2097  set<string> expected_quals;
2098  CheckExpectedQuals (ftable.front(), expected_quals);
2099 }
2100 
2101 // GB-7157
// A regulatory feature with note, bound_moiety and regulatory_class
// qualifiers. The key line and the first two qualifier values end with a
// trailing space before the newline.
// Input of TestRiboswitch.
2102 static const char* sc_Table20 = "\
2103 >Feature gb|KY807921|\n\
2104 3200\t3201\tregulatory \n\
2105 \t\t\tnote\tcobalamin riboswitch \n\
2106 \t\t\tbound_moiety\tcobalamin \n\
2107 \t\t\tregulatory_class\triboswitch\n\
2108 ";
2109 
// Reads sc_Table20 (a riboswitch regulatory feature) and expects no errors.
// NOTE(review): listing line 2114 is absent from this view; it presumably
// opens the reader call whose arguments follow -- confirm against the real
// source file.
2110 BOOST_AUTO_TEST_CASE(TestRiboswitch)
2111 {
2112  TErrList expected_errors; // There are no expected errors
2113 
2115  sc_Table20,
2116  expected_errors);
2117 }
2118 
// Same rRNA + misc_RNA content as sc_Table16, without the trailing blank line.
// Input of TestSimpleTableFilter.
2119 static const char * sc_Table21 = "\
2120 >Feature lcl|seq1\n\
2121 <1\t32\trRNA\n\
2122 \t\tProduct\t18S ribosomal RNA\n\
2123 33\t170\tmisc_RNA\n\
2124 \t\tProduct\tinternal transcribed spacer 1\n\
2125 ";
2126 
// Checks that a table filter can drop one feature type: after reading
// sc_Table21 through `tbl_filter`, only the misc_RNA feature may remain.
// NOTE(review): listing lines 2130, 2132, 2134 and 2136 are absent from this
// view; they presumably hold the expected error, the construction and
// configuration of `tbl_filter`, and the start of the reader call that
// declares `annot` -- confirm against the real source file.
2127 BOOST_AUTO_TEST_CASE(TestSimpleTableFilter)
2128 {
2129  const TErrList expected_errors {
2131 
2133  // notice non-standard capitalization to make sure filter is case-insens
2135 
2137  sc_Table21, expected_errors, 0, nullptr, &tbl_filter);
2138 
2139  // make sure filter worked by filtering out rRNA but not misc_RNA
2140  const auto & ftable = annot->GetData().GetFtable();
// fatal check: *ftable.front() below is undefined on an empty table
2141  BOOST_REQUIRE_EQUAL(1, ftable.size());
2142  const auto & feat = *ftable.front();
2143  BOOST_CHECK_EQUAL("misc_RNA", feat.GetData().GetRna().GetExt().GetName());
2144  BOOST_CHECK_EQUAL("internal transcribed spacer 1",
2145  feat.GetNamedQual("product"));
2146 }
2147 
2148 
// A gene whose start coordinate is written with a leading '>' (">3137") and
// whose start is greater than its stop (3137 vs 2563).
// Input of the test case that follows.
2149 static const char* sc_Table22 = "\
2150 >Feature MN517919\n\
2151 >3137\t2563\tgene\n\
2152 \t\tgene\trrn23\n\
2153 \t\tstandard_name\trrn23 gene\n\
2154 ";
2155 
// NOTE(review): listing lines 2156 and 2159-2163 are absent from this view, so
// the name of this test case and the errors it expects cannot be read here --
// confirm against the real source file. The visible part reads sc_Table22
// with s_ReadOneTableFromString and the expected-error list.
2157 {
2158  TErrList expected_errors {
2164  };
2165 
2166  auto pSeqAnnot = s_ReadOneTableFromString(
2167  sc_Table22, expected_errors);
2168 }
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
long TFlags
binary OR of EFlags
Definition: readfeat.hpp:79
@ fReportDiscouragedKey
= 0x40 (Report discouraged keys into the error container)
Definition: readfeat.hpp:73
@ fCreateGenesFromCDSs
= 0x10 (If a CDS has a gene xref, create a gene with the same intervals if one doesn't already exist....
Definition: readfeat.hpp:71
@ fCDSsMustBeInTheirGenes
= 0x20 (If a CDS has a gene xref, it *must* be inside of that gene)
Definition: readfeat.hpp:72
CRef< CSeq_annot > ReadSequinFeatureTable(const TFlags flags=0, ITableFilter *filter=nullptr, const string &seqid_prefix=kEmptyStr)
Definition: readfeat.cpp:3715
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CRef –.
Definition: ncbiobj.hpp:618
static const string & GetRegulatoryClass(ESubtype subtype)
ESubtype GetSubtype(void) const
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns an ESubtype into its string value, which is NOT necessarily related to the identifier of the enum...
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
Definition: Seq_feat.cpp:181
Base class for all serializable objects.
Definition: serialbase.hpp:150
Example implementation of ITableFilter with the simplest, most common functionality.
void SetActionForFeat(const string &feature_name, EAction action)
Special EAction for this feat to override the default action.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
virtual size_t Count() const =0
virtual const ILineError & GetError(size_t) const =0
0-based error retrieval.
virtual EDiagSev Severity(void) const
Definition: line_error.hpp:370
@ eProblem_QualifierBadValue
Definition: line_error.hpp:68
@ eProblem_FeatMustBeInXrefdGene
Definition: line_error.hpp:73
@ eProblem_FeatureNameNotAllowed
Definition: line_error.hpp:62
@ eProblem_QualifierWithoutFeature
Definition: line_error.hpp:64
@ eProblem_FeatureBadStartAndOrStop
Definition: line_error.hpp:66
@ eProblem_UnrecognizedSquareBracketCommand
Definition: line_error.hpp:75
@ eProblem_BadFeatureInterval
Definition: line_error.hpp:67
@ eProblem_DiscouragedFeatureName
Definition: line_error.hpp:90
@ eProblem_NoFeatureProvidedOnIntervals
Definition: line_error.hpp:63
@ eProblem_DiscouragedQualifierName
Definition: line_error.hpp:91
virtual std::string ProblemStr(void) const
Definition: line_error.hpp:180
virtual std::string Message(void) const
Definition: line_error.hpp:143
virtual EProblem Problem(void) const =0
virtual unsigned int Line(void) const =0
virtual const TVecOfLines & OtherLines(void) const =0
@ eAction_Okay
Just accept the feat.
@ eAction_Disallowed
Do not accept the feat and give message eProblem_FeatureNameNotAllowed.
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
size_type size() const
Definition: set.hpp:132
const_iterator end() const
Definition: set.hpp:136
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ITERATE_0_IDX(idx, up_to)
idx loops from 0 (inclusive) to up_to (exclusive)
Definition: ncbimisc.hpp:865
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ITERATE_BOTH_BOOL_VALUES(BoolVar)
The body of the loop will be run with Var equal to false and then true.
Definition: ncbimisc.hpp:861
string
Definition: cgiapp.hpp:687
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
bool IsReverseStrand(void) const
Return true if all ranges have reverse strand.
Definition: Seq_loc.hpp:995
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
static list< string > & SplitByPattern(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Variation of Split() with fSplit_ByPattern flag applied by default.
Definition: ncbistr.cpp:3507
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
const_iterator end() const
Return an iterator to the string's ending position (one past the end of the represented sequence)
Definition: tempstr.hpp:306
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static const string BoolToString(bool value)
Convert bool to string.
Definition: ncbistr.cpp:2815
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
Definition: tempstr.hpp:538
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
const_iterator begin() const
Return an iterator to the string's starting position.
Definition: tempstr.hpp:299
@ eTrunc_Begin
Truncate leading spaces only.
Definition: ncbistr.hpp:2240
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
vector< CRef< CUser_field > > TData
@ eLim_tl
space to left of position
Definition: Int_fuzz_.hpp:214
@ eLim_tr
space to right of position
Definition: Int_fuzz_.hpp:213
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:538
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
Definition: Trna_ext_.hpp:649
TType GetType(void) const
Get the Type member data.
Definition: RNA_ref_.hpp:529
const TAa & GetAa(void) const
Get the Aa member data.
Definition: Trna_ext_.hpp:603
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
Definition: Trna_ext_.hpp:591
bool IsSetAnticodon(void) const
Location of anticodon. Check if a value has been assigned to Anticodon data member.
Definition: Trna_ext_.hpp:637
TNcbieaa GetNcbieaa(void) const
Get the variant data.
Definition: Trna_ext_.hpp:516
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TTRNA & GetTRNA(void) const
Get the variant data.
Definition: RNA_ref_.cpp:134
@ eType_ncRNA
non-coding RNA; subsumes snRNA, scRNA, snoRNA
Definition: RNA_ref_.hpp:104
const TKey & GetKey(void) const
Get the Key member data.
Definition: Imp_feat_.hpp:259
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
const TRna & GetRna(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
const TImp & GetImp(void) const
Get the variant data.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
int i
constexpr auto sort(_Init &&init)
const char * tag
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
int isspace(Uchar c)
Definition: ncbictype.hpp:69
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
Generic utility macros and templates for exploring NCBI objects.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
Utility stuff for more convenient using of Boost.Test library.
#define NCBITEST_CHECK(P)
Definition: test_boost.hpp:616
const SRegulatoryFeatSubtypeCases subtype_cases[]
USING_SCOPE(objects)
static const char * sc_Table22
static const char * sc_TableTrnaWithCodon
static const char * sc_Table5
static const char * sc_Table6
static const char * sc_Table3
static const char * sc_Table18
unique_ptr< TAnnotRefList > TAnnotRefListPtr
static const char * sc_Table21
static const char * sc_Table12
static void s_CheckErrorsVersusExpected(ILineErrorListener *pMessageListener, TErrList expected_errors)
static const char * sc_Table16
static const char * sc_Table4
list< ILineError::EProblem > TErrList
static const char * sc_Table7
static const char * sc_Table2
list< CRef< CSeq_annot > > TAnnotRefList
static const char * sc_Table1
static const char * sc_Table9
static const char * sc_Table14
static const char * sc_Table13
static const char * sc_Table10
static bool s_IgnoreError(const ILineError &line_error)
static void CheckExpectedQuals(CConstRef< CSeq_feat > feat, const set< string > &expected_quals)
static const char * sc_Table8
static CRef< CSeq_annot > s_ReadOneTableFromString(const char *str, const TErrList &expected_errors=TErrList(), CFeature_table_reader::TFlags additional_flags=0, ILineErrorListener *pMessageListener=nullptr, CSimpleTableFilter *p_tbl_filter=nullptr)
static const char * sc_Table11
NCBITEST_INIT_CMDLINE(descrs)
static const char * sc_Table20
static const char * sc_Table15
static TAnnotRefListPtr s_ReadMultipleTablesFromString(const char *str, const TErrList &expected_errors=TErrList(), CFeature_table_reader::TFlags additional_flags=0, ILineErrorListener *pMessageListener=nullptr)
static const char * sc_Table17
BOOST_AUTO_TEST_CASE(Test_FeatureTableWithGeneAndCodingRegion)
Test a simple table.
static const char * sc_Table19
static size_t s_CountOccurrences(const CTempString &str, const CTempString &pattern)
static void s_ListLinesWithPattern(const CTempString &str, const CTempString &pattern, vector< size_t > &out_vecOfLinesThatMatch)
#define ftable
Definition: utilfeat.h:37
Modified on Sun Apr 14 05:26:41 2024 by modify_doxy.py rev. 669887