NCBI C++ ToolKit
unit_test_gene_model.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_gene_model.cpp 98497 2022-11-28 15:24:06Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Mike DiCuccio
27  *
28  * File Description:
29  *
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbiargs.hpp>
37 #include <corelib/ncbienv.hpp>
38 #include <corelib/test_boost.hpp>
39 
44 #include <objmgr/scope.hpp>
45 
46 #include <serial/serial.hpp>
47 #include <serial/objistr.hpp>
48 #include <serial/objostr.hpp>
49 
54 
56 
57 #if BOOST_VERSION >= 105900
58 # include <boost/test/tools/output_test_stream.hpp>
59 #else
60 # include <boost/test/output_test_stream.hpp>
61 #endif
62 using boost::test_tools::output_test_stream;
63 
66 
// Command-line setup body; the line carrying its signature is missing from
// this listing, as is the trailing argument line of every call below.
// Registers five keys on arg_desc with AddKey ("data-in" and the four
// "*-expected" keys) and five with AddOptionalKey ("seqdata-in" and the four
// "*-out" keys). TestUsingArg reads these by name.
// NOTE(review): the "seqdata-out" help text reads "Bioseqss" - presumably a
// typo for "Bioseqs"; it is a runtime string, so it is left untouched here.
68 {
69  // Here we make descriptions of command line parameters that we are
70  // going to use.
71 
72  arg_desc->AddKey("data-in", "InputData",
73  "Concatenated Seq-aligns used to generate gene models",
75 
76  arg_desc->AddKey("data-expected", "InputData",
77  "Expected Seq-annots produced from input alignments",
79  arg_desc->AddKey("seqdata-expected", "InputData",
80  "Expected bioseqs produced from input alignments",
82  arg_desc->AddKey("combined-data-expected", "InputData",
83  "Expected single seq-annot produced from all input alignments",
85  arg_desc->AddKey("combined-with-omission-expected", "InputData",
86  "Expected single seq-annot produced from all input alignments omitting the first RNA feature",
88 
89  arg_desc->AddOptionalKey("seqdata-in", "InputData",
90  "FASTA of test sequences",
92 
93  arg_desc->AddOptionalKey("data-out", "OutputData",
94  "Seq-annots produced from input alignments",
96  arg_desc->AddOptionalKey("seqdata-out", "OutputData",
97  "Bioseqss produced from input alignments",
99  arg_desc->AddOptionalKey("combined-data-out", "OutputData",
100  "Single seq-annot produced from all input alignments",
102  arg_desc->AddOptionalKey("combined-with-omission-out", "OutputData",
103  "Single seq-annot produced from all input alignments omitting the first RNA feature",
105 }
106 
107 /// Function to compare Cref<Cseq_feat>s by their referents
108 static bool s_CompareFeatRefs(const CRef<CSeq_feat>& ref1,
109  const CRef<CSeq_feat>& ref2)
110 {
111  return *ref1 < *ref2;
112 }
113 
// Tail of the s_CompareFtables signature plus its body; the opening lines of
// the signature are missing from this listing (call sites pass the actual
// feature table, the expected feature table and a label string).
// Walks both tables in lock-step and, for each pair of features, checks
// subtype, location, partial and pseudo flags, dbxref and product presence,
// code-breaks (Cdregion data), gene locus/desc/syn (Gene data), the exception
// flag and text, and the comment. When one of the checks that sets `display`
// differs, both features are written to cerr as ASN.1 text.
// NOTE(review): the loop ends at the end of the shorter table and no size
// comparison is visible in this listing, so a length mismatch between the
// two tables may go unreported - confirm against the full source.
116  const string& compared_features)
117 {
118  CSeq_annot::TData::TFtable::const_iterator actual_iter =
119  actual.begin();
120 
121  CSeq_annot::TData::TFtable::const_iterator expected_iter =
122  expected.begin();
123 
124  for ( ; actual_iter != actual.end() && expected_iter != expected.end();
125  ++actual_iter, ++expected_iter) {
126 
    // Set when a mismatch should trigger a dump of both features below.
127  bool display = false;
128  const CSeq_feat& f1 = **actual_iter;
129  const CSeq_feat& f2 = **expected_iter;
130  BOOST_CHECK_MESSAGE(f1.GetData().GetSubtype() == f2.GetData().GetSubtype(),
131  compared_features << ": f1.GetData().GetSubtype() == f2.GetData().GetSubtype() failed ["
132  << f1.GetData().GetSubtype() << " != " << f2.GetData().GetSubtype() << "]");
133  BOOST_CHECK_MESSAGE(f1.GetLocation().Equals(f2.GetLocation()),
134  compared_features << ": f1.GetLocation().Equals(f2.GetLocation() failed");
135  if ( !f1.GetLocation().Equals(f2.GetLocation()) ) {
136  display = true;
137  }
138 
139  BOOST_CHECK_MESSAGE((f1.IsSetPartial() && f1.GetPartial()) ==
140  (f2.IsSetPartial() && f2.GetPartial()),
141  compared_features << ": (f1.IsSetPartial() && f1.GetPartial()) == (f2.IsSetPartial() && f2.GetPartial()) failed");
142  if ( (f1.IsSetPartial() && f1.GetPartial()) !=
143  (f2.IsSetPartial() && f2.GetPartial()) ) {
144  display = true;
145  }
146 
147  BOOST_CHECK_EQUAL(f1.IsSetPseudo() && f1.GetPseudo(),
148  f2.IsSetPseudo() && f2.GetPseudo());
149  if ( (f1.IsSetPseudo() && f1.GetPseudo()) !=
150  (f2.IsSetPseudo() && f2.GetPseudo()) ) {
151  display = true;
152  }
153 
154  BOOST_CHECK_EQUAL(f1.IsSetDbxref(), f2.IsSetDbxref());
155  if ( f1.IsSetDbxref() != f2.IsSetDbxref() ) {
156  display = true;
157  }
158 
    // Product mismatches are checked but do not set `display`.
159  BOOST_CHECK_EQUAL(f1.IsSetProduct(), f2.IsSetProduct());
160  if (f1.IsSetProduct() && f2.IsSetProduct()) {
161  BOOST_CHECK(f1.GetProduct().Equals(f2.GetProduct()));
162  }
163 
    // Stricter than the pseudo check above: the "is set" state itself must match.
164  BOOST_CHECK_EQUAL(f1.IsSetPseudo(), f2.IsSetPseudo());
165  if ( f1.IsSetPseudo() != f2.IsSetPseudo() ) {
166  display = true;
167  }
168 
    // Cdregion-only: compare code-breaks through their ASN.1 text form.
    // Several lines of this section are missing from the listing.
169  if (f1.GetData().IsCdregion() &&
170  f2.GetData().IsCdregion()) {
171  BOOST_CHECK(f1.GetData().GetCdregion().IsSetCode_break() ==
173  if (f1.GetData().GetCdregion().IsSetCode_break() !=
175  display = true;
176  } else if (f1.GetData().GetCdregion().IsSetCode_break()) {
177  CNcbiOstrstream stream1;
179  stream1 << MSerial_AsnText << **cb;
180  string code_break1 = CNcbiOstrstreamToString(stream1);
181 
182  CNcbiOstrstream stream2;
184  stream2 << MSerial_AsnText << **cb;
185  string code_break2 = CNcbiOstrstreamToString(stream2);
186 
187  BOOST_CHECK_EQUAL(code_break1, code_break2);
188  }
189  }
190 
    // Gene-only: locus, desc and syn must agree in both presence and value.
191  if(f1.GetData().IsGene() && f2.GetData().IsGene()){
192  BOOST_CHECK(f1.GetData().GetGene().IsSetLocus() ==
193  f2.GetData().GetGene().IsSetLocus());
194  if (f1.GetData().GetGene().IsSetLocus() !=
195  f2.GetData().GetGene().IsSetLocus()) {
196  display = true;
197  }
198  if(f1.GetData().GetGene().IsSetLocus() &&
199  f2.GetData().GetGene().IsSetLocus()){
200  BOOST_CHECK(f1.GetData().GetGene().GetLocus() ==
201  f2.GetData().GetGene().GetLocus());
202  if (f1.GetData().GetGene().GetLocus() !=
203  f2.GetData().GetGene().GetLocus()) {
204  display = true;
205  }
206  }
207  BOOST_CHECK(f1.GetData().GetGene().IsSetDesc() ==
208  f2.GetData().GetGene().IsSetDesc());
209  if (f1.GetData().GetGene().IsSetDesc() !=
210  f2.GetData().GetGene().IsSetDesc()) {
211  display = true;
212  }
213  if(f1.GetData().GetGene().IsSetDesc() &&
214  f2.GetData().GetGene().IsSetDesc()){
215  BOOST_CHECK(f1.GetData().GetGene().GetDesc() ==
216  f2.GetData().GetGene().GetDesc());
217  if (f1.GetData().GetGene().GetDesc() !=
218  f2.GetData().GetGene().GetDesc()) {
219  display = true;
220  }
221  }
222  BOOST_CHECK(f1.GetData().GetGene().IsSetSyn() ==
223  f2.GetData().GetGene().IsSetSyn());
224  if (f1.GetData().GetGene().IsSetSyn() !=
225  f2.GetData().GetGene().IsSetSyn()) {
226  display = true;
227  }
228  if(f1.GetData().GetGene().IsSetSyn() &&
229  f2.GetData().GetGene().IsSetSyn()){
230  BOOST_CHECK(f1.GetData().GetGene().GetSyn() ==
231  f2.GetData().GetGene().GetSyn());
232  if (f1.GetData().GetGene().GetSyn() !=
233  f2.GetData().GetGene().GetSyn()) {
234  display = true;
235  }
236  }
237  }
238 
    // Exception flag and text are checked but do not set `display`.
    // The initializers of the two text strings are missing from the listing.
239  bool f1_except = f1.IsSetExcept() && f1.GetExcept();
240  bool f2_except = f2.IsSetExcept() && f2.GetExcept();
241 
242  BOOST_CHECK_EQUAL(f1_except, f2_except);
243 
244  string f1_except_text =
246  string f2_except_text =
248  BOOST_CHECK_EQUAL(f1_except_text, f2_except_text);
249 
250  BOOST_CHECK(f1.IsSetComment() == f2.IsSetComment());
251  if (f1.IsSetComment() != f2.IsSetComment()) {
252  display = true;
253  }
254  if(f1.IsSetComment() && f2.IsSetComment()){
255  BOOST_CHECK(f1.GetComment() == f2.GetComment());
256  if (f1.GetComment() != f2.GetComment()) {
257  display = true;
258  }
259  }
260 
    // Dump both features so the failing pair can be inspected by eye.
261  if (display) {
262  cerr << "expected: " << MSerial_AsnText << f2;
263  cerr << "got: " << MSerial_AsnText << f1;
264  }
265  }
266 }
267 
268 void AddFastaToScope(const string& fasta_file, CScope& scope)
269 {
270  CFastaReader fastareader(fasta_file, CFastaReader::fParseRawID);
271  do {
272  CRef<CSeq_entry> seq_entry = fastareader.ReadOneSeq();
273  scope.AddTopLevelSeqEntry(*seq_entry);
274  } while(!fastareader.AtEOF());
275 }
276 
// Data-driven test. Reads Seq-aligns from "data-in" one at a time, converts
// each to features with CFeatureGenerator, and compares the result with the
// next annotation read from "data-expected". Generated bioseqs are matched
// against the "seqdata-expected" pattern file. Two combined annotations are
// also built and compared with "combined-data-expected" and
// "combined-with-omission-expected". The optional "*-out" arguments receive
// the actual results when given.
// Several lines (including the definition of `om` and part of the
// default_flags expression) are missing from this listing.
277 BOOST_AUTO_TEST_CASE(TestUsingArg)
278 {
281  CScope scope(*om);
282  scope.AddDefaults();
283  validator::CValidator validator(*om);
284 
285  const CArgs& args = CNcbiApplication::Instance()->GetArgs();
286  CNcbiIstream& align_istr = args["data-in"].AsInputFile();
287  CNcbiIstream& annot_istr = args["data-expected"].AsInputFile();
288  CNcbiIstream& combined_annot_istr = args["combined-data-expected"].AsInputFile();
289  CNcbiIstream& combined_annot_with_omission_istr = args["combined-with-omission-expected"].AsInputFile();
290 
291  unique_ptr<CObjectIStream> align_is(CObjectIStream::Open(eSerial_AsnText,
292  align_istr));
293  unique_ptr<CObjectIStream> annot_is(CObjectIStream::Open(eSerial_AsnText,
294  annot_istr));
295  unique_ptr<CObjectIStream> combined_annot_is(CObjectIStream::Open(eSerial_AsnText,
296  combined_annot_istr));
297  unique_ptr<CObjectIStream> combined_annot_with_omission_is(
299  combined_annot_with_omission_istr));
    // Output streams stay null unless the matching "*-out" argument was given.
300  unique_ptr<CObjectOStream> annot_os;
301  if (args["data-out"]) {
302  CNcbiOstream& annot_ostr = args["data-out"].AsOutputFile();
303  annot_os.reset(CObjectOStream::Open(eSerial_AsnText,
304  annot_ostr));
305  }
306  unique_ptr<CObjectOStream> combined_annot_os;
307  if (args["combined-data-out"]) {
308  CNcbiOstream& combined_annot_ostr = args["combined-data-out"].AsOutputFile();
309  combined_annot_os.reset(CObjectOStream::Open(eSerial_AsnText,
310  combined_annot_ostr));
311  }
312  unique_ptr<CObjectOStream> combined_annot_with_omission_os;
313  if (args["combined-with-omission-out"]) {
314  CNcbiOstream& combined_annot_with_omission_ostr = args["combined-with-omission-out"].AsOutputFile();
315  combined_annot_with_omission_os.reset(CObjectOStream::Open(eSerial_AsnText,
316  combined_annot_with_omission_ostr));
317  }
    // Pattern stream: bioseqs written to it are matched against the expected file.
318  output_test_stream seqdata_test_stream( args["seqdata-expected"].AsString(), true );
319  unique_ptr<CObjectOStream> seqdata_test_os(CObjectOStream::Open(eSerial_AsnText,
320  seqdata_test_stream));
321  unique_ptr<CObjectOStream> seqdata_os;
322  if (args["seqdata-out"]) {
323  CNcbiOstream& seqdata_ostr = args["seqdata-out"].AsOutputFile();
324  seqdata_os.reset(CObjectOStream::Open(eSerial_AsnText,
325  seqdata_ostr));
326  }
327 
328  if (args["seqdata-in"]) {
329  AddFastaToScope(args["seqdata-in"].AsString(), scope);
330  }
331 
332  CSeq_annot actual_combined_annot;
333  CSeq_annot::C_Data::TFtable &actual_combined_features =
334  actual_combined_annot.SetData().SetFtable();
335  CSeq_annot expected_combined_annot;
336  *combined_annot_is >> expected_combined_annot;
337  CSeq_annot::C_Data::TFtable &expected_combined_features =
338  expected_combined_annot.SetData().SetFtable();
339  CSeq_annot actual_combined_annot_with_omission;
340  CSeq_annot::C_Data::TFtable &actual_combined_features_with_omission =
341  actual_combined_annot_with_omission.SetData().SetFtable();
342  CSeq_annot expected_combined_annot_with_omission;
343  *combined_annot_with_omission_is >> expected_combined_annot_with_omission;
344  CSeq_annot::C_Data::TFtable &expected_combined_features_with_omission =
345  expected_combined_annot_with_omission.SetData().SetFtable();
346 
    // Ids whose gene feature has already been added to the "with omission" annot.
347  set< CSeq_id_Handle > unique_gene_ids;
348 
349  /// combined_aligns will contain all alignments read for this specific gene
350  list< CRef<CSeq_align> > combined_aligns;
351  CSeq_id gene_for_combined_aligns;
352  gene_for_combined_aligns.SetOther().SetAccession("NT_007933");
353  gene_for_combined_aligns.SetOther().SetVersion(15);
354  set<CSeq_id_Handle> genes_for_redo_partial;
355  genes_for_redo_partial.insert(CSeq_id_Handle::GetHandle(gene_for_combined_aligns));
356  genes_for_redo_partial.insert(CSeq_id_Handle::GetHandle("NT_011515.12"));
357  genes_for_redo_partial.insert(CSeq_id_Handle::GetGiHandle(GI_CONST(224514634)));
358  genes_for_redo_partial.insert(CSeq_id_Handle::GetGiHandle(GI_CONST(258441149)));
359 
360  const int default_flags =
362  ~CFeatureGenerator::fGenerateLocalIds) |
365 
366  for (int alignment = 0; align_istr && annot_istr; ++alignment) {
367 
368  CFeatureGenerator generator(scope);
369 
370  CRef<CSeq_align> align(new CSeq_align);
371  CSeq_annot expected_annot;
372 
373  /// we wrap the first serialization in try/catch
374  /// if this fails, we are at the end of the file, and we expect both to
375  /// be at the end of the file.
376  /// a failure in the second serialization is fatal
377  try {
378  *align_is >> *align;
379  }
380  catch (CEofException&) {
381  try {
382  *annot_is >> expected_annot;
383  }
384  catch (CEofException&) {
385  }
386  break;
387  }
388 
389  cerr << "Alignment "<< alignment << endl;
390 
391  BOOST_CHECK_NO_THROW(align->Validate(true));
392 
393 
394  CRef<CSeq_entry> seq_entry(new CSeq_entry);
395  CBioseq_set& seqs = seq_entry->SetSet();
396  seqs.SetSeq_set();
397  CSeq_annot actual_annot;
398  CSeq_annot::C_Data::TFtable &actual_features =
399  actual_annot.SetData().SetFtable();
400  {
401  generator.SetFlags(default_flags);
403  TSeqRange adjust_range;
404  CRef<CSeq_feat> feat;
    // Per-alignment overrides come from a "CFeatureGenerator" user object in
    // the alignment's ext: Flags, MinIntron, AdjustRange and a serialized
    // cdregion feature.
405  ITERATE (CSeq_align::TExt, ext_it, align->GetExt()) {
406  if ((*ext_it)->GetType().IsStr() &&
407  (*ext_it)->GetType().GetStr() == "CFeatureGenerator") {
408  if ((*ext_it)->HasField("Flags")) {
409  int flags = (*ext_it)->GetField("Flags").GetData().GetInt();
410  generator.SetFlags(flags);
411  }
412  if ((*ext_it)->HasField("MinIntron")) {
413  int value = (*ext_it)->GetField("MinIntron").GetData().GetInt();
414  generator.SetMinIntron(value);
415  }
416  if ((*ext_it)->HasField("AdjustRange")) {
417  const vector<int>& range_vec = (*ext_it)->GetField("AdjustRange").GetData().GetInts();
418  adjust_range = TSeqRange(range_vec[0], range_vec[1]);
419  generator.SetAllowedUnaligned(0);
420  }
421  if ((*ext_it)->HasField("cdregion")) {
422  string cdregion = (*ext_it)->GetField("cdregion").GetData().GetStr();
423  CNcbiIstrstream istrs(cdregion);
424  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
425  feat.Reset(new CSeq_feat);
426  *istr >> *feat;
427  }
428  }
429  }
430 
431  CConstRef<CSeq_align> clean_align = generator.CleanAlignment(*align);
432 
433 // cerr << MSerial_AsnText << *clean_align;
434 
435  if (adjust_range.NotEmpty()) {
436  clean_align = generator.AdjustAlignment(*clean_align, adjust_range);
437  }
438 
439  generator.ConvertAlignToAnnot(*clean_align, actual_annot, seqs, 0, feat.GetPointer());
440 // BOOST_CHECK( validator.Validate(*seq_entry, &scope)->FatalSize()==0 );
    // NOTE(review): front() assumes the conversion produced at least one
    // feature; an empty table is not guarded against here.
441  CSeq_id_Handle id = CSeq_id_Handle::GetHandle(*actual_features.front()->GetLocation().GetId());
442  if(id == gene_for_combined_aligns)
443  combined_aligns.push_back(align);
444 
445  NON_CONST_ITERATE(CSeq_annot::C_Data::TFtable, it, actual_features){
446  /// Add to combined annot, unless this is a gene feature that
447  /// was already added. Also, don't add the RNA feature from the
448  /// very first alignment (to test recomputation of the partial flag
449  /// for the gene)
450  if (genes_for_redo_partial.count(id) &&
451  (!(*it)->GetData().IsGene() ||
452  unique_gene_ids.insert(id).second) &&
453  (!(*it)->GetData().IsRna() || alignment > 0))
454  actual_combined_features_with_omission.push_back(*it);
455  }
456  }
457 
458  if (annot_os.get() != NULL) {
459  *annot_os << actual_annot;
460  }
461  if (seqdata_os.get() != NULL) {
462  *seqdata_os << seqs;
463  }
464 
465  *seqdata_test_os << seqs;
466  BOOST_CHECK( seqdata_test_stream.match_pattern() );
467 
468  *annot_is >> expected_annot;
469  const CSeq_annot::C_Data::TFtable &expected_features =
470  expected_annot.GetData().GetFtable();
471 
472  s_CompareFtables(actual_features, expected_features, "Main annotation");
473  }
474 
    // Second phase: recompute partial flags on the "with omission" annotation
    // and compare it with the expected one.
475  CFeatureGenerator generator(scope);
476 
477  generator.SetFlags(default_flags);
478 
479  generator.RecomputePartialFlags(actual_combined_annot_with_omission);
480 
481  if (combined_annot_with_omission_os.get() != NULL) {
482  *combined_annot_with_omission_os << actual_combined_annot_with_omission;
483  }
484  s_CompareFtables(actual_combined_features_with_omission, expected_combined_features_with_omission, "combined_features_with_omission");
485 
    // Third phase: convert all alignments collected for the chosen gene in one call.
486  CBioseq_set seqs;
487  generator.ConvertAlignToAnnot(combined_aligns, actual_combined_annot, seqs);
488 
489  // ConvertAlignToAnnot collates alignments by gene using unpredictable ordering over
490  // SeqId handles; so order of features in result is unpredictable, we need to sort
491  // them before comparison
492  actual_combined_features.sort(s_CompareFeatRefs);
493  expected_combined_features.sort(s_CompareFeatRefs);
494 
495  if(combined_annot_os.get() != NULL) {
496  *combined_annot_os << actual_combined_annot;
497  }
498  s_CompareFtables(actual_combined_features, expected_combined_features, "combined_features");
499 
    // Both input files must have been consumed completely.
500  BOOST_CHECK(align_istr.eof());
501  BOOST_CHECK(annot_istr.eof());
502 }
503 
504 BOOST_AUTO_TEST_SUITE(TestSuiteTrimAlignment)
505 
// Smoke test: CleanAlignment on a default-constructed CSeq_align must not
// throw. The lines that define `om` are missing from this listing.
506 BOOST_AUTO_TEST_CASE(TestCaseTrimAlignmentCall)
507 {
510  CRef<CScope> scope(new CScope(*om));
511  scope->AddDefaults();
512 
513  CFeatureGenerator feat_gen(*scope);
514 
515  CSeq_align align;
516  CConstRef<CSeq_align> trimmed_align;
517  BOOST_CHECK_NO_THROW(
518  trimmed_align = feat_gen.CleanAlignment(align)
519  );
520 }
521 
// Builds a spliced alignment with two exons (0..100 and 200..300 on both
// product and genomic coordinates) and expects CleanAlignment to return a
// single exon. The lines that define `om`, and one line after the SetSpliced()
// call, are missing from this listing.
522 BOOST_AUTO_TEST_CASE(TestCaseStitch)
523 {
526  CRef<CScope> scope(new CScope(*om));
527  scope->AddDefaults();
528 
529  CFeatureGenerator feat_gen(*scope);
530 
531  CSeq_align align;
532  CSpliced_seg& seg = align.SetSegs().SetSpliced();
534  CSpliced_seg::TExons& exons = seg.SetExons();
535  CRef<CSpliced_exon> exon;
536  exon.Reset(new CSpliced_exon);
537  exon->SetProduct_start().SetNucpos(0);
538  exon->SetProduct_end().SetNucpos(100);
539  exon->SetGenomic_start(0);
540  exon->SetGenomic_end(100);
541  exons.push_back(exon);
542  exon.Reset(new CSpliced_exon);
543  exon->SetProduct_start().SetNucpos(200);
544  exon->SetProduct_end().SetNucpos(300);
545  exon->SetGenomic_start(200);
546  exon->SetGenomic_end(300);
547  exons.push_back(exon);
548 
549  CConstRef<CSeq_align> trimmed_align;
550  trimmed_align = feat_gen.CleanAlignment(align);
551 
552  BOOST_CHECK_EQUAL(trimmed_align->GetSegs().GetSpliced().GetExons().size(), size_t(1));
553 
554 }
555 
// Builds a three-exon nucleotide spliced alignment (NM_018690.2 against
// NT_010393.16), runs CleanAlignment, and expects two exons: the first
// covering genomic 1031..1032 and the second starting at genomic 2002.
// Both the input and the cleaned alignment must pass Validate(true).
// The lines that define `om` and declare `chunk` are missing from this listing.
556 BOOST_AUTO_TEST_CASE(TestCaseTrim)
557 {
560  CRef<CScope> scope(new CScope(*om));
561  scope->AddDefaults();
562 
563  CFeatureGenerator feat_gen(*scope);
564 
565  CSeq_align align;
566  CSpliced_seg& seg = align.SetSegs().SetSpliced();
568  CRef<CSeq_id> seq_id;
569  seq_id.Reset(new CSeq_id("NM_018690.2"));
570  seg.SetProduct_id(*seq_id);
571  seq_id.Reset(new CSeq_id("NT_010393.16"));
572  seg.SetGenomic_id(*seq_id);
573  CSpliced_seg::TExons& exons = seg.SetExons();
574  CRef<CSpliced_exon> exon;
575 
    // Exon 1: two bases, no explicit parts.
576  exon.Reset(new CSpliced_exon);
577  exon->SetProduct_start().SetNucpos(10);
578  exon->SetProduct_end().SetNucpos(11);
579  exon->SetGenomic_start(20);
580  exon->SetGenomic_end(21);
581  exons.push_back(exon);
582 
    // Exon 2: parts are product-ins 1, match 2, genomic-ins 1, match 1.
583  exon.Reset(new CSpliced_exon);
584  exon->SetProduct_start().SetNucpos(16);
585  exon->SetProduct_end().SetNucpos(19);
586  exon->SetGenomic_start(1031);
587  exon->SetGenomic_end(1034);
589 
590  chunk.Reset(new CSpliced_exon_chunk);
591  chunk->SetProduct_ins(1);
592  exon->SetParts().push_back(chunk);
593 
594  chunk.Reset(new CSpliced_exon_chunk);
595  chunk->SetMatch(2);
596  exon->SetParts().push_back(chunk);
597 
598  chunk.Reset(new CSpliced_exon_chunk);
599  chunk->SetGenomic_ins(1);
600  exon->SetParts().push_back(chunk);
601 
602  chunk.Reset(new CSpliced_exon_chunk);
603  chunk->SetMatch(1);
604  exon->SetParts().push_back(chunk);
605  exons.push_back(exon);
606 
    // Exon 3: product 200..300, genomic 2000..2100, no explicit parts.
607  exon.Reset(new CSpliced_exon);
608  exon->SetProduct_start().SetNucpos(200);
609  exon->SetProduct_end().SetNucpos(300);
610  exon->SetGenomic_start(2000);
611  exon->SetGenomic_end(2100);
612  exons.push_back(exon);
613 
614  BOOST_CHECK_NO_THROW(align.Validate(true));
615 
616  CConstRef<CSeq_align> trimmed_align;
617  trimmed_align = feat_gen.CleanAlignment(align);
618 
619  BOOST_CHECK_NO_THROW(trimmed_align->Validate(true));
620 
621  BOOST_CHECK_EQUAL(trimmed_align->GetSegs().GetSpliced().GetExons().size(), size_t(2));
622 
623  CSpliced_seg::TExons::const_iterator i = trimmed_align->GetSegs().GetSpliced().GetExons().begin();
624 
625  BOOST_CHECK_EQUAL((*i)->GetGenomic_start(), TSeqPos(1031) );
626  BOOST_CHECK_EQUAL((*i)->GetGenomic_end(), TSeqPos(1032) );
627  BOOST_CHECK_EQUAL((*++i)->GetGenomic_start(), TSeqPos(2002) );
628 }
629 
// Protein-product variant of the trim test: builds a three-exon spliced
// alignment (lcl|prot against lcl|genomic, product length 101) with
// amin/frame product positions, runs CleanAlignment, and expects two exons:
// the first covering genomic 1032..1035 and the second starting at 2001.
// The lines that define `om` and declare `chunk` are missing from this listing.
630 BOOST_AUTO_TEST_CASE(TestCaseTrimProtein)
631 {
634  CRef<CScope> scope(new CScope(*om));
635  scope->AddDefaults();
636 
637  CFeatureGenerator feat_gen(*scope);
638 
639  CSeq_align align;
640  CSpliced_seg& seg = align.SetSegs().SetSpliced();
642  seg.SetProduct_length(101);
643  CRef<CSeq_id> seq_id;
644  seq_id.Reset(new CSeq_id("lcl|prot"));
645  seg.SetProduct_id(*seq_id);
646  seq_id.Reset(new CSeq_id("lcl|genomic"));
647  seg.SetGenomic_id(*seq_id);
648  CSpliced_seg::TExons& exons = seg.SetExons();
649  CRef<CSpliced_exon> exon;
650 
    // Exon 1: genomic 20..21, no explicit parts.
651  exon.Reset(new CSpliced_exon);
652  exon->SetProduct_start().SetProtpos().SetAmin(3);
653  exon->SetProduct_start().SetProtpos().SetFrame(1);
654  exon->SetProduct_end().SetProtpos().SetAmin(3);
655  exon->SetProduct_end().SetProtpos().SetFrame(2);
656  exon->SetGenomic_start(20);
657  exon->SetGenomic_end(21);
658  exons.push_back(exon);
659 
    // Exon 2: parts are product-ins 1, match 2, genomic-ins 1, match 4.
660  exon.Reset(new CSpliced_exon);
661  exon->SetProduct_start().SetProtpos().SetAmin(5);
662  exon->SetProduct_start().SetProtpos().SetFrame(2);
663  exon->SetProduct_end().SetProtpos().SetAmin(7);
664  exon->SetProduct_end().SetProtpos().SetFrame(2);
665  exon->SetGenomic_start(1031);
666  exon->SetGenomic_end(1037);
668 
669  chunk.Reset(new CSpliced_exon_chunk);
670  chunk->SetProduct_ins(1);
671  exon->SetParts().push_back(chunk);
672 
673  chunk.Reset(new CSpliced_exon_chunk);
674  chunk->SetMatch(2);
675  exon->SetParts().push_back(chunk);
676 
677  chunk.Reset(new CSpliced_exon_chunk);
678  chunk->SetGenomic_ins(1);
679  exon->SetParts().push_back(chunk);
680 
681  chunk.Reset(new CSpliced_exon_chunk);
682  chunk->SetMatch(4);
683  exon->SetParts().push_back(chunk);
684  exons.push_back(exon);
685 
    // Exon 3: genomic 2000..2100, no explicit parts.
686  exon.Reset(new CSpliced_exon);
687  exon->SetProduct_start().SetProtpos().SetAmin(66);
688  exon->SetProduct_start().SetProtpos().SetFrame(3);
689  exon->SetProduct_end().SetProtpos().SetAmin(100);
690  exon->SetProduct_end().SetProtpos().SetFrame(1);
691  exon->SetGenomic_start(2000);
692  exon->SetGenomic_end(2100);
693  exons.push_back(exon);
694 
695  BOOST_CHECK_NO_THROW(align.Validate(true));
696 
697  CConstRef<CSeq_align> trimmed_align;
698  trimmed_align = feat_gen.CleanAlignment(align);
699 
700  BOOST_CHECK_NO_THROW(trimmed_align->Validate(true));
701 
702  BOOST_CHECK_EQUAL(trimmed_align->GetSegs().GetSpliced().GetExons().size(), size_t(2));
703 
704  CSpliced_seg::TExons::const_iterator i = trimmed_align->GetSegs().GetSpliced().GetExons().begin();
705 
706  BOOST_CHECK_EQUAL((*i)->GetGenomic_start(), TSeqPos(1032) );
707  BOOST_CHECK_EQUAL((*i)->GetGenomic_end(), TSeqPos(1035) );
708  BOOST_CHECK_EQUAL((*++i)->GetGenomic_start(), TSeqPos(2001) );
709 }
710 
// Parses a four-exon protein spliced alignment from inline ASN.1 text, sets
// the generator's minimum intron to 5, runs CleanAlignment, and expects three
// exons with genomic ranges 0..17, 33..59 and 123..1655.
// The lines that define `om` are missing from this listing.
711 BOOST_AUTO_TEST_CASE(TestCaseTrimHolesToCodons)
712 {
715  CRef<CScope> scope(new CScope(*om));
716  scope->AddDefaults();
717 
718  CFeatureGenerator feat_gen(*scope);
719 
720 string buf = " \
721 Seq-align ::= { \
722  type disc, \
723  dim 2, \
724  segs spliced { \
725  product-id gi 16762324, \
726  genomic-id gi 188504888, \
727  genomic-strand plus, \
728  product-type protein, \
729  exons { \
730  { \
731  product-start protpos { amin 25, frame 1 }, \
732  product-end protpos { amin 30, frame 3 }, \
733  genomic-start 0, genomic-end 17 \
734  }, \
735  { \
736  product-start protpos { amin 35, frame 2 }, \
737  product-end protpos { amin 45, frame 2 }, \
738  genomic-start 31, genomic-end 61 \
739  }, \
740  { \
741  product-start protpos { amin 55, frame 2 }, \
742  product-end protpos { amin 55, frame 2 }, \
743  genomic-start 91, genomic-end 91 \
744  }, \
745  { \
746  product-start protpos { amin 65, frame 2 }, \
747  product-end protpos { amin 576, frame 3 }, \
748  genomic-start 121, genomic-end 1655 \
749  } \
750  }, \
751  product-length 577, \
752  modifiers { \
753  stop-codon-found TRUE \
754  } \
755  } \
756 }";
757  CNcbiIstrstream istrs(buf);
758  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
759  CSeq_align align;
760  *istr >> align;
761 
762  BOOST_CHECK_NO_THROW(align.Validate(true));
763 
764  feat_gen.SetMinIntron(5);
765  CConstRef<CSeq_align> trimmed_align;
766  trimmed_align = feat_gen.CleanAlignment(align);
767 
768  BOOST_CHECK_NO_THROW(trimmed_align->Validate(true));
769 
770  BOOST_CHECK_EQUAL(trimmed_align->GetSegs().GetSpliced().GetExons().size(), size_t(3));
771 
772  CSpliced_seg::TExons::const_iterator i = trimmed_align->GetSegs().GetSpliced().GetExons().begin();
773 
    // `i` is advanced inside the checks below, so their order matters.
774  BOOST_CHECK_EQUAL((*i)->GetGenomic_start(), TSeqPos(0) );
775  BOOST_CHECK_EQUAL((*i)->GetGenomic_end(), TSeqPos(17) );
776  BOOST_CHECK_EQUAL((*++i)->GetGenomic_start(), TSeqPos(33) );
777  BOOST_CHECK_EQUAL((*i)->GetGenomic_end(), TSeqPos(59) );
778  BOOST_CHECK_EQUAL((*++i)->GetGenomic_start(), TSeqPos(123) );
779  BOOST_CHECK_EQUAL((*i)->GetGenomic_end(), TSeqPos(1655) );
780 }
// Parses a two-exon protein spliced alignment from inline ASN.1 text, sets
// the generator's minimum intron to 5, calls AdjustAlignment with the range
// 1653..1655, and expects a single exon covering exactly genomic 1653..1655.
// The lines that define `om` are missing from this listing.
781 BOOST_AUTO_TEST_CASE(TestCaseTrimHoleOff)
782 {
785  CRef<CScope> scope(new CScope(*om));
786  scope->AddDefaults();
787 
788  CFeatureGenerator feat_gen(*scope);
789 
790 string buf = " \
791 Seq-align ::= { \
792  type disc, \
793  dim 2, \
794  segs spliced { \
795  product-id gi 16762324, \
796  genomic-id gi 188504888, \
797  genomic-strand plus, \
798  product-type protein, \
799  exons { \
800  { \
801  product-start protpos { amin 25, frame 1 }, \
802  product-end protpos { amin 30, frame 3 }, \
803  genomic-start 0, genomic-end 17 \
804  }, \
805  { \
806  product-start protpos { amin 65, frame 2 }, \
807  product-end protpos { amin 576, frame 3 }, \
808  genomic-start 121, genomic-end 1655 \
809  } \
810  }, \
811  product-length 577, \
812  modifiers { \
813  stop-codon-found TRUE \
814  } \
815  } \
816 }";
817  CNcbiIstrstream istrs(buf);
818  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
819  CSeq_align align;
820  *istr >> align;
821 
822  BOOST_CHECK_NO_THROW(align.Validate(true));
823 
824  feat_gen.SetMinIntron(5);
825  CConstRef<CSeq_align> trimmed_align;
826  trimmed_align = feat_gen.AdjustAlignment(align, TSeqRange(1653, 1655));
827 
828  BOOST_CHECK_NO_THROW(trimmed_align->Validate(true));
829 
830  BOOST_CHECK_EQUAL(trimmed_align->GetSegs().GetSpliced().GetExons().size(), size_t(1));
831 
832  CSpliced_seg::TExons::const_iterator i = trimmed_align->GetSegs().GetSpliced().GetExons().begin();
833 
834  BOOST_CHECK_EQUAL((*i)->GetGenomic_start(), TSeqPos(1653) );
835  BOOST_CHECK_EQUAL((*i)->GetGenomic_end(), TSeqPos(1655) );
836 }
837 
// Protein-product stitch test: builds a three-exon spliced alignment
// (lcl|prot against lcl|genomic, product length 101) whose exons lie close
// together on the genomic sequence (20..21, 31..37, 137..237), runs
// CleanAlignment, and expects a single exon covering genomic 20..237.
// The lines that define `om`, declare `chunk`, and hold the `case` labels of
// the switch are missing from this listing.
838 BOOST_AUTO_TEST_CASE(TestCaseStitchProtein)
839 {
842  CRef<CScope> scope(new CScope(*om));
843  scope->AddDefaults();
844 
845  CFeatureGenerator feat_gen(*scope);
846 
847  CSeq_align align;
849  CSpliced_seg& seg = align.SetSegs().SetSpliced();
851  seg.SetProduct_length(101);
852  CRef<CSeq_id> seq_id;
853  seq_id.Reset(new CSeq_id("lcl|prot"));
854  seg.SetProduct_id(*seq_id);
855  seq_id.Reset(new CSeq_id("lcl|genomic"));
856  seg.SetGenomic_id(*seq_id);
857  CSpliced_seg::TExons& exons = seg.SetExons();
858  CRef<CSpliced_exon> exon;
859 
    // Exon 1: genomic 20..21, no explicit parts.
860  exon.Reset(new CSpliced_exon);
861  exon->SetProduct_start().SetProtpos().SetAmin(3);
862  exon->SetProduct_start().SetProtpos().SetFrame(1);
863  exon->SetProduct_end().SetProtpos().SetAmin(3);
864  exon->SetProduct_end().SetProtpos().SetFrame(2);
865  exon->SetGenomic_start(20);
866  exon->SetGenomic_end(21);
867  exons.push_back(exon);
868 
    // Exon 2: parts are product-ins 1, match 2, genomic-ins 1, match 4.
869  exon.Reset(new CSpliced_exon);
870  exon->SetProduct_start().SetProtpos().SetAmin(5);
871  exon->SetProduct_start().SetProtpos().SetFrame(2);
872  exon->SetProduct_end().SetProtpos().SetAmin(7);
873  exon->SetProduct_end().SetProtpos().SetFrame(2);
874  exon->SetGenomic_start(31);
875  exon->SetGenomic_end(37);
877 
878  chunk.Reset(new CSpliced_exon_chunk);
879  chunk->SetProduct_ins(1);
880  exon->SetParts().push_back(chunk);
881 
882  chunk.Reset(new CSpliced_exon_chunk);
883  chunk->SetMatch(2);
884  exon->SetParts().push_back(chunk);
885 
886  chunk.Reset(new CSpliced_exon_chunk);
887  chunk->SetGenomic_ins(1);
888  exon->SetParts().push_back(chunk);
889 
890  chunk.Reset(new CSpliced_exon_chunk);
891  chunk->SetMatch(4);
892  exon->SetParts().push_back(chunk);
893  exons.push_back(exon);
894 
    // Exon 3: genomic 137..237, no explicit parts.
895  exon.Reset(new CSpliced_exon);
896  exon->SetProduct_start().SetProtpos().SetAmin(36);
897  exon->SetProduct_start().SetProtpos().SetFrame(3);
898  exon->SetProduct_end().SetProtpos().SetAmin(70);
899  exon->SetProduct_end().SetProtpos().SetFrame(1);
900  exon->SetGenomic_start(137);
901  exon->SetGenomic_end(237);
902  exons.push_back(exon);
903 
904  BOOST_CHECK_NO_THROW(align.Validate(true));
905 
906  CConstRef<CSeq_align> modified_align;
907  modified_align = feat_gen.CleanAlignment(align);
908 
909  BOOST_CHECK_NO_THROW(modified_align->Validate(true));
910 
911  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetExons().size(), size_t(1));
912 
913  CSpliced_seg::TExons::const_iterator i = modified_align->GetSegs().GetSpliced().GetExons().begin();
914 
915  BOOST_CHECK_EQUAL((*i)->GetGenomic_start(), TSeqPos(20) );
916  BOOST_CHECK_EQUAL((*i)->GetGenomic_end(), TSeqPos(237) );
917 
    // NOTE(review): product_pos is accumulated over the exon's parts but no
    // visible line asserts on it afterwards - confirm whether a check is missing.
918  TSeqPos product_pos = 0;
919  ITERATE(CSpliced_exon::TParts, p, (*i)->GetParts()) {
920  const CSpliced_exon_chunk& chunk = **p;
921  switch (chunk.Which()) {
923  product_pos += chunk.GetMatch();
924  break;
926  product_pos += chunk.GetMismatch();
927  break;
929  product_pos += chunk.GetProduct_ins();
930  break;
932  break;
933  default:
934  break;
935  }
936  }
937 
938 
939 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
940 // cerr);
941 // *ostr << *trimmed_align;
942 // delete ostr;
943 
944 }
945 
// TestCaseExpandAlignment: for strand = -1 and strand = +1, builds a
// single-exon spliced alignment ("lcl|prot" vs "lcl|genomic") covering
// genomic 22..237, calls AdjustAlignment() with the wider range 8..248 and
// checks the adjusted exon and product coordinates.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); `om`, `chunk` and the `case` labels of the switch
// below are not visible here -- consult the repository source.
946 BOOST_AUTO_TEST_CASE(TestCaseExpandAlignment)
947 {
950 
951  for (int strand = -1 ; strand <= 1; strand +=2) {
952 
953  CRef<CScope> scope(new CScope(*om));
954  scope->AddDefaults();
955 
956  CFeatureGenerator feat_gen(*scope);
957 
958  CSeq_align align;
959  {
961  CSpliced_seg& seg = align.SetSegs().SetSpliced();
963  seg.SetProduct_length(101);
964  CRef<CSeq_id> seq_id;
965  seq_id.Reset(new CSeq_id("lcl|prot"));
966  seg.SetProduct_id(*seq_id);
967  seq_id.Reset(new CSeq_id("lcl|genomic"));
968  seg.SetGenomic_id(*seq_id);
969  CSpliced_seg::TExons& exons = seg.SetExons();
970  CRef<CSpliced_exon> exon;
971 
972  exon.Reset(new CSpliced_exon);
973  exon->SetProduct_start().SetProtpos().SetAmin(3);
974  exon->SetProduct_start().SetProtpos().SetFrame(3);
975  exon->SetProduct_end().SetProtpos().SetAmin(75);
976  exon->SetProduct_end().SetProtpos().SetFrame(1);
977  exon->SetGenomic_start(22);
978  exon->SetGenomic_end(237);
979  exon->SetGenomic_strand() = strand > 0 ? eNa_strand_plus : eNa_strand_minus;
980 
982 
// Exon parts, in order: diag 1, genomic-ins 1, diag 7, product-ins 1,
// diag 2, genomic-ins 1, diag 204.
983  chunk.Reset(new CSpliced_exon_chunk);
984  chunk->SetDiag(1);
985  exon->SetParts().push_back(chunk);
986 
987  chunk.Reset(new CSpliced_exon_chunk);
988  chunk->SetGenomic_ins(1);
989  exon->SetParts().push_back(chunk);
990 
991  chunk.Reset(new CSpliced_exon_chunk);
992  chunk->SetDiag(7);
993  exon->SetParts().push_back(chunk);
994 
995  chunk.Reset(new CSpliced_exon_chunk);
996  chunk->SetProduct_ins(1);
997  exon->SetParts().push_back(chunk);
998 
999  chunk.Reset(new CSpliced_exon_chunk);
1000  chunk->SetDiag(2);
1001  exon->SetParts().push_back(chunk);
1002 
1003  chunk.Reset(new CSpliced_exon_chunk);
1004  chunk->SetGenomic_ins(1);
1005  exon->SetParts().push_back(chunk);
1006 
1007  chunk.Reset(new CSpliced_exon_chunk);
1008  chunk->SetDiag(204);
1009  exon->SetParts().push_back(chunk);
1010 
1011  exons.push_back(exon);
1012  }
1013  BOOST_CHECK_NO_THROW(align.Validate(true));
1014 
1015 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1016 // cerr);
1017 // *ostr << align;
1018 
1019  CConstRef<CSeq_align> modified_align;
// Requested range 8..248 extends past the original exon (22..237) on both sides.
1020  TSeqRange range(8, 248);
1021  modified_align = feat_gen.AdjustAlignment(align, range);
1022 // *ostr << *modified_align;
1023 
1024  BOOST_CHECK_NO_THROW(modified_align->Validate(true));
1025  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetExons().size(), size_t(1));
1026  const CSpliced_exon& exon = **modified_align->GetSegs().GetSpliced().GetExons().begin();
1027 
// Walk the adjusted exon parts: product_pos accumulates the lengths read via
// GetMatch/GetMismatch/GetProduct_ins; cumulative_indel_len is +1 in the
// branch that reads GetProduct_ins and -1 in the branch that reads
// GetGenomic_ins (the case labels themselves are missing from this listing).
1028  int cumulative_indel_len = 0;
1029  TSeqPos product_pos = 0;
1030  ITERATE(CSpliced_exon::TParts, p, exon.GetParts()) {
1031  const CSpliced_exon_chunk& chunk = **p;
1032  switch (chunk.Which()) {
1034  product_pos += chunk.GetMatch();
1035  break;
1037  product_pos += chunk.GetMismatch();
1038  break;
1040  product_pos += chunk.GetProduct_ins();
1041  cumulative_indel_len += 1;
1042  break;
1044  if (chunk.GetGenomic_ins() > 1) {
1045  BOOST_CHECK_EQUAL(product_pos % 3, TSeqPos(0) );
1046  }
1047  cumulative_indel_len -= 1;
1048  break;
1049  default:
1050  break;
1051  }
1052  }
1053 
1054 
// Product length must equal (range length + net indel count) / 3.
1055  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetProduct_length(), (range.GetLength()+cumulative_indel_len)/3 );
1056 
// The exon must span exactly the requested genomic range.
1057  BOOST_CHECK_EQUAL(exon.GetGenomic_start(), range.GetFrom() );
1058  BOOST_CHECK_EQUAL(exon.GetGenomic_end(), range.GetTo() );
1059 
// Product coordinates must run from amino acid 0 to product_length - 1.
1060  BOOST_CHECK_EQUAL(exon.GetProduct_start().GetProtpos().GetAmin(), unsigned(0) );
1061  BOOST_CHECK_EQUAL(exon.GetProduct_end().GetProtpos().GetAmin(), modified_align->GetSegs().GetSpliced().GetProduct_length() -1 );
1062 
1063 
1064 
1065 
1066 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1067 // cerr);
1068 // *ostr << *trimmed_align;
1069 // delete ostr;
1070  }
1071 }
// TestCaseShrinkAlignment: for strand = -1 and strand = +1, builds a
// single-exon spliced alignment ("lcl|prot" vs "lcl|genomic") covering
// genomic 22..237, calls AdjustAlignment() with the narrower range 23..236
// and checks the adjusted exon and product coordinates.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); `om`, `chunk` and the `case` labels of the switch
// below are not visible here -- consult the repository source.
1072 BOOST_AUTO_TEST_CASE(TestCaseShrinkAlignment)
1073 {
1076 
1077  for (int strand = -1 ; strand <= 1; strand +=2) {
1078 
1079  CRef<CScope> scope(new CScope(*om));
1080  scope->AddDefaults();
1081 
1082  CFeatureGenerator feat_gen(*scope);
1083 
1084  CSeq_align align;
1085  {
1087  CSpliced_seg& seg = align.SetSegs().SetSpliced();
1089  seg.SetProduct_length(101);
1090  CRef<CSeq_id> seq_id;
1091  seq_id.Reset(new CSeq_id("lcl|prot"));
1092  seg.SetProduct_id(*seq_id);
1093  seq_id.Reset(new CSeq_id("lcl|genomic"));
1094  seg.SetGenomic_id(*seq_id);
1095  CSpliced_seg::TExons& exons = seg.SetExons();
1096  CRef<CSpliced_exon> exon;
1097 
1098  exon.Reset(new CSpliced_exon);
1099  exon->SetProduct_start().SetProtpos().SetAmin(3);
1100  exon->SetProduct_start().SetProtpos().SetFrame(3);
1101  exon->SetProduct_end().SetProtpos().SetAmin(75);
1102  exon->SetProduct_end().SetProtpos().SetFrame(1);
1103  exon->SetGenomic_start(22);
1104  exon->SetGenomic_end(237);
1105  exon->SetGenomic_strand() = strand > 0 ? eNa_strand_plus : eNa_strand_minus;
1106 
1108 
// Exon parts, in order: diag 2, genomic-ins 1, diag 6, product-ins 1,
// diag 2, genomic-ins 1, diag 204.
1109  chunk.Reset(new CSpliced_exon_chunk);
1110  chunk->SetDiag(2);
1111  exon->SetParts().push_back(chunk);
1112 
1113  chunk.Reset(new CSpliced_exon_chunk);
1114  chunk->SetGenomic_ins(1);
1115  exon->SetParts().push_back(chunk);
1116 
1117  chunk.Reset(new CSpliced_exon_chunk);
1118  chunk->SetDiag(6);
1119  exon->SetParts().push_back(chunk);
1120 
1121  chunk.Reset(new CSpliced_exon_chunk);
1122  chunk->SetProduct_ins(1);
1123  exon->SetParts().push_back(chunk);
1124 
1125  chunk.Reset(new CSpliced_exon_chunk);
1126  chunk->SetDiag(2);
1127  exon->SetParts().push_back(chunk);
1128 
1129  chunk.Reset(new CSpliced_exon_chunk);
1130  chunk->SetGenomic_ins(1);
1131  exon->SetParts().push_back(chunk);
1132 
1133  chunk.Reset(new CSpliced_exon_chunk);
1134  chunk->SetDiag(204);
1135  exon->SetParts().push_back(chunk);
1136 
1137  exons.push_back(exon);
1138  }
1139  BOOST_CHECK_NO_THROW(align.Validate(true));
1140 
1141 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1142 // cerr);
1143 // *ostr << align;
1144 
1145  CConstRef<CSeq_align> modified_align;
// Requested range 23..236 is one base shorter than the exon (22..237) on each side.
1146  TSeqRange range(23, 236);
1147  modified_align = feat_gen.AdjustAlignment(align, range);
1148 // *ostr << *modified_align;
1149 
1150  BOOST_CHECK_NO_THROW(modified_align->Validate(true));
1151  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetExons().size(), size_t(1));
1152  const CSpliced_exon& exon = **modified_align->GetSegs().GetSpliced().GetExons().begin();
1153 
// Walk the adjusted exon parts: product_pos accumulates the lengths read via
// GetMatch/GetMismatch/GetProduct_ins; cumulative_indel_len is +1 in the
// branch that reads GetProduct_ins and -1 in the branch that reads
// GetGenomic_ins (the case labels themselves are missing from this listing).
1154  int cumulative_indel_len = 0;
1155  TSeqPos product_pos = 0;
1156  ITERATE(CSpliced_exon::TParts, p, exon.GetParts()) {
1157  const CSpliced_exon_chunk& chunk = **p;
1158  switch (chunk.Which()) {
1160  product_pos += chunk.GetMatch();
1161  break;
1163  product_pos += chunk.GetMismatch();
1164  break;
1166  product_pos += chunk.GetProduct_ins();
1167  cumulative_indel_len += 1;
1168  break;
1170  if (chunk.GetGenomic_ins() > 1) {
1171  BOOST_CHECK_EQUAL(product_pos % 3, TSeqPos(0) );
1172  }
1173  cumulative_indel_len -= 1;
1174  break;
1175  default:
1176  break;
1177  }
1178  }
1179 
1180 
// Product length must equal (range length + net indel count) / 3.
1181  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetProduct_length(), (range.GetLength()+cumulative_indel_len)/3 );
1182 
// The exon must span exactly the requested genomic range.
1183  BOOST_CHECK_EQUAL(exon.GetGenomic_start(), range.GetFrom() );
1184  BOOST_CHECK_EQUAL(exon.GetGenomic_end(), range.GetTo() );
1185 
// Product coordinates must run from amino acid 0 to product_length - 1.
1186  BOOST_CHECK_EQUAL(exon.GetProduct_start().GetProtpos().GetAmin(), unsigned(0) );
1187  BOOST_CHECK_EQUAL(exon.GetProduct_end().GetProtpos().GetAmin(), modified_align->GetSegs().GetSpliced().GetProduct_length() -1 );
1188 
1189 
1190 
1191 
1192 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1193 // cerr);
1194 // *ostr << *trimmed_align;
1195 // delete ostr;
1196  }
1197 }
1198 
// TestCaseExpandAlignmentCrossOrigin: for strand = -1 and strand = +1, builds
// a single-exon spliced alignment on gi|17158061 covering genomic 10..225 and
// calls AdjustAlignment() with a range whose "from" (5583) is greater than
// its "to" (248). The result is expected to contain two exons.
// NOTE(review): presumably gi|17158061 is a circular sequence whose last base
// is 5583, so the range wraps over the origin -- confirm against the record.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declarations of `om` and `chunk` are not visible.
1199 BOOST_AUTO_TEST_CASE(TestCaseExpandAlignmentCrossOrigin)
1200 {
1203 
1204  for (int strand = -1 ; strand <= 1; strand +=2) {
1205 
1206  CRef<CScope> scope(new CScope(*om));
1207  scope->AddDefaults();
1208 
1209  CFeatureGenerator feat_gen(*scope);
1210 
1211  CSeq_align align;
1212  {
1214  CSpliced_seg& seg = align.SetSegs().SetSpliced();
1216  seg.SetProduct_length(101);
1217  CRef<CSeq_id> seq_id;
1218  seq_id.Reset(new CSeq_id("lcl|prot"));
1219  seg.SetProduct_id(*seq_id);
1220  seq_id.Reset(new CSeq_id("gi|17158061"));
1221  seg.SetGenomic_id(*seq_id);
1222  CSpliced_seg::TExons& exons = seg.SetExons();
1223  CRef<CSpliced_exon> exon;
1224 
1225  exon.Reset(new CSpliced_exon);
1226  exon->SetProduct_start().SetProtpos().SetAmin(3);
1227  exon->SetProduct_start().SetProtpos().SetFrame(3);
1228  exon->SetProduct_end().SetProtpos().SetAmin(75);
1229  exon->SetProduct_end().SetProtpos().SetFrame(1);
1230  exon->SetGenomic_start(10);
1231  exon->SetGenomic_end(225);
1232  exon->SetGenomic_strand() = strand > 0 ? eNa_strand_plus : eNa_strand_minus;
1233 
1235 
// Exon parts, in order: diag 1, genomic-ins 1, diag 7, product-ins 1,
// diag 2, genomic-ins 1, diag 204.
1236  chunk.Reset(new CSpliced_exon_chunk);
1237  chunk->SetDiag(1);
1238  exon->SetParts().push_back(chunk);
1239 
1240  chunk.Reset(new CSpliced_exon_chunk);
1241  chunk->SetGenomic_ins(1);
1242  exon->SetParts().push_back(chunk);
1243 
1244  chunk.Reset(new CSpliced_exon_chunk);
1245  chunk->SetDiag(7);
1246  exon->SetParts().push_back(chunk);
1247 
1248  chunk.Reset(new CSpliced_exon_chunk);
1249  chunk->SetProduct_ins(1);
1250  exon->SetParts().push_back(chunk);
1251 
1252  chunk.Reset(new CSpliced_exon_chunk);
1253  chunk->SetDiag(2);
1254  exon->SetParts().push_back(chunk);
1255 
1256  chunk.Reset(new CSpliced_exon_chunk);
1257  chunk->SetGenomic_ins(1);
1258  exon->SetParts().push_back(chunk);
1259 
1260  chunk.Reset(new CSpliced_exon_chunk);
1261  chunk->SetDiag(204);
1262  exon->SetParts().push_back(chunk);
1263 
1264  exons.push_back(exon);
1265  }
1266 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1267 // cerr);
1268 // *ostr << align;
1269 
1270  BOOST_CHECK_NO_THROW(align.Validate(true));
1271 
1272  CConstRef<CSeq_align> modified_align;
// from (5583) > to (248): the requested range is split across position 0.
1273  TSeqRange range(5583, 248);
1274  modified_align = feat_gen.AdjustAlignment(align, range);
1275 // *ostr << *modified_align;
1276 
1277  BOOST_CHECK_NO_THROW(modified_align->Validate(true));
1278  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetExons().size(), size_t(2));
1279 
// First exon: plus strand -> from..from (single position 5583);
// minus strand -> 0..to.
1280  const CSpliced_exon* exon = modified_align->GetSegs().GetSpliced().GetExons().front().GetPointer();
1281 
1282  BOOST_CHECK_EQUAL(exon->GetGenomic_start(), strand > 0 ? range.GetFrom() : 0 );
1283  BOOST_CHECK_EQUAL(exon->GetGenomic_end(), strand > 0 ? range.GetFrom() : range.GetTo() );
1284 
// Last exon: the mirror image of the first-exon expectations.
1285  exon = modified_align->GetSegs().GetSpliced().GetExons().back().GetPointer();
1286 
1287  BOOST_CHECK_EQUAL(exon->GetGenomic_start(), strand > 0 ? 0 : range.GetFrom());
1288  BOOST_CHECK_EQUAL(exon->GetGenomic_end(), strand > 0 ? range.GetTo() : range.GetFrom());
1289 
1290 
1291 
1292 
1293 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1294 // cerr);
1295 // *ostr << *trimmed_align;
1296 // delete ostr;
1297  }
1298 }
// TestCaseShrinkAlignmentCrossOrigin: for strand = -1 and strand = +1, builds
// a two-exon spliced alignment on gi|17158061 (exons 5580..5583 and 0..7),
// calls AdjustAlignment() with the range from 5583 to 4 (from > to) and
// checks that two exons remain with the expected genomic boundaries.
// NOTE(review): presumably the two exons are one feature split over the
// origin of a circular sequence whose last base is 5583 -- confirm.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om` is not visible here.
1299 BOOST_AUTO_TEST_CASE(TestCaseShrinkAlignmentCrossOrigin)
1300 {
1303 
1304  for (int strand = -1 ; strand <= 1; strand +=2) {
1305 
1306  CRef<CScope> scope(new CScope(*om));
1307  scope->AddDefaults();
1308 
1309  CFeatureGenerator feat_gen(*scope);
1310 
1311  CSeq_align align;
1312  {
1314  CSpliced_seg& seg = align.SetSegs().SetSpliced();
1316  seg.SetProduct_length(101);
1317  CRef<CSeq_id> seq_id;
1318  seq_id.Reset(new CSeq_id("lcl|prot"));
1319  seg.SetProduct_id(*seq_id);
1320  seq_id.Reset(new CSeq_id("gi|17158061"));
1321  seg.SetGenomic_id(*seq_id);
1322  CSpliced_seg::TExons& exons = seg.SetExons();
1323  CRef<CSpliced_exon> exon;
1324 
// Exon on genomic 5580..5583; its product coordinates depend on the strand.
1325  exon.Reset(new CSpliced_exon);
1326  exon->SetProduct_start().SetProtpos().SetAmin(strand > 0 ? 0 : 2);
1327  exon->SetProduct_start().SetProtpos().SetFrame(strand > 0 ? 1 : 3);
1328  exon->SetProduct_end().SetProtpos().SetAmin(strand > 0 ? 1 : 3);
1329  exon->SetProduct_end().SetProtpos().SetFrame(strand > 0 ? 1 : 3);
1330  exon->SetGenomic_start(5580);
1331  exon->SetGenomic_end(5583);
1332  exon->SetGenomic_strand() = strand > 0 ? eNa_strand_plus : eNa_strand_minus;
1333 
1334  exons.push_back(exon);
1335 
// Exon on genomic 0..7; its product coordinates depend on the strand.
1336  exon.Reset(new CSpliced_exon);
1337  exon->SetProduct_start().SetProtpos().SetAmin(strand > 0 ? 1 : 0);
1338  exon->SetProduct_start().SetProtpos().SetFrame(strand > 0 ? 2 : 1);
1339  exon->SetProduct_end().SetProtpos().SetAmin(strand > 0 ? 3 : 2);
1340  exon->SetProduct_end().SetProtpos().SetFrame(strand > 0 ? 3 : 2);
1341  exon->SetGenomic_start(0);
1342  exon->SetGenomic_end(7);
1343  exon->SetGenomic_strand() = strand > 0 ? eNa_strand_plus : eNa_strand_minus;
1344 
// Plus strand: the 0..7 exon goes last; minus strand: it goes first.
1345  if (strand > 0)
1346  exons.push_back(exon);
1347  else
1348  exons.insert(exons.begin(), exon);
1349  }
1350  BOOST_CHECK_NO_THROW(align.Validate(true));
1351 
1352 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1353 // cerr);
1354 // *ostr << align;
1355 
1356  CConstRef<CSeq_align> modified_align;
// from (5583) > to (4): narrower than the original exons on both ends.
1357  TSeqRange range(5583, 4);
1358  modified_align = feat_gen.AdjustAlignment(align, range);
1359 // *ostr << *modified_align;
1360 
1361  BOOST_CHECK_NO_THROW(modified_align->Validate(true));
1362  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetExons().size(), size_t(2));
1363 
// First exon: plus strand -> from..from; minus strand -> 0..to.
1364  const CSpliced_exon* exon = modified_align->GetSegs().GetSpliced().GetExons().front().GetPointer();
1365 
1366  BOOST_CHECK_EQUAL(exon->GetGenomic_start(), strand > 0 ? range.GetFrom() : 0 );
1367  BOOST_CHECK_EQUAL(exon->GetGenomic_end(), strand > 0 ? range.GetFrom() : range.GetTo() );
1368 
// Last exon: the mirror image of the first-exon expectations.
1369  exon = modified_align->GetSegs().GetSpliced().GetExons().back().GetPointer();
1370 
1371  BOOST_CHECK_EQUAL(exon->GetGenomic_start(), strand > 0 ? 0 : range.GetFrom());
1372  BOOST_CHECK_EQUAL(exon->GetGenomic_end(), strand > 0 ? range.GetTo() : range.GetFrom());
1373 
1374 
1375 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1376 // cerr);
1377 // *ostr << *trimmed_align;
1378 // delete ostr;
1379  }
1380 }
1381 
// TestCaseExpandAlignmentNextToOrigin: parses a one-exon protein alignment
// from ASN.1 text, then for every combination of strand (-1, +1) and side
// (-1, +1) moves the exon to the last three bases (side < 0) or the first
// three bases (side > 0) of the genomic sequence, calls AdjustAlignment()
// with the range from genomic_size-3 to 2 and checks that the result has two
// exons: genomic_size-3..genomic_size-1 and 0..2.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om` is not visible here.
1382 BOOST_AUTO_TEST_CASE(TestCaseExpandAlignmentNextToOrigin)
1383 {
1386 
1387  CRef<CScope> scope(new CScope(*om));
1388  scope->AddDefaults();
1389 
1390  CFeatureGenerator feat_gen(*scope);
1391 
1392 string buf = " \
1393 Seq-align ::= { \
1394  type disc, \
1395  dim 2, \
1396  segs spliced { \
1397  product-id gi 386076534, \
1398  genomic-id gi 386018361, \
1399  product-type protein, \
1400  exons { \
1401  { \
1402  product-start protpos { \
1403  amin 0, \
1404  frame 1 \
1405  }, \
1406  product-end protpos { \
1407  amin 0, \
1408  frame 3 \
1409  }, \
1410  genomic-start 321741, \
1411  genomic-end 321743, \
1412  partial TRUE \
1413  } \
1414  }, \
1415  product-length 2 \
1416  } \
1417 }";
1418 
// NOTE(review): presumably the length of gi 386018361 -- confirm.
1419  TSeqPos genomic_size = 321744;
1420 
1421  CNcbiIstrstream istrs(buf);
1422 
1423  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
1424  CSeq_align align;
1425  *istr >> align;
1426 
1427  for (int strand = -1 ; strand <= 1; strand +=2) {
1428  for (int side = -1 ; side <= 1; side +=2) {
1429 
// Re-position the single exon for this strand/side combination; the same
// `align` object is modified in place on every iteration.
1430  {
1431  CSpliced_exon* exon = align.SetSegs().SetSpliced().SetExons().front().GetPointer();
1432  exon->SetProduct_start().SetProtpos().SetAmin(side != strand ? 0 : 1);
1433  exon->SetProduct_end().SetProtpos().SetAmin(side != strand ? 0 : 1);
1434  exon->SetGenomic_start(side < 0 ? genomic_size-3 : 0);
1435  exon->SetGenomic_end(side < 0 ? genomic_size-1 : 2);
1436  exon->SetGenomic_strand() = strand > 0 ? eNa_strand_plus : eNa_strand_minus;
1437  }
1438 
1439  BOOST_CHECK_NO_THROW(align.Validate(true));
1440 
1441  CConstRef<CSeq_align> modified_align;
// from > to: the range covers the last three and the first three bases.
1442  TSeqRange range(genomic_size-3, 2);
1443  modified_align = feat_gen.AdjustAlignment(align, range);
1444 
1445 // CObjectOStream* ostr = CObjectOStream::Open(eSerial_AsnText,
1446 // cerr);
1447 // *ostr << *modified_align;
1448 
1449  BOOST_CHECK_NO_THROW(modified_align->Validate(true));
1450  BOOST_CHECK_EQUAL(modified_align->GetSegs().GetSpliced().GetExons().size(), size_t(2));
1451 
// First exon: plus strand -> from..genomic_size-1; minus strand -> 0..to.
1452  const CSpliced_exon* exon = modified_align->GetSegs().GetSpliced().GetExons().front().GetPointer();
1453 
1454  BOOST_CHECK_EQUAL(exon->GetGenomic_start(), strand > 0 ? range.GetFrom() : TSeqPos(0));
1455  BOOST_CHECK_EQUAL(exon->GetGenomic_end(), strand > 0 ? genomic_size - 1 : range.GetTo());
1456 
// Last exon: the mirror image of the first-exon expectations.
1457  exon = modified_align->GetSegs().GetSpliced().GetExons().back().GetPointer();
1458 
1459  BOOST_CHECK_EQUAL(exon->GetGenomic_start(), strand > 0 ? TSeqPos(0) : range.GetFrom());
1460  BOOST_CHECK_EQUAL(exon->GetGenomic_end(), strand > 0 ? range.GetTo() : genomic_size - 1);
1461 
1462  }}
1463 }
1464 
// TestCasePartialCDS: reads (Seq-align, Seq-feat) pairs from an in-memory
// ASN.1 text buffer until end of input. Each alignment is validated and then
// passed, together with its cdregion feature, to ConvertAlignToAnnot(), which
// must not throw. Both alignments end in an exon flagged "partial TRUE" and
// both features carry "fuzz-to lim gt".
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om` and the tail of the `flags`
// expression are not visible here.
1465 BOOST_AUTO_TEST_CASE(TestCasePartialCDS)
1466 {
1469 
1470 string buf = " \
1471 Seq-align ::= { \
1472  type disc, \
1473  dim 2, \
1474  segs spliced { \
1475  product-id local id 386076534, \
1476  genomic-id gi 183579259, \
1477  genomic-strand minus, \
1478  product-type transcript, \
1479  exons { \
1480  { \
1481  product-start nucpos 0, \
1482  product-end nucpos 150, \
1483  genomic-start 132443, \
1484  genomic-end 132593 \
1485  }, \
1486  { \
1487  product-start nucpos 151, \
1488  product-end nucpos 381, \
1489  genomic-start 132090, \
1490  genomic-end 132320, \
1491  partial TRUE \
1492  } \
1493  }, \
1494  product-length 382 \
1495  } \
1496 } \
1497 Seq-feat ::= { \
1498  data cdregion { \
1499  code { \
1500  id 1 \
1501  } \
1502  }, \
1503  product whole local str \"PROT_10_36\", \
1504  location int { \
1505  from 59, \
1506  to 381, \
1507  id local id 386076534, \
1508  fuzz-to lim gt \
1509  } \
1510  } \
1511 Seq-align ::= { \
1512  type disc, \
1513  dim 2, \
1514  segs spliced { \
1515  product-id local id 386076534, \
1516  genomic-id gi 183579259, \
1517  genomic-strand minus, \
1518  product-type transcript, \
1519  exons { \
1520  { \
1521  product-start nucpos 0, \
1522  product-end nucpos 132, \
1523  genomic-start 127519, \
1524  genomic-end 127651 \
1525  }, \
1526  { \
1527  product-start nucpos 133, \
1528  product-end nucpos 355, \
1529  genomic-start 127174, \
1530  genomic-end 127396 \
1531  }, \
1532  { \
1533  product-start nucpos 356, \
1534  product-end nucpos 359, \
1535  genomic-start 110589, \
1536  genomic-end 110592, \
1537  partial TRUE \
1538  } \
1539  }, \
1540  product-length 382 \
1541  } \
1542 } \
1543 Seq-feat ::= { \
1544  data cdregion { \
1545  code { \
1546  id 1 \
1547  } \
1548  }, \
1549  product whole local str \"PROT_10_36\", \
1550  location int { \
1551  from 41, \
1552  to 359, \
1553  id local id 386076534, \
1554  fuzz-to lim gt \
1555  } \
1556  } \
1557 ";
1558 
1559  CNcbiIstrstream istrs(buf);
1560 
1561  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
1562 
// Consume alignment/feature pairs until the stream reports end of input.
1563  for (;;) {
1564  CSeq_align align;
1565  CSeq_feat feat;
1566  try {
1567  *istr >> align;
1568  *istr >> feat;
1569  }
1570  catch (CEofException&) {
1571  break;
1572  }
1573 
1574  BOOST_CHECK_NO_THROW(align.Validate(true));
1575 
// Fresh output containers and a fresh scope/generator for every pair.
1576  CRef<CSeq_entry> seq_entry(new CSeq_entry);
1577  CBioseq_set& seqs = seq_entry->SetSet();
1578  seqs.SetSeq_set();
1579  CSeq_annot annot;
1580  annot.SetData().SetFtable();
1581 
1582  CRef<CScope> scope(new CScope(*om));
1583  scope->AddDefaults();
1584 
1585  CFeatureGenerator feat_gen(*scope);
1586 
// Start from the default flags with fGenerateLocalIds cleared; the flags
// OR-ed in after the `|` are on lines missing from this listing.
1587  int flags = (CFeatureGenerator::fDefaults & ~CFeatureGenerator::fGenerateLocalIds) |
1590  feat_gen.SetFlags(flags);
1591  BOOST_CHECK_NO_THROW(feat_gen.ConvertAlignToAnnot(align, annot, seqs, 0, &feat));
1592  }
1593 }
1594 
// TestCaseGeneForPartialFeatureIsPartial: parses a protein alignment whose
// product coverage starts at amino acid 25 (not 0), runs it through
// AdjustAlignment() and ConvertAlignToAnnot(), and checks that every gene
// feature visited by the final loop has a location that is partial at its
// biological start.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om`, the initialiser of `flags`
// and the header of the loop that defines `it` are not visible here.
// NOTE(review): the check is only made inside `if (...IsGene())`, so as
// written it passes vacuously when no gene feature is visited.
1595 BOOST_AUTO_TEST_CASE(TestCaseGeneForPartialFeatureIsPartial)
1596 {
1599 
1600  CRef<CScope> scope(new CScope(*om));
1601  scope->AddDefaults();
1602 
1603  CFeatureGenerator feat_gen(*scope);
1604 
1605 string buf = " \
1606 Seq-align ::= { \
1607  type disc, \
1608  dim 2, \
1609  segs spliced { \
1610  product-id gi 16762324, \
1611  genomic-id gi 188504888, \
1612  genomic-strand plus, \
1613  product-type protein, \
1614  exons { \
1615  { \
1616  product-start protpos { \
1617  amin 25, \
1618  frame 1 \
1619  }, \
1620  product-end protpos { \
1621  amin 576, \
1622  frame 3 \
1623  }, \
1624  genomic-start 0, \
1625  genomic-end 1655 \
1626  } \
1627  }, \
1628  product-length 577, \
1629  modifiers { \
1630  stop-codon-found TRUE \
1631  } \
1632  } \
1633 } \
1634 ";
1635 
1636  CNcbiIstrstream istrs(buf);
1637 
1638  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
1639 
1640  CSeq_align align;
1641  *istr >> align;
1642 
1643  BOOST_CHECK_NO_THROW(align.Validate(true));
1644 
1645  CRef<CSeq_entry> seq_entry(new CSeq_entry);
1646  CBioseq_set& seqs = seq_entry->SetSet();
1647  seqs.SetSeq_set();
1648  CSeq_annot annot;
1649  annot.SetData().SetFtable();
1650 
1651  int flags =
1655  feat_gen.SetFlags(flags);
1656 
// The range equals the exon's own genomic extent (0..1655).
1657  TSeqRange range(0, 1655);
1658  CConstRef<CSeq_align> modified_align = feat_gen.AdjustAlignment(align, range);
1659  feat_gen.ConvertAlignToAnnot(*modified_align, annot, seqs);
1660 
1662  if ((*it)->GetData().IsGene()) {
1663  BOOST_CHECK( (*it)->GetLocation().IsPartialStart(eExtreme_Biological) );
1664  }
1665  }
1666 }
1667 
// TestCaseConvertLocToAnnotIncompleteCodon: parses a plus-strand location
// 632363..633564 on gi 255926317 (marked "fuzz-to lim gt"), converts it with
// ConvertLocToAnnot() and checks that the first sequence placed in `seqs`
// has length 401.
// The location is 1202 bases long, i.e. 400 whole codons plus 2 bases.
// NOTE(review): presumably the 401st residue comes from the trailing
// incomplete codon, as the test name suggests -- confirm.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om` and the initialiser of
// `flags` are not visible here.
1668 BOOST_AUTO_TEST_CASE(TestCaseConvertLocToAnnotIncompleteCodon)
1669 {
1672  CRef<CScope> scope(new CScope(*om));
1673  scope->AddDefaults();
1674 
1675 string buf = " \
1676 Seq-loc ::= packed-int { \
1677  { \
1678  from 632363, \
1679  to 633564, \
1680  strand plus, \
1681  id gi 255926317, \
1682  fuzz-to lim gt \
1683  } \
1684  } \
1685 ";
1686  CNcbiIstrstream istrs(buf);
1687  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
1688  CSeq_loc loc;
1689  *istr >> loc;
1690 
1691  CRef<CSeq_entry> seq_entry(new CSeq_entry);
1692  CBioseq_set& seqs = seq_entry->SetSet();
1693  seqs.SetSeq_set();
1694  CSeq_annot annot;
1695  annot.SetData().SetFtable();
1696  CFeatureGenerator feat_gen(*scope);
1697  int flags =
1701  feat_gen.SetFlags(flags);
1702 
1703 
1704  feat_gen.ConvertLocToAnnot(loc, annot, seqs);
1705 
// front() is dereferenced without checking that `seqs` is non-empty.
1706  int protein_length = seqs.GetSeq_set().front()->GetSeq().GetLength();
1707  BOOST_CHECK_EQUAL(protein_length, 401);
1708 
1709 }
1710 
// TestCaseConvertLocToAnnotNoMismatches: loads a FASTA contig
// (gnl|PRJNA205468|contig_484) into the scope, converts the minus-strand
// location 0..290 on it (partial at both ends) with ConvertLocToAnnot()
// using frame three, and checks that the last feature in the resulting
// feature table has no exception set.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om` and the initialiser of
// `flags` are not visible here.
1711 BOOST_AUTO_TEST_CASE(TestCaseConvertLocToAnnotNoMismatches)
1712 {
1715  CRef<CScope> scope(new CScope(*om));
1716  scope->AddDefaults();
1717 
1718 string buf = " \
1719 Seq-loc ::= packed-int { \
1720  { \
1721  from 0, \
1722  to 290, \
1723  strand minus, \
1724  id general { db \"PRJNA205468\" , \
1725  tag str \"contig_484\" }, \
1726  fuzz-from lim lt, \
1727  fuzz-to lim gt \
1728  } \
1729  } \
1730 ";
1731 string fasta_string = "\
1732 >gnl|PRJNA205468|contig_484 [organism=Sphingobacterium sp. IITKGP-BTPF85] [moltype=Genomic] [strain=IITKGP-BTPF85] [gcode=11] [tech=wgs]\n\
1733 GCTTCAACAAATAGGCATAGCCTTGATTCTGAAAAGCTTTTAAGGCGTAATCTTCAAACGCTGTAGTAAA\n\
1734 AATAACAGGTGCCTGAACTTTGACCTGGTCAAATATTTCGAAACTCAATCCATCACCGAGCTGCACATCC\n\
1735 ATAAAGATGAGATCGACTTCATTTTTAAGCAACCAATCGGTAGCTTCACGCACTGTTGTTATAATCGTAG\n\
1736 ATTGAAATTTGGAAGCAATCAATTGGTCTAATTTCTCCAAAAGACTTTCCGAAGCCCAGTTTTCATCTTC\n\
1737 TACGATCAATA\n\
1738 ";
1739  CNcbiIstrstream istrs(buf);
1740  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
1741  CSeq_loc loc;
1742  *istr >> loc;
1743 
// Make the contig available through the scope by adding the parsed FASTA entry.
1744  CNcbiIstrstream fasta_stream(fasta_string);
1745  CFastaReader fasta_reader(fasta_stream, CFastaReader::fAddMods);
1746  scope->AddTopLevelSeqEntry(*fasta_reader.ReadOneSeq());
1747 
1748  CRef<CSeq_entry> seq_entry(new CSeq_entry);
1749  CBioseq_set& seqs = seq_entry->SetSet();
1750  seqs.SetSeq_set();
1751  CSeq_annot annot;
1752  annot.SetData().SetFtable();
1753  CFeatureGenerator feat_gen(*scope);
1754  int flags =
1758  feat_gen.SetFlags(flags);
1759 
1760 
1761  feat_gen.ConvertLocToAnnot(loc, annot, seqs, CCdregion::eFrame_three);
1762 
1763  // there should not be any exceptions set
// back() is dereferenced without checking that the feature table is non-empty.
1764  bool no_exceptions_set = !annot.GetData().GetFtable().back()->IsSetExcept();
1765  BOOST_CHECK(no_exceptions_set);
1766 
1767 }
1768 
// TestCaseConvertLocToAnnotTrimXs: converts the minus-strand location
// 194830..200052 on gi 500770508 (marked "fuzz-from lim lt") with
// ConvertLocToAnnot() and checks that the first and last features in the
// resulting feature table have locations that compare equal under
// CSeq_loc::fCompare_Strand. On mismatch the annotation data is dumped to
// cerr before the check fails.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om` and the initialiser of
// `flags` are not visible here.
1769 BOOST_AUTO_TEST_CASE(TestCaseConvertLocToAnnotTrimXs)
1770 {
1773  CRef<CScope> scope(new CScope(*om));
1774  scope->AddDefaults();
1775 
1776 string buf = " \
1777 Seq-loc ::= packed-int { \
1778  { \
1779  from 194830, \
1780  to 200052, \
1781  strand minus, \
1782  id gi 500770508, \
1783  fuzz-from lim lt \
1784  } \
1785  } \
1786 ";
1787  CNcbiIstrstream istrs(buf);
1788  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
1789  CSeq_loc loc;
1790  *istr >> loc;
1791 
1792  CRef<CSeq_entry> seq_entry(new CSeq_entry);
1793  CBioseq_set& seqs = seq_entry->SetSet();
1794  seqs.SetSeq_set();
1795  CSeq_annot annot;
1796  annot.SetData().SetFtable();
1797  CFeatureGenerator feat_gen(*scope);
1798  int flags =
1801  feat_gen.SetFlags(flags);
1802 
1803 
1804  feat_gen.ConvertLocToAnnot(loc, annot, seqs);
1805 
// Diagnostic aid only: print the generated features when the locations differ.
1806  if (annot.GetData().GetFtable().front()->GetLocation().Compare(
1807  annot.GetData().GetFtable().back()->GetLocation(), CSeq_loc::fCompare_Strand)
1808  != 0
1809  ) {
1810  cerr << MSerial_AsnText << annot.GetData();
1811  }
1812 
// The actual assertion: same comparison as above, expected to yield 0.
1813  BOOST_CHECK(annot.GetData().GetFtable().front()->GetLocation().Compare(
1814  annot.GetData().GetFtable().back()->GetLocation(), CSeq_loc::fCompare_Strand)
1815  == 0
1816  );
1817 
1818 }
1819 
// TestCaseConvertLocToAnnotSlippage: converts a two-interval minus-strand
// packed-int location on gi 545778205 with ConvertLocToAnnot() and checks
// only that the call does not throw. The two intervals (3815880..3816548 and
// 3815833..3815880) share position 3815880.
// NOTE(review): presumably the shared base models ribosomal slippage, as the
// test name suggests -- confirm.
// NOTE(review): this listing skips some original lines (see the gaps in the
// embedded numbering); the declaration of `om` and the initialiser of
// `flags` are not visible here.
1820 BOOST_AUTO_TEST_CASE(TestCaseConvertLocToAnnotSlippage)
1821 {
1824  CRef<CScope> scope(new CScope(*om));
1825  scope->AddDefaults();
1826 
1827 string buf = " \
1828 Seq-loc ::= packed-int { \
1829  { \
1830  from 3815880, \
1831  to 3816548, \
1832  strand minus, \
1833  id gi 545778205 \
1834  }, \
1835  { \
1836  from 3815833, \
1837  to 3815880, \
1838  strand minus, \
1839  id gi 545778205 \
1840  } \
1841  } \
1842 ";
1843  CNcbiIstrstream istrs(buf);
1844  unique_ptr<CObjectIStream> istr(CObjectIStream::Open(eSerial_AsnText, istrs));
1845  CSeq_loc loc;
1846  *istr >> loc;
1847 
1848  CRef<CSeq_entry> seq_entry(new CSeq_entry);
1849  CBioseq_set& seqs = seq_entry->SetSet();
1850  seqs.SetSeq_set();
1851  CSeq_annot annot;
1852  annot.SetData().SetFtable();
1853  CFeatureGenerator feat_gen(*scope);
1854  int flags =
1859  feat_gen.SetFlags(flags);
1860 
1861 
1862  BOOST_CHECK_NO_THROW(feat_gen.ConvertLocToAnnot(loc, annot, seqs));
1863 }
1864 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
CArgs –.
Definition: ncbiargs.hpp:379
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
CConstRef< objects::CSeq_align > AdjustAlignment(const objects::CSeq_align &align, TSeqRange range, EProductPositionsMode mode=eForceProductFrom0)
void RecomputePartialFlags(objects::CSeq_annot &annot)
Recompute the correct partial states for all features in this annotation.
Definition: gene_model.cpp:374
void SetMinIntron(TSeqPos)
Definition: gene_model.cpp:210
void SetFlags(TFeatureGeneratorFlags)
Definition: gene_model.cpp:195
void SetAllowedUnaligned(TSeqPos)
Definition: gene_model.cpp:215
static const TSeqPos kDefaultMinIntron
Definition: gene_model.hpp:127
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
Definition: gene_model.cpp:221
void ConvertLocToAnnot(const objects::CSeq_loc &loc, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, objects::CCdregion::EFrame frame=objects::CCdregion::eFrame_one, CRef< objects::CSeq_id > prot_id=CRef< objects::CSeq_id >(), CRef< objects::CSeq_id > rna_id=CRef< objects::CSeq_id >())
Convert genomic location to an annotation.
Definition: gene_model.cpp:243
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:300
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:244
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
void Validate(bool full_test=false) const
Definition: Seq_align.cpp:649
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSpliced_exon_chunk –.
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
char value[7]
Definition: config.c:431
static uch flags
Operators to edit gaps in sequences.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
#define NULL
Definition: ncbistd.hpp:225
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
Definition: fasta.cpp:312
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
Definition: fasta.hpp:141
@ fAddMods
Parse defline mods and add to SeqEntry.
Definition: fasta.hpp:104
@ fParseRawID
Try to identify raw accessions.
Definition: fasta.hpp:97
static CSeq_id_Handle GetGiHandle(TGi gi)
Faster way to create a handle for a gi.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ fCompare_Strand
Definition: Seq_loc.hpp:246
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const
Definition: range.hpp:152
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define kEmptyStr
Definition: ncbistr.hpp:123
bool IsSetSyn(void) const
Synonyms for locus. Check if a value has been assigned to Syn data member.
Definition: Gene_ref_.hpp:756
const TSyn & GetSyn(void) const
Get the Syn member data.
Definition: Gene_ref_.hpp:768
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Gene_ref_.hpp:599
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
bool IsSetDesc(void) const
descriptive name Check if a value has been assigned to Desc data member.
Definition: Gene_ref_.hpp:587
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
const TProtpos & GetProtpos(void) const
Get the variant data.
void SetProduct_start(TProduct_start &value)
Assign a value to Product_start data member.
TMatch GetMatch(void) const
Get the variant data.
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
void SetProduct_end(TProduct_end &value)
Assign a value to Product_end data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
TExons & SetExons(void)
Assign a value to Exons data member.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
void SetProduct_length(TProduct_length value)
Assign a value to Product_length data member.
TMismatch GetMismatch(void) const
Get the variant data.
list< CRef< CUser_object > > TExt
Definition: Seq_align_.hpp:402
TAmin GetAmin(void) const
Get the Amin member data.
Definition: Prot_pos_.hpp:220
void SetGenomic_start(TGenomic_start value)
Assign a value to Genomic_start data member.
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
void SetGenomic_strand(TGenomic_strand value)
Assign a value to Genomic_strand data member.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
void SetGenomic_end(TGenomic_end value)
Assign a value to Genomic_end data member.
const TExt & GetExt(void) const
Get the Ext member data.
list< CRef< CSpliced_exon_chunk > > TParts
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
bool IsGene(void) const
Check if variant Gene is selected.
list< CRef< CCode_break > > TCode_break
Definition: Cdregion_.hpp:111
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
Definition: Seq_feat_.hpp:990
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
Explain if except=TRUE. Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
const TCdregion & GetCdregion(void) const
Get the variant data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
const TGene & GetGene(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
TExcept GetExcept(void) const
Get the Except member data.
Definition: Seq_feat_.hpp:1009
bool IsSetDbxref(void) const
Support for xref to other databases. Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetCode_break(void) const
Individual exceptions. Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
TOther & SetOther(void)
Select the variant.
Definition: Seq_id_.cpp:353
void SetAccession(const TAccession &value)
Assign a value to Accession data member.
void SetVersion(TVersion value)
Assign a value to Version data member.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
list< CRef< CSeq_feat > > TFtable
Definition: Seq_annot_.hpp:193
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
char * buf
int i
range(_Ty, _Ty) -> range< _Ty >
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
The Object manager core.
static const char * expected[]
Definition: bcp.c:42
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
CRef< objects::CObjectManager > om
Utility stuff for more convenient using of Boost.Test library.
USING_SCOPE(objects)
BOOST_AUTO_TEST_CASE(TestUsingArg)
NCBITEST_INIT_CMDLINE(arg_desc)
void AddFastaToScope(const string &fasta_file, CScope &scope)
void s_CompareFtables(const CSeq_annot::TData::TFtable &actual, const CSeq_annot::TData::TFtable &expected, const string &compared_features)
static bool s_CompareFeatRefs(const CRef< CSeq_feat > &ref1, const CRef< CSeq_feat > &ref2)
Function to compare CRef<CSeq_feat>s by their referents.
BOOST_AUTO_TEST_SUITE_END()
Auxiliary class to convert a string into an argument count and vector.
Modified on Thu Sep 28 03:36:05 2023 by modify_doxy.py rev. 669887