NCBI C++ ToolKit
unit_test_cds_fix.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_cds_fix.cpp 93572 2021-04-30 13:48:31Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, Jie Chen, NCBI
27 *
28 * File Description:
29 * Unit tests for the coding-region (CDS) fix utilities (objtools/edit/cds_fix.hpp).
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include "unit_test_cds_fix.hpp"
37 
38 #include <corelib/ncbi_system.hpp>
39 
40 // This macro should be defined before the inclusion of test_boost.hpp in all
41 // "*.cpp" files of the executable except one. This mirrors how main() is
42 // defined in only one *.cpp file of a non-Boost.Test executable - other files
43 // must not define it. If NCBI_BOOST_NO_AUTO_TEST_MAIN is not defined,
44 // test_boost.hpp will define such a "main()" function for the tests.
45 //
46 // Usually if your unit tests contain only one *.cpp file you should not
47 // care about this macro at all.
48 //
49 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
50 
51 
52 // This header must be included before all Boost.Test headers if there are any
53 #include <corelib/test_boost.hpp>
54 
56 #include <objects/biblio/Title.hpp>
71 #include <objects/pub/Pub.hpp>
73 #include <objects/seq/GIBB_mol.hpp>
74 #include <objects/seq/Seq_ext.hpp>
78 #include <objects/seq/Ref_ext.hpp>
79 #include <objects/seq/Map_ext.hpp>
80 #include <objects/seq/Seg_ext.hpp>
81 #include <objects/seq/Seq_gap.hpp>
82 #include <objects/seq/Seq_data.hpp>
84 #include <objects/seq/Seqdesc.hpp>
85 #include <objects/seq/MolInfo.hpp>
86 #include <objects/seq/Pubdesc.hpp>
87 #include <objects/seq/Seq_hist.hpp>
105 #include <objmgr/object_manager.hpp>
106 #include <objmgr/scope.hpp>
107 #include <objmgr/bioseq_ci.hpp>
108 #include <objmgr/feat_ci.hpp>
109 #include <objmgr/seq_vector.hpp>
110 #include <objmgr/util/sequence.hpp>
111 #include <objmgr/util/feature.hpp>
113 #include <objmgr/seqdesc_ci.hpp>
118 #include <corelib/ncbiapp.hpp>
119 
121 #include <objtools/edit/cds_fix.hpp>
122 
123 #include <common/test_assert.h> /* This header must go last */
124 
125 
126 
129 
130 
// Test-data strings used by the test cases below. The `extern` names are only
// declared here (sc_TestEntry is defined near the end of this file);
// sc_TestEntry3 is defined in place and holds a relative path ending in ".asn".
131 extern const string sc_TestEntry;
132 extern const string sc_TestEntry2;
133 const string sc_TestEntry3 = "cds_fix_test_cases/GB-8277.asn";
134 extern const string sc_mrna_loc;
135 extern const string sc_mrna1;
136 extern const string sc_mrna2;
137 extern const string sc_mrna3;
138 
140 {
    // NOTE(review): the line that introduces this body (original line 139) is
    // not present in this listing. The branch below is empty, so nothing is
    // done when the [NCBI] Data configuration entry is absent.
141  if ( !CNcbiApplication::Instance()->GetConfig().HasEntry("NCBI", "Data") ) {
142  }
143 }
144 
// Set to true by the initialization body below when the "debug_mode" flag is
// given. NOTE(review): it is not read anywhere in the visible part of this file.
145 static bool s_debugMode = false;
146 
148 {
    // NOTE(review): the introducing line (original line 147) is not present in
    // this listing; `arg_desc` is declared there or elsewhere.
149  // Here we make descriptions of command line parameters that we are
150  // going to use.
151 
152  arg_desc->AddFlag( "debug_mode",
153  "Debugging mode writes errors seen for each test" );
154 }
155 
157 {
    // NOTE(review): the introducing line (original line 156) is not present in
    // this listing.
158  // initialization function body
159 
    // Record the "debug_mode" command-line flag in s_debugMode.
160  const CArgs& args = CNcbiApplication::Instance()->GetArgs();
161  if (args["debug_mode"]) {
162  s_debugMode = true;
163  }
164 }
165 
166 
168  bool strict, bool extend,
169  bool expected_rval, bool set_codebreak,
170  bool set_comment, TSeqPos expected_endpoint)
171 {
    // Helper called by OneTerminalTranslationExceptionTest.
    // NOTE(review): the first line of the signature (original line 167, which
    // declares `cds` and `scope`) is not present in this listing.
    //
    // Calls edit::SetTranslExcept on `cds` with a fixed comment string and checks:
    //   - the return value equals expected_rval;
    //   - a code-break is set iff set_codebreak (and then there is exactly one);
    //   - the feature comment is set iff set_comment (and then equals the string);
    //   - the biological stop of the location equals expected_endpoint.
172  const CCdregion& cdr = cds.GetData().GetCdregion();
173  BOOST_CHECK_EQUAL(edit::SetTranslExcept(cds,
174  "TAA stop codon is completed by the addition of 3' A residues to the mRNA",
175  strict, extend, scope),
176  expected_rval);
177  BOOST_CHECK_EQUAL(cdr.IsSetCode_break(), set_codebreak);
178  if (set_codebreak) {
179  BOOST_CHECK_EQUAL(cdr.GetCode_break().size(), 1);
180  }
181 
182  BOOST_CHECK_EQUAL(cds.IsSetComment(), set_comment);
183  if (set_comment) {
184  BOOST_CHECK_EQUAL(cds.GetComment(), "TAA stop codon is completed by the addition of 3' A residues to the mRNA");
185  }
186  BOOST_CHECK_EQUAL(cds.GetLocation().GetStop(eExtreme_Biological), expected_endpoint);
187 }
188 
189 
// Runs one terminal-translation-exception scenario in two passes.
//   strict, extend    - forwarded to CheckTerminalExceptionResults
//   endpoint          - value assigned to the CDS interval's "to" position
//   seq               - IUPAC nucleotide string installed on the first Bioseq of the set
//   expected_rval, set_codebreak, set_comment - expectations forwarded to the checker
//   expected_endpoint - expected biological stop in the first pass
// NOTE(review): the lines that create `entry`, `cds`, `scope` and `seh`
// (original lines 195-198) are not present in this listing.
190 void OneTerminalTranslationExceptionTest(bool strict, bool extend, TSeqPos endpoint,
191  const string& seq,
192  bool expected_rval, bool set_codebreak, bool set_comment,
193  TSeqPos expected_endpoint)
194 {
197 
199  CCdregion& cdr = cds->SetData().SetCdregion();
200  CBioseq& nuc_seq = entry->SetSet().SetSeq_set().front()->SetSeq();
201  nuc_seq.SetInst().SetSeq_data().SetIupacna().Set(seq);
202  cds->SetLocation().SetInt().SetTo(endpoint);
203 
204  // Should not set translation exception if coding region already has stop codon
205  CheckTerminalExceptionResults(*cds, seh.GetScope(),
206  strict, extend, expected_rval,
207  set_codebreak, set_comment, expected_endpoint);
208 
    // Undo whatever the first pass changed before running the second pass.
209  cdr.ResetCode_break();
210  cds->ResetComment();
211  cds->SetLocation().SetInt().SetTo(endpoint);
212 
213  // same results if reverse-complement
    // NOTE(review): the reverse-complement step itself (original line 215) is
    // not present in this listing. The expected stop for this pass is the
    // first-pass endpoint mirrored: length - expected_endpoint - 1.
214  scope.RemoveTopLevelSeqEntry(seh);
216  seh = scope.AddTopLevelSeqEntry(*entry);
217  CheckTerminalExceptionResults(*cds, seh.GetScope(),
218  strict, extend, expected_rval,
219  set_codebreak, set_comment,
220  nuc_seq.GetLength() - expected_endpoint - 1);
221 }
222 
223 
// Drives OneTerminalTranslationExceptionTest through several scenarios built
// on one 60-base sequence whose first TAA occupies indices 24-26.
// NOTE(review): several call lines (original lines 228, 233, 245, 251, 261)
// are not present in this listing; only their trailing arguments are visible.
224 BOOST_AUTO_TEST_CASE(Test_AddTerminalTranslationException)
225 {
226  string original_seq = "ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG";
227  // no change if normal
229  original_seq,
230  false, false, false, 26);
231 
232  // should not set translation exception, but should extend to cover stop codon if extend is true
234  original_seq,
235  true, false, false, 26);
236 
237  // but no change if extend flag is false
238  OneTerminalTranslationExceptionTest(true, false, 23,
239  original_seq,
240  false, false, false, 23);
241 
242  // should be set if last A in stop codon is replaced with other NT and coding region is one shorter
243  string changed_seq = original_seq;
    // Index 26 is the final A of the first TAA, so the stop codon becomes TAC.
244  changed_seq[26] = 'C';
246  changed_seq,
247  true, true, true, 25);
248 
249  // should extend for a partial stop codon and add a terminal exception if the coding region is missing
250  // the entire last codon
252  changed_seq,
253  true, true, true, 25);
254 
255  // for non-strict, first NT could be N
    // Index 24 is the T of that codon.
256  changed_seq[24] = 'N';
257  OneTerminalTranslationExceptionTest(false, true, 25,
258  changed_seq,
259  true, true, true, 25);
260  // but not for strict
262  changed_seq,
263  false, false, false, 23);
264 
265 
266 }
267 
268 
// Checks that the partial flags of a CDS, its protein feature and the protein
// MolInfo are kept in sync by the Adjust* helpers: first on an unmodified
// record (every helper returns false), then step by step after the CDS start
// is made partial, then in one call after it is made complete again.
// NOTE(review): the lines that create `entry`, `cds`, `prot_seq`, `prot_feat`,
// `scope` and `seh` (original lines 271-276) and a few check lines (293, 308,
// 314, 318) are not present in this listing.
269 BOOST_AUTO_TEST_CASE(Test_FeaturePartialSynchronization)
270 {
273 
    // Find the MolInfo descriptor on the protein sequence; if several exist,
    // the last one wins.
277  CRef<CSeqdesc> prot_molinfo;
278  NON_CONST_ITERATE(CBioseq::TDescr::Tdata, it, prot_seq->SetSeq().SetDescr().Set()) {
279  if ((*it)->IsMolinfo()) {
280  prot_molinfo.Reset(it->GetPointer());
281  }
282  }
283 
284  // establish that everything is ok before
285  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStart(eExtreme_Biological), false);
286  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
287  BOOST_CHECK_EQUAL(cds->IsSetPartial(), false);
288  BOOST_CHECK_EQUAL(feature::AdjustFeaturePartialFlagForLocation(*cds), false);
289  BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStart(eExtreme_Biological), false);
290  BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStop(eExtreme_Biological), false);
291  BOOST_CHECK_EQUAL(prot_feat->IsSetPartial(), false);
292  BOOST_CHECK_EQUAL(edit::AdjustProteinFeaturePartialsToMatchCDS(*prot_feat, *cds), false);
294  BOOST_CHECK_EQUAL(feature::AdjustProteinMolInfoToMatchCDS(prot_molinfo->SetMolinfo(), *cds), false);
295  BOOST_CHECK_EQUAL(feature::AdjustForCDSPartials(*cds, seh), false);
296 
    // Make the CDS 5' partial, then propagate the change one helper at a time.
    // Each helper is expected to report a change (true).
297  cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
298  BOOST_CHECK_EQUAL(cds->IsSetPartial(), false);
299  BOOST_CHECK_EQUAL(feature::AdjustFeaturePartialFlagForLocation(*cds), true);
300  BOOST_CHECK_EQUAL(cds->IsSetPartial(), true);
301 
302  BOOST_CHECK_EQUAL(edit::AdjustProteinFeaturePartialsToMatchCDS(*prot_feat, *cds), true);
303  BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStart(eExtreme_Biological), true);
304  BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStop(eExtreme_Biological), false);
305  BOOST_CHECK_EQUAL(prot_feat->IsSetPartial(), true);
306 
307  BOOST_CHECK_EQUAL(feature::AdjustProteinMolInfoToMatchCDS(prot_molinfo->SetMolinfo(), *cds), true);
309 
310  // all changes in one go
311  cds->SetLocation().SetPartialStart(false, eExtreme_Biological);
312  BOOST_CHECK_EQUAL(feature::AdjustFeaturePartialFlagForLocation(*cds), true);
313  BOOST_CHECK_EQUAL(feature::AdjustForCDSPartials(*cds, seh), true);
315  BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStart(eExtreme_Biological), false);
316  BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStop(eExtreme_Biological), false);
317  BOOST_CHECK_EQUAL(prot_feat->IsSetPartial(), false);
319 
320 }
321 
322 
// Exercises edit::MakemRNAforCDS in four situations: no UTRs, a 3' UTR only,
// both a 5' and a 3' UTR, and an mRNA already present on the record.
// NOTE(review): the setup lines (original lines 325-328) and the second line
// of each sequence::Compare check (331, 346, 361, 378) are not present in this
// listing, so the expected comparison results cannot be read from here.
323 BOOST_AUTO_TEST_CASE(Test_MakemRNAforCDS)
324 {
327 
    // No UTR features: the mRNA is expected to be partial at both ends.
329  CRef<CSeq_feat> mrna = edit::MakemRNAforCDS(*cds, scope);
330  BOOST_CHECK_EQUAL(sequence::Compare(cds->GetLocation(), mrna->GetLocation(),
332  BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStart(eExtreme_Biological), true);
333  BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStop(eExtreme_Biological), true);
334 
335  // with a 3' UTR
    // The entry is removed from the scope while it is edited and re-added after.
336  scope.RemoveTopLevelSeqEntry(seh);
338  CRef<CSeq_feat> utr3 = unit_test_util::AddGoodImpFeat(nuc_seq, "3'UTR");
339  utr3->ResetComment();
340  utr3->SetLocation().SetInt().SetFrom(27);
341  utr3->SetLocation().SetInt().SetTo(30);
342  seh = scope.AddTopLevelSeqEntry(*entry);
343 
    // The mRNA is now expected to end where the 3' UTR ends and to be complete
    // at that end.
344  mrna = edit::MakemRNAforCDS(*cds, scope);
345  BOOST_CHECK_EQUAL(sequence::Compare(cds->GetLocation(), mrna->GetLocation(),
347  BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStart(eExtreme_Biological), true);
348  BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStop(eExtreme_Biological), false);
349  BOOST_CHECK_EQUAL(mrna->GetLocation().GetStop(eExtreme_Biological), utr3->GetLocation().GetStop(eExtreme_Biological));
350 
351  // with a 5' UTR and a 3' UTR
352  scope.RemoveTopLevelSeqEntry(seh);
353  CRef<CSeq_feat> utr5 = unit_test_util::AddGoodImpFeat(nuc_seq, "5'UTR");
354  utr5->ResetComment();
355  utr5->SetLocation().SetInt().SetFrom(0);
356  utr5->SetLocation().SetInt().SetTo(2);
    // Move the CDS start to position 3, immediately after the 5' UTR (0-2).
357  cds->SetLocation().SetInt().SetFrom(3);
358  seh = scope.AddTopLevelSeqEntry(*entry);
359  mrna = edit::MakemRNAforCDS(*cds, scope);
360  BOOST_CHECK_EQUAL(sequence::Compare(cds->GetLocation(), mrna->GetLocation(),
362  BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStart(eExtreme_Biological), false);
363  BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStop(eExtreme_Biological), false);
364  BOOST_CHECK_EQUAL(mrna->GetLocation().GetStart(eExtreme_Biological), utr5->GetLocation().GetStart(eExtreme_Biological));
365  BOOST_CHECK_EQUAL(mrna->GetLocation().GetStop(eExtreme_Biological), utr3->GetLocation().GetStop(eExtreme_Biological));
366 
    // Put the generated mRNA on the record itself.
367  scope.RemoveTopLevelSeqEntry(seh);
368  unit_test_util::AddFeat(mrna, nuc_seq);
369  seh = scope.AddTopLevelSeqEntry(*entry);
370  // should not create another mRNA if one is already on the record with the right product name
371  CRef<CSeq_feat> mrna2 = edit::MakemRNAforCDS(*cds, scope);
372  BOOST_REQUIRE(!mrna2);
373 
374  // but will create if the existing mRNA has the wrong product
375  mrna->SetData().SetRna().SetExt().SetName("abc");
376  mrna2 = edit::MakemRNAforCDS(*cds, scope);
    // NOTE(review): mrna2 is dereferenced here without a null check.
377  BOOST_CHECK_EQUAL(sequence::Compare(mrna2->GetLocation(), mrna->GetLocation(),
379 
380 }
381 
// Reads a Seq-entry from ASN.1 text, builds an mRNA for the first feature of
// the first annotation on the first member set, and compares its location with
// an expected Seq-loc that is also read from ASN.1 text.
// NOTE(review): the lines that declare the `istr` streams and `scope`
// (original lines 386, 390, 397) are not present in this listing.
382 BOOST_AUTO_TEST_CASE(Test_MakemRNAforCDS_with_complex_UTR)
383 {
384  CSeq_entry entry;
385  {
387  istr >> MSerial_AsnText >> entry;
388  }
389 
391  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
392  CRef<CSeq_feat> cds = entry.SetSet().SetSeq_set().front()->SetSet().SetAnnot().front()->SetData().SetFtable().front();
393  CRef<CSeq_feat> mrna = edit::MakemRNAforCDS(*cds, scope);
    // REQUIRE stops the test here if no mRNA was produced, so the dereference
    // below is safe.
394  BOOST_REQUIRE(mrna);
395  CSeq_loc mrna_loc;
396  {
398  istr >> MSerial_AsnText >> mrna_loc;
399  }
400  BOOST_CHECK_EQUAL(sequence::Compare(mrna->GetLocation(), mrna_loc, &scope, sequence::fCompareOverlapping), sequence::eSame);
401 }
402 
// Reads a Seq-entry from ASN.1 text and, for each of the first three features
// in the set's first feature table, builds an mRNA and checks that it Equals()
// an expected feature read from ASN.1 text.
// NOTE(review): the lines that declare the `istr` streams and `scope`
// (original lines 407, 411, 419, 430, 441) are not present in this listing.
403 BOOST_AUTO_TEST_CASE(Test_MakemRNAforCDS_with_exons)
404 {
405  CSeq_entry entry;
406  {
408  istr >> MSerial_AsnText >> entry;
409  }
410 
412  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
    // `it` walks the feature table; it is advanced twice below without an end
    // check, so the table must hold at least three features.
413  auto it = entry.SetSet().SetAnnot().front()->SetData().SetFtable().begin();
414  CRef<CSeq_feat> cds1 = *it;
415  CRef<CSeq_feat> mrna1 = edit::MakemRNAforCDS(*cds1, scope);
416  BOOST_REQUIRE(mrna1);
417  CSeq_feat ground_mrna1;
418  {
420  istr >> MSerial_AsnText >> ground_mrna1;
421  }
422  BOOST_CHECK(mrna1->Equals(ground_mrna1));
423 
424  ++it;
425  CRef<CSeq_feat> cds2 = *it;
426  CRef<CSeq_feat> mrna2 = edit::MakemRNAforCDS(*cds2, scope);
427  BOOST_REQUIRE(mrna2);
428  CSeq_feat ground_mrna2;
429  {
431  istr >> MSerial_AsnText >> ground_mrna2;
432  }
433  BOOST_CHECK(mrna2->Equals(ground_mrna2));
434 
435  ++it;
436  CRef<CSeq_feat> cds3 = *it;
437  CRef<CSeq_feat> mrna3 = edit::MakemRNAforCDS(*cds3, scope);
438  BOOST_REQUIRE(mrna3);
439  CSeq_feat ground_mrna3;
440  {
442  istr >> MSerial_AsnText >> ground_mrna3;
443  }
444  BOOST_CHECK(mrna3->Equals(ground_mrna3));
445 }
446 
447 
// Checks edit::GetmRNAforCDS: it returns an empty reference before any mRNA is
// on the record, and returns the attached feature object (same pointer as
// mrna1) after one named "product 1" has been attached.
// NOTE(review): the lines that create `entry`, `cds`, `scope`, `seh`,
// `nuc_seq` and `mrna1` (original lines 450-453, 457-458) are not present in
// this listing.
448 BOOST_AUTO_TEST_CASE(Test_GetmRNAforCDS)
449 {
452 
454  CConstRef<CSeq_feat> mrna = edit::GetmRNAforCDS(*cds, scope);
455  BOOST_CHECK_EQUAL(mrna.Empty(), true);
456 
459  mrna1->SetData().SetRna().SetExt().SetName("product 1");
460  CRef<CSeq_annot> annot = unit_test_util::AddFeat(mrna1, nuc_seq);
    // Attach the annotation through an edit handle.
461  CSeq_entry_EditHandle edit_seh = seh.GetEditHandle();
462  edit_seh.AttachAnnot(*annot);
463 
464  mrna = edit::GetmRNAforCDS(*cds, scope);
465  BOOST_REQUIRE(!mrna.Empty());
466  BOOST_CHECK_EQUAL(mrna == mrna1, true);
467 }
468 
// Sets genetic codes on the entry through unit_test_util::SetGcode,
// SetMGcode and SetPGcode, and checks the id of the resulting `code`
// (6, 2, 11 and 12 in turn), with two points where no code is expected.
// NOTE(review): every line that creates the entry/scope or assigns `code`
// (original lines 471-475, 479, 482-483, 487, 490-491, 495) is not present in
// this listing, so what distinguishes each step cannot be read from here.
469 BOOST_AUTO_TEST_CASE(Test_GetGeneticCodeForBioseq)
470 {
473 
476  BOOST_REQUIRE(!code);
477 
478  unit_test_util::SetGcode(entry, 6);
480  BOOST_CHECK_EQUAL(code->GetId(), 6);
481 
484  BOOST_REQUIRE(!code);
485 
486  unit_test_util::SetMGcode(entry, 2);
488  BOOST_CHECK_EQUAL(code->GetId(), 2);
489 
492  BOOST_CHECK_EQUAL(code->GetId(), 11);
493 
494  unit_test_util::SetPGcode(entry, 12);
496  BOOST_CHECK_EQUAL(code->GetId(), 12);
497 }
498 
499 
// Checks edit::TruncateCDSAtStop for each reading frame. The 60-base sequence
// contains TAA at indices 21-23, 25-27 and 29-31, i.e. one stop codon per
// frame, which matches the expected stops 23, 27 and 31 below.
// NOTE(review): the lines that create `entry`, `cds`, `scope` and `seh`
// (original lines 502, 504, 507) are not present in this listing.
500 BOOST_AUTO_TEST_CASE(Test_TruncateCDSAtStop)
501 {
503  entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAATAAGTAAATAATTGGCCAAAATTGGCCAAAATTGGCCAA");
505  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
    // Default-constructed Cdregion: the frame is left unset.
506  cds->SetData().SetCdregion();
508 
509  // check for frame 1/unset
510  bool found_stop = edit::TruncateCDSAtStop(*cds, scope);
511  BOOST_CHECK_EQUAL(found_stop, true);
512  BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 23);
513  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
514 
515  // check for frame 2
    // Before each further check the CDS is stretched back to the last base
    // and marked 3' partial again, undoing the previous truncation.
516  cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_two);
517  cds->SetLocation().SetInt().SetTo(entry->GetSeq().GetInst().GetLength() - 1);
518  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
519  found_stop = edit::TruncateCDSAtStop(*cds, scope);
520  BOOST_CHECK_EQUAL(found_stop, true);
521  BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 27);
522  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
523 
524  // check for frame 3
525  cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_three);
526  cds->SetLocation().SetInt().SetTo(entry->GetSeq().GetInst().GetLength() - 1);
527  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
528  found_stop = edit::TruncateCDSAtStop(*cds, scope);
529  BOOST_CHECK_EQUAL(found_stop, true);
530  BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 31);
531  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
532 
533 
534 }
535 
536 
// Checks edit::ExtendCDSToStopCodon for each reading frame, using the same
// 60-base sequence as Test_TruncateCDSAtStop (TAA at indices 21-23, 25-27 and
// 29-31). The expected stops are again 23, 27 and 31.
// NOTE(review): the lines that create `entry`, `cds`, `scope` and `seh`
// (original lines 539, 541, 544) are not present in this listing, so the
// initial CDS end position cannot be read from here.
537 BOOST_AUTO_TEST_CASE(Test_ExtendCDSToStopCodon)
538 {
540  entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAATAAGTAAATAATTGGCCAAAATTGGCCAAAATTGGCCAA");
542  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
543  cds->SetData().SetCdregion();
545 
546  // check for frame 1/unset
547  bool found_stop = edit::ExtendCDSToStopCodon(*cds, scope);
548  BOOST_CHECK_EQUAL(found_stop, true);
549  BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 23);
550  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
551 
552  // check for frame 2
    // Before each further check the CDS end is pulled back to position 15 and
    // marked 3' partial again, so there is something to extend.
553  cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_two);
554  cds->SetLocation().SetInt().SetTo(15);
555  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
556  found_stop = edit::ExtendCDSToStopCodon(*cds, scope);
557  BOOST_CHECK_EQUAL(found_stop, true);
558  BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 27);
559  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
560 
561  // check for frame 3
562  cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_three);
563  cds->SetLocation().SetInt().SetTo(15);
564  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
565  found_stop = edit::ExtendCDSToStopCodon(*cds, scope);
566  BOOST_CHECK_EQUAL(found_stop, true);
567  BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 31);
568  BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
569 
570 
571 }
572 
573 
// Exercises edit::MakemRNAforCDS on features that live in a bare Seq-annot
// added to the scope, with no Seq-entry around it. Three rounds: CDS alone
// (mRNA expected at 10..40), CDS plus 5'/3' UTRs (mRNA expected at 0..50),
// and CDS plus an existing mRNA (no new mRNA expected).
// NOTE(review): the line that declares `scope` (original line 584) and the
// line that opens the inner block of each loop (589, 615, 638) are not
// present in this listing; only the closing braces are visible.
574 BOOST_AUTO_TEST_CASE(Test_MakemRNAAnnotOnly)
575 {
576  CRef<CSeq_feat> cds(new CSeq_feat());
577  cds->SetData().SetCdregion();
578  cds->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
579  cds->SetLocation().SetInt().SetFrom(10);
580  cds->SetLocation().SetInt().SetTo(40);
581  CRef<CSeq_annot> annot(new CSeq_annot());
582  annot->SetData().SetFtable().push_back(cds);
583 
585  scope.AddDefaults();
586  CSeq_annot_Handle sah = scope.AddSeq_annot(*annot);
587  CFeat_CI it(sah);
588  while (it) {
    // This local reference shadows the outer CRef named `cds`.
    // NOTE(review): pRna is dereferenced below without a null check, which is
    // what the trailing comment warns about; a BOOST_REQUIRE(pRna) before the
    // checks would make a failure clean.
590  const CSeq_feat& cds = it->GetOriginalFeature();
591  CRef<CSeq_feat> pRna = edit::MakemRNAforCDS(cds, scope); //<-- blows up on NULL ptr !!!
592  BOOST_CHECK_EQUAL(pRna->GetLocation().GetStart(eExtreme_Biological), 10);
593  BOOST_CHECK_EQUAL(pRna->GetLocation().GetStop(eExtreme_Biological), 40);
594  }
595  ++it;
596  }
597 
    // Second round: add a 5'UTR (0..9) and a 3'UTR (41..50) flanking the CDS.
598  scope.RemoveSeq_annot(sah);
599  CRef<CSeq_feat> utr5(new CSeq_feat());
600  utr5->SetData().SetImp().SetKey("5'UTR");
601  utr5->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
602  utr5->SetLocation().SetInt().SetFrom(0);
603  utr5->SetLocation().SetInt().SetTo(9);
604  annot->SetData().SetFtable().push_back(utr5);
605  CRef<CSeq_feat> utr3(new CSeq_feat());
606  utr3->SetData().SetImp().SetKey("3'UTR");
607  utr3->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
608  utr3->SetLocation().SetInt().SetFrom(41);
609  utr3->SetLocation().SetInt().SetTo(50);
610  annot->SetData().SetFtable().push_back(utr3);
611 
612  sah = scope.AddSeq_annot(*annot);
613  CFeat_CI it2(sah);
614  while (it2) {
    // NOTE(review): same unchecked dereference of pRna as in the first loop.
616  const CSeq_feat& cds = it2->GetOriginalFeature();
617  CRef<CSeq_feat> pRna = edit::MakemRNAforCDS(cds, scope); //<-- blows up on NULL ptr !!!
618  BOOST_CHECK_EQUAL(pRna->GetLocation().GetStart(eExtreme_Biological), 0);
619  BOOST_CHECK_EQUAL(pRna->GetLocation().GetStop(eExtreme_Biological), 50);
620  }
621  ++it2;
622  }
623 
624  // should not make mRNA if one already exists
    // The existing mRNA covers the same interval as the CDS (10..40) and has
    // an empty product name.
625  scope.RemoveSeq_annot(sah);
626  CRef<CSeq_feat> mrna(new CSeq_feat());
627  mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
628  mrna->SetData().SetRna().SetExt().SetName("");
629  mrna->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
630  mrna->SetLocation().SetInt().SetFrom(10);
631  mrna->SetLocation().SetInt().SetTo(40);
632  annot->SetData().SetFtable().push_back(mrna);
633  BOOST_CHECK_EQUAL(mrna->GetData().GetSubtype(), CSeqFeatData::eSubtype_mRNA);
634  sah = scope.AddSeq_annot(*annot);
635 
636  CFeat_CI it3(sah);
637  while (it3) {
639  const CSeq_feat& cds = it3->GetOriginalFeature();
640  CRef<CSeq_feat> pRna = edit::MakemRNAforCDS(cds, scope);
641  BOOST_REQUIRE(!pRna);
642  }
643  ++it3;
644  }
645 
646 }
647 
648 
// Checks ApplyToString on a simple-replace object whose replacement text is
// "foo": "abc" becomes "foo", and with weasel-to-putative enabled
// "candidate abc" becomes "putative foo".
// NOTE(review): the line that creates `repl` (original line 651) is not
// present in this listing.
649 BOOST_AUTO_TEST_CASE(Test_SimpleReplace)
650 {
652  repl->SetReplace("foo");
653 
654  string test = "abc";
655 
    // A null constraint is passed; `test` is both the input and output argument.
656  CRef<CString_constraint> constraint(NULL);
657  BOOST_CHECK(repl->ApplyToString(test, test, constraint));
658  BOOST_CHECK_EQUAL(test, "foo");
659 
660  test = "candidate abc";
661  repl->SetWeasel_to_putative(true);
662  BOOST_CHECK(repl->ApplyToString(test, test, constraint));
663  BOOST_CHECK_EQUAL(test, "putative foo");
664 }
665 
666 
// Checks ApplyToString on a replace-function object, first configured as a
// haem replacement ("haem" -> "hem" inside a word, "heme" as a whole word, per
// the expectations below) and then switched to a simple replacement with "foo".
// NOTE(review): the line that creates `repl` (original line 669) and the
// statement at original line 689 are not present in this listing. The final
// expectation "putative foo" suggests line 689 enables weasel-to-putative -
// confirm against the full file.
667 BOOST_AUTO_TEST_CASE(Test_ReplaceFunc)
668 {
670  repl->SetHaem_replace("haem");
671 
672  string test = "haemagglutination domain protein";
673 
674  CRef<CString_constraint> constraint(NULL);
675  BOOST_CHECK(repl->ApplyToString(test, test, constraint));
676  BOOST_CHECK_EQUAL(test, "hemagglutination domain protein");
677 
678  test = "land of the free, haem of the brave";
679  BOOST_CHECK(repl->ApplyToString(test, test, constraint));
680  BOOST_CHECK_EQUAL(test, "land of the free, heme of the brave");
681 
    // Switch the same object over to the simple-replace variant.
682  repl->SetSimple_replace().SetReplace("foo");
683  test = "abc";
684 
685  BOOST_CHECK(repl->ApplyToString(test, test, constraint));
686  BOOST_CHECK_EQUAL(test, "foo");
687 
688  test = "candidate abc";
690  BOOST_CHECK(repl->ApplyToString(test, test, constraint));
691  BOOST_CHECK_EQUAL(test, "putative foo");
692 
693 }
694 
695 
// Checks rule->ApplyToString for a sequence of find/replace configurations:
// haem replacement, whole-string simple replacement, weasel-to-putative,
// match location (starts / ends / unset / equals), ignore-weasel, and
// string constraints with ignore-words synonym lists.
// NOTE(review): the lines that create `rule`, `word_subs` and `word_sub`
// (original lines 698, 774, 777) are not present in this listing.
696 BOOST_AUTO_TEST_CASE(Test_SuspectRule)
697 {
699  rule->SetFind().SetString_constraint().SetMatch_text("haem");
700  rule->SetReplace().SetReplace_func().SetHaem_replace("haem");
701 
702  string test = "haemagglutination domain protein";
703 
704  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
705  BOOST_CHECK_EQUAL(test, "hemagglutination domain protein");
706 
707  test = "land of the free, haem of the brave";
708  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
709  BOOST_CHECK_EQUAL(test, "land of the free, heme of the brave");
710 
    // Simple replacement of the whole string when "abc" is found.
711  rule->SetFind().SetString_constraint().SetMatch_text("abc");
712  rule->SetReplace().SetReplace_func().SetSimple_replace().SetReplace("foo");
713  rule->SetReplace().SetReplace_func().SetSimple_replace().SetWhole_string(true);
714  test = "abc";
715 
716  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
717  BOOST_CHECK_EQUAL(test, "foo");
718 
719  test = "candidate abc";
720  rule->SetReplace().SetReplace_func().SetSimple_replace().SetWeasel_to_putative(true);
721  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
722  BOOST_CHECK_EQUAL(test, "putative foo");
723 
    // Match-location checks: "me" only occurs at the end of this string, so a
    // "starts" constraint must not match and must leave the string unchanged.
724  test = "do not match me";
725  rule->SetReplace().SetReplace_func().SetSimple_replace().ResetWhole_string();
726  rule->SetFind().SetString_constraint().SetMatch_text("me");
727  rule->SetFind().SetString_constraint().SetMatch_location(eString_location_starts);
728  BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
729  BOOST_CHECK_EQUAL(test, "do not match me");
730 
731  rule->SetFind().SetString_constraint().SetMatch_location(eString_location_ends);
732  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
733  BOOST_CHECK_EQUAL(test, "do not match foo");
734 
735  test = "me first";
736  rule->SetFind().SetString_constraint().SetMatch_location(eString_location_starts);
737  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
738  BOOST_CHECK_EQUAL(test, "foo first");
739 
    // With no match location set, every occurrence is expected to be replaced.
740  test = "me me me me";
741  rule->SetFind().SetString_constraint().ResetMatch_location();
742 
743  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
744  BOOST_CHECK_EQUAL(test, "foo foo foo foo");
745 
    // An "equals" constraint on unrelated text must not match.
746  test = "30S ribosomal protein S12";
747  rule->SetFind().Reset();
748  rule->SetFind().SetString_constraint().SetMatch_location(eString_location_equals);
749  rule->SetFind().SetString_constraint().SetMatch_text("CHC2 zinc finger");
750  rule->SetFind().SetString_constraint().SetIgnore_weasel(true);
751  rule->SetReplace().Reset();
752  rule->SetReplace().SetReplace_func().SetSimple_replace().SetReplace("CHC2 zinc finger protein");
753  rule->SetReplace().SetReplace_func().SetSimple_replace().SetWhole_string(false);
754  rule->SetReplace().SetReplace_func().SetSimple_replace().SetWeasel_to_putative(true);
755  BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
756 
    // "hypothetical protein" is expected to match an "equals" constraint on
    // "protein" when ignore-weasel is enabled.
757  test = "hypothetical protein";
758  rule->SetFind().Reset();
759  rule->SetFind().SetString_constraint().SetMatch_location(eString_location_equals);
760  rule->SetFind().SetString_constraint().SetMatch_text("protein");
761  rule->SetFind().SetString_constraint().SetIgnore_weasel(true);
762  rule->SetReplace().Reset();
763  rule->SetReplace().SetReplace_func().SetSimple_replace().SetReplace("hypothetical protein");
764  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
765 
766  // string_constraint with ignore-words
767  test = "human";
768  rule->SetFind().Reset();
769  rule->SetFind().SetString_constraint().SetMatch_text("Homo sapiens");
770  rule->SetFind().SetString_constraint().SetMatch_location(eString_location_equals);
771  rule->SetFind().SetString_constraint().SetIgnore_space(true);
772  rule->SetFind().SetString_constraint().SetIgnore_punct(true);
773 
775  rule->SetFind().SetString_constraint().SetIgnore_words(word_subs.GetObject());
776 
    // First substitution: synonyms accepted in place of "Homo sapiens".
778  word_sub->SetWord("Homo sapiens");
779  list <string> syns;
780  syns.push_back("human");
781  syns.push_back("Homo sapien");
782  syns.push_back("Homosapiens");
783  syns.push_back("Homo-sapiens");
784  syns.push_back("Homo spiens");
785  syns.push_back("Homo Sapience");
786  syns.push_back("homosapein");
787  syns.push_back("homosapiens");
788  syns.push_back("homosapien");
789  syns.push_back("homo_sapien");
790  syns.push_back("homo_sapiens");
791  syns.push_back("Homosipian");
792  word_sub->SetSynonyms() = syns;
793  rule->SetFind().SetString_constraint().SetIgnore_words().Set().push_back(word_sub);
794 
    // Second substitution: synonyms accepted in place of "sapiens".
795  word_sub.Reset(new CWord_substitution);
796  word_sub->SetWord("sapiens");
797  syns.clear();
798  syns.push_back("sapien");
799  syns.push_back("sapeins");
800  syns.push_back("sapein");
801  syns.push_back("sapins");
802  syns.push_back("sapens");
803  syns.push_back("sapin");
804  syns.push_back("sapen");
805  syns.push_back("sapians");
806  syns.push_back("sapian");
807  syns.push_back("sapies");
808  syns.push_back("sapie");
809  word_sub->SetSynonyms() = syns;
810  rule->SetFind().SetString_constraint().SetIgnore_words().Set().push_back(word_sub);
    // `test` still holds "human" here; the rule has no replacement configured
    // after the Reset() above, so only the match result is checked.
811  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
812  test = "human";
813  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
814  test = "human1";
815  BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
816  test = "Homo sapien";
817  BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
818  test = "Human sapien";
819  BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
820  test = "sapien";
821  BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
822 
823  word_sub.Reset(new CWord_substitution);
824  // all the syns won't match because of missing word_sub.Word;
825  syns.clear();
826  syns.push_back("fruit");
827  syns.push_back("apple");
828  syns.push_back("apple, pear");
829  syns.push_back("grape");
830  syns.push_back("peaches");
831  syns.push_back("peach");
832  word_sub->SetSynonyms() = syns;
833  rule->SetFind().SetString_constraint().SetIgnore_words().Set().push_back(word_sub);
834  test = "fruit";
835  BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
836  test = "pear, apple";
837  BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
838 }
839 
// Checks edit::ApplyCDSFrame::s_FindMatchingFrame for three CDS start
// positions and then checks that s_SetCDSFrame stores each requested frame on
// the Cdregion.
// NOTE(review): the lines that declare `istr` and `scope` (original lines 844,
// 850) are not present in this listing.
840 BOOST_AUTO_TEST_CASE(Test_FindMatchingFrame)
841 {
842  CSeq_entry entry;
843  {{
845  istr >> MSerial_AsnText >> entry;
846  }}
847 
848  CRef<CSeq_feat> cds = entry.SetSet().SetAnnot().front()->SetData().SetFtable().front();
849 
851  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
852 
    // The CDS start is moved while the stored frame is reset to one each time,
    // so the returned value must come from s_FindMatchingFrame's own search.
853  BOOST_CHECK_EQUAL(edit::ApplyCDSFrame::s_FindMatchingFrame(*cds, scope), CCdregion::eFrame_one);
854  cds->SetLocation().SetInt().SetFrom(13);
855  cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_one);
856  BOOST_CHECK_EQUAL(edit::ApplyCDSFrame::s_FindMatchingFrame(*cds, scope), CCdregion::eFrame_two);
857  cds->SetLocation().SetInt().SetFrom(12);
858  cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_one);
859  BOOST_CHECK_EQUAL(edit::ApplyCDSFrame::s_FindMatchingFrame(*cds, scope), CCdregion::eFrame_three);
860 
861  edit::ApplyCDSFrame::s_SetCDSFrame(*cds, edit::ApplyCDSFrame::eOne, scope);
862  BOOST_CHECK_EQUAL(cds->GetData().GetCdregion().GetFrame(), CCdregion::eFrame_one);
863  edit::ApplyCDSFrame::s_SetCDSFrame(*cds, edit::ApplyCDSFrame::eTwo, scope);
864  BOOST_CHECK_EQUAL(cds->GetData().GetCdregion().GetFrame(), CCdregion::eFrame_two);
865  edit::ApplyCDSFrame::s_SetCDSFrame(*cds, edit::ApplyCDSFrame::eThree, scope);
866  BOOST_CHECK_EQUAL(cds->GetData().GetCdregion().GetFrame(), CCdregion::eFrame_three);
867 
868 }
869 
870 
// Checks feature::PromoteCDSToNucProtSet and edit::DemoteCDSToNucSeq:
// promotion is refused while the nucleotide is not inside a set, succeeds once
// it is, cannot be repeated, and demotion moves the feature back to the
// nucleotide sequence and cannot be repeated either.
// NOTE(review): the lines that create `nuc`, `cds`, `scope` and `seh` and the
// one that configures the set (original lines 873, 875, 879, 884, 905) are
// not present in this listing.
// NOTE(review): the parent-entry checks use BOOST_ASSERT, which is the
// Boost.Assert macro rather than a Boost.Test check; it does not report
// through the test framework and may be compiled out when NDEBUG is defined.
// Consider BOOST_CHECK - confirm against the project's conventions.
871 BOOST_AUTO_TEST_CASE(Test_PromoteCDSToNucProtSet_And_DemoteCDSToNucSeq)
872 {
874 
876  CRef<CSeq_id> nuc_id(new CSeq_id());
877  nuc_id->SetLocal().SetStr("nuc");
878  unit_test_util::ChangeId(nuc, nuc_id);
880  cds->ResetComment();
881  cds->SetData().SetCdregion();
882 
883  // should not change cdregion if not in nuc-prot set and product not set
885  CSeq_feat_Handle fh = scope.GetSeq_featHandle(*cds);
886 
887  BOOST_CHECK_EQUAL(feature::PromoteCDSToNucProtSet(fh), false);
888  BOOST_ASSERT(fh.GetAnnot().GetParentEntry() == seh);
889  scope.RemoveTopLevelSeqEntry(seh);
890 
891  // should not change cdregion if not in nuc-prot set and product is set
892  CRef<CSeq_id> product_id(new CSeq_id());
893  product_id->SetLocal().SetStr("prot");
894  cds->SetProduct().SetWhole().Assign(*product_id);
895 
896  seh = scope.AddTopLevelSeqEntry(*nuc);
897  fh = scope.GetSeq_featHandle(*cds);
898  BOOST_CHECK_EQUAL(feature::PromoteCDSToNucProtSet(fh), false);
899  BOOST_ASSERT(fh.GetAnnot().GetParentEntry() == seh);
900  scope.RemoveTopLevelSeqEntry(seh);
901 
902  // move cdregion if in nuc-prot set and product set but
903  // protein sequence not local
904  CRef<CSeq_entry> entry(new CSeq_entry());
906  entry->SetSet().SetSeq_set().push_back(nuc);
907 
908  seh = scope.AddTopLevelSeqEntry(*entry);
909  fh = scope.GetSeq_featHandle(*cds);
910  BOOST_CHECK_EQUAL(feature::PromoteCDSToNucProtSet(fh), true);
    // After promotion the feature's annotation should hang off the set entry.
911  BOOST_ASSERT(fh.GetAnnot().GetParentEntry() == seh);
912 
913  // can't promote again
914  BOOST_CHECK_EQUAL(feature::PromoteCDSToNucProtSet(fh), false);
915 
916  // after demotion, should go back to nucleotide sequence
917  CBioseq_Handle n_bsh = scope.GetBioseqHandle(*nuc_id);
918  BOOST_CHECK_EQUAL(edit::DemoteCDSToNucSeq(fh), true);
919  BOOST_ASSERT(fh.GetAnnot().GetParentEntry() == n_bsh.GetSeq_entry_Handle());
920 
921  // can't demote again
922  BOOST_CHECK_EQUAL(edit::DemoteCDSToNucSeq(fh), false);
923 }
924 
// Checks the label produced by edit::GetNewProtId for a nucleotide with a
// short general id ("gnl|TEST:nuc_1") and for one whose tag is 50 'A'
// characters, where the expected label carries a 16-hex-digit token instead
// of the tag (presumably a hash of the over-long tag - confirm against
// GetNewProtId). In both cases `offset` is expected to stay 1.
// NOTE(review): the lines that create `scope`, `nuc`, `seh` and `nuc2`
// (original lines 927, 929, 934, 942) are not present in this listing.
925 BOOST_AUTO_TEST_CASE(Test_GetNewProtId)
926 {
928 
930  CRef<CSeq_id> nuc_id(new CSeq_id());
931  nuc_id->SetGeneral().SetDb("TEST");
932  nuc_id->SetGeneral().SetTag().SetStr("nuc");
933  unit_test_util::ChangeId(nuc, nuc_id);
935  objects::CBioseq_Handle bsh = seh.GetSeq();
936  int offset = 1;
937  string id_label;
938  CRef<objects::CSeq_id> new_prot_id = edit::GetNewProtId(bsh, offset, id_label, true);
939  BOOST_CHECK_EQUAL(id_label, "gnl|TEST:nuc_1");
940  BOOST_CHECK_EQUAL(offset, 1);
941 
943  CRef<CSeq_id> nuc_id2(new CSeq_id());
944  nuc_id2->SetGeneral().SetDb("TEST2");
945  nuc_id2->SetGeneral().SetTag().SetStr(string(50, 'A'));
946  unit_test_util::ChangeId(nuc2, nuc_id2);
947  CSeq_entry_Handle seh2 = scope.AddTopLevelSeqEntry(*nuc2);
948  objects::CBioseq_Handle bsh2 = seh2.GetSeq();
    // The label is cleared first so the check below sees only the new value.
949  id_label.clear();
950  CRef<objects::CSeq_id> new_prot_id2 = edit::GetNewProtId(bsh2, offset, id_label, true);
951  BOOST_CHECK_EQUAL(id_label, "gnl|TEST2:624900FCF5A05DAD_1");
952  BOOST_CHECK_EQUAL(offset, 1);
953 }
954 
// Exercises edit::GetNewProtIdFromExistingProt on a protein Bioseq with a
// local Seq-id, checking the generated id label and that `offset` still
// equals 1 after each call.
// NOTE(review): the declarations of `prot`, `scope` and `seh` are not visible
// in this listing (the embedded numbering skips 957, 958, 960 and 964);
// confirm them against the original file before editing this test.
955 BOOST_AUTO_TEST_CASE(Test_GetNewProtIdFromExistingProt)
956 {
959 
// Case 1: short local id "prot".
961  CRef<CSeq_id> prot_id(new CSeq_id());
962  prot_id->SetLocal().SetStr("prot");
963  unit_test_util::ChangeId(prot, prot_id);
965  objects::CBioseq_Handle bsh = seh.GetSeq();
966  int offset = 1;
967  string id_label;
968  vector<CRef<objects::CSeq_id> > new_prot_ids = edit::GetNewProtIdFromExistingProt(bsh, offset, id_label);
// Expected label is the local id followed by "_<offset>".
969  BOOST_CHECK_EQUAL(id_label, "lcl|prot_1");
970  BOOST_CHECK_EQUAL(offset, 1);
971 
// Case 2: the entry is taken out of the scope, re-identified with a
// 50-character local id, and added back before the handle is re-fetched.
972  scope.RemoveTopLevelSeqEntry(seh);
973  id_label.clear();
974  prot_id->SetLocal().SetStr(string(50, 'A'));
975  unit_test_util::ChangeId(prot, prot_id);
976  seh = scope.AddTopLevelSeqEntry(*prot);
977  bsh = seh.GetSeq();
978  new_prot_ids = edit::GetNewProtIdFromExistingProt(bsh, offset, id_label);
// The expected label carries a 16-hex-digit token in place of the long id;
// presumably over-long ids are shortened to a digest - TODO confirm.
979  BOOST_CHECK_EQUAL(id_label, "lcl|624900FCF5A05DAD_1");
980  BOOST_CHECK_EQUAL(offset, 1);
981 }
982 
983 
984 //////////////////////////////////////////////////////////////////////////////////
// ASN.1 text for a nuc-prot Seq-entry used as test data:
//  - a raw nucleotide (mol rna, length 1442; GenBank AF010144.1, gi 3002526);
//  - a raw protein (length 375; GenBank AAC08737.1, gi 3002527) with one
//    full-length prot feature ("neuronal thread protein AD7c-NTP");
//  - a set-level cdregion (frame two, genetic code id 1) located at 14..1141
//    on the plus strand of gi 3002526, with gi 3002527 as its product.
// The literal is a single string built with backslash line continuations, so
// no comments can be placed inside it.
985 const string sc_TestEntry ="\
986 Seq-entry ::= set {\
987  class nuc-prot,\
988  seq-set {\
989  seq {\
990  id {\
991  genbank {\
992  name \"AF010144\",\
993  accession \"AF010144\",\
994  version 1\
995  },\
996  gi 3002526\
997  },\
998  inst {\
999  repr raw,\
1000  mol rna,\
1001  length 1442,\
1002  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1003 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1004 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTATTTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1005 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1006 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1007 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1008 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1009 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1010 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1011 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1012 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1013 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1014 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1015 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1016 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1017 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1018 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1019 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1020 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1021  }\
1022  },\
1023  seq {\
1024  id {\
1025  genbank {\
1026  accession \"AAC08737\",\
1027  version 1\
1028  },\
1029  gi 3002527\
1030  },\
1031  inst {\
1032  repr raw,\
1033  mol aa,\
1034  length 375,\
1035  topology not-set,\
1036  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILY\
1037 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1038 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1039 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1040 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1041 LSGWSQTPDLR\"\
1042  },\
1043  annot {\
1044  {\
1045  data ftable {\
1046  {\
1047  data prot {\
1048  name {\
1049  \"neuronal thread protein AD7c-NTP\"\
1050  }\
1051  },\
1052  location int {\
1053  from 0,\
1054  to 374,\
1055  strand plus,\
1056  id gi 3002527\
1057  }\
1058  }\
1059  }\
1060  }\
1061  }\
1062  }\
1063  },\
1064  annot {\
1065  {\
1066  data ftable {\
1067  {\
1068  data cdregion {\
1069  frame two,\
1070  code {\
1071  id 1\
1072  }\
1073  },\
1074  product whole gi 3002527,\
1075  location int {\
1076  from 14,\
1077  to 1141,\
1078  strand plus,\
1079  id gi 3002526\
1080  }\
1081  }\
1082  }\
1083  }\
1084  }\
1085 }";
1086 
1087 
1088 //////////////////////////////////////////////////////////////////////////////////
// ASN.1 text for a genbank-class set wrapping one nuc-prot set, used as test
// data (Erythranthe lewisii PSY1 gene, GenBank KR053166, local id "Seq2"):
//  - nucleotide: delta-sequence DNA of length 4644 with a gene feature
//    (0..4643), a 5'UTR made of three intervals (0..130, 970..1399,
//    1800..1971) and a 3'UTR (4565..4643);
//  - protein "Seq2_prot_2" (length 417) with a full-length prot feature
//    ("phytoene synthase 1");
//  - a set-level cdregion (frame one, genetic code id 1) whose packed-int
//    location has six plus-strand intervals from 1972 to 4564 and whose
//    product is local "Seq2_prot_2".
// The literal is a single string built with backslash line continuations, so
// no comments can be placed inside it.
1089 const string sc_TestEntry2 ="\
1090 Seq-entry ::= set {\
1091  class genbank,\
1092  descr {\
1093  user {\
1094  type str \"NcbiCleanup\",\
1095  data {\
1096  {\
1097  label str \"method\",\
1098  data str \"ExtendedSeqEntryCleanup\"\
1099  },\
1100  {\
1101  label str \"version\",\
1102  data int 1\
1103  },\
1104  {\
1105  label str \"month\",\
1106  data int 9\
1107  },\
1108  {\
1109  label str \"day\",\
1110  data int 15\
1111  },\
1112  {\
1113  label str \"year\",\
1114  data int 2015\
1115  }\
1116  }\
1117  }\
1118  },\
1119  seq-set {\
1120  set {\
1121  class nuc-prot,\
1122  descr {\
1123  source {\
1124  genome genomic,\
1125  org {\
1126  taxname \"Erythranthe lewisii\",\
1127  db {\
1128  {\
1129  db \"taxon\",\
1130  tag id 69919\
1131  }\
1132  },\
1133  orgname {\
1134  name binomial {\
1135  genus \"Erythranthe\",\
1136  species \"lewisii\"\
1137  },\
1138  mod {\
1139  {\
1140  subtype other,\
1141  subname \"inbred line LF10\"\
1142  },\
1143  {\
1144  subtype gb-synonym,\
1145  subname \"Mimulus lewisii\"\
1146  }\
1147  },\
1148  lineage \"Eukaryota; Viridiplantae; Streptophyta; Embryophyta;\
1149  Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae;\
1150  Pentapetalae; asterids; lamiids; Lamiales; Phrymaceae; Erythranthe\",\
1151  gcode 1,\
1152  mgcode 1,\
1153  div \"PLN\",\
1154  pgcode 11\
1155  }\
1156  },\
1157  subtype {\
1158  {\
1159  subtype country,\
1160  name \"USA\"\
1161  },\
1162  {\
1163  subtype collection-date,\
1164  name \"2000\"\
1165  },\
1166  {\
1167  subtype collected-by,\
1168  name \"A. Angert\"\
1169  }\
1170  }\
1171  },\
1172  pub {\
1173  pub {\
1174  gen {\
1175  cit \"Unpublished\",\
1176  authors {\
1177  names std {\
1178  {\
1179  name name {\
1180  last \"Sagawa\",\
1181  first \"Janelle\",\
1182  initials \"J.M.\"\
1183  }\
1184  },\
1185  {\
1186  name name {\
1187  last \"Stanley\",\
1188  first \"Lauren\",\
1189  initials \"L.E.\"\
1190  }\
1191  },\
1192  {\
1193  name name {\
1194  last \"LaFountain\",\
1195  first \"Amy\",\
1196  initials \"A.M.\"\
1197  }\
1198  },\
1199  {\
1200  name name {\
1201  last \"Frank\",\
1202  first \"Harry\",\
1203  initials \"H.A.\"\
1204  }\
1205  },\
1206  {\
1207  name name {\
1208  last \"Liu\",\
1209  first \"Chang\",\
1210  initials \"C.\"\
1211  }\
1212  },\
1213  {\
1214  name name {\
1215  last \"Yuan\",\
1216  first \"Yao-Wu\",\
1217  initials \"Y.-W.\"\
1218  }\
1219  }\
1220  }\
1221  },\
1222  title \"Transcriptional control of floral carotenoid pigmentation\"\
1223  }\
1224  }\
1225  },\
1226  pub {\
1227  pub {\
1228  sub {\
1229  authors {\
1230  names std {\
1231  {\
1232  name name {\
1233  last \"Sagawa\",\
1234  first \"Janelle\",\
1235  initials \"J.M.\"\
1236  }\
1237  },\
1238  {\
1239  name name {\
1240  last \"Stanley\",\
1241  first \"Lauren\",\
1242  initials \"L.E.\"\
1243  }\
1244  },\
1245  {\
1246  name name {\
1247  last \"LaFountain\",\
1248  first \"Amy\",\
1249  initials \"A.M.\"\
1250  }\
1251  },\
1252  {\
1253  name name {\
1254  last \"Frank\",\
1255  first \"Harry\",\
1256  initials \"H.A.\"\
1257  }\
1258  },\
1259  {\
1260  name name {\
1261  last \"Liu\",\
1262  first \"Chang\",\
1263  initials \"C.\"\
1264  }\
1265  },\
1266  {\
1267  name name {\
1268  last \"Yuan\",\
1269  first \"Yao-Wu\",\
1270  initials \"Y.-W.\"\
1271  }\
1272  }\
1273  },\
1274  affil std {\
1275  affil \"University of Connecticut\",\
1276  div \"Department of Ecology and Evolutionary Biology\",\
1277  city \"Storrs\",\
1278  sub \"CT\",\
1279  country \"USA\",\
1280  street \"75 N. Eagleville Road, Unit 3043\",\
1281  postal-code \"06269-3043\"\
1282  }\
1283  },\
1284  medium email,\
1285  date std {\
1286  year 2015,\
1287  month 4,\
1288  day 1\
1289  }\
1290  }\
1291  }\
1292  },\
1293  user {\
1294  class \"SMART_V1.0\",\
1295  type id 1,\
1296  data {\
1297  {\
1298  label id 1,\
1299  num 1,\
1300  data int 9436144\
1301  }\
1302  }\
1303  },\
1304  user {\
1305  type str \"Submission\",\
1306  data {\
1307  {\
1308  label str \"SmartComment\",\
1309  data str \"ALT EMAIL:yaowu.yuan@uconn.edu\"\
1310  },\
1311  {\
1312  label str \"AdditionalComment\",\
1313  data str \"BankIt1812495\"\
1314  }\
1315  }\
1316  },\
1317  user {\
1318  type str \"StructuredComment\",\
1319  data {\
1320  {\
1321  label str \"StructuredCommentPrefix\",\
1322  data str \"##Assembly-Data-START##\"\
1323  },\
1324  {\
1325  label str \"Assembly Method\",\
1326  data str \"CLC Genomics Workbench v. v. 6\"\
1327  },\
1328  {\
1329  label str \"Sequencing Technology\",\
1330  data str \"Illumina\"\
1331  },\
1332  {\
1333  label str \"StructuredCommentSuffix\",\
1334  data str \"##Assembly-Data-END##\"\
1335  }\
1336  }\
1337  },\
1338  user {\
1339  type str \"Submission\",\
1340  data {\
1341  {\
1342  label str \"SmartComment\",\
1343  data str \"TOTAL # OF SEQS:17\"\
1344  }\
1345  }\
1346  },\
1347  user {\
1348  type str \"Submission\",\
1349  data {\
1350  {\
1351  label str \"AdditionalComment\",\
1352  data str \"GAP: unknown length: introns\"\
1353  }\
1354  }\
1355  },\
1356  update-date std {\
1357  year 2015,\
1358  month 9,\
1359  day 15\
1360  }\
1361  },\
1362  seq-set {\
1363  seq {\
1364  id {\
1365  local str \"Seq2\",\
1366  general {\
1367  db \"BankIt\",\
1368  tag str \"1812495/Seq2\"\
1369  },\
1370  general {\
1371  db \"TMSMART\",\
1372  tag id 53779088\
1373  },\
1374  genbank {\
1375  accession \"KR053166\"\
1376  }\
1377  },\
1378  descr {\
1379  title \"Erythranthe lewisii phytoene synthase 1 (PSY1) gene,\
1380  complete cds.\",\
1381  molinfo {\
1382  biomol genomic\
1383  },\
1384  user {\
1385  type str \"Submission\",\
1386  data {\
1387  {\
1388  label str \"AdditionalComment\",\
1389  data str \"LocalID:Seq2\"\
1390  }\
1391  }\
1392  },\
1393  user {\
1394  type str \"OriginalID\",\
1395  data {\
1396  {\
1397  label str \"LocalId\",\
1398  data str \"Seq2\"\
1399  }\
1400  }\
1401  }\
1402  },\
1403  inst {\
1404  repr delta,\
1405  mol dna,\
1406  length 4644,\
1407  strand ds,\
1408  ext delta {\
1409  literal {\
1410  length 4644,\
1411  seq-data ncbi2na '5145D47243BA80CEC080F1884947E9069464C6FFF451\
1412 E6977753F8C29CD7FE53EAC3F6C46EDF7C777DC70FC1FC3C73FFF3DCED40DCA4E35C7EAB6B4313\
1413 FCC45D41C2C76FEBE473DD35CFFFFFFFF3FE03DAFFBFA0EC7C1CFCCF7F01D3E0E7106FF30C0733\
1414 5DF43F113FFB7E7AC7F123407ED3138F3783F2FCCDD5B14FE4173FCD00ECBF170ECFF62E1ECF74\
1415 0200000070E24BBFEC3361E23BC3C3172CFF4300CD0FAFC86C140FD034BEDD03E3D31D98AF72D6\
1416 BCD703FA53FF7620018C593087CC60F00D1C37D3710334B4F77D3F888EED1F3FF44113B03E0B93\
1417 FCCFD8333CBCF3EC0FC3F3FFF7F0F43803D8FF6FFFBFFFC000200803849CE2C4018E83782F4802\
1418 082316CA09406AD3C9002BA8543620375352C00BA00AB3C0FF512E803AFB123D527040CE0D1D0F\
1419 72D34DB6DB78108088800ED2D33033CBD8100200200074B0982002245DF7F910A2402A18BABFF7\
1420 DBE8FFF7908B3104B88417DD7D050D02BFFFCB3FFFDFBF38FCEF4FFEFCEC0DEF9ED53037AFEF1A\
1421 2F3CC00F803BF9733008C9F706EECFE4530900AE23F3D3CFB3D4BE1F0FCC0B9D0154FD170ACC8F\
1422 61870FBEFD2F4FFFF300F7092FDF2AA1FFF5EF3FFAA3FFCBA0F4BDEC8FCCCFFFFF3EBFCFEF9209\
1423 D4404AD43E7B9081E28F7E0F407E96F7D8FFF5BF287BF6040DF733D0152FC028A28480D730F7F3\
1424 0A3F0D3B7BE7FBEEAEB77547D62F7406A16CF5D8F5F4896FD42C420DE3490C904A7010EB44008A\
1425 603F653BE88300923DDEE24324EF271D49EB88D97CDF58900AFC63BEDF024A49DEBC22123800B6\
1426 2628FE82E0168CFBDF5AA1EC17EF8B827CE367B6E0BBB9A0CE4086FF176B0F107BFE3C4F2FCEFF\
1427 038420D300EEE2B0F4FFBEE08102BC3CFAFFADBFFF1281BE78E11588A6089CDEA63B3AC21DEDFF\
1428 3B133B843FF347DA3FF3FFFED0C0F2ED9D03ABD017783F4A0FBAF3340FFFEDDBF7F06200203ACF\
1429 A48E0E8E2CF0FFEB3A4BC3300EA34D13D13538F94070F4F3E51C5022CAC08FA053BCB7634EF2DD\
1430 8F000100E00024534ED0A02014F87B710EEFF3BF4FFBF70B60003E75CFF7C2D0C7D1FF7EECDFDF\
1431 BE7F3E34CFFBC0D4FEEF3404BBAECA2048E27EF8EA5039344CF1D7127FA32BA8A729F8633FD26A\
1432 657FE339D8E79FCD631EF18AFD7BE3352BC8FB9B7EFFB53F78EFB0C3E9030C8F0F82D3333D7FDA\
1433 8B8BFF6B87F17360FD3D5FFD7249F40DFEE72FBA683FB2EC7E7F7E0DC3CC1C71E66EEEEFFECFEC\
1434 0E738D3E333F411060C7F3B9253F02338F86A3883A17BA0B76B1081F638BECDCC7BC73B27AC7BA\
1435 8F8E2EE5CF3A9324560D109441E22ECC439E7FA4F2B7660D2F840CF7488EFA208B0BFBDBFFFF01\
1436 C049E3DE73EC93CCF6F3C67BB9F33FB6803FB83FFB3CB90A62822ECDC592860FA64A6A1CD63823\
1437 33D9EA02E12303A283F4E08240F98A98A03DF638E4889AED1A27499E72C8E96B0B3F40283CFD21\
1438 8BFC0F48600A48CC7343D3604FF3BC8822F31CEE31802E007F2B4792E183CE202F13830083809D\
1439 20C0DBFFFFDB75B7DF9CEDADA7C0202F8D5BBFF92BBA936F9F7B361031E862320901871041F462\
1440 8A9B3BC90160823DF91E51F933902DDFBD7534D40534DC77EC084E2C81034D0D33330373DBC783\
1441 CC4BDE0A80000000'H\
1442  }\
1443  }\
1444  },\
1445  annot {\
1446  {\
1447  data ftable {\
1448  {\
1449  data gene {\
1450  locus \"PSY1\"\
1451  },\
1452  comment \"MlPSY1\",\
1453  location int {\
1454  from 0,\
1455  to 4643,\
1456  strand plus,\
1457  id local str \"Seq2\"\
1458  }\
1459  },\
1460  {\
1461  data imp {\
1462  key \"5'UTR\"\
1463  },\
1464  location mix {\
1465  int {\
1466  from 0,\
1467  to 130,\
1468  strand plus,\
1469  id local str \"Seq2\"\
1470  },\
1471  int {\
1472  from 970,\
1473  to 1399,\
1474  strand plus,\
1475  id local str \"Seq2\"\
1476  },\
1477  int {\
1478  from 1800,\
1479  to 1971,\
1480  strand plus,\
1481  id local str \"Seq2\"\
1482  }\
1483  }\
1484  },\
1485  {\
1486  data imp {\
1487  key \"3'UTR\"\
1488  },\
1489  location int {\
1490  from 4565,\
1491  to 4643,\
1492  strand plus,\
1493  id genbank {\
1494  accession \"KR053166\"\
1495  }\
1496  }\
1497  }\
1498  }\
1499  }\
1500  }\
1501  },\
1502  seq {\
1503  id {\
1504  local str \"Seq2_prot_2\",\
1505  general {\
1506  db \"TMSMART\",\
1507  tag id 53779089\
1508  }\
1509  },\
1510  descr {\
1511  title \"phytoene synthase 1 [Erythranthe lewisii]\",\
1512  molinfo {\
1513  biomol peptide,\
1514  completeness complete\
1515  },\
1516  user {\
1517  type str \"OriginalID\",\
1518  data {\
1519  {\
1520  label str \"LocalId\",\
1521  data str \"Seq2_prot_2\"\
1522  }\
1523  }\
1524  }\
1525  },\
1526  inst {\
1527  repr raw,\
1528  mol aa,\
1529  length 417,\
1530  seq-data ncbieaa \"MSVALLWVVSPTSEFSNGTVFLDSFRAVSKYKNLISNSNRLNNGHKKR\
1531 RNFAMLENKSRFSVSNSMLATPAGEIALSSEQKVYDVVLKQAALVKRQMKKSSEDLEVKPDIVLPGTVTLLSEAYDRC\
1532 REVCAEYAKTFYLGTLLMTPERRRAIWAMYVWCRRTDELVDGPNASHITPTALDRWEARLDDIFSGRPFDMLDAALSD\
1533 TVTRFPVDIQPFKDMIDGMRMDLWKSRYKNFDELYLYCYYVAGTVGLMSVPIMGIAPESQATTESVYNAALALGLANQ\
1534 LTNILRDVGEDARRGRVYLPQDELAQAGLSDEDIFAGKVTDKWRNFMKKQIARARKFFDDAESGVTELSAASRWPVWA\
1535 SLLLYRQILDEIEANDYNNFTRRAYVSKPKKILALPLAYAKSLVPPSSKPSSTLVKT\"\
1536  },\
1537  annot {\
1538  {\
1539  data ftable {\
1540  {\
1541  data prot {\
1542  name {\
1543  \"phytoene synthase 1\"\
1544  }\
1545  },\
1546  location int {\
1547  from 0,\
1548  to 416,\
1549  id local str \"Seq2_prot_2\"\
1550  }\
1551  }\
1552  }\
1553  }\
1554  }\
1555  }\
1556  },\
1557  annot {\
1558  {\
1559  data ftable {\
1560  {\
1561  data cdregion {\
1562  frame one,\
1563  code {\
1564  id 1\
1565  }\
1566  },\
1567  product whole local str \"Seq2_prot_2\",\
1568  location packed-int {\
1569  {\
1570  from 1972,\
1571  to 2389,\
1572  strand plus,\
1573  id genbank {\
1574  accession \"KR053166\"\
1575  }\
1576  },\
1577  {\
1578  from 2516,\
1579  to 2566,\
1580  strand plus,\
1581  id genbank {\
1582  accession \"KR053166\"\
1583  }\
1584  },\
1585  {\
1586  from 3101,\
1587  to 3273,\
1588  strand plus,\
1589  id genbank {\
1590  accession \"KR053166\"\
1591  }\
1592  },\
1593  {\
1594  from 3570,\
1595  to 3805,\
1596  strand plus,\
1597  id genbank {\
1598  accession \"KR053166\"\
1599  }\
1600  },\
1601  {\
1602  from 3921,\
1603  to 4113,\
1604  strand plus,\
1605  id genbank {\
1606  accession \"KR053166\"\
1607  }\
1608  },\
1609  {\
1610  from 4382,\
1611  to 4564,\
1612  strand plus,\
1613  id genbank {\
1614  accession \"KR053166\"\
1615  }\
1616  }\
1617  }\
1618  }\
1619  }\
1620  }\
1621  }\
1622  }\
1623  }\
1624 }";
1625 
1626 
// ASN.1 text for a packed-int Seq-loc on GenBank accession KR053166: eight
// plus-strand intervals running from 0 to 4643. The first two intervals
// (0..130, 970..1399) equal the first two 5'UTR intervals in sc_TestEntry2;
// the third (1800..2389) joins the last 5'UTR interval to the first CDS
// interval, and the last (4382..4643) joins the final CDS interval to the
// 3'UTR. Presumably this is the expected mRNA location for that CDS - confirm
// against the test that reads it.
1627 const string sc_mrna_loc = "\
1628 Seq-loc ::= packed-int {\
1629  {\
1630  from 0,\
1631  to 130,\
1632  strand plus,\
1633  id genbank {\
1634  accession \"KR053166\"\
1635  }\
1636  },\
1637  {\
1638  from 970,\
1639  to 1399,\
1640  strand plus,\
1641  id genbank {\
1642  accession \"KR053166\"\
1643  }\
1644  },\
1645  {\
1646  from 1800,\
1647  to 2389,\
1648  strand plus,\
1649  id genbank {\
1650  accession \"KR053166\"\
1651  }\
1652  },\
1653  {\
1654  from 2516,\
1655  to 2566,\
1656  strand plus,\
1657  id genbank {\
1658  accession \"KR053166\"\
1659  }\
1660  },\
1661  {\
1662  from 3101,\
1663  to 3273,\
1664  strand plus,\
1665  id genbank {\
1666  accession \"KR053166\"\
1667  }\
1668  },\
1669  {\
1670  from 3570,\
1671  to 3805,\
1672  strand plus,\
1673  id genbank {\
1674  accession \"KR053166\"\
1675  }\
1676  },\
1677  {\
1678  from 3921,\
1679  to 4113,\
1680  strand plus,\
1681  id genbank {\
1682  accession \"KR053166\"\
1683  }\
1684  },\
1685  {\
1686  from 4382,\
1687  to 4643,\
1688  strand plus,\
1689  id genbank {\
1690  accession \"KR053166\"\
1691  }\
1692  }\
1693 }";
1694 
1695 
1696 
// ASN.1 text for an mRNA Seq-feat named "F-box protein": partial TRUE, a
// single plus-strand interval 0..1012 on local id "S7_haplotype", with a
// "lim lt" fuzz on the `from` end.
1697 const string sc_mrna1 = "Seq-feat ::= {\
1698  data rna {\
1699  type mRNA,\
1700  ext name \"F-box protein\"\
1701  },\
1702  partial TRUE,\
1703  location int {\
1704  from 0,\
1705  to 1012,\
1706  strand plus,\
1707  id local str \"S7_haplotype\",\
1708  fuzz-from lim lt\
1709  }\
1710 }";
1711 
// ASN.1 text for an mRNA Seq-feat named "S haplotype-specific F-box7":
// partial FALSE, a mix location of two minus-strand intervals on GenBank
// accession MH029536 (13002..13124 followed by 11708..12916).
1712 const string sc_mrna2 = "Seq-feat ::= {\
1713  data rna {\
1714  type mRNA,\
1715  ext name \"S haplotype-specific F-box7\"\
1716  },\
1717  partial FALSE,\
1718  location mix {\
1719  int {\
1720  from 13002,\
1721  to 13124,\
1722  strand minus,\
1723  id genbank {\
1724  accession \"MH029536\"\
1725  }\
1726  },\
1727  int {\
1728  from 11708,\
1729  to 12916,\
1730  strand minus,\
1731  id genbank {\
1732  accession \"MH029536\"\
1733  }\
1734  }\
1735  }\
1736 }";
1737 
// ASN.1 text for an mRNA Seq-feat named "ribonuclease": partial FALSE, a mix
// location of three plus-strand intervals on local id "S7_haplotype"
// (7628..7756, 8032..8219, 9745..10179).
1738 const string sc_mrna3 = "Seq-feat ::= {\
1739  data rna {\
1740  type mRNA,\
1741  ext name \"ribonuclease\"\
1742  },\
1743  partial FALSE,\
1744  location mix {\
1745  int {\
1746  from 7628,\
1747  to 7756,\
1748  strand plus,\
1749  id local str \"S7_haplotype\"\
1750  },\
1751  int {\
1752  from 8032,\
1753  to 8219,\
1754  strand plus,\
1755  id local str \"S7_haplotype\"\
1756  },\
1757  int {\
1758  from 9745,\
1759  to 10179,\
1760  strand plus,\
1761  id local str \"S7_haplotype\"\
1762  }\
1763  }\
1764 }";
1765 
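1766 // Shell pipeline that turns an ASN.1 text file into an escaped C++ string
// literal body: the first sed escapes every double quote, the second appends
// a line-continuation backslash to each line. Presumably used to produce the
// test-data literal that follows in the original file:
// cat mrna_cds_exon.asn |sed 's/"/\\"/g'|sed 's/$/\\/g'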
1769 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool ExtendCDSToStopCodon(CSeq_feat &cds, CScope &scope)
ExtendCDSToStopCodon A function to extend a CDS location to the first in-frame stop codon in the prot...
Definition: cds_fix.cpp:835
bool AdjustProteinFeaturePartialsToMatchCDS(CSeq_feat &new_prot, const CSeq_feat &cds)
AdjustProteinFeaturePartialsToMatchCDS A function to change an existing MolInfo to match a coding reg...
Definition: cds_fix.cpp:398
CRef< CSeq_feat > MakemRNAforCDS(const CSeq_feat &cds, CScope &scope)
MakemRNAforCDS A function to create a CSeq_feat that represents the appropriate mRNA for a given CDS.
Definition: cds_fix.cpp:525
bool TruncateCDSAtStop(CSeq_feat &cds, CScope &scope)
TruncateCDSAtStop A function to truncate a CDS location after the first stop codon in the protein tra...
Definition: cds_fix.cpp:786
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
vector< CRef< objects::CSeq_id > > GetNewProtIdFromExistingProt(objects::CBioseq_Handle bsh, int &offset, string &id_label)
CRef< CGenetic_code > GetGeneticCodeForBioseq(CBioseq_Handle bh)
GetGeneticCodeForBioseq A function to construct the appropriate CGenetic_code object to use when cons...
Definition: cds_fix.cpp:707
bool DemoteCDSToNucSeq(objects::CSeq_feat_Handle &orig_feat)
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CCdregion –.
Definition: Cdregion.hpp:66
CFeat_CI –.
Definition: feat_ci.hpp:64
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
bool ApplyToString(string &result, const CMatchString &str, CConstRef< CString_constraint > find) const
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
CSeq_annot_Handle –.
CSeq_entry_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool ApplyToString(string &result, const CMatchString &str) const
CWord_substitution_set –.
#define test(a, b, c, d, e)
Definition: numeric.c:170
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
bool PromoteCDSToNucProtSet(objects::CSeq_feat_Handle &orig_feat)
Promotes coding region from Seq-annot on nucleotide sequence to Seq-annot on nuc-prot-set if necessar...
Definition: feature.cpp:3914
bool AdjustFeaturePartialFlagForLocation(CSeq_feat &new_feat)
AdjustFeaturePartialFlagForLocation A function to ensure that Seq-feat.partial is set if either end o...
Definition: feature.cpp:3983
bool AdjustForCDSPartials(const CSeq_feat &cds, CSeq_entry_Handle seh)
AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the...
Definition: feature.cpp:4115
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
Definition: feature.cpp:4024
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS.
Definition: sequence.cpp:1261
void RemoveSeq_annot(const CSeq_annot_Handle &annot)
Revoke Seq-annot previously added using AddSeq_annot().
Definition: scope.cpp:388
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
Definition: scope.cpp:538
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
TSeq GetSeq(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetDb(const TDb &value)
Assign a value to Db data member.
Definition: Dbtag_.hpp:229
void SetReplace(const TReplace &value)
Assign a value to Replace data member.
TSimple_replace & SetSimple_replace(void)
Select the variant.
void SetFind(TFind &value)
Assign a value to Find data member.
void SetWeasel_to_putative(TWeasel_to_putative value)
Assign a value to Weasel_to_putative data member.
THaem_replace & SetHaem_replace(void)
Select the variant.
void SetReplace(TReplace &value)
Assign a value to Replace data member.
@ eString_location_equals
@ eString_location_starts
@ eString_location_ends
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void ResetCode_break(void)
Reset Code_break data member.
Definition: Cdregion_.cpp:80
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void ResetComment(void)
Reset Comment data member.
Definition: Seq_feat_.cpp:99
const TCdregion & GetCdregion(void) const
Get the variant data.
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
Definition: Cdregion_.hpp:733
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
TGeneral & SetGeneral(void)
Select the variant.
Definition: Seq_id_.cpp:375
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
void SetClass(TClass value)
Assign a value to Class data member.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
@ eCompleteness_unknown
Definition: MolInfo_.hpp:155
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
The Object manager core.
Definition: inftrees.h:24
Utility stuff for more convenient using of Boost.Test library.
static bool s_debugMode
NCBITEST_INIT_TREE()
NCBITEST_INIT_CMDLINE(arg_desc)
void CheckTerminalExceptionResults(CSeq_feat &cds, CScope &scope, bool strict, bool extend, bool expected_rval, bool set_codebreak, bool set_comment, TSeqPos expected_endpoint)
const string sc_TestEntry2
const string sc_mrna2
const string sc_TestEntry3
const string sc_mrna1
BOOST_AUTO_TEST_CASE(Test_AddTerminalTranslationException)
const string sc_TestEntry
NCBITEST_AUTO_INIT()
const string sc_mrna_loc
const string sc_mrna3
void OneTerminalTranslationExceptionTest(bool strict, bool extend, TSeqPos endpoint, const string &seq, bool expected_rval, bool set_codebreak, bool set_comment, TSeqPos expected_endpoint)
#define STANDARD_SETUP
CRef< objects::CSeq_feat > AddGoodImpFeat(CRef< objects::CSeq_entry > entry, string key)
void SetGenome(CRef< objects::CSeq_entry > entry, objects::CBioSource::TGenome genome)
CRef< objects::CSeq_feat > GetCDSFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_annot > AddFeat(CRef< objects::CSeq_feat > feat, CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGoodSeq(void)
CRef< objects::CSeq_feat > AddMiscFeature(CRef< objects::CSeq_entry > entry)
void ChangeId(CRef< objects::CSeq_annot > annot, CRef< objects::CSeq_id > id)
void SetMGcode(CRef< objects::CSeq_entry > entry, objects::COrgName::TGcode mgcode)
CRef< objects::CSeq_feat > GetProtFeatFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > GetProteinSequenceFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
void SetPGcode(CRef< objects::CSeq_entry > entry, objects::COrgName::TGcode pgcode)
CRef< objects::CSeq_entry > BuildGoodNucProtSet(void)
CRef< objects::CSeq_feat > MakemRNAForCDS(CRef< objects::CSeq_feat > feat)
CRef< objects::CSeq_entry > GetNucleotideSequenceFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
void RevComp(objects::CBioseq &bioseq)
void SetGcode(CRef< objects::CSeq_entry > entry, objects::COrgName::TGcode gcode)
CRef< CCmdComposite > SetTranslExcept(objects::CSeq_entry_Handle seh, const string &comment, bool strict, bool extend, bool adjust_gene)
Modified on Sun Apr 14 05:29:20 2024 by modify_doxy.py rev. 669887