NCBI C++ ToolKit
unit_test_seq_translator.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_seq_translator.cpp 93060 2021-03-03 17:57:41Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Pavel Ivanov, NCBI
27 *
28 * File Description:
29 * Unit tests for CSeqTranslator (unit_test_seq_translator.cpp).
30 *
31 * The file layout follows the basic, most common usage of the Ncbi.Test
32 * framework, which is based on Boost.Test. For more advanced techniques
33 * see the alternative sample, unit_test_alt_sample.cpp.
34 *
35 * ===========================================================================
36 */
37 
38 #include <ncbi_pch.hpp>
39 
40 #include <corelib/ncbi_system.hpp>
41 
42 // This macro should be defined before including test_boost.hpp in every
43 // "*.cpp" file of the executable except one. Just as main() is defined in
44 // only one *.cpp file of a non-Boost.Test executable, only one file may let
45 // test_boost.hpp define it: if NCBI_BOOST_NO_AUTO_TEST_MAIN is not defined,
46 // test_boost.hpp defines such a "main()" function for the tests.
47 //
48 // Usually, if your unit tests consist of only one *.cpp file, you do not
49 // need to care about this macro at all.
50 //
51 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
52 
53 
54 // This header must be included before all Boost.Test headers if there are any
55 #include <corelib/test_boost.hpp>
56 
59 #include <objmgr/scope.hpp>
60 #include <objmgr/bioseq_ci.hpp>
61 #include <objmgr/feat_ci.hpp>
62 #include <objmgr/seq_vector.hpp>
63 #include <objmgr/util/sequence.hpp>
65 #include <objects/seq/Seq_inst.hpp>
66 #include <objects/seq/Seq_ext.hpp>
73 
76 
77 extern const string sc_TestEntry; //
78 extern const string sc_TestEntry_code_break; //
79 extern const string sc_TestEntry_alt_frame; //
80 extern const string sc_TestEntry_internal_stop; //
81 extern const string sc_TestEntry_5prime_partial;
82 extern const string sc_TestEntry_3prime_partial;
83 extern const string sc_TestEntry_5prime_partial_minus;
84 extern const string sc_TestEntry_TerminalTranslExcept;
85 extern const string sc_TestEntry_ShortCDS;
86 extern const string sc_TestEntry_FirstCodon;
87 extern const string sc_TestEntry_FirstCodon2;
88 extern const string sc_TestEntry_GapInSeq1;
89 extern const string sc_TestEntry_GapInSeq2;
90 extern const string sc_TestEntry_GapInSeq3;
91 extern const string sc_TestEntry_GapInSeq4;
92 extern const string sc_TestEntry_GapInSeq5;
93 extern const string sc_TestEntry_CodeBreakForStopCodon;
94 extern const string sc_TestEntry_GB_2236;
95 
96 static string GetProteinString (CFeat_CI fi, CScope& scope)
97 {
98  string real_prot_seq;
99  CBioseq_Handle bsh =
100  scope.GetBioseqHandle(*(fi->GetProduct().GetId()));
102  vec.SetCoding(CSeq_data::e_Ncbieaa); // allow extensions
103  vec.GetSeqData(0, bsh.GetBioseqLength(), real_prot_seq);
104  return real_prot_seq;
105 }
106 
107 
108 #ifdef TEST_DEPRECATED
109 // removed, CCdregion_translate::TranslateCdregion is deprecated, so discontinue unit test
110 BOOST_AUTO_TEST_CASE(Test_TranslateCdregion)
111 {
112  CSeq_entry entry;
113  {{
115  istr >> MSerial_AsnText >> entry;
116  }}
117 
119  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
120  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
121  CFeat_CI feat_iter(*bs_iter,
122  SAnnotSelector().IncludeFeatSubtype
124  for ( ; feat_iter; ++feat_iter) {
125  ///
126  /// retrieve the actual protein sequence
127  ///
128  string real_prot_seq = GetProteinString(feat_iter, scope);
129 
130  ///
131  /// translate the CDRegion directly
132  ///
133  string tmp;
134 
135  /// use CCdregion_translate
136  tmp.clear();
138  (tmp, feat_iter->GetOriginalFeature(), scope, false);
139  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
140 
141  /// use CCdregion_translate, include the stop codon
142  real_prot_seq += '*';
143  tmp.clear();
145  (tmp, feat_iter->GetOriginalFeature(), scope, true);
146  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
147  }
148  }
149 }
150 #endif
151 
152 
153 BOOST_AUTO_TEST_CASE(Test_Translator_Raw)
154 {
155  CSeq_entry entry;
156  {{
158  istr >> MSerial_AsnText >> entry;
159  }}
160 
162  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
163  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
164  CBioseq_Handle bsh = *bs_iter;
166 
167  CFeat_CI feat_iter(*bs_iter,
168  SAnnotSelector().IncludeFeatSubtype
170  for ( ; feat_iter; ++feat_iter) {
171  ///
172  /// retrieve the actual protein sequence
173  ///
174  string real_prot_seq = GetProteinString (feat_iter, scope);
175 
176  string nucleotide_sequence;
177  vec.GetSeqData(feat_iter->GetTotalRange().GetFrom(),
178  feat_iter->GetTotalRange().GetTo() + 1,
179  nucleotide_sequence);
180 
181  ///
182  /// translate the CDRegion directly
183  ///
184  string tmp;
185 
186  /// use CSeqTranslator::Translate()
187  tmp.clear();
188  CSeqTranslator::Translate(nucleotide_sequence, tmp,
191 
192  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
193 
194  /// use CSeqTranslator::Translate(), include the stop codon
195  real_prot_seq += '*';
196  tmp.clear();
198 
199  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
200  }
201  }
202 }
203 
204 
205 BOOST_AUTO_TEST_CASE(Test_Translator_CSeqVector)
206 {
207  CSeq_entry entry;
208  {{
210  istr >> MSerial_AsnText >> entry;
211  }}
212 
214  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
215  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
216  CFeat_CI feat_iter(*bs_iter,
217  SAnnotSelector().IncludeFeatSubtype
219  for ( ; feat_iter; ++feat_iter) {
220  ///
221  /// retrieve the actual protein sequence
222  ///
223  string real_prot_seq = GetProteinString (feat_iter, scope);
224 
225 
226  CSeqVector vec(feat_iter->GetLocation(), scope);
227 
228  ///
229  /// translate the CDRegion directly
230  ///
231  string tmp;
232 
233  /// use CSeqTranslator::Translate()
234  tmp.clear();
238  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
239 
240  /// use CSeqTranslator::Translate()
241  real_prot_seq += '*';
242  tmp.clear();
244  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
245  }
246  }
247 }
248 
249 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_loc_1)
250 {
251  CSeq_entry entry;
252  {{
254  istr >> MSerial_AsnText >> entry;
255  }}
256 
258  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
259  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
260  CFeat_CI feat_iter(*bs_iter,
261  SAnnotSelector().IncludeFeatSubtype
263  for ( ; feat_iter; ++feat_iter) {
264  ///
265  /// retrieve the actual protein sequence
266  ///
267  string real_prot_seq = GetProteinString (feat_iter, scope);
268 
269  ///
270  /// translate the CDRegion directly
271  ///
272  string tmp;
273 
274  /// use CSeqTranslator::Translate()
275  tmp.clear();
276  CSeqTranslator::Translate(feat_iter->GetLocation(), bs_iter->GetScope(), tmp,
277  NULL, false, true);
278  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
279 
280  /// use CSeqTranslator::Translate()
281  real_prot_seq += '*';
282  tmp.clear();
283  CSeqTranslator::Translate(feat_iter->GetLocation(), bs_iter->GetScope(), tmp,
284  NULL, true, true);
285  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
286  }
287  }
288 }
289 
290 
291 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_loc_2)
292 {
293  CSeq_entry entry;
294  {{
296  istr >> MSerial_AsnText >> entry;
297  }}
298 
300  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
301  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
302  CFeat_CI feat_iter(*bs_iter,
303  SAnnotSelector().IncludeFeatSubtype
305  for ( ; feat_iter; ++feat_iter) {
306  ///
307  /// retrieve the actual protein sequence
308  ///
309  string real_prot_seq = GetProteinString (feat_iter, scope);
310 
311  ///
312  /// translate the CDRegion directly
313  ///
314  string tmp;
315 
316  /// use CSeqTranslator::Translate()
317  tmp.clear();
318  CSeqTranslator::Translate(feat_iter->GetLocation(), scope, tmp,
319  NULL, false, true);
320  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
321 
322  /// use CSeqTranslator::Translate()
323  real_prot_seq += '*';
324  tmp.clear();
325  CSeqTranslator::Translate(feat_iter->GetLocation(), scope, tmp,
326  NULL, true, true);
327  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
328  }
329  }
330 }
331 
332 
333 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat)
334 {
335  CSeq_entry entry;
336  {{
338  istr >> MSerial_AsnText >> entry;
339  }}
340 
342  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
343  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
344  CFeat_CI feat_iter(*bs_iter,
345  SAnnotSelector().IncludeFeatSubtype
347  for ( ; feat_iter; ++feat_iter) {
348  ///
349  /// retrieve the actual protein sequence
350  ///
351  string real_prot_seq = GetProteinString (feat_iter, scope);
352 
353  ///
354  /// translate the CDRegion directly
355  ///
356  string tmp;
357 
358  /// use CSeqTranslator::Translate()
359  tmp.clear();
361  scope, tmp,
362  false, true);
363  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
364 
365  /// use CSeqTranslator::Translate()
366  real_prot_seq += '*';
367  tmp.clear();
369  scope, tmp,
370  true, true);
371  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
372  }
373  }
374 }
375 
376 
377 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_code_break)
378 {
379  CSeq_entry entry;
380  {{
382  istr >> MSerial_AsnText >> entry;
383  }}
384 
386  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
387  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
388  CFeat_CI feat_iter(*bs_iter,
389  SAnnotSelector().IncludeFeatSubtype
391  for ( ; feat_iter; ++feat_iter) {
392  ///
393  /// retrieve the actual protein sequence
394  ///
395  string real_prot_seq = GetProteinString (feat_iter, scope);
396 
397  ///
398  /// translate the CDRegion directly
399  ///
400  string tmp;
401 
402  /// use CSeqTranslator::Translate()
403  tmp.clear();
405  scope, tmp,
406  false, true);
407  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
408 
409  /// use CSeqTranslator::Translate()
410  real_prot_seq += '*';
411  tmp.clear();
413  scope, tmp,
414  true, true);
415  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
416  }
417  }
418 }
419 
420 
421 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_alt_frame)
422 {
423  CSeq_entry entry;
424  {{
426  istr >> MSerial_AsnText >> entry;
427  }}
428 
430  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
431  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
432  CFeat_CI feat_iter(*bs_iter,
433  SAnnotSelector().IncludeFeatSubtype
435  for ( ; feat_iter; ++feat_iter) {
436  ///
437  /// retrieve the actual protein sequence
438  ///
439  string real_prot_seq = GetProteinString (feat_iter, scope);
440 
441  ///
442  /// translate the CDRegion directly
443  ///
444  string tmp;
445 
446  /// use CSeqTranslator::Translate()
447  tmp.clear();
449  scope, tmp,
450  false, true);
451  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
452 
453  /// use CSeqTranslator::Translate()
454  real_prot_seq += '*';
455  tmp.clear();
457  scope, tmp,
458  true, true);
459  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
460  }
461  }
462 }
463 
464 
465 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_internal_stop)
466 {
467  CSeq_entry entry;
468  {{
470  istr >> MSerial_AsnText >> entry;
471  }}
472 
474  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
475  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
476  CFeat_CI feat_iter(*bs_iter,
477  SAnnotSelector().IncludeFeatSubtype
479  for ( ; feat_iter; ++feat_iter) {
480  ///
481  /// retrieve the actual protein sequence
482  ///
483  string real_prot_seq = GetProteinString (feat_iter, scope);
484  real_prot_seq[51] = '*';
485 
486  ///
487  /// translate the CDRegion directly
488  ///
489  string tmp;
490 
491  /// use CSeqTranslator::Translate()
492  real_prot_seq += '*';
493  tmp.clear();
495  scope, tmp,
496  true /*include stops*/,
497  true /*remove trailing X*/);
498  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
499 
500  /// use CSeqTranslator::Translate()
501  tmp.clear();
502  real_prot_seq.erase(real_prot_seq.find_first_of("*"));
504  scope, tmp,
505  false /*include stops*/,
506  true /*remove trailing X*/);
507  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
508 
509  }
510  }
511 }
512 
513 
514 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_5prime_partial)
515 {
516  CSeq_entry entry;
517  {{
519  istr >> MSerial_AsnText >> entry;
520  }}
521 
523  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
524  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
525  CFeat_CI feat_iter(*bs_iter,
526  SAnnotSelector().IncludeFeatSubtype
528  for ( ; feat_iter; ++feat_iter) {
529  ///
530  /// retrieve the actual protein sequence
531  ///
532  string real_prot_seq = GetProteinString (feat_iter, scope);
533 
534  ///
535  /// translate the CDRegion directly
536  ///
537  string tmp;
538 
539  /// use CSeqTranslator::Translate()
540  real_prot_seq += '*';
541  tmp.clear();
543  scope, tmp,
544  true /*include stops*/,
545  true /*remove trailing X*/);
546  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
547 
548  /// use CSeqTranslator::Translate()
549  real_prot_seq.erase(real_prot_seq.find_first_of("*"));
550  tmp.clear();
552  scope, tmp,
553  false /*include stops*/,
554  true /*remove trailing X*/);
555  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
556  for (size_t i = 0; i < real_prot_seq.size() && i < tmp.size(); ++i) {
557  if (real_prot_seq[i] != tmp[i]) {
558  ERR_POST(Error << "char " << i << ": "
559  << real_prot_seq[i] << " != "
560  << tmp[i]);
561  }
562  }
563  }
564  }
565 }
566 
567 
568 
569 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_3prime_partial)
570 {
571  CSeq_entry entry;
572  {{
574  istr >> MSerial_AsnText >> entry;
575  }}
576 
578  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
579  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
580  CFeat_CI feat_iter(*bs_iter,
581  SAnnotSelector().IncludeFeatSubtype
583  for ( ; feat_iter; ++feat_iter) {
584  ///
585  /// retrieve the actual protein sequence
586  ///
587  string real_prot_seq = GetProteinString (feat_iter, scope);
588  real_prot_seq[51] = '*';
589 
590  ///
591  /// translate the CDRegion directly
592  ///
593  string tmp;
594 
595  /// use CSeqTranslator::Translate()
596  tmp.clear();
598  scope, tmp,
599  true, true);
600  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
601  }
602  }
603 }
604 
605 
606 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_5prime_partial_minus)
607 {
608  CSeq_entry entry;
609  {{
611  istr >> MSerial_AsnText >> entry;
612  }}
613 
615  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
616  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
617  CFeat_CI feat_iter(*bs_iter,
618  SAnnotSelector().IncludeFeatSubtype
620  for ( ; feat_iter; ++feat_iter) {
621  ///
622  /// retrieve the actual protein sequence
623  ///
624  string real_prot_seq = GetProteinString (feat_iter, scope);
625 
626  ///
627  /// translate the CDRegion directly
628  ///
629  string tmp;
630 
631  /// use CSeqTranslator::Translate()
632  tmp.clear();
634  scope, tmp,
635  false, true);
636  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
637  for (size_t i = 0; i < real_prot_seq.size() && i < tmp.size(); ++i) {
638  if (real_prot_seq[i] != tmp[i]) {
639  ERR_POST(Error << "char " << i << ": "
640  << real_prot_seq[i] << " != "
641  << tmp[i]);
642  }
643  }
644 
645  /// use CSeqTranslator::Translate()
646  tmp.clear();
648  scope, tmp,
649  true, true);
650  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
651  }
652  }
653 }
654 
655 
656 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_TerminalTranslExcept)
657 {
658  CSeq_entry entry;
659  {{
661  istr >> MSerial_AsnText >> entry;
662  }}
663 
665  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
666  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
667  CFeat_CI feat_iter(*bs_iter,
668  SAnnotSelector().IncludeFeatSubtype
670  for ( ; feat_iter; ++feat_iter) {
671  ///
672  /// retrieve the actual protein sequence
673  ///
674  string real_prot_seq = GetProteinString (feat_iter, scope);
675 
676  ///
677  /// translate the CDRegion directly
678  ///
679  string tmp;
680 
681  /// use CSeqTranslator::Translate()
682  tmp.clear();
684  scope, tmp,
685  false, true);
686  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
687  for (size_t i = 0; i < real_prot_seq.size() && i < tmp.size(); ++i) {
688  if (real_prot_seq[i] != tmp[i]) {
689  ERR_POST(Error << "char " << i << ": "
690  << real_prot_seq[i] << " != "
691  << tmp[i]);
692  }
693  }
694 
695  /// use CSeqTranslator::Translate()
696  real_prot_seq += '*';
697  tmp.clear();
699  scope, tmp,
700  true, true);
701  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
702  }
703  }
704 }
705 
706 
707 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_ShortCDS)
708 {
709  CSeq_entry entry;
710  {{
712  istr >> MSerial_AsnText >> entry;
713  }}
714 
716  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
717  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
718  CFeat_CI feat_iter(*bs_iter,
719  SAnnotSelector().IncludeFeatSubtype
721  for ( ; feat_iter; ++feat_iter) {
722 
723  ///
724  /// translate the CDRegion directly
725  ///
726  string tmp;
727 
728  /// use CSeqTranslator::Translate()
729  tmp.clear();
731  scope, tmp,
732  false, true);
733  BOOST_CHECK_EQUAL("-", tmp);
734  }
735  }
736 }
737 
738 
739 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_FirstCodon)
740 {
741  CSeq_entry entry;
742  {{
744  istr >> MSerial_AsnText >> entry;
745  }}
746 
748  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
749 
750  CRef<CSeq_feat> feat (new CSeq_feat());
751  feat->SetData().SetCdregion();
752  feat->SetLocation().SetInt().SetId().SetLocal().SetStr("FirstCodon");
753  feat->SetLocation().SetInt().SetFrom(0);
754  feat->SetLocation().SetInt().SetTo(38);
755  CRef<CSeq_annot> annot(new CSeq_annot());
756  annot->SetData().SetFtable().push_back(feat);
757  entry.SetSeq().SetAnnot().push_back(annot);
758 
759  string tmp;
760  string complete_trans = "-MGMCFLRGWKGV";
761  string partial_trans = "KMGMCFLRGWKGV";
762 
763  // translate with vector
764  tmp.clear();
765  CSeqVector vec(feat->GetLocation(), scope);
766  // default value for 5' complete is true
769  BOOST_CHECK_EQUAL(complete_trans, tmp);
770  // try it with flag version
771  tmp.clear();
773  BOOST_CHECK_EQUAL(complete_trans, tmp);
774 
775  // set 5' complete false
776  tmp.clear();
777 #ifdef TEST_DEPRECATED
779  NULL, false, true, 0, false);
780  BOOST_CHECK_EQUAL(partial_trans, tmp);
781 #endif
782  // try it with flag version
783  tmp.clear();
785  BOOST_CHECK_EQUAL(partial_trans, tmp);
786 
787  // translate with string
788  string seq_str;
789  vec.GetSeqData(0, entry.GetSeq().GetLength(), seq_str);
790  // default value for 5' complete is true
791 #ifdef TEST_DEPRECATED
793  NULL, false, true);
794  BOOST_CHECK_EQUAL(complete_trans, tmp);
795 #endif
796  // try it with flag version
797  tmp.clear();
799  BOOST_CHECK_EQUAL(complete_trans, tmp);
800 
801  // set 5' complete false
802  tmp.clear();
803 #ifdef TEST_DEPRECATED
805  NULL, false, true, 0, false);
806  BOOST_CHECK_EQUAL(partial_trans, tmp);
807 #endif
808  // try it with flag version
809  tmp.clear();
811  BOOST_CHECK_EQUAL(partial_trans, tmp);
812 
813 
814  ///
815  /// translate the CDRegion directly
816  ///
817 
818  /// use CSeqTranslator::Translate()
819  tmp.clear();
821  scope, tmp,
822  false, true);
823  BOOST_CHECK_EQUAL(complete_trans, tmp);
824 
825  // if partial, should translate first codon
826  feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
827  tmp.clear();
829  scope, tmp,
830  false, true);
831  BOOST_CHECK_EQUAL(partial_trans, tmp);
832 
833 
834 
835 }
836 
837 
838 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_FirstCodon2)
839 {
840  // here, the first codon translates to M if complete, because it's an alternate start,
841  // but L if partial
842  CSeq_entry entry;
843  {{
845  istr >> MSerial_AsnText >> entry;
846  }}
847 
849  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
850 
851  CRef<CSeq_feat> feat (new CSeq_feat());
852  feat->SetData().SetCdregion();
853  feat->SetLocation().SetInt().SetId().SetLocal().SetStr("FirstCodon2");
854  feat->SetLocation().SetInt().SetFrom(0);
855  feat->SetLocation().SetInt().SetTo(26);
856  CRef<CSeq_annot> annot(new CSeq_annot());
857  annot->SetData().SetFtable().push_back(feat);
858  entry.SetSeq().SetAnnot().push_back(annot);
859 
860  string tmp;
861  string complete_trans = "MP*K*E*N*";
862  string partial_trans = "LP*K*E*N*";
863 
864  // translate with vector
865  tmp.clear();
866  CSeqVector vec(feat->GetLocation(), scope);
867 
868  //
869  // default value for 5' complete is true
870 #ifdef TEST_DEPRECATED
872  NULL, true, true);
873  BOOST_CHECK_EQUAL(complete_trans, tmp);
874 #endif
875 
876  // try it with flag version
877  tmp.clear();
879  BOOST_CHECK_EQUAL(complete_trans, tmp);
880 
881  //
882  // set 5' complete false
883  tmp.clear();
884 #ifdef TEST_DEPRECATED
886  NULL, true, true, 0, false);
887  BOOST_CHECK_EQUAL(partial_trans, tmp);
888 #endif
889 
890  // try it with flag version
891  tmp.clear();
893  BOOST_CHECK_EQUAL(partial_trans, tmp);
894 
895 
896  // translate with string
897  string seq_str;
898  vec.GetSeqData(0, entry.GetSeq().GetLength(), seq_str);
899  // default value for 5' complete is true
900 #ifdef TEST_DEPRECATED
902  NULL, true, true);
903  BOOST_CHECK_EQUAL(complete_trans, tmp);
904 #endif
905  // try it with flag version
906  tmp.clear();
907  CSeqTranslator::Translate(seq_str, tmp, 0);
908  BOOST_CHECK_EQUAL(complete_trans, tmp);
909 
910  // set 5' complete false
911  tmp.clear();
912 #ifdef TEST_DEPRECATED
914  NULL, true, true, 0, false);
915  BOOST_CHECK_EQUAL(partial_trans, tmp);
916 #endif
917  // try it with flag version
918  tmp.clear();
920  BOOST_CHECK_EQUAL(partial_trans, tmp);
921 
922 
923  ///
924  /// translate the CDRegion directly
925  ///
926 
927  /// use CSeqTranslator::Translate()
928  tmp.clear();
930  scope, tmp,
931  true, true);
932  BOOST_CHECK_EQUAL(complete_trans, tmp);
933 
934  // if partial, should translate first codon
935  feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
936  tmp.clear();
938  scope, tmp,
939  true, true);
940  BOOST_CHECK_EQUAL(partial_trans, tmp);
941 
942 }
943 
944 
945 static void CheckTranslatedBioseq (CRef<CBioseq> bioseq, string seg1, bool mid_fuzz, string seg2)
946 {
947  if (bioseq) {
948  BOOST_CHECK_EQUAL(CSeq_inst::eRepr_delta, bioseq->GetInst().GetRepr());
949  if (bioseq->GetInst().IsSetExt()
950  && bioseq->GetInst().GetExt().IsDelta()) {
951  CDelta_ext::Tdata::iterator seg_it = bioseq->SetInst().SetExt().SetDelta().Set().begin();
952  CRef<CDelta_seq> seg = *seg_it;
953  const CSeq_literal& lit1 = seg->GetLiteral();
954  string p1 = lit1.GetSeq_data().GetIupacaa().Get();
955  BOOST_CHECK_EQUAL(seg1, p1);
956 
957  ++seg_it;
958  if (seg_it != bioseq->SetInst().SetExt().SetDelta().Set().end()) {
959  seg = *seg_it;
960 
961  BOOST_CHECK_EQUAL(true, seg->GetLiteral().GetSeq_data().IsGap());
962  BOOST_CHECK_EQUAL(mid_fuzz, seg->GetLiteral().IsSetFuzz());
963  ++seg_it;
964  } else {
965  BOOST_CHECK_EQUAL("Missing segment", "Missing segment in Bioseq");
966  }
967 
968  if (seg_it != bioseq->SetInst().SetExt().SetDelta().Set().end()) {
969  seg = *seg_it;
970  const CSeq_literal& lit2 = seg->GetLiteral();
971  string p2 = lit2.GetSeq_data().GetIupacaa().Get();
972  BOOST_CHECK_EQUAL(seg2, p2);
973  } else {
974  BOOST_CHECK_EQUAL("Missing segment", "Missing segment in Bioseq");
975  }
976  } else {
977  BOOST_CHECK_EQUAL("Expected delta seq", "Result not delta seq");
978  }
979  } else {
980  BOOST_CHECK_EQUAL("Expected Bioseq creation", "Bioseq creation failed");
981  }
982 }
983 
984 
985 static void CheckTranslatedBioseq (CRef<CBioseq> bioseq, string seqdata)
986 {
987  if (bioseq) {
988  BOOST_CHECK_EQUAL(CSeq_inst::eRepr_raw, bioseq->GetInst().GetRepr());
989  if (bioseq->GetInst().IsSetSeq_data()) {
990  if (bioseq->GetInst().GetSeq_data().IsIupacaa()) {
991  BOOST_CHECK_EQUAL(seqdata, bioseq->GetInst().GetSeq_data().GetIupacaa().Get());
992  } else if (bioseq->GetInst().GetSeq_data().IsNcbieaa()) {
993  BOOST_CHECK_EQUAL(seqdata, bioseq->GetInst().GetSeq_data().GetNcbieaa().Get());
994  } else {
995  BOOST_CHECK_EQUAL("Unexpected encoding", "Result not Iupacaa or Ncbieaa");
996  }
997  } else {
998  BOOST_CHECK_EQUAL("Expected raw seq", "Result not raw seq");
999  }
1000  } else {
1001  BOOST_CHECK_EQUAL("Expected Bioseq creation", "Bioseq creation failed");
1002  }
1003 }
1004 
1005 
1006 static void SetLocationSkipGap (CRef<CSeq_feat> feat, const CBioseq& bioseq)
1007 {
1008  string local_id = bioseq.GetId().front()->GetLocal().GetStr();
1009 
1010  feat->ResetLocation();
1011  CDelta_ext::Tdata::const_iterator nuc_it = bioseq.GetInst().GetExt().GetDelta().Get().begin();
1012  size_t pos = 0;
1013  while (nuc_it != bioseq.GetInst().GetExt().GetDelta().Get().end()) {
1014  size_t lit_len = (*nuc_it)->GetLiteral().GetLength();
1015  if ((*nuc_it)->GetLiteral().IsSetSeq_data() && (*nuc_it)->GetLiteral().GetSeq_data().IsIupacna()) {
1016  CRef<CSeq_id> id(new CSeq_id());
1017  id->SetLocal().SetStr(local_id);
1018  feat->SetLocation().SetMix().AddInterval(*id, pos, pos + lit_len - 1);
1019  }
1020  pos += lit_len;
1021  ++nuc_it;
1022  }
1023 }
1024 
1025 
1026 static void TestOneGapSeq(const string& asn, string seg1, string seg2)
1027 {
1028  CSeq_entry entry;
1029  {{
1030  CNcbiIstrstream istr(asn);
1031  istr >> MSerial_AsnText >> entry;
1032  }}
1033 
1034  string local_id = entry.GetSeq().GetId().front()->GetLocal().GetStr();
1035 
1036  CRef<CSeq_feat> feat (new CSeq_feat());
1037  feat->SetData().SetCdregion();
1038  feat->SetLocation().SetInt().SetId().SetLocal().SetStr(local_id);
1039  feat->SetLocation().SetInt().SetFrom(0);
1040  feat->SetLocation().SetInt().SetTo(entry.GetSeq().GetLength() - 1);
1041  CRef<CSeq_annot> annot(new CSeq_annot());
1042  annot->SetData().SetFtable().push_back(feat);
1043  entry.SetSeq().SetAnnot().push_back(annot);
1044 
1046  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1047 
1048  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1049  CheckTranslatedBioseq (bioseq, seg1, false, seg2);
1050 
1051  // take sequence out of scope, so that change in fuzz will be noted
1052  scope.RemoveTopLevelSeqEntry(seh);
1053  CDelta_ext::Tdata::iterator nuc_it = entry.SetSeq().SetInst().SetExt().SetDelta().Set().begin();
1054  ++nuc_it;
1055  CRef<CDelta_seq> nuc_mid = *nuc_it;
1056  nuc_mid->SetLiteral().SetFuzz().SetLim(CInt_fuzz::eLim_unk);
1057  seh = scope.AddTopLevelSeqEntry(entry);
1058 
1059  bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1060  CheckTranslatedBioseq (bioseq, seg1, true, seg2);
1061 }
1062 
1063 
1064 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_GapInSeq)
1065 {
1066  TestOneGapSeq (sc_TestEntry_GapInSeq1, "MPK", "PK");
1067  // try with gap not on codon boundary
1068  TestOneGapSeq (sc_TestEntry_GapInSeq2, "MPX", "XPK");
1069  // try with 2 leftover nt, no stop codon
1070  TestOneGapSeq (sc_TestEntry_GapInSeq3, "MPK", "PKI");
1071 
1072  // try with coding region that has gap in intron
1073  CSeq_entry entry;
1074  {{
1076  istr >> MSerial_AsnText >> entry;
1077  }}
1078 
1080  CRef<CSeq_feat> feat (new CSeq_feat());
1081  feat->SetData().SetCdregion();
1082  SetLocationSkipGap (feat, entry.SetSeq());
1083  CRef<CSeq_annot> annot(new CSeq_annot());
1084  annot->SetData().SetFtable().push_back(feat);
1085  entry.SetSeq().SetAnnot().push_back(annot);
1086  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1087 
1088  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1089  CheckTranslatedBioseq (bioseq, "MPKPK");
1090 }
1091 
1092 
1093 BOOST_AUTO_TEST_CASE(Test_Translator_CSeq_feat_ZeroGap)
1094 {
1095  // try with coding region that has zero-length gap
1096 
1097  CSeq_entry entry;
1098  {{
1100  istr >> MSerial_AsnText >> entry;
1101  }}
1102 
1104  CRef<CSeq_feat> feat (new CSeq_feat());
1105  feat->SetData().SetCdregion();
1106  feat->SetLocation().SetInt().SetId().SetLocal().SetStr("GapInSeq5");
1107  feat->SetLocation().SetInt().SetFrom(0);
1108  feat->SetLocation().SetInt().SetTo(17);
1109  CRef<CSeq_annot> annot(new CSeq_annot());
1110  annot->SetData().SetFtable().push_back(feat);
1111  entry.SetSeq().SetAnnot().push_back(annot);
1112  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1113 
1114  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*feat, scope);
1115  CheckTranslatedBioseq (bioseq, "MPK", true, "PK");
1116 }
1117 
1118 
1119 
1120 BOOST_AUTO_TEST_CASE(Test_Translate_CodeBreakForStopCodon)
1121 {
1122  CSeq_entry entry;
1123  {{
1125  istr >> MSerial_AsnText >> entry;
1126  }}
1127 
1128  CRef<CBioseq> prot(new CBioseq);
1129  prot->SetId().push_back(CRef<CSeq_id>(new CSeq_id("gnl|GNOMON|912063.p")));
1130 
1132  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1133  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
1134  CFeat_CI feat_iter(*bs_iter,
1135  SAnnotSelector().IncludeFeatSubtype
1137  for ( ; feat_iter; ++feat_iter) {
1138  ///
1139  /// retrieve the actual protein sequence
1140  ///
1141  string real_prot_seq = GetProteinString (feat_iter, scope);
1142 
1143  ///
1144  /// translate the CDRegion directly
1145  ///
1146  string tmp;
1147 
1148  tmp.clear();
1150  (feat_iter->GetOriginalFeature(), scope, tmp, false);
1151  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
1152 
1153 
1154  /// use CCdregion_translate, include the stop codon
1155  //NOTE: the test case lacks a trailing stop!
1156  //real_prot_seq += '*';
1157  tmp.clear();
1159  (feat_iter->GetOriginalFeature(), scope, tmp, true);
1160  BOOST_CHECK_EQUAL(real_prot_seq, tmp);
1161 
1162  prot->SetInst().SetRepr(CSeq_inst::eRepr_raw);
1163  prot->SetInst().SetMol(CSeq_inst::eMol_aa);
1164  prot->SetInst().SetLength(tmp.size());
1165  prot->SetInst().SetSeq_data().SetNcbieaa().Set(tmp);
1166  }
1167  }
1168 
1169  /**
1170  CRef<CSeq_entry> nuc_se(new CSeq_entry);
1171  nuc_se->Assign(entry);
1172 
1173  CRef<CSeq_entry> prot_se(new CSeq_entry);
1174  prot_se->SetSeq(*prot);
1175 
1176  CRef<CSeq_entry> e(new CSeq_entry);
1177  e->SetSet().SetSeq_set().push_back(nuc_se);
1178  e->SetSet().SetSeq_set().push_back(prot_se);
1179  cerr << MSerial_AsnText << *e;
1180  **/
1181 }
1182 
1183 
/// Test case: checks the frame reported by CSeqTranslator::FindBestFrame for
/// the first feature of the parsed entry, and again after the start of the
/// feature's interval is moved to 15 and then to 16.
1184 BOOST_AUTO_TEST_CASE(Test_FindBestFrame)
1185 {
1186  CSeq_entry entry;
      // Deserialize an ASN.1 text fixture into `entry`.
      // NOTE(review): the line constructing `istr` is not visible in this
      // excerpt, so the fixture being read cannot be named from here.
1187  {{
1189  istr >> MSerial_AsnText >> entry;
1190  }}
1191 
      // First feature of the first feature table attached to the set-level entry.
1192  CRef<CSeq_feat> cds = entry.SetSet().SetAnnot().front()->SetData().SetFtable().front();
1193 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1195  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1196 
      // Location as given by the fixture: frame one is expected.
1197  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_one);
      // Interval start moved to 15: frame three is expected.
1198  cds->SetLocation().SetInt().SetFrom(15);
1199  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_three);
      // Interval start moved to 16: frame two is expected.
1200  cds->SetLocation().SetInt().SetFrom(16);
1201  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_two);
1202 }
1203 
// ASN.1 text fixture: a raw DNA Bioseq (local id "nuc1", declared length 45)
// with one feature table holding a single cdregion feature (frame one,
// genetic code id 1) located on interval 5..43 of "nuc1".
1204 const string sc_TestBestFrameEntry ="\
1205 Seq-entry ::= seq {\
1206  id { local str \"nuc1\" } , \
1207  inst { repr raw, mol dna, length 45,\
1208  seq-data iupacna \"TTTTTATGGAGTAATCGCTAACTTGTAATGCCCAGGCTGGAGTGC\"\
1209  },\
1210  annot { { data ftable {\
1211  {\
1212  data cdregion { frame one, code { id 1 } },\
1213  location int { from 5, to 43, id local str \"nuc1\" }\
1214  }\
1215  } } }\
1216 }";
1217 
1218 
/// Test case: the first feature of the parsed Bioseq is expected to yield
/// frame one from CSeqTranslator::FindBestFrame, and the three-argument
/// overload is expected to leave `ambiguous` false.
1219 BOOST_AUTO_TEST_CASE(Test_FindFrame2)
1220 {
1221  CSeq_entry entry;
1222  // only change if new frame has no internal stops
      // NOTE(review): the line constructing `istr` is not visible in this
      // excerpt, so the fixture being read cannot be named from here.
1223  {{
1225  istr >> MSerial_AsnText >> entry;
1226  }}
      // First feature of the first feature table on the Bioseq.
1227  CRef<CSeq_feat> cds = entry.SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1228 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1230  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1231  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope), CCdregion::eFrame_one);
1232 
      // The return value of this second call is not checked; only `ambiguous`
      // (presumably filled in by the call) is inspected afterwards.
1233  bool ambiguous = false;
1234  CSeqTranslator::FindBestFrame(*cds, scope, ambiguous);
1235  BOOST_CHECK_EQUAL(ambiguous, false);
1236 }
1237 
1238 
1240 Seq-entry ::= seq {\
1241  id { local str \"nuc1\" } , \
1242  inst { repr raw, mol dna, length 45,\
1243  seq-data iupacna \"TTTTTATGGAGAAATCGCAAACTTGAAATGCCCAGGCTGGAGTGC\"\
1244  },\
1245  annot { { data ftable {\
1246  {\
1247  data cdregion { frame one, code { id 1 } },\
1248  location int { from 5, to 43, id local str \"nuc1\" }\
1249  }\
1250  } } }\
1251 }";
1252 
1253 
/// Test case: for the first feature of the parsed Bioseq,
/// CSeqTranslator::FindBestFrame is expected to return frame two and to
/// report the choice as ambiguous.
1254 BOOST_AUTO_TEST_CASE(Test_FindFrame3)
1255 {
1256  CSeq_entry entry;
1257  // only change if new frame has no internal stops
      // NOTE(review): the comment above is identical to the one in
      // Test_FindFrame2, while this test expects an ambiguous result --
      // confirm that it still describes this case.
      // NOTE(review): the line constructing `istr` is not visible in this excerpt.
1258  {{
1260  istr >> MSerial_AsnText >> entry;
1261  }}
      // First feature of the first feature table on the Bioseq.
1262  CRef<CSeq_feat> cds = entry.SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1263 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1265  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
      // Starts out false; the check below expects the call to have set it to true.
1266  bool ambiguous = false;
1267  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope, ambiguous), CCdregion::eFrame_two);
1268  BOOST_CHECK_EQUAL(ambiguous, true);
1269 }
1270 
1271 
// ASN.1 text fixture: a raw DNA Bioseq (local id "nuc1", declared length 14)
// with a single cdregion feature (frame two, genetic code id 1) on interval
// 0..14.
// NOTE(review): `to 14` equals the declared length, whereas the sibling
// fixture sc_TestSQD_4334_2 ends at 13; presumably deliberate -- confirm.
1272 const string sc_TestSQD_4334_1 ="\
1273 Seq-entry ::= seq {\
1274  id { local str \"nuc1\" } , \
1275  inst { repr raw, mol dna, length 14,\
1276  seq-data iupacna \"ATGGGGTTTATAAA\"\
1277  },\
1278  annot { { data ftable {\
1279  {\
1280  data cdregion { frame two, code { id 1 } },\
1281  location int { from 0, to 14, id local str \"nuc1\" }\
1282  }\
1283  } } }\
1284 }";
1285 
1286 
// ASN.1 text fixture: same 14-base DNA Bioseq and cdregion feature (frame two,
// genetic code id 1) as sc_TestSQD_4334_1, except that the feature interval
// is 0..13 instead of 0..14.
1287 const string sc_TestSQD_4334_2 ="\
1288 Seq-entry ::= seq {\
1289  id { local str \"nuc1\" } , \
1290  inst { repr raw, mol dna, length 14,\
1291  seq-data iupacna \"ATGGGGTTTATAAA\"\
1292  },\
1293  annot { { data ftable {\
1294  {\
1295  data cdregion { frame two, code { id 1 } },\
1296  location int { from 0, to 13, id local str \"nuc1\" }\
1297  }\
1298  } } }\
1299 }";
1300 
/// Test case: parses two fixtures one after the other into the same `entry`
/// and, for each, expects CSeqTranslator::FindBestFrame to return frame one
/// for the first feature with `ambiguous` left false.
1301 BOOST_AUTO_TEST_CASE(Test_SQD_4334)
1302 {
1303  CSeq_entry entry;
1304  // internal stop plus partial untranslatable codon at the end does not count
      // NOTE(review): the lines constructing `istr` (here and in the second
      // read below) are not visible in this excerpt.
1305  {{
1307  istr >> MSerial_AsnText >> entry;
1308  }}
      // First feature of the first feature table on the Bioseq.
1309  CRef<CSeq_feat> cds = entry.SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1310 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1312  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1313  bool ambiguous = false;
1314  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope, ambiguous), CCdregion::eFrame_one);
1315  BOOST_CHECK_EQUAL(ambiguous, false);
      // Take the entry out of the scope before `entry` is overwritten by the
      // second read and added again.
1316  scope.RemoveTopLevelSeqEntry(seh);
1317 
1318  {{
1320  istr >> MSerial_AsnText >> entry;
1321  }}
      // Re-fetch the feature and the handle for the freshly parsed entry.
1322  cds = entry.SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1323  seh = scope.AddTopLevelSeqEntry(entry);
      // `ambiguous` is reused without being reset; it is still false here
      // if the check above passed.
1324  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope, ambiguous), CCdregion::eFrame_one);
1325  BOOST_CHECK_EQUAL(ambiguous, false);
1326 
1327 }
1328 
1329 
1330 
1332 Seq-entry ::= seq {\
1333  id { local str \"nuc1\" } , \
1334  inst { repr raw, mol dna, length 60,\
1335  seq-data iupacna \"cagtttccctcaaatcactctttggcaacgaccccttgtcacagtaaaaataggaggaca\"\
1336  },\
1337  annot { { data ftable {\
1338  {\
1339  data cdregion { frame one, code { id 1 } },\
1340  location int { from 0, to 46, id local str \"nuc1\", fuzz-from lim lt }\
1341  }\
1342  } } }\
1343 }";
1344 
/// Test case: for the first feature of the parsed Bioseq,
/// CSeqTranslator::FindBestFrame is expected to return frame three with
/// `ambiguous` left false.
1345 BOOST_AUTO_TEST_CASE(Test_PickFrameWithEndStopIf3Complete)
1346 {
1347  CSeq_entry entry;
1348  // only change if new frame has no internal stops
      // NOTE(review): the comment above also appears verbatim in
      // Test_FindFrame2 and Test_FindFrame3 -- confirm it applies to this case.
      // NOTE(review): the line constructing `istr` is not visible in this excerpt.
1349  {{
1351  istr >> MSerial_AsnText >> entry;
1352  }}
      // First feature of the first feature table on the Bioseq.
1353  CRef<CSeq_feat> cds = entry.SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1354 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1356  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1357  bool ambiguous = false;
1358  BOOST_CHECK_EQUAL(CSeqTranslator::FindBestFrame(*cds, scope, ambiguous), CCdregion::eFrame_three);
1359  BOOST_CHECK_EQUAL(ambiguous, false);
1360 
1361 }
1362 
1363 
// ASN.1 text fixture: a raw, circular DNA Bioseq (local id "test", declared
// length 20) with one feature table holding two features, a cdregion and a
// gene. Both have the same mix location: minus-strand interval 0..8 followed
// by minus-strand interval 17..19, both on "test".
// NOTE(review): the literal appears to contain one more closing brace than
// opening braces; presumably the reader stops once the Seq-entry is complete
// -- confirm.
1364 const string sc_MinusOrigin = "\
1365 Seq-entry ::= seq {\
1366  id { \
1367  local str \"test\" } , \
1368  inst { \
1369  repr raw , \
1370  mol dna , \
1371  length 20 , \
1372  topology circular , \
1373  seq-data iupacna \"AAAATTTTGGGGCCCCAAAA\" } , \
1374  annot {\
1375  {\
1376  data ftable {\
1377  {\
1378  data cdregion {\
1379  },\
1380  location mix { \
1381  int {\
1382  from 0,\
1383  to 8,\
1384  strand minus,\
1385  id local str \"test\" } , \
1386  int { \
1387  from 17 , \
1388  to 19 , \
1389  strand minus,\
1390  id local str \"test\" } } } , \
1391  {\
1392  data gene {\
1393  },\
1394  location mix { \
1395  int {\
1396  from 0,\
1397  to 8,\
1398  strand minus,\
1399  id local str \"test\" } , \
1400  int { \
1401  from 17 , \
1402  to 19 , \
1403  strand minus,\
1404  id local str \"test\" } \
1405  } \
1406  }\
1407  }\
1408  }\
1409  }\
1410  }\
1411 }";
1412 
1413 
/// Test case: for every feature selected on every Bioseq of the parsed
/// entry, two overlap lookups are each expected to leave exactly one element
/// in `cds`.
/// NOTE(review): the feature-subtype argument of the selector, the
/// declaration of `cds` and both calls that populate it are on lines not
/// visible in this excerpt.
1414 BOOST_AUTO_TEST_CASE(Test_FindOverlappingFeatureForMinusStrandCrossingOrigin)
1415 {
1416 
1417  CSeq_entry entry;
      // NOTE(review): the line constructing `istr` is not visible in this excerpt.
1418  {{
1420  istr >> MSerial_AsnText >> entry;
1421  }}
1422 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1424  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1425  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
      // NOTE(review): `bsh` is not referenced on any visible line below; it
      // may be used on one of the missing lines.
1426  CBioseq_Handle bsh = *bs_iter;
1427  CFeat_CI feat_iter(*bs_iter,
1428  SAnnotSelector().IncludeFeatSubtype
1430  for ( ; feat_iter; ++feat_iter) {
1431  size_t num_cds = 0;
      // Count the entries of `cds` by walking the container.
1435  ITERATE (sequence::TFeatScores, s, cds) {
1436  num_cds++;
1437  }
1438  BOOST_CHECK_EQUAL(num_cds, 1u);
      // Reset the counter and the container before the second lookup.
1439  num_cds = 0;
1440  cds.clear();
1443  ITERATE (sequence::TFeatScores, s, cds) {
1444  num_cds++;
1445  }
1446  BOOST_CHECK_EQUAL(num_cds, 1u);
1447  }
1448  }
1449 }
1450 
1451 
// ASN.1 text fixture: a raw, circular DNA Bioseq (local id "test", declared
// length 20) with one feature table holding an mRNA feature (partial TRUE)
// and a gene feature (no partial field). Both have the same mix location:
// interval 0..19 on "test", a null, then interval 0..19 on gi 1213148.
// NOTE(review): the literal appears to contain one more closing brace than
// opening braces; presumably the reader stops once the Seq-entry is complete
// -- confirm.
1452 const string sc_TooManyOverlap = "\
1453 Seq-entry ::= seq {\
1454  id { \
1455  local str \"test\" } , \
1456  inst { \
1457  repr raw , \
1458  mol dna , \
1459  length 20 , \
1460  topology circular , \
1461  seq-data iupacna \"AAAATTTTGGGGCCCCAAAA\" } , \
1462  annot {\
1463  {\
1464  data ftable {\
1465  {\
1466  data rna {\
1467  type mRNA },\
1468  partial TRUE , \
1469  location mix { \
1470  int {\
1471  from 0,\
1472  to 19,\
1473  id local str \"test\" } , \
1474  null NULL , \
1475  int { \
1476  from 0 , \
1477  to 19 , \
1478  id gi 1213148 } } } , \
1479  {\
1480  data gene {\
1481  },\
1482  location mix { \
1483  int {\
1484  from 0,\
1485  to 19,\
1486  id local str \"test\" } , \
1487  null NULL , \
1488  int { \
1489  from 0 , \
1490  to 19 , \
1491  id gi 1213148 } \
1492  } \
1493  }\
1494  }\
1495  }\
1496  }\
1497  }\
1498 }";
1499 
1500 
/// Test case: runs the same two expectations twice, first by walking the
/// parsed CSeq_entry object directly and then by iterating through the scope:
///  - an overlap lookup for genes yields exactly one result;
///  - gene features do not have `partial` set.
/// NOTE(review): the trailing arguments of the GetOverlappingFeatures call,
/// the selector subtype arguments and the lookup inside the mRNA loop are on
/// lines not visible in this excerpt.
1501 BOOST_AUTO_TEST_CASE(Test_FindOverlappingFeaturesOnMultipleSeqs)
1502 {
1503 
1504  CSeq_entry entry;
      // NOTE(review): the line constructing `istr` is not visible in this excerpt.
1505  {{
1507  istr >> MSerial_AsnText >> entry;
1508  }}
1509 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1511  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1512 
      // Pass 1: walk the annotations of the Bioseq object itself.
1513  FOR_EACH_ANNOT_ON_BIOSEQ (annot, entry.GetSeq()) {
1514  if ((*annot)->IsFtable()) {
1515  FOR_EACH_FEATURE_ON_ANNOT (feat, **annot) {
1516  if ((*feat)->GetData().IsRna()) {
      // RNA feature: look up overlapping genes for its location.
1517  sequence::TFeatScores gene;
1518  GetOverlappingFeatures ((*feat)->GetLocation(), CSeqFeatData::e_Gene,
1520  BOOST_CHECK_EQUAL(gene.size(), 1u);
1521  } else if ((*feat)->GetData().IsGene()) {
1522  BOOST_CHECK_EQUAL((*feat)->IsSetPartial(), false);
1523  }
1524  }
1525  }
1526  }
1527 
1528 
      // Pass 2: the same expectations, using iterators over the scope.
1529  for (CBioseq_CI bs_iter(seh); bs_iter; ++bs_iter) {
      // NOTE(review): `bsh` is not referenced on any visible line below.
1530  CBioseq_Handle bsh = *bs_iter;
1531 
1532  CFeat_CI mrna_iter(*bs_iter,
1533  SAnnotSelector().IncludeFeatSubtype
1535  for ( ; mrna_iter; ++mrna_iter) {
1536  sequence::TFeatScores gene;
1539  BOOST_CHECK_EQUAL(gene.size(), 1u);
1540  }
1541 
1542  CFeat_CI gene_iter(*bs_iter,
1543  SAnnotSelector().IncludeFeatSubtype
1545  for ( ; gene_iter; ++gene_iter) {
1546  BOOST_CHECK_EQUAL(gene_iter->IsSetPartial(), false);
1547  }
1548 
1549  }
1550 }
1551 
1552 
1554 {
      // Body of a test case whose BOOST_AUTO_TEST_CASE line is not visible in
      // this excerpt. It builds a coding-region feature by hand on the parsed
      // Bioseq, translates it with CSeqTranslator::TranslateToProtein and
      // compares the result with an expected protein string.
1555  CSeq_entry entry;
      // NOTE(review): the line constructing `istr` is not visible in this excerpt.
1556  {{
1558  istr >> MSerial_AsnText >> entry;
1559  }}
1560 
      // NOTE(review): `scope` is declared on a line not visible in this excerpt.
1562  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(entry);
1563 
1564  CRef<CSeq_feat> cds(new CSeq_feat());
1565 
1566  // set genetic code
      // NOTE(review): `ce` is declared on a line not visible in this excerpt.
1568  ce->SetId(1);
      // NOTE(review): `gcode` is not referenced on any visible line below;
      // `ce` is pushed straight into the feature's code instead.
1569  CRef<CGenetic_code> gcode(new CGenetic_code());
1570  cds->SetData().SetCdregion().SetCode().Set().push_back(ce);
1571 
1572  // set location
      // Three intervals (0..40, 121..175, 201..416), each on a copy of the
      // Bioseq's first Seq-id, appended in this order to a mix location.
1573  CRef<CSeq_loc> int1(new CSeq_loc());
1574  int1->SetInt().SetId().Assign(*(entry.GetSeq().GetId().front()));
1575  int1->SetInt().SetFrom(0);
1576  int1->SetInt().SetTo(40);
1577  CRef<CSeq_loc> int2(new CSeq_loc());
1578  int2->SetInt().SetId().Assign(*(entry.GetSeq().GetId().front()));
1579  int2->SetInt().SetFrom(121);
1580  int2->SetInt().SetTo(175);
1581  CRef<CSeq_loc> int3(new CSeq_loc());
1582  int3->SetInt().SetId().Assign(*(entry.GetSeq().GetId().front()));
1583  int3->SetInt().SetFrom(201);
1584  int3->SetInt().SetTo(416);
1585  cds->SetLocation().SetMix().Set().push_back(int1);
1586  cds->SetLocation().SetMix().Set().push_back(int2);
1587  cds->SetLocation().SetMix().Set().push_back(int3);
1588 
      // Translate the hand-built feature and compare with the expected
      // protein string (note that it begins with '-').
1589  CRef<CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*cds, scope);
1590  string seg1 = "-TISGEHGLDSNGVYNGTSELQLERMNVYFNESSHHPASCLLLPSGLLWPDRACPSDAFLVQASGNKYVPRAVLVDLEPGTMDAVRAGPFGQLFRPDNFVFGQS";
1591  CheckTranslatedBioseq (bioseq, seg1);
1592 
1593 }
1594 
1595 
1596 
1597 //////////////////////////////////////////////////////////////////////////////
1598 
// ASN.1 text fixture: a nuc-prot set with two Bioseqs and one set-level
// feature table.
//  - Nucleotide: genbank AF010144 version 1 / gi 3002526, raw, mol rna,
//    declared length 1442, iupacna data.
//  - Protein: genbank AAC08737 version 1 / gi 3002527, raw, mol aa, declared
//    length 375, ncbieaa data, with a prot feature named
//    "neuronal thread protein AD7c-NTP" on 0..374.
//  - Set-level cdregion feature: frame one, genetic code id 1, product
//    whole gi 3002527, location 14..1141 on the plus strand of gi 3002526.
1599 const string sc_TestEntry ="\
1600 Seq-entry ::= set {\
1601  class nuc-prot,\
1602  seq-set {\
1603  seq {\
1604  id {\
1605  genbank {\
1606  name \"AF010144\",\
1607  accession \"AF010144\",\
1608  version 1\
1609  },\
1610  gi 3002526\
1611  },\
1612  inst {\
1613  repr raw,\
1614  mol rna,\
1615  length 1442,\
1616  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1617 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1618 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTATTTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1619 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1620 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1621 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1622 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1623 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1624 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1625 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1626 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1627 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1628 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1629 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1630 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1631 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1632 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1633 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1634 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1635  }\
1636  },\
1637  seq {\
1638  id {\
1639  genbank {\
1640  accession \"AAC08737\",\
1641  version 1\
1642  },\
1643  gi 3002527\
1644  },\
1645  inst {\
1646  repr raw,\
1647  mol aa,\
1648  length 375,\
1649  topology not-set,\
1650  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILY\
1651 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1652 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1653 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1654 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1655 LSGWSQTPDLR\"\
1656  },\
1657  annot {\
1658  {\
1659  data ftable {\
1660  {\
1661  data prot {\
1662  name {\
1663  \"neuronal thread protein AD7c-NTP\"\
1664  }\
1665  },\
1666  location int {\
1667  from 0,\
1668  to 374,\
1669  strand plus,\
1670  id gi 3002527\
1671  }\
1672  }\
1673  }\
1674  }\
1675  }\
1676  }\
1677  },\
1678  annot {\
1679  {\
1680  data ftable {\
1681  {\
1682  data cdregion {\
1683  frame one,\
1684  code {\
1685  id 1\
1686  }\
1687  },\
1688  product whole gi 3002527,\
1689  location int {\
1690  from 14,\
1691  to 1141,\
1692  strand plus,\
1693  id gi 3002526\
1694  }\
1695  }\
1696  }\
1697  }\
1698  }\
1699 }";
1700 
// ASN.1 text fixture: a nuc-prot set with the same two Bioseq ids as
// sc_TestEntry (gi 3002526 nucleotide, gi 3002527 protein) and a set-level
// cdregion feature on 14..1141 (plus strand, frame one, genetic code id 1).
// Here the cdregion additionally carries one code-break: location 17..19 on
// the plus strand of gi 3002526 with `aa ncbieaa 81` (81 is the ASCII code
// of 'Q'). The protein literal accordingly starts with "MQ".
1701 const string sc_TestEntry_code_break ="\
1702 Seq-entry ::= set {\
1703  class nuc-prot,\
1704  seq-set {\
1705  seq {\
1706  id {\
1707  genbank {\
1708  name \"AF010144\",\
1709  accession \"AF010144\",\
1710  version 1\
1711  },\
1712  gi 3002526\
1713  },\
1714  inst {\
1715  repr raw,\
1716  mol rna,\
1717  length 1442,\
1718  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1719 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1720 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTATTTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1721 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1722 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1723 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1724 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1725 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1726 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1727 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1728 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1729 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1730 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1731 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1732 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1733 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1734 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1735 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1736 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1737  }\
1738  },\
1739  seq {\
1740  id {\
1741  genbank {\
1742  accession \"AAC08737\",\
1743  version 1\
1744  },\
1745  gi 3002527\
1746  },\
1747  inst {\
1748  repr raw,\
1749  mol aa,\
1750  length 375,\
1751  topology not-set,\
1752  seq-data ncbieaa \"MQFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILY\
1753 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1754 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1755 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1756 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1757 LSGWSQTPDLR\"\
1758  },\
1759  annot {\
1760  {\
1761  data ftable {\
1762  {\
1763  data prot {\
1764  name {\
1765  \"neuronal thread protein AD7c-NTP\"\
1766  }\
1767  },\
1768  location int {\
1769  from 0,\
1770  to 374,\
1771  strand plus,\
1772  id gi 3002527\
1773  }\
1774  }\
1775  }\
1776  }\
1777  }\
1778  }\
1779  },\
1780  annot {\
1781  {\
1782  data ftable {\
1783  {\
1784  data cdregion {\
1785  frame one,\
1786  code {\
1787  id 1\
1788  },\
1789  code-break {\
1790  {\
1791  loc int {\
1792  from 17,\
1793  to 19,\
1794  strand plus,\
1795  id gi 3002526\
1796  },\
1797  aa ncbieaa 81\
1798  }\
1799  }\
1800  },\
1801  product whole gi 3002527,\
1802  location int {\
1803  from 14,\
1804  to 1141,\
1805  strand plus,\
1806  id gi 3002526\
1807  }\
1808  }\
1809  }\
1810  }\
1811  }\
1812 }";
1813 
1814 
// ASN.1 text fixture: a nuc-prot set with the same two Bioseq ids as
// sc_TestEntry (gi 3002526 nucleotide, gi 3002527 protein). The set-level
// cdregion feature here declares frame two (genetic code id 1) and its
// location starts one base earlier: 13..1141 on the plus strand of
// gi 3002526, where sc_TestEntry uses frame one and 14..1141.
1815 const string sc_TestEntry_alt_frame ="\
1816 Seq-entry ::= set {\
1817  class nuc-prot,\
1818  seq-set {\
1819  seq {\
1820  id {\
1821  genbank {\
1822  name \"AF010144\",\
1823  accession \"AF010144\",\
1824  version 1\
1825  },\
1826  gi 3002526\
1827  },\
1828  inst {\
1829  repr raw,\
1830  mol rna,\
1831  length 1442,\
1832  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1833 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1834 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTATTTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1835 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1836 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1837 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1838 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1839 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1840 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1841 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1842 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1843 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1844 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1845 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1846 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1847 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1848 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1849 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1850 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1851  }\
1852  },\
1853  seq {\
1854  id {\
1855  genbank {\
1856  accession \"AAC08737\",\
1857  version 1\
1858  },\
1859  gi 3002527\
1860  },\
1861  inst {\
1862  repr raw,\
1863  mol aa,\
1864  length 375,\
1865  topology not-set,\
1866  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILY\
1867 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1868 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1869 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1870 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1871 LSGWSQTPDLR\"\
1872  },\
1873  annot {\
1874  {\
1875  data ftable {\
1876  {\
1877  data prot {\
1878  name {\
1879  \"neuronal thread protein AD7c-NTP\"\
1880  }\
1881  },\
1882  location int {\
1883  from 0,\
1884  to 374,\
1885  strand plus,\
1886  id gi 3002527\
1887  }\
1888  }\
1889  }\
1890  }\
1891  }\
1892  }\
1893  },\
1894  annot {\
1895  {\
1896  data ftable {\
1897  {\
1898  data cdregion {\
1899  frame two,\
1900  code {\
1901  id 1\
1902  }\
1903  },\
1904  product whole gi 3002527,\
1905  location int {\
1906  from 13,\
1907  to 1141,\
1908  strand plus,\
1909  id gi 3002526\
1910  }\
1911  }\
1912  }\
1913  }\
1914  }\
1915 }";
1916 
1918 Seq-entry ::= set {\
1919  class nuc-prot,\
1920  seq-set {\
1921  seq {\
1922  id {\
1923  genbank {\
1924  name \"AF010144\",\
1925  accession \"AF010144\",\
1926  version 1\
1927  },\
1928  gi 3002526\
1929  },\
1930  inst {\
1931  repr raw,\
1932  mol rna,\
1933  length 1442,\
1934  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
1935 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
1936 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTGATTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
1937 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
1938 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
1939 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
1940 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
1941 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
1942 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
1943 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
1944 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
1945 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
1946 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
1947 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
1948 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
1949 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
1950 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
1951 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
1952 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
1953  }\
1954  },\
1955  seq {\
1956  id {\
1957  genbank {\
1958  accession \"AAC08737\",\
1959  version 1\
1960  },\
1961  gi 3002527\
1962  },\
1963  inst {\
1964  repr raw,\
1965  mol aa,\
1966  length 375,\
1967  topology not-set,\
1968  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILX\
1969 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
1970 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
1971 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
1972 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
1973 LSGWSQTPDLR\"\
1974  },\
1975  annot {\
1976  {\
1977  data ftable {\
1978  {\
1979  data prot {\
1980  name {\
1981  \"neuronal thread protein AD7c-NTP\"\
1982  }\
1983  },\
1984  location int {\
1985  from 0,\
1986  to 374,\
1987  strand plus,\
1988  id gi 3002527\
1989  }\
1990  }\
1991  }\
1992  }\
1993  }\
1994  }\
1995  },\
1996  annot {\
1997  {\
1998  data ftable {\
1999  {\
2000  data cdregion {\
2001  frame one,\
2002  code {\
2003  id 1\
2004  }\
2005  },\
2006  product whole gi 3002527,\
2007  location int {\
2008  from 14,\
2009  to 1141,\
2010  strand plus,\
2011  id gi 3002526\
2012  }\
2013  }\
2014  }\
2015  }\
2016  }\
2017 }";
2018 
2020 Seq-entry ::= set {\
2021  class nuc-prot,\
2022  seq-set {\
2023  seq {\
2024  id {\
2025  genbank {\
2026  name \"AF010144\",\
2027  accession \"AF010144\",\
2028  version 1\
2029  },\
2030  gi 3002526\
2031  },\
2032  inst {\
2033  repr raw,\
2034  mol rna,\
2035  length 1442,\
2036  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
2037 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
2038 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTGATTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
2039 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
2040 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
2041 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
2042 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
2043 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
2044 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
2045 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
2046 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
2047 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
2048 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
2049 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
2050 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
2051 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
2052 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
2053 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
2054 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
2055  }\
2056  },\
2057  seq {\
2058  id {\
2059  genbank {\
2060  accession \"AAC08737\",\
2061  version 1\
2062  },\
2063  gi 3002527\
2064  },\
2065  inst {\
2066  repr raw,\
2067  mol aa,\
2068  length 374,\
2069  topology not-set,\
2070  seq-data ncbieaa \"-FSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLIL*\
2071 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
2072 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
2073 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
2074 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
2075 LSGWSQTPDLR\"\
2076  },\
2077  annot {\
2078  {\
2079  data ftable {\
2080  {\
2081  data prot {\
2082  name {\
2083  \"neuronal thread protein AD7c-NTP\"\
2084  }\
2085  },\
2086  location int {\
2087  from 0,\
2088  to 374,\
2089  strand plus,\
2090  id gi 3002527\
2091  }\
2092  }\
2093  }\
2094  }\
2095  }\
2096  }\
2097  },\
2098  annot {\
2099  {\
2100  data ftable {\
2101  {\
2102  data cdregion {\
2103  frame one,\
2104  code {\
2105  id 1\
2106  }\
2107  },\
2108  product whole gi 3002527,\
2109  location int {\
2110  from 17,\
2111  to 1141,\
2112  strand plus,\
2113  id gi 3002526,\
2114  fuzz-from lim tr\
2115  }\
2116  }\
2117  }\
2118  }\
2119  }\
2120 }";
2121 
2123 Seq-entry ::= set {\
2124  class nuc-prot,\
2125  seq-set {\
2126  seq {\
2127  id {\
2128  genbank {\
2129  name \"AF010144\",\
2130  accession \"AF010144\",\
2131  version 1\
2132  },\
2133  gi 3002526\
2134  },\
2135  inst {\
2136  repr raw,\
2137  mol rna,\
2138  length 1442,\
2139  seq-data iupacna \"TTTTTTTTTTTGAGATGGAGTTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAA\
2140 TGGCGCAATCTCAGCTCACCGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCCAGTAGCTGG\
2141 GATTACAGGCATGTGCACCCACGCTCGGCTAATTTTGTGATTTTTTTTAGTAGAGATGGAGTTTCTCCATGTTGGTCA\
2142 GGCTGGTCTCGAACTCCCGACCTCAGATGATCCCTCCGTCTCGGCCTCCCAAAGTGCTAGATACAGGACTGGCCACCA\
2143 TGCCCGGCTCTGCCTGGCTAATTTTTGTGGTAGAAACAGGGTTTCACTGATGTGCCCAAGCTGGTCTCCTGAGCTCAA\
2144 GCAGTCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGCAGCCGTGCCTGGCCTTTTTATTTTATTTTT\
2145 TTTAAGACACAGGTGTCCCACTCTTACCCAGGATGAAGTGCAGTGGTGTGATCACAGCTCACTGCAGCCTTCAACTCC\
2146 TGAGATCAAGCATCCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACCAAAGACATGCACCACTACACCTGGCTAATTTT\
2147 TATTTTTATTTTTAATTTTTTGAGACAGAGTCTCAACTCTGTCACCCAGGCTGGAGTGCAGTGGCGCAATCTTGGCTC\
2148 ACTGCAACCTCTGCCTCCCGGGTTCAAGTTATTCTCCTGCCCCAGCCTCCTGAGTAGCTGGGACTACAGGCGCCCACC\
2149 ACGCCTAGCTAATTTTTTTGTATTTTTAGTAGAGATGGGGTTCACCATGTTCGCCAGGTTGATCTTGATCTCTGGACC\
2150 TTGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCTTATTTTTAATTT\
2151 TTGTTTGTTTGAAATGGAATCTCACTCTGTTACCCAGGCTGGAGTGCAATGGCCAAATCTCGGCTCACTGCAACCTCT\
2152 GCCTCCCGGGCTCAAGCGATTCTCCTGTCTCAGCCTCCCAAGCAGCTGGGATTACGGGCACCTGCCACCACACCCCGC\
2153 TAATTTTTGTATTTTCATTAGAGGCGGGGTTTCACCATATTTGTCAGGCTGGTCTCAAACTCCTGACCTCAGGTGACC\
2154 CACCTGCCTCAGCCTTCCAAAGTGCTGGGATTACAGGCGTGAGCCACCTCACCCAGCCGGCTAATTTAGATAAAAAAA\
2155 TATGTAGCAATGGGGGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTTCTGGCTTCATGCAATCCTTCCAAATGAG\
2156 CCACAACACCCAGCCAGTCACATTTTTTAAACAGTTACATCTTTATTTTAGTATACTAGAAAGTAATACAATAAACAT\
2157 GTCAAACCTGCAAATTCAGTAGTAACAGAGTTCTTTTATAACTTTTAAACAAAGCTTTAGAGCA\"\
2158  }\
2159  },\
2160  seq {\
2161  id {\
2162  genbank {\
2163  accession \"AAC08737\",\
2164  version 1\
2165  },\
2166  gi 3002527\
2167  },\
2168  inst {\
2169  repr raw,\
2170  mol aa,\
2171  length 374,\
2172  topology not-set,\
2173  seq-data ncbieaa \"MEFSLLLPRLECNGAISAHRNLRLPGSSDSPASASPVAGITGMCTHARLILX\
2174 FFLVEMEFLHVGQAGLELPTSDDPSVSASQSARYRTGHHARLCLANFCGRNRVSLMCPSWSPELKQSTCLSLPKCWDY\
2175 RRAAVPGLFILFFLRHRCPTLTQDEVQWCDHSSLQPSTPEIKHPPASASQVAGTKDMHHYTWLIFIFIFNFLRQSLNS\
2176 VTQAGVQWRNLGSLQPLPPGFKLFSCPSLLSSWDYRRPPRLANFFVFLVEMGFTMFARLILISGPCDLPASASQSAGI\
2177 TGVSHHARLIFNFCLFEMESHSVTQAGVQWPNLGSLQPLPPGLKRFSCLSLPSSWDYGHLPPHPANFCIFIRGGVSPY\
2178 LSGWSQTPDL\"\
2179  },\
2180  annot {\
2181  {\
2182  data ftable {\
2183  {\
2184  data prot {\
2185  name {\
2186  \"neuronal thread protein AD7c-NTP\"\
2187  }\
2188  },\
2189  location int {\
2190  from 0,\
2191  to 374,\
2192  strand plus,\
2193  id gi 3002527\
2194  }\
2195  }\
2196  }\
2197  }\
2198  }\
2199  }\
2200  },\
2201  annot {\
2202  {\
2203  data ftable {\
2204  {\
2205  data cdregion {\
2206  frame one,\
2207  code {\
2208  id 1\
2209  }\
2210  },\
2211  product whole gi 3002527,\
2212  location int {\
2213  from 14,\
2214  to 1135,\
2215  strand plus,\
2216  id gi 3002526,\
2217  fuzz-from lim tl\
2218  }\
2219  }\
2220  }\
2221  }\
2222  }\
2223 }";
2224 
2226 Seq-entry ::= set {\
2227  class nuc-prot,\
2228  seq-set {\
2229  seq {\
2230  id {\
2231  local str \"minus_5_prime_partial\" },\
2232  inst {\
2233  repr raw,\
2234  mol dna,\
2235  length 20,\
2236  seq-data iupacna \"AAATTTGGGCAAATTTGGGC\"\
2237  }\
2238  },\
2239  seq {\
2240  id {\
2241  local str \"minus_5_prime_partial_prot\" },\
2242  inst {\
2243  repr raw,\
2244  mol aa,\
2245  length 5,\
2246  seq-data ncbieaa \"-FAQI\"\
2247  },\
2248  annot {\
2249  {\
2250  data ftable {\
2251  {\
2252  data prot {\
2253  name {\
2254  \"test protein\"\
2255  }\
2256  },\
2257  location int {\
2258  from 0,\
2259  to 5,\
2260  strand plus,\
2261  id local str \"minus_5_prime_partial_prot\"\
2262  }\
2263  }\
2264  }\
2265  }\
2266  }\
2267  }\
2268  },\
2269  annot {\
2270  {\
2271  data ftable {\
2272  {\
2273  data cdregion {\
2274  frame one,\
2275  code {\
2276  id 1\
2277  }\
2278  },\
2279  product whole local str \"minus_5_prime_partial_prot\",\
2280  location int {\
2281  from 0,\
2282  to 15,\
2283  strand minus,\
2284  id local str \"minus_5_prime_partial\",\
2285  fuzz-from lim tr\
2286  }\
2287  }\
2288  }\
2289  }\
2290  }\
2291 }";
2292 
2294 Seq-entry ::= set {\
2295  class nuc-prot ,\
2296  descr {\
2297  source {\
2298  genome mitochondrion ,\
2299  org {\
2300  taxname \"Takifugu fasciatus\" ,\
2301  common \"obscure pufferfish\" ,\
2302  db {\
2303  {\
2304  db \"taxon\" ,\
2305  tag\
2306  id 301270 } } ,\
2307  orgname {\
2308  name\
2309  binomial {\
2310  genus \"Takifugu\" ,\
2311  species \"fasciatus\" } ,\
2312  lineage \"Eukaryota; Metazoa; Chordata; Craniata; Vertebrata;\
2313  Euteleostomi; Actinopterygii; Neopterygii; Teleostei; Euteleostei;\
2314  Neoteleostei; Acanthomorpha; Acanthopterygii; Percomorpha; Tetraodontiformes;\
2315  Tetradontoidea; Tetraodontidae; Takifugu\" ,\
2316  gcode 1 ,\
2317  mgcode 2 ,\
2318  div \"VRT\" } } } } ,\
2319  seq-set {\
2320  seq {\
2321  id {\
2322  local\
2323  str \"bankit1246641\" ,\
2324  general {\
2325  db \"BankIt\" ,\
2326  tag\
2327  id 1246641 } ,\
2328  general {\
2329  db \"TMSMART\" ,\
2330  tag\
2331  id 10764938 } ,\
2332  genbank {\
2333  accession \"GQ409967\" } } ,\
2334  descr {\
2335  title \"Takifugu fasciatus mitochondrion, complete genome.\" ,\
2336  molinfo {\
2337  biomol genomic ,\
2338  completeness complete } } ,\
2339  inst {\
2340  repr raw ,\
2341  mol dna ,\
2342  length 16444 ,\
2343  topology circular ,\
2344  strand ds ,\
2345  seq-data\
2346  ncbi2na '9C9B27C140922C78239C23A15E00B564A44027EB5E1FC7041DE3407\
2347 C44E42CD64D52E00E5559556D680CA8BEB34A4440FEC953845C9FE519550A83D24B8C04F094C2E\
2348 007E1F2D38DC08B6B0076E525166BCC622150BEF25069B02AEBC81CC04101E216045F429EF319F\
2349 58241820430602C97470760546027284407A8F2315473971570118CE01C6C4CD65EBC71893C9F0\
2350 0540A1FA6B9FC014DC8A25EFF0058C755BD05D1575FBFC165CCC516DB49715EE0A1032C8403E91\
2351 25001B4AD8AEC983A2AA100EA713DDE5C8811808EE780E44560A28FC92C2420322ED39E0169CE0\
2352 999111656D1DD54077C3F003070C25140200AA290B6C13AC2EC5A0AE47E8005A24C9F049F0245D\
2353 5F1162F8656E40D89E55817049C95515454410147303155C231F07010040D3FC515CB3288C8028\
2354 1CA2732302C590A819E0220380C152C0B00009223C45DB17FE4D38FC9CB30F2902247F2DC11560\
2355 1E0E271D42125FCCA9135B77BA4022E8089FE2C8AE3017162F4BCC9EBE562078B309D25FFA7DF0\
2356 753073F33C1587F0805008BC340AAB1255FE3108041FF04A28C28F3000F0A4799F0B29F2092514\
2357 42026F0277244D4E510314300475C157D5716A7FDCE7D4C8203CE700E2C30AA5855750910BB134\
2358 8181551603D068541408A80C0CF01D10420013F047FD6F15711EBB940CA021C020020A076901D0\
2359 25D97BF14004D977E7D2E0C22B465E57B8733BF06966B3FE16E40AC990D1FB5FC0EE85ECE0E930\
2360 62A7C9EDD7F742D0E07E3755B9209AA30053086208573A27F210004957B43015C0C0A030172E05\
2361 EFF0EDFEBEA9859AAC1000554EE8380115FFC01422D147728C48137850C38D69C0963C18162F15\
2362 CA8C126435DFF22D4CD8422AFC61763BE8D284D70EB9259CF02BDBFBD063C0B5C6E378BD2168B0\
2363 D4AD2FDCDCE0B17FF52C602858008AA50ED4042551DD1F9E0549D0903088B10070B4081384EF2E\
2364 E92256B3E4025F015F6048AF41D7757073871AC70F151F0F4557053CFB57BDF725B27FFC13C3E0\
2365 600B7CAF1390F1802A54136CA17CE875D4153658E9BC07FD340895B595371759D535CFCF3E55C4\
2366 F257C776530C338114397757C5535F87C0DE94F73FB5C9CDDC9FA4B71DCF729D68E25D41D00CE5\
2367 7CCA3771892F91030DD3182C2DDA1C35FF34F8F3FF12907F15F404FC1B454A024FE1C34D506E15\
2368 76490CE3137519C96010162753F85C120AA2D607ACDEAF41BE0CE4A2957F95CF7F72483394135D\
2369 F0C04477510D73D72895C4CE5277D481ED77343F8CD001253FCD5D37D738958975C5463D633852\
2370 70E45D138001F5C51F13C93D3CDE137E47541C4324A5F55740CC0281EE5E0102851FE38AE0DC8A\
2371 BC0F575275F2020A87E054156223401DD2E7D471147F72C0B4AC0C27FCA54C57003A2BC03555FC\
2372 40E057CCF1E5D5FF3FA5E7BCAC41CF107149111E1C3E4E0EA5C80F0457273CF57C325410451518\
2373 90F8251C4031F7C1D09E592490575D764944141978304A50E2071242715152D50414C3C53E4F25\
2374 F0036872559D445E1D520B0C4287217C41E87CD7A513840036553DD5F71C40F4150C140477C35D\
2375 76535DD4C7EDA68E2AA5C0D0150B1B00D725C75D4D951729E0CB4F3DF43DD1415E9EC305724771\
2376 34D304D7517D7E7F4E3010051043E817C10DD3894015634FD3570C55C35F774FA8AFE554707AFD\
2377 3970387CD7D081F100421729304944F2495F2E571C2DFCFDCDC65CD719CC15C14DD540C1784A04\
2378 715E1815010100013C50594D73ED77D4F75D757F05560B1F11DFC1130887CAF045214225F409DD\
2379 2E82E00DFD2D5E3087E4253CDD137DE4E401211FC3C27025F072321297635E4077CBC127099500\
2380 50627D4DC1FD565C900900A6A80956921B4379F7F23F90DC13B04552A78C808A1F0177B33AA710\
2381 1459F057494F717BA4344678FFDD050D102136915CC5CBFFAE5E25A0CB2912470B7DF3DA96074B\
2382 415A6477EA638523710EC36F1254E4F6C38FF7F32C31434E3E8A7FA81E3CBD57CC36895484E97D\
2383 55830104C27DE1C7D5535F5D7DE764D77A2C825A26AC6A781AFC5455C9280DF9519289F7B21745\
2384 37DDDF4DF92AB75DCFF2A90D07D3441CD3C138055490DD10C501177FDBB896FF0F1E7B1F75E775\
2385 7D4B5F924A8D10C7DD1E1601703105F7F854928A2885535EC5247CF78F7FA515E0B713DF3DD5E9\
2386 F6A30FD1336C971C76A4002053DAF13A93ADE2538E94FADF7E9FCFB3895144EFC4B693A1B21158\
2387 971FC5D6D10C3CF94D584A2D02CFC9E1F905F93A28D0D0388055CC73895DA7D37D73F12EA69705\
2388 A0FB5E941D3572136FF11845C71B2F953F51CED7753AB9ECFE43CEAE4F6C478F5473FD28C44751\
2389 247E0700D47FA2C3BD3EBB41705F7D55047D7EB727A8C5D8631D61C55865C65738075B75D0FA74\
2390 32DDDCBA4B0F3BD77F35DE2097D16509882D434BE07043860EC838711AB95D75F14C4F6089597D\
2391 B501D017735880A8A0D8155B01EBF4252710051DED1FDF4C21DCB00E90F11C7FB42B20FBABC855\
2392 46EDF3CF0E9135F441CABF42B927D14B0C82075F47D4614E770C36CF5F3C913CB1FC4F3EF9CCB7\
2393 5140707042C4F721DD088F80F37814D7149CF3DF35D3E5757D5D60F7F170C8603438554DC10F02\
2394 532B450E31E09CE0CC4873261725FE1D3130D5444217A556943D65FF201E14D83ADBD4BA1D553D\
2395 835CB749208ED751D7896D57772BB003A396C5696DC050125FCD7B5617AEFF73A50E7780DE689C\
2396 1449F4C50DBEF825B5572047F801E35D1C39D823974708270068449BC97FC27003AE5C50115F2E\
2397 039741D055917E3D74F3ADF77E3BB3D70DF5D5D400D32747CF540E2575D50047E5502034057857\
2398 8D384709F7D850FFC255165FE935578F95D9DD73C5FA15DF551141416F81410DB7FC11D40943F3\
2399 C363F1D041E75D51C04CA2A440E257CCF65D3C32EF5C3C53C131CA1D7D4C44F455C51127F5B033\
2400 25727B157BA76412C34DA0C60105412495CA517DF520AC55419DC35535C3CF3601ED25DF4F6177\
2401 A6728BD87859C17449294DCF0F41C3671249FDB9D7557CE507B653DC13415EF3D75E11F720BE5B\
2402 6430D425CEFF6C7DF709773710801B70E953424454C5132C854254E17704A24B6795F7DF135A5C\
2403 94FE3D47F0741CF70C15CA1CB5C7FC74731F438E188CFB1882913D4294511575ED400A5F631A0C\
2404 3DCFCF17582CF7DF5FA7DF7893DC51D097255475E0F28A79E15544A4F35570357D82F51D701124\
2405 B5E7A4D28BC5B05E2745124F3A0AA2600424352D5D15C10D7DCAFDC7F13D79090C8B3CE0955F44\
2406 3648EBBF1A7417DF6D9C5A7D4697D1B4D368D05F7C96CEDC71840D63D47D17581147FA7D824965\
2407 E31E11FEC8EFB78F3DF31374371E38A74C37F72CC0B4B17B87D43454B7EBC0750A0230E07C7041\
2408 0CF0F345127F3574FF0E16DD3DE1D5657C4521C5001CD14CE0EE9F6155CA34958757F74719F7DF\
2409 2D90D7BD75FF61F2036575D755757E28850754D5515E11F33A17595F70F75D10FA5C971838750A\
2410 297E0E25832B0F2FC3C013F8FDA74001CEBC0B530F05E38574D41DD3D174B7DF5CA1CFE97E4FF1\
2411 62D45F73791DF3B7E024C33C95CF5D91FD04E09790335D4527FD25914F1D7DC93DD24EE09EBB2A\
2412 7A7F0CB651256C5468D2345C40170175C439C00D70D5051CC7CD724138F04557038F38577D1D74\
2413 32DD70F9DD1C971CE9C003177804A381FD741571F2516151CD0555FF0D774E7875F55C30D72525\
2414 00511374E250F05B4183333447DF975790F7D78D7E5F7594480E34EFF1B0CFE094770F545F0D74\
2415 F159EA80509206DF064AC4C7D7FDCC5724A7571575D36471C77C401D43A3573575F31C551F2941\
2416 F81C134CE48C2378EA4A3B35E93D72F00C55FC68BD177871502746C82754F928DCCB1C967B1C70\
2417 0728A71A4C3183FCB4572157C14081D27153D3ECF25778A6E3CC1EBD4DE4C6D0121700D3C3E5C7\
2418 4D2D2513287ACBE8ACF74D407578A3D16991C35D30F951A3E136D67F3DE5C9431C9C6264444960\
2419 531C7E762B39030C75770C925E38F4D9097650FC91C51575507C32881C30F3C5D5CF43E3557810\
2420 FA5F1EA72911F3C7969C7477333D74C15062A56554145D7277E057444562245F7CF95D45D75470\
2421 D71F3C401481C378AB81E7EB2132FC34013C8FB8F700122BC015DFB516022BD9043821E70DF4D5\
2422 5DAF01D6A9D3D8549FF028C3270D6FADF28140077EB90750B009CE4751D770F304DC570F3CFF64\
2423 71D3F152FF0417FD54014401501E2474D42E00492C03197D76D25F7957397517C18281E2DCD347\
2424 07C07830115D17F84D04D25F03D875C743CDFC552C91CC6C178D4FF20FE4D78C4C474854F4C058\
2425 F7F0B17DE37D70D90C3CF7ED16504133D41DF4DA7882ABEB3CCD3D7DD369E38C646268670C4965\
2426 71092D373058B6884FA3C34D9CC9F8325151701DF889C41037DFF51C004C84C15D53C3E578DF25\
2427 904A40D2743DA5F4D6E1F5F790C82B5C45ADDE571D47724532DBE4A0DF7C30D6CDD5577C801054\
2428 12574477973972A95C14573F16517997C15206334000FB27FF41F52D072970E32C14DA5C0D2550\
2429 7E5F5E44DE45467FFD02533CF7CE77AB73CD449F038E042135800EA2831147E157BC5D74E5C10F\
2430 A49F25705A0557D725A7DFF502394D3E0DFC145D50F0197895CE5D15D7241F7F449CDC49718B6C\
2431 F71B3532945767F0D57F450D0E0041535B0F05534061C938A493CF9E973C3451017DD5040045EC\
2432 3343173AFBC07C79DF3ED10DFA875C3E5720F27D5C1750107C05C45117B5D5147DDC331FA7DF54\
2433 10FB116E5DD700F0DD3FEA1010F9C540F3E1705E3C800BE854090FD373411D574DDC53CB04D410\
2434 A343421317ED77D74711F99CD0576F7DD17079D8225558541559141DC31C4241BC30401509550C\
2435 B0C755451C83330B8055353B45D80079374373775201574D0177D3428533FBB108C500100000CD\
2436 40020C0145ED05ECA65F28D7CDE48499E183030110504C55533034F04114084202F554E401E095\
2437 449020B90524503E007530028928FEC84110F3441413570C020010993004CBF794A3FC14A173A6\
2438 E0014DBEF1D0710011C3A525C64015155C70036C06132C3E17D5157D013F5978E01FE9D5C768F3\
2439 97CF11034D1287BD7E4311C44D61377165FF4D6C9533F9621B01C69E1C3D90DC46406B974F7FFC\
2440 DE7CCF54CDA58ADDC73A77C5C2C0805E01B2AB2D77C7FCB0E9459FDB28C6D7D4E2840CD7FE2894\
2441 7B0F10179DDE5B55C6CA0119FBD0E2CE28A7DD2C84994770463DFE5F51F5D754FCDBE496794FB1\
2442 37CFFDF460129D410557287C3D0112300D53D454C7DDF102175EA7D10D39DD2576431D95CF7550\
2443 1C5DA215E107D1149435DDB455654CF014838CFD73F93190D79ADCD54309CA2BBDE95F7E5D0DF0\
2444 F7CCB2F5FDF11177010609704F5951CD10F5CF7815C3E786C94D705E0FA29315B608D7C4F3CDA1\
2445 03E5D0C7F1F7577D7ADF0E58C96BE1C80100C701C1093CB2748F489B6B7EC0583B6AAF00D557CE\
2446 740082A1F4155147A75409493DF0F01C77F8C31333B3CD54F4CCCF013C310E4C1C2132CF333D1F\
2447 30C5F41130242C4801C02E720930F200F57003CF04078180FD21604301D34B423314A1D04D7300\
2448 C50B3C3B2C2058534BE3F7C3933CF3E0AE2A10C12EAAFD1C0E073D7A4FEBD71F4A94F0F8F475D3\
2449 DFD3619E130BEFAE8B534B88C3544E589BDDD44AAD2BCFFFDDFD7F43E13F48B9266D06BD342BE0\
2450 4FFDFAFCEB0EF2F0E0F88CD33083C4F3E334284C0C30C63D0434C43F4557DF7FF008FC1B31555C\
2451 555400328805FCA7E05095D47C3C0CF4D33CF34CCF30CF30C330F33'H } } ,\
2452  seq {\
2453  id {\
2454  local\
2455  str \"PROT_4_bankit1246641\" ,\
2456  general {\
2457  db \"TMSMART\" ,\
2458  tag\
2459  id 10764942 } } ,\
2460  descr {\
2461  title \"cytochrome c oxidase subunit II [Takifugu fasciatus]\" ,\
2462  molinfo {\
2463  biomol peptide ,\
2464  tech concept-trans-a } } ,\
2465  inst {\
2466  repr raw ,\
2467  mol aa ,\
2468  length 230 ,\
2469  topology not-set ,\
2470  seq-data\
2471  ncbieaa \"MAHPSQLGFQGAASPVMEELLHFHDHALMIVFLISTLVLYIIVAMVSTKLTNKYI\
2472 LDSQEIEIIWTILPAIILILIALPSLRILYLMDEINDPHLTIKAMGHQWYWSYEYTDYSDLAFDSYMIPTQDLAPGQF\
2473 RLLETDHRMVVPVDSPIRILVSAEDVLHSWAVPSLGVKMDAVPGRLNQTAFILSRPGVFYGQCSEICGANHSFMPIVV\
2474 EAVPLEHFENWSSLMLEDA\" } ,\
2475  annot {\
2476  {\
2477  data\
2478  ftable {\
2479  {\
2480  data\
2481  prot {\
2482  name {\
2483  \"cytochrome c oxidase subunit II\" } } ,\
2484  location\
2485  int {\
2486  from 0 ,\
2487  to 229 ,\
2488  id\
2489  local\
2490  str \"PROT_4_bankit1246641\" } } } } } } } ,\
2491  annot {\
2492  {\
2493  data\
2494  ftable {\
2495  {\
2496  data\
2497  cdregion {\
2498  frame one ,\
2499  code {\
2500  id 2 } ,\
2501  code-break {\
2502  {\
2503  loc\
2504  int {\
2505  from 7837 ,\
2506  to 7837 ,\
2507  strand plus ,\
2508  id\
2509  genbank {\
2510  accession \"GQ409967\" } } ,\
2511  aa\
2512  ncbieaa 42 } } } ,\
2513  comment \"TAA stop codon is completed by the addition of 3' A\
2514  residues to the mRNA\" ,\
2515  product\
2516  whole\
2517  local\
2518  str \"PROT_4_bankit1246641\" ,\
2519  location\
2520  int {\
2521  from 7147 ,\
2522  to 7837 ,\
2523  strand plus ,\
2524  id\
2525  genbank {\
2526  accession \"GQ409967\" } } } } } } }\
2527 ";
2528 
// ASN.1 text fixture: a raw 20-base mRNA with local id "ShortCDS" that carries
// two coding-region features with empty cdregion data. Both features cover the
// two-base interval from 12 to 13 -- one on the plus strand, one on the minus
// strand -- so each location is shorter than a single codon.
2529 const string sc_TestEntry_ShortCDS = "\
2530 Seq-entry ::= seq {\
2531  id {\
2532  local\
2533  str \"ShortCDS\" } ,\
2534  descr {\
2535  molinfo {\
2536  biomol mRNA } } ,\
2537  inst {\
2538  repr raw ,\
2539  mol rna ,\
2540  length 20 ,\
2541  seq-data\
2542  iupacna \"ATGTTTAAACATGTTTAAAC\" } ,\
2543  annot {\
2544  {\
2545  data\
2546  ftable {\
2547  {\
2548  data\
2549  cdregion {\
2550  } ,\
2551  location\
2552  int {\
2553  from 12 ,\
2554  to 13 ,\
2555  strand plus ,\
2556  id\
2557  local\
2558  str \"ShortCDS\" } } ,\
2559  {\
2560  data\
2561  cdregion {\
2562  } ,\
2563  location\
2564  int {\
2565  from 12 ,\
2566  to 13 ,\
2567  strand minus ,\
2568  id\
2569  local\
2570  str \"ShortCDS\" } } } } } }\
2571 ";
2572 
// ASN.1 text fixture: a raw 39-base mRNA with local id "FirstCodon". The entry
// contains only the sequence itself; it declares no annotations or features.
2573 const string sc_TestEntry_FirstCodon = "\
2574 Seq-entry ::= seq {\
2575  id {\
2576  local\
2577  str \"FirstCodon\" } ,\
2578  descr {\
2579  molinfo {\
2580  biomol mRNA } } ,\
2581  inst {\
2582  repr raw ,\
2583  mol rna ,\
2584  length 39 ,\
2585  seq-data\
2586  iupacna \"AAAATGGGAATGTGCTTTTTGAGAGGATGGAAAGGTGTT\" } }\
2587 ";
2588 
// ASN.1 text fixture: a raw 27-base genomic DNA sequence with local id
// "FirstCodon2". The sequence begins with TTG rather than ATG, and the entry
// declares no annotations or features.
2589 const string sc_TestEntry_FirstCodon2 = "\
2590 Seq-entry ::= seq {\
2591  id {\
2592  local\
2593  str \"FirstCodon2\" } ,\
2594  descr {\
2595  molinfo {\
2596  biomol genomic } } ,\
2597  inst {\
2598  repr raw ,\
2599  mol dna ,\
2600  length 27 ,\
2601  seq-data\
2602  iupacna \"TTGCCCTAAAAATAAGAGTAAAACTAA\" } }\
2603 ";
2604 
2605 
// ASN.1 text fixture: a 27-base genomic DNA delta sequence with local id
// "GapInSeq1", built from three literals of 9 bases each. The first and third
// literals carry iupacna data; the middle one has a length but no seq-data
// (presumably the gap the constant's name refers to).
2606 const string sc_TestEntry_GapInSeq1 = "\
2607 Seq-entry ::= seq {\
2608  id {\
2609  local\
2610  str \"GapInSeq1\" } ,\
2611  descr {\
2612  molinfo {\
2613  biomol genomic } } ,\
2614  inst {\
2615  repr delta ,\
2616  mol dna ,\
2617  length 27 ,\
2618  ext \
2619  delta { \
2620  literal { \
2621  length 9 , \
2622  seq-data \
2623  iupacna \"ATGCCCAAA\" } , \
2624  literal { \
2625  length 9 } , \
2626  literal { \
2627  length 9 , \
2628  seq-data \
2629  iupacna \"CCCAAATAA\" } } } } \
2630 ";
2631 
2632 
// ASN.1 text fixture: a 27-base genomic DNA delta sequence with local id
// "GapInSeq2". The literals are 8, 9 and 10 bases long; the middle one has no
// seq-data (presumably a gap), so the data-less segment does not start on a
// multiple of three.
2633 const string sc_TestEntry_GapInSeq2 = "\
2634 Seq-entry ::= seq {\
2635  id {\
2636  local\
2637  str \"GapInSeq2\" } ,\
2638  descr {\
2639  molinfo {\
2640  biomol genomic } } ,\
2641  inst {\
2642  repr delta ,\
2643  mol dna ,\
2644  length 27 ,\
2645  ext \
2646  delta { \
2647  literal { \
2648  length 8 , \
2649  seq-data \
2650  iupacna \"ATGCCCAA\" } , \
2651  literal { \
2652  length 9 } , \
2653  literal { \
2654  length 10 , \
2655  seq-data \
2656  iupacna \"ACCCAAATAA\" } } } } \
2657 ";
2658 
// ASN.1 text fixture: a 29-base genomic DNA delta sequence with local id
// "GapInSeq3". The literals are 9, 9 and 11 bases long; the middle one has no
// seq-data (presumably a gap). The total length is not a multiple of three.
2659 const string sc_TestEntry_GapInSeq3 = "\
2660 Seq-entry ::= seq {\
2661  id {\
2662  local\
2663  str \"GapInSeq3\" } ,\
2664  descr {\
2665  molinfo {\
2666  biomol genomic } } ,\
2667  inst {\
2668  repr delta ,\
2669  mol dna ,\
2670  length 29 ,\
2671  ext \
2672  delta { \
2673  literal { \
2674  length 9 , \
2675  seq-data \
2676  iupacna \"ATGCCCAAA\" } , \
2677  literal { \
2678  length 9 } , \
2679  literal { \
2680  length 11 , \
2681  seq-data \
2682  iupacna \"CCCAAAATAAA\" } } } } \
2683 ";
2684 
2685 
// ASN.1 text fixture: a 27-base genomic DNA delta sequence with local id
// "GapInSeq4", built from three 9-base literals of which the middle one has no
// seq-data (presumably a gap). Apart from the local id, the text has the same
// content as sc_TestEntry_GapInSeq1.
2686 const string sc_TestEntry_GapInSeq4 = "\
2687 Seq-entry ::= seq {\
2688  id {\
2689  local\
2690  str \"GapInSeq4\" } ,\
2691  descr {\
2692  molinfo {\
2693  biomol genomic } } ,\
2694  inst {\
2695  repr delta ,\
2696  mol dna ,\
2697  length 27 ,\
2698  ext \
2699  delta { \
2700  literal { \
2701  length 9 , \
2702  seq-data \
2703  iupacna \"ATGCCCAAA\" } , \
2704  literal { \
2705  length 9 } , \
2706  literal { \
2707  length 9 , \
2708  seq-data \
2709  iupacna \"CCCAAATAA\" } } } } \
2710 ";
2711 
2712 
// ASN.1 text fixture: an 18-base genomic DNA delta sequence with local id
// "GapInSeq5". Two 9-base iupacna literals are separated by a literal whose
// declared length is 0 and which has no seq-data (presumably a zero-length gap).
2713 const string sc_TestEntry_GapInSeq5 = "\
2714 Seq-entry ::= seq {\
2715  id {\
2716  local\
2717  str \"GapInSeq5\" } ,\
2718  descr {\
2719  molinfo {\
2720  biomol genomic } } ,\
2721  inst {\
2722  repr delta ,\
2723  mol dna ,\
2724  length 18 ,\
2725  ext \
2726  delta { \
2727  literal { \
2728  length 9 , \
2729  seq-data \
2730  iupacna \"ATGCCCAAA\" } , \
2731  literal { \
2732  length 0 } , \
2733  literal { \
2734  length 9 , \
2735  seq-data \
2736  iupacna \"CCCAAATAA\" } } } } \
2737 ";
2738 
2740 Seq-entry ::= set {\
2741  seq-set {\
2742  seq {\
2743  id {\
2744  general {\
2745  db \"GNOMON\",\
2746  tag str \"912063.m\"\
2747  }\
2748  },\
2749  descr {\
2750  molinfo {\
2751  biomol mRNA,\
2752  completeness no-ends\
2753  }\
2754  },\
2755  inst {\
2756  repr raw,\
2757  mol rna,\
2758  length 1674,\
2759  seq-data ncbi4na '2481822428821148121184414284124281824121844848888241\
2760 141141141484144141411144128442828241148441842444141141114121411142142144412884\
2761 284114118182828124121144121282418824821288821144188824821182484118844842828828\
2762 241182824121124142488211882218482884844184488821142214412112441144882828824248\
2763 428821141418214121142184444821114844282118844224414484824284181848284288122182\
2764 12441411214824288184121121188484218282422214418812488441822112112182112F842882\
2765 882211824411848824481282482882144484411144121484288214282148121828881282112848\
2766 242214821282481212828822248282124184181184882842488882888184114111182142411884\
2767 142214148448188482211881482211844882824821184414282114441824181214418442421241\
2768 121822281188121182212484112844821141121821142482881824224424142242141114228814\
2769 282218448121141128822424411141821882111824122242482418884218428184484114884284\
2770 842884184121121221884141821224148842211821112148411282114188121114141144888844\
2771 144418841844141428244124141141112242284881824844128122144148182121214181211284\
2772 821118824824841142844842214411112882484118844142421142144182882182114842822142\
2773 822118284821828421841428221844842888824184284224418482882421248221288242214144\
2774 211881282114148828124181422422114141148122842141411144148122822824114882882144\
2775 282141421142482821281122144282488821822824221141821121114141884818824141121141\
2776 141144814221884824184188841821111844488884184284182214841141148441144144142111\
2777 142821142882484118284414111844124114184182814281284824224824114184124114424888\
2778 228284284241144224884111221142818214428124148122888824411841281822824124888244\
2779 114114141281121142821821184118244144141141841214142882414842841141141141228442\
2780 11418288844218414418284412112882288822414282214211141144284142'H\
2781  },\
2782  annot {\
2783  {\
2784  data ftable {\
2785  {\
2786  data cdregion {\
2787  frame one,\
2788  code {\
2789  id 1\
2790  },\
2791  code-break {\
2792  {\
2793  loc int {\
2794  from 879,\
2795  to 881,\
2796  strand plus,\
2797  id general {\
2798  db \"GNOMON\",\
2799  tag str \"912063.m\"\
2800  }\
2801  },\
2802  aa ncbieaa 88\
2803  }\
2804  }\
2805  },\
2806  partial TRUE,\
2807  product whole general {\
2808  db \"GNOMON\",\
2809  tag str \"912063.p\"\
2810  },\
2811  location int {\
2812  from 0,\
2813  to 1673,\
2814  strand plus,\
2815  id general {\
2816  db \"GNOMON\",\
2817  tag str \"912063.m\"\
2818  },\
2819  fuzz-from lim lt,\
2820  fuzz-to lim gt\
2821  }\
2822  }\
2823  }\
2824  }\
2825  }\
2826  },\
2827  seq {\
2828  id {\
2829  general {\
2830  db \"GNOMON\",\
2831  tag str \"912063.p\"\
2832  }\
2833  },\
2834  inst {\
2835  repr raw,\
2836  mol aa,\
2837  length 558,\
2838  seq-data ncbieaa \"RIRFKYNGADAIDMVFSKKKSEERKDWLSKWMREKKDRKQQGLAEEYLYDKD\
2839 TRFVTFKDFVNRELVLFSNLDNERSIPCLVDGFKPGQRKVLFACFKRSDKHGVKVAQLAGGVADMSAYHHGEQSLMTT\
2840 IVHLAQDYVGSNNINXLLPIGMFGTRLQGGKDSASAQYIFTQLSPVTRTLFPSHDDNVLRFLYEENQRIEPEWYCPIS\
2841 PMVLVNGAQGIDTGWRTNIPNYNPRELVKNIKRLIAGEPQKALAPWYKNFRGKIIQIDPRRFACYGEVAVLDDNTIEI\
2842 TELPIKQXTQDYKEKVLEGLMESSDEKKPPVIVDYQEYHTDTTVKFVVKLVPGKLRELERKQDLHQVLQLQSVICMSS\
2843 MVLFDAAGCLRTSTSPEAITQEFYDSRQEKYLQRKEYLLEVLQAQSKRLTNQARFILAKINKEIVFENKKKVAIVDDL\
2844 IKMGFDADPVKKWKEEQKLKLRESGEMDEDDLATVAVEDDEGVSSAAKAVETKLSGYEYLFGMTILDVSEEETNKLIN\
2845 ESEEKMTELRVLKKKTWQDLWHEDLDNFLSELQQRRLS\"\
2846  }\
2847  }\
2848  }\
2849 }\
2850 ";
2851 
// ASN.1 text fixture: a 987-base genomic DNA delta sequence with local id
// "CMW8541SA" (source organism "Holocryphia eucalypti", gcode 1). The delta
// extension declares a 417-base iupacna literal, then a 100-base literal
// marked `fuzz lim unk` whose seq-data is a gap of type unknown, then a
// 470-base iupacna literal. The entry declares no annotations or features.
2852 const string sc_TestEntry_GB_2236 = "\
2853 Seq-entry ::= seq {\
2854  id {\
2855  local str \"CMW8541SA\"\
2856  },\
2857  descr {\
2858  source {\
2859  genome genomic,\
2860  org {\
2861  taxname \"Holocryphia eucalypti\",\
2862  orgname {\
2863  mod {\
2864  {\
2865  subtype isolate,\
2866  subname \"CMW8541\"\
2867  }\
2868  },\
2869  lineage \"Cryphonectriaceae Diaporthales\",\
2870  gcode 1\
2871  }\
2872  },\
2873  subtype {\
2874  {\
2875  subtype country,\
2876  name \"South Africa\"\
2877  }\
2878  }\
2879  },\
2880  title \"Holocryphia eucalypti\",\
2881  molinfo {\
2882  biomol genomic\
2883  },\
2884  create-date std {\
2885  year 2009,\
2886  month 8,\
2887  day 5\
2888  }\
2889  },\
2890  inst {\
2891  repr delta,\
2892  mol dna,\
2893  length 987,\
2894  ext delta {\
2895  literal {\
2896  length 417,\
2897  seq-data iupacna \"CAAACCATCTCGGGCGAGCACGGCCTCGACAGCAATGGCGTGTA\
2898 TGTACCACACCATACCCTACACGGCGGCCCACGCAAGATGGACGCGGCTCGGGCTTTCCTGCTAACCACCCGCGTAGC\
2899 TACAACGGCACCTCCGAGCTCCAGCTCGAGCGCATGAACGTCTACTTCAACGAGGTATGTCTTGTCGGCTGACCAGGC\
2900 CTCCAGCCATCATCCTGCCTCCTGCCTCCTCCTTCCATCGGGACTTCTGTGGCCTGACCGAGCTTGCCCTTCTGACGC\
2901 GTTTCTCGTCCAGGCCTCCGGCAACAAGTATGTTCCCCGCGCCGTCCTCGTCGATCTCGAGCCCGGTACCATGGATGC\
2902 CGTCCGCGCCGGCCCCTTCGGCCAGCTGTTCCGTCCCGACAACTTCGTCTTCGGCCAGTCC\"\
2903  },\
2904  literal {\
2905  length 100,\
2906  fuzz lim unk,\
2907  seq-data gap {\
2908  type unknown\
2909  }\
2910  },\
2911  literal {\
2912  length 470,\
2913  seq-data iupacna \"TGACCAGCCGTGGCGCCCACTCCTTCCGCGCCCTCACCGTGCCC\
2914 GAGTTGACCCAGCAAATGTTCGACCCCAAGAACATGATGGCTGCCTCGGACTTCCGCAACGGCCGCTACCTGACGTGC\
2915 TCTGCCATCTTGTACGTTTTTGTCTTCTCTGTCTCACACATCTCGGATCCACCTCTCGGGCTTGTTTTTGCTAACCCT\
2916 GCTTTCCTCTCTCCCCTACAGCCGTGGCAAGGTCTCCATGAAGGAGGTCGAGGACCAGATGCGCAACGTCCAGAGCAA\
2917 GAACTCGTCCTACTTCGTCGAGTGGATCCCCAACAACGTCCAGACCGCCCTCTGCTCCATCCCCCCCAAGGGCCTCAA\
2918 GATGTCCTCCACCTTTGTCGGCAACTCCACCGCCATCCAGGAGCTCTTCAAGCGTGTTGGCGAGCAGTTCACCGCCAT\
2919 GTTCCGGCGCAAGGCTTTCTTGCATTGGTACACTGG\"\
2920  }\
2921  }\
2922  }\
2923 }\
2924 ";
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CFeat_CI –.
Definition: feat_ci.hpp:64
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const TPrim & Get(void) const
Definition: serialbase.hpp:347
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Contained
2nd contained within 1st extremes
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4376
static void TranslateCdregion(string &prot, const CBioseq_Handle &bsh, const CSeq_loc &loc, const CCdregion &cdr, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=0, ETranslationLengthProblemOptions options=eThrowException)
Translate a coding region into an ncbieaa protein sequence.
Definition: sequence.cpp:4384
vector< TFeatScore > TFeatScores
Definition: sequence.hpp:353
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
Definition: sequence.cpp:945
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
@ fIs5PrimePartial
= 0x4 Translate first codon even if not start codon (because sequence is 5' partial)
Definition: sequence.hpp:984
@ fRemoveTrailingX
= 0x2 Remove trailing Xs from protein
Definition: sequence.hpp:983
@ fNoStop
= 0x1 Do not include stop in translation
Definition: sequence.hpp:982
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
TSeqPos GetBioseqLength(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool IsSetPartial(void) const
const CSeq_loc & GetLocation(void) const
TRange GetTotalRange(void) const
Definition: mapped_feat.hpp:93
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
void SetCoding(TCoding coding)
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void ResetLocation(void)
Reset Location data member.
Definition: Seq_feat_.cpp:122
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
Definition: Seq_inst_.hpp:805
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
Definition: Seq_data_.hpp:644
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsIupacaa(void) const
Check if variant Iupacaa is selected.
Definition: Seq_data_.hpp:524
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:826
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:650
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:838
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:817
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
int i
The Object manager core.
static char tmp[2048]
Definition: utf8.c:42
#define fi
#define FOR_EACH_ANNOT_ON_BIOSEQ
Definition: seq_macros.hpp:286
#define FOR_EACH_FEATURE_ON_ANNOT
Definition: seq_macros.hpp:433
SAnnotSelector –.
Utility stuff for more convenient using of Boost.Test library.
const string sc_TestEntry_internal_stop
USING_SCOPE(objects)
const string sc_TestEntry_alt_frame
const string sc_TestEntry_GapInSeq2
const string sc_TestSQD_4334_2
const string sc_TestSQD_4334_1
const string sc_MinusOrigin
static string GetProteinString(CFeat_CI fi, CScope &scope)
static void TestOneGapSeq(const string &asn, string seg1, string seg2)
const string sc_TestAmbiguousBestFrameEntry
const string sc_TestEntry_GapInSeq4
const string sc_TestEntry
const string sc_TestEntry_3prime_partial
static void SetLocationSkipGap(CRef< CSeq_feat > feat, const CBioseq &bioseq)
static void CheckTranslatedBioseq(CRef< CBioseq > bioseq, string seg1, bool mid_fuzz, string seg2)
const string sc_TestEntry_5prime_partial_minus
BOOST_AUTO_TEST_CASE(Test_Translator_Raw)
const string sc_PickFrameWithEndStopIf3CompleteEntry
const string sc_TestEntry_TerminalTranslExcept
const string sc_TooManyOverlap
const string sc_TestEntry_GB_2236
const string sc_TestEntry_5prime_partial
const string sc_TestBestFrameEntry
const string sc_TestEntry_code_break
const string sc_TestEntry_CodeBreakForStopCodon
const string sc_TestEntry_GapInSeq3
const string sc_TestEntry_ShortCDS
const string sc_TestEntry_GapInSeq1
const string sc_TestEntry_FirstCodon2
const string sc_TestEntry_GapInSeq5
const string sc_TestEntry_FirstCodon
Modified on Mon Feb 26 04:02:51 2024 by modify_doxy.py rev. 669887