NCBI C++ ToolKit
unit_test_autodef.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_autodef.cpp 99409 2023-03-23 18:45:06Z kans $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 * Unit tests for autodef (CAutoDef definition-line generation).
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include "unit_test_autodef.hpp"
37 
38 #include <corelib/ncbi_system.hpp>
39 
40 // This macro should be defined before including test_boost.hpp in every
41 // "*.cpp" file of the executable except one. This mirrors how main() is
42 // defined in only one *.cpp file of a non-Boost.Test executable - the other
43 // files must not define it. If NCBI_BOOST_NO_AUTO_TEST_MAIN is not defined,
44 // test_boost.hpp defines the "main()" function for the tests.
45 //
46 // Usually if your unit tests contain only one *.cpp file you should not
47 // care about this macro at all.
48 //
49 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
50 
51 
52 // This header must be included before all Boost.Test headers if there are any
53 #include <corelib/test_boost.hpp>
54 
56 #include <objects/biblio/Title.hpp>
63 #include <objects/pub/Pub.hpp>
65 #include <objects/seq/GIBB_mol.hpp>
66 #include <objects/seq/Seq_ext.hpp>
70 #include <objects/seq/Ref_ext.hpp>
71 #include <objects/seq/Map_ext.hpp>
72 #include <objects/seq/Seg_ext.hpp>
73 #include <objects/seq/Seq_gap.hpp>
74 #include <objects/seq/Seq_data.hpp>
76 #include <objects/seq/Seqdesc.hpp>
77 #include <objects/seq/MolInfo.hpp>
78 #include <objects/seq/Pubdesc.hpp>
79 #include <objects/seq/Seq_hist.hpp>
99 #include <objmgr/scope.hpp>
100 #include <objmgr/bioseq_ci.hpp>
101 #include <objmgr/feat_ci.hpp>
102 #include <objmgr/seq_vector.hpp>
103 #include <objmgr/util/sequence.hpp>
104 #include <objmgr/seqdesc_ci.hpp>
108 #include <corelib/ncbiapp.hpp>
109 
111 
112 
113 // for writing out tmp files
114 #include <serial/objostrasn.hpp>
115 #include <serial/objostrasnb.hpp>
116 
117 
118 #include <common/test_assert.h> /* This header must go last */
119 
120 
121 extern const char* sc_TestEntryCollidingLocusTags;
122 
125 
127 {
128  if ( !CNcbiApplication::Instance()->GetConfig().HasEntry("NCBI", "Data") ) {
129  }
130 }
131 
132 static bool s_debugMode = false;
133 
135 {
136  // Here we make descriptions of command line parameters that we are
137  // going to use.
138 
139  arg_desc->AddFlag( "debug_mode",
140  "Debugging mode writes errors seen for each test" );
141 }
142 
144 {
145  // initialization function body
146 
147  const CArgs& args = CNcbiApplication::Instance()->GetArgs();
148  if (args["debug_mode"]) {
149  s_debugMode = true;
150  }
151 }
152 
153 
155 {
156  CRef<CSeq_entry> entry(new CSeq_entry());
157  entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_dna);
158  entry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
159  entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
160  entry->SetSeq().SetInst().SetLength(60);
161 
162  CRef<CSeq_id> id(new CSeq_id());
163  id->SetLocal().SetStr ("good");
164  entry->SetSeq().SetId().push_back(id);
165 
166  CRef<CSeqdesc> mdesc(new CSeqdesc());
168  entry->SetSeq().SetDescr().Set().push_back(mdesc);
169  return entry;
170 }
171 
172 
173 static CRef<CSeqdesc> AddSource (CRef<CSeq_entry> entry, string taxname)
174 {
175  CRef<CSeqdesc> odesc(new CSeqdesc());
176  odesc->SetSource().SetOrg().SetTaxname(taxname);
177 
178  if (entry->IsSeq()) {
179  entry->SetSeq().SetDescr().Set().push_back(odesc);
180  } else if (entry->IsSet()) {
181  entry->SetSet().SetDescr().Set().push_back(odesc);
182  }
183  return odesc;
184 }
185 
186 
187 static void AddTitle (CRef<CSeq_entry> entry, string defline)
188 {
189  CRef<CSeqdesc> odesc(new CSeqdesc());
190  odesc->SetTitle(defline);
191 
192  if (entry->IsSeq()) {
193  bool found = false;
194  if (entry->SetSeq().IsSetDescr()) {
196  if ((*it)->IsTitle()) {
197  (*it)->SetTitle(defline);
198  found = true;
199  }
200  }
201  }
202  if (!found) {
203  entry->SetSeq().SetDescr().Set().push_back(odesc);
204  }
205  } else if (entry->IsSet()) {
206  if (entry->GetSet().IsSetClass() && entry->GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) {
207  AddTitle (entry->SetSet().SetSeq_set().front(), defline);
208  } else {
209  entry->SetSet().SetDescr().Set().push_back(odesc);
210  }
211  }
212 }
213 
214 
215 size_t HasBoolField(const CUser_object& user, const string& field_name)
216 {
217  size_t num_found = 0;
218  ITERATE(CUser_object::TData, it, user.GetData()) {
219  if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
220  NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
221  num_found++;
222  if (!(*it)->IsSetData()) {
223  BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
224  } else {
225  BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Bool);
226  if ((*it)->GetData().IsBool()) {
227  BOOST_CHECK_EQUAL((*it)->GetData().GetBool(), true);
228  }
229  }
230  }
231  }
232  return num_found;
233 }
234 
235 size_t HasStringField(const CUser_object& user, const string& field_name, const string& value)
236 {
237  size_t num_found = 0;
238  ITERATE(CUser_object::TData, it, user.GetData()) {
239  if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
240  NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
241  num_found++;
242  if (!(*it)->IsSetData()) {
243  BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
244  } else {
245  BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Str);
246  if ((*it)->GetData().IsStr()) {
247  BOOST_CHECK_EQUAL((*it)->GetData().GetStr(), value);
248  }
249  }
250  }
251  }
252  return num_found;
253 }
254 
255 size_t HasIntField(const CUser_object& user, const string& field_name, int value)
256 {
257  size_t num_found = 0;
258  ITERATE(CUser_object::TData, it, user.GetData()) {
259  if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
260  NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
261  num_found++;
262  if (!(*it)->IsSetData()) {
263  BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
264  } else {
265  BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Int);
266  if ((*it)->GetData().IsInt()) {
267  BOOST_CHECK_EQUAL((*it)->GetData().GetInt(), value);
268  }
269  }
270  }
271  }
272  return num_found;
273 }
274 
275 
277 {
278  size_t expected_num_fields = 7;
279  if (opts.GetOrgMods().size() > 0 || opts.GetSubSources().size() > 0) {
280  expected_num_fields++;
281  }
282  if (!opts.GetDoNotApplyToSp()) {
283  expected_num_fields--;
284  }
285  if (opts.GetUseLabels()) {
286  expected_num_fields++;
287  }
288  if (opts.GetAllowModAtEndOfTaxname()) {
289  expected_num_fields++;
290  }
291  if (opts.GetUseFakePromoters()) {
292  expected_num_fields ++;
293  }
294  if (opts.GetKeepRegulatoryFeatures()) {
295  expected_num_fields++;
296  }
297  if (opts.GetKeepIntrons()) {
298  expected_num_fields++;
299  }
300  if (opts.GetKeepExons()) {
301  expected_num_fields++;
302  }
303  if (opts.GetKeepuORFs()) {
304  expected_num_fields++;
305  }
306  if (opts.GetKeepMobileElements()) {
307  expected_num_fields++;
308  }
309  if (opts.AreAnyFeaturesSuppressed()) {
310  expected_num_fields++;
311  }
312  if (opts.GetKeepMiscRecomb()) {
313  expected_num_fields++;
314  }
315  if (opts.GetKeep5UTRs()) {
316  expected_num_fields++;
317  }
318  if (opts.GetKeep3UTRs()) {
319  expected_num_fields++;
320  }
321  if (opts.GetKeepRepeatRegion()) {
322  expected_num_fields++;
323  }
324  if (!NStr::IsBlank(opts.GetCustomFeatureClause())) {
325  expected_num_fields++;
326  }
327 
328  BOOST_CHECK_EQUAL(user.GetObjectType(), CUser_object::eObjectType_AutodefOptions);
329  BOOST_CHECK_EQUAL(user.GetData().size(), expected_num_fields);
330  BOOST_CHECK_EQUAL(HasBoolField(user, "LeaveParenthetical"), 1);
331  BOOST_CHECK_EQUAL(HasBoolField(user, "SpecifyNuclearProduct"), 1);
332  if (opts.GetUseLabels()) {
333  BOOST_CHECK_EQUAL(HasBoolField(user, "UseLabels"), 1);
334  }
335  if (opts.GetAllowModAtEndOfTaxname()) {
336  BOOST_CHECK_EQUAL(HasBoolField(user, "AllowModAtEndOfTaxname"), 1);
337  }
338  if (opts.GetDoNotApplyToSp()) {
339  BOOST_CHECK_EQUAL(HasBoolField(user, "DoNotApplyToSp"), 1);
340  }
341  if (opts.GetUseFakePromoters()) {
342  BOOST_CHECK_EQUAL(HasBoolField(user, "UseFakePromoters"), 1);
343  }
344  if (opts.GetKeepIntrons()) {
345  BOOST_CHECK_EQUAL(HasBoolField(user, "KeepIntrons"), 1);
346  }
347  if (opts.GetKeepExons()) {
348  BOOST_CHECK_EQUAL(HasBoolField(user, "KeepExons"), 1);
349  }
350  if (opts.GetKeepuORFs()) {
351  BOOST_CHECK_EQUAL(HasBoolField(user, "KeepuORFs"), 1);
352  }
353  BOOST_CHECK_EQUAL(HasStringField(user, "MiscFeatRule", opts.GetMiscFeatRule(opts.GetMiscFeatRule())) , 1);
354  BOOST_CHECK_EQUAL(HasStringField(user, "FeatureListType", opts.GetFeatureListType(opts.GetFeatureListType())), 1);
355  BOOST_CHECK_EQUAL(HasStringField(user, "HIVRule", "WantBoth"), 1);
356  if (!NStr::IsBlank(opts.GetCustomFeatureClause())) {
357  BOOST_CHECK_EQUAL(HasStringField(user, "CustomFeatureClause", opts.GetCustomFeatureClause()), 1);
358  }
359  BOOST_CHECK_EQUAL(HasIntField(user, "MaxMods", -99), 1);
360  if (user.GetData().size() != expected_num_fields) {
361  int field_num = 1;
362  ITERATE(CUser_object::TData, it, user.GetData()) {
363  if (!(*it)->IsSetLabel() || !(*it)->GetLabel().IsStr()) {
364  BOOST_CHECK_EQUAL("Label should be set", "label not set for " + NStr::IntToString(field_num));
365  } else {
366  printf("%s\n", (*it)->GetLabel().GetStr().c_str());
367  }
368  }
369  }
370 }
371 
373  CAutoDefWithTaxonomy& autodef,
374  CRef<CAutoDefModifierCombo> mod_combo)
375 {
376  // check defline for each nucleotide sequence
377  CBioseq_CI seq_iter(seh, CSeq_inst::eMol_na);
378  for ( ; seq_iter; ++seq_iter ) {
379  CBioseq_Handle bh (*seq_iter);
380  //Display ID of sequence
381  CConstRef<CSeq_id> id = bh.GetSeqId();
382 
383  // original defline
384  string orig_defline = "";
385  CSeqdesc_CI desc_it(bh, CSeqdesc::e_Title, 1);
386  if (desc_it) {
387  orig_defline = desc_it->GetTitle();
388  }
389 
390  string new_defline = autodef.GetOneDefLine(mod_combo, bh);
391 
392  BOOST_CHECK_EQUAL(orig_defline, new_defline);
393 
394  CRef<CUser_object> tmp_user = autodef.GetOptionsObject();
395  CAutoDefOptions opts;
396  opts.InitFromUserObject(*tmp_user);
397  mod_combo->InitOptions(opts);
398  CRef<CUser_object> user = opts.MakeUserObject();
399  CAutoDef autodef2;
400  autodef2.SetOptionsObject(*user);
401  new_defline = autodef2.GetOneDefLine(bh);
402  BOOST_CHECK_EQUAL(orig_defline, new_defline);
403  CheckAutoDefOptions(*user, opts);
404  }
405 
406  for (CBioseq_CI seq_it(seh, CSeq_inst::eMol_na); seq_it; ++seq_it) {
407 
408  CAutoDef autodefA;
409  autodefA.SetOptions(*mod_combo);
410  CRef<CUser_object> user_optsA = autodefA.GetOptionsObject();
411 
412  CAutoDef autodefB;
413  autodefB.GetOneDefLine(mod_combo, *seq_it);
414  CRef<CUser_object> user_optsB = autodefB.GetOptionsObject();
415  BOOST_CHECK(user_optsA->Equals(*user_optsB));
416  }
417 
418  // check popset title if needed
419 
420  if (seh.IsSet() && seh.GetSet().GetCompleteBioseq_set()->NeedsDocsumTitle()) {
421  string orig_defline = "";
422  CSeqdesc_CI desc_it(seh, CSeqdesc::e_Title, 1);
423  if (desc_it) {
424  orig_defline = desc_it->GetTitle();
425  }
426  string new_defline = autodef.GetDocsumDefLine(seh);
427  BOOST_CHECK_EQUAL(orig_defline, new_defline);
428  }
429 }
430 
432  vector<CSubSource::ESubtype> subsrcs,
433  vector<COrgMod::ESubtype> orgmods,
434  bool init_with_descrs = false)
435 {
437 
438  CRef<CScope> scope(new CScope(*object_manager));
439  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
440 
441  CAutoDefWithTaxonomy autodef;
442 
443  if (init_with_descrs) {
444  CAutoDef::TSources sources;
445  for (CBioseq_CI b_iter(seh, CSeq_inst::eMol_na); b_iter; ++b_iter) {
446  for (CSeqdesc_CI desc_it(*b_iter, CSeqdesc::e_Source); desc_it; ++desc_it) {
447  sources.emplace_back(&desc_it->GetSource());
448  }
449  }
450  autodef.AddDescriptors(sources);
451  }
452  else {
453  // add to autodef
454  autodef.AddSources(seh);
455  }
456 
457  CRef<CAutoDefModifierCombo> mod_combo;
458  mod_combo = new CAutoDefModifierCombo ();
459  mod_combo->SetUseModifierLabels(true);
460  ITERATE(vector<CSubSource::ESubtype>, it, subsrcs) {
461  mod_combo->AddSubsource(*it, true);
462  }
463  ITERATE(vector<COrgMod::ESubtype>, it, orgmods) {
464  mod_combo->AddOrgMod(*it, true);
465  }
466 
469 
470  CheckDeflineMatches(seh, autodef, mod_combo);
471 }
472 
474  bool use_best = false,
477  bool init_with_descrs = false)
478 {
480 
481  CRef<CScope> scope(new CScope(*object_manager));
482  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
483 
484  CAutoDefWithTaxonomy autodef;
485 
486  if (init_with_descrs) {
487  CAutoDef::TSources sources;
488  for (CBioseq_CI b_iter(seh, CSeq_inst::eMol_na); b_iter; ++b_iter) {
489  for (CSeqdesc_CI desc_it(*b_iter, CSeqdesc::e_Source); desc_it; ++desc_it) {
490  sources.emplace_back(&desc_it->GetSource());
491  }
492  }
493  autodef.AddDescriptors(sources);
494  }
495  else {
496  // add to autodef
497  autodef.AddSources(seh);
498  }
499 
500  CRef<CAutoDefModifierCombo> mod_combo;
501  if (use_best) {
502  mod_combo = autodef.FindBestModifierCombo();
503  } else {
504  mod_combo = new CAutoDefModifierCombo ();
505  }
506 
507  autodef.SetFeatureListType(list_type);
508  autodef.SetMiscFeatRule(misc_feat_rule);
509 
510  CheckDeflineMatches(seh, autodef, mod_combo);
511 }
512 
514  bool use_best = false,
517 {
518  bool init_with_descrs = true;
519  CheckDeflineMatches(entry, use_best, list_type, misc_feat_rule, init_with_descrs);
520 }
521 
523 {
524  auto objmgr = CObjectManager::GetInstance();
525  CRef<CScope> scope(new CScope(*objmgr));
526  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(entry);
527 
528  CAutoDef::TSources sources;
529  for (CBioseq_CI b_iter(seh, CSeq_inst::eMol_na); b_iter; ++b_iter) {
530  for (CSeqdesc_CI desc_it(*b_iter, CSeqdesc::e_Source); desc_it; ++desc_it) {
531  sources.emplace_back(&desc_it->GetSource());
532  }
533  }
534  return sources;
535 }
536 
537 
538 static void CheckDeflineMatches(CRef<CSeq_entry> entry, CSeqFeatData::ESubtype feat_to_suppress, bool init_with_descrs = false)
539 {
541 
542  CRef<CScope> scope(new CScope(*object_manager));
543  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
544 
545  CAutoDefWithTaxonomy autodef;
546 
547  if (init_with_descrs) {
548  CAutoDef::TSources sources;
549  for (CBioseq_CI b_iter(seh, CSeq_inst::eMol_na); b_iter; ++b_iter) {
550  for (CSeqdesc_CI desc_it(*b_iter, CSeqdesc::e_Source); desc_it; ++desc_it) {
551  sources.emplace_back(&desc_it->GetSource());
552  }
553  }
554  autodef.AddDescriptors(sources);
555  }
556  else {
557  // add to autodef
558  autodef.AddSources(seh);
559  }
560 
562  autodef.SuppressFeature(feat_to_suppress);
565 
566  CheckDeflineMatches(seh, autodef, mod_combo);
567 }
568 
569 
571 {
573  if (!entry) {
574  return empty;
575  } else if (entry->IsSeq() && entry->GetSeq().IsNa()) {
576  return entry;
577  } else if (entry->IsSet()) {
580  if (rval) {
581  return rval;
582  }
583  }
584  }
585  return empty;
586 }
587 
588 
589 static void AddFeat (CRef<CSeq_feat> feat, CRef<CSeq_entry> entry)
590 {
591  CRef<CSeq_annot> annot;
592 
593  if (entry->IsSeq()) {
594  if (!entry->GetSeq().IsSetAnnot()
595  || !entry->GetSeq().GetAnnot().front()->IsFtable()) {
596  CRef<CSeq_annot> new_annot(new CSeq_annot());
597  entry->SetSeq().SetAnnot().push_back(new_annot);
598  annot = new_annot;
599  } else {
600  annot = entry->SetSeq().SetAnnot().front();
601  }
602  } else if (entry->IsSet()) {
603  if (!entry->GetSet().IsSetAnnot()
604  || !entry->GetSet().GetAnnot().front()->IsFtable()) {
605  CRef<CSeq_annot> new_annot(new CSeq_annot());
606  entry->SetSet().SetAnnot().push_back(new_annot);
607  annot = new_annot;
608  } else {
609  annot = entry->SetSet().SetAnnot().front();
610  }
611  }
612 
613  if (!feat->IsSetLocation() || feat->GetLocation().Which() == CSeq_loc::e_not_set) {
614  CRef<CSeq_entry> nuc_entry = FindNucInSeqEntry(entry);
615  if (nuc_entry) {
616  CRef<CSeq_id> id(new CSeq_id());
617  id->Assign(*(nuc_entry->GetSeq().GetId().front()));
618  feat->SetLocation().SetInt().SetId(*id);
619  feat->SetLocation().SetInt().SetFrom(0);
620  feat->SetLocation().SetInt().SetTo(entry->GetSeq().GetLength() - 1);
621  }
622  }
623 
624  annot->SetData().SetFtable().push_back(feat);
625 }
626 
627 
628 static CRef<CSeq_entry> MakeProteinForNucProtSet (string id, string protein_name)
629 {
630  // make protein
631  CRef<CBioseq> pseq(new CBioseq());
632  pseq->SetInst().SetMol(CSeq_inst::eMol_aa);
633  pseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
634  pseq->SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
635  pseq->SetInst().SetLength(8);
636 
637  CRef<CSeq_id> pid(new CSeq_id());
638  pid->SetLocal().SetStr (id);
639  pseq->SetId().push_back(pid);
640 
641  CRef<CSeqdesc> mpdesc(new CSeqdesc());
643  pseq->SetDescr().Set().push_back(mpdesc);
644 
645  CRef<CSeq_entry> pentry(new CSeq_entry());
646  pentry->SetSeq(*pseq);
647 
648  CRef<CSeq_feat> feat (new CSeq_feat());
649  feat->SetData().SetProt().SetName().push_back(protein_name);
650  feat->SetLocation().SetInt().SetId().SetLocal().SetStr(id);
651  feat->SetLocation().SetInt().SetFrom(0);
652  feat->SetLocation().SetInt().SetTo(7);
653  AddFeat (feat, pentry);
654 
655  return pentry;
656 }
657 
658 
659 static CRef<CSeq_feat> MakeCDSForNucProtSet (string nuc_id, string prot_id)
660 {
661  CRef<CSeq_feat> cds (new CSeq_feat());
662  cds->SetData().SetCdregion();
663  cds->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
664  cds->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
665  cds->SetLocation().SetInt().SetFrom(0);
666  cds->SetLocation().SetInt().SetTo(26);
667  return cds;
668 }
669 
670 
671 static CRef<CSeq_feat> MakeGeneForNucProtSet(const string& nuc_id, const string& locus, const string& allele = kEmptyStr)
672 {
673  CRef<CSeq_feat> gene(new CSeq_feat());
674  gene->SetData().SetGene().SetLocus(locus);
675  if (!allele.empty()) {
676  gene->SetData().SetGene().SetAllele(allele);
677  }
678  gene->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
679  gene->SetLocation().SetInt().SetFrom(0);
680  gene->SetLocation().SetInt().SetTo(26);
681  return gene;
682 }
683 
684 
685 static CRef<CSeq_entry> BuildNucProtSet(const string& protein_name, const string& locus = kEmptyStr, const string& allele = kEmptyStr)
686 {
689 
690  // make nucleotide
691  CRef<CBioseq> nseq(new CBioseq());
692  nseq->SetInst().SetMol(CSeq_inst::eMol_dna);
693  nseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
694  nseq->SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
695  nseq->SetInst().SetLength(60);
696 
697  CRef<CSeq_id> id(new CSeq_id());
698  id->SetLocal().SetStr ("nuc");
699  nseq->SetId().push_back(id);
700 
701  CRef<CSeqdesc> mdesc(new CSeqdesc());
703  nseq->SetDescr().Set().push_back(mdesc);
704 
705  CRef<CSeq_entry> nentry(new CSeq_entry());
706  nentry->SetSeq(*nseq);
707 
708  if (!locus.empty()) {
709  CRef<CSeq_feat> gene = MakeGeneForNucProtSet("nuc", locus, allele);
710  AddFeat(gene, nentry);
711  }
712 
713  set->SetSeq_set().push_back(nentry);
714 
715  // make protein
716  CRef<CSeq_entry> pentry = MakeProteinForNucProtSet("prot", protein_name);
717 
718  set->SetSeq_set().push_back(pentry);
719 
720  CRef<CSeq_entry> set_entry(new CSeq_entry());
721  set_entry->SetSet(*set);
722 
723  CRef<CSeq_feat> cds = MakeCDSForNucProtSet("nuc", "prot");
724  AddFeat (cds, set_entry);
725 
726  return set_entry;
727 }
728 
729 
730 // tests
731 
732 
733 BOOST_AUTO_TEST_CASE(Test_SimpleAutodef)
734 {
735  // prepare entry
737  AddSource (entry, "Homo sapiens");
738  AddTitle(entry, "Homo sapiens sequence.");
739 
740  CheckDeflineMatches(entry);
742 }
743 
744 BOOST_AUTO_TEST_CASE(Test_UnnamedPlasmid)
745 {
746  // prepare entry
748  CRef<CSeqdesc> desc = AddSource (entry, "Alcanivorax sp. HA03");
750  CRef<CSubSource> sub(new CSubSource("plasmid-name", "unnamed"));
751  desc->SetSource().SetSubtype().push_back(sub);
752  AddTitle(entry, "Alcanivorax sp. HA03 plasmid sequence.");
753 
754  CheckDeflineMatches(entry);
756 }
757 
758 BOOST_AUTO_TEST_CASE(Test_SQD_476)
759 {
760  CRef<CSeq_entry> entry = BuildNucProtSet("chlorocatechol 1,2-dioxygenase");
761  CRef<CSeqdesc> desc = AddSource (entry, "Alcanivorax sp. HA03");
763  CRef<CSubSource> sub(new CSubSource("plasmid-name", "unnamed"));
764  desc->SetSource().SetSubtype().push_back(sub);
765  AddTitle(entry, "Alcanivorax sp. HA03 plasmid chlorocatechol 1,2-dioxygenase gene, complete cds.");
766 
767  CheckDeflineMatches(entry);
769 }
770 
771 BOOST_AUTO_TEST_CASE(Test_SQD_630)
772 {
774  CRef<CSeqdesc> desc = AddSource (entry, "Clathrina aurea");
775  CRef<CSubSource> sub(new CSubSource("clone", "Cau_E6"));
776  desc->SetSource().SetSubtype().push_back(sub);
777  CRef<CSeq_feat> feat(new CSeq_feat());
778  feat->SetData().SetImp().SetKey("repeat_region");
779  CRef<CGb_qual> qual(new CGb_qual("satellite", "microsatellite"));
780  feat->SetQual().push_back(qual);
781  AddFeat(feat, entry);
782 
783  AddTitle(entry, "Clathrina aurea microsatellite sequence.");
784 
785  CheckDeflineMatches(entry);
787 
788  feat->SetComment("dinucleotide");
789  CheckDeflineMatches(entry);
791 }
792 
793 BOOST_AUTO_TEST_CASE(Test_SQD_169)
794 {
796  CRef<CSeqdesc> desc = AddSource (entry, "Clathrina aurea");
797  CRef<CSeq_feat> feat(new CSeq_feat());
798  feat->SetData().SetImp().SetKey("misc_feature");
799  feat->SetComment("contains 5S ribosomal RNA and nontranscribed spacer");
800  AddFeat(feat, entry);
801 
802  AddTitle(entry, "Clathrina aurea 5S ribosomal RNA gene region.");
803 
804  CheckDeflineMatches(entry);
806 }
807 
808 BOOST_AUTO_TEST_CASE(Test_SQD_374)
809 {
811  CRef<CSeqdesc> desc = AddSource (entry, "Clathrina aurea");
812  CRef<CSeq_feat> feat(new CSeq_feat());
813  feat->SetData().SetImp().SetKey("misc_feature");
814  feat->SetComment("contains DNA lyase (Apn2) gene, Apn2-Mat1 intergenic spacer, and mating type protein (Mat1) gene");
815  AddFeat(feat, entry);
816  feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
817  feat->SetLocation().SetPartialStop(true, eExtreme_Biological);
818 
819  AddTitle(entry, "Clathrina aurea DNA lyase (Apn2) gene, partial sequence; Apn2-Mat1 intergenic spacer, complete sequence; and mating type protein (Mat1) gene, partial sequence.");
820 
821  CheckDeflineMatches(entry);
823 }
824 
825 BOOST_AUTO_TEST_CASE(Test_SQD_155)
826 {
828  CRef<CSeqdesc> desc = AddSource (entry, "Clathrina aurea");
829  CRef<CSeq_feat> feat(new CSeq_feat());
830  feat->SetData().SetImp().SetKey("misc_feature");
831  feat->SetComment("amplified with primers designed for 16S ribosomal RNA");
832  AddFeat(feat, entry);
833 
834  AddTitle(entry, "Clathrina aurea sequence.");
835 
836  CheckDeflineMatches(entry);
838 }
839 
840 
841 BOOST_AUTO_TEST_CASE(Test_DocsumTitle_Popset)
842 {
844  unit_test_util::SetTaxname(seq1, "Pinus cembra");
845  // clear previous taxid before setting new one
846  unit_test_util::SetTaxon(seq1, 0);
847  unit_test_util::SetTaxon(seq1, 58041);
849  string defline = "Pinus cembra AcesapD07 fake protein name gene, complete cds.";
851 
853  unit_test_util::ChangeId(seq2, "2");
854  unit_test_util::SetTaxname(seq2, "Pinus cembra");
855  // clear previous taxid before setting new one
856  unit_test_util::SetTaxon(seq2, 0);
857  unit_test_util::SetTaxon(seq2, 58041);
859  defline = "Pinus cembra AcesapD12 fake protein name gene, complete cds.";
861 
863  unit_test_util::ChangeId(seq3, "3");
864  unit_test_util::SetTaxname(seq3, "Pinus cembra");
865  // clear previous taxid before setting new one
866  unit_test_util::SetTaxon(seq3, 0);
867  unit_test_util::SetTaxon(seq3, 58041);
869  defline = "Pinus cembra AcesapD33 fake protein name gene, complete cds.";
871 
872 
874  set->SetSet().SetClass(CBioseq_set::eClass_pop_set);
875  set->SetSet().SetSeq_set().push_back(seq1);
876  set->SetSet().SetSeq_set().push_back(seq2);
877  set->SetSet().SetSeq_set().push_back(seq3);
878  defline = "Pinus cembra fake protein name gene, complete cds.";
879  AddTitle(set, defline);
880  CheckDeflineMatches(set, true);
882 }
883 
884 BOOST_AUTO_TEST_CASE(Test_DocsumTitle_Physet)
885 {
887  unit_test_util::SetTaxname(seq1, "Bembidion mendocinum");
888  // clear previous taxid before setting new one
889  unit_test_util::SetTaxon(seq1, 0);
890  unit_test_util::SetTaxon(seq1, 1353850);
891  string defline = "Bembidion mendocinum fake protein name gene, complete cds.";
893 
895  unit_test_util::ChangeId(seq2, "2");
896  unit_test_util::SetTaxname(seq2, "Bembidion orregoi");
897  // clear previous taxid before setting new one
898  unit_test_util::SetTaxon(seq2, 0);
899  unit_test_util::SetTaxon(seq2, 1353851);
900  defline = "Bembidion orregoi fake protein name gene, complete cds.";
902 
904  set->SetSet().SetClass(CBioseq_set::eClass_pop_set);
905  set->SetSet().SetSeq_set().push_back(seq1);
906  set->SetSet().SetSeq_set().push_back(seq2);
907  defline = "Chilioperyphus fake protein name gene, complete cds.";
908  AddTitle(set, defline);
909  CheckDeflineMatches(set, true);
911 }
912 
913 
914 BOOST_AUTO_TEST_CASE(Test_GB_3108)
915 {
917  CRef<CSeqdesc> desc = AddSource(entry, "Fusarium incarnatum");
918  CRef<CSeq_feat> feat1(new CSeq_feat());
919  feat1->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
920  feat1->SetData().SetRna().SetExt().SetName("5.8S ribosomal RNA");
921  AddFeat(feat1, entry);
922  feat1->SetLocation().SetInt().SetTo(19);
923  feat1->SetLocation().SetPartialStart(true, eExtreme_Biological);
924  CRef<CSeq_feat> feat2(new CSeq_feat());
925  feat2->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
926  feat2->SetData().SetRna().SetExt().SetName("internal transcribed spacer 2");
927  AddFeat(feat2, entry);
928  feat2->SetLocation().SetInt().SetFrom(20);
929  feat2->SetLocation().SetInt().SetTo(39);
930 
931  CRef<CSeq_feat> feat3(new CSeq_feat());
932  feat3->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
933  feat3->SetData().SetRna().SetExt().SetName("28S ribosomal RNA");
934  AddFeat(feat3, entry);
935  feat3->SetLocation().SetInt().SetFrom(40);
936  feat3->SetLocation().SetInt().SetTo(59);
937  feat3->SetLocation().SetPartialStop(true, eExtreme_Biological);
938 
939  AddTitle(entry, "Fusarium incarnatum 5.8S ribosomal RNA gene, partial sequence; internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence.");
940 
941  CheckDeflineMatches(entry);
943 
944  feat2->SetData().SetRna().SetType(CRNA_ref::eType_other);
945  CheckDeflineMatches(entry);
947 
948 }
949 
950 BOOST_AUTO_TEST_CASE(Test_GB_3099)
951 {
953  unit_test_util::SetTaxname(seq, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1))");
954  string defline = "Influenza A virus (A/USA/RVD1_H1/2011(H1N1)) hemagglutinin (HA) gene, complete cds.";
956  AddTitle(nuc, defline);
957  unit_test_util::SetNucProtSetProductName(seq, "hemagglutinin");
958  CRef<CSeq_feat> gene(new CSeq_feat());
959  gene->SetData().SetGene().SetLocus("HA");
960  AddFeat(gene, nuc);
961 
962  CheckDeflineMatches(seq, true);
963  CheckDeflineMatchesWithDescr(seq, true);
964 }
965 
966 
967 BOOST_AUTO_TEST_CASE(Test_GB_3359)
968 {
970  unit_test_util::SetTaxname(seq, "Erwinia amylovora");
971  seq->SetSeq().SetInst().SetMol(CSeq_inst::eMol_rna);
974  ncrna->SetData().SetRna().SetType(CRNA_ref::eType_ncRNA);
975  ncrna->SetData().SetRna().SetExt().SetGen().SetProduct("RmaA");
976  ncrna->SetData().SetRna().SetExt().SetGen().SetClass("antisense_RNA");
977  unit_test_util::AddFeat (ncrna, seq);
978  string defline = "Erwinia amylovora RmaA antisense RNA, complete sequence.";
979  AddTitle(seq, defline);
980  CheckDeflineMatches(seq, true);
981  CheckDeflineMatchesWithDescr(seq, true);
982 }
983 
984 
985 void TestOneOrganelleSequenceDefline(CBioSource::TGenome genome, const string& defline)
986 {
988  unit_test_util::SetGenome(seq, genome);
989  AddTitle(seq, defline);
992 }
993 
994 
995 BOOST_AUTO_TEST_CASE(Test_SQD_1733)
996 {
997  TestOneOrganelleSequenceDefline(CBioSource::eGenome_unknown, "Sebaea microphylla genomic sequence.");
998  TestOneOrganelleSequenceDefline(CBioSource::eGenome_mitochondrion, "Sebaea microphylla mitochondrion sequence.");
999  TestOneOrganelleSequenceDefline(CBioSource::eGenome_apicoplast, "Sebaea microphylla apicoplast sequence.");
1000  TestOneOrganelleSequenceDefline(CBioSource::eGenome_chloroplast, "Sebaea microphylla chloroplast sequence.");
1001  TestOneOrganelleSequenceDefline(CBioSource::eGenome_kinetoplast, "Sebaea microphylla kinetoplast sequence.");
1002  TestOneOrganelleSequenceDefline(CBioSource::eGenome_leucoplast, "Sebaea microphylla leucoplast sequence.");
1003 
1004 }
1005 
1006 void AddExon(CRef<CSeq_entry> seq, const string& number, TSeqPos start)
1007 {
1008  CRef<CSeq_feat> exon = unit_test_util::AddGoodImpFeat(seq, "exon");
1009  exon->ResetComment();
1010  exon->SetLocation().SetInt().SetFrom(start);
1011  exon->SetLocation().SetInt().SetTo(start + 5);
1012  if (!NStr::IsBlank(number)) {
1013  CRef<CGb_qual> qual(new CGb_qual());
1014  qual->SetQual("number");
1015  qual->SetVal(number);
1016  exon->SetQual().push_back(qual);
1017  }
1018 }
1019 
1020 
1022 {
1026  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
1028 
1029  string defline = "Sebaea microphylla fake protein name gene, exon 1 and partial cds.";
1030  AddTitle(nuc, defline);
1031  CheckDeflineMatches(nps, true);
1032  CheckDeflineMatchesWithDescr(nps, true);
1033 
1034  AddExon(nuc, "2", cds->GetLocation().GetStart(eExtreme_Positional) + 10);
1035  defline = "Sebaea microphylla fake protein name gene, exons 1 and 2 and partial cds.";
1036  AddTitle(nuc, defline);
1037  CheckDeflineMatches(nps, true);
1038  CheckDeflineMatchesWithDescr(nps, true);
1039 
1040  AddExon(nuc, "3", cds->GetLocation().GetStart(eExtreme_Positional) +20);
1041  defline = "Sebaea microphylla fake protein name gene, exons 1 through 3 and partial cds.";
1042  AddTitle(nuc, defline);
1043  CheckDeflineMatches(nps, true);
1044  CheckDeflineMatchesWithDescr(nps, true);
1045 }
1046 
1047 
1049 {
1052  misc->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
1053  misc->SetComment("contains internal transcribed spacer 1, 5.8S ribosomal RNA, and internal transcribed spacer 2");
1054  AddTitle(seq, "Sebaea microphylla internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence.");
1055 
1056  CheckDeflineMatches(seq, true);
1057  CheckDeflineMatchesWithDescr(seq, true);
1058 
1059  misc->SetLocation().SetPartialStart(true, eExtreme_Biological);
1060  misc->SetLocation().SetPartialStop(true, eExtreme_Biological);
1061  AddTitle(seq, "Sebaea microphylla internal transcribed spacer 1, partial sequence; 5.8S ribosomal RNA gene, complete sequence; and internal transcribed spacer 2, partial sequence.");
1062  CheckDeflineMatches(seq, true);
1063  CheckDeflineMatchesWithDescr(seq, true);
1064 
1065  misc->SetComment("contains 18S ribosomal RNA, internal transcribed spacer 1, 5.8S ribosomal RNA, and internal transcribed spacer 2");
1066  AddTitle(seq, "Sebaea microphylla 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1 and 5.8S ribosomal RNA gene, complete sequence; and internal transcribed spacer 2, partial sequence.");
1067  CheckDeflineMatches(seq, true);
1068  CheckDeflineMatchesWithDescr(seq, true);
1069 }
1070 
1071 
1073 {
1075  CRef<CSeq_feat> dloop = unit_test_util::AddGoodImpFeat (seq, "D-loop");
1076  dloop->ResetComment();
1077  AddTitle(seq, "Sebaea microphylla D-loop, complete sequence.");
1078  CheckDeflineMatches(seq, true);
1079  CheckDeflineMatchesWithDescr(seq, true);
1080 }
1081 
1082 
1084 {
1086  unit_test_util::SetTaxname(seq, "uncultured archaeon");
1087  CRef<CSeq_feat> dloop = unit_test_util::AddGoodImpFeat (seq, "D-loop");
1088  dloop->ResetComment();
1089  AddTitle(seq, "Uncultured archaeon D-loop, complete sequence.");
1090  CheckDeflineMatches(seq, true);
1091  CheckDeflineMatchesWithDescr(seq, true);
1092 }
1093 
1094 
1096 {
1098  unit_test_util::SetTaxname(seq, "Cypripedium japonicum");
1100  rna->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
1101  rna->ResetComment();
1102  AddTitle(seq, "Cypripedium japonicum gene, complete sequence.");
1103  CheckDeflineMatches(seq, true);
1104  CheckDeflineMatchesWithDescr(seq, true);
1105 }
1106 
1108 {
1110  unit_test_util::SetTaxname(seq, "Burkholderia sp. FERM BP-3421");
1112  gene->ResetComment();
1113  gene->SetData().SetGene().SetLocus("fr9A");
1114  AddTitle(seq, "Burkholderia sp. FERM BP-3421 fr9A gene, complete sequence.");
1115  CheckDeflineMatches(seq, true);
1116  CheckDeflineMatchesWithDescr(seq, true);
1117 
1118  CRef<CSeq_feat> gene_cluster = unit_test_util::AddMiscFeature(seq);
1119  gene_cluster->SetComment("spliceostatin/FR901464 biosynthetic gene cluster");
1120 
1121  AddTitle(seq, "Burkholderia sp. FERM BP-3421 spliceostatin/FR901464 biosynthetic gene cluster, complete sequence.");
1122  CheckDeflineMatches(seq, true);
1123  CheckDeflineMatchesWithDescr(seq, true);
1124 
1125 }
1126 
1127 
1129 {
1133  gene->SetData().SetGene().SetLocus("matK");
1137  prot->SetData().SetProt().SetName().front() = "maturase K";
1138 
1139  CRef<CSeq_feat> intron = unit_test_util::AddGoodImpFeat (nuc, "intron");
1140  intron->SetLocation().SetInt().SetTo(nuc->GetSeq().GetLength() - 1);
1141  intron->SetLocation().SetPartialStart(true, eExtreme_Biological);
1142  intron->SetLocation().SetPartialStop(true, eExtreme_Biological);
1143  intron->SetPartial(true);
1145  gene2->SetData().SetGene().SetLocus("trnK");
1146  gene2->SetData().SetGene().SetDesc("tRNA-Lys");
1147  unit_test_util::AddFeat(gene2, nuc);
1148 
1149  AddTitle(nuc, "Sebaea microphylla tRNA-Lys (trnK) gene, partial sequence; and maturase K (matK) gene, complete cds.");
1150  CheckDeflineMatches(entry, true);
1151  CheckDeflineMatchesWithDescr(entry, true);
1152 }
1153 
1154 
1156 {
1157  // if second coding region does not have protein name, should still not be considered alternatively spliced
1162  gene1->SetData().SetGene().SetLocus("M1");
1163  unit_test_util::AddFeat(gene1, nuc);
1165  cds2->SetData().SetCdregion();
1166  cds2->ResetComment();
1167  cds2->SetLocation().SetInt().SetFrom(cds1->GetLocation().GetStart(eExtreme_Positional));
1168  cds2->SetLocation().SetInt().SetTo(nuc->GetSeq().GetInst().GetLength() - 1);
1170  gene2->SetData().SetGene().SetLocus("M2");
1171  unit_test_util::AddFeat(gene2, nuc);
1172  // make protein for second coding region, with no protein feature
1173  CRef<CSeq_entry> pentry(new CSeq_entry());
1174  pentry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_aa);
1175  pentry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
1176  pentry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
1177  pentry->SetSeq().SetInst().SetLength(8);
1178 
1179  CRef<CSeq_id> pid(new CSeq_id());
1180  pid->SetLocal().SetStr ("prot2");
1181  pentry->SetSeq().SetId().push_back(pid);
1182  entry->SetSet().SetSeq_set().push_back(pentry);
1183  cds2->SetProduct().SetWhole().SetLocal().SetStr("prot2");
1184 
1185  AddTitle(nuc, "Sebaea microphylla M2 and fake protein name (M1) genes, complete cds.");
1186  CheckDeflineMatches(entry, true);
1187  CheckDeflineMatchesWithDescr(entry, true);
1188 }
1189 
1190 
1192 {
1193  // a class I integron (mobile_element) spanning the whole sequence should be named ahead of the gene in the defline
1198  gene1->SetData().SetGene().SetLocus("M1");
1199  unit_test_util::AddFeat(gene1, nuc);
1200 
1202  integron->SetData().SetImp().SetKey("mobile_element");
1203  CRef<CGb_qual> q(new CGb_qual());
1204  q->SetQual("mobile_element_type");
1205  q->SetVal("integron:class I");
1206  integron->SetQual().push_back(q);
1207  integron->SetLocation().SetInt().SetFrom(0);
1208  integron->SetLocation().SetInt().SetTo(nuc->GetSeq().GetLength() - 1);
1209 
1210  AddTitle(nuc, "Sebaea microphylla class I integron fake protein name (M1) gene, complete cds.");
1211  CheckDeflineMatches(entry, true);
1212  CheckDeflineMatchesWithDescr(entry, true);
1213 }
1214 
1215 
1217 {
1222  gene1->SetData().SetGene().SetLocus("gltB");
1223  gene1->SetData().SetGene().SetAllele("16");
1224  unit_test_util::AddFeat(gene1, nuc);
1225 
1226  AddTitle(nuc, "Sebaea microphylla fake protein name (gltB) gene, gltB-16 allele, complete cds.");
1227  CheckDeflineMatches(entry, true);
1228  CheckDeflineMatchesWithDescr(entry, true);
1229 }
1230 
1231 
// SQD-2075: a feature whose comment reads "contains tRNA-Pro and control region"
// and whose location is partial at both ends is expected to produce one
// combined "tRNA-Pro gene and control region, partial sequence" defline.
// NOTE(review): the declarations of `seq` and `misc` are not visible in this
// listing; confirm them against the repository source.
1232 BOOST_AUTO_TEST_CASE(Test_SQD_2075)
1233 {
1236  misc->SetComment("contains tRNA-Pro and control region");
      // mark both biological ends of the feature as partial
1237  misc->SetLocation().SetPartialStart(true, eExtreme_Biological);
1238  misc->SetLocation().SetPartialStop(true, eExtreme_Biological);
      // record the expected defline as the title, then run both checkers against it
1239  AddTitle(seq, "Sebaea microphylla tRNA-Pro gene and control region, partial sequence.");
1240  CheckDeflineMatches(seq, true);
1241  CheckDeflineMatchesWithDescr(seq, true);
1242 }
1243 
1244 
// SQD-2115: deflines for a regulatory feature with regulatory_class "promoter".
// Scenario 1: the promoter alone is expected to give "<organism> promoter region."
// Scenario 2: after a gene with locus "chs" is added, the expected defline is
//             "<organism> chs gene, promoter region."
// NOTE(review): the declarations of `seq`, `promoter` and `gene` are not
// visible in this listing; confirm them against the repository source.
1245 BOOST_AUTO_TEST_CASE(Test_SQD_2115)
1246 {
1249  promoter->ResetComment();
      // turn the feature into a "regulatory" import feature tagged as a promoter
1250  promoter->SetData().SetImp().SetKey("regulatory");
1251  CRef<CGb_qual> q(new CGb_qual());
1252  q->SetQual("regulatory_class");
1253  q->SetVal("promoter");
1254  promoter->SetQual().push_back(q);
      // scenario 1: promoter only
1255  AddTitle(seq, "Sebaea microphylla promoter region.");
1256  CheckDeflineMatches(seq, true);
1257  CheckDeflineMatchesWithDescr(seq, true);
1258 
      // scenario 2: add a gene feature with locus "chs" to the same sequence
1260  gene->SetData().SetGene().SetLocus("chs");
1261  unit_test_util::AddFeat(gene, seq);
1262 
1263  AddTitle(seq, "Sebaea microphylla chs gene, promoter region.");
1264  CheckDeflineMatches(seq, true);
1265  CheckDeflineMatchesWithDescr(seq, true);
1266 
1267 }
1268 
1269 
1271 {
1274  misc1->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
1275  misc1->SetComment("contains 18S ribosomal RNA and internal transcribed spacer 1");
1276  misc1->SetLocation().SetInt().SetFrom(0);
1277  misc1->SetLocation().SetInt().SetTo(15);
1278  misc1->SetLocation().SetPartialStart(true, eExtreme_Biological);
1279 
1281  rna->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
1282  rna->SetData().SetRna().SetExt().SetName("5.8S ribosomal RNA");
1283  rna->SetLocation().SetInt().SetFrom(16);
1284  rna->SetLocation().SetInt().SetTo(19);
1285 
1287  misc2->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
1288  misc2->SetComment("contains internal transcribed spacer 2 and 28S ribosomal RNA");
1289  misc2->SetLocation().SetInt().SetFrom(20);
1290  misc2->SetLocation().SetInt().SetTo(35);
1291  misc2->SetLocation().SetPartialStop(true, eExtreme_Biological);
1292 
1293  AddTitle(seq, "Sebaea microphylla 18S ribosomal RNA gene, partial \
1294 sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and \
1295 internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA \
1296 gene, partial sequence.");
1297  CheckDeflineMatches(seq, true);
1298  CheckDeflineMatchesWithDescr(seq, true);
1299 }
1300 
1301 
// SQD-2118: a feature commented "contains tRNA-Thr, tRNA-Pro, and control region",
// located at 0..15 and partial at both ends. The expected defline marks the
// first and last listed elements as partial and the middle one as complete.
// NOTE(review): the declarations of `seq` and `misc1` are not visible in this
// listing; confirm them against the repository source.
1302 BOOST_AUTO_TEST_CASE(Test_SQD_2118)
1303 {
1306  misc1->SetComment("contains tRNA-Thr, tRNA-Pro, and control region");
1307  misc1->SetLocation().SetInt().SetFrom(0);
1308  misc1->SetLocation().SetInt().SetTo(15);
1309  misc1->SetLocation().SetPartialStart(true, eExtreme_Biological);
1310  misc1->SetLocation().SetPartialStop(true, eExtreme_Biological);
1311 
      // the expected title is one string literal continued with a backslash-newline
1312  AddTitle(seq, "Sebaea microphylla tRNA-Thr gene, partial sequence; \
1313 tRNA-Pro gene, complete sequence; and control region, partial sequence.");
1314  CheckDeflineMatches(seq, true);
1315  CheckDeflineMatchesWithDescr(seq, true);
1316 
1317 }
1318 
1319 
1321 {
1324  misc1->SetComment("nonfunctional xyz due to argle");
1325  misc1->SetLocation().SetInt().SetFrom(0);
1326  misc1->SetLocation().SetInt().SetTo(15);
1327  misc1->SetLocation().SetPartialStart(true, eExtreme_Biological);
1328  misc1->SetLocation().SetPartialStop(true, eExtreme_Biological);
1329 
1330  // kept because the misc_feature is alone
1331  AddTitle(seq, "Sebaea microphylla nonfunctional xyz gene, partial sequence.");
1334 
1335  AddTitle(seq, "Sebaea microphylla nonfunctional xyz gene, partial sequence.");
1338 
1339  AddTitle(seq, "Sebaea microphylla nonfunctional xyz due to argle genomic sequence.");
1342 
1343 
1344  misc1->SetComment("similar to xyz");
1345  AddTitle(seq, "Sebaea microphylla xyz-like gene, partial sequence.");
1348 
1349 }
1350 
1351 
1352 void s_SetProteinName(CRef<CSeq_entry> prot, const string& name)
1353 {
1354  prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front()->SetData().SetProt().SetName().front() = name;
1355 }
1356 
1357 
1358 CRef<CSeq_feat> s_AddCDS(CRef<CSeq_entry> np, const string& name, TSeqPos from, TSeqPos to)
1359 {
1360  CRef<CSeq_entry> prev_prot = np->SetSet().SetSeq_set().back();
1361  CRef<CSeq_entry> new_prot (new CSeq_entry());
1362  new_prot->Assign(*prev_prot);
1363  CRef<CSeq_id> new_id(new CSeq_id());
1364  new_id->Assign(*(prev_prot->GetSeq().GetId().front()));
1365  size_t pos = NStr::Find(new_id->GetLocal().GetStr(), "_");
1366  string prefix = new_id->GetLocal().GetStr().substr(0, pos+ 1);
1367  string suffix = new_id->GetLocal().GetStr().substr(pos + 1);
1368  int prev_offset = NStr::StringToInt(suffix);
1369  new_id->SetLocal().SetStr(prefix + NStr::NumericToString(prev_offset + 1));
1370  unit_test_util::ChangeId(new_prot, new_id);
1371  s_SetProteinName(new_prot, name);
1372  np->SetSet().SetSeq_set().push_back(new_prot);
1373 
1374  CRef<CSeq_feat> prev_cds = np->SetSet().SetAnnot().front()->SetData().SetFtable().back();
1375  CRef<CSeq_feat> new_cds(new CSeq_feat());
1376  new_cds->Assign(*prev_cds);
1377  new_cds->SetProduct().SetWhole().Assign(*new_id);
1378  new_cds->SetLocation().SetInt().SetFrom(from);
1379  new_cds->SetLocation().SetInt().SetTo(to);
1380  np->SetSet().SetAnnot().front()->SetData().SetFtable().push_back(new_cds);
1381  return new_cds;
1382 }
1383 
1384 
1386 {
1391 
1392  unit_test_util::ChangeId(prot1, "_1");
1393  cds1->SetLocation().SetInt().SetFrom(0);
1394  cds1->SetLocation().SetInt().SetTo(5);
1395  cds1->SetProduct().SetWhole().Assign(*(prot1->GetSeq().GetId().front()));
1396  s_SetProteinName(prot1, "RNA-dependent RNA polymerase");
1397 
1398  CRef<CSeq_feat> cds2 = s_AddCDS(entry, "Coat protein", 10, 25);
1399  CRef<CSeq_feat> cds3 = s_AddCDS(entry, "Movement protein", 12, 20);
1400 
1401  cds1->SetLocation().SetPartialStart(true, eExtreme_Biological);
1402 
1403  AddTitle(nuc, "Sebaea microphylla RNA-dependent RNA polymerase gene, partial cds; and Coat protein and Movement protein genes, complete cds.");
1404  CheckDeflineMatches(entry, true);
1405  CheckDeflineMatchesWithDescr(entry, true);
1406 
1407 
1408  // actual splicing
1409  cds2->SetLocation().Assign(*(unit_test_util::MakeMixLoc(nuc->GetSeq().GetId().front())));
1410  cds3->SetLocation().Assign(cds2->GetLocation());
1411  TSeqPos old_end = cds3->GetLocation().GetMix().Get().back()->GetInt().GetTo();
1412  cds3->SetLocation().SetMix().Set().back()->SetInt().SetTo(old_end + 2);
1413 
1414  AddTitle(nuc, "Sebaea microphylla protein gene, complete cds, alternatively spliced; and RNA-dependent RNA polymerase gene, partial cds.");
1415  CheckDeflineMatches(entry, true);
1416  CheckDeflineMatchesWithDescr(entry, true);
1417 
1418 }
1419 
1420 
1422 {
1427 
1428  unit_test_util::ChangeId(prot1, "_1");
1429  cds1->SetLocation().SetInt().SetFrom(0);
1430  cds1->SetLocation().SetInt().SetTo(5);
1431  cds1->SetProduct().SetWhole().Assign(*(prot1->GetSeq().GetId().front()));
1432  s_SetProteinName(prot1, "RNA-dependent RNA polymerase");
1433 
1434  CRef<CSeq_feat> cds2 = s_AddCDS(entry, "Coat protein", 10, 25);
1435  CRef<CSeq_feat> cds3 = s_AddCDS(entry, "Movement protein", 12, 20);
1436 
1437  cds1->SetLocation().SetPartialStart(true, eExtreme_Biological);
1438  cds2->SetLocation().Assign(*(unit_test_util::MakeMixLoc(nuc->GetSeq().GetId().front())));
1439  cds3->SetLocation().Assign(cds2->GetLocation());
1440  TSeqPos old_end = cds3->GetLocation().GetMix().Get().back()->GetInt().GetTo();
1441  cds3->SetLocation().SetMix().Set().back()->SetInt().SetTo(old_end + 2);
1442 
1443  unit_test_util::SetDiv(entry, "VRL");
1444 
1445  AddTitle(nuc, "Sebaea microphylla Movement protein and Coat protein genes, complete cds; and RNA-dependent RNA polymerase gene, partial cds.");
1446  CheckDeflineMatches(entry, true);
1447  CheckDeflineMatchesWithDescr(entry, true);
1448 }
1449 
1450 
1452 {
1455  misc1->ResetComment();
1456  misc1->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
1457  misc1->SetData().SetRna().SetExt().SetName("28S ribosomal RNA");
1458  misc1->SetLocation().SetPartialStart(true, eExtreme_Biological);
1459  misc1->SetLocation().SetPartialStop(true, eExtreme_Biological);
1462 
1463  AddTitle(seq, "Sebaea microphylla isolate JU6 clone 1 28S ribosomal RNA gene, partial sequence.");
1464 
1465  vector<CSubSource::ESubtype> subsrcs;
1466  vector<COrgMod::ESubtype> orgmods;
1467  subsrcs.push_back(CSubSource::eSubtype_clone);
1468  orgmods.push_back(COrgMod::eSubtype_isolate);
1469 
1470  CheckDeflineMatches(seq, subsrcs, orgmods);
1471  CheckDeflineMatches(seq, subsrcs, orgmods, true);
1472 }
1473 
// SQD-2181: a regulatory feature with regulatory_class "promoter" is expected
// to yield "... gene, promoter region and complete cds." when
// SetUseFakePromoters(true) is set on the autodef object.
// The check runs twice: once with sources added from the Seq-entry handle
// (AddSources) and once with sources added from pre-gathered descriptors
// (AddDescriptors).
// NOTE(review): the declarations of `entry`, `nuc`, `misc1`, `object_manager`
// and `mod_combo` are on lines not visible in this listing; confirm them
// against the repository source.
1474 BOOST_AUTO_TEST_CASE(Test_SQD_2181)
1475 {
1480  misc1->ResetComment();
      // turn the feature into a "regulatory" import feature tagged as a promoter
1481  misc1->SetData().SetImp().SetKey("regulatory");
1482  CRef<CGb_qual> q(new CGb_qual());
1483  q->SetQual("regulatory_class");
1484  q->SetVal("promoter");
1485  misc1->SetQual().push_back(q);
1486 
      // expected defline, stored as the title that the checks compare against
1487  AddTitle(nuc, "Sebaea microphylla fake protein name gene, promoter region and complete cds.");
1488 
      // gathered up front so the second pass can feed them to AddDescriptors
1489  auto sources = s_GatherSources(*entry);
      // pass 1: sources taken from the Seq-entry handle
1490  {
1492 
1493  CRef<CScope> scope(new CScope(*object_manager));
1494  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1495 
1496  CAutoDefWithTaxonomy autodef;
1497 
1498  // add to autodef
1499  autodef.AddSources(seh);
1500 
1502 
1505  autodef.SetUseFakePromoters(true);
1506 
1507  CheckDeflineMatches(seh, autodef, mod_combo);
1508  }
      // pass 2: sources taken from the pre-gathered descriptors
1509  {
1510  CAutoDefWithTaxonomy autodef;
1511 
1512  // add to autodef
1513  autodef.AddDescriptors(sources);
1514 
1516 
1519  autodef.SetUseFakePromoters(true);
1520 
1522 
1523  CRef<CScope> scope(new CScope(*object_manager));
1524  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1525  CheckDeflineMatches(seh, autodef, mod_combo);
1526  }
1527 }
1528 
1529 
1531 {
1534 
1536  AddTitle(nuc, "Sebaea microphylla culture ATCC:12345 fake protein name gene, complete cds.");
1537 
1538  vector<CSubSource::ESubtype> subsrcs;
1539  vector<COrgMod::ESubtype> orgmods;
1540  orgmods.push_back(COrgMod::eSubtype_culture_collection);
1541 
1542  CheckDeflineMatches(entry, subsrcs, orgmods);
1543  CheckDeflineMatches(entry, subsrcs, orgmods, true);
1544 }
1545 
1547 {
1551  cds->SetLocation().SetInt().SetFrom(20);
1552  cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
1554  intron->SetData().SetImp().SetKey("intron");
1555  intron->SetLocation().SetInt().SetFrom(0);
1556  intron->SetLocation().SetInt().SetTo(19);
1557  intron->SetLocation().SetPartialStart(true, eExtreme_Biological);
1558  intron->ResetComment();
1559  intron->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("number", "2")));
1561  gene->SetData().SetGene().SetLocus("GAPDH");
1562  gene->SetLocation().SetInt().SetFrom(0);
1563  gene->SetLocation().SetInt().SetTo(cds->GetLocation().GetInt().GetTo());
1564  gene->SetLocation().SetPartialStart(true, eExtreme_Biological);
1565  gene->ResetComment();
1566 
1567  AddTitle(nuc, "Sebaea microphylla fake protein name (GAPDH) gene, intron 2 and partial cds.");
1568 
1569  auto sources = s_GatherSources(*entry);
1570  {
1572 
1573  CRef<CScope> scope(new CScope(*object_manager));
1574  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1575 
1576  CAutoDefWithTaxonomy autodef;
1577 
1578  // add to autodef
1579  autodef.AddSources(seh);
1580  autodef.SetKeepIntrons(true);
1581 
1582  CRef<CAutoDefModifierCombo> mod_combo;
1583  mod_combo = autodef.FindBestModifierCombo();
1584 
1585  CheckDeflineMatches(seh, autodef, mod_combo);
1586  }
1587  {
1588  CAutoDefWithTaxonomy autodef;
1589 
1590  // add to autodef
1591  autodef.AddDescriptors(sources);
1592  autodef.SetKeepIntrons(true);
1593 
1594  CRef<CAutoDefModifierCombo> mod_combo;
1595  mod_combo = autodef.FindBestModifierCombo();
1596 
1598  CRef<CScope> scope(new CScope(*object_manager));
1599  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1600 
1601  CheckDeflineMatches(seh, autodef, mod_combo);
1602  }
1603 }
1604 
1605 
1607 {
1611  cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
1613  spacer->SetComment("G-L intergenic spacer");
1614  spacer->SetLocation().SetInt().SetFrom(cds->SetLocation().GetStart(eExtreme_Biological));
1615  spacer->SetLocation().SetInt().SetTo(cds->SetLocation().GetStart(eExtreme_Biological) + 2);
1616  spacer->SetLocation().SetPartialStop(true, eExtreme_Biological);
1618  gene->SetData().SetGene().SetLocus("G");
1620 
1621 
1622  AddTitle(nuc, "Sebaea microphylla fake protein name (G) gene, partial cds; and G-L intergenic spacer, partial sequence.");
1623 
1624  CheckDeflineMatches(entry);
1626 
1628  nuc->SetSeq().SetInst().SetMol(CSeq_inst::eMol_rna);
1629 
1630  CheckDeflineMatches(entry);
1632 
1633 }
1634 
// SQD-2370: a feature commented "atpB-rbcL intergenic spacer region" is
// expected to produce "<organism> atpB-rbcL intergenic spacer region,
// complete sequence."
// NOTE(review): the declarations of `seq` and `misc1`, and the statement on
// listing line 1644, are not visible here; confirm them against the
// repository source.
1635 BOOST_AUTO_TEST_CASE(Test_SQD_2370)
1636 {
1639  misc1->SetComment("atpB-rbcL intergenic spacer region");
1640 
      // expected defline, stored as the title that the check compares against
1641  AddTitle(seq, "Sebaea microphylla atpB-rbcL intergenic spacer region, complete sequence.");
1642 
1643  CheckDeflineMatches(seq);
1645 }
1646 
1647 
1649 {
1651  unit_test_util::SetTaxname(seq, "Trichoderma sp. FPZSP372");
1653  AddTitle(seq, "Trichoderma sp. FPZSP372 sequence.");
1654 
1655  vector<CSubSource::ESubtype> subsrcs;
1656  vector<COrgMod::ESubtype> orgmods;
1657  orgmods.push_back(COrgMod::eSubtype_isolate);
1658 
1659  CheckDeflineMatches(seq, subsrcs, orgmods);
1660  CheckDeflineMatches(seq, subsrcs, orgmods, true);
1661 
1662  // Try again, but deliberately allow modifier that includes taxname to be included
1663  AddTitle(seq, "Trichoderma sp. FPZSP372 isolate FPZSP37 sequence.");
1664  auto sources = s_GatherSources(*seq);
1665 
1666  {
1668 
1669  CRef<CScope> scope(new CScope(*object_manager));
1670  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*seq);
1671 
1672  CAutoDefWithTaxonomy autodef;
1673 
1674  // add to autodef
1675  autodef.AddSources(seh);
1676 
1677  CRef<CAutoDefModifierCombo> mod_combo;
1678  mod_combo = new CAutoDefModifierCombo();
1679  mod_combo->SetUseModifierLabels(true);
1680  mod_combo->SetAllowModAtEndOfTaxname(true);
1681  mod_combo->SetExcludeSpOrgs(false);
1682  ITERATE(vector<CSubSource::ESubtype>, it, subsrcs) {
1683  mod_combo->AddSubsource(*it, true);
1684  }
1685  ITERATE(vector<COrgMod::ESubtype>, it, orgmods) {
1686  mod_combo->AddOrgMod(*it, true);
1687  }
1688 
1691 
1692  CheckDeflineMatches(seh, autodef, mod_combo);
1693  }
1694  {
1695  CAutoDefWithTaxonomy autodef;
1696 
1697  // add to autodef
1698  autodef.AddDescriptors(sources);
1699 
1700  CRef<CAutoDefModifierCombo> mod_combo;
1701  mod_combo = new CAutoDefModifierCombo();
1702  mod_combo->SetUseModifierLabels(true);
1703  mod_combo->SetAllowModAtEndOfTaxname(true);
1704  mod_combo->SetExcludeSpOrgs(false);
1705  ITERATE(vector<CSubSource::ESubtype>, it, subsrcs) {
1706  mod_combo->AddSubsource(*it, true);
1707  }
1708  ITERATE(vector<COrgMod::ESubtype>, it, orgmods) {
1709  mod_combo->AddOrgMod(*it, true);
1710  }
1711 
1714 
1716  CRef<CScope> scope(new CScope(*object_manager));
1717  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*seq);
1718 
1719  CheckDeflineMatches(seh, autodef, mod_combo);
1720  }
1721 }
1722 
1723 BOOST_AUTO_TEST_CASE(Test_SQD_3440)
1724 {
1725  CAutoDefOptions options;
1726  CAutoDefModifierCombo combo;
1727  combo.InitOptions(options);
1728 
1729  CRef<CUser_object> user = options.MakeUserObject();
1730  BOOST_CHECK_EQUAL(user->GetObjectType(), CUser_object::eObjectType_AutodefOptions);
1731  options.SetUseLabels();
1732  user = options.MakeUserObject();
1733  CheckAutoDefOptions(*user, options);
1734 }
1735 
1736 
// Checks how a coding region whose protein is named "uORF" appears in the defline.
//   1. uORF alone: expected "... uORF gene, complete cds."
//   2. uORF plus a satellite repeat_region: expected defline mentions only
//      the satellite.
//   3. Same features with SetKeepuORFs(true): expected defline lists both.
//      Run once with AddSources and once with AddDescriptors.
// NOTE(review): the declarations of `nuc`, `misc` and `object_manager`, and a
// few other statements, are on lines not visible in this listing; confirm
// them against the repository source.
1737 BOOST_AUTO_TEST_CASE(Test_RemovableuORF)
1738 {
1739  CRef<CSeq_entry> entry = BuildNucProtSet("uORF");
1740  CRef<CSeqdesc> desc = AddSource(entry, "Alcanivorax sp. HA03");
      // scenario 1: the uORF coding region is the only feature
1742  AddTitle(nuc, "Alcanivorax sp. HA03 uORF gene, complete cds.");
1743 
1744  CheckDeflineMatches(entry);
1746 
1747  // try again, with another feature present, so uORF isn't lonely
1749  misc->SetData().SetImp().SetKey("repeat_region");
1750  CRef<CGb_qual> q(new CGb_qual("satellite", "x"));
1751  misc->SetQual().push_back(q);
1752  AddTitle(nuc, "Alcanivorax sp. HA03 satellite x sequence.");
1753  CheckDeflineMatches(entry);
1755 
      // gathered up front so the second block can feed them to AddDescriptors
1756  auto sources = s_GatherSources(*entry);
1757  {
1758  // try again, but set keepORFs flag
1760 
1761  CRef<CScope> scope(new CScope(*object_manager));
1762  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1763 
1764  CAutoDefWithTaxonomy autodef;
1765 
1766  // add to autodef
1767  autodef.AddSources(seh);
1768 
1769  CRef<CAutoDefModifierCombo> mod_combo;
1770  mod_combo = new CAutoDefModifierCombo();
1771 
1774  autodef.SetKeepuORFs(true);
1775 
1776  AddTitle(nuc, "Alcanivorax sp. HA03 uORF gene, complete cds; and satellite x sequence.");
1777  CheckDeflineMatches(seh, autodef, mod_combo);
1778  }
1779  {
1780  // try again, but set keepORFs flag
      // none of the visible lines in this block call AddTitle
1781  CAutoDefWithTaxonomy autodef;
1782 
1783  // add to autodef
1784  autodef.AddDescriptors(sources);
1785 
1786  CRef<CAutoDefModifierCombo> mod_combo;
1787  mod_combo = new CAutoDefModifierCombo();
1788 
1791  autodef.SetKeepuORFs(true);
1792 
1794  CRef<CScope> scope(new CScope(*object_manager));
1795  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1796  CheckDeflineMatches(seh, autodef, mod_combo);
1797  }
1798 
1799 }
1800 
// Checks when a mobile_element feature appears in the defline.
//   1. A "SINE:x" element alone: expected "... SINE x, complete sequence."
//   2. The SINE next to a satellite repeat_region: expected defline mentions
//      only the satellite.
//   3. With SetKeepOptionalMobileElements(true): both are expected.
//   4. With the type changed to "transposon:z" and the flag set back to
//      false: the transposon is still expected in the defline.
// Steps 3 and 4 run once with AddSources and once with AddDescriptors.
// NOTE(review): the declarations of `entry`, `mob_el`, `misc` and
// `object_manager`, and a few other statements, are on lines not visible in
// this listing; confirm them against the repository source.
1801 BOOST_AUTO_TEST_CASE(Test_RemovableMobileElement)
1802 {
1803  // first, try with lonely optional
1806  mob_el->SetData().SetImp().SetKey("mobile_element");
      // `met` is kept so later steps can change the element type via SetVal
1807  CRef<CGb_qual> met(new CGb_qual("mobile_element_type", "SINE:x"));
1808  mob_el->SetQual().push_back(met);
1809  AddTitle(entry, "Sebaea microphylla SINE x, complete sequence.");
1810 
1811  CheckDeflineMatches(entry);
1813 
1814  // try again, with another feature present, so element isn't lonely
1816  misc->SetData().SetImp().SetKey("repeat_region");
1817  CRef<CGb_qual> q(new CGb_qual("satellite", "y"));
1818  misc->SetQual().push_back(q);
      // satellite at 0..10, mobile element at 15..20
1819  misc->SetLocation().SetInt().SetFrom(0);
1820  misc->SetLocation().SetInt().SetTo(10);
1821  mob_el->SetLocation().SetInt().SetFrom(15);
1822  mob_el->SetLocation().SetInt().SetTo(20);
1823  AddTitle(entry, "Sebaea microphylla satellite y sequence.");
1824  CheckDeflineMatches(entry);
1826 
      // gathered up front so the second block can feed them to AddDescriptors
1827  auto sources = s_GatherSources(*entry);
1828 
1829  {
1830  // try again, but set keepMobileElements flag
1832 
1833  CRef<CScope> scope(new CScope(*object_manager));
1834  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1835 
1836  CAutoDefWithTaxonomy autodef;
1837 
1838  // add to autodef
1839  autodef.AddSources(seh);
1840 
1841  CRef<CAutoDefModifierCombo> mod_combo;
1842  mod_combo = new CAutoDefModifierCombo();
1843 
1846  autodef.SetKeepOptionalMobileElements(true);
1847 
1848  AddTitle(entry, "Sebaea microphylla satellite y sequence; and SINE x, complete sequence.");
1849  CheckDeflineMatches(seh, autodef, mod_combo);
1850 
1851  // keep non-optional mobile element when not lonely and flag not set
1852  met->SetVal("transposon:z");
1853  autodef.SetKeepOptionalMobileElements(false);
1854  AddTitle(entry, "Sebaea microphylla satellite y sequence; and transposon z, complete sequence.");
1855  CheckDeflineMatches(seh, autodef, mod_combo);
1856  }
1857  {
1858  // try again, but set keepMobileElements flag
1859  CAutoDefWithTaxonomy autodef;
1860 
1861  // add to autodef
1862  autodef.AddDescriptors(sources);
1863 
1864  CRef<CAutoDefModifierCombo> mod_combo;
1865  mod_combo = new CAutoDefModifierCombo();
1866 
1869  autodef.SetKeepOptionalMobileElements(true);
1870 
      // the previous block left the type as "transposon:z"; set it back to the SINE
1871  met->SetVal("SINE:x");
1872  AddTitle(entry, "Sebaea microphylla satellite y sequence; and SINE x, complete sequence.");
1873 
1875  CRef<CScope> scope(new CScope(*object_manager));
1876  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1877  CheckDeflineMatches(seh, autodef, mod_combo);
1878 
1879  // keep non-optional mobile element when not lonely and flag not set
1880  met->SetVal("transposon:z");
1881  autodef.SetKeepOptionalMobileElements(false);
1882  AddTitle(entry, "Sebaea microphylla satellite y sequence; and transposon z, complete sequence.");
1883  CheckDeflineMatches(seh, autodef, mod_combo);
1884  }
1885 
1886 }
1887 
1889 {
1890  CRef<CSeq_entry> entry = BuildNucProtSet("rhodanese-related sulfurtransferase");
1891  CRef<CSeqdesc> desc = AddSource(entry, "Coxiella burnetii");
1893  CRef<CSeq_feat> gene(new CSeq_feat());
1894  gene->SetData().SetGene().SetLocus_tag("CBU_0065");
1895  AddFeat(gene, nuc);
1897  cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
1898  gene->SetLocation().SetPartialStart(true, eExtreme_Biological);
1899  AddTitle(nuc, "Coxiella burnetii rhodanese-related sulfurtransferase (CBU_0065) gene, partial cds.");
1900  CheckDeflineMatches(entry);
1902 }
1903 
1905 {
1906  CRef<CSeq_entry> entry = BuildNucProtSet("hypothetical protein");
1907  CRef<CSeqdesc> desc = AddSource(entry, "Coxiella burnetii");
1910  CRef<CSeq_feat> gene(new CSeq_feat());
1911  gene->SetData().SetGene().SetLocus_tag("CBU_0067");
1912  AddFeat(gene, nuc);
1913  gene->SetLocation().Assign(cds->GetLocation());
1914 
1916  cds2->SetLocation().SetInt().SetFrom(5);
1917  unit_test_util::AddFeat(cds2, entry);
1919  entry->SetSet().SetSeq_set().push_back(pentry);
1920  pentry->SetSeq().SetAnnot().front()->SetData().SetFtable().front()->SetData().SetProt().SetName().front() = "hypothetical protein";
1921  CRef<CSeq_feat> gene2(new CSeq_feat());
1922  gene2->SetData().SetGene().SetLocus_tag("CBU_0068");
1923  AddFeat(gene2, nuc);
1924  gene2->SetLocation().Assign(cds2->GetLocation());
1925 
1926  AddTitle(nuc, "Coxiella burnetii hypothetical protein (CBU_0067) and hypothetical protein (CBU_0068) genes, complete cds.");
1927  CheckDeflineMatches(entry);
1929 
1930  auto sources = s_GatherSources(*entry);
1931 
1932  {
1933  // try again, but suppress genes
1935 
1936  CRef<CScope> scope(new CScope(*object_manager));
1937  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1938 
1939  CAutoDefWithTaxonomy autodef;
1940 
1941  // add to autodef
1942  autodef.AddSources(seh);
1943 
1944  CRef<CAutoDefModifierCombo> mod_combo;
1945  mod_combo = new CAutoDefModifierCombo();
1946 
1948 
1949  AddTitle(nuc, "Coxiella burnetii hypothetical protein genes, complete cds.");
1950  CheckDeflineMatches(seh, autodef, mod_combo);
1951  }
1952  {
1953  // try again, but suppress genes
1954  CAutoDefWithTaxonomy autodef;
1955 
1956  // add to autodef
1957  autodef.AddDescriptors(sources);
1958 
1959  CRef<CAutoDefModifierCombo> mod_combo;
1960  mod_combo = new CAutoDefModifierCombo();
1961 
1963 
1965  CRef<CScope> scope(new CScope(*object_manager));
1966  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1967  CheckDeflineMatches(seh, autodef, mod_combo);
1968  }
1969 }
1970 
1971 
1973 {
1974  CRef<CSeq_entry> entry = BuildNucProtSet("hypothetical protein");
1975  CRef<CSeqdesc> desc = AddSource(entry, "Coxiella burnetii");
1977 
1979  cds3->SetLocation().SetInt().SetFrom(5);
1980  unit_test_util::AddFeat(cds3, entry);
1982  entry->SetSet().SetSeq_set().push_back(pentry);
1983  pentry->SetSeq().SetAnnot().front()->SetData().SetFtable().front()->SetData().SetProt().SetName().front() = "hypothetical protein";
1984 
1985  AddTitle(nuc, "Coxiella burnetii hypothetical protein genes, complete cds.");
1986  CheckDeflineMatches(entry);
1988 
1989  // try again, but with intervening non-hypothetical protein gene
1991  cds2->SetLocation().SetInt().SetFrom(3);
1992  unit_test_util::AddFeat(cds2, entry);
1994  entry->SetSet().SetSeq_set().push_back(pentry2);
1995  pentry2->SetSeq().SetAnnot().front()->SetData().SetFtable().front()->SetData().SetProt().SetName().front() = "fake protein";
1996 
1997  AddTitle(nuc, "Coxiella burnetii hypothetical protein, fake protein, and hypothetical protein genes, complete cds.");
1998  CheckDeflineMatches(entry);
2000 
2001 }
2002 
2003 
2005 {
2006  CRef<CSeq_entry> entry = BuildNucProtSet("brahma protein");
2007  CRef<CSeqdesc> desc = AddSource(entry, "Anas castanea");
2010  cds->SetLocation().SetInt().SetTo(8);
2011  cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
2012  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
2015  exon->ResetComment();
2016  exon->SetData().SetImp().SetKey("exon");
2017  exon->SetLocation().SetInt().SetFrom(0);
2018  exon->SetLocation().SetInt().SetTo(8);
2019  CRef<CGb_qual> exon_number(new CGb_qual("number", "15"));
2020  exon->SetQual().push_back(exon_number);
2022  intron->ResetComment();
2023  intron->SetData().SetImp().SetKey("intron");
2024  intron->SetLocation().SetInt().SetFrom(9);
2025  intron->SetLocation().SetInt().SetTo(nuc->GetSeq().GetLength() - 1);
2026  CRef<CGb_qual> intron_number(new CGb_qual("number", "15"));
2027  intron->SetQual().push_back(intron_number);
2028 
2030  gene->ResetComment();
2031  gene->SetData().SetGene().SetLocus("BRM");
2032  gene->SetLocation().SetInt().SetTo(nuc->GetSeq().GetLength() - 1);
2033 
2034  AddTitle(nuc, "Anas castanea isolate DPIWECT127 brahma protein (BRM) gene, exon 15, intron 15, and partial cds.");
2035  auto sources = s_GatherSources(*entry);
2036  {
2037  CAutoDefWithTaxonomy autodef;
2038 
2040 
2041  CRef<CScope> scope(new CScope(*object_manager));
2042  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2043  autodef.AddSources(seh);
2044  autodef.SetKeepExons(true);
2045  autodef.SetKeepIntrons(true);
2046 
2048  mod_combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
2049  mod_combo->SetUseModifierLabels(true);
2050 
2051 
2052  CheckDeflineMatches(seh, autodef, mod_combo);
2053  }
2054  {
2055  CAutoDefWithTaxonomy autodef;
2056 
2057  autodef.AddDescriptors(sources);
2058  autodef.SetKeepExons(true);
2059  autodef.SetKeepIntrons(true);
2060 
2062  mod_combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
2063  mod_combo->SetUseModifierLabels(true);
2064 
2066  CRef<CScope> scope(new CScope(*object_manager));
2067  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2068  CheckDeflineMatches(seh, autodef, mod_combo);
2069  }
2070 
2071 }
2072 
2073 BOOST_AUTO_TEST_CASE(Test_IsModifierInString)
2074 {
2075  // in the string, but ignore because it's at the end
2076  BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abc", true), false);
2077  // in the string, report even at end
2078  BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abc", false), true);
2079  // ignore because not whole word
2080  BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "tabc", false), false);
2081  // ignore because not whole word
2082  BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abcq", false), false);
2083  // skip first match because not whole word, find second match
2084  BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abcq abc", false), true);
2085 
2086 
2087 }
2088 
2089 
2090 BOOST_AUTO_TEST_CASE(Test_IsUsableInDefline)
2091 {
2096 }
2097 
2098 
2100 {
2101  // first, try with lonely optional
2104  miscrna->SetData().SetRna().SetType(CRNA_ref::eType_other);
2105  string remainder;
2106  miscrna->SetData().SetRna().SetRnaProductName("trans-spliced leader sequence SL", remainder);
2107  miscrna->SetComment("mini-exon");
2108  AddTitle(entry, "Sebaea microphylla trans-spliced leader sequence SL gene, complete sequence.");
2109 
2110  CheckDeflineMatches(entry);
2112 }
2113 
2114 
2115 BOOST_AUTO_TEST_CASE(Test_TargetedLocusName)
2116 {
2117  CAutoDefOptions options;
2118  options.SetTargetedLocusName("consensus string");
2119  BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "consensus string");
2120  CRef<CUser_object> user = options.MakeUserObject();
2121  BOOST_CHECK_EQUAL(HasStringField(*user, "Targeted Locus Name", "consensus string"), 1);
2122 
2123  options.SetTargetedLocusName("other");
2124  BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "other");
2125  options.InitFromUserObject(*user);
2126  BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "consensus string");
2127 
2128 
2129 }
2130 
2131 
// SQD-3602: a feature whose comment is a "contains ..." list of four elements and
// whose location is partial at both ends. The expected title marks the first and
// last listed elements as partial and the two middle ones as complete.
// NOTE(review): the lines that create `entry` and `misc` are not visible in this view.
2132 BOOST_AUTO_TEST_CASE(Test_SQD_3602)
2133 {
2137  misc->SetComment("contains tRNA-Pro gene, control region, tRNA-Phe gene, and 12S ribosomal RNA gene");
  // partial on both the 5' and 3' (biological) ends
2138  misc->SetLocation().SetPartialStart(true, eExtreme_Biological);
2139  misc->SetLocation().SetPartialStop(true, eExtreme_Biological);
2140  AddTitle(entry, "Sebaea microphylla tRNA-Pro gene, partial sequence; control region and tRNA-Phe gene, complete sequence; and 12S ribosomal RNA gene, partial sequence; mitochondrial.");
2141 
2142  CheckDeflineMatches(entry);
2144 }
2145 
2146 
2148 {
2152  misc->SetComment("contains 12S ribosomal RNA gene, tRNA-Val (trnV) gene, and 16S ribosomal RNA gene");
2153  misc->SetLocation().SetPartialStart(true, eExtreme_Biological);
2154  misc->SetLocation().SetPartialStop(true, eExtreme_Biological);
2155  AddTitle(entry, "Sebaea microphylla 12S ribosomal RNA gene, partial sequence; tRNA-Val (trnV) gene, complete sequence; and 16S ribosomal RNA gene, partial sequence; mitochondrial.");
2156 
2157  CheckDeflineMatches(entry);
2159 }
2160 
2161 
2163 {
2168  prot1->SetData().SetProt().SetName().front() = "hypothetical protein";
2170  cds2->SetData().SetCdregion();
2171  cds2->ResetComment();
2172  cds2->SetLocation().SetInt().SetFrom(cds1->GetLocation().GetStart(eExtreme_Positional));
2173  cds2->SetLocation().SetInt().SetTo(nuc->GetSeq().GetInst().GetLength() - 1);
2174 
2175  CRef<CSeq_entry> pentry(new CSeq_entry());
2176  pentry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_aa);
2177  pentry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
2178  pentry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
2179  pentry->SetSeq().SetInst().SetLength(8);
2180 
2181  CRef<CSeq_id> pid(new CSeq_id());
2182  pid->SetLocal().SetStr("prot2");
2183  pentry->SetSeq().SetId().push_back(pid);
2184  entry->SetSet().SetSeq_set().push_back(pentry);
2185  cds2->SetProduct().SetWhole().SetLocal().SetStr("prot2");
2187  prot2->SetData().SetProt().SetName().front() = "hypothetical protein";
2188 
2189  AddTitle(nuc, "Sebaea microphylla hypothetical protein genes, complete cds.");
2190  CheckDeflineMatches(entry, true);
2191  CheckDeflineMatchesWithDescr(entry, true);
2192 
2193 }
2194 
2195 
// Helper for the regulatory-feature defline tests.
//
// regulatory_class   - value for the "regulatory_class" qualifier; when blank the
//                      regulatory-feature setup below is skipped entirely.
// defline_interval   - tail appended to "Sebaea microphylla fake protein name gene, "
//                      to form the expected title.
// use_fake_promoters - when true, SetUseFakePromoters(true) is applied to the autodef.
// keep_regulatory    - when true, SetKeepRegulatoryFeatures(true) is applied.
//
// The expected title is checked twice: once with the autodef populated through
// AddSources(seh) and once through AddDescriptors(s_GatherSources(*entry)).
// NOTE(review): the declarations of `entry`, `nuc`, `feat`, `object_manager` and
// `mod_combo` are not visible in this view.
2196 void MakeRegulatoryFeatureTest(const string& regulatory_class, const string& defline_interval, bool use_fake_promoters, bool keep_regulatory)
2197 {
2200  if (!NStr::IsBlank(regulatory_class)) {
  // turn the feature into a "regulatory" imp-feat tagged with the requested class
2202  feat->SetData().SetImp().SetKey("regulatory");
2203  CRef<CGb_qual> q(new CGb_qual("regulatory_class", regulatory_class));
2204  feat->SetQual().push_back(q);
2205  }
2206  AddTitle(nuc, "Sebaea microphylla fake protein name gene, " + defline_interval);
2207 
  // pass 1: sources taken from the seq-entry handle
2208  {
2209  CAutoDefWithTaxonomy autodef;
2211 
2213  CRef<CScope> scope(new CScope(*object_manager));
2214  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2215 
2216  autodef.AddSources(seh);
2217  if (use_fake_promoters) {
2218  autodef.SetUseFakePromoters(true);
2219  }
2220  if (keep_regulatory) {
2221  autodef.SetKeepRegulatoryFeatures(true);
2222  }
2223 
2224  CheckDeflineMatches(seh, autodef, mod_combo);
  // detach the entry from this scope before the second pass adds it to a new one
2225  scope->RemoveTopLevelSeqEntry(seh);
2226  }
  // pass 2: sources gathered up front and handed over as descriptors
2227  {
2228  CAutoDefWithTaxonomy autodef;
2230 
2231  auto sources = s_GatherSources(*entry);
2232  autodef.AddDescriptors(sources);
2233  if (use_fake_promoters) {
2234  autodef.SetUseFakePromoters(true);
2235  }
2236  if (keep_regulatory) {
2237  autodef.SetKeepRegulatoryFeatures(true);
2238  }
2239 
2241  CRef<CScope> scope(new CScope(*object_manager));
2242  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2243  CheckDeflineMatches(seh, autodef, mod_combo);
2244  scope->RemoveTopLevelSeqEntry(seh);
2245  }
2246 }
2247 
2248 
2250 {
2251  // a sequence with no promoter, but we set the FakePromoter flag
2252  MakeRegulatoryFeatureTest(kEmptyStr, "promoter region and complete cds.", true, false);
2253 
2254  // a sequence with a promoter, but no flags
2255  MakeRegulatoryFeatureTest("promoter", "complete cds.", false, false);
2256 
2257  // a sequence with a promoter, set the FakePromoter flag
2258  MakeRegulatoryFeatureTest("promoter", "promoter region and complete cds.", true, false);
2259 
2260  // a sequence with a promoter, set keep regulatory
2261  MakeRegulatoryFeatureTest("promoter", "promoter region and complete cds.", false, true);
2262 
2263  // a sequence with a promoter, set keep regulatory and FakePromoter
2264  MakeRegulatoryFeatureTest("promoter", "promoter region and complete cds.", true, true);
2265 
2266  // a sequence with an enhancer, but no flags
2267  MakeRegulatoryFeatureTest("enhancer", "complete cds.", false, false);
2268 
2269  // a sequence with an enhancer, set fake promoters flag
2270  MakeRegulatoryFeatureTest("enhancer", "promoter region and complete cds.", true, false);
2271 
2272  // a sequence with an enhancer, set keep regulatory
2273  MakeRegulatoryFeatureTest("enhancer", "enhancer and complete cds.", false, true);
2274 
2275 }
2276 
2277 
// Checks that the user object built from CAutoDefOptions carries a
// "NuclearCopyFlag" string field with the value "mitochondrion".
// NOTE(review): the statement that sets the flag on `opts` is not visible in this
// view - presumably a SetNuclearCopyFlag(...) call; confirm.
2278 BOOST_AUTO_TEST_CASE(Test_AutodefOptionsSpecifyNuclearCopyFlag)
2279 {
2280  CAutoDefOptions opts;
2281 
2283  CRef<CUser_object> user = opts.MakeUserObject();
2284  BOOST_CHECK_EQUAL(HasStringField(*user, "NuclearCopyFlag", "mitochondrion"), 1);
2285 
2286 }
2287 
2288 
2290 {
2293  misc->ResetComment();
2294  misc->SetData().SetImp().SetKey("repeat_region");
2295  CRef<CGb_qual> q(new CGb_qual("rpt_type", "long_terminal_repeat"));
2296  misc->SetQual().push_back(q);
2297  AddTitle(entry, "Sebaea microphylla LTR repeat region.");
2298 
2299  CheckDeflineMatches(entry);
2301 }
2302 
2303 
2305 {
2307  unit_test_util::SetSubSource(entry, CSubSource::eSubtype_other, "a; minicircle b; c");
2308  AddTitle(entry, "Sebaea microphylla minicircle b sequence.");
2309  CheckDeflineMatches(entry);
2310 
2311  AddTitle(entry, "Sebaea microphylla a minicircle b sequence.");
2312 
2313  vector<CSubSource::ESubtype> subsrcs;
2314  subsrcs.push_back(CSubSource::eSubtype_other);
2315  vector<COrgMod::ESubtype> orgmods;
2316  CheckDeflineMatches(entry, subsrcs, orgmods);
2317  CheckDeflineMatches(entry, subsrcs, orgmods, true);
2318 }
2319 
2320 
// Helper: sets `expected` as the title of `entry`, then checks the generated
// defline with SetKeepMiscRecomb(true) enabled, once with the autodef populated
// through AddSources(seh) and once through AddDescriptors(s_GatherSources(*entry)).
// NOTE(review): the declarations of `object_manager` and `mod_combo` are not
// visible in this view.
2321 void TestForRecomb(CRef<CSeq_entry> entry, const string& expected)
2322 {
2323  AddTitle(entry, expected);
2324 
  // pass 1: sources taken from the seq-entry handle
2325  {
2326  CAutoDefWithTaxonomy autodef;
2328  CRef<CScope> scope(new CScope(*object_manager));
2329  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2330 
2331  autodef.AddSources(seh);
2334  autodef.SetKeepMiscRecomb(true);
2335 
2336  CheckDeflineMatches(seh, autodef, mod_combo);
2337  }
  // pass 2: sources gathered up front and handed over as descriptors
2338  {
2339  auto sources = s_GatherSources(*entry);
2340 
2341  CAutoDefWithTaxonomy autodef;
2342 
2343  autodef.AddDescriptors(sources);
2346  autodef.SetKeepMiscRecomb(true);
2347 
2349  CRef<CScope> scope(new CScope(*object_manager));
2350  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2351 
2352  CheckDeflineMatches(seh, autodef, mod_combo);
2353  }
2354 }
2355 
2356 
2358 {
2361  m->SetData().SetImp().SetKey("misc_recomb");
2362  m->SetComment("GCC2-ALK translocation breakpoint junction; microhomology");
2363 
2364  // by default, misc_recomb not included
2365  AddTitle(entry, "Sebaea microphylla sequence.");
2366  CheckDeflineMatches(entry);
2368 
2369  // use option to show misc_recomb
2370  TestForRecomb(entry, "Sebaea microphylla GCC2-ALK translocation breakpoint junction genomic sequence.");
2371 
2372  // prefer recombination_class qualifier
2373  m->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("recombination_class", "mitotic_recombination")));
2374  TestForRecomb(entry, "Sebaea microphylla mitotic_recombination genomic sequence.");
2375 }
2376 
2377 
2379 {
2382  AddTitle(entry, "Sebaea microphylla special flower.");
2383  auto sources = s_GatherSources(*entry);
2384  {
2385  CAutoDefWithTaxonomy autodef;
2387  CRef<CScope> scope(new CScope(*object_manager));
2388  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2389 
2390  autodef.AddSources(seh);
2393  autodef.SetCustomFeatureClause("special flower");
2394  CheckDeflineMatches(seh, autodef, mod_combo);
2395  }
2396  {
2397  CAutoDefWithTaxonomy autodef;
2398 
2399  autodef.AddDescriptors(sources);
2402  autodef.SetCustomFeatureClause("special flower");
2403 
2405  CRef<CScope> scope(new CScope(*object_manager));
2406  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2407  CheckDeflineMatches(seh, autodef, mod_combo);
2408  }
2409 }
2410 
2411 
// SQD-3914: a misc RNA whose comment is a "contains ..." list. The comment writes
// the tRNA names without a space before the parenthesised locus ("tRNA-Ile(trnI)"),
// while the expected title has one ("tRNA-Ile (trnI)").
// NOTE(review): the lines that create `entry` and `m` are not visible in this view.
2412 BOOST_AUTO_TEST_CASE(Test_SQD_3914)
2413 {
2416  m->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
2417  m->SetComment("contains 16S-23S ribosomal RNA intergenic spacer, tRNA-Ile(trnI), and tRNA-Ala(trnA)");
2418  AddTitle(entry, "Sebaea microphylla 16S-23S ribosomal RNA intergenic spacer, tRNA-Ile (trnI) and tRNA-Ala (trnA) genes, complete sequence.");
2419  CheckDeflineMatches(entry);
2421 }
2422 
2423 
2424 BOOST_AUTO_TEST_CASE(Test_CAutoDefAvailableModifier_GetOrgModLabel)
2425 {
2451 }
2452 
2453 
2455 {
2458  utr3->SetLocation().SetInt().SetFrom(0);
2459  utr3->SetLocation().SetInt().SetTo(10);
2461  unit_test_util::AddFeat(gene1, entry);
2463  utr5->SetLocation().SetInt().SetFrom(20);
2464  utr5->SetLocation().SetInt().SetTo(25);
2466  unit_test_util::AddFeat(gene2, entry);
2467 
2468  string defline = "Sebaea microphylla gene locus gene, complete sequence.";
2469  AddTitle(entry, defline);
2470 
2471  {
2472  CAutoDefWithTaxonomy autodef;
2473 
2475 
2476  CRef<CScope> scope(new CScope(*object_manager));
2477  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2478  autodef.AddSources(seh);
2479  autodef.SetKeep3UTRs(true);
2480  autodef.SetKeep5UTRs(true);
2481 
2483  mod_combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
2484  mod_combo->SetUseModifierLabels(true);
2485 
2486  defline = "Sebaea microphylla gene locus gene, 5' UTR and 3' UTR.";
2487  CheckDeflineMatches(seh, autodef, mod_combo);
2488  }
2489  {
2490  auto sources = s_GatherSources(*entry);
2491 
2492  CAutoDefWithTaxonomy autodef;
2493 
2494  autodef.AddDescriptors(sources);
2495  autodef.SetKeep3UTRs(true);
2496  autodef.SetKeep5UTRs(true);
2497 
2499  mod_combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
2500  mod_combo->SetUseModifierLabels(true);
2501 
2503  CRef<CScope> scope(new CScope(*object_manager));
2504  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2505  CheckDeflineMatches(seh, autodef, mod_combo);
2506  }
2507 }
2508 
2510 {
2511  // suppress if no number
2515  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
2517  string defline = "Sebaea microphylla fake protein name gene, partial cds.";
2518  AddTitle(nuc, defline);
2519  CheckDeflineMatches(nps, true);
2520  CheckDeflineMatchesWithDescr(nps, true);
2521 
2522  // show if has number
2526  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
2528  defline = "Sebaea microphylla fake protein name gene, exon 1 and partial cds.";
2529  AddTitle(nuc, defline);
2530  CheckDeflineMatches(nps, true);
2531  CheckDeflineMatchesWithDescr(nps, true);
2532 
2533  // suppress if coding region complete
2534  cds->SetLocation().SetPartialStop(false, eExtreme_Biological);
2535  defline = "Sebaea microphylla fake protein name gene, complete cds.";
2536  AddTitle(nuc, defline);
2537  CheckDeflineMatches(nps, true);
2538  CheckDeflineMatchesWithDescr(nps, true);
2539 }
2540 
2541 
2543 {
2544  // nuclear gene for X product (but not for macronuclear)
2548  prot->SetData().SetProt().SetName().front() = "LIA2 macronuclear isoform";
2549 
2550  string defline = "Sebaea microphylla LIA2 macronuclear isoform gene, complete cds.";
2551  AddTitle(nuc, defline);
2552  CheckDeflineMatches(nps, true);
2553  CheckDeflineMatchesWithDescr(nps, true);
2554 
2555  // apicoplast
2556  prot->SetData().SetProt().SetName().front() = "LIA2 apicoplast protein";
2557  defline = "Sebaea microphylla LIA2 apicoplast protein gene, complete cds; nuclear gene for apicoplast product.";
2558  AddTitle(nuc, defline);
2559  CheckDeflineMatches(nps, true);
2560  CheckDeflineMatchesWithDescr(nps, true);
2561 
2562 }
2563 
2564 
// SQD-4185: influenza record whose taxname already contains the parenthesised
// strain/serotype. The expected title repeats the taxname unchanged, followed by
// "segment 4", the product name and the gene locus.
// NOTE(review): the lines that create `seq` and `nuc` (and whatever sets the
// segment) are not visible in this view.
2565 BOOST_AUTO_TEST_CASE(Test_SQD_4185)
2566 {
2568  unit_test_util::SetTaxname(seq, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1))");
2570  string defline = "Influenza A virus (A/USA/RVD1_H1/2011(H1N1)) segment 4 hemagglutinin (HA) gene, complete cds.";
2572  AddTitle(nuc, defline);
2573  unit_test_util::SetNucProtSetProductName(seq, "hemagglutinin");
  // gene feature supplying the "(HA)" locus in the expected title
2574  CRef<CSeq_feat> gene(new CSeq_feat());
2575  gene->SetData().SetGene().SetLocus("HA");
2576  AddFeat(gene, nuc);
2577 
  // checked via both entry points, each called with `true` as second argument
2578  CheckDeflineMatches(seq, true);
2579  CheckDeflineMatchesWithDescr(seq, true);
2580 }
2581 
2582 
2584 {
2585  // do not include notes in deflines when calculating uniqueness
2587  vector<string> notes = { "a", "b", "c" };
2588  vector<string>::iterator nit = notes.begin();
2590  AddTitle(*it, "Sebaea microphylla sequence.");
2592  ++nit;
2593  }
2594  entry->SetSet().ResetDescr();
2595  AddTitle(entry, "Sebaea microphylla sequence.");
2596 
2598  CRef<CScope> scope(new CScope(*object_manager));
2599  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2600 
2601  CAutoDef autodef;
2602  autodef.AddSources(seh);
2603 
2604  CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
2605  BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
2606  BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);
2607 
2608  CheckDeflineMatches(entry, true);
2609 
2610  scope->RemoveTopLevelSeqEntry(seh);
2611 
2612  nit = notes.begin();
2616  ++nit;
2617  }
2618  seh = scope->AddTopLevelSeqEntry(*entry);
2619  CAutoDef autodef2;
2620  autodef2.AddSources(seh);
2621  mod_combo = autodef.FindBestModifierCombo();
2622  BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
2623  BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);
2624 
2625  CheckDeflineMatches(entry, true);
2626 }
2627 
// GB-6690, descriptor-based variant: the best modifier combo computed from
// sources supplied through AddDescriptors() must use neither the "other" OrgMod
// nor the "other" SubSource, and the defline must still match.
// NOTE(review): the line that creates `entry` and the headers of both loops are
// not visible in this view.
2628 BOOST_AUTO_TEST_CASE(Test_GB_6690_WithDescr)
2629 {
2630  // do not include notes in deflines when calculating uniqueness
2632  vector<string> notes = { "a", "b", "c" };
2633  vector<string>::iterator nit = notes.begin();
2635  AddTitle(*it, "Sebaea microphylla sequence.");
2637  ++nit;
2638  }
  // drop the set-level descriptors, then give the set itself the expected title
2639  entry->SetSet().ResetDescr();
2640  AddTitle(entry, "Sebaea microphylla sequence.");
2641 
2642  auto sources = s_GatherSources(*entry);
2643  CAutoDef autodef;
2644  autodef.AddDescriptors(sources);
2645 
2646  CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
2647  BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
2648  BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);
2649 
2650  CheckDeflineMatchesWithDescr(entry, true);
2651 
  // second round: walk the notes again from the start
2652  nit = notes.begin();
2656  ++nit;
2657  }
2658 
2659  sources = s_GatherSources(*entry);
2660  CAutoDef autodef2;
2661  autodef2.AddDescriptors(sources);
  // NOTE(review): autodef2 was just populated from the refreshed sources, yet the
  // combo below is recomputed from the first `autodef`, so the two checks that
  // follow never look at autodef2 - looks like autodef2 was intended; confirm.
2662  mod_combo = autodef.FindBestModifierCombo();
2663  BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
2664  BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);
2665 
2666  CheckDeflineMatchesWithDescr(entry, true);
2667 }
2668 
2669 
// Helper: returns a user field whose label is the string `label` and whose data
// is the string `val`.
// NOTE(review): the line that creates `f` is not visible in this view -
// presumably a freshly allocated CUser_field; confirm.
2670 CRef<CUser_field> MkField(const string& label, const string& val)
2671 {
2673  f->SetLabel().SetStr(label);
2674  f->SetData().SetStr(val);
2675  return f;
2676 }
2677 
2678 
// Human STR defline: a "StructuredComment" user object with the ##HumanSTR-START##
// prefix plus a "variation" feature carrying a dbSNP cross-reference. The expected
// title strings together the STR locus name, the length-based allele, the
// bracketed repeat and the dbSNP tag after the word "microsatellite".
// NOTE(review): the lines that create `entry` and `var` are not visible in this view.
2679 BOOST_AUTO_TEST_CASE(Test_HumanSTR)
2680 {
  // structured comment describing the STR
2682  CRef<CUser_object> obj(new CUser_object());
2683  obj->SetType().SetStr("StructuredComment");
2684  obj->SetData().push_back(MkField("StructuredCommentPrefix", "##HumanSTR-START##"));
2685  obj->SetData().push_back(MkField("STR locus name", "TPOX"));
2686  obj->SetData().push_back(MkField("Length-based allele", "7"));
2687  obj->SetData().push_back(MkField("Bracketed repeat", "[AATG]7"));
  // attach a copy of the user object to the sequence as a descriptor
2688  CRef<CSeqdesc> d(new CSeqdesc());
2689  d->SetUser().Assign(*obj);
2690  entry->SetSeq().SetDescr().Set().push_back(d);
2691 
  // variation feature with the dbSNP id that ends up in the title
2693  var->SetData().SetImp().SetKey("variation");
2694  CRef<CDbtag> dbtag(new CDbtag());
2695  dbtag->SetDb("dbSNP");
2696  dbtag->SetTag().SetStr("rs115644759");
2697  var->SetDbxref().push_back(dbtag);
2698 
2699  string defline = "Sebaea microphylla microsatellite TPOX 7 [AATG]7 rs115644759 sequence.";
2700  AddTitle(entry, defline);
2701 
2702  CheckDeflineMatches(entry);
2704 
2705 }
2706 
2707 
2709 {
2711 
2713  intron->SetData().SetImp().SetKey("intron");
2714  intron->SetComment("group A");
2715 
2716  string defline = "Sebaea microphylla intron.";
2717  AddTitle(entry, defline);
2718 
2719  CheckDeflineMatches(entry);
2721 
2722 }
2723 
2724 
2726 {
2728 
2730  cds->SetData().SetCdregion();
2731  cds->ResetComment();
2732  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
2733 
2735  gene->SetData().SetGene().SetDesc("cullin 1");
2736  gene->ResetComment();
2737  gene->SetLocation().SetPartialStop(true, eExtreme_Biological);
2738  gene->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("pseudogene", "allelic")));
2739 
2740  string defline = "Sebaea microphylla cullin 1 pseudogene, partial sequence.";
2741  AddTitle(entry, defline);
2742 
2743  CheckDeflineMatches(entry);
2745 }
2746 
2747 
// Helper for the influenza defline tests: sets `taxname` on a test entry, applies
// each of strain / serotype / clone / segment only when the corresponding argument
// is non-blank, then stores `defline` as the title and checks the generated
// defline against it.
// NOTE(review): the line that creates `entry` and the body of each `if` are not
// visible in this view - presumably each sets the matching OrgMod/SubSource; confirm.
2748 void CheckInfluenzaDefline(const string& taxname, const string& strain, const string& serotype, const string& clone, const string& segment, const string& defline)
2749 {
2751  unit_test_util::SetTaxname(entry, taxname);
2752  if (!NStr::IsBlank(strain)) {
2754  }
2755  if (!NStr::IsBlank(serotype)) {
2757  }
2758  if (!NStr::IsBlank(clone)) {
2760  }
2761  if (!NStr::IsBlank(segment)) {
2763  }
2764 
2765  AddTitle(entry, defline);
2766 
2767  CheckDeflineMatches(entry);
2769 
2770 }
2771 
2772 
2774 {
2775  CheckInfluenzaDefline("Influenza A virus", "", "", "", "", "Influenza A virus sequence.");
2776  CheckInfluenzaDefline("Influenza B virus", "", "", "", "", "Influenza B virus sequence.");
2777  CheckInfluenzaDefline("Influenza A virus", "x", "", "", "", "Influenza A virus (x) sequence.");
2778  CheckInfluenzaDefline("Influenza B virus", "x", "", "", "", "Influenza B virus (x) sequence.");
2779  CheckInfluenzaDefline("Influenza A virus", "x", "y", "", "", "Influenza A virus (x(y)) sequence.");
2780  CheckInfluenzaDefline("Influenza B virus", "x", "y", "", "", "Influenza B virus (x) sequence.");
2781  CheckInfluenzaDefline("Influenza A virus", "", "y", "", "", "Influenza A virus ((y)) sequence.");
2782  CheckInfluenzaDefline("Influenza B virus", "", "y", "", "", "Influenza B virus sequence.");
2783  CheckInfluenzaDefline("Influenza A virus", "x", "y", "c", "", "Influenza A virus (x(y)) clone c sequence.");
2784  CheckInfluenzaDefline("Influenza B virus", "x", "y", "c", "", "Influenza B virus (x) clone c sequence.");
2785  CheckInfluenzaDefline("Influenza A virus", "x", "y", "", "1", "Influenza A virus (x(y)) segment 1 sequence.");
2786  CheckInfluenzaDefline("Influenza B virus", "x", "y", "", "1", "Influenza B virus (x) segment 1 sequence.");
2787  CheckInfluenzaDefline("Influenza A virus", "x", "y", "c", "1", "Influenza A virus (x(y)) clone c segment 1 sequence.");
2788  CheckInfluenzaDefline("Influenza B virus", "x", "y", "c", "1", "Influenza B virus (x) clone c segment 1 sequence.");
2789 
2790  CheckInfluenzaDefline("Influenza A virus (x(y))", "x", "y", "c", "1", "Influenza A virus (x(y)) clone c segment 1 sequence.");
2791  CheckInfluenzaDefline("Influenza C virus (x)", "x", "y", "c", "1", "Influenza C virus (x) clone c segment 1 sequence.");
2792 
2793 }
2794 
2795 
2797 {
2799  unit_test_util::SetTaxname(entry, "Amomum chryseum");
2802  prot->SetData().SetProt().SetName().front() = "maturase K";
2803 
2806  gene1->SetData().SetGene().SetLocus("matK");
2808  AddFeat(gene1, nuc);
2809  cds->SetXref().push_back(CRef<CSeqFeatXref>(new CSeqFeatXref()));
2810  cds->SetXref().front()->SetData().Assign(gene1->GetData());
2811 
2812  CRef<CSeq_feat> gene2(new CSeq_feat());
2813  gene2->Assign(*gene1);
2814  gene2->SetData().SetGene().SetLocus("trnK");
2815  gene2->SetData().SetGene().SetDesc("tRNA-Lys");
2816  AddFeat(gene2, nuc);
2817  CRef<CSeq_feat> intron(new CSeq_feat());
2818  intron->Assign(*gene2);
2819  intron->SetData().SetImp().SetKey("intron");
2820  intron->SetXref().push_back(CRef<CSeqFeatXref>(new CSeqFeatXref()));
2821  intron->SetXref().front()->SetData().Assign(gene2->GetData());
2822  AddFeat(intron, nuc);
2823 
2824  AddTitle(entry, "Amomum chryseum tRNA-Lys (trnK) gene, intron; and maturase K (matK) gene, complete cds; chloroplast.");
2825 
2826  {
2828  CRef<CScope> scope(new CScope(*object_manager));
2829  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2830 
2831  CAutoDefWithTaxonomy autodef;
2832 
2833  // add to autodef
2834  autodef.AddSources(seh);
2835  autodef.SetKeepIntrons(true);
2836 
2837  CRef<CAutoDefModifierCombo> mod_combo;
2838  mod_combo = autodef.FindBestModifierCombo();
2839 
2840 
2841  CheckDeflineMatches(seh, autodef, mod_combo);
2842  }
2843  {
2844  auto sources = s_GatherSources(*entry);
2845  CAutoDefWithTaxonomy autodef;
2846 
2847  // add to autodef
2848  autodef.AddDescriptors(sources);
2849  autodef.SetKeepIntrons(true);
2850 
2851  CRef<CAutoDefModifierCombo> mod_combo;
2852  mod_combo = autodef.FindBestModifierCombo();
2853 
2855  CRef<CScope> scope(new CScope(*object_manager));
2856  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2857  CheckDeflineMatches(seh, autodef, mod_combo);
2858  }
2859 
2860 }
2861 
2862 
// SQD-4451: a misc RNA named "internal transcribed spacer region" that is partial
// at both ends; the expected title uses the RNA name followed by
// ", partial sequence."
2863 BOOST_AUTO_TEST_CASE(Test_SQD_4451)
2864 {
2865  CRef<CSeq_entry> entry = BuildSequence();
2866  CRef<CSeqdesc> desc = AddSource(entry, "Fusarium incarnatum");
2867  CRef<CSeq_feat> feat1(new CSeq_feat());
2868  feat1->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
2869  feat1->SetData().SetRna().SetExt().SetName("internal transcribed spacer region");
2870  AddFeat(feat1, entry);
  // the interval and both partial flags are set after the feature was added
2871  feat1->SetLocation().SetInt().SetFrom(0);
2872  feat1->SetLocation().SetInt().SetTo(59);
2873  feat1->SetLocation().SetPartialStart(true, eExtreme_Biological);
2874  feat1->SetLocation().SetPartialStop(true, eExtreme_Biological);
2875 
2876  AddTitle(entry, "Fusarium incarnatum internal transcribed spacer region, partial sequence.");
2877 
2878  CheckDeflineMatches(entry);
2880 }
2881 
// SQD-4529, two scenarios on the same entry:
//  1. a feature commented "similar to beta-tubulin" is expected to yield
//     "beta-tubulin-like gene, complete sequence.";
//  2. an rRNA named "foo" is expected to yield "foo gene, complete sequence.".
// NOTE(review): the lines that create `feat1` and `rrna1`, and the check calls
// after each AddTitle, are not visible in this view.
2882 BOOST_AUTO_TEST_CASE(Test_SQD_4529)
2883 {
2884  CRef<CSeq_entry> entry = BuildSequence();
2885  CRef<CSeqdesc> desc = AddSource(entry, "Fusarium incarnatum");
2887  feat1->SetComment("similar to beta-tubulin");
2888 
2889  AddTitle(entry, "Fusarium incarnatum beta-tubulin-like gene, complete sequence.");
2890 
2893 
  // second scenario: rRNA with an explicit product name and no comment
2895  rrna1->ResetComment();
2896  rrna1->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
2897  rrna1->SetData().SetRna().SetExt().SetName("foo");
2898  AddTitle(entry, "Fusarium incarnatum foo gene, complete sequence.");
2901 
2902 }
2903 
2904 
2906 {
2908  p->SetData().SetProt().SetProcessed(proc);
2909  p->SetData().SetProt().SetName().clear();
2910  p->SetData().SetProt().SetName().push_back("RdRp");
2911  p->ResetComment();
2912 }
2913 
2914 
// Helper for SQD-4593: builds a record with gene locus "ORF1" and protein name
// "nonstructural polyprotein", and sets the expected title according to the flags:
//   cds_is_partial && !has_sig_peptide -> title includes ", RdRp region," and "partial cds"
//   cds_is_partial &&  has_sig_peptide -> plain title with "partial cds"
//   !cds_is_partial                    -> plain title with "complete cds"
// NOTE(review): the declarations of `cds`, `gene`, `pfeat` and `nuc`, the body of
// the has_sig_peptide branch, and the final check calls are not visible in this view.
2915 void TestMatPeptideListing(bool cds_is_partial, bool has_sig_peptide)
2916 {
2919  if (cds_is_partial) {
  // partial at the 3' end, and flagged partial on the feature itself
2920  cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
2921  cds->SetPartial(true);
2922  }
2924  gene->SetData().SetGene().SetLocus("ORF1");
  // replace whatever protein names were there with a single one
2928  pfeat->SetData().SetProt().SetName().clear();
2929  pfeat->SetData().SetProt().SetName().push_back("nonstructural polyprotein");
2932  if (has_sig_peptide) {
2934  }
2935 
2936  if (cds_is_partial) {
2937  if (has_sig_peptide) {
2938  AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein (ORF1) gene, partial cds.");
2939  } else {
2940  AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein, RdRp region, (ORF1) gene, partial cds.");
2941  }
2942  } else {
2943  AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein (ORF1) gene, complete cds.");
2944  }
2947 
2948 }
2949 
2950 BOOST_AUTO_TEST_CASE(Test_SQD_4593)
2951 {
2952  TestMatPeptideListing(true, false);
2953  TestMatPeptideListing(true, true);
2954  TestMatPeptideListing(false, false);
2955  TestMatPeptideListing(false, true);
2956 }
2957 
2958 
// SQD-4607: a feature commented "contains promoter and 5' UTR" is expected to
// produce "promoter region and 5' UTR, genomic sequence.".
// NOTE(review): the lines that create `entry` and `feat1`, and the check calls
// after AddTitle, are not visible in this view.
2959 BOOST_AUTO_TEST_CASE(Test_SQD_4607)
2960 {
2963  feat1->SetComment("contains promoter and 5' UTR");
2964 
2965  AddTitle(entry, "Sebaea microphylla promoter region and 5' UTR, genomic sequence.");
2966 
2969 }
2970 
2971 
// Helper for SQD-4612: sets up two "regulatory" features (classes "promoter" and
// "ribosome_binding_site") and a gene (locus "msa", description "mannose-specific
// adhesin"), stores `expected_title` as the title, then checks the generated
// defline twice - sources via AddSources(seh), then via AddDescriptors(...).
//
// keep_promoters  - forwarded to SetUseFakePromoters().
// keep_regulatory - forwarded to SetKeepRegulatoryFeatures().
// NOTE(review): the declarations of `entry`, `promoter`, `rbs`, `gene` and
// `object_manager` are not visible in this view.
2972 void CheckRegulatoryFeatures(const string& expected_title, bool keep_promoters, bool keep_regulatory)
2973 {
2976  promoter->SetData().SetImp().SetKey("regulatory");
2977  promoter->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", "promoter")));
2978  promoter->ResetComment();
2980  rbs->SetData().SetImp().SetKey("regulatory");
2981  rbs->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", "ribosome_binding_site")));
2982  rbs->ResetComment();
2983 
2985  gene->SetData().SetGene().SetLocus("msa");
2986  gene->SetData().SetGene().SetDesc("mannose-specific adhesin");
2987  gene->ResetComment();
2988 
2989  AddTitle(entry, expected_title);
  // pass 1: sources taken from the seq-entry handle
2990  {
2992 
2993  CRef<CScope> scope(new CScope(*object_manager));
2994  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2995 
2996  CAutoDefWithTaxonomy autodef;
2997 
2998  // add to autodef
2999  autodef.AddSources(seh);
3000 
  // the modifier combo is picked before the feature-related flags are set
3001  CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
3002 
3004  autodef.SetKeepRegulatoryFeatures(keep_regulatory);
3005  autodef.SetUseFakePromoters(keep_promoters);
3006 
3007  CheckDeflineMatches(seh, autodef, mod_combo);
3008  }
  // pass 2: sources gathered up front and handed over as descriptors
3009  {
3010  auto sources = s_GatherSources(*entry);
3011  CAutoDefWithTaxonomy autodef;
3012 
3013  // add to autodef
3014  autodef.AddDescriptors(sources);
3015 
3016  CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
3017 
3019  autodef.SetKeepRegulatoryFeatures(keep_regulatory);
3020  autodef.SetUseFakePromoters(keep_promoters);
3021 
3023  CRef<CScope> scope(new CScope(*object_manager));
3024  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
3025  CheckDeflineMatches(seh, autodef, mod_combo);
3026  }
3027 
3028 }
3029 
3030 
3031 BOOST_AUTO_TEST_CASE(Test_SQD_4612)
3032 {
3033  CheckRegulatoryFeatures("Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region.", false, false);
3034  CheckRegulatoryFeatures("Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region.", true, false);
3035  CheckRegulatoryFeatures("Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region and ribosome_binding_site.", true, true);
3036 
3037 }
3038 
3040 {
3042  unit_test_util::SetTaxname(entry, "Influenza A virus");
3043  unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_strain, "A/Florida/57/2019");
3045 
3046  AddTitle(entry, "Influenza A virus (A/Florida/57/2019) segment 5 sequence.");
3047 
3048  CheckDeflineMatches(entry);
3050 }
3051 
3053 {
3057  cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
3058  cds->SetPartial(true);
3060  pfeat->SetData().SetProt().SetName().front() = "proannomuricatin G";
3062  mrna->SetData().SetRna().SetExt().SetName("proannomuricatin G");
3065  gene->SetData().SetGene().SetLocus("PamG");
3067 
3068  // check without mat-peptide first
3069  AddTitle(nuc, "Sebaea microphylla proannomuricatin G (PamG) gene, partial cds.");
3070 
3071  CheckDeflineMatches(entry);
3073 
3074  // check with mat-peptide
3077  mat_peptide->ResetComment();
3078  mat_peptide->SetData().SetProt().SetProcessed(CProt_ref::eProcessed_mature);
3079  mat_peptide->SetData().SetProt().SetName().push_back("annomuricatin G");
3080 
3081  // if suppressing mat-peptide, no change
3084 
3085  // show when not suppressing
3086  AddTitle(entry, "Sebaea microphylla proannomuricatin G, annomuricatin G region, (PamG) gene, partial cds.");
3087  CheckDeflineMatches(entry);
3089 }
3090 
// Helper: configures and returns a "regulatory" imp-feat with the given comment
// and regulatory_class qualifier, located on the five positions
// start_pos .. start_pos + 4.
// NOTE(review): the line that creates `reg` is not visible in this view -
// presumably it also attaches the feature to `entry`; confirm.
3091 CRef<CSeq_feat> MakeRegulatoryFeature(const string& reg_class, const string& comment, TSeqPos start_pos, CRef<CSeq_entry> entry)
3092 {
3094  reg->SetData().SetImp().SetKey("regulatory");
3095  reg->SetComment(comment);
3096  reg->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", reg_class)));
3097  reg->SetLocation().SetInt().SetFrom(start_pos);
3098  reg->SetLocation().SetInt().SetTo(start_pos + 4);
3099  return reg;
3100 }
3101 
// Helper: configures and returns a "repeat_region" imp-feat with no comment and
// the given rpt_type qualifier, located on the five positions
// start_pos .. start_pos + 4.
// NOTE(review): the line that creates `reg` is not visible in this view -
// presumably it also attaches the feature to `entry`; confirm.
3102 CRef<CSeq_feat> MakeRptRegion(const string& rpt_type, TSeqPos start_pos, CRef<CSeq_entry> entry)
3103 {
3105  reg->ResetComment();
3106  reg->SetData().SetImp().SetKey("repeat_region");
3107  reg->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("rpt_type", rpt_type)));
3108  reg->SetLocation().SetInt().SetFrom(start_pos);
3109  reg->SetLocation().SetInt().SetTo(start_pos + 4);
3110  return reg;
3111 }
3112 
3113 
// Helper: checks the generated defline for `entry` with SetKeepRepeatRegion(true).
//
// init_with_descrs - when true, the BioSource descriptors of every nucleotide
//                    bioseq under the entry are collected and passed through
//                    AddDescriptors(); otherwise AddSources(seh) is used.
// NOTE(review): the declaration of `object_manager` is not visible in this view.
3114 void TestRepeatRegion(CRef<CSeq_entry> entry, bool init_with_descrs = false)
3115 {
3117 
3118  CRef<CScope> scope(new CScope(*object_manager));
3119  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
3120 
3121  CAutoDefWithTaxonomy autodef;
3122 
3123  if (init_with_descrs) {
  // walk nucleotide bioseqs and collect each Source descriptor found on them
3124  CAutoDef::TSources sources;
3125  for (CBioseq_CI b_iter(seh, CSeq_inst::eMol_na); b_iter; ++b_iter) {
3126  for (CSeqdesc_CI desc_it(*b_iter, CSeqdesc::e_Source); desc_it; ++desc_it) {
3127  sources.emplace_back(&desc_it->GetSource());
3128  }
3129  }
3130  autodef.AddDescriptors(sources);
3131  }
3132  else {
3133  // add to autodef
3134  autodef.AddSources(seh);
3135  }
3136 
  // the modifier combo is picked before the repeat-region flag is set
3137  CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
3139  autodef.SetKeepRepeatRegion(true);
3140 
3141  CheckDeflineMatches(seh, autodef, mod_combo);
3142 
3143 }
3144 
3146 {
3148 
3149  CRef<CSeq_feat> rpt = MakeRptRegion("long_terminal_repeat", 15, entry);
3150  AddTitle(entry, "Sebaea microphylla LTR repeat region.");
3151  CheckDeflineMatches(entry);
3153 
3154  TestRepeatRegion(entry);
3155  TestRepeatRegion(entry, true);
3156 
3157  CRef<CSeq_feat> reg1 = MakeRegulatoryFeature("CAAT_signal", "U3 region", 0, entry);
3158  CRef<CSeq_feat> reg2 = MakeRegulatoryFeature("TATA_box", "U3 region", 5, entry);
3159  CRef<CSeq_feat> reg3 = MakeRegulatoryFeature("polyA_signal_sequence", "R-region", 10, entry);
3160 
3161  TestRepeatRegion(entry);
3162  TestRepeatRegion(entry, true);
3163 }
3164 
3165 
// Sets five successive expected titles on the same entry, each ending in a
// different phrase: complete sequence, complete genome, partial sequence,
// partial genome, whole genome shotgun sequence.
// NOTE(review): the line that creates `entry` and the check calls that follow each
// AddTitle are not visible in this view - presumably each selects the matching
// feature-list type; confirm.
3166 BOOST_AUTO_TEST_CASE(Test_ClauseListOptions)
3167 {
3169 
3170  AddTitle(entry, "Sebaea microphylla, complete sequence.");
3173 
3174  AddTitle(entry, "Sebaea microphylla, complete genome.");
3177 
3178  AddTitle(entry, "Sebaea microphylla, partial sequence.");
3181 
3182  AddTitle(entry, "Sebaea microphylla, partial genome.");
3185 
3186  AddTitle(entry, "Sebaea microphylla, whole genome shotgun sequence.");
3189 }
3190 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CArgs –.
Definition: ncbiargs.hpp:379
static string GetOrgModLabel(COrgMod::ESubtype st)
static bool IsUsableInDefline(CSubSource::ESubtype subtype)
static bool IsModifierInString(const string &find_this, const string &find_in, bool ignore_at_end)
void InitOptions(CAutoDefOptions &options) const
bool GetKeepuORFs() const
void SetNuclearCopyFlag(CBioSource::EGenome val)
bool GetKeepRegulatoryFeatures() const
string GetTargetedLocusName() const
bool GetKeepExons() const
CRef< CUser_object > MakeUserObject() const
bool GetDoNotApplyToSp() const
void InitFromUserObject(const CUser_object &obj)
bool AreAnyFeaturesSuppressed() const
const TOrgMods & GetOrgMods() const
bool GetKeepIntrons() const
string GetCustomFeatureClause() const
TFeatureListType GetFeatureListType() const
bool GetKeep3UTRs() const
TMiscFeatRule GetMiscFeatRule() const
bool GetKeep5UTRs() const
bool GetAllowModAtEndOfTaxname() const
void SetUseLabels(bool val=true)
bool GetKeepRepeatRegion() const
bool GetKeepMiscRecomb() const
void SetTargetedLocusName(const string &tls)
bool GetUseFakePromoters() const
bool GetKeepMobileElements() const
const TSubSources & GetSubSources() const
bool GetUseLabels() const
string GetDocsumDefLine(CSeq_entry_Handle se)
void SetKeepIntrons(bool keep)
Definition: autodef.hpp:262
void SetUseFakePromoters(bool use_fake)
Definition: autodef.hpp:334
vector< CConstRef< objects::CBioSource > > TSources
Definition: autodef.hpp:71
void SetKeep5UTRs(bool keep)
Definition: autodef.hpp:289
void SetKeepExons(bool keep)
Definition: autodef.hpp:255
void SetKeepOptionalMobileElements(bool keep)
Definition: autodef.hpp:303
void SetKeepRegulatoryFeatures(bool keep)
Definition: autodef.hpp:269
void SetKeepMiscRecomb(bool keep)
Definition: autodef.hpp:321
void SetOptionsObject(const CUser_object &user)
Definition: autodef.cpp:1196
void SetCustomFeatureClause(const string &custom_feature_clause)
Definition: autodef.hpp:341
void SetKeepRepeatRegion(bool keep)
Definition: autodef.hpp:315
void AddDescriptors(const TSources &sources)
Definition: autodef.cpp:110
void AddSources(CSeq_entry_Handle se)
Definition: autodef.cpp:93
CRef< CUser_object > GetOptionsObject() const
Definition: autodef.hpp:84
void SetKeep3UTRs(bool keep)
Definition: autodef.hpp:282
void SetOptions(const CAutoDefModifierCombo &mod_combo)
Definition: autodef.cpp:1201
void SetFeatureListType(CAutoDefOptions::EFeatureListType feature_list_type)
Definition: autodef.hpp:178
string GetOneDefLine(CAutoDefModifierCombo *mod_combo, const CBioseq_Handle &bh, CRef< feature::CFeatTree > featTree=null)
Definition: autodef.cpp:1095
CRef< CAutoDefModifierCombo > FindBestModifierCombo()
Definition: autodef.cpp:210
void SetKeepuORFs(bool keep)
Definition: autodef.hpp:296
void SetMiscFeatRule(CAutoDefOptions::EMiscFeatRule misc_feat_rule)
Definition: autodef.hpp:185
void SuppressFeature(const objects::CFeatListItem &feat)
Definition: autodef.cpp:408
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
bool NeedsDocsumTitle() const
Definition: Bioseq_set.cpp:343
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
bool IsNa(void) const
Definition: Bioseq.cpp:345
Definition: Dbtag.hpp:53
Gb_qual.hpp: User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CScope –.
Definition: scope.hpp:92
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:66
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
@ eObjectType_AutodefOptions
EObjectType GetObjectType() const
Definition: set.hpp:45
static const char * expected[]
Definition: bcp.c:42
static const char * proc
Definition: stats.c:21
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
TSet GetSet(void) const
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
bool IsSet(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static const char label[]
void SetGenome(TGenome value)
Assign a value to Genome data member.
Definition: BioSource_.hpp:428
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:545
@ eSubtype_collected_by
name of person who collected the sample
Definition: SubSource_.hpp:115
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
TData & SetData(void)
Assign a value to Data data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
const TData & GetData(void) const
Get the Data member data.
void SetType(TType &value)
Assign a value to Type data member.
void SetDb(const TDb &value)
Assign a value to Db data member.
Definition: Dbtag_.hpp:229
vector< CRef< CUser_field > > TData
@ eSubtype_biotype
Definition: OrgMod_.hpp:97
@ eSubtype_subgroup
Definition: OrgMod_.hpp:99
@ eSubtype_substrain
Definition: OrgMod_.hpp:86
@ eSubtype_anamorph
Definition: OrgMod_.hpp:112
@ eSubtype_pathovar
Definition: OrgMod_.hpp:94
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
Definition: OrgMod_.hpp:125
@ eSubtype_authority
Definition: OrgMod_.hpp:107
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_variety
Definition: OrgMod_.hpp:89
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_biovar
Definition: OrgMod_.hpp:96
@ eSubtype_subtype
Definition: OrgMod_.hpp:88
@ eSubtype_teleomorph
Definition: OrgMod_.hpp:113
@ eSubtype_serogroup
Definition: OrgMod_.hpp:91
@ eSubtype_synonym
Definition: OrgMod_.hpp:111
@ eSubtype_group
Definition: OrgMod_.hpp:98
@ eSubtype_acronym
Definition: OrgMod_.hpp:102
@ eSubtype_serotype
Definition: OrgMod_.hpp:90
@ eSubtype_chemovar
Definition: OrgMod_.hpp:95
@ eSubtype_serovar
Definition: OrgMod_.hpp:92
@ eSubtype_culture_collection
Definition: OrgMod_.hpp:118
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_isolate
Definition: OrgMod_.hpp:100
EProcessed
processing status
Definition: Prot_ref_.hpp:95
@ eProcessed_signal_peptide
Definition: Prot_ref_.hpp:99
@ eType_ncRNA
non-coding RNA; subsumes snRNA, scRNA, snoRNA
Definition: RNA_ref_.hpp:104
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
Definition: Seq_feat_.hpp:1339
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Seq_feat_.hpp:1058
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void ResetComment(void)
Reset Comment data member.
Definition: Seq_feat_.cpp:99
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:268
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
bool IsSetLocation(void) const
Location: feature made from. Check if a value has been assigned to Location data member.
Definition: Seq_feat_.hpp:1105
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
const Tdata & Get(void) const
Get the member data.
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
@ e_not_set
No variant selected.
Definition: Seq_loc_.hpp:97
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TClass GetClass(void) const
Get the Class member data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
void ResetDescr(void)
Reset Descr data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_pop_set
population study
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
const TTitle & GetTitle(void) const
Get the variant data.
Definition: Seqdesc_.hpp:1032
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
TSource & SetSource(void)
Select the variant.
Definition: Seqdesc_.cpp:572
bool IsSetDescr(void) const
Descr: descriptors. Check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:303
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:453
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
TUser & SetUser(void)
Select the variant.
Definition: Seqdesc_.cpp:390
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eBiomol_cRNA
viral RNA genome copy intermediate
Definition: MolInfo_.hpp:111
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
Definition: MolInfo_.hpp:113
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
The Object manager core.
static BOOL number
Definition: pcregrep.c:193
static const char * suffix[]
Definition: pcregrep.c:408
static const char * prefix[]
Definition: pcregrep.c:405
Utility stuff for more convenient using of Boost.Test library.
void AddProtFeat(CRef< CSeq_entry > prot, CProt_ref::EProcessed proc)
void AddExon(CRef< CSeq_entry > seq, const string &number, TSeqPos start)
CRef< CSeq_entry > FindNucInSeqEntry(CRef< CSeq_entry > entry)
static void CheckDeflineMatchesWithDescr(CRef< CSeq_entry > entry, bool use_best=false, CAutoDefOptions::EFeatureListType list_type=CAutoDefOptions::eListAllFeatures, CAutoDefOptions::EMiscFeatRule misc_feat_rule=CAutoDefOptions::eNoncodingProductFeat)
static bool s_debugMode
BOOST_AUTO_TEST_CASE(Test_SimpleAutodef)
CRef< CSeq_feat > MakeRptRegion(const string &rpt_type, TSeqPos start_pos, CRef< CSeq_entry > entry)
NCBITEST_INIT_TREE()
static CRef< CSeq_feat > MakeGeneForNucProtSet(const string &nuc_id, const string &locus, const string &allele=kEmptyStr)
NCBITEST_INIT_CMDLINE(arg_desc)
void TestOneOrganelleSequenceDefline(CBioSource::TGenome genome, const string &defline)
void MakeRegulatoryFeatureTest(const string &regulatory_class, const string &defline_interval, bool use_fake_promoters, bool keep_regulatory)
size_t HasIntField(const CUser_object &user, const string &field_name, int value)
CRef< CSeq_feat > MakeRegulatoryFeature(const string &reg_class, const string &comment, TSeqPos start_pos, CRef< CSeq_entry > entry)
void CheckAutoDefOptions(const CUser_object &user, CAutoDefOptions &opts)
static CRef< CSeq_entry > BuildSequence()
void s_SetProteinName(CRef< CSeq_entry > prot, const string &name)
CRef< CUser_field > MkField(const string &label, const string &val)
void TestRepeatRegion(CRef< CSeq_entry > entry, bool init_with_descrs=false)
void CheckRegulatoryFeatures(const string &expected_title, bool keep_promoters, bool keep_regulatory)
static void CheckDeflineMatches(CSeq_entry_Handle seh, CAutoDefWithTaxonomy &autodef, CRef< CAutoDefModifierCombo > mod_combo)
void TestMatPeptideListing(bool cds_is_partial, bool has_sig_peptide)
static CAutoDef::TSources s_GatherSources(const CSeq_entry &entry)
CRef< CSeq_feat > s_AddCDS(CRef< CSeq_entry > np, const string &name, TSeqPos from, TSeqPos to)
static void AddFeat(CRef< CSeq_feat > feat, CRef< CSeq_entry > entry)
void CheckInfluenzaDefline(const string &taxname, const string &strain, const string &serotype, const string &clone, const string &segment, const string &defline)
NCBITEST_AUTO_INIT()
static CRef< CSeq_entry > BuildNucProtSet(const string &protein_name, const string &locus=kEmptyStr, const string &allele=kEmptyStr)
size_t HasStringField(const CUser_object &user, const string &field_name, const string &value)
static CRef< CSeq_feat > MakeCDSForNucProtSet(string nuc_id, string prot_id)
size_t HasBoolField(const CUser_object &user, const string &field_name)
static CRef< CSeqdesc > AddSource(CRef< CSeq_entry > entry, string taxname)
const char * sc_TestEntryCollidingLocusTags
static void AddTitle(CRef< CSeq_entry > entry, string defline)
void TestForRecomb(CRef< CSeq_entry > entry, const string &expected)
static CRef< CSeq_entry > MakeProteinForNucProtSet(string id, string protein_name)
CRef< objects::CSeq_feat > MakeGeneForFeature(CRef< objects::CSeq_feat > feat)
void SetTaxon(objects::CBioSource &src, size_t taxon)
void SetSubSource(objects::CBioSource &src, objects::CSubSource::TSubtype subtype, string val)
CRef< objects::CSeq_feat > AddGoodImpFeat(CRef< objects::CSeq_entry > entry, string key)
void SetGenome(CRef< objects::CSeq_entry > entry, objects::CBioSource::TGenome genome)
void SetDiv(CRef< objects::CSeq_entry > entry, string div)
CRef< objects::CSeq_entry > MakeProteinForGoodNucProtSet(string id)
CRef< objects::CSeq_feat > GetCDSFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< CSeq_feat > BuildGoodFeat()
void SetTaxname(CRef< objects::CSeq_entry > entry, string taxname)
CRef< objects::CSeq_entry > BuildGoodSeq(void)
CRef< objects::CSeq_feat > AddMiscFeature(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGoodEcoSet()
void ChangeId(CRef< objects::CSeq_annot > annot, CRef< objects::CSeq_id > id)
CRef< objects::CSeq_feat > GetProtFeatFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > GetProteinSequenceFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
void SetBiomol(CRef< objects::CSeq_entry > entry, objects::CMolInfo::TBiomol biomol)
CRef< objects::CSeq_entry > BuildGoodNucProtSet(void)
CRef< objects::CSeq_feat > MakemRNAForCDS(CRef< objects::CSeq_feat > feat)
void SetOrgMod(objects::CBioSource &src, objects::COrgMod::TSubtype subtype, string val)
CRef< objects::CSeq_entry > GetNucleotideSequenceFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_loc > MakeMixLoc(CRef< objects::CSeq_id > id)
void SetNucProtSetProductName(CRef< objects::CSeq_entry > entry, string new_name)
CRef< objects::CSeq_feat > MakeCDSForGoodNucProtSet(const string &nuc_id, const string &prot_id)
Modified on Fri May 24 14:50:42 2024 by modify_doxy.py rev. 669887