NCBI C++ ToolKit
unit_test_entry_edit.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_entry_edit.cpp 96814 2022-05-13 12:29:10Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Michael Kornbluh, based on template by Pavel Ivanov, NCBI
27 *
28 * File Description:
29 * Unit test functions in seq_entry_edit.cpp
30 *
31 *
32 * ===========================================================================
33 */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include "unit_test_entry_edit.hpp"
38 
39 #include <connect/ncbi_pipe.hpp>
40 #include <corelib/ncbiapp.hpp>
41 #include <corelib/ncbifile.hpp>
42 #include <corelib/ncbi_system.hpp>
46 #include <objmgr/scope.hpp>
47 #include <objmgr/bioseq_ci.hpp>
48 #include <objmgr/seqdesc_ci.hpp>
49 #include <objmgr/feat_ci.hpp>
50 #include <objmgr/graph_ci.hpp>
51 #include <objmgr/align_ci.hpp>
55 #include <map>
57 
58 // This header must be included before all Boost.Test headers if there are any
59 #include <corelib/test_boost.hpp>
60 
61 #include <common/test_assert.h> /* This header must go last */
62 
63 
66 
67 // key is processing stage (e.g. "input", "output_expected", etc.)
68 typedef std::map<string, CFile> TMapTestFiles;
69 
70 // key is name of test
71 // value is all the files for that test
73 
74 // key is function to test (e.g. "divvy")
75 // value is all the tests for that function
77 
78 namespace {
79 
80  TMapFunctionToVecOfTests s_mapFunctionToVecOfTests;
82 
83  // this loads the files it's given
84  // into s_mapFunctionToVecOfTests
85  class CFileSorter {
86  public:
87  CFileSorter(TMapFunctionToVecOfTests * out_pMapFunctionToVecOfTests) : m_pMapFunctionToVecOfTests(out_pMapFunctionToVecOfTests) { }
88 
89  void operator()(const CDirEntry & dir_entry) {
90  if( ! dir_entry.IsFile() ) {
91  return;
92  }
93 
94  // skip files with unknown extensions
95  static const char * arrAllowedExtensions[] = {
96  ".asn"
97  };
98  bool bHasAllowedExtension = false;
99  for( size_t ii = 0;
100  ii < sizeof(arrAllowedExtensions)/sizeof(arrAllowedExtensions[0]);
101  ++ii )
102  {
103  if( NStr::EndsWith(dir_entry.GetName(), arrAllowedExtensions[ii]) ){
104  bHasAllowedExtension = true;
105  }
106  }
107  if( ! bHasAllowedExtension ) {
108  return;
109  }
110 
111  CFile file(dir_entry);
112 
113  // split the name
114  vector<string> tokens;
115  NStr::Split(file.GetName(), ".", tokens, 0);
116 
117  if( tokens.size() != 4u ) {
118  throw std::runtime_error("initialization failed trying to tokenize this file: " + file.GetName());
119  }
120 
121  const string & sFunction = tokens[0];
122  const string & sTestName = tokens[1];
123  const string & sStage = tokens[2];
124  const string & sSuffix = tokens[3];
125  if( sSuffix != "asn") {
126  cout << "Skipping file with unknown suffix: " << file.GetName() << endl;
127  return;
128  }
129 
130  const bool bInsertSuccessful =
131  (*m_pMapFunctionToVecOfTests)[sFunction][sTestName].insert(
132  TMapTestFiles::value_type(sStage, file)).second;
133 
134  // this checks for duplicate file names
135  if( ! bInsertSuccessful ) {
136  throw std::runtime_error("initialization failed: duplicate file name: " + file.GetName() );
137  }
138  }
139 
140  private:
141  TMapFunctionToVecOfTests * m_pMapFunctionToVecOfTests;
142  };
143 
144  // sends the contents of the given file through the C preprocessor
145  // and then parses the results as a CSeq_entry.
146  // throws exception on error.
147  CRef<CSeq_entry> s_ReadAndPreprocessEntry( const string & sFilename )
148  {
149  ifstream in_file(sFilename.c_str());
150  BOOST_REQUIRE(in_file.good());
151  CRef<CSeq_entry> pEntry( new CSeq_entry );
152  in_file >> MSerial_AsnText >> *pEntry;
153  return pEntry;
154  }
155 
156  bool s_AreSeqEntriesEqualAndPrintIfNot(
157  const CSeq_entry & entry1,
158  const CSeq_entry & entry2 )
159  {
160  const bool bEqual = entry1.Equals(entry2);
161  if( ! bEqual ) {
162  // they're not equal, so print them
163  cerr << "These entries should be equal but they aren't: " << endl;
164  cerr << "Entry 1: " << MSerial_AsnText << entry1 << endl;
165  cerr << "Entry 2: " << MSerial_AsnText << entry2 << endl;
166  }
167  return bEqual;
168  }
169 }
170 
172 {
174 
175  static const vector<string> kEmptyStringVec;
176 
177  // initialize the map of which files belong to which test
178  CFileSorter file_sorter(&s_mapFunctionToVecOfTests);
179 
180  CDir test_cases_dir("./entry_edit_test_cases");
181  FindFilesInDir(test_cases_dir,
182  kEmptyStringVec, kEmptyStringVec,
183  file_sorter,
184  (fFF_Default | fFF_Recursive ) );
185 
186  // print list of tests found
187  cout << "List of tests found and their associated files: " << endl;
188  ITERATE( TMapFunctionToVecOfTests, func_to_vec_of_tests_it,
189  s_mapFunctionToVecOfTests )
190  {
191  cout << "FUNC: " << func_to_vec_of_tests_it->first << endl;
192  ITERATE( TMapTestNameToTestFiles, test_name_to_files_it,
193  func_to_vec_of_tests_it->second )
194  {
195  cout << "\tTEST NAME: " << test_name_to_files_it->first << endl;
196  ITERATE( TMapTestFiles, test_file_it, test_name_to_files_it->second ) {
197  cout << "\t\tSTAGE: " << test_file_it->first << " (file path: " << test_file_it->second.GetPath() << ")" << endl;
198  }
199  }
200  }
201 
202 }
203 
204 
205 void CheckLocalId(const CBioseq& seq, const string& expected)
206 {
207  if (!seq.IsSetDescr()) {
208  BOOST_CHECK_EQUAL("No descriptors", "Expected descriptors");
209  return;
210  }
211  int num_user_descriptors_found = 0;
213  if ((*it)->IsUser()) {
214  const CUser_object& usr = (*it)->GetUser();
215  BOOST_CHECK_EQUAL(usr.GetObjectType(), CUser_object::eObjectType_OriginalId);
216  BOOST_CHECK_EQUAL(usr.GetData()[0]->GetLabel().GetStr(), "LocalId");
217  BOOST_CHECK_EQUAL(usr.GetData()[0]->GetData().GetStr(), expected);
218  num_user_descriptors_found++;;
219  }
220  }
221  BOOST_CHECK_EQUAL(num_user_descriptors_found, 1);
222 
223 }
224 
225 
226 void TestCollidingAccessionFixes(const CSeq_id& collide, const string& last)
227 {
230  CRef<CSeq_id> id(new CSeq_id());
231  id->Assign(collide);
232  (*it)->SetSeq().SetId().push_back(id);
233  }
234  edit::AddLocalIdUserObjects(*good_set);
235  good_set->ReassignConflictingIds();
236  CConstRef<CSeq_id> last_id = good_set->GetSet().GetSeq_set().back()->GetSeq().GetId().back();
237  if (collide.IsGeneral()) {
238  BOOST_CHECK_EQUAL(last_id->Which(), CSeq_id::e_General);
239  BOOST_CHECK_EQUAL(last_id->GetGeneral().GetDb(), collide.GetGeneral().GetDb());
240  BOOST_CHECK_EQUAL(last_id->GetGeneral().GetTag().GetStr(), last);
241  } else {
242  BOOST_CHECK_EQUAL(last_id->Which(), CSeq_id::e_Local);
243  BOOST_CHECK_EQUAL(last_id->GetLocal().GetStr(), last);
244  }
245  BOOST_CHECK_EQUAL(true, edit::HasRepairedIDs(*good_set));
246 }
247 
248 
249 BOOST_AUTO_TEST_CASE(FixCollidingIds)
250 {
255  CRef<CSeq_entry> set_entry(new CSeq_entry());
257  set_entry->SetSet().SetSeq_set().push_back(entry1);
258  set_entry->SetSet().SetSeq_set().push_back(entry2);
259  edit::AddLocalIdUserObjects(*set_entry);
260  CheckLocalId(entry1->GetSeq(), "good");
261  CheckLocalId(entry2->GetSeq(), "good");
262 
263  set_entry->ReassignConflictingIds();
264  BOOST_CHECK_EQUAL(entry1->GetSeq().GetId().front()->GetLocal().GetStr(), "good");
265  BOOST_CHECK_EQUAL(entry2->GetSeq().GetId().front()->GetLocal().GetStr(), "good_1");
266  BOOST_CHECK_EQUAL(true, edit::HasRepairedIDs(*set_entry));
268  set_entry->SetSet().SetSeq_set().push_back(entry3);
269  edit::AddLocalIdUserObjects(*set_entry);
270  CheckLocalId(entry1->GetSeq(), "good");
271  CheckLocalId(entry2->GetSeq(), "good");
272  CheckLocalId(entry3->GetSeq(), "good");
273  set_entry->ReassignConflictingIds();
274  BOOST_CHECK_EQUAL(entry1->GetSeq().GetId().front()->GetLocal().GetStr(), "good");
275  BOOST_CHECK_EQUAL(entry2->GetSeq().GetId().front()->GetLocal().GetStr(), "good_1");
276  BOOST_CHECK_EQUAL(entry3->GetSeq().GetId().front()->GetLocal().GetStr(), "good_2");
277  BOOST_CHECK_EQUAL(true, edit::HasRepairedIDs(*set_entry));
278 
280  BOOST_CHECK_EQUAL(false, edit::HasRepairedIDs(*good_set));
281  edit::AddLocalIdUserObjects(*good_set);
282  BOOST_CHECK_EQUAL(false, edit::HasRepairedIDs(*good_set));
283  good_set->ReassignConflictingIds();
284  BOOST_CHECK_EQUAL(false, edit::HasRepairedIDs(*good_set));
285 
287  {
288  short count_users = 0;
289  FOR_EACH_SEQDESC_ON_SEQENTRY(desc_it, *good_set) {
290  if ((*desc_it)->IsUser() && (*desc_it)->GetUser().GetObjectType() == CUser_object::eObjectType_OriginalId)
291  count_users++;
292  }
293  BOOST_CHECK(count_users == 0);
294  }
295  // fix DDBJ duplicates
296  CRef<CSeq_id> id(new CSeq_id());
297  id->SetDdbj().SetAccession("X");
298  TestCollidingAccessionFixes(*id, "dbj_X__2");
299 
300  // fix EMBL duplicates
301  id->SetEmbl().SetAccession("X");
302  TestCollidingAccessionFixes(*id, "emb_X__2");
303 
304  // fix GenBank duplicates
305  id->SetGenbank().SetAccession("X");
306  TestCollidingAccessionFixes(*id, "gb_X__2");
307 
308  // fix RefSeq duplicates
309  id->SetOther().SetAccession("X");
310  TestCollidingAccessionFixes(*id, "ref_X__2");
311 
312  // fix general duplicates
313  id->SetGeneral().SetDb("Y");
314  id->SetGeneral().SetTag().SetStr("X");
315  TestCollidingAccessionFixes(*id, "X_2");
316 
317 }
318 
319 
320 void s_CheckSeg(const CDelta_seq& ds, bool expect_gap, size_t expect_length)
321 {
322  BOOST_CHECK_EQUAL(ds.IsLiteral(), true);
323  BOOST_CHECK_EQUAL(ds.GetLiteral().GetSeq_data().IsGap(), expect_gap);
324  BOOST_CHECK_EQUAL(ds.GetLiteral().GetLength(), expect_length);
325 }
326 
327 
329 {
331  CSeq_inst& inst = entry->SetSeq().SetInst();
332  string seq_str = "";
333  ITERATE(vector<string>, it, segs) {
334  seq_str += *it;
335  }
336  size_t orig_len = seq_str.length();
337  inst.SetSeq_data().SetIupacna().Set(seq_str);
338  inst.SetLength(orig_len);
339 
340  CRef<CSeq_id> id(new CSeq_id());
341  id->Assign(*(entry->GetSeq().GetId().front()));
342 
343  CRef<CSeq_annot> annot(new CSeq_annot());
344  entry->SetSeq().SetAnnot().push_back(annot);
345  // first feature covers entire sequence
346  CRef<CSeq_feat> f1(new CSeq_feat());
347  f1->SetData().SetImp().SetKey("misc_feature");
348  f1->SetLocation().SetInt().SetId().Assign(*id);
349  f1->SetLocation().SetInt().SetFrom(0);
350  f1->SetLocation().SetInt().SetTo(entry->GetSeq().GetLength() - 1);
351  annot->SetData().SetFtable().push_back(f1);
352 
353  // second feature is coding region, code break is after gap of unknown length
354  CRef<CSeq_feat> f2 (new CSeq_feat());
355  CRef<CCode_break> brk(new CCode_break());
356  brk->SetLoc().SetInt().SetId().Assign(*id);
357  brk->SetLoc().SetInt().SetFrom(54);
358  brk->SetLoc().SetInt().SetTo(56);
359  f2->SetData().SetCdregion().SetCode_break().push_back(brk);
360  f2->SetLocation().SetInt().SetId().Assign(*id);
361  f2->SetLocation().SetInt().SetFrom(0);
362  f2->SetLocation().SetInt().SetTo(entry->GetSeq().GetLength() - 1);
363  annot->SetData().SetFtable().push_back(f2);
364 
365  // third feature is tRNA, code break is before gap of unknown length, mixed location
366  CRef<CSeq_feat> f3 (new CSeq_feat());
367  CSeq_interval& interval = f3->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt();
368  interval.SetFrom(6);
369  interval.SetTo(8);
370  interval.SetId().Assign(*id);
371  f3->SetData().SetRna().SetType(CRNA_ref::eType_tRNA);
372 
373  //make mix location that does not include gaps
374  size_t pos = 0;
375  ITERATE(vector<string>, it, segs) {
376  if (NStr::Find(*it, "N") == string::npos) {
377  CRef<CSeq_loc> l1(new CSeq_loc());
378  l1->SetInt().SetFrom(pos);
379  l1->SetInt().SetTo(pos + it->length() - 1);
380  l1->SetInt().SetId().Assign(*id);
381  f3->SetLocation().SetMix().Set().push_back(l1);
382  }
383  pos += it->length();
384  }
385  return entry;
386 }
387 
388 
389 void s_IntervalsMatchGaps(const CSeq_loc& loc, const CSeq_inst& inst)
390 {
391  BOOST_CHECK_EQUAL(loc.Which(), CSeq_loc::e_Mix);
392  BOOST_CHECK_EQUAL(inst.GetRepr(), CSeq_inst::eRepr_delta);
393  if (loc.Which() != CSeq_loc::e_Mix || inst.GetRepr() != CSeq_inst::eRepr_delta) {
394  return;
395  }
396 
397  CSeq_loc_CI ci(loc);
398  CSeq_ext::TDelta::Tdata::const_iterator ds_it = inst.GetExt().GetDelta().Get().begin();
399  size_t pos = 0;
400  while ((ds_it != inst.GetExt().GetDelta().Get().end()) && ci) {
401  const CSeq_literal& lit = (*ds_it)->GetLiteral();
402  if (lit.IsSetSeq_data() && !lit.GetSeq_data().IsGap()) {
403  BOOST_CHECK_EQUAL(ci.GetRange().GetFrom(), pos);
404  BOOST_CHECK_EQUAL(ci.GetRange().GetTo(), pos + lit.GetLength() - 1);
405  ++ci;
406  }
407  pos += lit.GetLength();
408  ds_it++;
409  }
410 }
411 
412 
413 BOOST_AUTO_TEST_CASE(Test_ConvertRawToDeltaByNs)
414 {
415  vector<string> segs;
416  segs.push_back("AAAAAAAAAAAAAAAAAAAAAAAA");
417  segs.push_back("NNNNNNNNNNNNNNN");
418  segs.push_back("TTTTTTTT");
419  segs.push_back("NNNNN");
420  segs.push_back("TTTTTTTTTT");
421 
422  vector<size_t> lens;
423  vector<bool> is_gap;
424  ITERATE(vector<string>, it, segs) {
425  if (NStr::Find(*it, "N") != string::npos) {
426  is_gap.push_back(true);
427  if ((*it).length() == 5) {
428  lens.push_back(100);
429  } else {
430  lens.push_back((*it).length());
431  }
432  } else {
433  is_gap.push_back(false);
434  lens.push_back((*it).length());
435  }
436  }
437 
439  CSeq_inst& inst = entry->SetSeq().SetInst();
440  size_t orig_len = inst.GetLength();
441 
442  // This should convert the first run of Ns (15) to a gap of known length
443  // and the second run of Ns (5) to a gap of unknown length
444  edit::ConvertRawToDeltaByNs(inst, 5, 5, 10, -1, true);
446  BOOST_CHECK_EQUAL(inst.GetRepr(), CSeq_inst::eRepr_delta);
447  BOOST_CHECK_EQUAL(inst.GetExt().GetDelta().Get().size(), 5);
448  BOOST_CHECK_EQUAL(inst.GetLength(), orig_len - 5 + 100);
449  CSeq_ext::TDelta::Tdata::const_iterator ds_it = inst.GetExt().GetDelta().Get().begin();
450  vector<bool>::iterator is_gap_it = is_gap.begin();
451  vector<size_t>::iterator len_it = lens.begin();
452  while (ds_it != inst.GetExt().GetDelta().Get().end()) {
453  s_CheckSeg(**ds_it, *is_gap_it, *len_it);
454  ds_it++;
455  is_gap_it++;
456  len_it++;
457  }
458 
459 
462  if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
463  BOOST_CHECK_EQUAL((*it)->GetLiteral().GetSeq_data().GetGap().GetType(), CSeq_gap::eType_short_arm);
464  }
465  }
466 
469  if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
470  const CSeq_gap& gap = (*it)->GetLiteral().GetSeq_data().GetGap();
471  BOOST_CHECK_EQUAL(gap.GetType(), CSeq_gap::eType_scaffold);
472  BOOST_CHECK_EQUAL(gap.GetLinkage(), CSeq_gap::eLinkage_linked);
473  BOOST_CHECK_EQUAL(gap.GetLinkage_evidence().front()->GetType(), CLinkage_evidence::eType_unspecified);
474  }
475  }
476 
479  if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
480  const CSeq_gap& gap = (*it)->GetLiteral().GetSeq_data().GetGap();
481  BOOST_CHECK_EQUAL(gap.GetType(), CSeq_gap::eType_repeat);
482  BOOST_CHECK_EQUAL(gap.GetLinkage(), CSeq_gap::eLinkage_linked);
483  BOOST_CHECK_EQUAL(gap.GetLinkage_evidence().front()->GetType(), CLinkage_evidence::eType_paired_ends);
484  }
485  }
486 
489  if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
490  const CSeq_gap& gap = (*it)->GetLiteral().GetSeq_data().GetGap();
491  BOOST_CHECK_EQUAL(gap.GetType(), CSeq_gap::eType_centromere);
492  BOOST_CHECK_EQUAL(gap.IsSetLinkage(), false);
493  BOOST_CHECK_EQUAL(gap.IsSetLinkage_evidence(), false);
494  }
495  }
496 
497 
498  entry = MakeEntryForDeltaConversion (segs);
499 
500 
502  CScope scope(*objmgr);
503  scope.AddDefaults();
504  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(*entry);
505  CBioseq_Handle bsh = seh.GetSeq();
506  edit::ConvertRawToDeltaByNs(bsh, 5, 5, 10, -1, true);
507 
508  CFeat_CI fi(bsh);
509  while (fi) {
510  BOOST_CHECK_EQUAL(fi->GetLocation().GetInt().GetFrom(), 0);
511  BOOST_CHECK_EQUAL(fi->GetLocation().GetInt().GetTo(), entry->GetSeq().GetInst().GetLength() - 1);
512  if (fi->GetData().IsCdregion()) {
513  const CSeq_interval& interval = fi->GetData().GetCdregion().GetCode_break().front()->GetLoc().GetInt();
514  BOOST_CHECK_EQUAL(interval.GetFrom(), 149);
515  BOOST_CHECK_EQUAL(interval.GetTo(), 151);
516  } else if (fi->GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA) {
517  s_IntervalsMatchGaps(fi->GetLocation(), entry->GetSeq().GetInst());
518  const CSeq_interval& interval = fi->GetData().GetRna().GetExt().GetTRNA().GetAnticodon().GetInt();
519  BOOST_CHECK_EQUAL(interval.GetFrom(), 6);
520  BOOST_CHECK_EQUAL(interval.GetTo(), 8);
521  }
522  ++fi;
523  }
524 
525 }
526 
527 
528 static bool s_FindLocalId(const CBioseq_Handle& bsh, const string& sLocalid)
529 {
530  const CBioseq_Handle::TId& ids = bsh.GetId();
531  ITERATE( CBioseq_Handle::TId, id_itr, ids ) {
532  const CSeq_id_Handle& id_handle = *id_itr;
533  CConstRef<CSeq_id> id = id_handle.GetSeqIdOrNull();
534  if ( !id ) {
535  continue;
536  }
537 
538  if ( id->IsLocal() ) {
539  // Found localid
540  const CObject_id& localid = id->GetLocal();
541  if ( localid.IsStr() ) {
542  // Are the string values equal?
543  return localid.GetStr() == sLocalid;
544  }
545  }
546  }
547 
548  return false;
549 }
550 
551 
552 static bool s_FindGi(const CBioseq_Handle& bsh, const TGi& gi)
553 {
554  const CBioseq_Handle::TId& ids = bsh.GetId();
555  ITERATE( CBioseq_Handle::TId, id_itr, ids ) {
556  const CSeq_id_Handle& id_handle = *id_itr;
557  CConstRef<CSeq_id> id = id_handle.GetSeqIdOrNull();
558  if ( !id ) {
559  continue;
560  }
561 
562  if ( id->IsGi() ) {
563  // Found gi
564  return id->GetGi() == gi;
565  }
566  }
567 
568  return false;
569 }
570 
572 {
573  cout << "Testing FUNCTION: TrimSeqData" << endl;
574 
575  TMapTestNameToTestFiles & mapOfTests = s_mapFunctionToVecOfTests["trim_seq_data"];
576 
577  BOOST_CHECK( ! mapOfTests.empty() );
578 
579  NON_CONST_ITERATE( TMapTestNameToTestFiles, test_it, mapOfTests ) {
580  const string & sTestName = (test_it->first);
581  cout << "Running TEST: " << sTestName << endl;
582 
583  TMapTestFiles & test_stage_map = (test_it->second);
584 
585  BOOST_REQUIRE( test_stage_map.size() == 2u );
586 
587  // Get the input/output files
588  const CFile & input_entry_file = test_stage_map["input_entry"];
589  const CFile & output_expected_file = test_stage_map["output_expected"];
590 
591  CRef<CSeq_entry> pInputEntry = s_ReadAndPreprocessEntry( input_entry_file.GetPath() );
592  CRef<CSeq_entry> pOutputExpectedEntry = s_ReadAndPreprocessEntry( output_expected_file.GetPath() );
593 
594  CSeq_entry_Handle entry_h = s_pScope->AddTopLevelSeqEntry(*pInputEntry);
595  CSeq_entry_Handle expected_entry_h = s_pScope->AddTopLevelSeqEntry(*pOutputExpectedEntry);
596 
597  // Find the bioseq that we will trim
598  CBioseq_CI bioseq_ci( entry_h );
599  for( ; bioseq_ci; ++bioseq_ci ) {
600  const CBioseq_Handle& bsh = *bioseq_ci;
601  if (s_FindLocalId(bsh, "DH5a-357R-3")) {
602  // Create the cuts from known vector contamination
603  // Seqid "DH5a-357R-3" has two vector locations
604  edit::TRange cut1(600, 643);
605  edit::TRange cut2(644, 646);
606  edit::TCuts cuts;
607  cuts.push_back(cut1);
608  cuts.push_back(cut2);
609 
610  // Sort the cuts
611  edit::TCuts sorted_cuts;
612  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
613 
614  // Create a copy of inst
615  CRef<CSeq_inst> copy_inst(new CSeq_inst());
616  copy_inst->Assign(bsh.GetInst());
617 
618  // Make changes to the inst copy
619  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
620 
621  // Update the input seqentry with the changes
622  bsh.GetEditHandle().SetInst(*copy_inst);
623 
624  break;
625  }
626  }
627 
628  // Are the changes what we expect?
629  BOOST_CHECK( s_AreSeqEntriesEqualAndPrintIfNot(
630  *entry_h.GetCompleteSeq_entry(),
631  *expected_entry_h.GetCompleteSeq_entry()) );
632 
633  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(entry_h) );
634  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(expected_entry_h) );
635  }
636 }
637 
638 
640 {
641  cout << "Testing FUNCTION: TrimSeqGraph" << endl;
642 
643  TMapTestNameToTestFiles & mapOfTests = s_mapFunctionToVecOfTests["trim_seq_graph"];
644 
645  BOOST_CHECK( ! mapOfTests.empty() );
646 
647  NON_CONST_ITERATE( TMapTestNameToTestFiles, test_it, mapOfTests ) {
648  const string & sTestName = (test_it->first);
649  cout << "Running TEST: " << sTestName << endl;
650 
651  TMapTestFiles & test_stage_map = (test_it->second);
652 
653  BOOST_REQUIRE( test_stage_map.size() == 2u );
654 
655  // Get the input/output files
656  const CFile & input_entry_file = test_stage_map["input_entry"];
657  const CFile & output_expected_file = test_stage_map["output_expected"];
658 
659  CRef<CSeq_entry> pInputEntry = s_ReadAndPreprocessEntry( input_entry_file.GetPath() );
660  CRef<CSeq_entry> pOutputExpectedEntry = s_ReadAndPreprocessEntry( output_expected_file.GetPath() );
661 
662  CSeq_entry_Handle entry_h = s_pScope->AddTopLevelSeqEntry(*pInputEntry);
663  CSeq_entry_Handle expected_entry_h = s_pScope->AddTopLevelSeqEntry(*pOutputExpectedEntry);
664 
665  // Find the bioseq(s) that we will trim
666  CBioseq_CI bioseq_ci( entry_h );
667  for( ; bioseq_ci; ++bioseq_ci ) {
668  const CBioseq_Handle& bsh = *bioseq_ci;
669 
670  if (s_FindLocalId(bsh, "cont1.86")) {
671  // Create the cuts from known vector contamination
672  // Seqid "cont1.86" has vector
673  edit::TRange cut1(913, 948);
674  edit::TCuts cuts;
675  cuts.push_back(cut1);
676 
677  // Sort the cuts
678  edit::TCuts sorted_cuts;
679  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
680 
681  // Iterate over bioseq graphs
683  CGraph_CI graph_ci(bsh, graph_sel);
684  for (; graph_ci; ++graph_ci) {
685  // Only certain types of graphs are supported.
686  // See C Toolkit function GetGraphsProc in api/sqnutil2.c
687  const CMappedGraph& graph = *graph_ci;
688  if ( graph.IsSetTitle() &&
689  (NStr::CompareNocase( graph.GetTitle(), "Phrap Quality" ) == 0 ||
690  NStr::CompareNocase( graph.GetTitle(), "Phred Quality" ) == 0 ||
691  NStr::CompareNocase( graph.GetTitle(), "Gap4" ) == 0) )
692  {
693  // Make a copy of the graph
694  CRef<CSeq_graph> copy_graph(new CSeq_graph());
695  copy_graph->Assign(graph.GetOriginalGraph());
696 
697  // Modify the copy of the graph
698  BOOST_CHECK_NO_THROW(edit::TrimSeqGraph(bsh, copy_graph, sorted_cuts));
699 
700  // Update the original graph with the modified copy
701  graph.GetSeq_graph_Handle().Replace(*copy_graph);
702  }
703  }
704 
705  // Create a copy of inst
706  CRef<CSeq_inst> copy_inst(new CSeq_inst());
707  copy_inst->Assign(bsh.GetInst());
708 
709  // Make changes to the inst copy
710  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
711 
712  // Update the input seqentry with the changes
713  bsh.GetEditHandle().SetInst(*copy_inst);
714  }
715 
716  if (s_FindLocalId(bsh, "cont1.95")) {
717  // Create the cuts from known vector contamination
718  // Seqid "cont1.95" has vector
719  edit::TRange cut1(0, 30);
720  edit::TCuts cuts;
721  cuts.push_back(cut1);
722 
723  // Sort the cuts
724  edit::TCuts sorted_cuts;
725  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
726 
727  // Iterate over bioseq graphs
729  CGraph_CI graph_ci(bsh, graph_sel);
730  for (; graph_ci; ++graph_ci) {
731  // Only certain types of graphs are supported.
732  // See C Toolkit function GetGraphsProc in api/sqnutil2.c
733  const CMappedGraph& graph = *graph_ci;
734  if ( graph.IsSetTitle() &&
735  (NStr::CompareNocase( graph.GetTitle(), "Phrap Quality" ) == 0 ||
736  NStr::CompareNocase( graph.GetTitle(), "Phred Quality" ) == 0 ||
737  NStr::CompareNocase( graph.GetTitle(), "Gap4" ) == 0) )
738  {
739  // Make a copy of the graph
740  CRef<CSeq_graph> copy_graph(new CSeq_graph());
741  copy_graph->Assign(graph.GetOriginalGraph());
742 
743  // Modify the copy of the graph
744  BOOST_CHECK_NO_THROW(edit::TrimSeqGraph(bsh, copy_graph, sorted_cuts));
745 
746  // Update the original graph with the modified copy
747  graph.GetSeq_graph_Handle().Replace(*copy_graph);
748  }
749  }
750 
751  // Create a copy of inst
752  CRef<CSeq_inst> copy_inst(new CSeq_inst());
753  copy_inst->Assign(bsh.GetInst());
754 
755  // Make changes to the inst copy
756  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
757 
758  // Update the input seqentry with the changes
759  bsh.GetEditHandle().SetInst(*copy_inst);
760  }
761  }
762 
763  // Are the changes what we expect?
764  BOOST_CHECK( s_AreSeqEntriesEqualAndPrintIfNot(
765  *entry_h.GetCompleteSeq_entry(),
766  *expected_entry_h.GetCompleteSeq_entry()) );
767 
768  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(entry_h) );
769  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(expected_entry_h) );
770  }
771 }
772 
773 
775 {
776  cout << "Testing FUNCTION: TrimSeqAlign" << endl;
777 
778  TMapTestNameToTestFiles & mapOfTests = s_mapFunctionToVecOfTests["trim_seq_align"];
779 
780  BOOST_CHECK( ! mapOfTests.empty() );
781 
782  NON_CONST_ITERATE( TMapTestNameToTestFiles, test_it, mapOfTests ) {
783  const string & sTestName = (test_it->first);
784  cout << "Running TEST: " << sTestName << endl;
785 
786  TMapTestFiles & test_stage_map = (test_it->second);
787 
788  BOOST_REQUIRE( test_stage_map.size() == 2u );
789 
790  // Get the input/output files
791  const CFile & input_entry_file = test_stage_map["input_entry"];
792  const CFile & output_expected_file = test_stage_map["output_expected"];
793 
794  CRef<CSeq_entry> pInputEntry = s_ReadAndPreprocessEntry( input_entry_file.GetPath() );
795  CRef<CSeq_entry> pOutputExpectedEntry = s_ReadAndPreprocessEntry( output_expected_file.GetPath() );
796 
797  CSeq_entry_Handle entry_h = s_pScope->AddTopLevelSeqEntry(*pInputEntry);
798  CSeq_entry_Handle expected_entry_h = s_pScope->AddTopLevelSeqEntry(*pOutputExpectedEntry);
799 
800  // Find the bioseq(s) that we will trim
801  CBioseq_CI bioseq_ci( entry_h );
802  for( ; bioseq_ci; ++bioseq_ci ) {
803  const CBioseq_Handle& bsh = *bioseq_ci;
804 
805  if (s_FindLocalId(bsh, "CBS118702")) {
806  // Create the cuts from known vector contamination
807  // Seqid "CBS118702" has vector
808  edit::TRange cut1(479, 502);
809  edit::TCuts cuts;
810  cuts.push_back(cut1);
811 
812  // Sort the cuts
813  edit::TCuts sorted_cuts;
814  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
815 
816  // Iterate over bioseq alignments
818  CAlign_CI align_ci(bsh, align_sel);
819  for (; align_ci; ++align_ci) {
820  // Only DENSEG type is supported
821  const CSeq_align& align = *align_ci;
822  if ( align.CanGetSegs() &&
824  {
825  // Make sure mandatory fields are present in the denseg
826  const CDense_seg& denseg = align.GetSegs().GetDenseg();
827  if (! (denseg.CanGetDim() && denseg.CanGetNumseg() &&
828  denseg.CanGetIds() && denseg.CanGetStarts() &&
829  denseg.CanGetLens()) )
830  {
831  continue;
832  }
833 
834  // Make a copy of the alignment
835  CRef<CSeq_align> copy_align(new CSeq_align());
836  copy_align->Assign(align_ci.GetOriginalSeq_align());
837 
838  // Modify the copy of the alignment
839  BOOST_CHECK_NO_THROW(edit::TrimSeqAlign(bsh, copy_align, sorted_cuts));
840 
841  // Update the original alignment with the modified copy
842  align_ci.GetSeq_align_Handle().Replace(*copy_align);
843  }
844  }
845 
846  // Create a copy of inst
847  CRef<CSeq_inst> copy_inst(new CSeq_inst());
848  copy_inst->Assign(bsh.GetInst());
849 
850  // Make changes to the inst copy
851  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
852 
853  // Update the input seqentry with the changes
854  bsh.GetEditHandle().SetInst(*copy_inst);
855  }
856 
857  if (s_FindLocalId(bsh, "CBS124120")) {
858  // Create the cuts from known vector contamination
859  // Seqid "CBS124120" has vector
860  edit::TRange cut1(479, 502);
861  edit::TCuts cuts;
862  cuts.push_back(cut1);
863 
864  // Sort the cuts
865  edit::TCuts sorted_cuts;
866  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
867 
868  // Iterate over bioseq alignments
870  CAlign_CI align_ci(bsh, align_sel);
871  for (; align_ci; ++align_ci) {
872  // Only DENSEG type is supported
873  const CSeq_align& align = *align_ci;
874  if ( align.CanGetSegs() &&
876  {
877  // Make sure mandatory fields are present in the denseg
878  const CDense_seg& denseg = align.GetSegs().GetDenseg();
879  if (! (denseg.CanGetDim() && denseg.CanGetNumseg() &&
880  denseg.CanGetIds() && denseg.CanGetStarts() &&
881  denseg.CanGetLens()) )
882  {
883  continue;
884  }
885 
886  // Make a copy of the alignment
887  CRef<CSeq_align> copy_align(new CSeq_align());
888  copy_align->Assign(align_ci.GetOriginalSeq_align());
889 
890  // Modify the copy of the alignment
891  BOOST_CHECK_NO_THROW(edit::TrimSeqAlign(bsh, copy_align, sorted_cuts));
892 
893  // Update the original alignment with the modified copy
894  align_ci.GetSeq_align_Handle().Replace(*copy_align);
895  }
896  }
897 
898  // Create a copy of inst
899  CRef<CSeq_inst> copy_inst(new CSeq_inst());
900  copy_inst->Assign(bsh.GetInst());
901 
902  // Make changes to the inst copy
903  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
904 
905  // Update the input seqentry with the changes
906  bsh.GetEditHandle().SetInst(*copy_inst);
907  }
908 
909  if (s_FindLocalId(bsh, "CBS534.83")) {
910  // Create the cuts from known vector contamination
911  // Seqid "CBS534.83" has vector
912  edit::TRange cut1(479, 502);
913  edit::TCuts cuts;
914  cuts.push_back(cut1);
915 
916  // Sort the cuts
917  edit::TCuts sorted_cuts;
918  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
919 
920  // Iterate over bioseq alignments
922  CAlign_CI align_ci(bsh, align_sel);
923  for (; align_ci; ++align_ci) {
924  // Only DENSEG type is supported
925  const CSeq_align& align = *align_ci;
926  if ( align.CanGetSegs() &&
928  {
929  // Make sure mandatory fields are present in the denseg
930  const CDense_seg& denseg = align.GetSegs().GetDenseg();
931  if (! (denseg.CanGetDim() && denseg.CanGetNumseg() &&
932  denseg.CanGetIds() && denseg.CanGetStarts() &&
933  denseg.CanGetLens()) )
934  {
935  continue;
936  }
937 
938  // Make a copy of the alignment
939  CRef<CSeq_align> copy_align(new CSeq_align());
940  copy_align->Assign(align_ci.GetOriginalSeq_align());
941 
942  // Modify the copy of the alignment
943  BOOST_CHECK_NO_THROW(edit::TrimSeqAlign(bsh, copy_align, sorted_cuts));
944 
945  // Update the original alignment with the modified copy
946  align_ci.GetSeq_align_Handle().Replace(*copy_align);
947  }
948  }
949 
950  // Create a copy of inst
951  CRef<CSeq_inst> copy_inst(new CSeq_inst());
952  copy_inst->Assign(bsh.GetInst());
953 
954  // Make changes to the inst copy
955  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
956 
957  // Update the input seqentry with the changes
958  bsh.GetEditHandle().SetInst(*copy_inst);
959  }
960  }
961 
962  // Are the changes what we expect?
963  BOOST_CHECK( s_AreSeqEntriesEqualAndPrintIfNot(
964  *entry_h.GetCompleteSeq_entry(),
965  *expected_entry_h.GetCompleteSeq_entry()) );
966 
967  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(entry_h) );
968  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(expected_entry_h) );
969  }
970 }
971 
972 
973 BOOST_AUTO_TEST_CASE(TrimSeqFeat_Featured_Deleted)
974 {
975  cout << "Testing FUNCTION: TrimSeqFeat - cdregion feature was completely deleted" << endl;
976 
977  TMapTestNameToTestFiles & mapOfTests = s_mapFunctionToVecOfTests["trim_seq_feat_feature_deleted"];
978 
979  BOOST_CHECK( ! mapOfTests.empty() );
980 
981  NON_CONST_ITERATE( TMapTestNameToTestFiles, test_it, mapOfTests ) {
982  const string & sTestName = (test_it->first);
983  cout << "Running TEST: " << sTestName << endl;
984 
985  TMapTestFiles & test_stage_map = (test_it->second);
986 
987  BOOST_REQUIRE( test_stage_map.size() == 2u );
988 
989  // Get the input/output files
990  const CFile & input_entry_file = test_stage_map["input_entry"];
991  const CFile & output_expected_file = test_stage_map["output_expected"];
992 
993  CRef<CSeq_entry> pInputEntry = s_ReadAndPreprocessEntry( input_entry_file.GetPath() );
994  CRef<CSeq_entry> pOutputExpectedEntry = s_ReadAndPreprocessEntry( output_expected_file.GetPath() );
995 
996  CSeq_entry_Handle entry_h = s_pScope->AddTopLevelSeqEntry(*pInputEntry);
997  CSeq_entry_Handle expected_entry_h = s_pScope->AddTopLevelSeqEntry(*pOutputExpectedEntry);
998 
999  // Find the bioseq(s) that we will trim
1000  CBioseq_CI bioseq_ci( entry_h );
1001  for( ; bioseq_ci; ++bioseq_ci ) {
1002  const CBioseq_Handle& bsh = *bioseq_ci;
1003 
1004  if (s_FindLocalId(bsh, "Seq53")) {
1005  // Create the cuts from known vector contamination
1006  // Seqid "Seq53" has vector
1007  edit::TRange cut1(0, 2205);
1008  edit::TCuts cuts;
1009  cuts.push_back(cut1);
1010 
1011  // Sort the cuts
1012  edit::TCuts sorted_cuts;
1013  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
1014 
1015  // Iterate over bioseq features
1017  CFeat_CI feat_ci(bsh, feat_sel);
1018  for (; feat_ci; ++feat_ci) {
1019  // Make a copy of the feature
1020  CRef<CSeq_feat> copy_feat(new CSeq_feat());
1021  copy_feat->Assign(feat_ci->GetOriginalFeature());
1022 
1023  // Detect complete deletions of feature
1024  bool bFeatureDeleted = false;
1025 
1026  // Detect case where feature was not deleted but merely trimmed
1027  bool bFeatureTrimmed = false;
1028 
1029  // Modify the copy of the feature
1030  bool isPartialStart = false;
1031  bool isPartialStop = false;
1032  BOOST_CHECK_NO_THROW(edit::TrimSeqFeat(copy_feat, sorted_cuts, bFeatureDeleted, bFeatureTrimmed, isPartialStart, isPartialStop));
1033 
1034  if (bFeatureDeleted) {
1035  // Delete the feature
1036  // If the feature was a cdregion, delete the protein and
1037  // renormalize the nuc-prot set
1038  BOOST_CHECK_NO_THROW(edit::DeleteProteinAndRenormalizeNucProtSet(*feat_ci));
1039  }
1040  else
1041  if (bFeatureTrimmed) {
1042  // Further modify the copy of the feature
1043 
1044  // Not testing AdjustCdregionFrame() and RetranslateCdregion()
1045  // in this unit test. See next unit test.
1046 
1047  // Update the original feature with the modified copy
1048  CSeq_feat_EditHandle feat_eh(*feat_ci);
1049  feat_eh.Replace(*copy_feat);
1050  }
1051  }
1052 
1053  // Create a copy of inst
1054  CRef<CSeq_inst> copy_inst(new CSeq_inst());
1055  copy_inst->Assign(bsh.GetInst());
1056 
1057  // Make changes to the inst copy
1058  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
1059 
1060  // Update the input seqentry with the changes
1061  bsh.GetEditHandle().SetInst(*copy_inst);
1062  }
1063  }
1064 
1065  // Are the changes what we expect?
1066  BOOST_CHECK( s_AreSeqEntriesEqualAndPrintIfNot(
1067  *entry_h.GetCompleteSeq_entry(),
1068  *expected_entry_h.GetCompleteSeq_entry()) );
1069 
1070  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(entry_h) );
1071  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(expected_entry_h) );
1072  }
1073 }
1074 
1075 /*
1076 
1077 BOOST_AUTO_TEST_CASE(TrimSeqFeat_Featured_Trimmed)
1078 {
1079  cout << "Testing FUNCTION: TrimSeqFeat - cdregion feature was trimmed" << endl;
1080 
1081  TMapTestNameToTestFiles & mapOfTests = s_mapFunctionToVecOfTests["trim_seq_feat_feature_trimmed"];
1082 
1083  BOOST_CHECK( ! mapOfTests.empty() );
1084 
1085  NON_CONST_ITERATE( TMapTestNameToTestFiles, test_it, mapOfTests ) {
1086  const string & sTestName = (test_it->first);
1087  cout << "Running TEST: " << sTestName << endl;
1088 
1089  TMapTestFiles & test_stage_map = (test_it->second);
1090 
1091  BOOST_REQUIRE( test_stage_map.size() == 2u );
1092 
1093  // Get the input/output files
1094  const CFile & input_entry_file = test_stage_map["input_entry"];
1095  const CFile & output_expected_file = test_stage_map["output_expected"];
1096 
1097  CRef<CSeq_entry> pInputEntry = s_ReadAndPreprocessEntry( input_entry_file.GetPath() );
1098  CRef<CSeq_entry> pOutputExpectedEntry = s_ReadAndPreprocessEntry( output_expected_file.GetPath() );
1099 
1100  CSeq_entry_Handle entry_h = s_pScope->AddTopLevelSeqEntry(*pInputEntry);
1101  CSeq_entry_Handle expected_entry_h = s_pScope->AddTopLevelSeqEntry(*pOutputExpectedEntry);
1102 
1103  // Find the bioseq(s) that we will trim
1104  CBioseq_CI bioseq_ci( entry_h );
1105  for( ; bioseq_ci; ++bioseq_ci ) {
1106  const CBioseq_Handle& bsh = *bioseq_ci;
1107 
1108  if (s_FindLocalId(bsh, "BankIt1717834")) {
1109  // Create the cuts from known vector contamination
1110  // Seqid "BankIt1717834" has vector
1111  edit::TRange cut1(0, 653);
1112  edit::TCuts cuts;
1113  cuts.push_back(cut1);
1114 
1115  // Sort the cuts
1116  edit::TCuts sorted_cuts;
1117  BOOST_CHECK_NO_THROW(edit::GetSortedCuts( bsh, cuts, sorted_cuts ));
1118 
1119  // Create a copy of inst
1120  CRef<CSeq_inst> copy_inst(new CSeq_inst());
1121  copy_inst->Assign(bsh.GetInst());
1122 
1123  // Make changes to the inst copy
1124  BOOST_CHECK_NO_THROW(edit::TrimSeqData( bsh, copy_inst, sorted_cuts ));
1125 
1126  // Iterate over bioseq features
1127  SAnnotSelector feat_sel(CSeq_annot::C_Data::e_Ftable);
1128  CFeat_CI feat_ci(bsh, feat_sel);
1129  for (; feat_ci; ++feat_ci) {
1130  // Make a copy of the feature
1131  CRef<CSeq_feat> copy_feat(new CSeq_feat());
1132  copy_feat->Assign(feat_ci->GetOriginalFeature());
1133 
1134  // Detect complete deletions of feature
1135  bool bFeatureDeleted = false;
1136 
1137  // Detect case where feature was not deleted but merely trimmed
1138  bool bFeatureTrimmed = false;
1139 
1140  // Modify the copy of the feature
1141  bool isPartialStart = false;
1142  bool isPartialStop = false;
1143  BOOST_CHECK_NO_THROW(edit::TrimSeqFeat(copy_feat, sorted_cuts, bFeatureDeleted, bFeatureTrimmed, isPartialStart, isPartialStop));
1144 
1145  if (bFeatureDeleted) {
1146  // Not testing this branch in this unit test.
1147 
1148  // Delete the feature
1149  // If the feature was a cdregion, delete the protein and
1150  // renormalize the nuc-prot set
1151  //DeleteProteinAndRenormalizeNucProtSet(*feat_ci);
1152  }
1153  else
1154  if (bFeatureTrimmed) {
1155  // Further modify the copy of the feature
1156 
1157  // If this feat is a Cdregion, then RETRANSLATE the protein
1158  // sequence AND adjust any protein feature
1159  if ( copy_feat->IsSetData() &&
1160  copy_feat->GetData().Which() == CSeqFeatData::e_Cdregion &&
1161  copy_feat->IsSetProduct() )
1162  {
1163  // Get length of nuc sequence before trimming
1164  TSeqPos original_nuc_len = 0;
1165  if (bsh.CanGetInst() && bsh.GetInst().CanGetLength()) {
1166  original_nuc_len = bsh.GetInst().GetLength();
1167  }
1168  BOOST_CHECK_NO_THROW(edit::AdjustCdregionFrame(original_nuc_len, copy_feat, sorted_cuts));
1169 
1170  // Retranslate the coding region using the new nuc sequence
1171  BOOST_CHECK_NO_THROW(edit::RetranslateCdregion(bsh, isPartialStart, isPartialStop, copy_inst, copy_feat, sorted_cuts));
1172  }
1173 
1174  // Update the original feature with the modified copy
1175  CSeq_feat_EditHandle feat_eh(*feat_ci);
1176  feat_eh.Replace(*copy_feat);
1177  }
1178  }
1179 
1180  // Update the input seqentry with the changes
1181  bsh.GetEditHandle().SetInst(*copy_inst);
1182  }
1183  }
1184 
1185  // Are the changes what we expect?
1186  BOOST_CHECK( s_AreSeqEntriesEqualAndPrintIfNot(
1187  *entry_h.GetCompleteSeq_entry(),
1188  *expected_entry_h.GetCompleteSeq_entry()) );
1189 
1190  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(entry_h) );
1191  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(expected_entry_h) );
1192  }
1193 }
1194 */
1195 
1196 
1198 {
1199  cout << "Testing FUNCTION: TrimSequenceAndAnnotation" << endl;
1200 
1201  TMapTestNameToTestFiles & mapOfTests = s_mapFunctionToVecOfTests["trim_sequence_and_annotation"];
1202 
1203  BOOST_CHECK( ! mapOfTests.empty() );
1204 
1205  NON_CONST_ITERATE( TMapTestNameToTestFiles, test_it, mapOfTests ) {
1206  const string & sTestName = (test_it->first);
1207  cout << "Running TEST: " << sTestName << endl;
1208 
1209  TMapTestFiles & test_stage_map = (test_it->second);
1210 
1211  BOOST_REQUIRE( test_stage_map.size() == 2u );
1212 
1213  // Get the input/output files
1214  const CFile & input_entry_file = test_stage_map["input_entry"];
1215  const CFile & output_expected_file = test_stage_map["output_expected"];
1216 
1217  CRef<CSeq_entry> pInputEntry = s_ReadAndPreprocessEntry( input_entry_file.GetPath() );
1218  CRef<CSeq_entry> pOutputExpectedEntry = s_ReadAndPreprocessEntry( output_expected_file.GetPath() );
1219 
1220  CSeq_entry_Handle entry_h = s_pScope->AddTopLevelSeqEntry(*pInputEntry);
1221  CSeq_entry_Handle expected_entry_h = s_pScope->AddTopLevelSeqEntry(*pOutputExpectedEntry);
1222 
1223  // Find the bioseq(s) that we will trim
1224  CBioseq_CI bioseq_ci( entry_h );
1225  for( ; bioseq_ci; ++bioseq_ci ) {
1226  const CBioseq_Handle& bsh = *bioseq_ci;
1227 
1228  // Seq4 is found in test1 input
1229  if (s_FindLocalId(bsh, "Seq4")) {
1230  // Create the cuts from known vector contamination
1231  // Seqid "Seq4" has vector
1232  edit::TRange cut1(376, 596);
1233  edit::TRange cut2(0, 92);
1234  edit::TRange cut3(93, 108);
1235  edit::TRange cut4(109, 188);
1236  edit::TRange cut5(662, 671);
1237  edit::TRange cut6(672, 690);
1238  edit::TCuts cuts;
1239  cuts.push_back(cut1);
1240  cuts.push_back(cut2);
1241  cuts.push_back(cut3);
1242  cuts.push_back(cut4);
1243  cuts.push_back(cut5);
1244  cuts.push_back(cut6);
1245 
1246  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ));
1247  }
1248 
1249  // trpB is found in test2 input
1250  if (s_FindLocalId(bsh, "trpB")) {
1251  // Create the cuts from known vector contamination
1252  // Seqid "trpB" has vector
1253  edit::TRange cut1(0, 2);
1254  edit::TRange cut2(3, 68);
1255  edit::TRange cut3(69, 86);
1256  edit::TRange cut4(87, 119);
1257  edit::TCuts cuts;
1258  cuts.push_back(cut1);
1259  cuts.push_back(cut2);
1260  cuts.push_back(cut3);
1261  cuts.push_back(cut4);
1262 
1263  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ));
1264  }
1265 
1266  // Seq1 is found in test3 input
1267  if (s_FindLocalId(bsh, "Seq1")) {
1268  // Create the cuts from known vector contamination
1269  // Seqid "Seq1" has vector
1270  edit::TRange cut1(0, 141);
1271  edit::TRange cut2(2080, 3035);
1272  edit::TRange cut3(285, 325);
1273  edit::TRange cut4(326, 359);
1274  edit::TRange cut5(360, 403);
1275  edit::TCuts cuts;
1276  cuts.push_back(cut1);
1277  cuts.push_back(cut2);
1278  cuts.push_back(cut3);
1279  cuts.push_back(cut4);
1280  cuts.push_back(cut5);
1281 
1282  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ));
1283  }
1284 
1285  // Contig1 is found in test4 input
1286  if (s_FindLocalId(bsh, "Contig1")) {
1287  // Create the cuts from known vector contamination
1288  // Seqid "Seq1" has vector
1289  edit::TRange cut1(0, 11);
1290  edit::TCuts cuts;
1291  cuts.push_back(cut1);
1292 
1293  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation(bsh, cuts, edit::eTrimToClosestEnd));
1294  }
1295 
1296  // ctg7180000000092 is found in test5 input
1297  if (s_FindLocalId(bsh, "ctg7180000000092")) {
1298  // Create the cuts from known vector contamination
1299  // Seqid "Seq1" has vector
1300  edit::TRange cut1(19148, 19270);
1301  edit::TCuts cuts;
1302  cuts.push_back(cut1);
1303 
1304  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation(bsh, cuts, edit::eTrimToClosestEnd));
1305  }
1306 
1307  // Seq1 is found in test6 input
1308  if (s_FindLocalId(bsh, "scaffold281")) {
1309  // Cut 1st data element of sequence but leave gap
1310  // function should remove gap as well
1311  edit::TRange cut1(0, 275);
1312  edit::TCuts cuts;
1313  cuts.push_back(cut1);
1314 
1315  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ));
1316  }
1317 
1318  // gi 1530793995 is found in test7 input
1319  if (s_FindGi(bsh, GI_CONST(1530793995))) {
1320  // Cut 1st data element of sequence but leave gap
1321  // function should remove gap as well
1322  edit::TRange cut1(0, 999);
1323  edit::TCuts cuts;
1324  cuts.push_back(cut1);
1325 
1326  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ));
1327  }
1328 
1329  // gi 1530812376 is found in test8 input
1330  if (s_FindGi(bsh, GI_CONST(1530812376))) {
1331  // Cut 1st data element of sequence but leave gap
1332  // function should remove gap as well
1333  edit::TRange cut1(0, 999);
1334  edit::TCuts cuts;
1335  cuts.push_back(cut1);
1336 
1337  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ));
1338  }
1339 
1340  // gi 75914080 is found in test9 input
1341  if (s_FindGi(bsh, GI_CONST(75914080))) {
1342  // Cut 1st data element of sequence but leave gap
1343  // function should remove gap as well
1344  edit::TRange cut1(0, 521);
1345  edit::TCuts cuts;
1346  cuts.push_back(cut1);
1347 
1348  BOOST_CHECK_NO_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ));
1349  }
1350  }
1351 
1352  // Are the changes what we expect?
1353  BOOST_CHECK( s_AreSeqEntriesEqualAndPrintIfNot(
1354  *entry_h.GetCompleteSeq_entry(),
1355  *expected_entry_h.GetCompleteSeq_entry()) );
1356  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(entry_h) );
1357  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(expected_entry_h) );
1358  }
1359 }
1360 
1361 
1362 BOOST_AUTO_TEST_CASE(TrimSequenceAndAnnotation_InvalidInput)
1363 {
1364  cout << "Testing FUNCTION: TrimSequenceAndAnnotation with invalid input" << endl;
1365 
1366  TMapTestNameToTestFiles & mapOfTests = s_mapFunctionToVecOfTests["trim_sequence_and_annotation_invalid_input"];
1367 
1368  BOOST_CHECK( ! mapOfTests.empty() );
1369 
1370  NON_CONST_ITERATE( TMapTestNameToTestFiles, test_it, mapOfTests ) {
1371  const string & sTestName = (test_it->first);
1372  cout << "Running TEST: " << sTestName << endl;
1373 
1374  TMapTestFiles & test_stage_map = (test_it->second);
1375 
1376  BOOST_REQUIRE( test_stage_map.size() == 1u );
1377 
1378  // Need input file only
1379  const CFile & input_entry_file = test_stage_map["input_entry"];
1380 
1381  CRef<CSeq_entry> pInputEntry = s_ReadAndPreprocessEntry( input_entry_file.GetPath() );
1382 
1383  CSeq_entry_Handle entry_h = s_pScope->AddTopLevelSeqEntry(*pInputEntry);
1384 
1385  // Find the bioseq(s) of interest that we will check
1386  CBioseq_CI bioseq_ci( entry_h );
1387  for( ; bioseq_ci; ++bioseq_ci ) {
1388  const CBioseq_Handle& bsh = *bioseq_ci;
1389 
1390  // Deliberately use invalid cut locations for Seq4 which has length 691
1391  if (s_FindLocalId(bsh, "Seq4")) {
1392  // Invalid "from" value of -1
1393  edit::TCuts cuts;
1394  cuts.push_back(edit::TRange(-1, 100));
1395  BOOST_CHECK_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ), edit::CEditException);
1396 
1397  // Invalid "to" value of 691
1398  cuts.clear();
1399  cuts.push_back(edit::TRange(100, 691));
1400  BOOST_CHECK_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ), edit::CEditException);
1401  }
1402 
1403  // Deliberately specify cuts for a protein sequence!
1404  if (s_FindLocalId(bsh, "Seq4_prot_4")) {
1405  // Not a nuc bioseq!
1406  edit::TCuts cuts;
1407  cuts.push_back(edit::TRange(0, 10));
1408  BOOST_CHECK_THROW(edit::TrimSequenceAndAnnotation( bsh, cuts, edit::eTrimToClosestEnd ), edit::CEditException);
1409  }
1410  }
1411 
1412  BOOST_CHECK_NO_THROW( s_pScope->RemoveTopLevelSeqEntry(entry_h) );
1413  }
1414 }
1415 
// Test_Unverified: attaches one user-object descriptor to a sequence and
// toggles its "unverified" flags (organism, feature, misassembled,
// contaminant) one at a time.  After each change the edit::IsUnverified*
// predicates are checked, and edit::FindUnverified is expected to return
// that same descriptor.
1416 BOOST_AUTO_TEST_CASE(Test_Unverified)
1417 {
      // NOTE(review): listing line 1418 is absent from this chunk, so the
      // declaration of `entry` is not visible here - presumably it is created
      // on that line; confirm against the repository.
1419 
      // Before any descriptor is added, neither flag is reported
1420  BOOST_CHECK_EQUAL(edit::IsUnverifiedOrganism(entry->GetSeq()), false);
1421  BOOST_CHECK_EQUAL(edit::IsUnverifiedFeature(entry->GetSeq()), false);
1422 
      // Add a user-object descriptor to the sequence and mark it
      // unverified-organism
1423  CRef<CSeqdesc> new_unv(new CSeqdesc());
1424  new_unv->SetUser();
1425  entry->SetSeq().SetDescr().Set().push_back(new_unv);
1426  new_unv->SetUser().AddUnverifiedOrganism();
1427  BOOST_CHECK_EQUAL(edit::IsUnverifiedOrganism(entry->GetSeq()), true);
1428  BOOST_CHECK_EQUAL(edit::IsUnverifiedFeature(entry->GetSeq()), false);
1429  BOOST_CHECK_EQUAL(edit::IsUnverifiedMisassembled(entry->GetSeq()), false);
      // FindUnverified must hand back the very descriptor added above
1430  CRef<CSeqdesc> unv = edit::FindUnverified(entry->GetSeq());
1431  BOOST_CHECK_EQUAL(unv.GetPointer(), new_unv.GetPointer());
1432 
      // Organism + feature
1433  new_unv->SetUser().AddUnverifiedFeature();
1434  BOOST_CHECK_EQUAL(edit::IsUnverifiedOrganism(entry->GetSeq()), true);
1435  BOOST_CHECK_EQUAL(edit::IsUnverifiedFeature(entry->GetSeq()), true);
1436  BOOST_CHECK_EQUAL(edit::IsUnverifiedMisassembled(entry->GetSeq()), false);
1437  unv = edit::FindUnverified(entry->GetSeq());
1438  BOOST_CHECK_EQUAL(unv.GetPointer(), new_unv.GetPointer());
1439 
      // Feature only
1440  new_unv->SetUser().RemoveUnverifiedOrganism();
1441  BOOST_CHECK_EQUAL(edit::IsUnverifiedOrganism(entry->GetSeq()), false);
1442  BOOST_CHECK_EQUAL(edit::IsUnverifiedFeature(entry->GetSeq()), true);
1443  BOOST_CHECK_EQUAL(edit::IsUnverifiedMisassembled(entry->GetSeq()), false);
1444  unv = edit::FindUnverified(entry->GetSeq());
1445  BOOST_CHECK_EQUAL(unv.GetPointer(), new_unv.GetPointer());
1446 
      // No flags left; the descriptor is still expected to be found
1447  new_unv->SetUser().RemoveUnverifiedFeature();
1448  BOOST_CHECK_EQUAL(edit::IsUnverifiedOrganism(entry->GetSeq()), false);
1449  BOOST_CHECK_EQUAL(edit::IsUnverifiedFeature(entry->GetSeq()), false);
1450  BOOST_CHECK_EQUAL(edit::IsUnverifiedMisassembled(entry->GetSeq()), false);
1451  unv = edit::FindUnverified(entry->GetSeq());
1452  BOOST_CHECK_EQUAL(unv.GetPointer(), new_unv.GetPointer());
1453 
      // Misassembled only
1454  new_unv->SetUser().AddUnverifiedMisassembled();
1455  BOOST_CHECK_EQUAL(edit::IsUnverifiedOrganism(entry->GetSeq()), false);
1456  BOOST_CHECK_EQUAL(edit::IsUnverifiedFeature(entry->GetSeq()), false);
1457  BOOST_CHECK_EQUAL(edit::IsUnverifiedMisassembled(entry->GetSeq()), true);
1458  unv = edit::FindUnverified(entry->GetSeq());
1459  BOOST_CHECK_EQUAL(unv.GetPointer(), new_unv.GetPointer());
1460 
      // Adding contaminant must leave the misassembled flag set
1461  new_unv->SetUser().AddUnverifiedContaminant();
1462  BOOST_CHECK_EQUAL(edit::IsUnverifiedOrganism(entry->GetSeq()), false);
1463  BOOST_CHECK_EQUAL(edit::IsUnverifiedFeature(entry->GetSeq()), false);
1464  BOOST_CHECK_EQUAL(edit::IsUnverifiedMisassembled(entry->GetSeq()), true);
1465  BOOST_CHECK_EQUAL(edit::IsUnverifiedContaminant(entry->GetSeq()), true);
1466  unv = edit::FindUnverified(entry->GetSeq());
1467  BOOST_CHECK_EQUAL(unv.GetPointer(), new_unv.GetPointer());
1468 
1469 }
1470 
1471 
// Test_SeqEntryFromSeqSubmit: checks the descriptor count of the entry
// returned by edit::SeqEntryFromSeqSubmit, first for a wrapped sequence and
// then for a wrapped set.  The first extraction is expected to carry one
// descriptor more than the wrapped entry.  When the extracted entry is put
// back into the submit and extracted again, the count must not grow.
1472 BOOST_AUTO_TEST_CASE(Test_SeqEntryFromSeqSubmit)
1473 {
1474  CRef<CSeq_submit> submit(new CSeq_submit());
      // NOTE(review): listing line 1475 is absent from this chunk; `wrapped`
      // is presumably declared there - confirm against the repository.
1476  submit->SetData().SetEntrys().push_back(wrapped);
      // Give the submit block one author
1477  submit->SetSub().SetCit().SetAuthors().SetNames().SetStd().push_back(unit_test_util::BuildGoodAuthor());
1478 
      // NOTE(review): listing line 1479 is absent; `entry` is presumably
      // declared and first assigned there - confirm.
1480  BOOST_CHECK_EQUAL(entry->IsSeq(), true);
      // Exactly one descriptor more than the wrapped sequence
1481  BOOST_CHECK_EQUAL(entry->GetSeq().GetDescr().Get().size(), wrapped->GetSeq().GetDescr().Get().size() + 1);
1482 
1483  // try again with cit-sub pub already there
1484  submit->SetData().SetEntrys().front()->Assign(*entry);
1485  entry = edit::SeqEntryFromSeqSubmit(*submit);
      // Same descriptor count as the entry now stored in the submit
1486  BOOST_CHECK_EQUAL(entry->GetSeq().GetDescr().Get().size(), submit->GetData().GetEntrys().front()->GetSeq().GetDescr().Get().size());
1487 
      // NOTE(review): listing line 1488 is absent; the checks below treat
      // `wrapped` as a set, so it is presumably reassigned there - confirm.
1489  submit->SetData().SetEntrys().front()->Assign(*wrapped);
1490 
1491  entry = edit::SeqEntryFromSeqSubmit(*submit);
1492  BOOST_CHECK_EQUAL(entry->IsSet(), true);
1493  BOOST_CHECK_EQUAL(entry->GetSet().GetDescr().Get().size(), wrapped->GetSet().GetDescr().Get().size() + 1);
1494  // try again with cit-sub pub already there
1495  submit->SetData().SetEntrys().front()->Assign(*entry);
1496  entry = edit::SeqEntryFromSeqSubmit(*submit);
1497  BOOST_CHECK_EQUAL(entry->GetSet().GetDescr().Get().size(), submit->GetData().GetEntrys().front()->GetSet().GetDescr().Get().size());
1498 
1499 }
1500 
1501 
1502 BOOST_AUTO_TEST_CASE(Test_GetTargetedLocusNameConsensus)
1503 {
1504  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus(kEmptyStr, "16S ribosomal RNA"), "16S ribosomal RNA");
1505  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("16S ribosomal RNA", kEmptyStr), "16S ribosomal RNA");
1506  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("16S ribosomal RNA conserved region", "16S ribosomal RNA"), "16S ribosomal RNA");
1507  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("16S ribosomal RNA", "16S ribosomal RNA conserved region"), "16S ribosomal RNA");
1508  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("conserved region 16S ribosomal RNA", "16S ribosomal RNA"), "16S ribosomal RNA");
1509  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("16S ribosomal RNA", "conserved region 16S ribosomal RNA"), "16S ribosomal RNA");
1510  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("abc 16S ribosomal RNA 456", "16S ribosomal RNA"), "16S ribosomal RNA");
1511  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("16S ribosomal RNA", "abc 16S ribosomal RNA 456"), "16S ribosomal RNA");
1512  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("something 16S ribosomal RNA else", "abc 16S ribosomal RNA 456"), "16S ribosomal RNA");
1513  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("this is not a match", "something else entirely"), kEmptyStr);
1514  BOOST_CHECK_EQUAL(edit::GetTargetedLocusNameConsensus("ultra-conserved element locus SR-01", "ultra-conserved element locus SR-02"), "ultra-conserved element locus");
1515 
1516 }
1517 
1518 
// Body of a helper that adds `entry` to a fresh scope, takes the first
// nucleotide bioseq, and checks that edit::GenerateTargetedLocusName on it
// equals `expected`.
// NOTE(review): the signature line (listing line 1519) is absent from this
// chunk.  Call sites in this file use CheckTargetedLocusEntry(entry, "..."),
// so this is presumably that helper - confirm against the repository.
1520 {
      // NOTE(review): listing line 1521 is absent; `object_manager` is
      // presumably obtained there - confirm.
1522  CRef<CScope> scope(new CScope(*object_manager));
1523  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
      // Restrict the iterator to nucleic-acid bioseqs; only the first one is used
1524  CBioseq_CI bi(seh, CSeq_inst::eMol_na);
1525  BOOST_CHECK_EQUAL(edit::GenerateTargetedLocusName(*bi), expected);
1526 }
1527 
1528 
// Test_GetTargetedLocusName: checks edit::GetTargetedLocusName on four
// feature objects (`prot`, `gene`, `rna`, `imp`).  For the protein and gene
// cases it also checks the entry-level result via CheckTargetedLocusEntry.
// NOTE(review): listing lines 1531-1532, 1536-1537, 1540-1541, 1544 and 1550
// are absent from this chunk, so the declarations of `entry`, `prot`, `gene`,
// `rna` and `imp` are not visible here - confirm against the repository.
1529 BOOST_AUTO_TEST_CASE(Test_GetTargetedLocusName)
1530 {
      // Protein feature: expected name is "fake protein name"
1533  BOOST_CHECK_EQUAL(edit::GetTargetedLocusName(*prot), "fake protein name");
1534  CheckTargetedLocusEntry(entry, "fake protein name");
1535 
      // Gene feature: expected name is the locus that is set here
1538  gene->SetData().SetGene().SetLocus("XYZ");
1539  BOOST_CHECK_EQUAL(edit::GetTargetedLocusName(*gene), "XYZ");
      // NOTE(review): two listing lines are absent here; presumably the gene
      // is attached to the entry before the entry-level check - confirm.
1542  CheckTargetedLocusEntry(entry, "XYZ");
1543 
      // rRNA feature: expected name is the product name that is set here
1545  rna->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
1546  string remainder;
1547  rna->SetData().SetRna().SetRnaProductName("18S ribosomal RNA", remainder);
1548  BOOST_CHECK_EQUAL(edit::GetTargetedLocusName(*rna), "18S ribosomal RNA");
1549 
      // misc_feature: expected name is the feature comment
1551  imp->SetData().SetImp().SetKey("misc_feature");
1552  imp->SetComment("uce");
1553  BOOST_CHECK_EQUAL(edit::GetTargetedLocusName(*imp), "uce");
1554 }
1555 
1556 
// Test_SQD_4679: checks the entry-level targeted locus name for a
// mobile_element feature, first with a "type:name" qualifier value and then
// with a bare value.
// NOTE(review): listing lines 1559-1560 are absent from this chunk, so the
// declarations of `entry` and `imp` are not visible here - confirm against the
// repository.
1557 BOOST_AUTO_TEST_CASE(Test_SQD_4679)
1558 {
1561  imp->SetData().SetImp().SetKey("mobile_element");
      // With "retrotransposon:LTR retrotransposon", only the text after the
      // colon is expected
1562  imp->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("mobile_element_type", "retrotransposon:LTR retrotransposon")));
1563  CheckTargetedLocusEntry(entry, "LTR retrotransposon");
      // With no colon in the value, the whole value is expected
1564  imp->SetQual().back()->SetVal("transposon");
1565  CheckTargetedLocusEntry(entry, "transposon");
1566 }
1567 
1568 
// Test_BioseqSetDescriptorPropagateUp: adds a comment descriptor to a set,
// pushes the set's descriptors down to its members with
// edit::BioseqSetDescriptorPropagateDown, and checks descriptor counts on the
// set and on each member at every stage.
// NOTE(review): listing lines 1571, 1576 and 1597 are absent from this chunk.
// The declarations of `entry` and `object_manager` are therefore not visible,
// and neither is the statement that runs between the second and third groups
// of checks - presumably the propagate-up call this test is named for.
// Confirm against the repository.
1569 BOOST_AUTO_TEST_CASE(Test_BioseqSetDescriptorPropagateUp)
1570 {
      // Put an extra comment descriptor on the set itself
1572  CRef<CSeqdesc> comment(new CSeqdesc());
1573  comment->SetComment("A comment");
1574  entry->SetSet().SetDescr().Set().push_back(comment);
1575 
1577  CRef<CScope> scope(new CScope(*object_manager));
1578  CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1579 
1580  // before, set should have two descriptors, the pop-set title and the comment
1581  BOOST_CHECK_EQUAL(seh.GetDescr().Get().size(), 2);
1582  // components should each have source, molinfo, two pubs
1583  ITERATE(CBioseq_set::TSeq_set, it, entry->GetSet().GetSeq_set()) {
1584  BOOST_CHECK_EQUAL((*it)->GetSeq().GetDescr().Get().size(), 4);
1585  }
1586 
      // Propagate down with an empty list of descriptor choices to erase
1587  CSeqdesc_CI::TDescChoices desc_choices_to_erase;
1588  edit::BioseqSetDescriptorPropagateDown(seh.GetSet(), desc_choices_to_erase);
1589 
      // The set is expected to have no descriptors left
1590  BOOST_CHECK_EQUAL(seh.IsSetDescr(), false);
1591  // components should each have source, molinfo, two pubs, comment, title
1592  ITERATE(CBioseq_set::TSeq_set, it, entry->GetSet().GetSeq_set()) {
1593  BOOST_CHECK_EQUAL((*it)->GetSeq().GetDescr().Get().size(), 6);
1594  }
1595 
1596 
1598  // should now have comment, plus the two pubs that were on each member
1599  BOOST_CHECK_EQUAL(seh.GetDescr().Get().size(), 3);
1600  // components should have source, molinfo, title
1601  ITERATE(CBioseq_set::TSeq_set, it, entry->GetSet().GetSeq_set()) {
1602  BOOST_CHECK_EQUAL((*it)->GetSeq().GetDescr().Get().size(), 3);
1603  }
1604 }
1605 
1606 
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
Definition: base.hpp:115
CAlign_CI –.
Definition: align_ci.hpp:63
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CCode_break –.
Definition: Code_break.hpp:66
CDelta_seq –.
Definition: Delta_seq.hpp:66
CDirEntry –.
Definition: ncbifile.hpp:262
CDir –.
Definition: ncbifile.hpp:1695
CFeat_CI –.
Definition: feat_ci.hpp:64
CFile –.
Definition: ncbifile.hpp:1604
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
CGraph_CI –.
Definition: graph_ci.hpp:234
CMappedGraph –.
Definition: graph_ci.hpp:61
CScope –.
Definition: scope.hpp:92
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
void ReassignConflictingIds(void)
Definition: Seq_entry.cpp:548
CSeq_feat_EditHandle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
void RemoveUnverifiedFeature()
void AddUnverifiedOrganism()
void AddUnverifiedContaminant()
void RemoveUnverifiedOrganism()
void AddUnverifiedMisassembled()
EObjectType GetObjectType() const
void AddUnverifiedFeature()
Definition: map.hpp:338
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static const char * expected[]
Definition: bcp.c:42
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
void FindFilesInDir(const CDir &dir, const vector< string > &masks, const vector< string > &masks_subdir, TFindFunc &find_func, TFindFiles flags=fFF_Default)
Find files in the specified directory.
Definition: ncbifile.hpp:3022
bool IsFile(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a file.
Definition: ncbifile.hpp:3940
string GetName(void) const
Get the base entry name with extension (if any).
Definition: ncbifile.hpp:3916
const string & GetPath(void) const
Get entry path.
Definition: ncbifile.hpp:3910
@ fFF_Recursive
descend into sub-dirs
Definition: ncbifile.hpp:3012
@ fFF_Default
default behavior
Definition: ncbifile.hpp:3014
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
CConstRef< CSeq_id > GetSeqIdOrNull(void) const
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
Definition: scope.cpp:376
vector< CSeq_id_Handle > TId
const TDescr & GetDescr(void) const
void Replace(const CSeq_align &new_obj) const
Replace the Seq-align with new Seq-align object.
TSet GetSet(void) const
void SetInst(TInst &v) const
TSeq GetSeq(void) const
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
bool IsSetDescr(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
const TId & GetId(void) const
void Replace(const CSeq_graph &new_obj) const
Replace the Seq-graph with new Seq-graph object.
const TInst & GetInst(void) const
const string & GetTitle(void) const
Definition: graph_ci.hpp:112
bool IsSetTitle(void) const
Definition: graph_ci.hpp:108
const CSeq_align & GetOriginalSeq_align(void) const
Get original alignment.
Definition: align_ci.cpp:225
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
const CSeq_graph & GetOriginalGraph(void) const
Get original graph with unmapped location/product.
Definition: graph_ci.hpp:70
vector< CSeqdesc::E_Choice > TDescChoices
Definition: seqdesc_ci.hpp:67
CSeq_align_Handle GetSeq_align_Handle(void) const
Get original alignment handle.
Definition: align_ci.cpp:233
CSeq_graph_Handle GetSeq_graph_Handle(void) const
Get original graph handle.
Definition: graph_ci.cpp:93
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
bool CanGetDim(void) const
Check if it is safe to call GetDim method.
Definition: Dense_seg_.hpp:402
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool CanGetNumseg(void) const
Check if it is safe to call GetNumseg method.
Definition: Dense_seg_.hpp:452
bool CanGetIds(void) const
Check if it is safe to call GetIds method.
Definition: Dense_seg_.hpp:499
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
Definition: Seq_align_.hpp:915
bool CanGetStarts(void) const
Check if it is safe to call GetStarts method.
Definition: Dense_seg_.hpp:524
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool CanGetLens(void) const
Check if it is safe to call GetLens method.
Definition: Dense_seg_.hpp:549
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Seq_feat_.hpp:1058
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void SetLoc(TLoc &value)
Assign a value to Loc data member.
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
TTo GetTo(void) const
Get the To member data.
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetClass(TClass value)
Assign a value to Class data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_phy_set
phylogenetic study
bool IsSetLinkage(void) const
Check if a value has been assigned to Linkage data member.
Definition: Seq_gap_.hpp:310
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
TLinkage GetLinkage(void) const
Get the Linkage member data.
Definition: Seq_gap_.hpp:329
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
void SetExt(TExt &value)
Assign a value to Ext data member.
Definition: Seq_inst_.cpp:147
TType GetType(void) const
Get the Type member data.
Definition: Seq_gap_.hpp:282
const TLiteral & GetLiteral(void) const
Get the variant data.
Definition: Delta_seq_.cpp:124
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
TLength GetLength(void) const
Get the Length member data.
const TLinkage_evidence & GetLinkage_evidence(void) const
Get the Linkage_evidence member data.
Definition: Seq_gap_.hpp:369
TComment & SetComment(void)
Select the variant.
Definition: Seqdesc_.hpp:1065
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:838
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:303
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
TUser & SetUser(void)
Select the variant.
Definition: Seqdesc_.cpp:390
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
bool IsLiteral(void) const
Check if variant Literal is selected.
Definition: Delta_seq_.hpp:263
bool IsSetSeq_data(void) const
May have the data. Check if a value has been assigned to Seq_data data member.
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
bool IsGap(void) const
Check if variant Gap is selected.
Definition: Seq_data_.hpp:704
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
bool IsSetLinkage_evidence(void) const
Check if a value has been assigned to Linkage_evidence data member.
Definition: Seq_gap_.hpp:357
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
void SetSub(TSub &value)
Assign a value to Sub data member.
const TEntrys & GetEntrys(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
FILE * file
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
Portable class to work with a spawned process via pipes.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The Object manager core.
#define fi
void TrimSeqAlign(CBioseq_Handle bsh, CRef< CSeq_align > align, const TCuts &sorted_cuts)
Trim Seq-align annotation.
void SetLinkageType(CSeq_ext &ext, CSeq_gap::TType linkage_type)
SetLinkageType: a function to set the linkage_type for gaps in a delta sequence.
void TrimSequenceAndAnnotation(CBioseq_Handle bsh, const TCuts &cuts, EInternalTrimType internal_cut_conversion=eTrimToClosestEnd)
Trim sequence data and all associated annotation.
void TrimSeqData(CBioseq_Handle bsh, CRef< CSeq_inst > inst, const TCuts &sorted_cuts)
Trim sequence data.
TLocAdjustmentVector NormalizeUnknownLengthGaps(CSeq_inst &inst, TSeqPos unknown_length=100)
NormalizeUnknownLengthGaps: a function to adjust the length of unknown-length gaps to a specific length...
CRef< CSeqdesc > FindUnverified(const CBioseq &seq)
void TrimSeqGraph(CBioseq_Handle bsh, CRef< CSeq_graph > graph, const TCuts &sorted_cuts)
Trim Seq-graph annotation.
bool IsUnverifiedMisassembled(const CBioseq &seq)
string GetTargetedLocusName(const CSeq_feat &feat)
void AddLocalIdUserObjects(CSeq_entry &entry)
Creates a User-object descriptor on every sequence that has a local ID. Contains the original local ID...
CRef< CSeq_entry > SeqEntryFromSeqSubmit(const CSeq_submit &submit)
Create a Seq-entry from a Seq-submit.
void BioseqSetDescriptorPropagateDown(const CBioseq_set_Handle &bioseq_set_h, const vector< CSeqdesc::E_Choice > &choices_to_delete=vector< CSeqdesc::E_Choice >())
Moves descriptors down to children of the given bioseq-set.
bool HasRepairedIDs(const CSeq_entry &entry)
Detects whether colliding IDs were fixed by comparing sequence IDs to the contents of the OriginalID ...
void RemoveUserObjectType(CSeq_entry &entry, CUser_object::EObjectType type)
Removes User-object descriptors of a certain type from the seq-entry.
bool IsUnverifiedOrganism(const CBioseq &seq)
void SetLinkageTypeLinkedRepeat(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
void DeleteProteinAndRenormalizeNucProtSet(const CSeq_feat_Handle &feat_h)
Secondary function needed after trimming Seq-feat.
void ConvertRawToDeltaByNs(CSeq_inst &inst, size_t min_unknown, int max_unknown, size_t min_known, int max_known, bool is_assembly_gap=false, int gap_type=CSeq_gap::eType_unknown, int linkage=-1, int linkage_evidence=-1)
ConvertRawToDeltaByNs: a function to convert a raw sequence to a delta sequence, using runs of Ns to d...
string GetTargetedLocusNameConsensus(const string &tls1, const string &tls2)
void TrimSeqFeat(CRef< CSeq_feat > feat, const TCuts &sorted_cuts, bool &bFeatureDeleted, bool &bFeatureTrimmed, bool &partial_start, bool &partial_stop)
Trim Seq-feat annotation.
void GetSortedCuts(CBioseq_Handle bsh, const TCuts &cuts, TCuts &sorted_cuts, EInternalTrimType internal_cut_conversion=eTrimToClosestEnd)
1) Merge abutting and overlapping cuts.
bool IsUnverifiedContaminant(const CBioseq &seq)
bool IsUnverifiedFeature(const CBioseq &seq)
@ eTrimToClosestEnd
string GenerateTargetedLocusName(CBioseq_Handle seq)
void BioseqSetDescriptorPropagateUp(CBioseq_set_Handle set)
Moves descriptors up from children of the given bioseq-set if each child has an identical copy of the...
vector< TRange > TCuts
#define FOR_EACH_SEQDESC_ON_SEQENTRY(Itr, Var)
FOR_EACH_SEQDESC_ON_SEQENTRY / EDIT_EACH_SEQDESC_ON_SEQENTRY.
SAnnotSelector.
Utility stuff for more convenient using of Boost.Test library.
void s_IntervalsMatchGaps(const CSeq_loc &loc, const CSeq_inst &inst)
void s_CheckSeg(const CDelta_seq &ds, bool expect_gap, size_t expect_length)
void CheckTargetedLocusEntry(CRef< CSeq_entry > entry, const string &expected)
BOOST_AUTO_TEST_CASE(FixCollidingIds)
CRef< CSeq_entry > MakeEntryForDeltaConversion(vector< string > segs)
std::map< string, TMapTestNameToTestFiles > TMapFunctionToVecOfTests
void TestCollidingAccessionFixes(const CSeq_id &collide, const string &last)
std::map< string, CFile > TMapTestFiles
NCBITEST_AUTO_INIT()
void CheckLocalId(const CBioseq &seq, const string &expected)
static bool s_FindGi(const CBioseq_Handle &bsh, const TGi &gi)
std::map< string, TMapTestFiles > TMapTestNameToTestFiles
static bool s_FindLocalId(const CBioseq_Handle &bsh, const string &sLocalid)
CRef< CScope > s_pScope
CRef< objects::CSeq_feat > MakeGeneForFeature(CRef< objects::CSeq_feat > feat)
CRef< objects::CAuthor > BuildGoodAuthor()
void SetDrosophila_melanogaster(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_feat > GetCDSFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
void SetSebaea_microphylla(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_annot > AddFeat(CRef< objects::CSeq_feat > feat, CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGoodSeq(void)
CRef< objects::CSeq_feat > AddMiscFeature(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGoodEcoSet()
CRef< objects::CSeq_feat > GetProtFeatFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGoodNucProtSet(void)
CRef< objects::CSeq_entry > GetNucleotideSequenceFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
Modified on Tue Apr 23 07:39:17 2024 by modify_doxy.py rev. 669887