NCBI C++ ToolKit
unit_test_sequpdate.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_sequpdate.cpp 47479 2023-05-02 13:24:02Z ucko $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Andrea Asztalos
27 * File Description:
28 * Unit tests for sequence update
29 */
30 
31 #include <ncbi_pch.hpp>
32 #include <serial/objistr.hpp>
33 #include <serial/typeinfo.hpp>
34 #include <serial/objostr.hpp>
35 #include <serial/serial.hpp>
36 
37 #include <objects/seq/Bioseq.hpp>
38 #include <corelib/ncbi_system.hpp>
39 #include <corelib/ncbiapp.hpp>
43 
44 #include <objmgr/scope.hpp>
46 #include <objmgr/bioseq_ci.hpp>
47 #include <objmgr/feat_ci.hpp>
48 #include <objmgr/seqdesc_ci.hpp>
49 #include <objmgr/seq_vector.hpp>
54 // This header must be included before all Boost.Test headers if there are any
55 #include <corelib/test_boost.hpp>
59 
62 
// Sequence fixtures declared here and defined outside this file.
// sc_MainSequence and sc_UpdSequence_Same are handed to ReadBioSeqs(), which
// parses them as text-ASN.1 Seq-entry objects (see ReadEntry()).
// NOTE(review): the remaining strings are presumably consumed the same way by
// test cases whose calls are not visible in this listing -- confirm.
63 extern const char* sc_MainSequence;
64 extern const char* sc_UpdSequence_Same;
65 extern const char* sc_UpdSequence_Replace;
66 extern const char* sc_UpdSequence_Patch;
67 extern const char* sc_UpdSequence_Extend5;
68 extern const char* sc_UpdSequence_Extend5_AlignWithGap;
69 extern const char* sc_UpdSequence_Extend3;
70 
72 {
73 }
74 
76 {
77 }
78 
79 
80 CRef<CSeq_entry> ReadEntryFromFile(const string& fname)
81 {
82  CRef<CSeq_entry> entry(new CSeq_entry);
83 
84  try {
85  CNcbiIfstream istr(fname.c_str());
86  unique_ptr<CObjectIStream> os(CObjectIStream::Open(eSerial_AsnText, istr));
87  *os >> *entry;
88  }
89  catch (const CException& e) {
90  LOG_POST(Error << e.ReportAll());
91  return CRef<CSeq_entry>();
92  }
93  return entry;
94 }
95 
97 {
102 
103  CNcbiIfstream fstream(fname.c_str(), ios_base::in);
104  CFastaReader fasta_reader(fstream, flags);
105 
106  CRef<CSeq_entry> entry(new CSeq_entry);
107  try {
108  CMessageListenerLenient msg_listener;
109  entry = fasta_reader.ReadSet(kMax_Int, &msg_listener);
110  }
111  catch (const CException& e) {
112  LOG_POST(Error << e.ReportAll());
113  return CRef<CSeq_entry>();
114  }
115  return entry;
116 }
117 
118 CRef<CSeq_entry> ReadEntry(const char* seq)
119 {
120  CRef<CSeq_entry> entry(new CSeq_entry);
121  CNcbiIstrstream istr(seq);
122  istr >> MSerial_AsnText >> *entry;
123  entry->Parentize();
124 
125  return entry;
126 }
127 
129 {
131  scope->AddDefaults();
132 
133  return scope;
134 }
135 
136 void ReadFromFile(const string& old_seq, const string& upd_seq, CSeq_entry_Handle& old_seh, CSeq_entry_Handle& upd_seh)
137 {
138  CRef<CSeq_entry> old_entry = ReadEntryFromFile(old_seq);
139  CRef<CSeq_entry> upd_entry = ReadEntryFromFile(upd_seq);
140  CRef<CScope> scope = BuildScope();
141 
142  old_seh = scope->AddTopLevelSeqEntry(*old_entry);
143  upd_seh = scope->AddTopLevelSeqEntry(*upd_entry);
144  BOOST_REQUIRE(old_seh);
145  BOOST_REQUIRE(upd_seh);
146 }
147 
148 void SetupForUpdate(const CSeq_entry_Handle& old_seh, const CSeq_entry_Handle& upd_seh, CUpdateSeq_Input& sequpd_in, bool identical_res = false)
149 {
150  CBioseq_Handle old_bsh, upd_bsh;
151  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
152  if (b_iter) old_bsh = *b_iter;
153  CBioseq_CI b_it(upd_seh, CSeq_inst::eMol_na);
154  if (b_it) upd_bsh = *b_it;
155 
156  BOOST_REQUIRE(old_bsh);
157  BOOST_REQUIRE(upd_bsh);
158 
159  if (sequpd_in.SetOldBioseqAndScope(old_bsh)
160  && sequpd_in.SetUpdateBioseq(upd_bsh)) {
161  try {
162  sequpd_in.CalculateAlignmentForUnitTest();
163  }
164  catch (const CException& e) {
165  LOG_POST(Error << "Blasting the two sequences has failed: " << e.GetMsg());
166  }
167  }
168 
169  BOOST_REQUIRE(sequpd_in.IsReadyForUpdate());
170  BOOST_CHECK_EQUAL(sequpd_in.HaveIdenticalResidues(), identical_res);
171 }
172 
173 void ReadBioSeqs(const char* old_seq, const char* upd_seq, CSeq_entry_Handle& old_seh, CSeq_entry_Handle& upd_seh)
174 {
175  CRef<CSeq_entry> old_entry = ReadEntry(old_seq);
176  CRef<CSeq_entry> upd_entry = ReadEntry(upd_seq);
177  CRef<CScope> scope = BuildScope();
178 
179  old_seh = scope->AddTopLevelSeqEntry(*old_entry);
180  upd_seh = scope->AddTopLevelSeqEntry(*upd_entry);
181  BOOST_REQUIRE(old_seh);
182  BOOST_REQUIRE(upd_seh);
183 }
184 
185 template<class T>
186 string MakeAsn(const T& object)
187 {
189  str << MSerial_AsnText << object;
191 }
192 
193 
194 
195 BOOST_AUTO_TEST_CASE(Test_SameSequence)
196 {
197  CSeq_entry_Handle old_seh, upd_seh;
198  ReadBioSeqs(sc_MainSequence, sc_UpdSequence_Same, old_seh, upd_seh);
199 
200  CUpdateSeq_Input sequpd_input;
201  SetupForUpdate(old_seh, upd_seh, sequpd_input, true);
202 }
203 
204 BOOST_AUTO_TEST_CASE(Test_NoChange)
205 {
206  CSeq_entry_Handle old_seh, upd_seh;
208 
209  CUpdateSeq_Input sequpd_input;
210  SetupForUpdate(old_seh, upd_seh, sequpd_input);
212  CSequenceUpdater updater(sequpd_input, params);
213  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
214  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
215  BOOST_REQUIRE(cmd);
216  cmd->Execute();
217 
218  CBioseq_Handle new_bsh;
219  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
220  if (b_iter) new_bsh = *b_iter;
221 
222  BOOST_REQUIRE(new_bsh);
223  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 985);
224  //NcbiCout << "Passed Test_NoChange" << NcbiEndl;
225 }
226 
227 BOOST_AUTO_TEST_CASE(Test_NoChange_DeleteAlignedFeatures)
228 {
229  // remove existing features, do not import new ones
230  CSeq_entry_Handle old_seh, upd_seh;
232 
233  CUpdateSeq_Input sequpd_input;
234  SetupForUpdate(old_seh, upd_seh, sequpd_input);
235 
236  // delete features within the aligned region
239  CSequenceUpdater updater(sequpd_input, params);
240  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
241  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
242  BOOST_REQUIRE(cmd);
243  cmd->Execute();
244 
245  short count = 0;
247  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
248  bool type_ok = (type == CSeqFeatData::eSubtype_cdregion)
250  BOOST_CHECK_EQUAL(type_ok, true);
251  count++;
252  }
253  BOOST_CHECK_EQUAL(count, 2);
254  //NcbiCout << "Passed Test_NoChange_DeleteAlignedFeatures" << NcbiEndl;
255 }
256 
257 BOOST_AUTO_TEST_CASE(Test_NoChange_DeleteNotAlignedFeatures)
258 {
259  // remove existing features, do not import new ones
260  CSeq_entry_Handle old_seh, upd_seh;
262 
263  CUpdateSeq_Input sequpd_input;
264  SetupForUpdate(old_seh, upd_seh, sequpd_input);
265 
266  // delete features outside the aligned region
269  CSequenceUpdater updater(sequpd_input, params);
270  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
271  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
272 
273  BOOST_REQUIRE(cmd);
274  cmd->Execute();
275 
276  short count = 0;
278  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
279  bool type_ok = (type == CSeqFeatData::eSubtype_gene)
281  BOOST_CHECK_EQUAL(type_ok, true);
282  count++;
283  }
284  BOOST_CHECK_EQUAL(count, 2);
285  //NcbiCout << "Passed Test_NoChange_DeleteNotAlignedFeatures" << NcbiEndl;
286 }
287 
288 BOOST_AUTO_TEST_CASE(Test_NoChange_DeleteAllFeatures)
289 {
290  CSeq_entry_Handle old_seh, upd_seh;
292 
293  CUpdateSeq_Input sequpd_input;
294  SetupForUpdate(old_seh, upd_seh, sequpd_input);
295 
296  // delete all existing features from the old sequence
299  CSequenceUpdater updater(sequpd_input, params);
300  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
301  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
302 
303  BOOST_REQUIRE(cmd);
304  cmd->Execute();
305 
306  short count = 0;
308  count++;
309  }
310  BOOST_CHECK_EQUAL(count, 0);
311  //NcbiCout << "Passed Test_NoChange_DeleteAllFeatures" << NcbiEndl;
312 }
313 
314 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportNewFeatures1) // include duplicates
315 {
316  CSeq_entry_Handle old_seh, upd_seh;
318 
319  CUpdateSeq_Input sequpd_input;
320  SetupForUpdate(old_seh, upd_seh, sequpd_input);
321 
322  // import new (any kind of) features and do not remove any existing features
325  params.m_ImportFeatures = true;
327  CSequenceUpdater updater(sequpd_input, params);
328  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
329  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
330 
331  BOOST_REQUIRE(cmd);
332  cmd->Execute();
333 
334  short count = 0;
336  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
337  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
341  BOOST_CHECK_EQUAL(type_ok, true);
342 
343  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
344  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
345  bool feat_ok = false;
346  switch (type) {
348  feat_ok = (start == 0 && stop == 983) || (start == 568 && stop == 718);
349  break;
351  feat_ok = (start == 199 && stop == 599);
352  break;
354  feat_ok = (start == 249 && stop == 562) || (start == 587 && stop == 686);
355  break;
357  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 25);
358  break;
359  default:
360  break;
361  }
362  BOOST_CHECK_EQUAL(feat_ok, true);
363  count++;
364  }
365  BOOST_CHECK_EQUAL(count, 7);
366  //NcbiCout << "Passed Test_NoChange_ImportNewFeatures1" << NcbiEndl;
367 }
368 
369 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportNewFeatures2)
370 {
371  string old_fname("test_data/Pbseq.asn");
372  string upd_fname("test_data/NC_014053.asn");
373  CSeq_entry_Handle old_seh, upd_seh;
374  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
375 
376  CUpdateSeq_Input sequpd_input;
377  SetupForUpdate(old_seh, upd_seh, sequpd_input);
378 
379  // import tRNAs and do not remove any existing features
382  params.m_ImportFeatures = true;
384  CSequenceUpdater updater(sequpd_input, params);
385  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
386  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
387 
388  BOOST_REQUIRE(cmd);
389  cmd->Execute();
390 
391  CBioseq_Handle new_bsh;
392  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
393  if (b_iter) new_bsh = *b_iter;
394 
395  BOOST_REQUIRE(new_bsh);
396  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 14547);
397 
398  const CSeq_id* lcl_id = new_bsh.GetCompleteBioseq()->GetLocalId();
399  BOOST_CHECK(lcl_id);
400 
401  short count = 0;
403  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
404  BOOST_CHECK_EQUAL(type, CSeqFeatData::eSubtype_tRNA);
405 
406  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
407  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
408 
409  const CSeq_id* feat_id = feat->GetLocation().GetId();
410  BOOST_REQUIRE(feat_id);
411  BOOST_CHECK(feat_id->Match(*lcl_id));
412 
413  const CRNA_ref::C_Ext& ext = feat->GetData().GetRna().GetExt();
414  const CTrna_ext& trna_ext = ext.GetTRNA();
415  // anticodon is set in all these tRNAs
416  const TSeqPos start_ac = trna_ext.GetAnticodon().GetStart(eExtreme_Biological);
417  const TSeqPos stop_ac = trna_ext.GetAnticodon().GetStop(eExtreme_Biological);
418 
419  const CSeq_id* anticodon_id = trna_ext.GetAnticodon().GetId();
420  BOOST_REQUIRE(anticodon_id);
421  BOOST_CHECK(anticodon_id->Match(*lcl_id));
422 
423  bool feat_ok(false);
424  switch (count) {
425  case (0):
426  feat_ok = (start == 0 && stop == 68) && (start_ac == 32 && stop_ac == 34);
427  break;
428  case (1):
429  feat_ok = (start == 69 && stop == 133) && (start_ac == 99 && stop_ac == 101);
430  break;
431  case (2):
432  feat_ok = (start == 199 && stop == 131) && (start_ac == 169 && stop_ac == 167);
433  break;
434  case (3):
435  feat_ok = (start == 1448 && stop == 1385) && (start_ac == 1418 && stop_ac == 1416);
436  break;
437  case (7):
438  feat_ok = (start == 5911 && stop == 5977) && (start_ac == 5942 && stop_ac == 5944);
439  break;
440  case (11):
441  feat_ok = (start == 8112 && stop == 8049) && (start_ac == 8082 && stop_ac == 8080);
442  break;
443  case (15):
444  feat_ok = (start == 14087 && stop == 14024) && (start_ac == 14058 && stop_ac == 14056);
445  default:
446  feat_ok = true;
447  break;
448  }
449  BOOST_CHECK_EQUAL(feat_ok, true);
450  count++;
451  }
452  BOOST_CHECK_EQUAL(count, 16);
453  //NcbiCout << "Passed Test_NoChange_ImportNewFeatures2" << NcbiEndl;
454 }
455 
456 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportNewFeatures_IncludeDupl)
457 {
458  string old_fname("test_data/Pbseq_withtRNA.asn");
459  string upd_fname("test_data/NC_014053.asn");
460  CSeq_entry_Handle old_seh, upd_seh;
461  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
462 
463  CUpdateSeq_Input sequpd_input;
464  SetupForUpdate(old_seh, upd_seh, sequpd_input);
465 
466  // import tRNAs, including duplicates and do not remove any existing features
469  params.m_ImportFeatures = true;
472 
473  CSequenceUpdater updater(sequpd_input, params);
474  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
475  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
476 
477  BOOST_REQUIRE(cmd);
478  cmd->Execute();
479 
480  CBioseq_Handle new_bsh;
481  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
482  if (b_iter) new_bsh = *b_iter;
483 
484  BOOST_REQUIRE(new_bsh);
485  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 14547);
486 
487  const CSeq_id* lcl_id = new_bsh.GetCompleteBioseq()->GetLocalId();
488  BOOST_CHECK(lcl_id);
489 
490  short count = 0;
492  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
493  BOOST_CHECK_EQUAL(type, CSeqFeatData::eSubtype_tRNA);
494 
495  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
496  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
497 
498  const CSeq_id* feat_id = feat->GetLocation().GetId();
499  BOOST_REQUIRE(feat_id);
500  BOOST_CHECK(feat_id->Match(*lcl_id));
501 
502  const CRNA_ref::C_Ext& ext = feat->GetData().GetRna().GetExt();
503  const CTrna_ext& trna_ext = ext.GetTRNA();
504  // anticodon is set in all these tRNAs
505  const TSeqPos start_ac = trna_ext.GetAnticodon().GetStart(eExtreme_Biological);
506  const TSeqPos stop_ac = trna_ext.GetAnticodon().GetStop(eExtreme_Biological);
507 
508  const CSeq_id* anticodon_id = trna_ext.GetAnticodon().GetId();
509  BOOST_REQUIRE(anticodon_id);
510  BOOST_CHECK(anticodon_id->Match(*lcl_id));
511 
512  bool feat_ok(false);
513  switch (count) {
514  case (0) :
515  feat_ok = (start == 0 && stop == 68) && (start_ac == 32 && stop_ac == 34);
516  BOOST_CHECK(feat->IsSetComment());
517  break;
518  case (1) :
519  feat_ok = (start == 69 && stop == 133) && (start_ac == 99 && stop_ac == 101);
520  BOOST_CHECK(feat->IsSetComment());
521  break;
522  case (2) :
523  feat_ok = (start == 6313 && stop == 6250) && (start_ac == 6283 && stop_ac == 6281);
524  BOOST_CHECK(feat->IsSetComment());
525  break;
526  case (3) :
527  feat_ok = (start == 14087 && stop == 14024) && (start_ac == 14058 && stop_ac == 14056);
528  BOOST_CHECK(feat->IsSetComment());
529  break;
530  case (4) :
531  feat_ok = (start == 0 && stop == 68) && (start_ac == 32 && stop_ac == 34);
532  BOOST_CHECK(!feat->IsSetComment());
533  break;
534  case (5) :
535  feat_ok = (start == 69 && stop == 133) && (start_ac == 99 && stop_ac == 101);
536  BOOST_CHECK(!feat->IsSetComment());
537  break;
538  case (12) :
539  feat_ok = (start == 5977 && stop == 6042) && (start_ac == 6006 && stop_ac == 6008);
540  BOOST_CHECK(!feat->IsSetComment());
541  break;
542  case (19) :
543  feat_ok = (start == 14087 && stop == 14024) && (start_ac == 14058 && stop_ac == 14056);
544  BOOST_CHECK(!feat->IsSetComment());
545  break;
546  default:
547  feat_ok = true;
548  break;
549  }
550  BOOST_CHECK_EQUAL(feat_ok, true);
551  count++;
552  }
553  BOOST_CHECK_EQUAL(count, 20);
554  //NcbiCout << "Passed Test_NoChange_ImportNewFeatures_IncludeDupl" << NcbiEndl;
555 
556 }
557 
558 
559 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportNewFeatures_ExceptDupl)
560 {
561  string old_fname("test_data/Pbseq_withtRNA.asn");
562  string upd_fname("test_data/NC_014053.asn");
563  CSeq_entry_Handle old_seh, upd_seh;
564  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
565 
566  CUpdateSeq_Input sequpd_input;
567  SetupForUpdate(old_seh, upd_seh, sequpd_input);
568 
569  // import tRNAs, except duplicates and do not remove any existing features
572  params.m_ImportFeatures = true;
575 
576  CSequenceUpdater updater(sequpd_input, params);
577  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
578  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
579 
580  BOOST_REQUIRE(cmd);
581  cmd->Execute();
582 
583  CBioseq_Handle new_bsh;
584  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
585  if (b_iter) new_bsh = *b_iter;
586 
587  BOOST_REQUIRE(new_bsh);
588  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 14547);
589 
590  const CSeq_id* lcl_id = new_bsh.GetCompleteBioseq()->GetLocalId();
591  BOOST_CHECK(lcl_id);
592 
593  short count = 0;
595  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
596  BOOST_CHECK_EQUAL(type, CSeqFeatData::eSubtype_tRNA);
597 
598  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
599  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
600 
601  const CSeq_id* feat_id = feat->GetLocation().GetId();
602  BOOST_REQUIRE(feat_id);
603  BOOST_CHECK(feat_id->Match(*lcl_id));
604 
605  const CRNA_ref::C_Ext& ext = feat->GetData().GetRna().GetExt();
606  const CTrna_ext& trna_ext = ext.GetTRNA();
607  // anticodon is set in all these tRNAs
608  const TSeqPos start_ac = trna_ext.GetAnticodon().GetStart(eExtreme_Biological);
609  const TSeqPos stop_ac = trna_ext.GetAnticodon().GetStop(eExtreme_Biological);
610 
611  const CSeq_id* anticodon_id = trna_ext.GetAnticodon().GetId();
612  BOOST_REQUIRE(anticodon_id);
613  BOOST_CHECK(anticodon_id->Match(*lcl_id));
614 
615  bool feat_ok(false);
616  switch (count) {
617  case (0) :
618  feat_ok = (start == 0 && stop == 68) && (start_ac == 32 && stop_ac == 34);
619  BOOST_CHECK(feat->IsSetComment());
620  break;
621  case (1) :
622  feat_ok = (start == 69 && stop == 133) && (start_ac == 99 && stop_ac == 101);
623  BOOST_CHECK(feat->IsSetComment());
624  break;
625  case (2) :
626  feat_ok = (start == 6313 && stop == 6250) && (start_ac == 6283 && stop_ac == 6281);
627  BOOST_CHECK(feat->IsSetComment());
628  break;
629  case (3) :
630  feat_ok = (start == 14087 && stop == 14024) && (start_ac == 14058 && stop_ac == 14056);
631  BOOST_CHECK(feat->IsSetComment());
632  break;
633  case (4) :
634  feat_ok = (start == 199 && stop == 131) && (start_ac == 169 && stop_ac == 167);
635  BOOST_CHECK(!feat->IsSetComment());
636  break;
637  case (12) :
638  feat_ok = (start == 8112 && stop == 8049) && (start_ac == 8082 && stop_ac == 8080);
639  BOOST_CHECK(!feat->IsSetComment());
640  break;
641  case (15) :
642  feat_ok = (start == 12670 && stop == 12602) && (start_ac == 12640 && stop_ac == 12638);
643  BOOST_CHECK(!feat->IsSetComment());
644  break;
645  default:
646  feat_ok = true;
647  break;
648  }
649  BOOST_CHECK_EQUAL(feat_ok, true);
650  count++;
651  }
652  BOOST_CHECK_EQUAL(count, 16);
653  //NcbiCout << "Passed Test_NoChange_ImportNewFeatures_ExceptDupl" << NcbiEndl;
654 }
655 
656 
657 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportNewFeatures_ReplaceDupl)
658 {
659  string old_fname("test_data/Pbseq_withtRNA.asn");
660  string upd_fname("test_data/NC_014053.asn");
661  CSeq_entry_Handle old_seh, upd_seh;
662  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
663 
664  CUpdateSeq_Input sequpd_input;
665  SetupForUpdate(old_seh, upd_seh, sequpd_input);
666 
667  // import tRNAs, replace duplicates and do not remove any existing features
670  params.m_ImportFeatures = true;
673 
674  CSequenceUpdater updater(sequpd_input, params);
675  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
676  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
677 
678  BOOST_REQUIRE(cmd);
679  cmd->Execute();
680 
681  CBioseq_Handle new_bsh;
682  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
683  if (b_iter) new_bsh = *b_iter;
684 
685  BOOST_REQUIRE(new_bsh);
686  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 14547);
687 
688  const CSeq_id* lcl_id = new_bsh.GetCompleteBioseq()->GetLocalId();
689  BOOST_CHECK(lcl_id);
690 
691  short count = 0;
693  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
694  BOOST_CHECK_EQUAL(type, CSeqFeatData::eSubtype_tRNA);
695 
696  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
697  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
698 
699  const CSeq_id* feat_id = feat->GetLocation().GetId();
700  BOOST_REQUIRE(feat_id);
701  BOOST_CHECK(feat_id->Match(*lcl_id));
702 
703  const CRNA_ref::C_Ext& ext = feat->GetData().GetRna().GetExt();
704  const CTrna_ext& trna_ext = ext.GetTRNA();
705  // anticodon is set in all these tRNAs
706  const TSeqPos start_ac = trna_ext.GetAnticodon().GetStart(eExtreme_Biological);
707  const TSeqPos stop_ac = trna_ext.GetAnticodon().GetStop(eExtreme_Biological);
708 
709  const CSeq_id* anticodon_id = trna_ext.GetAnticodon().GetId();
710  BOOST_REQUIRE(anticodon_id);
711  BOOST_CHECK(anticodon_id->Match(*lcl_id));
712 
713  bool feat_ok(false);
714  switch (count) {
715  case (0) :
716  feat_ok = (start == 0 && stop == 68) && (start_ac == 32 && stop_ac == 34);
717  BOOST_CHECK(!feat->IsSetComment());
718  break;
719  case (1) :
720  feat_ok = (start == 69 && stop == 133) && (start_ac == 99 && stop_ac == 101);
721  BOOST_CHECK(!feat->IsSetComment());
722  break;
723  case (2) :
724  feat_ok = (start == 199 && stop == 131) && (start_ac == 169 && stop_ac == 167);
725  BOOST_CHECK(!feat->IsSetComment());
726  break;
727  case (10) :
728  feat_ok = (start == 6313 && stop == 6250) && (start_ac == 6283 && stop_ac == 6281);
729  BOOST_CHECK(!feat->IsSetComment());
730  break;
731  case (15) :
732  feat_ok = (start == 14087 && stop == 14024) && (start_ac == 14058 && stop_ac == 14056);
733  BOOST_CHECK(!feat->IsSetComment());
734  break;
735  default:
736  feat_ok = true;
737  break;
738  }
739  BOOST_CHECK_EQUAL(feat_ok, true);
740  count++;
741  }
742  BOOST_CHECK_EQUAL(count, 16);
743  //NcbiCout << "Passed Test_NoChange_ImportNewFeatures_ReplaceDupl" << NcbiEndl;
744 }
745 
746 
747 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportNewFeatures_MergeDupl1)
748 {
749  string old_fname("test_data/Pbseq_withtRNA.asn");
750  string upd_fname("test_data/NC_014053.asn");
751  CSeq_entry_Handle old_seh, upd_seh;
752  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
753 
754  CUpdateSeq_Input sequpd_input;
755  SetupForUpdate(old_seh, upd_seh, sequpd_input);
756 
757  // import tRNAs, merge duplicates and do not remove any existing features
760  params.m_ImportFeatures = true;
763 
764  CSequenceUpdater updater(sequpd_input, params);
765  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
766  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
767 
768  BOOST_REQUIRE(cmd);
769  cmd->Execute();
770 
771  CBioseq_Handle new_bsh;
772  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
773  if (b_iter) new_bsh = *b_iter;
774 
775  BOOST_REQUIRE(new_bsh);
776  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 14547);
777 
778  const CSeq_id* lcl_id = new_bsh.GetCompleteBioseq()->GetLocalId();
779  BOOST_CHECK(lcl_id);
780 
781  short count = 0;
783  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
784  BOOST_CHECK_EQUAL(type, CSeqFeatData::eSubtype_tRNA);
785 
786  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
787  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
788 
789  const CSeq_id* feat_id = feat->GetLocation().GetId();
790  BOOST_REQUIRE(feat_id);
791  BOOST_CHECK(feat_id->Match(*lcl_id));
792 
793  const CRNA_ref::C_Ext& ext = feat->GetData().GetRna().GetExt();
794  const CTrna_ext& trna_ext = ext.GetTRNA();
795  // anticodon is set in all these tRNAs
796  const TSeqPos start_ac = trna_ext.GetAnticodon().GetStart(eExtreme_Biological);
797  const TSeqPos stop_ac = trna_ext.GetAnticodon().GetStop(eExtreme_Biological);
798 
799  const CSeq_id* anticodon_id = trna_ext.GetAnticodon().GetId();
800  BOOST_REQUIRE(anticodon_id);
801  BOOST_CHECK(anticodon_id->Match(*lcl_id));
802 
803  bool feat_ok(false);
804  switch (count) {
805  case (0) :
806  feat_ok = (start == 0 && stop == 68) && (start_ac == 32 && stop_ac == 34);
807  BOOST_CHECK(feat->IsSetComment());
808  break;
809  case (1) :
810  feat_ok = (start == 69 && stop == 133) && (start_ac == 99 && stop_ac == 101);
811  BOOST_CHECK(feat->IsSetComment());
812  break;
813  case (2) :
814  feat_ok = (start == 199 && stop == 131) && (start_ac == 169 && stop_ac == 167);
815  BOOST_CHECK(!feat->IsSetComment());
816  break;
817  case (10) :
818  feat_ok = (start == 6313 && stop == 6250) && (start_ac == 6283 && stop_ac == 6281);
819  BOOST_CHECK(feat->IsSetComment());
820  break;
821  case (15) :
822  feat_ok = (start == 14087 && stop == 14024) && (start_ac == 14058 && stop_ac == 14056);
823  BOOST_CHECK(feat->IsSetComment());
824  BOOST_CHECK(feat->IsSetPartial());
825  break;
826  default:
827  feat_ok = true;
828  break;
829  }
830  BOOST_CHECK_EQUAL(feat_ok, true);
831  count++;
832  }
833  BOOST_CHECK_EQUAL(count, 16);
834  //NcbiCout << "Passed Test_NoChange_ImportNewFeatures_MergeDupl1" << NcbiEndl;
835 }
836 
837 
838 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportNewFeatures_MergeDupl2)
839 {
840  string old_fname("test_data/Pbseq_withGenes.asn");
841  string upd_fname("test_data/NC_014053_genes.asn");
842  CSeq_entry_Handle old_seh, upd_seh;
843  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
844 
845  CUpdateSeq_Input sequpd_input;
846  SetupForUpdate(old_seh, upd_seh, sequpd_input);
847 
848  // import genes, merge duplicates and do not remove any existing features
851  params.m_ImportFeatures = true;
854 
855  CSequenceUpdater updater(sequpd_input, params);
856  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
857  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
858 
859  BOOST_REQUIRE(cmd);
860  cmd->Execute();
861 
862  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/Pbseq_withGenes_updated.asn");
863 
864  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
865  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
866  //NcbiCout << "Passed Test_NoChange_ImportNewFeatures_MergeDupl2" << NcbiEndl;
867 }
868 
869 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_NewId1)
870 {
871  string old_fname("test_data/Pbseq_withGenes.asn");
872  string upd_fname("test_data/NC_014053.asn");
873  CSeq_entry_Handle old_seh, upd_seh;
874  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
875 
876  CUpdateSeq_Input sequpd_input;
877  SetupForUpdate(old_seh, upd_seh, sequpd_input);
878 
879  // import all coding regions and assign new IDs to the products
882  params.m_ImportFeatures = true;
885 
886  CSequenceUpdater updater(sequpd_input, params);
887  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
888  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
889 
890  BOOST_REQUIRE(cmd);
891  cmd->Execute();
892 
893  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/Pbseq_importCDS_newId.asn");
894 
895  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
896  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
897  //NcbiCout << "Passed Test_NoChange_ImportCDSFeatures_NewId1" << NcbiEndl;
898 }
899 
900 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_KeepId1)
901 {
902  string old_fname("test_data/Pbseq_withGenes.asn");
903  string upd_fname("test_data/NC_014053.asn");
904  CSeq_entry_Handle old_seh, upd_seh;
905  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
906 
907  CUpdateSeq_Input sequpd_input;
908  SetupForUpdate(old_seh, upd_seh, sequpd_input);
909 
910  // import coding regions and keep the original protein IDs
913  params.m_ImportFeatures = true;
916  params.m_KeepProteinId = true;
917 
918  CSequenceUpdater updater(sequpd_input, params);
919  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
920  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
921 
922  BOOST_REQUIRE(cmd);
923  cmd->Execute();
924 
925  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/Pbseq_importCDS_keepId.asn");
926 
927  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
928  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
929  //NcbiCout << "Passed Test_NoChange_ImportCDSFeatures_KeepId1" << NcbiEndl;
930 }
931 
932 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_NewId2)
933 {
934  string old_fname("test_data/duplicateCDS.asn");
935  string upd_fname("test_data/NC_024274_cds.asn");
936  CSeq_entry_Handle old_seh, upd_seh;
937  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
938 
939  CUpdateSeq_Input sequpd_input;
940  SetupForUpdate(old_seh, upd_seh, sequpd_input);
941 
942  // import coding regions and do not keep original protein IDs
945  params.m_ImportFeatures = true;
948 
949  CSequenceUpdater updater(sequpd_input, params);
950  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
951  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
952 
953  BOOST_REQUIRE(cmd);
954  cmd->Execute();
955 
956  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/duplicateCDS_newId_updated.asn");
957 
958  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
959  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
960  //NcbiCout << "Passed Test_NoChange_ImportCDSFeatures_NewId2" << NcbiEndl;
961 }
962 
// Runs the sequence updater with test_data/duplicateCDS.asn as the old entry
// and test_data/NC_024274_cds.asn as the update, with feature import enabled
// and m_KeepProteinId set to true. After executing the returned command,
// MakeAsn() of the updated entry must equal MakeAsn() of
// test_data/duplicateCDS_keepId_updated.asn.
963 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_KeepId2)
964 {
965  string old_fname("test_data/duplicateCDS.asn");
966  string upd_fname("test_data/NC_024274_cds.asn");
967  CSeq_entry_Handle old_seh, upd_seh;
968  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
969 
970  CUpdateSeq_Input sequpd_input;
971  SetupForUpdate(old_seh, upd_seh, sequpd_input);
972 
973  // import coding regions and keep protein IDs
976  params.m_ImportFeatures = true;
979  params.m_KeepProteinId = true;
980 
981  CSequenceUpdater updater(sequpd_input, params);
982  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
983  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
984 
985  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
986  cmd->Execute();
987 
 // 'output' holds the expected entry; 'updated_entry' is the actual entry after Execute()
988  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/duplicateCDS_keepId_updated.asn");
989 
990  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
991  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
992  //NcbiCout << "Passed Test_NoChange_ImportCDSFeatures_KeepId2" << NcbiEndl;
993 }
994 
995 
// Runs the sequence updater with test_data/duplicateCDS.asn as the old entry
// and test_data/NC_024274_cds.asn as the update, with feature import enabled
// (per the in-body comment: coding regions, duplicates merged, no existing
// features removed). After executing the returned command, MakeAsn() of the
// updated entry must equal MakeAsn() of test_data/duplicateCDS_merged.asn.
996 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_Merge)
997 {
998  string old_fname("test_data/duplicateCDS.asn");
999  string upd_fname("test_data/NC_024274_cds.asn");
1000  CSeq_entry_Handle old_seh, upd_seh;
1001  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1002 
1003  CUpdateSeq_Input sequpd_input;
1004  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1005 
1006  // import coding regions, merge duplicates and do not remove any existing features
1009  params.m_ImportFeatures = true;
1012 
1013  CSequenceUpdater updater(sequpd_input, params);
1014  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1015  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1016 
1017  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1018  cmd->Execute();
1019 
 // 'output' holds the expected entry; 'updated_entry' is the actual entry after Execute()
1020  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/duplicateCDS_merged.asn");
1021 
1022  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
1023  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
1024  //NcbiCout << "Passed Test_NoChange_ImportCDSFeatures_Merge" << NcbiEndl;
1025 }
1026 
// Runs the sequence updater with test_data/duplicateCDS.asn as the old entry
// and test_data/NC_024274_cds.asn as the update, with feature import enabled
// (per the in-body comment: coding regions except duplicates, no existing
// features removed). After executing the returned command, MakeAsn() of the
// updated entry must equal MakeAsn() of test_data/duplicateCDS_exceptdupl.asn.
1027 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_ExceptDuplicates)
1028 {
1029  string old_fname("test_data/duplicateCDS.asn");
1030  string upd_fname("test_data/NC_024274_cds.asn");
1031  CSeq_entry_Handle old_seh, upd_seh;
1032  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1033 
1034  CUpdateSeq_Input sequpd_input;
1035  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1036 
1037  // import coding regions except duplicates and do not remove any existing features
1040  params.m_ImportFeatures = true;
1043 
1044  CSequenceUpdater updater(sequpd_input, params);
1045  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1046  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1047 
1048  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1049  cmd->Execute();
1050 
 // 'output' holds the expected entry; 'updated_entry' is the actual entry after Execute()
1051  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/duplicateCDS_exceptdupl.asn");
1052 
1053  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
1054  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
1055  //NcbiCout << "Passed Test_NoChange_ImportCDSFeatures_ExceptDuplicates" << NcbiEndl;
1056 }
1057 
// Runs the sequence updater with test_data/duplicateCDS.asn as the old entry
// and test_data/NC_024274_cds.asn as the update, with feature import enabled
// (per the in-body comment: coding regions, duplicates replaced, no existing
// features removed). After executing the returned command, MakeAsn() of the
// updated entry must equal MakeAsn() of test_data/duplicateCDS_replacedupl.asn.
1058 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_ReplaceDuplicates)
1059 {
1060  string old_fname("test_data/duplicateCDS.asn");
1061  string upd_fname("test_data/NC_024274_cds.asn");
1062  CSeq_entry_Handle old_seh, upd_seh;
1063  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1064 
1065  CUpdateSeq_Input sequpd_input;
1066  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1067 
1068  // import coding regions, replace duplicates and do not remove any existing features
1071  params.m_ImportFeatures = true;
1074 
1075  CSequenceUpdater updater(sequpd_input, params);
1076  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1077  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1078 
1079  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1080  cmd->Execute();
1081 
 // 'output' holds the expected entry; 'updated_entry' is the actual entry after Execute()
1082  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/duplicateCDS_replacedupl.asn");
1083 
1084  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
1085  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
1086  //NcbiCout << "Passed Test_NoChange_ImportCDSFeatures_ReplaceDuplicates" << NcbiEndl;
1087 }
1088 
1089 
1090 // When old protein ids are the same as imported protein ids
1091 /* currently fails, the old proteins are 'shadowing' the imported ones
1092 // Resolution: annotations should be placed in different scopes
1093 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportCDSFeatures_NewId2) // GB-5732
1094 {
1095  string old_fname("test_data/proteinacc.asn");
1096  string upd_fname("test_data/NC_024274_cds.asn");
1097  CSeq_entry_Handle old_seh, upd_seh;
1098  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1099 
1100  CUpdateSeq_Input sequpd_input;
1101  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1102 
1103  // import coding regions only (m_FeatImportType is cdregion) using eFeatUpdateAll, and do not remove any existing features
1104  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateNoChange);
1105  params.m_FeatRemoveOption = SUpdateSeqParams::eFeatRemoveNone;
1106  params.m_ImportFeatures = true;
1107  params.m_FeatImportType = CSeqFeatData::eSubtype_cdregion;
1108  params.m_FeatImportOption = SUpdateSeqParams::eFeatUpdateAll;
1109 
1110  CSequenceUpdater updater(sequpd_input, params);
1111  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1112  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1113 
1114  BOOST_REQUIRE(cmd);
1115  cmd->Execute();
1116 
1117  CRef<CSeq_entry> output = ReadEntryFromFile("test_data/proteinacc_CDS_newId_updated.asn");
1118 
1119  CConstRef<CSeq_entry> updated_entry = old_seh.GetCompleteSeq_entry();
1120  BOOST_CHECK_EQUAL(MakeAsn(*output), MakeAsn(*updated_entry));
1121 }
1122 */
1123 
// Runs the sequence updater on the sequences built from sc_MainSequence and
// sc_UpdSequence_Patch with feature import enabled (per the in-body comment:
// all existing features removed, all features imported). After executing the
// returned command, every feature visited must have an accepted subtype and
// the start/stop expected for that subtype, and exactly 3 features must be seen.
1124 BOOST_AUTO_TEST_CASE(Test_NoChange_Delete_ImportAllFeatures)
1125 {
1126  CSeq_entry_Handle old_seh, upd_seh;
1127  ReadBioSeqs(sc_MainSequence, sc_UpdSequence_Patch, old_seh, upd_seh);
1128 
1129  CUpdateSeq_Input sequpd_input;
1130  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1131 
1132  // remove all existing features, and import all features
1135  params.m_ImportFeatures = true;
1137  CSequenceUpdater updater(sequpd_input, params);
1138  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1139  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1140 
1141  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1142  cmd->Execute();
1143 
 // count is incremented once per feature visited and compared with 3 at the end
1144  short count = 0;
1146  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
1147  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
1150  BOOST_CHECK_EQUAL(type_ok, true);
1151 
1152  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
1153  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this start/stop pair
1154  bool feat_ok = false;
1155  switch (type) {
1157  // if the first residue of the patch is different, start = 569 ( in Sequin it is 568)
1158  feat_ok = (start == 568 && stop == 718);
1159  break;
1161  feat_ok = (start == 587 && stop == 686);
1162  break;
1164  feat_ok = (start == 0 && stop == 25);
1165  break;
1166  default:
1167  break;
1168  }
1169  BOOST_CHECK_EQUAL(feat_ok, true);
1170  count++;
1171  }
1172  BOOST_CHECK_EQUAL(count, 3);
1173  //NcbiCout << "Passed Test_NoChange_Delete_ImportAllFeatures" << NcbiEndl;
1174 }
1175 
// Runs the sequence updater with test_data/shortSeq.asn as the old entry and
// test_data/update_shortSeq.asn as the update, with feature import enabled.
// Per the in-body comment no feature of the update sequence is expected to be
// imported; the test checks the subtype and start/stop of every feature
// visited and requires exactly 4 features.
1176 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportFeats_ShortSeq)
1177 {
1178  string old_fname("test_data/shortSeq.asn");
1179  string upd_fname("test_data/update_shortSeq.asn");
1180  CSeq_entry_Handle old_seh, upd_seh;
1181  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1182 
1183  CUpdateSeq_Input sequpd_input;
1184  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1186  params.m_ImportFeatures = true;
1188  CSequenceUpdater updater(sequpd_input, params);
1189  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1190  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1191 
1192  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1193  cmd->Execute();
1194 
 // count is incremented once per feature visited and compared with 4 at the end
1195  short count = 0;
1196  // none of features from the update sequence get imported
1198  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
1199  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
1203  BOOST_CHECK_EQUAL(type_ok, true);
1204 
1205  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
1206  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this start/stop pair
1207  bool feat_ok = false;
1208  switch (type) {
1210  feat_ok = (start == 0 && stop == 127);
1211  break;
1213  feat_ok = (start == 0 && stop == 29);
1214  break;
1216  feat_ok = (start == 29 && stop == 127);
1217  break;
1219  feat_ok = (start == 0 && stop == 49);
1220  break;
1221  default:
1222  break;
1223  }
1224  BOOST_CHECK_EQUAL(feat_ok, true);
1225  count++;
1226  }
1227  BOOST_CHECK_EQUAL(count, 4);
1228  //NcbiCout << "Passed Test_NoChange_ImportFeats_ShortSeq" << NcbiEndl;
1229 }
1230 
1231 
// Runs the sequence updater on the sequences built from sc_MainSequence and
// sc_UpdSequence_Patch (per the in-body comment: replace the sequence, import
// no new features). After executing the returned command the test checks that
// the first nucleotide bioseq has the same length as the update bioseq, has
// molecule type eMol_rna, and has the same residues as the update sequence.
// It then checks the subtype and start/stop of every feature visited and
// requires exactly 4 features.
1232 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence)
1233 {
1234  CSeq_entry_Handle old_seh, upd_seh;
1235  ReadBioSeqs(sc_MainSequence, sc_UpdSequence_Patch, old_seh, upd_seh);
1236 
1237  CUpdateSeq_Input sequpd_input;
1238  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1239  // replace the sequence, and do not import any new features
1241  CSequenceUpdater updater(sequpd_input, params);
1242  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1243  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1244 
1245  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1246  cmd->Execute();
1247 
 // take the first nucleotide bioseq found under old_seh after the update
1248  CBioseq_Handle new_bsh;
1249  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
1250  if (b_iter) new_bsh = *b_iter;
1251 
1252  BOOST_REQUIRE(new_bsh);
1253  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
1254 
1255  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), upd_bsh.GetBioseqLength());
1256  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
1257 
1258  // new sequence should be equal to the update sequence
1259  string newseq;
1261  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
1262  string updseq;
1264  upd_svec.GetSeqData(0, upd_bsh.GetBioseqLength(), updseq);
1265  BOOST_CHECK_EQUAL(updseq, newseq);
1266 
1267  // check whether the existing features on the old sequence have been properly adjusted
1268  short count = 0;
1270  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
1271  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
1275  BOOST_CHECK_EQUAL(type_ok, true);
1276 
1277  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
1278  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this start/stop pair
1279  bool feat_ok = false;
1280 
1281  switch (type) {
1283  feat_ok = (start == 0 && stop == 131);
1284  break;
1286  feat_ok = (start == 0 && stop == 31);
1287  break;
1289  feat_ok = (start == 0 && stop == 0);
1290  break;
1292  feat_ok = (start == 0 && stop == 69);
1293  break;
1294  default:
1295  break;
1296  }
1297  BOOST_CHECK_EQUAL(feat_ok, true);
1298  count++;
1299  }
1300  BOOST_CHECK_EQUAL(count, 4);
1301  //NcbiCout << "Passed Test_ReplaceSequence" << NcbiEndl;
1302 }
1303 
// Runs the sequence updater with test_data/OneSeqWithoutFeature.asn as the old
// entry and test_data/update_OneSeq.asn as the update. After executing the
// returned command the first nucleotide bioseq must have length 535, and the
// MakeAsn() text of each of its Seq-ids must match, in order, the entries of
// the 'check' list below.
1304 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_WithoutFeats)
1305 {
1306  string old_fname("test_data/OneSeqWithoutFeature.asn");
1307  string upd_fname("test_data/update_OneSeq.asn");
1308  CSeq_entry_Handle old_seh, upd_seh;
1309  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1310 
1311  CUpdateSeq_Input sequpd_input;
1312  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1313 
1315  CSequenceUpdater updater(sequpd_input, params);
1316  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1317  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1318 
1319  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1320  cmd->Execute();
1321 
 // take the first nucleotide bioseq found under old_seh after the update
1322  CBioseq_Handle new_bsh;
1323  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
1324  if (b_iter) new_bsh = *b_iter;
1325 
1326  BOOST_REQUIRE(new_bsh);
1327  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 535);
 // expected MakeAsn() text of the bioseq's ids, in iteration order (four entries)
1328  vector<string> check{
1329  "Seq-id ::= local str \"Seq1\"\n",
1330  "Seq-id ::= general {\n"
1331  " db \"DFG\",\n"
1332  " tag str \"3456/Seq1\"\n"
1333  "}\n",
1334  "Seq-id ::= general {\n"
1335  " db \"test\",\n"
1336  " tag id 2356\n"
1337  "}\n",
1338  "Seq-id ::= genbank {\n"
1339  " accession \"AA123456\"\n"
1340  "}\n"
1341  };
1342 
 // NOTE(review): check[i++] is not bounds-checked and the number of ids is never
 // compared with check.size(); a bioseq with more than four ids would index past
 // the end, and one with fewer would pass silently - consider asserting the count.
1343  auto i = 0;
1344  ITERATE(CBioseq::TId, it, new_bsh.GetCompleteBioseq()->GetId()) {
1345  BOOST_CHECK_EQUAL(MakeAsn(**it), check[i++]);
1346  }
1347 }
1348 
1349 
// Runs the sequence updater with test_data/KR002586.asn as the old entry and
// test_data/KR002586_update.asn as the update (per the in-body comment: the
// sequence is replaced and the original CDS should end up at the end of the
// sequence). After executing the returned command the first nucleotide bioseq
// must have length 1080; the test then checks the subtype and start/stop of
// every feature visited and requires exactly 2 features.
1350 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_FeatAtSeqEnd)
1351 {
1352  string old_fname("test_data/KR002586.asn");
1353  string upd_fname("test_data/KR002586_update.asn");
1354  CSeq_entry_Handle old_seh, upd_seh;
1355  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1356 
1357  CUpdateSeq_Input sequpd_input;
1358  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1359 
1360  // replace the sequence, and the original CDS should be at the end of sequence
1362  CSequenceUpdater updater(sequpd_input, params);
1363  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1364  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1365 
1366  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1367  cmd->Execute();
1368 
 // take the first nucleotide bioseq found under old_seh after the update
1369  CBioseq_Handle new_bsh;
1370  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
1371  if (b_iter) new_bsh = *b_iter;
1372 
1373  BOOST_REQUIRE(new_bsh);
1374  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1080);
1375 
 // count is incremented once per feature visited and compared with 2 at the end
1376  short count = 0;
1378  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
1379  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
1381  BOOST_CHECK_EQUAL(type_ok, true);
1382 
1383  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
1384  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this start/stop pair
1385  bool feat_ok = false;
1386 
1387  switch (type) {
1389  feat_ok = (start == 0 && stop == 1079);
1390  break;
1392  feat_ok = (start == 0 && stop == 394);
1393  break;
1394  default:
1395  break;
1396  }
1397  BOOST_CHECK_EQUAL(feat_ok, true);
1398  count++;
1399  }
1400  BOOST_CHECK_EQUAL(count, 2);
1401  //NcbiCout << "Passed Test_ReplaceSequence_FeatAtSeqEnd" << NcbiEndl;
1402 }
1403 
// Reads test_data/seq_withinSet.asn and test_data/update_seqWithinSet1.asn,
// selects the bioseq of length 1430 inside the old entry as the update target,
// and runs the sequence updater with feature import and m_AddCitSub enabled.
// After executing the returned command the second nucleotide bioseq under
// old_seh must have length 1387; the test checks the subtype and location of
// every feature visited, requires exactly 12 features, and finally inspects
// any Cit-sub publication found on the bioseq's own descriptors.
1404 BOOST_AUTO_TEST_CASE(Test_ReplaceSeqWithinSet_FeatsAtSeqEnd1_AddCitSub)
1405 {
1406  string old_fname("test_data/seq_withinSet.asn");
1407  string upd_fname("test_data/update_seqWithinSet1.asn");
1408  CSeq_entry_Handle old_seh, upd_seh;
1409  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1410 
 // pick the entry of the first bioseq whose length is 1430; oldSeh stays
 // default-constructed if no such bioseq exists
1411  CSeq_entry_Handle oldSeh;
1412  for (CBioseq_CI b_iter(old_seh); b_iter; ++b_iter) {
1413  if (b_iter->GetBioseqLength() == 1430) {
1414  oldSeh = b_iter->GetSeq_entry_Handle();
1415  break;
1416  }
1417  }
1418 
1419  CUpdateSeq_Input sequpd_input;
1420  SetupForUpdate(oldSeh, upd_seh, sequpd_input);
1421 
1422  // replace the middle sequence
1424  params.m_ImportFeatures = true;
1426  params.m_AddCitSub = true;
1427  CSequenceUpdater updater(sequpd_input, params);
1428  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1429  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1430 
1431  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1432  cmd->Execute();
1433 
 // advance once so that new_bsh is the second nucleotide bioseq under old_seh
1434  CBioseq_Handle new_bsh;
1435  CBioseq_CI bseq_it(old_seh, CSeq_inst::eMol_na);
1436  if (bseq_it && ++bseq_it) {
1437  new_bsh = *bseq_it;
1438  }
1439 
 // use the parent nuc-prot set's entry when there is one, otherwise the bioseq's own entry
 // NOTE(review): new_bsh is used here before BOOST_REQUIRE(new_bsh) below; if no
 // second nucleotide bioseq was found the handle is still default-constructed -
 // confirm GetParentBioseq_set() is safe on such a handle or move the REQUIRE up.
1440  CSeq_entry_Handle newSeh;
1441  CBioseq_set_Handle newSetSeh = new_bsh.GetParentBioseq_set();
1442  if (newSetSeh &&
1443  newSetSeh.IsSetClass() &&
1444  newSetSeh.GetClass() == CBioseq_set::eClass_nuc_prot) {
1445  newSeh = newSetSeh.GetParentEntry();
1446  }
1447  else {
1448  newSeh = new_bsh.GetSeq_entry_Handle();
1449  }
1450 
1451  BOOST_REQUIRE(new_bsh);
1452  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1387);
1453 
 // count is incremented once per feature visited and compared with 12 at the end
1454  short count = 0;
1456  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
1457  bool type_ok = (type == CSeqFeatData::eSubtype_rRNA)
1466  BOOST_CHECK_EQUAL(type_ok, true);
1467 
1468  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
1469  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this feature
1470  bool feat_ok = false;
1471 
1472  switch (type) {
1474  feat_ok = ((start == 0 && stop == 1378) && feat->IsSetPartial()) ||
1475  ((start == 0 && stop == 1386) && !feat->IsSetPartial()); // imported feature
1476  break;
 // multi-interval location: compared as full MakeAsn() text instead of start/stop
1478  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
1479  "Seq-loc ::= mix {\n"
1480  " int {\n"
1481  " from 55,\n"
1482  " to 171,\n"
1483  " strand minus,\n"
1484  " id local str \"Seq23\"\n"
1485  " },\n"
1486  " int {\n"
1487  " from 0,\n"
1488  " to 15,\n"
1489  " strand minus,\n"
1490  " id local str \"Seq23\"\n"
1491  " }\n"
1492  "}\n");
1493  feat_ok = true;
1494  break;
1496  feat_ok = !feat->IsSetPartial() && ((start == 0 && stop == 94)
1497  || (start == 271 && stop == 1148)
1498  || (start == 647 && stop == 1378));
1499  break;
1501  feat_ok = feat->IsSetPartial()
1502  && feat->GetLocation().IsPartialStop(eExtreme_Biological)
1503  && (start == 745 && stop == 171);
1504  break;
1506  feat_ok = !feat->IsSetPartial() && (start == 0 && stop == 94);
1507  break;
1509  feat_ok = !feat->IsSetPartial() && (start == 0 && stop == 31);
1510  break;
1512  feat_ok = feat->IsSetPartial()
1513  && feat->GetLocation().IsPartialStart(eExtreme_Biological)
1514  && (start == 1378 && stop == 1358);
1515  break;
1517  feat_ok = feat->IsSetPartial()
1518  && feat->GetLocation().IsPartialStop(eExtreme_Biological)
1519  && (start == 1368 && stop == 1374);
1520  break;
 // multi-interval location: compared as full MakeAsn() text instead of start/stop
1522  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
1523  "Seq-loc ::= mix {\n"
1524  " int {\n"
1525  " from 20,\n"
1526  " to 35,\n"
1527  " strand plus,\n"
1528  " id local str \"Seq23\"\n"
1529  " },\n"
1530  " int {\n"
1531  " from 1308,\n"
1532  " to 1328,\n"
1533  " strand plus,\n"
1534  " id local str \"Seq23\"\n"
1535  " }\n"
1536  "}\n");
1537  feat_ok = true;
1538  break;
1539  default:
1540  break;
1541  }
1542  BOOST_CHECK_EQUAL(feat_ok, true);
1543  count++;
1544  }
1545  BOOST_CHECK_EQUAL(count, 12);
1546 
1547  // check added cit-sub that should appear on the sequence and not on the set
 // NOTE(review): nothing below asserts that a Cit-sub was actually found, so the
 // loop passes if the bioseq carries none; the description is likewise only
 // compared when it is set - confirm this leniency is intended.
1548  CRef<CDate> today(new CDate);
1550  FOR_EACH_SEQDESC_ON_BIOSEQ(desc_it, *new_bsh.GetCompleteBioseq()) {
1551  if ((*desc_it)->IsPub()) {
1552  FOR_EACH_PUB_ON_PUBDESC(pub_it, (*desc_it)->GetPub()) {
1553  if ((*pub_it)->IsSub()) {
1554  const CCit_sub& sub = (*pub_it)->GetSub();
1555  BOOST_CHECK(sub.IsSetDate());
1556  BOOST_CHECK_EQUAL(MakeAsn(sub.GetDate()), MakeAsn(*today));
1557  if (sub.IsSetDescr()) {
1558  BOOST_CHECK_EQUAL(sub.GetDescr(), string("Sequence update by submitter"));
1559  }
1560  }
1561  }
1562  }
1563  }
1564 }
1565 
// Reads test_data/seq_withinSet.asn and test_data/update_seqWithinSet2.asn,
// selects the bioseq of length 1430 inside the old entry as the update target,
// and runs the sequence updater with feature import enabled. After executing
// the returned command the second nucleotide bioseq under old_seh must have
// length 1563; the test checks the subtype and location of every feature
// visited and requires exactly 13 features.
1566 BOOST_AUTO_TEST_CASE(Test_ReplaceSeqWithinSet_FeatsAtSeqEnd2)
1567 {
1568  string old_fname("test_data/seq_withinSet.asn");
1569  string upd_fname("test_data/update_seqWithinSet2.asn");
1570  CSeq_entry_Handle old_seh, upd_seh;
1571  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1572 
 // pick the entry of the first bioseq whose length is 1430; oldSeh stays
 // default-constructed if no such bioseq exists
1573  CSeq_entry_Handle oldSeh;
1574  for (CBioseq_CI b_iter(old_seh); b_iter; ++b_iter) {
1575  if (b_iter->GetBioseqLength() == 1430) {
1576  oldSeh = b_iter->GetSeq_entry_Handle();
1577  break;
1578  }
1579  }
1580 
1581  CUpdateSeq_Input sequpd_input;
1582  SetupForUpdate(oldSeh, upd_seh, sequpd_input);
1583 
1584  // replace the middle sequence
1586  params.m_ImportFeatures = true;
1588  CSequenceUpdater updater(sequpd_input, params);
1589  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1590  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1591 
1592  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1593  cmd->Execute();
1594 
 // advance once so that new_bsh is the second nucleotide bioseq under old_seh
1595  CBioseq_Handle new_bsh;
1596  CBioseq_CI bseq_it(old_seh, CSeq_inst::eMol_na);
1597  if (bseq_it && ++bseq_it) {
1598  new_bsh = *bseq_it;
1599  }
1600 
 // use the parent nuc-prot set's entry when there is one, otherwise the bioseq's own entry
 // NOTE(review): new_bsh is used here before BOOST_REQUIRE(new_bsh) below; if no
 // second nucleotide bioseq was found the handle is still default-constructed -
 // confirm GetParentBioseq_set() is safe on such a handle or move the REQUIRE up.
1601  CSeq_entry_Handle newSeh;
1602  CBioseq_set_Handle newSetSeh = new_bsh.GetParentBioseq_set();
1603  if (newSetSeh &&
1604  newSetSeh.IsSetClass() &&
1605  newSetSeh.GetClass() == CBioseq_set::eClass_nuc_prot) {
1606  newSeh = newSetSeh.GetParentEntry();
1607  }
1608  else {
1609  newSeh = new_bsh.GetSeq_entry_Handle();
1610  }
1611 
1612  BOOST_REQUIRE(new_bsh);
1613  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1563);
1614 
 // count is incremented once per feature visited and compared with 13 at the end
1615  short count = 0;
1617  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
1618  bool type_ok = (type == CSeqFeatData::eSubtype_rRNA)
1628 
1629  BOOST_CHECK_EQUAL(type_ok, true);
1630 
1631  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
1632  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this feature
1633  bool feat_ok = false;
1634 
1635  switch (type) {
1637  feat_ok = ((start == 0 && stop == 1562) && feat->IsSetPartial()) || // imported feature
1638  ((start == 133 && stop == 1562) && feat->IsSetPartial());
1639  break;
1641  feat_ok = !feat->IsSetPartial() && ((start == 133 && stop == 232)
1642  || (start == 432 && stop == 1332)
1643  || (start == 832 && stop == 1562));
1644  break;
1646  feat_ok = !feat->IsSetPartial() && start == 0 && stop == 132; // imported feature
1647  break;
 // multi-interval location: compared as full MakeAsn() text instead of start/stop
1649  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
1650  "Seq-loc ::= mix {\n"
1651  " int {\n"
1652  " from 192,\n"
1653  " to 332,\n"
1654  " strand minus,\n"
1655  " id local str \"Seq23\"\n"
1656  " },\n"
1657  " int {\n"
1658  " from 133,\n"
1659  " to 152,\n"
1660  " strand minus,\n"
1661  " id local str \"Seq23\"\n"
1662  " }\n"
1663  "}\n");
1664  feat_ok = true; // ok
1665  break;
1667  feat_ok = feat->IsSetPartial()
1668  && feat->GetLocation().IsPartialStop(eExtreme_Biological)
1669  && (start == 932 && stop == 332);
1670  break;
1672  feat_ok = !feat->IsSetPartial() && (start == 133 && stop == 232);
1673  break;
1675  feat_ok = !feat->IsSetPartial() && (start == 0 && stop == 31);
1676  break;
1678  feat_ok = feat->IsSetPartial()
1679  && feat->GetLocation().IsPartialStart(eExtreme_Biological)
1680  && (start == 1562 && stop == 1542);
1681  break;
1683  feat_ok = feat->IsSetPartial()
1684  && feat->GetLocation().IsPartialStop(eExtreme_Biological)
1685  && (start == 1552 && stop == 1558);
1686  break;
 // multi-interval location: compared as full MakeAsn() text instead of start/stop
1688  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
1689  "Seq-loc ::= mix {\n"
1690  " int {\n"
1691  " from 157,\n"
1692  " to 172,\n"
1693  " strand plus,\n"
1694  " id local str \"Seq23\"\n"
1695  " },\n"
1696  " int {\n"
1697  " from 1492,\n"
1698  " to 1512,\n"
1699  " strand plus,\n"
1700  " id local str \"Seq23\"\n"
1701  " }\n"
1702  "}\n");
1703  feat_ok = true;
1704  break;
1705  default:
1706  break;
1707  }
1708  BOOST_CHECK_EQUAL(feat_ok, true);
1709  count++;
1710  }
1711  BOOST_CHECK_EQUAL(count, 13);
1712 }
1713 
// Runs the sequence updater with test_data/retranslateCDS_1.asn as the old
// entry and test_data/replace_retranslateCDS_1.asn as the update, with
// m_KeepProteinId set to true (per the in-body comments: old features kept,
// nothing imported, coding regions retranslated and proteins updated).
// After executing the returned command the test expects a nucleotide of
// length 718 and a protein of length 239 whose first id label is
// "lcl|12HNVN567_prot_72", which has 3 descriptors, whose residues equal
// 'prseq', and whose protein feature spans positions 0..238.
1714 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_UpdateProteins_KeepProteinId)
1715 {
1716  string old_fname("test_data/retranslateCDS_1.asn");
1717  string upd_fname("test_data/replace_retranslateCDS_1.asn");
1718  CSeq_entry_Handle old_seh, upd_seh;
1719  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1720 
1721  CUpdateSeq_Input sequpd_input;
1722  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1723  // do not remove old features and do not import new features
1724  // retranslate the coding regions and update the proteins
1727  params.m_KeepProteinId = true;
1728 
1729  CSequenceUpdater updater(sequpd_input, params);
1730  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1731  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1732 
1733  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1734  cmd->Execute();
1735 
 // first nucleotide bioseq under old_seh after the update
1736  CBioseq_Handle new_bsh;
1737  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
1738  if (b_iter) new_bsh = *b_iter;
1739 
1740  BOOST_REQUIRE(new_bsh);
1741  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 718);
1742 
 // first protein bioseq under old_seh after the update
1743  CBioseq_Handle psh;
1744  CBioseq_CI p_iter(old_seh, CSeq_inst::eMol_aa);
1745  if (p_iter) psh = *p_iter;
1746 
1747  BOOST_REQUIRE(psh);
1748  BOOST_CHECK_EQUAL(psh.GetBioseqLength(), 239);
1749 
1750  const CBioseq& protein = *psh.GetCompleteBioseq();
1751  string id_label;
1752  protein.GetFirstId()->GetLabel(&id_label);
1753  BOOST_CHECK_EQUAL(id_label, "lcl|12HNVN567_prot_72");
1754 
1755  const CBioseq::TDescr& descrs = protein.GetDescr();
1756  BOOST_CHECK(descrs.Get().size() == 3);
1757 
 // newpr receives the protein residues over the full protein length and is compared with prseq
1758  string newpr;
1760  new_svec.GetSeqData(0, psh.GetBioseqLength(), newpr);
1761 
1762  string prseq("CDVKVXXXKFQWDRTMXKCQLSTMHTWNXASGINSIAAKWQSCRRRDNDQIXKSHRQCQN\
1763 HNSATXXFCRNQLYQTIRXXKNKXDSRTRTSILYNRRNXRKYKESILXXXXNKMXXSFXTGSWKTKRALXXXXYNLSTTL\
1764 RRRSRNXNTSFXLXRGIFLLQYNTTVXXYCLHKQYKRGRVXWHYHTSMPDKTNYKHVAKSRKSNVCSSHQ\
1765 WKHXVXIKYYRNTIDKRWWYXXLXXXYLR");
1766  BOOST_CHECK_EQUAL(newpr, prseq);
1767 
1769  BOOST_REQUIRE(prot_feat);
1770 
 // the protein feature must cover the protein from position 0 to 238 (length 239)
1771  bool feat_ok = (prot_feat->GetLocation().GetStart(eExtreme_Positional) == 0) &&
1772  (prot_feat->GetLocation().GetStop(eExtreme_Positional) == 238);
1773  BOOST_CHECK(feat_ok);
1774  //NcbiCout << "Passed Test_ReplaceSequence_UpdateProteins_KeepProteinId" << NcbiEndl;
1775 }
1776 
1777 
// Runs the sequence updater with test_data/retranslateCDS_1.asn as the old
// entry and test_data/replace_retranslateCDS_1.asn as the update, with
// m_KeepProteinId set to false (per the in-body comments: old features kept,
// nothing imported, coding regions retranslated and proteins updated).
// Even with that flag off, the test still expects the protein's first id
// label to be "lcl|12HNVN567_prot_72"; it also expects a nucleotide of length
// 718, a protein of length 239 with 3 descriptors and residues equal to
// 'prseq', and a protein feature spanning positions 0..238.
1778 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_UpdateProteins_DoNot_KeepProteinId)
1779 {
1780  string old_fname("test_data/retranslateCDS_1.asn");
1781  string upd_fname("test_data/replace_retranslateCDS_1.asn");
1782  CSeq_entry_Handle old_seh, upd_seh;
1783  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1784 
1785  CUpdateSeq_Input sequpd_input;
1786  SetupForUpdate(old_seh, upd_seh, sequpd_input);
1787  // do not remove old features and do not import new features
1788  // retranslate the coding regions and update the proteins
1791  params.m_KeepProteinId = false;
1792 
1793  CSequenceUpdater updater(sequpd_input, params);
1794  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1795  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1796 
1797  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1798  cmd->Execute();
1799 
 // first nucleotide bioseq under old_seh after the update
1800  CBioseq_Handle new_bsh;
1801  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
1802  if (b_iter) new_bsh = *b_iter;
1803 
1804  BOOST_REQUIRE(new_bsh);
1805  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 718);
1806 
 // first protein bioseq under old_seh after the update
1807  CBioseq_Handle psh;
1808  CBioseq_CI p_iter(old_seh, CSeq_inst::eMol_aa);
1809  if (p_iter) psh = *p_iter;
1810 
1811  BOOST_REQUIRE(psh);
1812  BOOST_CHECK_EQUAL(psh.GetBioseqLength(), 239);
1813 
1814  // the existing protein ID should never be removed
1815  const CBioseq& protein = *psh.GetCompleteBioseq();
1816  string id_label;
1817  protein.GetFirstId()->GetLabel(&id_label);
1818  BOOST_CHECK_EQUAL(id_label, "lcl|12HNVN567_prot_72");
1819  const CBioseq::TDescr& descrs = protein.GetDescr();
1820  BOOST_CHECK(descrs.Get().size() == 3);
1821 
 // newpr receives the protein residues over the full protein length and is compared with prseq
1822  string newpr;
1824  new_svec.GetSeqData(0, psh.GetBioseqLength(), newpr);
1825 
1826  string prseq("CDVKVXXXKFQWDRTMXKCQLSTMHTWNXASGINSIAAKWQSCRRRDNDQIXKSHRQCQN\
1827 HNSATXXFCRNQLYQTIRXXKNKXDSRTRTSILYNRRNXRKYKESILXXXXNKMXXSFXTGSWKTKRALXXXXYNLSTTL\
1828 RRRSRNXNTSFXLXRGIFLLQYNTTVXXYCLHKQYKRGRVXWHYHTSMPDKTNYKHVAKSRKSNVCSSHQ\
1829 WKHXVXIKYYRNTIDKRWWYXXLXXXYLR");
1830  BOOST_CHECK_EQUAL(newpr, prseq);
1831 
1833  BOOST_REQUIRE(prot_feat);
1834 
 // the protein feature must cover the protein from position 0 to 238 (length 239)
1835  bool feat_ok = (prot_feat->GetLocation().GetStart(eExtreme_Positional) == 0) &&
1836  (prot_feat->GetLocation().GetStop(eExtreme_Positional) == 238);
1837  BOOST_CHECK(feat_ok);
1838 }
1839 
// Reads test_data/seq_withinSet.asn and test_data/update_seqWithinSet1.asn,
// selects the bioseq of length 1430 inside the old entry as the update target,
// and runs the sequence updater (per the in-body comment: patch the middle
// sequence, import no new features). After executing the returned command
// the second nucleotide bioseq under old_seh must have length 1360; the test
// checks the subtype and location of every feature visited and requires
// exactly 11 features.
1840 BOOST_AUTO_TEST_CASE(Test_PatchSeqWithinSet_FeatsAtSeqEnd)
1841 {
1842  string old_fname("test_data/seq_withinSet.asn");
1843  string upd_fname("test_data/update_seqWithinSet1.asn");
1844  CSeq_entry_Handle old_seh, upd_seh;
1845  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1846 
 // pick the entry of the first bioseq whose length is 1430; oldSeh stays
 // default-constructed if no such bioseq exists
1847  CSeq_entry_Handle oldSeh;
1848  for (CBioseq_CI b_iter(old_seh); b_iter; ++b_iter) {
1849  if (b_iter->GetBioseqLength() == 1430) {
1850  oldSeh = b_iter->GetSeq_entry_Handle();
1851  break;
1852  }
1853  }
1854 
1855  CUpdateSeq_Input sequpd_input;
1856  SetupForUpdate(oldSeh, upd_seh, sequpd_input);
1857 
1858  // patch the middle sequence, do not import new features
1860  CSequenceUpdater updater(sequpd_input, params);
1861  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
1862  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
1863 
1864  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
1865  cmd->Execute();
1866 
 // advance once so that new_bsh is the second nucleotide bioseq under old_seh
1867  CBioseq_Handle new_bsh;
1868  CBioseq_CI bseq_it(old_seh, CSeq_inst::eMol_na);
1869  if (bseq_it && ++bseq_it) {
1870  new_bsh = *bseq_it;
1871  }
1872 
 // use the parent nuc-prot set's entry when there is one, otherwise the bioseq's own entry
 // NOTE(review): new_bsh is used here before BOOST_REQUIRE(new_bsh) below; if no
 // second nucleotide bioseq was found the handle is still default-constructed -
 // confirm GetParentBioseq_set() is safe on such a handle or move the REQUIRE up.
1873  CSeq_entry_Handle newSeh;
1874  CBioseq_set_Handle newSetSeh = new_bsh.GetParentBioseq_set();
1875  if (newSetSeh &&
1876  newSetSeh.IsSetClass() &&
1877  newSetSeh.GetClass() == CBioseq_set::eClass_nuc_prot) {
1878  newSeh = newSetSeh.GetParentEntry();
1879  }
1880  else {
1881  newSeh = new_bsh.GetSeq_entry_Handle();
1882  }
1883 
1884  BOOST_REQUIRE(new_bsh);
1885  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1360);
1886 
 // count is incremented once per feature visited and compared with 11 at the end
1887  short count = 0;
1889  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
1890  bool type_ok = (type == CSeqFeatData::eSubtype_rRNA)
1899 
1900  BOOST_CHECK_EQUAL(type_ok, true);
1901 
1902  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
1903  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this feature
1904  bool feat_ok = false;
1905 
1906  switch (type) {
1908  feat_ok = ((start == 0 && stop == 1359) && feat->IsSetPartial());
1909  break;
1911  feat_ok = !feat->IsSetPartial() && ((start == 0 && stop == 98)
1912  || (start == 275 && stop == 1152)
1913  || (start == 651 && stop == 1359));
1914  break;
 // multi-interval location: compared as full MakeAsn() text instead of start/stop
1916  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
1917  "Seq-loc ::= mix {\n"
1918  " int {\n"
1919  " from 59,\n"
1920  " to 175,\n"
1921  " strand minus,\n"
1922  " id local str \"Seq23\"\n"
1923  " },\n"
1924  " int {\n"
1925  " from 0,\n"
1926  " to 19,\n"
1927  " strand minus,\n"
1928  " id local str \"Seq23\"\n"
1929  " }\n"
1930  "}\n");
1931  feat_ok = true;
1932  break;
1934  feat_ok = feat->IsSetPartial()
1935  && feat->GetLocation().IsPartialStop(eExtreme_Biological)
1936  && (start == 749 && stop == 175);
1937  break;
1939  feat_ok = !feat->IsSetPartial() && (start == 0 && stop == 98);
1940  break;
1942  feat_ok = !feat->IsSetPartial() && (start == 0 && stop == 31);
1943  break;
1945  feat_ok = feat->IsSetPartial()
1946  && feat->GetLocation().IsPartialStart(eExtreme_Biological)
1947  && (start == 1359 && stop == 1359);
1948  break;
1950  feat_ok = feat->IsSetPartial()
1951  && feat->GetLocation().IsPartialStop(eExtreme_Biological)
1952  && (start == 1359 && stop == 1359);
1953  break;
 // multi-interval location: compared as full MakeAsn() text instead of start/stop
1955  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
1956  "Seq-loc ::= mix {\n"
1957  " int {\n"
1958  " from 24,\n"
1959  " to 39,\n"
1960  " strand plus,\n"
1961  " id local str \"Seq23\"\n"
1962  " },\n"
1963  " int {\n"
1964  " from 1312,\n"
1965  " to 1332,\n"
1966  " strand plus,\n"
1967  " id local str \"Seq23\"\n"
1968  " }\n"
1969  "}\n");
1970  feat_ok = true;
1971  break;
1972  default:
1973  break;
1974  }
1975  BOOST_CHECK_EQUAL(feat_ok, true);
1976  count++;
1977  }
1978  BOOST_CHECK_EQUAL(count, 11);
1979 }
1980 
// Reads test_data/seq_withinSet.asn and test_data/update_seqWithinSet_Feats.asn,
// selects the bioseq of length 1430 inside the old entry as the update target,
// and runs the sequence updater with feature import enabled (per the in-body
// comment: patch the middle sequence, remove all existing features, import
// new ones). After executing the returned command the second nucleotide
// bioseq under old_seh must have length 1360; the test checks the subtype and
// location of every feature visited and requires exactly 5 features.
1981 BOOST_AUTO_TEST_CASE(Test_PatchSeqWithinSet_ImportFeatures)
1982 {
1983  string old_fname("test_data/seq_withinSet.asn");
1984  string upd_fname("test_data/update_seqWithinSet_Feats.asn");
1985  CSeq_entry_Handle old_seh, upd_seh;
1986  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
1987 
 // pick the entry of the first bioseq whose length is 1430; oldSeh stays
 // default-constructed if no such bioseq exists
1988  CSeq_entry_Handle oldSeh;
1989  for (CBioseq_CI b_iter(old_seh); b_iter; ++b_iter) {
1990  if (b_iter->GetBioseqLength() == 1430) {
1991  oldSeh = b_iter->GetSeq_entry_Handle();
1992  break;
1993  }
1994  }
1995 
1996  CUpdateSeq_Input sequpd_input;
1997  SetupForUpdate(oldSeh, upd_seh, sequpd_input);
1998 
1999  // patch the middle sequence, remove all existing features and import new features
2001  params.m_ImportFeatures = true;
2004  CSequenceUpdater updater(sequpd_input, params);
2005  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2006  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2007 
2008  BOOST_REQUIRE(cmd); // a null command would make cmd->Execute() below invalid
2009  cmd->Execute();
2010 
 // advance once so that new_bsh is the second nucleotide bioseq under old_seh
2011  CBioseq_Handle new_bsh;
2012  CBioseq_CI bseq_it(old_seh, CSeq_inst::eMol_na);
2013  if (bseq_it && ++bseq_it) {
2014  new_bsh = *bseq_it;
2015  }
2016 
 // use the parent nuc-prot set's entry when there is one, otherwise the bioseq's own entry
 // NOTE(review): new_bsh is used here before BOOST_REQUIRE(new_bsh) below; if no
 // second nucleotide bioseq was found the handle is still default-constructed -
 // confirm GetParentBioseq_set() is safe on such a handle or move the REQUIRE up.
2017  CSeq_entry_Handle newSeh;
2018  CBioseq_set_Handle newSetSeh = new_bsh.GetParentBioseq_set();
2019  if (newSetSeh &&
2020  newSetSeh.IsSetClass() &&
2021  newSetSeh.GetClass() == CBioseq_set::eClass_nuc_prot) {
2022  newSeh = newSetSeh.GetParentEntry();
2023  }
2024  else {
2025  newSeh = new_bsh.GetSeq_entry_Handle();
2026  }
2027 
2028  BOOST_REQUIRE(new_bsh);
2029  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1360);
2030 
 // count is incremented once per feature visited and compared with 5 at the end
2031  short count = 0;
2033  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2034  bool type_ok = (type == CSeqFeatData::eSubtype_rRNA)
2037  BOOST_CHECK_EQUAL(type_ok, true);
2038 
2039  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2040  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
 // feat_ok stays false unless a branch of the switch accepts this feature
2041  bool feat_ok = false;
2042 
2043  switch (type) {
2045  feat_ok = (start == 4 && stop == 1359);
2046  break;
2048  feat_ok = (!feat->IsSetPartial() && (start == 803 && stop == 303))
2049  || (feat->IsSetPartial() && (start == 1313 && stop == 1359))
2050  || (!feat->IsSetPartial() && (start == 1359 && stop == 1343));
2051  break;
 // multi-interval location: compared as full MakeAsn() text instead of start/stop
2053  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
2054  "Seq-loc ::= packed-int {\n"
2055  " {\n"
2056  " from 4,\n"
2057  " to 29,\n"
2058  " strand plus,\n"
2059  " id local str \"Seq23\"\n"
2060  " },\n"
2061  " {\n"
2062  " from 33,\n"
2063  " to 83,\n"
2064  " strand plus,\n"
2065  " id local str \"Seq23\"\n"
2066  " }\n"
2067  "}\n");
2068  feat_ok = true;
2069  break;
2070  default:
2071  break;
2072  }
2073  BOOST_CHECK_EQUAL(feat_ok, true);
2074  count++;
2075  }
2076  BOOST_CHECK_EQUAL(count, 5);
2077  //NcbiCout << "Passed Test_PatchSeqWithinSet_ImportFeatures" << NcbiEndl;
2078 }
2079 
// Runs an update built from sc_MainSequence / sc_UpdSequence_Patch with
// m_ImportFeatures enabled, executes the resulting command and checks that the
// first nucleotide Bioseq has the same length as the update Bioseq and carries
// 7 features at the expected locations.
// NOTE(review): the declaration of `params`, the feature-loop header and the
// `case` labels are not visible in this listing.
2080 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_ImportFeatures)
2081 {
2082  CSeq_entry_Handle old_seh, upd_seh;
2083  ReadBioSeqs(sc_MainSequence, sc_UpdSequence_Patch, old_seh, upd_seh);
2084 
2085  CUpdateSeq_Input sequpd_input;
2086  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2087 
2088  // import all new features and do not remove existing features
2090  params.m_ImportFeatures = true;
2092  CSequenceUpdater updater(sequpd_input, params);
2093  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2094  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2095 
2096  BOOST_REQUIRE(cmd);
2097  cmd->Execute();
2098 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2099  CBioseq_Handle new_bsh;
2100  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2101  if (b_iter) new_bsh = *b_iter;
2102 
2103  BOOST_REQUIRE(new_bsh);
2104  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
2105 
2106  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), upd_bsh.GetBioseqLength());
2107 
2108  short count = 0;
2110  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2111  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2115  BOOST_CHECK_EQUAL(type_ok, true);
2116 
2117  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2118  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2119  bool feat_ok = false;
2120 
      // feat_ok stays false for any subtype that has no expectation below
2121  switch (type) {
2123  feat_ok = (start == 0 && stop == 131);
2124  break;
2126  feat_ok = (start == 0 && stop == 31);
2127  break;
2129  feat_ok = (start == 0 && stop == 0) || (start == 19 && stop == 99);
2130  break;
2132  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 25);
2133  break;
2134  default:
2135  break;
2136  }
2137  BOOST_CHECK_EQUAL(feat_ok, true);
2138  count++;
2139  }
2140  BOOST_CHECK_EQUAL(count, 7);
2141 
2142 }
2143 
2144 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_ProteinId_5_Partial_CitSub1)
2145 {
2146  string old_fname("test_data/Old_Bourbon.asn");
2147  string upd_fname("test_data/Update_Bourbon.asn");
2148  CSeq_entry_Handle old_seh, upd_seh;
2149  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
2150 
2151  CUpdateSeq_Input sequpd_input;
2152  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2154  params.m_AddCitSub = true;
2155 
2156  CSequenceUpdater updater(sequpd_input, params);
2157  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2158  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2159 
2160  BOOST_REQUIRE(cmd);
2161  cmd->Execute();
2162 
2163  CBioseq_Handle new_bsh;
2164  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2165  if (b_iter) new_bsh = *b_iter;
2166 
2167  BOOST_REQUIRE(new_bsh);
2168  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
2169 
2170  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), upd_bsh.GetBioseqLength());
2171 
2173  BOOST_CHECK(cds_it);
2174  CConstRef<CSeq_feat> cds = cds_it->GetOriginalSeq_feat();
2175 
2176  // check cds location and 5' completeness
2177  const CSeq_loc& loc = cds->GetLocation();
2178  BOOST_CHECK(!loc.IsPartialStart(eExtreme_Biological));
2179  BOOST_CHECK(loc.IsPartialStop(eExtreme_Biological));
2180 
2181  BOOST_CHECK_EQUAL(loc.GetStart(eExtreme_Biological), (TSeqPos)33);
2182  BOOST_CHECK_EQUAL(loc.GetStop(eExtreme_Biological), (TSeqPos)1936);
2183 
2184  // check protein ids
2185  {{
2186  CBioseq_Handle protein = old_seh.GetScope().GetBioseqHandle(cds->GetProduct());
2187  const CBioseq::TId& prot_ids = protein.GetCompleteBioseq()->GetId();
2188  BOOST_REQUIRE(prot_ids.size() == 2);
2189 
2190  vector<string> check{
2191  "Seq-id ::= genbank {\n"
2192  " accession \"AJP32538\",\n"
2193  " version 1\n"
2194  "}\n",
2195  "Seq-id ::= gi 761229279\n"
2196  };
2197 
2198  auto i = 0;
2199  ITERATE(CBioseq::TId, it, prot_ids) {
2200  BOOST_CHECK_EQUAL(MakeAsn(**it), check[i++]);
2201  }
2202  }}
2203 
2204  // check the newly added cit-sub with current date
2205  {{
2206  CRef<CDate> today(new CDate);
2208  vector<string> check{
2209  "Date ::= std {\n"
2210  " year 2015,\n"
2211  " month 1,\n"
2212  " day 14\n"
2213  "}\n",
2214  MakeAsn(*today)
2215  };
2216 
2217  auto i = 0;
2218  for (CSeqdesc_CI desc_it(new_bsh, CSeqdesc::e_Pub); desc_it; ++desc_it) {
2219  const CPubdesc& pubdesc = desc_it->GetPub();
2220  FOR_EACH_PUB_ON_PUBDESC(pub_it, pubdesc) {
2221  if ((*pub_it)->IsSub()) {
2222  const CCit_sub& sub = (*pub_it)->GetSub();
2223  BOOST_CHECK(sub.IsSetDate());
2224  BOOST_CHECK_EQUAL(MakeAsn(sub.GetDate()), check[i++]);
2225  if (sub.IsSetDescr()) {
2226  BOOST_CHECK_EQUAL(sub.GetDescr(), string("Sequence update by submitter"));
2227  }
2228  }
2229  }
2230  }
2231  }}
2232 }
2233 
// Updates the sequence from test_data/Old_Bourbon_woSub.asn with
// test_data/Update_Bourbon.asn with m_AddCitSub enabled. Checks that the new
// length equals the update Bioseq's length and that the updater reports
// "There is no earlier Cit-sub template".
// NOTE(review): the declaration of `params` is not visible in this listing.
2234 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_CitSub2)
2235 {
2236  string old_fname("test_data/Old_Bourbon_woSub.asn");
2237  string upd_fname("test_data/Update_Bourbon.asn");
2238  CSeq_entry_Handle old_seh, upd_seh;
2239  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
2240 
2241  CUpdateSeq_Input sequpd_input;
2242  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2244  params.m_AddCitSub = true;
2245 
2246  CSequenceUpdater updater(sequpd_input, params);
2247  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2248  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2249 
2250  BOOST_REQUIRE(cmd);
2251  cmd->Execute();
2252 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2253  CBioseq_Handle new_bsh;
2254  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2255  if (b_iter) new_bsh = *b_iter;
2256 
2257  BOOST_REQUIRE(new_bsh);
2258  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
2259 
2260  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), upd_bsh.GetBioseqLength());
2261  BOOST_CHECK_EQUAL(updater.GetCitSubMessage(), string("There is no earlier Cit-sub template"));
2262  //NcbiCout << "Passed Test_ReplaceSequence_CitSub2" << NcbiEndl;
2263 }
2264 
// Multiple-sequence update: the old entry (test_data/replace_woalign.asn) is
// paired with the update entry (test_data/replace_woalign_update.asn) through
// CUpdateMultipleSeq_Input. Three non-identical updates are expected, with no
// identical updates and no sequences left without an update. One update command
// per pair is collected into a composite command and executed; afterwards every
// nucleotide Bioseq must be 709 long and its first feature must span the
// expected positional range.
// NOTE(review): the declaration of `params` is not visible in this listing.
2265 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_WithoutAlign)
2266 {
2267  string old_fname("test_data/replace_woalign.asn");
2268  string upd_fname("test_data/replace_woalign_update.asn");
2269  CRef<CSeq_entry> old_entry = ReadEntryFromFile(old_fname);
2270  CRef<CSeq_entry> upd_entry = ReadEntryFromFile(upd_fname);
2271  CRef<CScope> scope = BuildScope();
2272 
2273  CSeq_entry_Handle old_seh = scope->AddTopLevelSeqEntry(*old_entry);
2274  BOOST_REQUIRE(old_seh);
2275 
      // each setup step runs only if the previous one succeeded
2276  CUpdateMultipleSeq_Input multiupdseq_in;
2277  bool ok = multiupdseq_in.SetOldEntryAndScope(old_seh);
2278  ok = ok && multiupdseq_in.SetUpdateEntry(upd_entry);
2279  ok = ok && multiupdseq_in.PrepareMultipleSequenceInputsForUnitTest();
2280  BOOST_REQUIRE(ok);
2281 
2282  const CUpdateMultipleSeq_Input::TIDToUpdInputMap& updates = multiupdseq_in.GetNonIdenticalUpdates();
2283  const CUpdateMultipleSeq_Input::TIDToUpdInputMap& identicals = multiupdseq_in.GetIdenticalUpdates();
2284  const CUpdateMultipleSeq_Input::TSeqIDHVector& noUpdates = multiupdseq_in.GetSeqsWithoutUpdates();
2285 
      // BOOST_CHECK_EQUAL reports the actual size on failure
2286  BOOST_CHECK_EQUAL(updates.size(), 3u);
2287  BOOST_CHECK(identicals.empty());
2288  BOOST_CHECK(noUpdates.empty());
2289 
2290  CRef<CCmdComposite> update_cmd(new CCmdComposite("Update multiple sequences"));
2292  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2293 
2294  for (auto& it : updates) {
2295  CSequenceUpdater updater(it.second.GetObject(), params);
2296  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
      // cmd is dereferenced on the next line; require it as the other tests do
      BOOST_REQUIRE(cmd);
2297  update_cmd->AddCommand(*cmd);
2298  }
2299 
2300  update_cmd->Execute();
2301 
      // index is the 1-based position of the Bioseq in iteration order and
      // selects the expected range of its first feature
2302  short index = 0;
2303  for (CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na); b_iter; ++b_iter) {
2304  BOOST_CHECK(b_iter->GetBioseqLength() == 709);
2305  CFeat_CI feat(*b_iter);
2306  BOOST_REQUIRE(feat);
2307  index++;
2308  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Positional);
2309  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Positional);
2310  switch (index) {
2311  case 1:
2312  BOOST_CHECK(start == 0 && stop == 230);
2313  break;
2314  case 2:
2315  BOOST_CHECK(start == 0 && stop == 708);
2316  break;
2317  case 3:
2318  BOOST_CHECK(start == 0 && stop == 911);
2319  break;
2320  }
2321 
2322  }
2323 }
2324 
// Runs an update built from sc_MainSequence / sc_UpdSequence_Patch and checks
// the result: length 966, molecule type rna, the exact residue string, and the
// locations of the 4 features that remain on the sequence.
// NOTE(review): the declarations of `params` and `new_svec`, the feature-loop
// header and the `case` labels are not visible in this listing.
2325 BOOST_AUTO_TEST_CASE(Test_PatchSequence)
2326 {
2327  CSeq_entry_Handle old_seh, upd_seh;
2328  ReadBioSeqs(sc_MainSequence, sc_UpdSequence_Patch, old_seh, upd_seh);
2329 
2330  CUpdateSeq_Input sequpd_input;
2331  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2332 
2334  CSequenceUpdater updater(sequpd_input, params);
2335  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2336  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2337  BOOST_REQUIRE(cmd);
2338  cmd->Execute();
2339 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2340  CBioseq_Handle new_bsh;
2341  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2342  if (b_iter) new_bsh = *b_iter;
2343 
2344  BOOST_REQUIRE(new_bsh);
2345  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 966);
2346  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
2347 
2348  string newseq;
2350  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
2351 
      // expected residues of the updated sequence
2352  string patchseq("TGCAGTCGAGCGGCAGCACGGGGAGCTTGCTCCCTGGTGGCGAGCGGCGGACGGGTGAGTAATGTAGGAA\
2353 TCTGCCCGGTAGTGGGGGATAACGTGGGGAAACCCACGCTAATACCGCATACGTCCTACGGGAGAAAGCG\
2354 GAGGATCTTCGGACTTCGCGCTATCGGATGAGCCTATGTCGGATTAGCTAGTTGGTAAGGTAACGGCTTA\
2355 CCAAGGCGACGATCCGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACT\
2356 CCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTTGATCCAGCCATGCCGCGTGTGT\
2357 GAAGAAGGCTTTCGGGTTGTAAAGCACTTTCAGCGAGGAAGAAAGCCTGGTGGTTAAAGCACCGGCTAAC\
2358 TCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCG\
2359 TAGGTGGCTTGGCACGCCGGTTGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCGGAACGGCCAGGC\
2360 TAGAGTGCAGGAGAGGAAGGTAGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGCCCCCCCCCC\
2361 CCCCCCCCCCTGACACTGAGGTGCGAAAGCATGGGTAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\
2362 CGTAAACGATGTCGACTAGCCGTTGGGACCTTTAAGGACTTAGTGGCGCAGTTAACGCGATAAGTCGACC\
2363 GCCTGGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCA\
2364 TGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTACCCTTGACATCCTGCGAATTTGGTAGAGATAC\
2365 CTTAGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGCGC");
2366  BOOST_CHECK_EQUAL(patchseq, newseq);
2367 
2368  // check whether the existing features on the old sequence have been properly adjusted
2369  short count = 0;
2371  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2372  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2376  BOOST_CHECK_EQUAL(type_ok, true);
2377 
2378  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2379  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2380  bool feat_ok = false;
2381 
      // feat_ok stays false for any subtype that has no expectation below
2382  switch (type) {
2384  feat_ok = (start == 0 && stop == 964);
2385  break;
2387  feat_ok = (start == 199 && stop == 599);
2388  break;
2390  feat_ok = (start == 249 && stop == 562);
2391  break;
2393  feat_ok = (start == 0 && stop == 69);
2394  break;
2395  default:
2396  break;
2397  }
2398  BOOST_CHECK_EQUAL(feat_ok, true);
2399  count++;
2400  }
2401  BOOST_CHECK_EQUAL(count, 4);
2402 }
2403 
// Same inputs as the plain patch test (sc_MainSequence / sc_UpdSequence_Patch)
// but with m_ImportFeatures enabled. Expects length 966, molecule type rna and
// 7 features at the listed locations.
// NOTE(review): the declaration of `params`, the feature-loop header and the
// `case` labels are not visible in this listing.
2404 BOOST_AUTO_TEST_CASE(Test_PatchSequence_ImportFeatures)
2405 {
2406  CSeq_entry_Handle old_seh, upd_seh;
2407  ReadBioSeqs(sc_MainSequence, sc_UpdSequence_Patch, old_seh, upd_seh);
2408 
2409  CUpdateSeq_Input sequpd_input;
2410  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2411 
2413  params.m_ImportFeatures = true;
2414  // do not remove existing features
2415 
2416  CSequenceUpdater updater(sequpd_input, params);
2417  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2418  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2419 
2420  BOOST_REQUIRE(cmd);
2421  cmd->Execute();
2422 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2423  CBioseq_Handle new_bsh;
2424  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2425  if (b_iter) new_bsh = *b_iter;
2426 
2427  BOOST_REQUIRE(new_bsh);
2428  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 966);
2429  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
2430 
2431  short count = 0;
2433  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2434  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2438  BOOST_CHECK_EQUAL(type_ok, true);
2439 
2440  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2441  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2442  bool feat_ok = false;
2443 
      // where two ranges are accepted, more than one feature of that subtype
      // is expected on the sequence
2444  switch (type) {
2446  feat_ok = (start == 0 && stop == 964) || (start == 568 && stop == 699);
2447  break;
2449  feat_ok = (start == 199 && stop == 599);
2450  break;
2452  feat_ok = (start == 587 && stop == 667) || (start == 249 && stop == 562);
2453  break;
2455  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 25);
2456  break;
2457  default:
2458  break;
2459  }
2460  BOOST_CHECK_EQUAL(feat_ok, true);
2461  count++;
2462  }
2463  BOOST_CHECK_EQUAL(count, 7);
2464 }
2465 
// Runs an update built from sc_MainSequence / sc_UpdSequence_Patch with
// m_ImportFeatures enabled. Expects the new length to equal the update Bioseq's
// length and 6 features in total; for one subtype the feature must span 0..131
// and its gene locus must be "16S rDNA" (compared case-insensitively).
// NOTE(review): the declaration of `params` (and one further setting after
// m_ImportFeatures), the feature-loop header and the `case` labels are not
// visible in this listing.
2466 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_Import_ExceptDuplFeatures)
2467 {
2468  // do not remove existing features
2469  CSeq_entry_Handle old_seh, upd_seh;
2470  ReadBioSeqs(sc_MainSequence, sc_UpdSequence_Patch, old_seh, upd_seh);
2471 
2472  CUpdateSeq_Input sequpd_input;
2473  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2474 
2476  params.m_ImportFeatures = true;
2478 
2479  CSequenceUpdater updater(sequpd_input, params);
2480  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2481  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2482 
2483  BOOST_REQUIRE(cmd);
2484  cmd->Execute();
2485 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2486  CBioseq_Handle new_bsh;
2487  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2488  if (b_iter) new_bsh = *b_iter;
2489 
2490  BOOST_REQUIRE(new_bsh);
2491  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
2492 
2493  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), upd_bsh.GetBioseqLength());
2494 
2495  short count = 0;
2497  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2498  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2502  BOOST_CHECK_EQUAL(type_ok, true);
2503 
2504  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2505  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2506  bool feat_ok = false;
2507 
      // feat_ok stays false for any subtype that has no expectation below
2508  switch (type) {
2510  // only the gene from the old sequence should be present on the updated sequence
2511  feat_ok = (start == 0 && stop == 131)
2512  && (NStr::EqualNocase(feat->GetData().GetGene().GetLocus(), "16S rDNA"));
2513  break;
2515  feat_ok = (start == 0 && stop == 31);
2516  break;
2518  feat_ok = (start == 0 && stop == 0) || (start == 19 && stop == 99);
2519  break;
2521  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 25);
2522  break;
2523  default:
2524  break;
2525  }
2526  BOOST_CHECK_EQUAL(feat_ok, true);
2527  count++;
2528  }
2529  BOOST_CHECK_EQUAL(count, 6);
2530  //NcbiCout << "Passed Test_ReplaceSequence_Import_ExceptDuplFeatures" << NcbiEndl;
2531 }
2532 
// 5' extension with the alignment ignored and feature import disabled.
// The residues of the old and the update sequence are captured before the
// update; afterwards the new sequence must be 1080 long, of type rna, and equal
// to the update residues followed by the old residues. 4 features are expected
// at the listed locations.
// NOTE(review): the statement that fills old_seh/upd_seh, the declarations of
// the sequence vectors, the feature-loop header and the `case` labels are not
// visible in this listing.
2533 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_IgnoreAlign)
2534 {
2535  CSeq_entry_Handle old_seh, upd_seh;
2537 
2538  CUpdateSeq_Input sequpd_input;
2539  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2540 
2541  const CBioseq_Handle& old_bsh = sequpd_input.GetOldBioseq();
2542  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
2543 
      // residues before the update, used to build the expected result below
2544  string origseq;
2546  orig_svec.GetSeqData(0, old_bsh.GetBioseqLength(), origseq);
2547  string extendseq;
2549  extendvec.GetSeqData(0, upd_bsh.GetBioseqLength(), extendseq);
2550 
2551  bool ignore_alignment(true);
2552  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend5, ignore_alignment);
2553  params.m_ImportFeatures = false;
2554  CSequenceUpdater updater(sequpd_input, params);
2555  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2556  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2557 
2558  BOOST_REQUIRE(cmd);
2559  cmd->Execute();
2560 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2561  CBioseq_Handle new_bsh;
2562  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2563  if (b_iter) new_bsh = *b_iter;
2564 
2565  BOOST_REQUIRE(new_bsh);
2566  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1080);
2567  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
2568 
2569  string newseq;
2571  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
2572  BOOST_CHECK_EQUAL(newseq, extendseq + origseq);
2573 
2574  // check whether features are properly adjusted
2575  // every feature should be shifted towards right
2576  short count = 0;
2578  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2579  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2583  BOOST_CHECK_EQUAL(type_ok, true);
2584 
2585  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2586  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2587  bool feat_ok = false;
2588 
      // feat_ok stays false for any subtype that has no expectation below
2589  switch (type) {
2591  feat_ok = (start == 95 && stop == 1078);
2592  break;
2594  feat_ok = (start == 294 && stop == 694);
2595  break;
2597  feat_ok = (start == 344 && stop == 657);
2598  break;
2600  feat_ok = (start == 0 && stop == 69);
2601  break;
2602  default:
2603  break;
2604  }
2605  BOOST_CHECK_EQUAL(feat_ok, true);
2606  count++;
2607  }
2608  BOOST_CHECK_EQUAL(count, 4);
2609 }
2610 
// 5' extension with the alignment ignored and m_ImportFeatures enabled.
// Expects a new length of 1080 and 8 features at the listed locations.
// NOTE(review): the statement that fills old_seh/upd_seh, one setting after
// m_ImportFeatures, the feature-loop header and the `case` labels are not
// visible in this listing.
2611 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_IgnoreAlign_ImportAllFeatures_Var1)
2612 {
2613  CSeq_entry_Handle old_seh, upd_seh;
2615 
2616  CUpdateSeq_Input sequpd_input;
2617  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2618 
2619  bool ignore_alignment(true);
2620  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend5, ignore_alignment);
2621  params.m_ImportFeatures = true;
2623 
2624  CSequenceUpdater updater(sequpd_input, params);
2625  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2626  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2627 
2628  BOOST_REQUIRE(cmd);
2629  cmd->Execute();
2630 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2631  CBioseq_Handle new_bsh;
2632  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2633  if (b_iter) new_bsh = *b_iter;
2634 
2635  BOOST_REQUIRE(new_bsh);
2636  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1080);
2637 
2638  // existing features should be shifted and all features from update sequence should be imported
2639  short count = 0;
2641  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2642  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2647  BOOST_CHECK_EQUAL(type_ok, true);
2648 
2649  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2650  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2651  bool feat_ok = false;
2652 
      // start/stop are biological extremes, so start > stop (e.g. 54 and 2)
      // is an accepted value here
2653  switch (type) {
2655  feat_ok = (start == 95 && stop == 1078) || (start == 54 && stop == 2);
2656  break;
2658  feat_ok = (start == 294 && stop == 694);
2659  break;
2661  feat_ok = (start == 70 && stop == 89);
2662  break;
2664  feat_ok = (start == 344 && stop == 657) || (start == 54 && stop == 2);
2665  break;
2667  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 16);
2668  break;
2669  default:
2670  break;
2671  }
2672  BOOST_CHECK_EQUAL(feat_ok, true);
2673  count++;
2674  }
2675  BOOST_CHECK_EQUAL(count, 8);
2676 }
2677 
// 5' extension with the alignment ignored and m_ImportFeatures enabled.
// Expects a new length of 1077, molecule type rna and 8 features at the listed
// locations.
// NOTE(review): the statement that fills old_seh/upd_seh, one setting after
// m_ImportFeatures, the feature-loop header and the `case` labels are not
// visible in this listing.
2678 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_IgnoreAlign_ImportAllFeatures_Var2)
2679 {
2680  CSeq_entry_Handle old_seh, upd_seh;
2682 
2683  CUpdateSeq_Input sequpd_input;
2684  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2685 
2686  bool ignore_alignment(true);
2687  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend5, ignore_alignment);
2688  params.m_ImportFeatures = true;
2690 
2691  CSequenceUpdater updater(sequpd_input, params);
2692  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2693  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2694 
2695  BOOST_REQUIRE(cmd);
2696  cmd->Execute();
2697 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2698  CBioseq_Handle new_bsh;
2699  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2700  if (b_iter) new_bsh = *b_iter;
2701 
2702  BOOST_REQUIRE(new_bsh);
2703  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1077);
2704  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
2705 
2706  short count = 0;
2708  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2709  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2714  BOOST_CHECK_EQUAL(type_ok, true);
2715 
2716  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2717  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2718  bool feat_ok = false;
2719 
      // feat_ok stays false for any subtype that has no expectation below
2720  switch (type) {
2722  feat_ok = (start == 92 && stop == 1075) || (start == 2 && stop == 54);
2723  break;
2725  feat_ok = (start == 291 && stop == 691);
2726  break;
2728  feat_ok = (start == 67 && stop == 86);
2729  break;
2731  feat_ok = (start == 341 && stop == 654) || (start == 2 && stop == 54);
2732  break;
2734  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 16);
2735  break;
2736  default:
2737  break;
2738  }
2739  BOOST_CHECK_EQUAL(feat_ok, true);
2740  count++;
2741  }
2742  BOOST_CHECK_EQUAL(count, 8);
2743 }
2744 
// 5' extension with ignore_alignment set to false. Expects a new length of
// 1043, molecule type rna, the exact residue string given below, and 4 features
// at the listed locations.
// NOTE(review): the statement that fills old_seh/upd_seh, one statement after
// `params` is constructed, the declaration of `new_svec`, the feature-loop
// header and the `case` labels are not visible in this listing.
2745 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_DoNotIgnoreAlign)
2746 {
2747  CSeq_entry_Handle old_seh, upd_seh;
2749 
2750  CUpdateSeq_Input sequpd_input;
2751  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2752 
2753  bool ignore_alignment(false);
2754  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend5, ignore_alignment);
2756 
2757  CSequenceUpdater updater(sequpd_input, params);
2758  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2759  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2760 
2761  BOOST_REQUIRE(cmd);
2762  cmd->Execute();
2763 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2764  CBioseq_Handle new_bsh;
2765  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2766  if (b_iter) new_bsh = *b_iter;
2767 
2768  BOOST_REQUIRE(new_bsh);
2769  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1043);
2770  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
2771 
2772  string newseq;
2774  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
2775 
      // expected residues of the extended sequence
2776  string extend5seq("GGGGGGGGGGCCCCGGAAAAAAAAAGGGGGGGGGGGCGCACGTTTTTTCACACAGGGGTGCAGTCGAGCG\
2777 GCAGCACGGGGAGCTTGCTCCCTGGTGGCGAGCGGCGGACGGGTGAGTAATGTAGGAATCTGCCCGGTAG\
2778 TGGGGGATAACGTGGGGAAACCCACGCTAATACCGCATACGTCCTACGGGAGAAAGCGGAGGATCTTCGG\
2779 ACTTCGCGCTATCGGATGAGCCTATGTCGGATTAGCTAGTTGGTAAGGTAACGGCTTACCAAGGCGACGA\
2780 TCCGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGC\
2781 AGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTTGATCCAGCCATGCCGCGTGTGTGAAGAAGGCTTT\
2782 CGGGTTGTAAAGCACTTTCAGCGAGGAAGAAAGCCTGGTGGTTAAAGCACCGGCTAACTCCGTGCCAGCA\
2783 GCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGCTTGG\
2784 CACGCCGGTTGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCGGAACGGCCAGGCTAGAGTGCAGGA\
2785 GAGGAAGGTAGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAATACCAGTGGCGAAGGCG\
2786 GCCTTCTGGCCTGACACTGACACTGAGGTGCGAAAGCGTGGGTAGCAAACAGGATTAGATACCCTGGTAG\
2787 TCCACGCCGTAAACGATGTCGACTAGCCGTTGGGACCTTTAAGGACTTAGTGGCGCAGTTAACGCGATAA\
2788 GTCGACCGCCTGGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGG\
2789 TGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTACCCTTGACATCCTGCGAATTTGGTA\
2790 GAGATACCTTAGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGCGC");
2791  BOOST_CHECK_EQUAL(extend5seq, newseq);
2792 
2793  // check whether the existing features on the old sequence have been properly adjusted
2794  short count = 0;
2796  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2797  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2801  BOOST_CHECK_EQUAL(type_ok, true);
2802 
2803  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2804  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2805  bool feat_ok = false;
2806 
      // feat_ok stays false for any subtype that has no expectation below
2807  switch (type) {
2809  feat_ok = (start == 58 && stop == 1041);
2810  break;
2812  feat_ok = (start == 257 && stop == 657);
2813  break;
2815  feat_ok = (start == 307 && stop == 620);
2816  break;
2818  feat_ok = (start == 0 && stop == 69);
2819  break;
2820  default:
2821  break;
2822  }
2823  BOOST_CHECK_EQUAL(feat_ok, true);
2824  count++;
2825  }
2826  BOOST_CHECK_EQUAL(count, 4);
2827 }
2828 
// 5' extension with ignore_alignment set to false and m_ImportFeatures enabled.
// Only the features are verified here (no length check): 8 features at the
// listed locations are expected.
// NOTE(review): the statement that fills old_seh/upd_seh, further settings
// around m_ImportFeatures, the feature-loop header and the `case` labels are
// not visible in this listing.
2829 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_DonotIgnoreAlign_ImportAllFeatures_Var1)
2830 {
2831  CSeq_entry_Handle old_seh, upd_seh;
2833 
2834  CUpdateSeq_Input sequpd_input;
2835  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2836 
2837  bool ignore_alignment(false);
2838  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend5, ignore_alignment);
2840  params.m_ImportFeatures = true;
2842 
2843  CSequenceUpdater updater(sequpd_input, params);
2844  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2845  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2846  BOOST_REQUIRE(cmd);
2847  cmd->Execute();
2848 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2849  CBioseq_Handle new_bsh;
2850  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2851  if (b_iter) new_bsh = *b_iter;
2852 
2853  BOOST_REQUIRE(new_bsh);
2854 
2855  // existing features should be shifted and all features from update sequence should be imported
2856  short count = 0;
2858  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2859  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2864  BOOST_CHECK_EQUAL(type_ok, true);
2865 
2866  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2867  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2868  bool feat_ok = false;
2869 
      // start/stop are biological extremes, so start > stop (e.g. 54 and 2)
      // is an accepted value here
2870  switch (type) {
2872  feat_ok = (start == 58 && stop == 1041) || (start == 54 && stop == 2);
2873  break;
2875  feat_ok = (start == 257 && stop == 657);
2876  break;
2878  feat_ok = (start == 70 && stop == 89);
2879  break;
2881  feat_ok = (start == 307 && stop == 620) || (start == 54 && stop == 2);
2882  break;
2884  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 16);
2885  break;
2886  default:
2887  break;
2888  }
2889  BOOST_CHECK_EQUAL(feat_ok, true);
2890  count++;
2891  }
2892  BOOST_CHECK_EQUAL(count, 8);
2893 }
2894 
// 5' extension with ignore_alignment set to false and m_ImportFeatures enabled.
// Expects a new length of 1040, molecule type rna and 8 features at the listed
// locations.
// NOTE(review): the statement that fills old_seh/upd_seh, further settings
// around m_ImportFeatures, the feature-loop header and the `case` labels are
// not visible in this listing.
2895 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_DonotIgnoreAlign_ImportAllFeatures_Var2)
2896 {
2897  CSeq_entry_Handle old_seh, upd_seh;
2899 
2900  CUpdateSeq_Input sequpd_input;
2901  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2902 
2903  bool ignore_alignment(false);
2904  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend5, ignore_alignment);
2906  params.m_ImportFeatures = true;
2908 
2909  CSequenceUpdater updater(sequpd_input, params);
2910  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2911  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2912 
2913  BOOST_REQUIRE(cmd);
2914  cmd->Execute();
2915 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2916  CBioseq_Handle new_bsh;
2917  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2918  if (b_iter) new_bsh = *b_iter;
2919 
2920  BOOST_REQUIRE(new_bsh);
2921  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1040);
2922  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
2923 
2924  // existing features should be shifted and all features from update sequence should be imported
2925  short count = 0;
2927  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2928  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
2933  BOOST_CHECK_EQUAL(type_ok, true);
2934 
2935  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
2936  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
2937  bool feat_ok = false;
2938 
2939  switch (type) {
      // NOTE(review): the other shifted coordinates in this test are 3 less than
      // in the ..._DonotIgnoreAlign_ImportAllFeatures_Var1 test (254 vs 257,
      // 304 vs 307, 1038 vs 1041), but this start is still 58 - confirm that
      // 58 rather than 55 is intended.
2941  feat_ok = (start == 58 && stop == 1038) || (start == 2 && stop == 54);
2942  break;
2944  feat_ok = (start == 254 && stop == 654);
2945  break;
2947  feat_ok = (start == 67 && stop == 86);
2948  break;
2950  feat_ok = (start == 304 && stop == 617) || (start == 2 && stop == 54);
2951  break;
2953  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 16);
2954  break;
2955  default:
2956  break;
2957  }
2958  BOOST_CHECK_EQUAL(feat_ok, true);
2959  count++;
2960  }
2961  BOOST_CHECK_EQUAL(count, 8);
2962 }
2963 
// 5' extension with ignore_alignment set to false and m_ImportFeatures enabled,
// using test_data/seq_for_5extend.asn and test_data/update_5extend.asn.
// Expects a new length of 1043, molecule type rna and 10 features at the listed
// locations.
// NOTE(review): further settings around m_ImportFeatures, the feature-loop
// header and the `case` labels are not visible in this listing.
2964 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_DonotIgnoreAlign_ImportAllFeatures_Var3)
2965 {
2966  string old_fname("test_data/seq_for_5extend.asn");
2967  string upd_fname("test_data/update_5extend.asn");
2968  CSeq_entry_Handle old_seh, upd_seh;
2969  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
2970 
2971  CUpdateSeq_Input sequpd_input;
2972  SetupForUpdate(old_seh, upd_seh, sequpd_input);
2973 
2974  bool ignore_alignment(false);
2975  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend5, ignore_alignment);
2977  params.m_ImportFeatures = true;
2979 
2980  CSequenceUpdater updater(sequpd_input, params);
2981  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
2982  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
2983 
2984  BOOST_REQUIRE(cmd);
2985  cmd->Execute();
2986 
      // the updated sequence is the first nucleotide Bioseq of the old entry
2987  CBioseq_Handle new_bsh;
2988  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
2989  if (b_iter) new_bsh = *b_iter;
2990 
2991  BOOST_REQUIRE(new_bsh);
2992  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1043);
2993  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
2994 
2995  short count = 0;
2997  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
2998  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3003  BOOST_CHECK_EQUAL(type_ok, true);
3004 
3005  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3006  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3007  bool feat_ok = false;
3008 
      // start/stop are biological extremes, so start > stop (e.g. 1037 and 60)
      // is an accepted value here
3009  switch (type) {
3011  feat_ok = (start == 39 && stop == 64) || (start == 58 && stop == 1041);
3012  break;
3014  feat_ok = (start == 60 && stop == 77)
3015  || (start == 1037 && stop == 60)
3016  || (start == 82 && stop == 107);
3017  break;
3019  feat_ok = (start == 70 && stop == 89);
3020  break;
3022  feat_ok = (start == 307 && stop == 620) || (start == 54 && stop == 2);
3023  break;
3025  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 17);
3026  break;
3027  default:
3028  break;
3029  }
3030  BOOST_CHECK_EQUAL(feat_ok, true);
3031  count++;
3032  }
3033  BOOST_CHECK_EQUAL(count, 10);
3034  //NcbiCout << "Passed Test_Extend5Sequence_DonotIgnoreAlign_ImportAllFeatures_Var3" << NcbiEndl;
3035 }
3036 
// Update in eSeqUpdateExtend3 mode with the alignment ignored
// (ignore_alignment is true).
// Captures the old and update sequences before the update, then expects the
// resulting sequence to be exactly origseq + extendseq (1180 residues, RNA)
// and the 4 visited features to sit at the listed start/stop positions.
3037 BOOST_AUTO_TEST_CASE(Test_Extend3Sequence_IgnoreAlign)
3038 {
3039  CSeq_entry_Handle old_seh, upd_seh;
3041 
3042  CUpdateSeq_Input sequpd_input;
3043  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3044 
3045  const CBioseq_Handle& old_bsh = sequpd_input.GetOldBioseq();
3046  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
3047 
  // snapshot both sequences before the update so the concatenation can be verified afterwards
3048  string origseq;
3050  orig_svec.GetSeqData(0, old_bsh.GetBioseqLength(), origseq);
3051  string extendseq;
3053  extendvec.GetSeqData(0, upd_bsh.GetBioseqLength(), extendseq);
3054 
3055  bool ignore_alignment(true);
3056  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend3, ignore_alignment);
3057  CSequenceUpdater updater(sequpd_input, params);
3058  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3059  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3060 
  // a missing command is fatal for the test; Execute() is what applies the update
3061  BOOST_REQUIRE(cmd);
3062  cmd->Execute();
3063 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3064  CBioseq_Handle new_bsh;
3065  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3066  if (b_iter) new_bsh = *b_iter;
3067 
3068  BOOST_REQUIRE(new_bsh);
3069  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1180);
3070  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
3071 
  // with the alignment ignored, the new sequence must be the old one followed by the update
3072  string newseq;
3074  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
3075  BOOST_CHECK_EQUAL(newseq, origseq + extendseq);
3076 
3077  // check whether features are properly placed on the updated sequence
3078  short count = 0;
3080  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3081  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3085  BOOST_CHECK_EQUAL(type_ok, true);
3086 
  // start/stop are the biological extremes of the feature location
3087  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3088  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3089  bool feat_ok = false;
3090 
  // one accepted (start, stop) pair per subtype; anything else leaves feat_ok false
3091  switch (type) {
3093  feat_ok = (start == 0 && stop == 983);
3094  break;
3096  feat_ok = (start == 199 && stop == 599);
3097  break;
3099  feat_ok = (start == 249 && stop == 562);
3100  break;
3102  feat_ok = (start == 0 && stop == 69);
3103  break;
3104  default:
3105  break;
3106  }
3107  BOOST_CHECK_EQUAL(feat_ok, true);
3108  count++;
3109  }
3110  BOOST_CHECK_EQUAL(count, 4);
3111 }
3112 
// Update in eSeqUpdateExtend3 mode with the alignment ignored
// (ignore_alignment is true) and m_ImportFeatures enabled.
// Expects a 1180-residue result and 8 visited features, each with an accepted
// subtype and one of the listed start/stop pairs.
3113 BOOST_AUTO_TEST_CASE(Test_Extend3Sequence_IgnoreAlign_ImportAllFeatures)
3114 {
3115  CSeq_entry_Handle old_seh, upd_seh;
3117 
3118  CUpdateSeq_Input sequpd_input;
3119  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3120 
3121  bool ignore_alignment(true);
3122  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend3, ignore_alignment);
3124  params.m_ImportFeatures = true;
3126 
3127  CSequenceUpdater updater(sequpd_input, params);
3128  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3129  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3130 
  // a missing command is fatal for the test; Execute() is what applies the update
3131  BOOST_REQUIRE(cmd);
3132  cmd->Execute();
3133 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3134  CBioseq_Handle new_bsh;
3135  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3136  if (b_iter) new_bsh = *b_iter;
3137 
3138  BOOST_REQUIRE(new_bsh);
3139  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1180);
3140 
3141 
3142 
  // count tallies the features visited; each one is validated by subtype and position
3143  short count = 0;
3145  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3146  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3151  BOOST_CHECK_EQUAL(type_ok, true);
3152 
  // start/stop are the biological extremes of the feature location
3153  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3154  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3155  bool feat_ok = false;
3156 
  // each subtype accepts a fixed set of (start, stop) pairs; anything else leaves feat_ok false
3157  switch (type) {
3159  feat_ok = (start == 0 && stop == 983) || (start == 989 && stop == 1074);
3160  break;
3162  feat_ok = (start == 199 && stop == 599);
3163  break;
3165  feat_ok = (start == 1104 && stop == 1124);
3166  break;
3168  feat_ok = (start == 249 && stop == 562) || (start == 1029 && stop == 1104);
3169  break;
3171  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 24);
3172  break;
3173  default:
3174  break;
3175  }
3176  BOOST_CHECK_EQUAL(feat_ok, true);
3177  count++;
3178  }
3179  BOOST_CHECK_EQUAL(count, 8);
3180 }
3181 
// Update in eSeqUpdateExtend3 mode that honours the alignment
// (ignore_alignment is false).
// Expects a 1095-residue RNA result whose residues equal the literal
// extend3seq below, and 4 visited features at the listed start/stop positions.
3182 BOOST_AUTO_TEST_CASE(Test_Extend3Sequence_DoNotIgnoreAlign)
3183 {
3184  CSeq_entry_Handle old_seh, upd_seh;
3186 
3187  CUpdateSeq_Input sequpd_input;
3188  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3189 
3190  bool ignore_alignment(false);
3191  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend3, ignore_alignment);
3192  CSequenceUpdater updater(sequpd_input, params);
3193  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3194  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3195 
  // a missing command is fatal for the test; Execute() is what applies the update
3196  BOOST_REQUIRE(cmd);
3197  cmd->Execute();
3198 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3199  CBioseq_Handle new_bsh;
3200  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3201  if (b_iter) new_bsh = *b_iter;
3202 
3203  BOOST_REQUIRE(new_bsh);
3204  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1095);
3205  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
3206 
3207  string newseq;
3209  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
3210 
  // expected residues of the extended sequence, compared verbatim against newseq
3211  string extend3seq("TGCAGTCGAGCGGCAGCACGGGGAGCTTGCTCCCTGGTGGCGAGCGGCGGACGGGTGAGTAATGTAGGAA\
3212 TCTGCCCGGTAGTGGGGGATAACGTGGGGAAACCCACGCTAATACCGCATACGTCCTACGGGAGAAAGCG\
3213 GAGGATCTTCGGACTTCGCGCTATCGGATGAGCCTATGTCGGATTAGCTAGTTGGTAAGGTAACGGCTTA\
3214 CCAAGGCGACGATCCGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACT\
3215 CCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTTGATCCAGCCATGCCGCGTGTGT\
3216 GAAGAAGGCTTTCGGGTTGTAAAGCACTTTCAGCGAGGAAGAAAGCCTGGTGGTTAAAGCACCGGCTAAC\
3217 TCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCG\
3218 TAGGTGGCTTGGCACGCCGGTTGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCGGAACGGCCAGGC\
3219 TAGAGTGCAGGAGAGGAAGGTAGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAATACCA\
3220 GTGGCGAAGGCGGCCTTCTGGCCTGACACTGACACTGAGGTGCGAAAGCGTGGGTAGCAAACAGGATTAG\
3221 ATACCCTGGTAGTCCACGCCGTAAACGATGTCGACTAGCCGTTGGGACCTTTAAGGACTTAGTGGCGCAG\
3222 TTAACGCGATAAGTCGACCGCCTGGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGC\
3223 CCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTACCCTTGACATCCT\
3224 GCGAATTTGGTAGAGATACCTTAGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCT\
3225 CGCGCGGGAGAAAGGGGTTTTTTTTTTTATATTATACCCCACCCCTCTCTCCCGGGGGGAGATTAGCCAC\
3226 AGGGGTTTTTTTTTTTATATTATACCCCCCGGGGGGAGATTAGCC");
3227  BOOST_CHECK_EQUAL(extend3seq, newseq);
3228 
  // NOTE(review): the comment below reads like a leftover to-do; the code that
  // follows only validates the features it visits - confirm whether a feature
  // spanning the alignment was meant to be added to the test data
3229  // add a feature that is around the alignment
3230  short count = 0;
3232  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3233  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3237  BOOST_CHECK_EQUAL(type_ok, true);
3238 
  // start/stop are the biological extremes of the feature location
3239  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3240  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3241  bool feat_ok = false;
3242 
  // one accepted (start, stop) pair per subtype; anything else leaves feat_ok false
3243  switch (type) {
3245  feat_ok = (start == 0 && stop == 983);
3246  break;
3248  feat_ok = (start == 199 && stop == 599);
3249  break;
3251  feat_ok = (start == 249 && stop == 562);
3252  break;
3254  feat_ok = (start == 0 && stop == 69);
3255  break;
3256  default:
3257  break;
3258  }
3259  BOOST_CHECK_EQUAL(feat_ok, true);
3260  count++;
3261  }
3262  BOOST_CHECK_EQUAL(count, 4);
3263  //NcbiCout << "Passed Test_Extend3Sequence_DoNotIgnoreAlign" << NcbiEndl;
3264 }
3265 
// Update in eSeqUpdateExtend3 mode that honours the alignment
// (ignore_alignment is false) with m_ImportFeatures enabled.
// Expects a 1095-residue result and 8 visited features, each with an accepted
// subtype and one of the listed start/stop pairs.
3266 BOOST_AUTO_TEST_CASE(Test_Extend3Sequence_DoNotIgnoreAlign_ImportAllFeatures_Var1)
3267 {
3268  CSeq_entry_Handle old_seh, upd_seh;
3270 
3271  CUpdateSeq_Input sequpd_input;
3272  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3273 
3274  bool ignore_alignment(false);
3275  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend3, ignore_alignment);
3277  params.m_ImportFeatures = true;
3279 
3280  CSequenceUpdater updater(sequpd_input, params);
3281  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3282  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3283 
  // a missing command is fatal for the test; Execute() is what applies the update
3284  BOOST_REQUIRE(cmd);
3285  cmd->Execute();
3286 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3287  CBioseq_Handle new_bsh;
3288  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3289  if (b_iter) new_bsh = *b_iter;
3290 
3291  BOOST_REQUIRE(new_bsh);
3292  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1095);
3293 
  // count tallies the features visited; each one is validated by subtype and position
3294  short count = 0;
3296  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3297  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3302  BOOST_CHECK_EQUAL(type_ok, true);
3303 
  // start/stop are the biological extremes of the feature location
3304  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3305  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3306  bool feat_ok = false;
3307 
  // each subtype accepts a fixed set of (start, stop) pairs; anything else leaves feat_ok false
3308  switch (type) {
3310  feat_ok = (start == 0 && stop == 983) || (start == 904 && stop == 989);
3311  break;
3313  feat_ok = (start == 199 && stop == 599);
3314  break;
3316  feat_ok = (start == 1019 && stop == 1039);
3317  break;
3319  feat_ok = (start == 249 && stop == 562) || (start == 944 && stop == 1019);
3320  break;
3322  feat_ok = (start == 0 && stop == 69) || (start == 0 && stop == 24);
3323  break;
3324  default:
3325  break;
3326  }
3327  BOOST_CHECK_EQUAL(feat_ok, true);
3328  count++;
3329  }
3330  BOOST_CHECK_EQUAL(count, 8);
3331 }
3332 
// Update in eSeqUpdateExtend3 mode that honours the alignment
// (ignore_alignment is false) with m_ImportFeatures enabled, run on
// seq_for_3extend.asn / update_3extend.asn.
// Expects a 1095-residue RNA result and 10 visited features, each with an
// accepted subtype and one of the listed start/stop pairs.
3333 BOOST_AUTO_TEST_CASE(Test_Extend3Sequence_DonotIgnoreAlign_ImportAllFeatures_Var2)
3334 {
3335  string old_fname("test_data/seq_for_3extend.asn");
3336  string upd_fname("test_data/update_3extend.asn");
3337  CSeq_entry_Handle old_seh, upd_seh;
3338  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3339 
3340  CUpdateSeq_Input sequpd_input;
3341  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3342 
3343  bool ignore_alignment(false);
3344  SUpdateSeqParams params(SUpdateSeqParams::eSeqUpdateExtend3, ignore_alignment);
3346  params.m_ImportFeatures = true;
3348 
3349  CSequenceUpdater updater(sequpd_input, params);
3350  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3351  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3352 
  // a missing command is fatal for the test; Execute() is what applies the update
3353  BOOST_REQUIRE(cmd);
3354  cmd->Execute();
3355 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3356  CBioseq_Handle new_bsh;
3357  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3358  if (b_iter) new_bsh = *b_iter;
3359 
3360  BOOST_REQUIRE(new_bsh);
3361  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1095);
3362  BOOST_CHECK_EQUAL(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna, true);
3363 
  // count tallies the features visited; each one is validated by subtype and position
3364  short count = 0;
3366  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3367  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3372  BOOST_CHECK_EQUAL(type_ok, true);
3373 
  // start/stop are the biological extremes of the feature location
3374  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3375  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3376  bool feat_ok = false;
3377 
  // each subtype accepts a fixed set of (start, stop) pairs; anything else leaves feat_ok false
3378  switch (type) {
3380  feat_ok = (start == 0 && stop == 983) || (start == 904 && stop == 944);
3381  break;
3383  feat_ok = (start == 979 && stop == 2)
3384  || (start == 2 && stop == 19)
3385  || (start == 849 && stop == 909);
3386  break;
3388  feat_ok = (start == 959 && stop == 999);
3389  break;
3391  feat_ok = (start == 899 && stop == 953) || (start == 1019 && stop == 1055);
3392  break;
3394  feat_ok = (start == 0 && stop == 11) || (start == 0 && stop == 16);
3395  break;
3396  default:
3397  break;
3398  }
3399  BOOST_CHECK_EQUAL(feat_ok, true);
3400  count++;
3401  }
3402  BOOST_CHECK_EQUAL(count, 10);
3403 }
3404 
// Sequence update on retranslateCDS.asn / patch_retranslateCDS.asn that keeps
// the old features, imports none, and retranslates coding regions so the
// protein is updated (per the in-body comment).
// Expects a 3728-residue nucleotide result and 2 visited features: one whose
// sub-intervals equal the six ranges in explocs and whose product sequence
// equals the literal protein below, and one located at 0..230.
3405 BOOST_AUTO_TEST_CASE(Test_PatchSequence_UpdateProteins01)
3406 {
3407  string old_fname("test_data/retranslateCDS.asn");
3408  string upd_fname("test_data/patch_retranslateCDS.asn");
3409  CSeq_entry_Handle old_seh, upd_seh;
3410  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3411 
3412  CUpdateSeq_Input sequpd_input;
3413  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3414  // do not remove old features and do not import new features
3415  // retranslate the coding regions and update the proteins
3418 
3419  CSequenceUpdater updater(sequpd_input, params);
3420  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3421  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3422 
  // a missing command is fatal for the test; Execute() is what applies the update
3423  BOOST_REQUIRE(cmd);
3424  cmd->Execute();
3425 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3426  CBioseq_Handle new_bsh;
3427  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3428  if (b_iter) new_bsh = *b_iter;
3429 
3430  BOOST_REQUIRE(new_bsh);
3431  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 3728);
3432 
  // count tallies the features visited
3433  short count = 0;
3435  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3437  BOOST_CHECK_EQUAL(type_ok, true);
3438 
  // start/stop are the biological extremes of the feature location
3439  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3440  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3441  bool feat_ok = false;
3442 
3443  switch (type) {
3445  {
  // collect the range of every sub-interval of the (multi-interval) location
3446  vector<TSeqRange> sublocs;
3447  for (CSeq_loc_CI loc_iter(feat->GetLocation()); loc_iter; ++loc_iter) {
3448  sublocs.push_back(loc_iter.GetRange());
3449  }
3450 
  // expected intervals, in the same order as they are iterated above
3451  vector<TSeqRange> explocs;
3452  explocs.push_back(CRange<TSeqPos>(64, 73));
3453  explocs.push_back(CRange<TSeqPos>(529, 668));
3454  explocs.push_back(CRange<TSeqPos>(805, 983));
3455  explocs.push_back(CRange<TSeqPos>(1699, 1854));
3456  explocs.push_back(CRange<TSeqPos>(2898, 3044));
3457  explocs.push_back(CRange<TSeqPos>(3665, 3727)); // in Sequin, the end was adjusted to 3725
3458  feat_ok = (sublocs == explocs);
3459 
  // fetch the feature's product Bioseq from the old entry's scope and compare its residues
3460  string new_prot;
3461  CBioseq_Handle prot_bsh = old_seh.GetScope().GetBioseqHandle(feat->GetProduct());
3463  new_svec.GetSeqData(0, prot_bsh.GetBioseqLength(), new_prot);
3464 
3465  string protein("MGQVFLLLPVLLVSCFLSQGAAMENQRLFNIAVNRVQHLHLMAQKMFNDFEVTLLPDERRQLNKIFLLDF\
3466 CNSDSIVSPPHIYIVLDRHVFKEREFKDRXSTSTRLRRVQSXSCSTSLTVXLNPGSTLARPXPSPTASXS\
3467 ETPTRSLRSSATSKWASTCSSRGARMAYXAWMTMTLSSCPPTGTTTRTWGATETSGGTTSCWPASRRTCT\
3468 RSRPTXPSPSAGSHWRPTALC");
3469  BOOST_CHECK_EQUAL(new_prot, protein);
3470  break;
3471  }
3473  feat_ok = (start == 0 && stop == 230);
3474  break;
3475  default:
3476  break;
3477  }
3478  BOOST_CHECK_EQUAL(feat_ok, true);
3479  count++;
3480  }
3481  BOOST_CHECK_EQUAL(count, 2);
3482  //NcbiCout << "Passed Test_PatchSequence_UpdateProteins01" << NcbiEndl;
3483 }
3484 
// Sequence update on OldSeq_CDS_mRNA.asn / UpdSeq_NoAlign.asn that replaces
// the old sequence and keeps the old features (per the in-body comment).
// Checks that the updater accepts the old sequence and sees the update as raw,
// then expects a 1055-residue result and 4 visited features at the listed
// start/stop positions.
3485 BOOST_AUTO_TEST_CASE(Test_ReplaceSequence_NoAlign)
3486 {
3487  string old_fname("test_data/OldSeq_CDS_mRNA.asn");
3488  string upd_fname("test_data/UpdSeq_NoAlign.asn");
3489  CSeq_entry_Handle old_seh, upd_seh;
3490  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3491 
3492  CUpdateSeq_Input sequpd_input;
3493  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3495 
3496  // replace old sequence, do not remove old features
3497  CSequenceUpdater updater(sequpd_input, params);
3498  BOOST_CHECK(updater.IsOldSequenceOK());
3499  BOOST_CHECK(updater.IsUpdateSequenceRaw());
3500  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3501  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3502 
  // a missing command is fatal for the test; Execute() is what applies the update
3503  BOOST_REQUIRE(cmd);
3504  cmd->Execute();
3505 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3506  CBioseq_Handle new_bsh;
3507  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3508  if (b_iter) new_bsh = *b_iter;
3509 
3510  BOOST_REQUIRE(new_bsh);
3511  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 1055);
3512 
  // count tallies the features visited; each one is validated by subtype and position
3513  short count = 0;
3515  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3516  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3520  BOOST_CHECK_EQUAL(type_ok, true);
3521 
  // start/stop are the biological extremes of the feature location
3522  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3523  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3524  bool feat_ok = false;
3525 
  // one accepted (start, stop) pair per subtype; anything else leaves feat_ok false
3526  switch (type) {
3528  feat_ok = (start == 0 && stop == 983);
3529  break;
3531  feat_ok = (start == 199 && stop == 599);
3532  break;
3534  feat_ok = (start == 249 && stop == 562);
3535  break;
3537  feat_ok = (start == 0 && stop == 69);
3538  break;
3539  default:
3540  break;
3541  }
3542  BOOST_CHECK_EQUAL(feat_ok, true);
3543  count++;
3544  }
3545  BOOST_CHECK_EQUAL(count, 4);
3546 }
3547 
// Sequence update on OldSeq_CDS_mRNA.asn / UpdSeq_NoAlign.asn.
// Checks that the updater accepts the old sequence and sees the update as raw,
// then expects a 2040-residue result and 4 visited features at the listed
// start/stop positions (three of them start at 1055 or later).
3548 BOOST_AUTO_TEST_CASE(Test_Extend5Sequence_NoAlign)
3549 {
3550  string old_fname("test_data/OldSeq_CDS_mRNA.asn");
3551  string upd_fname("test_data/UpdSeq_NoAlign.asn");
3552  CSeq_entry_Handle old_seh, upd_seh;
3553  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3554 
3555  CUpdateSeq_Input sequpd_input;
3556  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3558 
  // NOTE(review): the comment below says "replace", but the test name and the
  // expected length (2040) suggest a 5' extension - looks copy-pasted; confirm
  // against the update type passed to params
3559  // replace old sequence, do not remove old features
3560  CSequenceUpdater updater(sequpd_input, params);
3561  BOOST_CHECK(updater.IsOldSequenceOK());
3562  BOOST_CHECK(updater.IsUpdateSequenceRaw());
3563  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3564  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3565 
  // a missing command is fatal for the test; Execute() is what applies the update
3566  BOOST_REQUIRE(cmd);
3567  cmd->Execute();
3568 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3569  CBioseq_Handle new_bsh;
3570  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3571  if (b_iter) new_bsh = *b_iter;
3572 
3573  BOOST_REQUIRE(new_bsh);
3574  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 2040);
3575 
  // count tallies the features visited; each one is validated by subtype and position
3576  short count = 0;
3578  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3579  bool type_ok = (type == CSeqFeatData::eSubtype_prot)
3583  BOOST_CHECK_EQUAL(type_ok, true);
3584 
  // start/stop are the biological extremes of the feature location
3585  const TSeqPos start = feat->GetLocation().GetStart(eExtreme_Biological);
3586  const TSeqPos stop = feat->GetLocation().GetStop(eExtreme_Biological);
3587  bool feat_ok = false;
3588 
  // one accepted (start, stop) pair per subtype; anything else leaves feat_ok false
3589  switch (type) {
3591  feat_ok = (start == 1055 && stop == 2038);
3592  break;
3594  feat_ok = (start == 1254 && stop == 1654);
3595  break;
3597  feat_ok = (start == 1304 && stop == 1617);
3598  break;
3600  feat_ok = (start == 0 && stop == 69);
3601  break;
3602  default:
3603  break;
3604  }
3605  BOOST_CHECK_EQUAL(feat_ok, true);
3606  count++;
3607  }
3608  BOOST_CHECK_EQUAL(count, 4);
3609 }
3610 
// Regression test for GB-5422, run on GB_5422.asn / UpdSeq_NoAlign.asn.
// Checks that the updater accepts the old sequence and sees the update as raw,
// then expects a 2141-residue result whose first feature is an rRNA located
// at 0..1085 (biological extremes).
// The feature iterator and the feature taken from it are guarded with
// BOOST_REQUIRE rather than BOOST_CHECK: both are dereferenced immediately
// afterwards, so a failure has to stop the test instead of being logged and
// followed by an invalid dereference.
3611 BOOST_AUTO_TEST_CASE(Test_Extend3Sequence_NoAlign) // GB-5422
3612 {
3613  string old_fname("test_data/GB_5422.asn");
3614  string upd_fname("test_data/UpdSeq_NoAlign.asn");
3615  CSeq_entry_Handle old_seh, upd_seh;
3616  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3617 
3618  CUpdateSeq_Input sequpd_input;
3619  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3621 
  // NOTE(review): the comment below says "replace", but the test name and the
  // expected length (2141) suggest a 3' extension - looks copy-pasted; confirm
  // against the update type passed to params
3622  // replace old sequence, do not remove old features
3623  CSequenceUpdater updater(sequpd_input, params);
3624  BOOST_CHECK(updater.IsOldSequenceOK());
3625  BOOST_CHECK(updater.IsUpdateSequenceRaw());
3626  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3627  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3628 
  // a missing command is fatal for the test; Execute() is what applies the update
3629  BOOST_REQUIRE(cmd);
3630  cmd->Execute();
3631 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3632  CBioseq_Handle new_bsh;
3633  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3634  if (b_iter) new_bsh = *b_iter;
3635 
3636  BOOST_REQUIRE(new_bsh);
3637  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), 2141);
3638 
  // the iterator and the feature are dereferenced right below, so both guards are fatal
3639  CFeat_CI feat_it(new_bsh);
3640  BOOST_REQUIRE(feat_it);
3642  BOOST_REQUIRE(rna);
3643  BOOST_CHECK(rna->GetData().GetSubtype() == CSeqFeatData::eSubtype_rRNA);
3644 
3645  BOOST_CHECK_EQUAL(rna->GetLocation().GetStart(eExtreme_Biological), (TSeqPos)0);
3646  BOOST_CHECK_EQUAL(rna->GetLocation().GetStop(eExtreme_Biological), (TSeqPos)1085);
3647 }
3648 
// Sequence update with m_ImportFeatures enabled, run on negstrand2.asn /
// negstrand2_upd.asn.
// Each visited feature must have an accepted subtype, and for three subtypes
// the feature location, serialized with MakeAsn, must match the expected
// minus-strand interval on local id "AE10_1" verbatim.
3649 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportFeatures_MixedAlign1)
3650 {
3651  string old_fname("test_data/negstrand2.asn");
3652  string upd_fname("test_data/negstrand2_upd.asn");
3653  CSeq_entry_Handle old_seh, upd_seh;
3654  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3655 
3656  CUpdateSeq_Input sequpd_input;
3657  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3659  params.m_ImportFeatures = true;
3661 
3662  CSequenceUpdater updater(sequpd_input, params);
3663  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3664  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3665 
  // a missing command is fatal for the test; Execute() is what applies the update
3666  BOOST_REQUIRE(cmd);
3667  cmd->Execute();
3668 
3670  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3671  bool type_ok = (type == CSeqFeatData::eSubtype_gene)
3674  BOOST_CHECK_EQUAL(type_ok, true);
3675 
  // locations are compared as ASN.1 text, so strand, id and fuzz are all part of the check
3676  switch (type) {
3678  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3679  "Seq-loc ::= int {\n"
3680  " from 125,\n"
3681  " to 709,\n"
3682  " strand minus,\n"
3683  " id local str \"AE10_1\",\n"
3684  " fuzz-to lim gt\n"
3685  "}\n");
3686  break;
3688  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3689  "Seq-loc ::= int {\n"
3690  " from 18,\n"
3691  " to 124,\n"
3692  " strand minus,\n"
3693  " id local str \"AE10_1\"\n"
3694  "}\n");
3695  break;
3697  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3698  "Seq-loc ::= int {\n"
3699  " from 125,\n"
3700  " to 174,\n"
3701  " strand minus,\n"
3702  " id local str \"AE10_1\",\n"
3703  " fuzz-to lim gt\n"
3704  "}\n");
3705  break;
3706  default:
3707  break;
3708  }
3709  }
3710 }
3711 
// Sequence update with m_ImportFeatures enabled, run on negstrand3.asn /
// negstrand3_upd.asn.
// Each visited feature must have an accepted subtype, and the checked location,
// serialized with MakeAsn, must match the expected four-interval plus-strand
// mix on local id "Nay14" verbatim.
3712 BOOST_AUTO_TEST_CASE(Test_NoChange_ImportFeatures_MixedAlign2)
3713 {
3714  string old_fname("test_data/negstrand3.asn");
3715  string upd_fname("test_data/negstrand3_upd.asn");
3716  CSeq_entry_Handle old_seh, upd_seh;
3717  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3718 
3719  CUpdateSeq_Input sequpd_input;
3720  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3722  params.m_ImportFeatures = true;
3724 
3725  CSequenceUpdater updater(sequpd_input, params);
3726  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3727  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3728 
  // a missing command is fatal for the test; Execute() is what applies the update
3729  BOOST_REQUIRE(cmd);
3730  cmd->Execute();
3731 
3733  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3734  bool type_ok = (type == CSeqFeatData::eSubtype_mRNA)
3737  BOOST_CHECK_EQUAL(type_ok, true);
3738 
  // the location is compared as ASN.1 text, so strand, id and fuzz are all part of the check
3739  switch (type) {
3742  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3743  "Seq-loc ::= mix {\n"
3744  " int {\n"
3745  " from 0,\n"
3746  " to 22,\n"
3747  " strand plus,\n"
3748  " id local str \"Nay14\",\n"
3749  " fuzz-from lim lt\n"
3750  " },\n"
3751  " int {\n"
3752  " from 124,\n"
3753  " to 186,\n"
3754  " strand plus,\n"
3755  " id local str \"Nay14\"\n"
3756  " },\n"
3757  " int {\n"
3758  " from 421,\n"
3759  " to 559,\n"
3760  " strand plus,\n"
3761  " id local str \"Nay14\"\n"
3762  " },\n"
3763  " int {\n"
3764  " from 615,\n"
3765  " to 650,\n"
3766  " strand plus,\n"
3767  " id local str \"Nay14\",\n"
3768  " fuzz-to lim gt\n"
3769  " }\n"
3770  "}\n");
3771  break;
3772  default:
3773  break;
3774  }
3775  }
3776 }
3777 
3778 // Testing sequence update using alignment with mixed strands
// Runs on negstrand.asn / negstrand_upd.asn. After the update the new sequence
// must have the same length and residues as the update sequence, and the
// visited features must have the expected locations (compared as MakeAsn text
// on local id "AE10_1_1"); features of one subtype are told apart by their
// comment text.
3779 BOOST_AUTO_TEST_CASE(Test_Replace_MixedAlign)
3780 {
3781  string old_fname("test_data/negstrand.asn");
3782  string upd_fname("test_data/negstrand_upd.asn");
3783  CSeq_entry_Handle old_seh, upd_seh;
3784  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3785 
3786  CUpdateSeq_Input sequpd_input;
3787  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3789 
3790  CSequenceUpdater updater(sequpd_input, params);
3791  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3792  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3793 
  // a missing command is fatal for the test; Execute() is what applies the update
3794  BOOST_REQUIRE(cmd);
3795  cmd->Execute();
3796 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3797  CBioseq_Handle new_bsh;
3798  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3799  if (b_iter) new_bsh = *b_iter;
3800 
3801  BOOST_REQUIRE(new_bsh);
3802  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
3803  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), upd_bsh.GetBioseqLength());
3804 
3805  // new sequence should be equal to the update sequence
3806  string newseq;
3808  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
3809  string updseq;
3811  upd_svec.GetSeqData(0, upd_bsh.GetBioseqLength(), updseq);
3812  BOOST_CHECK_EQUAL(updseq, newseq);
3813 
3814  // check whether the existing features on the old sequence have been properly adjusted
3816  CSeqFeatData::ESubtype type = feat->GetData().GetSubtype();
3817  bool type_ok = (type == CSeqFeatData::eSubtype_gene)
3819  BOOST_CHECK_EQUAL(type_ok, true);
3820 
3821  switch (type) {
3823  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3824  "Seq-loc ::= int {\n"
3825  " from 47,\n"
3826  " to 53,\n"
3827  " strand minus,\n"
3828  " id local str \"AE10_1_1\",\n"
3829  " fuzz-to lim gt\n"
3830  "}\n");
3831  break;
  // the feature comment (case-insensitive match) selects which expected location applies;
  // a feature whose comment matches none of the four strings is not checked here
3833  if (NStr::EqualNocase(feat->GetComment(), "before the alignment (1..>11)")) {
3834  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3835  "Seq-loc ::= int {\n"
3836  " from 55,\n"
3837  " to 55,\n"
3838  " strand minus,\n"
3839  " id local str \"AE10_1_1\",\n"
3840  " fuzz-from lim lt\n"
3841  "}\n");
3842  }
3843  else if (NStr::EqualNocase(feat->GetComment(), "partially inside the alignment (<5..23)")) {
3844  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3845  "Seq-loc ::= int {\n"
3846  " from 44,\n"
3847  " to 55,\n"
3848  " strand minus,\n"
3849  " id local str \"AE10_1_1\",\n"
3850  " fuzz-to lim gt\n"
3851  "}\n");
3852  }
3853  else if (NStr::EqualNocase(feat->GetComment(), "negative strand (<6..>50)")) {
3854  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3855  "Seq-loc ::= int {\n"
3856  " from 17,\n"
3857  " to 55,\n"
3858  " strand plus,\n"
3859  " id local str \"AE10_1_1\",\n"
3860  " fuzz-from lim lt,\n"
3861  " fuzz-to lim gt\n"
3862  "}\n");
3863  }
3864  else if (NStr::EqualNocase(feat->GetComment(), "negative strand (1..>7)")) {
3865  BOOST_CHECK_EQUAL(MakeAsn(feat->GetLocation()),
3866  "Seq-loc ::= int {\n"
3867  " from 55,\n"
3868  " to 55,\n"
3869  " strand plus,\n"
3870  " id local str \"AE10_1_1\",\n"
3871  " fuzz-from lim lt\n"
3872  "}\n");
3873  }
3874  break;
3875  default:
3876  break;
3877  }
3878  }
3879 }
3880 
// Sequence update on deltaseq.asn / update_rawseq.asn.
// After the update the new sequence must have the same length and residues as
// the update sequence, and its Seq-inst must be raw, without an ext, of RNA
// type, with tandem topology, double-stranded, and stored as Ncbi4na.
3881 BOOST_AUTO_TEST_CASE(Test_Replace_DeltaSequence)
3882 {
3883  string old_fname("test_data/deltaseq.asn");
3884  string upd_fname("test_data/update_rawseq.asn");
3885  CSeq_entry_Handle old_seh, upd_seh;
3886  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
3887 
3888  CUpdateSeq_Input sequpd_input;
3889  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3891 
3892  CSequenceUpdater updater(sequpd_input, params);
3893  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3894  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3895 
  // a missing command is fatal for the test; Execute() is what applies the update
3896  BOOST_REQUIRE(cmd);
3897  cmd->Execute();
3898 
  // the result is inspected in place: take the first nucleotide Bioseq of the old entry
3899  CBioseq_Handle new_bsh;
3900  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3901  if (b_iter) new_bsh = *b_iter;
3902 
3903  BOOST_REQUIRE(new_bsh);
3904  const CBioseq_Handle& upd_bsh = sequpd_input.GetUpdateBioseq();
3905  BOOST_CHECK_EQUAL(new_bsh.GetBioseqLength(), upd_bsh.GetBioseqLength());
3906 
3907  // new sequence should be equal to the update sequence
3908  string newseq;
3910  new_svec.GetSeqData(0, new_bsh.GetBioseqLength(), newseq);
3911  string updseq;
3913  upd_svec.GetSeqData(0, upd_bsh.GetBioseqLength(), updseq);
3914  BOOST_CHECK_EQUAL(updseq, newseq);
3915 
  // Seq-inst properties expected on the updated Bioseq
3916  BOOST_CHECK(new_bsh.GetInst_Repr() == CSeq_inst::eRepr_raw);
3917  BOOST_CHECK(!new_bsh.GetInst().IsSetExt());
3918  BOOST_CHECK(new_bsh.GetInst_Mol() == CSeq_inst::eMol_rna);
3919  BOOST_CHECK(new_bsh.GetInst_Topology() == CSeq_inst::eTopology_tandem);
3920  BOOST_CHECK(new_bsh.GetInst_Strand() == CSeq_inst::eStrand_ds);
3921  BOOST_CHECK(new_bsh.GetInst().GetSeq_data().IsNcbi4na());
3922  //NcbiCout << "Passed Test_Replace_DeltaSequence" << NcbiEndl;
3923 }
3924 
// Sequence update with m_ImportFeatures enabled where the old entry is read
// from delta_farpointers.asn and the update Bioseq is resolved through the
// scope by id "NG_001019.5".
// After the update, 185 features are expected on the first nucleotide Bioseq
// of the old entry.
3925 BOOST_AUTO_TEST_CASE(Test_Replace_DeltaSeqFarPointers)
3926 {
3927  string old_fname("test_data/delta_farpointers.asn");
3928 
3929  CRef<CSeq_entry> old_entry = ReadEntryFromFile(old_fname);
3930  CRef<CScope> scope = BuildScope();
3931  CSeq_entry_Handle old_seh = scope->AddTopLevelSeqEntry(*old_entry);
3932 
3933  CBioseq_Handle old_bsh;
3934  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
3935  if (b_iter) old_bsh = *b_iter;
3936  CSeq_id id("NG_001019.5");
3937 
  // the update sequence is looked up by id in the scope; the test stops if it cannot be resolved
3941  CBioseq_Handle upd_bsh = scope->GetBioseqHandle(id);
3942  BOOST_REQUIRE(upd_bsh);
3943  CSeq_entry_Handle upd_seh = upd_bsh.GetTopLevelEntry();
3944 
3945 
3946  CUpdateSeq_Input sequpd_input;
3947  SetupForUpdate(old_seh, upd_seh, sequpd_input);
3950  params.m_ImportFeatures = true;
3952  CSequenceUpdater updater(sequpd_input, params);
3953  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
3954  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
3955 
  // a missing command is fatal for the test; Execute() is what applies the update
3956  BOOST_REQUIRE(cmd);
3957  cmd->Execute();
3958 
  // NOTE(review): new_bsh is used below without a BOOST_REQUIRE(new_bsh) guard
3959  CBioseq_Handle new_bsh;
3960  CBioseq_CI bseq_it(old_seh, CSeq_inst::eMol_na);
3961  if (bseq_it) new_bsh = *bseq_it;
3962 
  // despite its name, count_genes counts every feature the unfiltered CFeat_CI returns
3963  short count_genes = 0;
3964  for (CFeat_CI feat_it(new_bsh); feat_it; ++feat_it) {
3965  count_genes++;
3966  }
3967 
3968  BOOST_CHECK_EQUAL(count_genes, 185);
3969  //NcbiCout << "Passed Test_Replace_DeltaSeqFarPointers" << NcbiEndl;
3970 }
3971 
// Multiple-sequence update of the proteins in protein_seqs1.asn using the
// FASTA sequences in protein_seqs1_update.fa.
// Expects 4 non-identical updates, no identical updates and 1 sequence without
// an update; the per-sequence commands are collected into one composite
// command, and after it executes every protein Bioseq in the old entry must
// start with 'M'.
3972 BOOST_AUTO_TEST_CASE(Test_ReplaceProteinSeqs1)
3973 {
3974  string old_fname("test_data/protein_seqs1.asn");
3975  string upd_fname("test_data/protein_seqs1_update.fa");
3976 
3977  CRef<CSeq_entry> old_entry = ReadEntryFromFile(old_fname);
3978  CRef<CSeq_entry> upd_entry = ReadFastaSeqsFromFile(upd_fname);
3979  CRef<CScope> scope = BuildScope();
3980 
3981  CSeq_entry_Handle old_seh = scope->AddTopLevelSeqEntry(*old_entry);
3982  BOOST_REQUIRE(old_seh);
3983 
3984 
  // each setup step runs only if the previous one succeeded; any failure stops the test
3985  CUpdateMultipleSeq_Input multiupdseq_in;
3986  bool ok = multiupdseq_in.SetOldEntryAndScope(old_seh);
3987  ok = ok && multiupdseq_in.SetUpdateEntry(upd_entry);
3988  ok = ok && multiupdseq_in.PrepareMultipleSequenceInputsForUnitTest();
3989  BOOST_REQUIRE(ok);
3990 
3991  const CUpdateMultipleSeq_Input::TIDToUpdInputMap& updates = multiupdseq_in.GetNonIdenticalUpdates();
3992  const CUpdateMultipleSeq_Input::TIDToUpdInputMap& identicals = multiupdseq_in.GetIdenticalUpdates();
3993  const CUpdateMultipleSeq_Input::TSeqIDHVector& noUpdates = multiupdseq_in.GetSeqsWithoutUpdates();
3994 
3995  BOOST_CHECK(updates.size() == 4);
3996  BOOST_CHECK(identicals.empty());
3997  BOOST_CHECK(noUpdates.size() == 1);
3998 
3999  CRef<CCmdComposite> update_cmd(new CCmdComposite("Update multiple sequences"));
4001  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
4002 
  // build one update command per non-identical update and add it to the composite
  // NOTE(review): cmd is dereferenced without the BOOST_REQUIRE(cmd) guard that
  // the single-sequence tests in this file use
4003  for (auto& it : updates) {
4004  CSequenceUpdater updater(it.second.GetObject(), params);
4005  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
4006  update_cmd->AddCommand(*cmd);
4007  }
4008 
4009  update_cmd->Execute();
4010 
  // read each protein as IUPAC and check its first residue
  // NOTE(review): prot_seq.front() assumes the protein sequence is non-empty
4011  for (CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_aa); b_iter; ++b_iter) {
4012  string prot_seq;
4013  CSeqVector new_svec = b_iter->GetSeqVector(CBioseq_Handle::eCoding_Iupac, eNa_strand_plus);
4014  new_svec.GetSeqData(0, b_iter->GetBioseqLength(), prot_seq);
4015  BOOST_CHECK(prot_seq.front() == 'M');
4016  }
4017 }
4018 
// Test_ReplaceProteinSeqs2: loads test_data/mat_peptide.asn as the old entry and
// test_data/mat_pepshort_upd.fa as the FASTA update, runs every non-identical update
// through CSequenceUpdater inside one composite command, and then checks the first
// protein Bioseq: first residue 'M', length 562, and the location of each Prot feature.
4019 BOOST_AUTO_TEST_CASE(Test_ReplaceProteinSeqs2)
4020 {
4021  string old_fname("test_data/mat_peptide.asn");
4022  string upd_fname("test_data/mat_pepshort_upd.fa");
4023 
4024  CRef<CSeq_entry> old_entry = ReadEntryFromFile(old_fname);
4025  CRef<CSeq_entry> upd_entry = ReadFastaSeqsFromFile(upd_fname);
4026  CRef<CScope> scope = BuildScope();
4027 
4028  CSeq_entry_Handle old_seh = scope->AddTopLevelSeqEntry(*old_entry);
4029  BOOST_REQUIRE(old_seh);
4030 
4031 
    // Each setup step runs only if the previous one succeeded; any failure aborts the test.
4032  CUpdateMultipleSeq_Input multiupdseq_in;
4033  bool ok = multiupdseq_in.SetOldEntryAndScope(old_seh);
4034  ok = ok && multiupdseq_in.SetUpdateEntry(upd_entry);
4035  ok = ok && multiupdseq_in.PrepareMultipleSequenceInputsForUnitTest();
4036  BOOST_REQUIRE(ok);
4037 
4038  const CUpdateMultipleSeq_Input::TIDToUpdInputMap& updates = multiupdseq_in.GetNonIdenticalUpdates();
4039  const CUpdateMultipleSeq_Input::TIDToUpdInputMap& identicals = multiupdseq_in.GetIdenticalUpdates();
4040  const CUpdateMultipleSeq_Input::TSeqIDHVector& noUpdates = multiupdseq_in.GetSeqsWithoutUpdates();
4041 
    // Expected classification: exactly one non-identical update and nothing else.
4042  BOOST_CHECK(updates.size() == 1);
4043  BOOST_CHECK(identicals.empty());
4044  BOOST_CHECK(noUpdates.empty());
4045 
    // NOTE(review): `params` is used below but no declaration is visible in this listing
    // (the embedded numbering jumps from 4046 to 4048) - confirm against the full source.
4046  CRef<CCmdComposite> update_cmd(new CCmdComposite("Update multiple sequences"));
4048  bool create_general_only = objects::edit::IsGeneralIdProtPresent(old_seh.GetTopLevelEntry());
4049 
    // Collect one sub-command per update, then execute them together.
4050  for (auto& it : updates) {
4051  CSequenceUpdater updater(it.second.GetObject(), params);
4052  CRef<CCmdComposite> cmd = updater.Update(create_general_only);
4053  update_cmd->AddCommand(*cmd);
4054  }
4055 
4056  update_cmd->Execute();
4057 
    // NOTE(review): b_iter is dereferenced below even if this check fails, because
    // BOOST_CHECK does not stop the test case; BOOST_REQUIRE may be what is intended.
4058  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_aa);
4059  BOOST_CHECK(b_iter);
    // NOTE(review): `new_svec` has no visible declaration here (numbering jumps from
    // 4060 to 4062) - presumably a CSeqVector obtained from *b_iter; confirm.
4060  string prot_seq;
4062  new_svec.GetSeqData(0, b_iter->GetBioseqLength(), prot_seq);
4063  BOOST_CHECK(prot_seq.front() == 'M');
4064 
4065  BOOST_CHECK_EQUAL(b_iter->GetBioseqLength(), 562);
    // Check the location of every Prot feature on the protein.
    // NOTE(review): line 4069 is absent from this listing, so the if/else nesting
    // below does not balance as shown - confirm the missing condition in the full source.
4066  for (CFeat_CI it(*b_iter, SAnnotSelector(CSeqFeatData::e_Prot)); it; ++it) {
4067  const CProt_ref& prot_ref = it->GetOriginalFeature().GetData().GetProt();
4068  if (prot_ref.IsSetProcessed()) {
4070  BOOST_CHECK_EQUAL(MakeAsn(it->GetLocation()),
4071  "Seq-loc ::= int {\n"
4072  " from 0,\n"
4073  " to 15,\n"
4074  " id local str \"Seq4_prot_6\"\n"
4075  "}\n");
4076  }
4077  else {
4078  if (prot_ref.GetName().front() == "HA1") {
4079  BOOST_CHECK_EQUAL(MakeAsn(it->GetLocation()),
4080  "Seq-loc ::= int {\n"
4081  " from 16,\n"
4082  " to 343,\n"
4083  " id local str \"Seq4_prot_6\"\n"
4084  "}\n");
4085  }
4086  else {
4087  BOOST_CHECK_EQUAL(MakeAsn(it->GetLocation()),
4088  "Seq-loc ::= int {\n"
4089  " from 344,\n"
4090  " to 561,\n"
4091  " id local str \"Seq4_prot_6\"\n"
4092  "}\n");
4093  }
4094  }
4095  }
4096  else {
4097  BOOST_CHECK_EQUAL(MakeAsn(it->GetLocation()),
4098  "Seq-loc ::= int {\n"
4099  " from 0,\n"
4100  " to 561,\n"
4101  " id local str \"Seq4_prot_6\"\n"
4102  "}\n");
4103  }
4104  }
4105 
4106 }
4107 
// Test_s_ExtendOneEndOfSequence: checks the Seq-inst objects `inst_ext5` / `inst_ext3`
// for an RNA sequence - first with an extension string that mixes nucleotide and
// non-nucleotide letters, then with "PPPPP", for which both results must be null.
// NOTE(review): the declarations of `entry`, `scope`, `inst_ext5` and `inst_ext3`, and the
// calls that fill `extended5` / `extended3`, are not visible in this listing (the embedded
// numbering skips 4110, 4113, 4121-4122, 4129, 4134 and 4142-4143) - confirm against the
// full source before editing this test.
4108 BOOST_AUTO_TEST_CASE(Test_s_ExtendOneEndOfSequence)
4109 {
4111  entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_rna);
4112 
4114  CSeq_entry_Handle seh = scope.AddTopLevelSeqEntry(*entry);
4115 
4116  CBioseq_CI b_iter(seh, CSeq_inst::eMol_na);
4117  BOOST_REQUIRE(b_iter);
4118 
    // Case 1: extension string containing letters such as 'l', 'z' and 'e' among a/c/g/t.
4119  {{
4120  string extend("aaallaagggggzzzztttttccccec");
4123 
    // Both extended instances must have length 80 and keep the RNA molecule type.
4124  BOOST_CHECK(inst_ext5->GetLength() == inst_ext3->GetLength());
4125  BOOST_CHECK(inst_ext5->GetLength() == 80);
4126  BOOST_CHECK(inst_ext5->GetMol() == CSeq_inst::eMol_rna);
4127 
    // Expected 5' result: the 20 letters AAAAAGGGGGTTTTTCCCCC come first.
4128  string extended5;
4130  CSeqUtil::e_Ncbi2na, 0, 80, extended5, CSeqUtil::e_Iupacna);
4131  BOOST_CHECK_EQUAL(extended5, string("AAAAAGGGGGTTTTTCCCCCAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA"));
4132 
    // Expected 3' result: the same 20 letters come last.
4133  string extended3;
4135  CSeqUtil::e_Ncbi2na, 0, 80, extended3, CSeqUtil::e_Iupacna);
4136  BOOST_CHECK_EQUAL(extended3, string("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAAAAAGGGGGTTTTTCCCCC"));
4137  }}
4138 
    // Case 2: with the extension "PPPPP" both results are expected to be null.
4139  {{
4140 
4141  string extend("PPPPP");
4144 
4145  BOOST_CHECK(!inst_ext5);
4146  BOOST_CHECK(!inst_ext3);
4147  }}
4148 }
4149 
4150 BOOST_AUTO_TEST_CASE(Test_s_OffsetFeature)
4151 {
4152  CRef<CSeq_id> seq_id(new CSeq_id(CSeq_id::e_Local, "good"));
4153  size_t left = 0;
4154  size_t right = 10;
4155  CRef<CSeq_feat> misc_feat = unit_test_util::MakeMiscFeature(seq_id, right, left);
4156 
4157  CRef<CSeq_loc> newloc(new CSeq_loc);
4158  newloc->SetInt().SetFrom(30);
4159  newloc->SetInt().SetTo(59);
4160  newloc->SetInt().SetId().Assign(*seq_id);
4161  misc_feat->SetLocation().Add(*newloc);
4162 
4163  string extend = CSequenceUpdater::s_GetValidExtension("aaalaapgggggzzzztttttccccpc");
4164  BOOST_REQUIRE(extend.size() == 20);
4165  {{
4166  CRef<CSeq_feat> newfeat = CSequenceUpdater::s_OffsetFeature(*misc_feat, static_cast<TSeqPos>(extend.size()));
4167  BOOST_CHECK_EQUAL(MakeAsn(newfeat->GetLocation()),
4168  "Seq-loc ::= packed-int {\n"
4169  " {\n"
4170  " from 20,\n"
4171  " to 30,\n"
4172  " id local str \"good\"\n"
4173  " },\n"
4174  " {\n"
4175  " from 50,\n"
4176  " to 79,\n"
4177  " id local str \"good\"\n"
4178  " }\n"
4179  "}\n");
4180  }}
4181 
4182  misc_feat->SetLocation().SetStrand(eNa_strand_minus);
4183  {{
4184  CRef<CSeq_feat> newfeat = CSequenceUpdater::s_OffsetFeature(*misc_feat, static_cast<TSeqPos>(extend.size()));
4185  BOOST_CHECK_EQUAL(MakeAsn(newfeat->GetLocation()),
4186  "Seq-loc ::= packed-int {\n"
4187  " {\n"
4188  " from 20,\n"
4189  " to 30,\n"
4190  " strand minus,\n"
4191  " id local str \"good\"\n"
4192  " },\n"
4193  " {\n"
4194  " from 50,\n"
4195  " to 79,\n"
4196  " strand minus,\n"
4197  " id local str \"good\"\n"
4198  " }\n"
4199  "}\n");
4200  }}
4201 
4202  CRef<CSeq_feat> trna(new CSeq_feat);
4203  trna->SetData().SetRna().SetType(CRNA_ref::eType_tRNA);
4204  trna->SetLocation().Assign(misc_feat->GetLocation());
4205 
4206  CRNA_ref::C_Ext& ext = trna->SetData().SetRna().SetExt();
4207  ext.SetTRNA().SetAnticodon().SetInt().SetFrom(40);
4208  ext.SetTRNA().SetAnticodon().SetInt().SetTo(42);
4209  ext.SetTRNA().SetAnticodon().SetInt().SetStrand(eNa_strand_minus);
4210  ext.SetTRNA().SetAnticodon().SetInt().SetId().Assign(*seq_id);
4211  {{
4212  CRef<CSeq_feat> newfeat = CSequenceUpdater::s_OffsetFeature(*trna, static_cast<TSeqPos>(extend.size()));
4213  BOOST_CHECK_EQUAL(MakeAsn(newfeat->GetLocation()),
4214  "Seq-loc ::= packed-int {\n"
4215  " {\n"
4216  " from 20,\n"
4217  " to 30,\n"
4218  " strand minus,\n"
4219  " id local str \"good\"\n"
4220  " },\n"
4221  " {\n"
4222  " from 50,\n"
4223  " to 79,\n"
4224  " strand minus,\n"
4225  " id local str \"good\"\n"
4226  " }\n"
4227  "}\n");
4228  const CRNA_ref::C_Ext& newExt = newfeat->GetData().GetRna().GetExt();
4229  BOOST_CHECK_EQUAL(MakeAsn(newExt.GetTRNA().GetAnticodon()),
4230  "Seq-loc ::= int {\n"
4231  " from 60,\n"
4232  " to 62,\n"
4233  " strand minus,\n"
4234  " id local str \"good\"\n"
4235  "}\n");
4236  }}
4237 
4238 }
4239 
4240 BOOST_AUTO_TEST_CASE(Test_FixCollidingIDs)
4241 {
4242  string old_fname("test_data/collideID.asn");
4243  string upd_fname("test_data/collideID_update.asn");
4244  CRef<CScope> scope = BuildScope();
4245 
4246  CRef<CSeq_entry> old_entry = ReadEntryFromFile(old_fname);
4247  CSeq_entry_Handle old_seh = scope->AddTopLevelSeqEntry(*old_entry);
4248  BOOST_REQUIRE(old_seh);
4249 
4250  CRef<CSeq_entry> upd_entry = ReadEntryFromFile(upd_fname);
4251  upd_entry->Parentize();
4252  const CBioseq::TId& old_ids = old_entry->GetSeq().GetId();
4253  CUpdateSeq_Input::s_FixCollidingIDs_Entry(*upd_entry, old_ids);
4254 
4255  CSeq_entry_Handle upd_seh = scope->AddTopLevelSeqEntry(*upd_entry);
4256  BOOST_REQUIRE(upd_seh);
4257  BOOST_REQUIRE(CFeat_CI(upd_seh, CSeqFeatData::eSubtype_cdregion));
4258 }
4259 
4260 BOOST_AUTO_TEST_CASE(Test_Align2IdentSeq)
4261 {
4262  string old_fname("test_data/GB_5422.asn");
4263  CRef<CScope> scope = BuildScope();
4264 
4265  CRef<CSeq_entry> old_entry = ReadEntryFromFile(old_fname);
4266  CSeq_entry_Handle old_seh = scope->AddTopLevelSeqEntry(*old_entry);
4267  BOOST_REQUIRE(old_seh);
4268 
4269  CBioseq_Handle old_bsh;
4270  CBioseq_CI b_iter(old_seh, CSeq_inst::eMol_na);
4271  if (b_iter) old_bsh = *b_iter;
4272 
4273  CRef<CSeq_entry> upd_entry = ReadEntryFromFile(old_fname);
4274  upd_entry->Parentize();
4275  const CBioseq::TId& old_ids = old_entry->GetSeq().GetId();
4276  CUpdateSeq_Input::s_FixCollidingIDs_Entry(*upd_entry, old_ids);
4277 
4278  CSeq_entry_Handle upd_seh = scope->AddTopLevelSeqEntry(*upd_entry);
4279  BOOST_REQUIRE(upd_seh);
4280 
4281  CBioseq_Handle upd_bsh;
4282  CBioseq_CI bupd_iter(upd_seh, CSeq_inst::eMol_na);
4283  if (bupd_iter) upd_bsh = *bupd_iter;
4284 
4285  CRef<CSeq_annot> annot = CUpdateSeq_Input::s_Align2IdenticalSeq(old_bsh, upd_bsh);
4286  BOOST_CHECK_EQUAL(MakeAsn(annot.GetObject()),
4287  "Seq-annot ::= {\n"
4288  " desc {\n"
4289  " name \"Alignment generated between two identical sequences\"\n"
4290  " },\n"
4291  " data align {\n"
4292  " {\n"
4293  " type partial,\n"
4294  " dim 2,\n"
4295  " segs denseg {\n"
4296  " dim 2,\n"
4297  " numseg 1,\n"
4298  " ids {\n"
4299  " local str \"KT250608_update\",\n"
4300  " genbank {\n"
4301  " accession \"KT250608\"\n"
4302  " }\n"
4303  " },\n"
4304  " starts {\n"
4305  " 0,\n"
4306  " 0\n"
4307  " },\n"
4308  " lens {\n"
4309  " 1085\n"
4310  " }\n"
4311  " }\n"
4312  " }\n"
4313  " }\n"
4314  "}\n");
4315 
4316 }
4317 
4318 BOOST_AUTO_TEST_CASE(Test_GoodAlignment_GB7819)
4319 {
4320  string old_fname("test_data/goodalign.asn");
4321  string upd_fname("test_data/update_goodalign.asn");
4322  CSeq_entry_Handle old_seh, upd_seh;
4323  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
4324 
4325  CUpdateSeq_Input sequpd_input;
4326  SetupForUpdate(old_seh, upd_seh, sequpd_input);
4327 
4328  auto align = sequpd_input.GetAlignment();
4329  BOOST_CHECK(align);
4330 
4331  BOOST_CHECK(align->GetAlignLength() == 228);
4332 }
4333 
4334 BOOST_AUTO_TEST_CASE(Test_GoodAlignment_GB8669)
4335 {
4336  string old_fname("test_data/GB_8669.asn");
4337  string upd_fname("test_data/update_GB_8669.asn");
4338  CSeq_entry_Handle old_seh, upd_seh;
4339  ReadFromFile(old_fname, upd_fname, old_seh, upd_seh);
4340 
4341  CUpdateSeq_Input sequpd_input;
4342  SetupForUpdate(old_seh, upd_seh, sequpd_input);
4343 
4344  auto align = sequpd_input.GetAlignment();
4345  BOOST_CHECK(align);
4346 
4347  BOOST_CHECK(align->GetAlignLength() == 53333);
4348  NcbiCout << "Passed ALL sequence update tests" << NcbiEndl;
4349 }
4350 
4351 //////////////////////////////////////////////////////////////////////////////////
// sc_MainSequence: ASN.1 text for a nuc-prot Seq-entry set (taxname "Salinicola sp. SOil-1").
//  - nucleotide: local id "Untitled_Seq_#1" / GenBank KM986631, mol rna (biomol rRNA),
//    length 985, with a gene feature (0..983) and a two-interval mRNA feature
//    (199..399, 499..599);
//  - protein: local id "KM986631_1", length 70, with one partial Prot feature (0..69);
//  - set-level annotation: a partial coding region (frame three, 249..399 + 499..562)
//    whose product is KM986631_1.
// NOTE(review): presumably the "old" sequence for the update tests - its users are
// outside this section; confirm.
4352 const char* sc_MainSequence = "\
4353 Seq-entry ::= set { \
4354  class nuc-prot , \
4355  descr { \
4356  source{ \
4357  genome genomic, \
4358  org { \
4359  taxname \"Salinicola sp. SOil-1\" , \
4360  orgname { \
4361  gcode 1 } } } }, \
4362  seq-set { \
4363  seq { \
4364  id { \
4365  local str \"Untitled_Seq_#1\" , \
4366  genbank { \
4367  accession \"KM986631\" } } , \
4368  descr { \
4369  molinfo { \
4370  biomol rRNA } }, \
4371  inst { \
4372  repr raw , \
4373  mol rna , \
4374  length 985 , \
4375  seq-data iupacna \"\
4376 TGCAGTCGAGCGGCAGCACGGGGAGCTTGCTCCCTGGTGGCGAGCGGCGGACGGGTGAGTAATGTAGGAA\
4377 TCTGCCCGGTAGTGGGGGATAACGTGGGGAAACCCACGCTAATACCGCATACGTCCTACGGGAGAAAGCG\
4378 GAGGATCTTCGGACTTCGCGCTATCGGATGAGCCTATGTCGGATTAGCTAGTTGGTAAGGTAACGGCTTA\
4379 CCAAGGCGACGATCCGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACT\
4380 CCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTTGATCCAGCCATGCCGCGTGTGT\
4381 GAAGAAGGCTTTCGGGTTGTAAAGCACTTTCAGCGAGGAAGAAAGCCTGGTGGTTAAAGCACCGGCTAAC\
4382 TCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCG\
4383 TAGGTGGCTTGGCACGCCGGTTGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCGGAACGGCCAGGC\
4384 TAGAGTGCAGGAGAGGAAGGTAGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAATACCA\
4385 GTGGCGAAGGCGGCCTTCTGGCCTGACACTGACACTGAGGTGCGAAAGCGTGGGTAGCAAACAGGATTAG\
4386 ATACCCTGGTAGTCCACGCCGTAAACGATGTCGACTAGCCGTTGGGACCTTTAAGGACTTAGTGGCGCAG\
4387 TTAACGCGATAAGTCGACCGCCTGGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGC\
4388 CCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTACCCTTGACATCCT\
4389 GCGAATTTGGTAGAGATACCTTAGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCT\
4390 CGCGC\" } , \
4391  annot { \
4392  { \
4393  data \
4394  ftable{ \
4395  { \
4396  data \
4397  gene{ locus \"16S rDNA\" } , \
4398  comment \"partial sequence\", \
4399  location \
4400  int { \
4401  from 0 , \
4402  to 983 , \
4403  strand plus , \
4404  id local str \"Untitled_Seq_#1\" } } , \
4405  { \
4406  data \
4407  rna { \
4408  type mRNA , \
4409  ext \
4410  name \"new mRNA\" } , \
4411  comment \"mRNA comment\" , \
4412  location \
4413  mix { \
4414  int{ \
4415  from 199 , \
4416  to 399 , \
4417  strand plus , \
4418  id \
4419  genbank{ \
4420  accession \"KM986631\" } } , \
4421  int{ \
4422  from 499 , \
4423  to 599 , \
4424  strand plus , \
4425  id \
4426  genbank { \
4427  accession \"KM986631\" } } } } } } } } , \
4428  seq { \
4429  id { local str \"KM986631_1\" } , \
4430  descr { \
4431  molinfo { \
4432  biomol peptide , \
4433  completeness partial } } , \
4434  inst{ \
4435  repr raw , \
4436  mol aa , \
4437  length 70 , \
4438  seq-data \
4439  ncbieaa \"HTGTETRPRLLREAAVGNIGQWAKALIQPCRVCEEGFRVVKHFQRGRKPGGTPVVKAPGSTWERHPERPG\" } , \
4440  annot{ \
4441  { \
4442  data \
4443  ftable{ \
4444  { \
4445  data \
4446  prot { \
4447  name { \"new protein\" } } , \
4448  partial TRUE , \
4449  location \
4450  int { \
4451  from 0 , \
4452  to 69 , \
4453  id local str \"KM986631_1\" } } } } } } } , \
4454  annot { \
4455  { \
4456  data \
4457  ftable{ \
4458  { \
4459  data \
4460  cdregion { \
4461  frame three , \
4462  code { id 1 } } , \
4463  partial TRUE , \
4464  product \
4465  whole local str \"KM986631_1\" , \
4466  location \
4467  mix { \
4468  int { \
4469  from 249 , \
4470  to 399 , \
4471  strand plus , \
4472  id \
4473  genbank { \
4474  accession \"KM986631\" } } , \
4475  int { \
4476  from 499 , \
4477  to 562 , \
4478  strand plus , \
4479  id \
4480  genbank { \
4481  accession \"KM986631\" } } } } } } } } \
4482 }";
4483 
// sc_UpdSequence_Same: ASN.1 text for a single raw DNA Bioseq (local id "Seq_01",
// length 985) with no features.
// NOTE(review): the residue text appears to match the nucleotide in sc_MainSequence
// line for line, as the name suggests - confirm before relying on it.
4484 const char* sc_UpdSequence_Same = "\
4485 Seq-entry ::= seq { \
4486  id { \
4487  local str \"Seq_01\" } , \
4488  inst { \
4489  repr raw , \
4490  mol dna , \
4491  length 985 , \
4492  seq-data iupacna \"\
4493 TGCAGTCGAGCGGCAGCACGGGGAGCTTGCTCCCTGGTGGCGAGCGGCGGACGGGTGAGTAATGTAGGAA\
4494 TCTGCCCGGTAGTGGGGGATAACGTGGGGAAACCCACGCTAATACCGCATACGTCCTACGGGAGAAAGCG\
4495 GAGGATCTTCGGACTTCGCGCTATCGGATGAGCCTATGTCGGATTAGCTAGTTGGTAAGGTAACGGCTTA\
4496 CCAAGGCGACGATCCGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACT\
4497 CCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTTGATCCAGCCATGCCGCGTGTGT\
4498 GAAGAAGGCTTTCGGGTTGTAAAGCACTTTCAGCGAGGAAGAAAGCCTGGTGGTTAAAGCACCGGCTAAC\
4499 TCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCG\
4500 TAGGTGGCTTGGCACGCCGGTTGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCGGAACGGCCAGGC\
4501 TAGAGTGCAGGAGAGGAAGGTAGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGAGGAATACCA\
4502 GTGGCGAAGGCGGCCTTCTGGCCTGACACTGACACTGAGGTGCGAAAGCGTGGGTAGCAAACAGGATTAG\
4503 ATACCCTGGTAGTCCACGCCGTAAACGATGTCGACTAGCCGTTGGGACCTTTAAGGACTTAGTGGCGCAG\
4504 TTAACGCGATAAGTCGACCGCCTGGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGC\
4505 CCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTACCCTTGACATCCT\
4506 GCGAATTTGGTAGAGATACCTTAGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGTCAGCT\
4507 CGCGC\" } \
4508 }";
4509 
// sc_UpdSequence_Replace: ASN.1 text for a single raw DNA Bioseq (local id "Seq_02",
// declared length 1360) with no features.
// NOTE(review): presumably the update used when the old sequence is replaced as a
// whole - its users are outside this section; confirm.
4510 const char* sc_UpdSequence_Replace = "\
4511 Seq-entry ::= seq { \
4512  id { \
4513  local str \"Seq_02\" } , \
4514  inst { \
4515  repr raw , \
4516  mol dna , \
4517  length 1360 , \
4518  seq-data iupacna \"\
4519 GAGGCACGGGGAGCTTGCTCCCTGGTGGCGAGCGGCGGACGGGTGAGTAATGTAGGAA\
4520 TCTGCCCGGTAGTGGGGGATAACGTGGGGAAACCCACGCTAATACCGCATACGTCCTACGGGAGAAAGCG\
4521 GAGGATCTTCGGACTTCGCGCTATCGGATGAGCCTATGTCGGATTAGCTAGTTGGTAAGGTAACGGCTTA\
4522 CCAAGGCGACGATCCGTAGCTGGTCTGAGAGGATGATCAGCCACACTGGGACTGAGACACGGCCCAGACT\
4523 CCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTTGATCCAGCCATGCCGCGTGTGT\
4524 GAAGAAGGCTTTCGGGTTGTAAAGCACTTTCAGCGAGGAAGAAAGCCTGGTGGTTAATACCCATCAGGAA\
4525 GGACATCACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCGAGC\
4526 GTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGCTTGGCACGCCGGTTGTGAAAGCCCCGGGCT\
4527 CAACCTGGGAACGGCATCCGGAACGGCCAGGCTAGAGTGCAGGAGAGGAAGGTAGAATTCCCGGTGTAGC\
4528 GGTGAAATGCGTAGAGATCGGGAGGAATACCAGTGGCGAAGGCGGCCTTCTGGCCTGACACTGACACTGA\
4529 GGTGCGAAAGCGTGGGTAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCGACTAG\
4530 CCGTTGGGACCTTTAAGGACTTAGTGGCGCAGTTAACGCGATAAGTCGACCGCCTGGGGGAGTACGGCCG\
4531 CAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCA\
4532 ACGCGAAGAACCTTACCTACCCTTGACATCCTGCGAATTTGGTAGAGATACCTTAGTGCCTTCGGGAGCG\
4533 CAGTGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTTGTGAAATGTTGGGTTAAGTCCCGTAACGAGCG\
4534 CAACCCTTTGTCCTTATTTGCCAGCACGTAATGGTGGGAACTCTAAGGAGACTGCCGGTGACAAACCGGA\
4535 GGAAGGTGGGGACGACGTCGGCCCTTACGGGTAGGGCTACACACGTGCTACAATGGCCGG\
4536 TACAAAGGGTTGCGAGACCGCGAGGTGGAGCGAATCCCAGAAAGCCGGCCTCAGTCCGGATCGGAGTCTG\
4537 CAACTCGACTCCGTGAAGTCGGAAAGTAATCGTGAATCAGAATGTCACGGTGAATACGTTCCCGG\
4538 GCCTTGTACACACCGCCCGTCACACCATGGGAGTGGACTGCACCAGAAGTGGTTAGC\" } \
4539 }";
4540 
// sc_UpdSequence_Patch: ASN.1 text for a nuc-prot Seq-entry set.
//  - nucleotide: local id "Seq_02", raw DNA, length 132, with a gene feature (0..131);
//  - protein: local id "seq_02_1", length 26, with one partial Prot feature (0..25);
//  - set-level annotation: a partial coding region (frame one, 19..99, fuzz-from lim lt)
//    whose product is seq_02_1.
// NOTE(review): the Bioseq id is spelled "Seq_02" while the gene and coding-region
// locations use "seq_02" - confirm that this difference in case is intended.
4541 const char* sc_UpdSequence_Patch = "\
4542 Seq-entry ::= set { \
4543  class nuc-prot , \
4544  seq-set { \
4545  seq { \
4546  id { local str \"Seq_02\" } , \
4547  inst { \
4548  repr raw , \
4549  mol dna , \
4550  length 132 , \
4551  seq-data iupacna \"AGGAGAGGAAGGTAGAATTCCCGGTGTAGCGGTGAAATGCGTAGAGATCGGGCCCCCCCCCCCCCCCCCC\
4552 CCTGACACTGAGGTGCGAAAGCATGGGTAGCAAACAGGATTAGATACCCTGGTAGTCCACGC\" } , \
4553  annot { \
4554  { \
4555  data \
4556  ftable{ \
4557  { \
4558  data \
4559  gene{ locus \"new gene locus\" } , \
4560  location \
4561  int { \
4562  from 0 , \
4563  to 131 , \
4564  strand plus , \
4565  id local str \"seq_02\" } } } } } } , \
4566  seq { \
4567  id{ local str \"seq_02_1\" } , \
4568  descr { \
4569  title \"new protein name, partial\", \
4570  molinfo { \
4571  biomol peptide, \
4572  tech concept-trans, \
4573  completeness no-left } } , \
4574  inst { \
4575  repr raw , \
4576  mol aa, \
4577  length 26 , \
4578  seq-data ncbieaa \"PGVAVKCVEIGPPPPPPPDTEVRKHG\" } , \
4579  annot { \
4580  { \
4581  data \
4582  ftable{ \
4583  { \
4584  data \
4585  prot { \
4586  name { \"new protein name\" } , \
4587  desc \"new protein descr\" } , \
4588  partial TRUE , \
4589  location \
4590  int { \
4591  from 0 , \
4592  to 25 , \
4593  id local str \"seq_02_1\" , \
4594  fuzz-from lim lt } } } } } } } , \
4595 annot{ \
4596  { \
4597  data \
4598  ftable{ \
4599  { \
4600  data \
4601  cdregion { \
4602  frame one, \
4603  code{ id 1 } } , \
4604  partial TRUE, \
4605  product whole local str \"seq_02_1\" , \
4606  location \
4607  int { \
4608  from 19 , \
4609  to 99 , \
4610  strand plus , \
4611  id local str \"seq_02\" , \
4612  fuzz-from lim lt } } } } } } \
4613 }";
4614 
// sc_UpdSequence_Extend5: ASN.1 text for a nuc-prot Seq-entry set.
//  - nucleotide: local id "Seq_extend5", mol na, length 95, with a partial gene feature
//    on the minus strand (2..54) and a misc_feature import feature (70..89);
//  - protein: local id "Seq_extend5_1" with one Prot feature (0..16);
//  - set-level annotation: a coding region on the minus strand (2..54) whose product is
//    Seq_extend5_1.
// NOTE(review): the protein declares "length 17" but its ncbieaa seq-data literal has
// 18 letters - confirm whether this mismatch is intentional test input.
4615 const char* sc_UpdSequence_Extend5 = "\
4616 Seq-entry ::= set { \
4617  class nuc-prot , \
4618  seq-set { \
4619  seq { \
4620  id { local str \"Seq_extend5\" } , \
4621  inst { \
4622  repr raw , \
4623  mol na , \
4624  length 95 , \
4625  seq-data iupacna \"\
4626 GGGGGGGGGGCCCCGGAAAAAAAAAGGGGGGGGGGGCGCACGTTTTTTCACACAGGGGTGCAGTCGAGCGGCAGCACGGGGAGCTTGCTCCCTGG\" } , \
4627  annot { \
4628  { \
4629  data \
4630  ftable{ \
4631  { \
4632  data \
4633  gene{ \
4634  locus \"import gene locusA\" }, \
4635  partial TRUE, \
4636  location \
4637  int { \
4638  from 2, \
4639  to 54, \
4640  strand minus, \
4641  id local str \"Seq_extend5\", \
4642  fuzz-from lim lt, \
4643  fuzz-to lim gt } } , \
4644  { \
4645  data \
4646  imp { \
4647  key \"misc_feature\" } , \
4648  location \
4649  int{ \
4650  from 70, \
4651  to 89, \
4652  strand plus, \
4653  id local str \"Seq_extend5\" } , \
4654  qual{ \
4655  { \
4656  qual \"number\", \
4657  val \"1\" }, \
4658  { \
4659  qual \"product\", \
4660  val \"feat_product\" } } } } } } }, \
4661  seq { \
4662  id { local str \"Seq_extend5_1\" } , \
4663  descr{ \
4664  title \"import gene locusA gene product\", \
4665  molinfo { \
4666  biomol peptide, \
4667  tech concept-trans } } , \
4668  inst{ \
4669  repr raw, \
4670  mol aa, \
4671  length 17, \
4672  seq-data ncbieaa \"MCEKTCAPPPFFFPGPPP\" } , \
4673  annot { \
4674  { \
4675  data \
4676  ftable{ \
4677  { \
4678  data \
4679  prot{ \
4680  name{ \"GR_protein name\" } } , \
4681  location \
4682  int{ \
4683  from 0, \
4684  to 16, \
4685  id local str \"Seq_extend5_1\" } } } } } } } , \
4686  annot { \
4687  { \
4688  data \
4689  ftable{ \
4690  { \
4691  data \
4692  cdregion{ \
4693  code{ id 1 } } , \
4694  product \
4695  whole local str \"Seq_extend5_1\", \
4696  location \
4697  int { \
4698  from 2, \
4699  to 54, \
4700  strand minus, \
4701  id local str \"Seq_extend5\" } } } } } } \
4702 }";
4703 
4705 Seq-entry ::= set { \
4706  class nuc-prot , \
4707  seq-set { \
4708  seq { \
4709  id { local str \"Seq_extend5\" } , \
4710  inst { \
4711  repr raw , \
4712  mol na , \
4713  length 92 , \
4714  seq-data iupacna \"\
4715 GGGGGGGGGGCCCCGGAAAAAAAAAGGGGGGGGGGGCGCACGTTTTTTCACACAGGGGTGCAGTCGGGCAGCACGGGGAGCTTGCTCCCTGG\" } , \
4716  annot { \
4717  { \
4718  data \
4719  ftable{ \
4720  { \
4721  data \
4722  gene{ \
4723  locus \"import gene locusA\" }, \
4724  partial TRUE, \
4725  location \
4726  int { \
4727  from 2, \
4728  to 54, \
4729  strand plus, \
4730  id local str \"Seq_extend5\", \
4731  fuzz-from lim lt, \
4732  fuzz-to lim gt } } , \
4733  { \
4734  data \
4735  imp { \
4736  key \"misc_feature\" } , \
4737  location \
4738  int{ \
4739  from 67, \
4740  to 86, \
4741  strand plus, \
4742  id local str \"Seq_extend5\" } , \
4743  qual{ \
4744  { \
4745  qual \"number\", \
4746  val \"1\" }, \
4747  { \
4748  qual \"product\", \
4749  val \"feat_product\" } } } } } } }, \
4750  seq { \
4751  id { local str \"Seq_extend5_1\" } , \
4752  descr{ \
4753  title \"import gene locusA gene product\", \
4754  molinfo { \
4755  biomol peptide, \
4756  tech concept-trans } } , \
4757  inst{ \
4758  repr raw, \
4759  mol aa, \
4760  length 17, \
4761  seq-data ncbieaa \"-GGPGKKKGGGAHVFSH\" } , \
4762  annot { \
4763  { \
4764  data \
4765  ftable{ \
4766  { \
4767  data \
4768  prot{ \
4769  name{ \"GR_protein name\" } } , \
4770  location \
4771  int{ \
4772  from 0, \
4773  to 16, \
4774  id local str \"Seq_extend5_1\" } } } } } } } , \
4775  annot { \
4776  { \
4777  data \
4778  ftable{ \
4779  { \
4780  data \
4781  cdregion{ \
4782  code{ id 1 } } , \
4783  product \
4784  whole local str \"Seq_extend5_1\", \
4785  location \
4786  int { \
4787  from 2, \
4788  to 54, \
4789  strand plus, \
4790  id local str \"Seq_extend5\" } } } } } } \
4791 }";
4792 
// sc_UpdSequence_Extend3: ASN.1 text for a nuc-prot Seq-entry set.
//  - nucleotide: local id "Seq_extend3", mol na, length 195, with a gene feature (4..89,
//    allele "dominant") and a misc_feature import feature whose location is a mix of an
//    interval (119..131) and a point (139);
//  - protein: local id "Seq_extend3_1", length 25, with one Prot feature (0..24);
//  - set-level annotation: a coding region (frame two, 44..119) whose product is
//    Seq_extend3_1.
// NOTE(review): presumably the update used for extending the 3' end - its users are
// outside this section; confirm.
4793 const char* sc_UpdSequence_Extend3 = "\
4794 Seq-entry ::= set { \
4795  class nuc-prot , \
4796  seq-set { \
4797  seq { \
4798  id { local str \"Seq_extend3\" } , \
4799  inst { \
4800  repr raw , \
4801  mol na , \
4802  length 195, \
4803  seq-data iupacna \"\
4804 TTGACATCCTGCGAATTTGGTAGAGATACCTTAGTGCCTTCGGGAGCGCAGTGACAGGTGCTGCATGGCTGTCGT\
4805 CAGCTCGCGCGGGAGAAAGGGGTTTTTTTTTTTATATTATACCCCACCCCTCTCTCCCGGGGGGAGATTAGCCAC\
4806 AGGGGTTTTTTTTTTTATATTATACCCCCCGGGGGGAGATTAGCC\" } , \
4807  annot{ \
4808  { \
4809  data \
4810  ftable{ \
4811  { \
4812  data \
4813  gene { \
4814  locus \"import extend3 gene locus\" ,\
4815  allele \"dominant\" } ,\
4816  location \
4817  int{ \
4818  from 4, \
4819  to 89, \
4820  strand plus, \
4821  id local str \"Seq_extend3\" } }, \
4822  { \
4823  data \
4824  imp{ \
4825  key \"misc_feature\" }, \
4826  location \
4827  mix{ \
4828  int{ \
4829  from 119, \
4830  to 131, \
4831  strand plus, \
4832  id local str \"Seq_extend3\" }, \
4833  pnt{ \
4834  point 139, \
4835  strand plus, \
4836  id local str \"Seq_extend3\" } }, \
4837  qual{ \
4838  { \
4839  qual \"number\", \
4840  val \"1\" }, \
4841  { \
4842  qual \"product\", \
4843  val \"ext3 misc_feat product\" } } } } } } }, \
4844  seq { \
4845  id{ \
4846  local str \"Seq_extend3_1\" }, \
4847  descr{ \
4848  title \"new test protein ext3\", \
4849  molinfo{ \
4850  biomol peptide, \
4851  tech concept-trans } }, \
4852  inst{ \
4853  repr raw, \
4854  mol aa, \
4855  length 25, \
4856  seq-data ncbieaa \"AQ*QVLHGCRQLAREKGVFFFILYP\" }, \
4857  annot{ \
4858  { \
4859  data \
4860  ftable{ \
4861  { \
4862  data \
4863  prot{ \
4864  name{ \"new test protein ext3\" } }, \
4865  location \
4866  int{ \
4867  from 0, \
4868  to 24, \
4869  id local str \"Seq_extend3_1\" } } } } } } }, \
4870 annot { \
4871  { \
4872  data \
4873  ftable{ \
4874  { \
4875  data \
4876  cdregion{ \
4877  frame two, \
4878  code{ id 1 } }, \
4879  product whole local str \"Seq_extend3_1\", \
4880  location \
4881  int{ \
4882  from 44, \
4883  to 119, \
4884  strand plus, \
4885  id local str \"Seq_extend3\" } } } } } } \
4886 }";
4887 
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
bool IsGeneralIdProtPresent(objects::CSeq_entry_Handle tse)
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CBioseq_set_Handle –.
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
const CSeq_id * GetLocalId() const
Find a local ID if present.
Definition: Bioseq.cpp:336
void AddCommand(IEditCommand &command)
virtual void Execute()
Do the editing action.
Definition: Date.hpp:53
void SetToTime(const CTime &time, EPrecision prec=ePrecision_second)
Definition: Date.cpp:57
@ ePrecision_day
Definition: Date.hpp:58
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
CFeat_CI –.
Definition: feat_ci.hpp:64
static string GetLoaderNameFromArgs(CReader *reader=0)
Definition: gbloader.cpp:377
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
CScope –.
Definition: scope.hpp:92
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
@ e_Iupacna
Definition: sequtil.hpp:47
@ e_Ncbi2na
Definition: sequtil.hpp:48
CSeqVector –.
Definition: seq_vector.hpp:65
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
void Parentize(void)
Definition: Seq_entry.cpp:71
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Class responsible for executing the sequence update for one old-update sequence pair.
Definition: update_seq.hpp:57
static CRef< objects::CSeq_feat > s_OffsetFeature(const objects::CSeq_feat &feat, const TSeqPos offset, const objects::CSeq_id *newId=NULL)
Offsets the location of the feature.
static CRef< objects::CSeq_inst > s_ExtendOneEndOfSequence(const objects::CBioseq_Handle &bsh, const string &extension, SUpdateSeqParams::ESequenceUpdateType update_type)
Extends the 5' or 3' end of a NA sequence (no alignment is necessary).
Definition: update_seq.cpp:953
bool IsUpdateSequenceRaw() const
Definition: update_seq.cpp:538
static string s_GetValidExtension(const string &extension)
Definition: update_seq.cpp:934
bool IsOldSequenceOK() const
Definition: update_seq.cpp:529
const string & GetCitSubMessage() const
Definition: update_seq.hpp:94
CRef< CCmdComposite > Update(bool create_general_only)
Main function responsible to update the old sequence with the update sequence.
Definition: update_seq.cpp:564
const TSeqIDHVector & GetSeqsWithoutUpdates() const
const TIDToUpdInputMap & GetNonIdenticalUpdates() const
vector< objects::CSeq_id_Handle > TSeqIDHVector
bool SetOldEntryAndScope(const objects::CSeq_entry_Handle &tse)
const TIDToUpdInputMap & GetIdenticalUpdates() const
bool SetUpdateEntry(CRef< objects::CSeq_entry > update)
Sets up the old and the update sequences, and generates the alignment between them.
bool IsReadyForUpdate(void) const
bool SetOldBioseqAndScope(IWorkbench *workbench, const objects::CSeq_entry_Handle &tse)
bool SetUpdateBioseq(const objects::CBioseq_Handle &bsh)
void CalculateAlignmentForUnitTest(void)
Calculates the alignment between the old and the update sequence, sets m_Align member.
CConstRef< objects::CSeq_align > GetAlignment(void) const
static void s_FixCollidingIDs_Entry(objects::CSeq_entry &entry, const objects::CBioseq::TId &seq_ids)
When the old and the update sequences have the same ID, the ID of the update sequence is modified.
bool HaveIdenticalResidues(void) const
static CRef< objects::CSeq_annot > s_Align2IdenticalSeq(const objects::CBioseq_Handle &subject, const objects::CBioseq_Handle &query)
const objects::CBioseq_Handle & GetUpdateBioseq(void) const
const objects::CBioseq_Handle & GetOldBioseq(void) const
Stores parameters regarding the type of sequence update, on how to handle existing features and on ho...
EFeatUpdateType m_FeatImportOption
TProtUpdFlags m_UpdateProteins
objects::CSeqFeatData::ESubtype m_FeatImportType
Defines the imported feature subtype.
bool m_AddCitSub
flag to attach a citation with the current date (false)
EFeatRemoveType m_FeatRemoveOption
bool m_ImportFeatures
Flag to indicate that features from the update sequence will be imported.
bool m_KeepProteinId
flag to update protein IDs, default is to update (false) - relevant only to imported protein IDs
@ eProtUpdate
retranslate coding regions and update the proteins
@ eSeqUpdateReplace
replace old sequence with update sequence
@ eSeqUpdateExtend3
extend the 3' end of old sequence with the update
@ eSeqUpdatePatch
patch old sequence with update in the aligned region
@ eSeqUpdateExtend5
extend the 5' end of old sequence with the update
size_type size() const
Definition: map.hpp:148
bool empty() const
Definition: map.hpp:149
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static uch flags
#define T(s)
Definition: common.h:230
#define check(s)
Definition: describecol2.c:21
Operators to edit gaps in sequences.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
const TPrim & Get(void) const
Definition: serialbase.hpp:347
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
CRef< CSeq_entry > ReadSet(int max_seqs=kMax_Int, ILineErrorListener *pMessageListener=nullptr)
Read multiple sequences (by default, as many as are available.)
Definition: fasta.cpp:442
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
@ fHyphensIgnoreAndWarn
When a hyphen is encountered in seq data, ignore it but warn.
Definition: fasta.hpp:112
@ fUniqueIDs
Forbid duplicate IDs.
Definition: fasta.hpp:101
@ fAddMods
Parse defline mods and add to SeqEntry.
Definition: fasta.hpp:104
@ fNoSplit
Don't split out ambiguous sequence regions.
Definition: fasta.hpp:99
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2039
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1033
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
TSeqPos GetBioseqLength(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
TInst_Mol GetInst_Mol(void) const
TInst_Strand GetInst_Strand(void) const
TInst_Topology GetInst_Topology(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
const TInst & GetInst(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
#define kMax_Int
Definition: ncbi_limits.h:184
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCout
Definition: ncbistre.hpp:543
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
CTime CurrentTime(CTime::ETimeZone tz=CTime::eLocal, CTime::ETimeZonePrecision tzp=CTime::eTZPrecisionDefault)
Definition: ncbitime.hpp:2185
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Cit_sub_.hpp:476
const TDate & GetDate(void) const
Get the Date member data.
Definition: Cit_sub_.hpp:455
bool IsSetDescr(void) const
Description of changes for public view. Check if a value has been assigned to Descr data member.
Definition: Cit_sub_.hpp:464
bool IsSetDate(void) const
Replaces imp, will become required. Check if a value has been assigned to Date data member.
Definition: Cit_sub_.hpp:443
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:538
bool IsSetProcessed(void) const
Check if a value has been assigned to Processed data member.
Definition: Prot_ref_.hpp:513
@ eProcessed_signal_peptide
Definition: Prot_ref_.hpp:99
TTRNA & SetTRNA(void)
Select the variant.
Definition: RNA_ref_.cpp:140
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
Definition: Trna_ext_.hpp:649
void SetAnticodon(TAnticodon &value)
Assign a value to Anticodon data member.
Definition: Trna_ext_.cpp:158
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TTRNA & GetTRNA(void) const
Get the variant data.
Definition: RNA_ref_.cpp:134
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TRna & GetRna(void) const
Get the variant data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
bool IsNcbi4na(void) const
Check if variant Ncbi4na is selected.
Definition: Seq_data_.hpp:564
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:826
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
Definition: Seq_data_.hpp:550
const TPub & GetPub(void) const
Get the Pub member data.
Definition: Pubdesc_.hpp:605
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:817
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ eTopology_tandem
some part of tandem repeat
Definition: Seq_inst_.hpp:125
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
@ eStrand_ds
double strand
Definition: Seq_inst_.hpp:136
int i
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
std::istream & in(std::istream &in_, double &x_)
The Object manager core.
static SQLCHAR output[256]
Definition: print.c:5
Utility macros and typedefs for exploring NCBI objects from pub.asn.
#define FOR_EACH_PUB_ON_PUBDESC(Itr, Var)
FOR_EACH_PUB_ON_PUBDESC EDIT_EACH_PUB_ON_PUBDESC.
Definition: pub_macros.hpp:127
#define FOR_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQDESC_ON_BIOSEQ EDIT_EACH_SEQDESC_ON_BIOSEQ.
Definition: seq_macros.hpp:218
#define VISIT_ALL_SEQFEATS_WITHIN_SEQENTRY(Itr, Var)
VISIT_ALL_SEQFEATS_WITHIN_SEQENTRY.
static const char * str(char *buf, int n)
Definition: stats.c:84
SAnnotSelector.
Definition: type.c:6
Utility stuff for more convenient use of the Boost.Test library.
CRef< CSeq_entry > ReadEntryFromFile(const string &fname)
USING_SCOPE(objects)
const char * sc_UpdSequence_Replace
const char * sc_UpdSequence_Patch
CRef< CSeq_entry > ReadEntry(const char *seq)
string MakeAsn(const T &object)
const char * sc_MainSequence
void SetupForUpdate(const CSeq_entry_Handle &old_seh, const CSeq_entry_Handle &upd_seh, CUpdateSeq_Input &sequpd_in, bool identical_res=false)
BOOST_AUTO_TEST_CASE(Test_SameSequence)
const char * sc_UpdSequence_Extend5_AlignWithGap
CRef< CScope > BuildScope(void)
CRef< CSeq_entry > ReadFastaSeqsFromFile(const string &fname)
void ReadFromFile(const string &old_seq, const string &upd_seq, CSeq_entry_Handle &old_seh, CSeq_entry_Handle &upd_seh)
void ReadBioSeqs(const char *old_seq, const char *upd_seq, CSeq_entry_Handle &old_seh, CSeq_entry_Handle &upd_seh)
const char * sc_UpdSequence_Extend3
NCBITEST_AUTO_INIT()
const char * sc_UpdSequence_Same
const char * sc_UpdSequence_Extend5
USING_NCBI_SCOPE
NCBITEST_AUTO_FINI()
CRef< objects::CSeq_feat > MakeMiscFeature(CRef< objects::CSeq_id > id, size_t right_end=10, size_t left_end=0)
CRef< objects::CSeq_entry > BuildGoodSeq(void)
Modified on Thu Mar 28 17:10:35 2024 by modify_doxy.py rev. 669887