NCBI C++ ToolKit
update_seq.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: update_seq.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrea Asztalos
27  */
28 
29 
30 #include <ncbi_pch.hpp>
31 #include <corelib/ncbiutil.hpp>
32 
41 #include <objects/pub/Pub_set.hpp>
42 #include <objects/pub/Pub.hpp>
44 #include <objects/general/Date.hpp>
49 
53 #include <objmgr/util/sequence.hpp>
54 
55 #include <objmgr/seq_vector.hpp>
57 #include <objmgr/scope.hpp>
59 #include <objmgr/feat_ci.hpp>
60 #include <objmgr/bioseq_ci.hpp>
61 #include <objmgr/seqdesc_ci.hpp>
62 #include <objmgr/seq_annot_ci.hpp>
63 
72 
74 
77 
78 
79 namespace {
// Maps one interval of the old sequence onto the new sequence identified by newId.
//
// origInterval - interval located on the old sequence
// newId        - id placed on the returned interval
// align        - alignment between the old and the new sequence
// is_circular  - when true, a result with from > to has its endpoints swapped
// scope        - used to obtain the bioseq handle of origInterval's id
// newLength    - length of the new sequence; upper clamp for extrapolated positions
//
// Returns a newly allocated interval; every path through the function returns a
// non-empty CRef.
80  CRef<CSeq_interval> s_MapInterval(const CSeq_interval& origInterval, const CSeq_id& newId,
81  const CSeq_align& align, bool is_circular, CScope& scope, const TSeqPos& newLength)
82  {
83 
    // Mapper constructed from the alignment and newId, with merge-all mode enabled.
84  CRef<CSeq_loc_Mapper_Base> mapper(new CSeq_loc_Mapper_Base(align, newId));
85  mapper->SetMergeAll();
86 
87  CBioseq_Handle bsh = scope.GetBioseqHandle(origInterval.GetId());
88 
89  const CSeq_align::TDim oldseqRow = CSequenceUpdater::FindRowInAlignment(bsh, align);
90  _ASSERT(oldseqRow > -1);
    // Logical negation: yields 1 when oldseqRow is 0 and 0 otherwise, so this
    // relies on the alignment having exactly the two rows 0 and 1.
91  const CSeq_align::TDim newseqRow = !oldseqRow;
92 
    // Extent and strand of the aligned region on the old row and on the new row.
93  const int aln_start = align.GetSeqStart(oldseqRow);
94  const int aln_stop = align.GetSeqStop(oldseqRow);
95  const ENa_strand old_strand = align.GetSeqStrand(oldseqRow);
96  const int aln_start_new = align.GetSeqStart(newseqRow);
97  const int aln_stop_new = align.GetSeqStop(newseqRow);
98  const ENa_strand new_strand = align.GetSeqStrand(newseqRow);
99 
    // 'reverse' is true when the two rows are aligned in opposite orientations.
100  bool reverse = !SameOrientation(old_strand, new_strand);
101  const int orig_start = origInterval.GetStart(eExtreme_Positional);
102  const int orig_stop = origInterval.GetStop(eExtreme_Positional);
103 
    // Result interval on newId; in the same-orientation case the original strand
    // (if set) is copied right away.
104  CRef<CSeq_interval> pTargetInt(new CSeq_interval());
105  pTargetInt->SetId().Assign(newId);
106  if (!reverse && origInterval.IsSetStrand()) {
107  pTargetInt->SetStrand(origInterval.GetStrand());
108  }
109 
    // NOTE(review): the declaration of 'source' (listing line 110) is absent from
    // this copy of the file; it is used below as the Seq-loc handed to the mapper.
111  source.SetInt().Assign(origInterval);
112  CRef<CSeq_loc> mapped = mapper->Map(source);
113 
    // Take the positional extremes of the mapped location; strand and biological
    // partialness are copied from the mapped location only in the reversed case.
    // When nothing was mapped, both endpoints start at 0.
114  if (mapped && !mapped->IsNull()) {
115  pTargetInt->SetFrom(mapped->GetStart(eExtreme_Positional));
116  pTargetInt->SetTo(mapped->GetStop(eExtreme_Positional));
117  if (reverse && mapped->IsSetStrand()) {
118  pTargetInt->SetStrand(mapped->GetStrand());
119  }
120  if (reverse && mapped->IsPartialStart(eExtreme_Biological)) {
121  pTargetInt->SetPartialStart(true, eExtreme_Biological);
122  }
123  if (reverse && mapped->IsPartialStop(eExtreme_Biological)) {
124  pTargetInt->SetPartialStop(true, eExtreme_Biological);
125  }
126  }
127  else {
128  pTargetInt->SetFrom(0);
129  pTargetInt->SetTo(0);
130  }
131 
    // Original start lies outside the aligned range of the old row: recompute the
    // corresponding endpoint from the nearest alignment edge on the new row,
    // shifted by the overhang, and clamp it to [0, newLength - 1]. In the reversed
    // case the original start determines 'to' instead of 'from'.
132  int offset = 0;
133  if (orig_start < aln_start) {
134  if (reverse) {
135  offset = aln_stop_new + (aln_start - orig_start);
136  if (offset > (int)newLength - 1) {
137  offset = (int)newLength - 1;
138  }
139  pTargetInt->SetTo(offset);
140  }
141  else {
142  offset = aln_start_new - (aln_start - orig_start);
143  if (offset < 0) {
144  offset = 0;
145  }
146  pTargetInt->SetFrom(offset);
147  }
148  } else if (orig_start > aln_stop) {
149  if (reverse) {
150  offset = aln_start_new - (orig_start - aln_stop);
151  if (offset < 0) {
152  offset = 0;
153  }
154  pTargetInt->SetTo(offset);
155  }
156  else {
157  offset = aln_stop_new + (orig_start - aln_stop);
158  if (offset > (int)newLength - 1) {
159  offset = (int)newLength - 1;
160  }
161  pTargetInt->SetFrom(offset);
162  }
163  }
164 
165 
    // Same treatment for the original stop; in the reversed case it determines
    // 'from' instead of 'to'.
166  if (orig_stop < aln_start) {
167  if (reverse) {
168  offset = aln_stop_new + (aln_start - orig_stop);
169  if (offset > (int)newLength - 1) {
170  offset = (int)newLength - 1;
171  }
172  pTargetInt->SetFrom(offset);
173  }
174  else {
175  offset = aln_start_new - (aln_start - orig_stop);
176  if (offset < 0) {
177  offset = 0;
178  }
179  pTargetInt->SetTo(offset);
180  }
181  } else if (orig_stop > aln_stop){
182  if (reverse) {
183  offset = aln_start_new - (orig_stop - aln_stop);
184  if (offset < 0) {
185  offset = 0;
186  }
187  pTargetInt->SetFrom(offset);
188  }
189  else {
190  offset = aln_stop_new + (orig_stop - aln_stop);
191  if (offset > (int)newLength - 1) {
192  offset = (int)newLength - 1;
193  }
194  pTargetInt->SetTo(offset);
195  }
196  }
197 
198  // check for circular topology
    // An inverted result (from > to) on a circular sequence gets its endpoints exchanged.
199  if (is_circular && pTargetInt->GetFrom() > pTargetInt->GetTo()) {
200  const TSeqPos start = pTargetInt->GetStart(eExtreme_Biological);
201  const TSeqPos stop = pTargetInt->GetStop(eExtreme_Biological);
202  pTargetInt->SetFrom(stop);
203  pTargetInt->SetTo(start);
204  }
205 
206 
    // Strand fallback when none was set above: the reversed case stores the
    // reverse of the original strand (eNa_strand_unknown is used as the input
    // when the original has no strand); otherwise the original strand is copied.
207  if (!pTargetInt->IsSetStrand()) {
208  if (reverse) {
209  ENa_strand orig_strand = (origInterval.IsSetStrand()) ? origInterval.GetStrand() : eNa_strand_unknown;
210  ENa_strand target_strand = Reverse(orig_strand);
211  pTargetInt->SetStrand(target_strand);
212  }
213  else {
214  if (origInterval.IsSetStrand()) {
215  pTargetInt->SetStrand(origInterval.GetStrand());
216  }
217  }
218  }
219 
    // Carry over positional partialness; the two ends trade places when reversed.
220  if (origInterval.IsPartialStart(eExtreme_Positional)) {
221  if (reverse) {
222  pTargetInt->SetPartialStop(true, eExtreme_Positional);
223  }
224  else {
225  pTargetInt->SetPartialStart(true, eExtreme_Positional);
226  }
227  }
228  if (origInterval.IsPartialStop(eExtreme_Positional)) {
229  if (reverse) {
230  pTargetInt->SetPartialStart(true, eExtreme_Positional);
231  }
232  else {
233  pTargetInt->SetPartialStop(true, eExtreme_Positional);
234  }
235  }
236 
237  return pTargetInt;
238  }
239 
240  CRef<CSeq_loc> s_MapLocation(const CSeq_loc& origLoc, const CSeq_id& newId, const CSeq_align& align, bool is_circular, CScope& scope, const TSeqPos& newLength)
241  {
242  CRef<CSeq_loc> mappedLoc(new CSeq_loc);
243  switch (origLoc.Which()) {
244  case CSeq_loc::e_Pnt: {
245  const CSeq_point& pnt = origLoc.GetPnt();
246  CRef<CSeq_interval> shortInt(new CSeq_interval());
247  shortInt->SetId().Assign(pnt.GetId());
248  shortInt->SetFrom(pnt.GetPoint());
249  shortInt->SetTo(pnt.GetPoint());
250  if (pnt.IsSetStrand()) {
251  shortInt->SetStrand(pnt.GetStrand());
252  }
253  if (pnt.IsSetFuzz()) {
254  const CInt_fuzz& fuzz = pnt.GetFuzz();
255  if (!fuzz.IsLim() || fuzz.GetLim() != CInt_fuzz::eLim_gt) {
256  shortInt->SetFuzz_from().Assign(fuzz);
257  }
258  if (!fuzz.IsLim() || fuzz.GetLim() != CInt_fuzz::eLim_lt) {
259  shortInt->SetFuzz_to().Assign(fuzz);
260  }
261  }
262 
263  CRef<CSeq_interval> pTargetInt = s_MapInterval(*shortInt, newId, align, is_circular, scope, newLength);
264  if (pTargetInt) {
265  mappedLoc->SetInt(*pTargetInt);
266  return mappedLoc;
267  }
268  break;
269  }
270  case CSeq_loc::e_Int: {
271  CRef<CSeq_interval> pTargetInt = s_MapInterval(origLoc.GetInt(), newId, align, is_circular, scope, newLength);
272  if (pTargetInt) {
273  mappedLoc->SetInt(*pTargetInt);
274  return mappedLoc;
275  }
276  break;
277  }
278  case CSeq_loc::e_Packed_int: {
279  CPacked_seqint& targetInts = mappedLoc->SetPacked_int();
280  bool subloc_added = false;
281  for (auto&& cit : origLoc.GetPacked_int().Get()) {
282  CRef<CSeq_interval> sub_interval = s_MapInterval(*cit, newId, align, is_circular, scope, newLength);
283  if (sub_interval) {
284  targetInts.AddInterval(*sub_interval);
285  subloc_added = true;
286  }
287  }
288  if (subloc_added) return mappedLoc;
289  break;
290  }
291  case CSeq_loc::e_Mix: {
292  CSeq_loc_mix& targetMix = mappedLoc->SetMix();
293  bool subloc_added = false;
294  for (auto&& cit : origLoc.GetMix().Get()) {
295  CRef<CSeq_loc> target_loc = s_MapLocation(*cit, newId, align, is_circular, scope, newLength);
296  if (target_loc) {
297  targetMix.AddSeqLoc(*target_loc);
298  subloc_added = true;
299  }
300  }
301  if (subloc_added) return mappedLoc;
302  break;
303  }
304  default: {
305  break;
306  }
307  }
308  return CRef<CSeq_loc>();
309  }
310 
311  bool SameStrand(const CSeq_loc& loc1, const CSeq_loc& loc2)
312  {
313  ENa_strand s1 = loc1.GetStrand();
314  ENa_strand s2 = loc2.GetStrand();
315  if ((s1 == eNa_strand_minus && s2 == eNa_strand_minus)
316  || (s1 != eNa_strand_minus && s2 != eNa_strand_minus)) {
317  return true;
318  }
319  else {
320  return false;
321  }
322  }
323 
324  bool AreFeaturesDuplicates(const CSeq_feat& feat1, const CSeq_feat& feat2)
325  {
326  // if the feattypes are the same and the locations are identical, the features are considered to be duplicates
327  if (feat1.GetData().GetSubtype() != feat2.GetData().GetSubtype()) {
328  return false;
329  }
330 
331  const CSeq_loc& loc1 = feat1.GetLocation();
332  const CSeq_loc& loc2 = feat2.GetLocation();
333 
334  if (!SameStrand(loc1, loc2) || loc1.Compare(loc2)) {
335  return false;
336  }
337 
338  return true;
339  }
340 
341  // the location ID should not be compared when comparing two protein features
342  bool AreProteinFeaturesDuplicates(const CSeq_feat& feat1, const CSeq_feat& feat2)
343  {
344  if (!feat1.GetData().IsProt() || !feat2.GetData().IsProt()) {
345  return false;
346  }
347 
348  bool processed1 = feat1.GetData().GetProt().IsSetProcessed();
349  bool processed2 = feat2.GetData().GetProt().IsSetProcessed();
350  if ((processed1 && !processed2) || (!processed1 && processed2)) {
351  return false;
352  }
353  if (processed1 && processed2
354  && feat1.GetData().GetProt().GetProcessed() != feat2.GetData().GetProt().GetProcessed()) {
355  return false;
356  }
357 
358  const CSeq_loc& loc1 = feat1.GetLocation();
359  const CSeq_loc& loc2 = feat2.GetLocation();
360 
361  if (!SameStrand(loc1, loc2)) {
362  return false;
363  }
364 
365  CSeq_loc_CI loc1_it(loc1);
366  CSeq_loc_CI loc2_it(loc2);
367  while (loc1_it && loc2_it) {
368  if (loc1_it.GetRange() != loc2_it.GetRange()) {
369  return false;
370  }
371  ++loc1_it;
372  ++loc2_it;
373  }
374  if (loc1_it || loc2_it) {
375  return false;
376  }
377 
378  return true;
379  }
380 
// Merges the string field 'Field' of Var2 into Var1. Unset fields are read as
// empty strings. When the two values differ (case-insensitively) and
// edit::AddValueToString reports success for the append-with-semicolon mode,
// the combined text is stored back into Var1.
#define FUSE_STRINGFIELDS(Var1, Var2, Field) \
    {{ \
        string fused_text_; \
        if ((Var1).IsSet##Field()) { \
            fused_text_ = (Var1).Get##Field(); \
        } \
        string incoming_text_; \
        if ((Var2).IsSet##Field()) { \
            incoming_text_ = (Var2).Get##Field(); \
        } \
        if (!NStr::EqualNocase(fused_text_, incoming_text_)) { \
            if (edit::AddValueToString(fused_text_, incoming_text_, edit::eExistingText_append_semi)) { \
                (Var1).Set##Field(fused_text_); \
            } \
        } \
    }}
389 
// Merges the boolean field 'Field' of Var2 into Var1 with OR semantics: an unset
// field counts as false; Var1's field is set to true when either side is true and
// is reset (unset) otherwise.
#define FUSE_BOOLEANFIELDS(Var1, Var2, Field) \
    {{ \
        const bool lhs_true_ = (Var1).IsSet##Field() && (Var1).Get##Field(); \
        const bool rhs_true_ = (Var2).IsSet##Field() && (Var2).Get##Field(); \
        if (!lhs_true_ && !rhs_true_) { \
            (Var1).Reset##Field(); \
        } else { \
            (Var1).Set##Field(true); \
        } \
    }}
400 
// Appends every element of Var2's 'Field' container to Var1's 'Field' container.
// Nothing happens (Var1's setter is not even called) when Var2's field is unset
// or empty. 'Type' is the class name without its leading 'C'; the container type
// is taken as C<Type>::T<Field>.
//
// The expansion is wrapped in its own scope, like the other FUSE_* macros, so
// that the internal 'if' cannot capture an 'else' that follows the macro call.
#define FUSE_LISTFIELDS(Var1, Var2, Type, Field) \
    {{ \
        if ((Var2).IsSet##Field() && !(Var2).Get##Field().empty()) { \
            C##Type::T##Field& cont = (Var1).Set##Field(); \
            copy((Var2).Get##Field().begin(), (Var2).Get##Field().end(), back_inserter(cont)); \
        } \
    }}
406 
// Appends every element of Var2's 'Field' vector to Var1's 'Field' vector,
// reserving room for the combined size first. Nothing happens when Var2's field
// is unset or empty. 'Type' is the class name without its leading 'C'; the
// container type is taken as C<Type>::T<Field>.
#define FUSE_VECTORFIELDS(Var1, Var2, Type, Field) \
    {{ \
        if ((Var2).IsSet##Field() && !(Var2).Get##Field().empty()) { \
            const size_t present_ = (Var1).IsSet##Field() ? (Var1).Get##Field().size() : 0; \
            const size_t incoming_ = (Var2).Get##Field().size(); \
            C##Type::T##Field& target_ = (Var1).Set##Field(); \
            target_.reserve(present_ + incoming_); \
            copy((Var2).Get##Field().begin(), (Var2).Get##Field().end(), back_inserter(target_)); \
        } \
    }}
417 
418  void FuseGenes(CGene_ref& fusedGene, const CGene_ref& gene)
419  {
420  FUSE_STRINGFIELDS(fusedGene, gene, Locus);
421  FUSE_STRINGFIELDS(fusedGene, gene, Allele);
422  FUSE_STRINGFIELDS(fusedGene, gene, Desc);
423  FUSE_STRINGFIELDS(fusedGene, gene, Maploc);
424  FUSE_STRINGFIELDS(fusedGene, gene, Locus_tag);
425 
426  FUSE_BOOLEANFIELDS(fusedGene, gene, Pseudo);
427 
428  FUSE_VECTORFIELDS(fusedGene, gene, Gene_ref, Db);
429 
430  FUSE_LISTFIELDS(fusedGene, gene, Gene_ref, Syn);
431  }
432 
433  void FuseProteins(CSeq_feat& fusedFeat, const CSeq_feat& feat_old)
434  {
435  CProt_ref& fusedProtref = fusedFeat.SetData().SetProt();
436  const CProt_ref& protref = feat_old.GetData().GetProt();
437 
438  FUSE_LISTFIELDS(fusedProtref, protref, Prot_ref, Name);
439  FUSE_LISTFIELDS(fusedProtref, protref, Prot_ref, Ec);
440  FUSE_LISTFIELDS(fusedProtref, protref, Prot_ref, Activity);
441  FUSE_LISTFIELDS(fusedProtref, protref, Prot_ref, Db);
442 
443  FUSE_STRINGFIELDS(fusedProtref, protref, Desc);
444  }
445 
446 
447  void FuseCommonFeatureFields(CSeq_feat& fusedFeat, const CSeq_feat& feat)
448  {
449  FUSE_STRINGFIELDS(fusedFeat, feat, Comment);
450  FUSE_STRINGFIELDS(fusedFeat, feat, Title);
451  FUSE_STRINGFIELDS(fusedFeat, feat, Except_text);
452 
453  FUSE_VECTORFIELDS(fusedFeat, feat, Seq_feat, Qual);
454  FUSE_VECTORFIELDS(fusedFeat, feat, Seq_feat, Dbxref);
455  FUSE_VECTORFIELDS(fusedFeat, feat, Seq_feat, Xref);
456 
457 
458  if (feat.IsSetCit() && feat.GetCit().IsPub() && !feat.GetCit().GetPub().empty()) {
459  CPub_set::TPub& pub = fusedFeat.SetCit().SetPub();
460  copy(feat.GetCit().GetPub().begin(), feat.GetCit().GetPub().end(), back_inserter(pub));
461  }
462 
463  FUSE_BOOLEANFIELDS(fusedFeat, feat, Partial);
464  FUSE_BOOLEANFIELDS(fusedFeat, feat, Except);
465  FUSE_BOOLEANFIELDS(fusedFeat, feat, Pseudo);
466  }
467 
// Returns the feature-table annotation to use for the given bioseq: the first
// annotation visited by the iterator 'ci' for which IsFtable() is true, or, when
// there is none, a newly created empty CSeq_annot attached to the bioseq through
// its edit handle.
468  CSeq_annot_Handle DefaultGetAppropriateFeatureTable(CBioseq_Handle protbsh)
469  {
470  //check if there is a feature table already
471  CSeq_entry_Handle prot_seh = protbsh.GetSeq_entry_Handle();
    // NOTE(review): the declaration of 'ci' (listing line 472) is absent from this
    // copy of the file; presumably an annotation iterator over prot_seh - confirm.
473  for (/**/; ci; ++ci) {
474  if (ci->IsFtable()) {
475  return *ci;
476  }
477  }
478  //if not, make a new one
479  CBioseq_EditHandle eh = protbsh.GetEditHandle();
480  CRef<CSeq_annot> pAnnot(new CSeq_annot());
481  return eh.AttachAnnot(*pAnnot);
482  }
483 
484 } // end of namespace
485 
// Text constant "NewSeq". Both the characters and the pointer are const so the
// name cannot be re-pointed by accident.
static const char* const kNewLocalID = "NewSeq";
487 
489  : m_OldBsh(updseq_in.GetNCOldBioseq()),
490  m_UpdBsh(updseq_in.GetUpdateBioseq()),
491  m_Align(updseq_in.GetAlignment()),
492  m_Params(params),
493  m_NewEntry(0),
494  m_NewId(0),
495  m_Reversed(false)
496 {
497  if (!updseq_in.IsReadyForUpdate() || !IsAlignmentOK()) {
498  NCBI_THROW(CSeqUpdateException, eInternal,
499  "Sequence can not be updated: either the update sequence or the alignment is missing");
500  }
501 
502  if (!CheckParameters()) {
503  NCBI_THROW(CSeqUpdateException, eInternal,
504  "Sequence can not be updated: update parameters are not valid");
505  }
506 
507  if (!IsOldSequenceOK()) { // can be raw or delta
508  NCBI_THROW(CSeqUpdateException, eInternal,
509  "The old sequence can not be updated: unsupported seq-gap");
510  }
511 
512  // needs to be raw or delta with far pointers
513  if (!IsUpdateSequenceRaw() &&
514  !CUpdateSeq_Input::s_IsDeltaWithFarPointers(*m_UpdBsh.GetCompleteBioseq())) {
515  NCBI_THROW(CSeqUpdateException, eInternal,
516  "The update sequence contains gaps features.\nPlease convert your file so that gaps are runs of N's before updating the sequence.");
517  }
518 }
519 
521 {
522 }
523 
525 {
526  m_Params = params;
527 }
528 
530 {
531  bool old_raw = (m_OldBsh.IsSetInst_Repr() && (m_OldBsh.GetInst_Repr() == CSeq_inst::eRepr_raw));
532  bool old_delta = CUpdateSeq_Input::s_IsDeltaWithNoGaps(*m_OldBsh.GetCompleteBioseq())
533  || CUpdateSeq_Input::s_IsDeltaWithFarPointers(*m_OldBsh.GetCompleteBioseq());
534  return old_raw || old_delta;
535 }
536 
537 
539 {
540  return (m_UpdBsh.IsSetInst_Repr() && (m_UpdBsh.GetInst_Repr() == CSeq_inst::eRepr_raw));
541 }
542 
544 {
545  return (m_Align && m_Align->IsSetSegs() && m_Align->GetSegs().IsDenseg()) || !m_Align;
546 }
547 
549 {
550  if (!m_Align) {
552  if (update_type == SUpdateSeqParams::eSeqUpdatePatch) {
553  return false;
554  }
555  else if ((update_type == SUpdateSeqParams::eSeqUpdateExtend3 || update_type == SUpdateSeqParams::eSeqUpdateExtend5)
557  return false;
558  }
559  }
560  return true;
561 }
562 
563 
565 {
567 
568  string descr;
569  switch (m_Params.m_SeqUpdateOption) {
571  NoChange(create_general_only);
572  descr.assign("No change");
573  break;
575  Replace(create_general_only);
576  descr.assign("Replace sequence");
577  break;
579  Patch(create_general_only);
580  descr.assign("Patch sequence");
581  break;
583  ExtendOneEndOfSequence(create_general_only);
584  descr.assign("Extend 5' end");
585  break;
587  ExtendOneEndOfSequence(create_general_only);
588  descr.assign("Extend 3' end");
589  break;
590  default:
591  break;
592  }
593 
594  if (m_Params.m_AddCitSub) {
596  }
597 
598  if (m_Align &&
601  m_Reversed = !SameOrientation(m_Align->GetSeqStrand(0), m_Align->GetSeqStrand(1));
602  }
603 
604  CRef<CCmdComposite> upd_cmd(new CCmdComposite(descr));
605  upd_cmd->AddCommand(*x_SwapOldWithNewSeq());
606 
607  return upd_cmd;
608 }
609 
610 //static
612 {
613  CSeq_align::TDim dim = align.CheckNumRows();
614  for (CSeq_align::TDim row = 0; row < dim; ++row) {
615  const CSeq_id& row_id = align.GetSeq_id(row);
616  if (bsh.IsSynonym(row_id)) {
617  return row;
618  }
619  }
620  return -1;
621 }
622 
624 {
626  TSeqPos newLength = 0;
627  if (m_NewEntry->IsSeq()) {
628  newLength = m_NewEntry->GetSeq().GetInst().GetLength();
629  }
630  else {
631  FOR_EACH_SEQENTRY_ON_SEQSET(it, m_NewEntry->GetSet()) {
632  if ((*it)->IsSeq()) {
633  newLength = (*it)->GetSeq().GetLength();
634  }
635  }
636  }
637  return newLength;
638 }
639 
// "No change" update: the new entry is built from a copy of the old sequence's
// Seq-inst. Features of the update sequence that pass x_ShouldImportFeature()
// are mapped through an alignment and collected in m_ImportUpdFeats, then
// ids/features are finalized by x_FixID_AttachFeatures().
//
// NOTE(review): several lines of this function (listing lines 647, 649, 651-652,
// 663) are absent from this copy of the file - among them the declarations of
// 'align' and 'sel' and the opening of the block closed at listing line 664.
640 void CSequenceUpdater::NoChange(bool create_general_only)
641 {
642  CRef<CSeq_inst> newInst(new CSeq_inst);
643  newInst->Assign(m_OldBsh.GetInst());
644  x_MakeNewEntry(*newInst); // it is not being added to any scope at this point
645 
646  // prepare old features
648 
650  // the id in the row corresponding to the OLD sequence should be replaced by newId
    // the feature search is limited to the TSE of the update sequence
653  sel.SetLimitTSE(m_UpdBsh.GetTSE_Handle());
654  for (CFeat_CI feat_it(m_UpdBsh, sel); feat_it; ++feat_it) {
655  if (!x_ShouldImportFeature(*feat_it)) {
656  continue;
657  }
658  CRef<CSeq_feat> import_feat = x_MappedFeature_ThroughAlign(*feat_it, *align);
    // an empty reference means the feature is not imported
659  if (import_feat) {
660  m_ImportUpdFeats.push_back(import_feat);
661  }
662  }
664  }
665 
666  x_FixID_AttachFeatures(create_general_only);
667 }
668 
// "Replace" update: builds the new entry from 'newInst'. When an alignment is
// available, features of the old sequence that are not filtered out by
// x_ShouldRemoveFeature() are mapped through it into m_MappedOldFeats. Features
// of the update sequence keep their positions and only get their id changed;
// they are collected in m_ImportUpdFeats. Finishes with x_FixID_AttachFeatures().
//
// NOTE(review): several lines of this function (listing lines 671, 675-677, 691,
// 694, 696, 705) are absent from this copy of the file - among them the
// declarations of 'newInst', 'align', 'oldseqRow' and 'sel', the body of the
// no-alignment branch, and the opening of the block closed at listing line 706.
669 void CSequenceUpdater::Replace(bool create_general_only)
670 {
    // NOTE(review): newInst is dereferenced without a null check - confirm that
    // its (missing) initializer cannot yield an empty reference.
672  x_MakeNewEntry(*newInst);
673 
674  if (m_Align) {
    // the feature search is limited to the TSE of the old sequence
678  sel.SetLimitTSE(m_OldBsh.GetTSE_Handle());
679  for (CFeat_CI feat_it(m_OldBsh, sel); feat_it; ++feat_it) {
680  if (x_ShouldRemoveFeature(*feat_it)) {
681  continue;
682  }
683  x_ChangeIDInFeature(*feat_it, *align, oldseqRow);
684  CRef<CSeq_feat> mapped_feat = x_MappedFeature_ThroughAlign(*feat_it, *align);
685  if (mapped_feat) {
686  m_MappedOldFeats.push_back(mapped_feat);
687  }
688  }
689  }
690  else {
692  }
693 
695  // keep features in their initial position, only change their IDs
    // the feature search is limited to the TSE of the update sequence
697  sel.SetLimitTSE(m_UpdBsh.GetTSE_Handle());
698  for (CFeat_CI feat_it(m_UpdBsh, sel); feat_it; ++feat_it) {
699  // import each feature as we are replacing the old sequence
700  CRef<CSeq_feat> import_feat = x_MappedFeature_ChangeId(*feat_it);
701  if (import_feat) {
702  m_ImportUpdFeats.push_back(import_feat);
703  }
704  }
706  }
707 
708  x_FixID_AttachFeatures(create_general_only);
709 }
710 
// "Patch" update: requires an alignment (asserted). Builds the new entry from the
// Seq-inst produced by x_PatchSequence() and finishes with
// x_FixID_AttachFeatures().
//
// NOTE(review): listing line 717 is absent from this copy of the file.
711 void CSequenceUpdater::Patch(bool create_general_only)
712 {
713  _ASSERT(m_Align);
714  CRef<CSeq_inst> newInst = x_PatchSequence();
    // NOTE(review): newInst is dereferenced without a null check - confirm that
    // x_PatchSequence() cannot return an empty reference here.
715  x_MakeNewEntry(*newInst);
716 
718 
719  x_FixID_AttachFeatures(create_general_only);
720 }
721 
// Extends the old sequence at one end: builds the new entry from the Seq-inst
// produced by x_ExtendOneEnd(). In the branch visible here, features of the old
// sequence (unless rejected by x_ShouldRemoveFeature()) are shifted by
// 'offset_old' and features of the update sequence by 'offset_import'; both sets
// are re-targeted to m_NewId via s_OffsetFeature(). Finishes with
// x_FixID_AttachFeatures().
//
// NOTE(review): several lines of this function (listing lines 727, 729, 732, 735,
// 748-749, 757, 761) are absent from this copy of the file - among them the
// conditions that decide which offset is assigned, the declaration of 'sel', and
// the body of the final 'else' branch. The braces shown are therefore unbalanced.
722 void CSequenceUpdater::ExtendOneEndOfSequence(bool create_general_only)
723 {
724  CRef<CSeq_inst> newInst = x_ExtendOneEnd();
    // NOTE(review): newInst is dereferenced without a null check - confirm that
    // x_ExtendOneEnd() cannot return an empty reference here.
725  x_MakeNewEntry(*newInst);
726 
    // shift applied to old-sequence features / to imported update-sequence features
728  TSeqPos offset_old = 0, offset_import = 0;
    // old features move right by the length of the update sequence
730  offset_old = m_UpdBsh.GetBioseqLength();
731  }
    // imported features move right by the length of the old sequence
733  offset_import = m_OldBsh.GetBioseqLength();
734  }
736  sel.SetLimitTSE(m_OldBsh.GetTSE_Handle());
737  for (CFeat_CI feat_it(m_OldBsh, sel); feat_it; ++feat_it) {
738  if (x_ShouldRemoveFeature(*feat_it)) {
739  continue;
740  }
741 
742  CRef<CSeq_feat> mapped_feat = s_OffsetFeature(feat_it->GetOriginalFeature(), offset_old, m_NewId);
743  if (mapped_feat) {
744  m_MappedOldFeats.push_back(mapped_feat);
745  }
746  }
747 
750  sel.SetLimitTSE(m_UpdBsh.GetTSE_Handle());
751  for (CFeat_CI feat_it(m_UpdBsh, sel); feat_it; ++feat_it) {
752  CRef<CSeq_feat> mapped_feat = s_OffsetFeature(feat_it->GetOriginalFeature(), offset_import, m_NewId);
753  if (mapped_feat) {
754  m_ImportUpdFeats.push_back(mapped_feat);
755  }
756  }
758  }
759  }
760  else { // do not ignore alignment
762  }
763 
764  x_FixID_AttachFeatures(create_general_only);
765 }
766 
// Final step shared by the update operations: fixes the ids of the new sequence
// and then attaches the collected features via x_AttachFeaturesToNewSeq(),
// forwarding create_general_only unchanged.
//
// NOTE(review): the statement that belongs to the first comment below (listing
// line 770) is absent from this copy of the file.
767 void CSequenceUpdater::x_FixID_AttachFeatures(bool create_general_only)
768 {
769  // add original Ids and remove local ID
771  // attach features to new sequence if there are any
772  x_AttachFeaturesToNewSeq(create_general_only);
773 }
774 
776 {
777 
778  CRef<CSeq_inst> new_inst(new CSeq_inst());
779  new_inst->Assign(m_OldBsh.GetInst());
780 
781  if (m_OldBsh.GetInst_Repr() == CSeq_inst::eRepr_raw) {
782  // do nothing
783  } else if (m_OldBsh.GetInst_Repr() == CSeq_inst::eRepr_delta
784  && CUpdateSeq_Input::s_IsDeltaWithNoGaps(m_OldBsh.GetCompleteBioseq().GetObject())) {
785  new_inst->SetRepr(CSeq_inst::eRepr_raw);
786  new_inst->ResetExt();
787  }
788  else {
789  NCBI_THROW(CSeqUpdateException, eInternal, "Could not replace the old sequence: old sequence is neither raw nor it can be converted to raw.");
790  }
791 
792 
793  if (m_UpdBsh.GetInst_Repr() == CSeq_inst::eRepr_raw) {
794  CRef<CSeq_data> new_data(new CSeq_data);
795  new_data->Assign(m_UpdBsh.GetInst_Seq_data());
796  new_inst->SetSeq_data(*new_data);
797  new_inst->SetLength(m_UpdBsh.GetInst_Length());
798  } else if (m_UpdBsh.GetInst_Repr() == CSeq_inst::eRepr_delta
799  && CUpdateSeq_Input::s_IsDeltaWithNoGaps(m_UpdBsh.GetCompleteBioseq().GetObject())) {
800 
801  string seqdata;
802  CSeqVector seq_vec = m_UpdBsh.GetSeqVector();
804  seq_vec.GetSeqData(0, m_UpdBsh.GetInst_Length(), seqdata);
805  new_inst->SetSeq_data().SetIupacna().Set(seqdata);
806  new_inst->SetLength(TSeqPos(seqdata.length()));
807  } else {
808  NCBI_THROW(CSeqUpdateException, eInternal, "Could not replace the old sequence: update sequence is neither raw nor it can be converted to raw.");
809  }
810 
811  if (m_OldBsh.IsNucleotide()) {
812  CSeqportUtil::Pack(&new_inst->SetSeq_data());
813  }
814 
815  return new_inst;
816 }
817 
819 {
822  _ASSERT(oldseqRow != updseqRow);
823  _ASSERT(oldseqRow > -1);
824  _ASSERT(updseqRow > -1);
825 
826  // the start of aligned region in the old sequence coordinates
827  TSeqPos aln_start = m_Align->GetSeqStart(oldseqRow);
828  // the end of aligned region in the old sequence coordinates
829  TSeqPos aln_stop = m_Align->GetSeqStop(oldseqRow);
830 
831  // the start of aligned region in the update sequence coordinates
832  TSeqPos aln_start_upd = m_Align->GetSeqStart(updseqRow);
833  // the end of aligned region in the update sequence coordinates
834  TSeqPos aln_stop_upd = m_Align->GetSeqStop(updseqRow);
835 
836  const TSeqPos old_length = m_OldBsh.GetBioseqLength();
837  const TSeqPos upd_length = m_UpdBsh.GetBioseqLength();
838 
839  // Take the whole patch even if the edges are not aligned
840  if (aln_start < aln_start_upd)
841  aln_start = 0;
842  else
843  aln_start -= aln_start_upd;
844  aln_start_upd = 0;
845 
846  TSeqPos right_edge = upd_length - 1 - aln_stop_upd;
847  aln_stop += right_edge;
848  if (aln_stop > old_length - 1)
849  aln_stop = old_length - 1;
850 
853  string patched;
854 
855  // take old 5' //
856  if (aln_start > 0){
857  string old_5prime;
858  old_seq.GetSeqData(0, aln_start, old_5prime); //[start, stop)
859  patched.append(old_5prime);
860  }
861 
862  // take aligned middle//
863  if (aln_start_upd < aln_stop_upd) {
864  string middle;
865  upd_seq.GetSeqData(aln_start_upd, aln_stop_upd + 1, middle);
866  patched.append(middle);
867  }
868 
869  // take old 3' (if any) //
870  if (aln_stop + 1 < old_length) {
871  string old_3prime;
872  old_seq.GetSeqData(aln_stop + 1, old_length, old_3prime);
873  patched.append(old_3prime);
874  }
875 
876  return x_UpdateSeqInst(patched);
877 }
878 
880 {
883  string seq;
884  upd_seq.GetSeqData(0, m_UpdBsh.GetBioseqLength(), seq);
886  }
887  else {
888  _ASSERT(m_Align);
891  _ASSERT(oldseqRow != updseqRow);
892  _ASSERT(oldseqRow > -1 && updseqRow > -1);
893 
895  string new_seq;
897  // the end of aligned region in the old sequence coordinates
898  const TSeqPos aln_stop = m_Align->GetSeqStop(oldseqRow);
899  // the end of aligned region in the update sequence coordinates
900  const TSeqPos aln_stop_upd = m_Align->GetSeqStop(updseqRow);
901 
902  // take new 5' and aligned middle //
903  string new_5prime;
904  upd_seq.GetSeqData(0, aln_stop_upd + 1, new_5prime);
905  new_seq.append(new_5prime);
906 
907  // take old 3' end //
908  string old_3prime;
909  old_seq.GetSeqData(aln_stop + 1, m_OldBsh.GetBioseqLength(), old_3prime);
910  new_seq.append(old_3prime);
911  }
913  // the start of aligned region in the old sequence coordinates
914  const TSeqPos aln_start = m_Align->GetSeqStart(oldseqRow);
915  // the start of aligned region in the update sequence coordinates
916  const TSeqPos aln_start_upd = m_Align->GetSeqStart(updseqRow);
917 
918  // take old 5' //
919  string old_5prime;
920  old_seq.GetSeqData(0, aln_start, old_5prime);
921  new_seq.append(old_5prime);
922 
923  // take aligned middle and new 3' end //
924  string new_3prime;
925  upd_seq.GetSeqData(aln_start_upd, m_UpdBsh.GetBioseqLength(), new_3prime);
926  new_seq.append(new_3prime);
927  }
928  return x_UpdateSeqInst(new_seq);
929  }
930 
931  return CRef<CSeq_inst>();
932 }
933 
934 string CSequenceUpdater::s_GetValidExtension(const string& extension)
935 {
936  if (extension.empty()) {
937  return kEmptyStr;
938  }
939 
940  const string allowed("ATUCGRYSWKMBDHVN"); // for nucleotides
941  string small_allowed(allowed);
942  NStr::ToLower(small_allowed);
943 
944  string valid_extension(kEmptyStr);
945  valid_extension.reserve(extension.size());
946  copy_if(extension.begin(), extension.end(),
947  back_inserter(valid_extension),
948  [&allowed, &small_allowed](const char& c) { return (allowed.find(c) != NPOS) || (small_allowed.find(c) != NPOS); });
949 
950  return valid_extension;
951 }
952 
954  const string& extension, SUpdateSeqParams::ESequenceUpdateType update_type)
955 {
956  if (extension.empty()
957  || bsh.IsAa()
958  || (update_type != SUpdateSeqParams::eSeqUpdateExtend5 && update_type != SUpdateSeqParams::eSeqUpdateExtend3)) {
959  return CRef<CSeq_inst>();
960  }
961 
962  const string valid_extension = s_GetValidExtension(extension);
963  if (valid_extension.empty()) {
964  return CRef<CSeq_inst>();
965  }
966 
967  string orig_seq;
969  seqvec.GetSeqData(0, bsh.GetBioseqLength(), orig_seq);
970 
971  string new_seq;
972  if (update_type == SUpdateSeqParams::eSeqUpdateExtend5) {
973  new_seq.assign(valid_extension);
974  new_seq.append(orig_seq);
975  } else if (update_type == SUpdateSeqParams::eSeqUpdateExtend3) {
976  new_seq.assign(orig_seq);
977  new_seq.append(valid_extension);
978  }
979 
980  return s_UpdateSeqInst(bsh, new_seq);
981 }
982 
984 {
985  return s_UpdateSeqInst(m_OldBsh, upd_str);
986 }
987 
989 {
990  CRef<CSeq_inst> new_inst(new CSeq_inst());
991  new_inst->Assign(bsh.GetInst());
994  // convert it into raw
995  new_inst->SetRepr(CSeq_inst::eRepr_raw);
996  new_inst->ResetExt();
997  }
998  bool updated(false);
999  if (!NStr::IsBlank(upd_str)) {
1000  new_inst->SetLength(static_cast<int>(upd_str.size()));
1001  if (bsh.IsNa()) {
1002  new_inst->SetSeq_data().SetIupacna(*new CIUPACna(upd_str));
1003  CSeqportUtil::Pack(&new_inst->SetSeq_data());
1004  } else {
1005  new_inst->SetSeq_data().SetIupacaa(*new CIUPACaa(upd_str));
1006  }
1007  updated = true;
1008  }
1009 
1010  return (updated) ? new_inst : CRef<CSeq_inst>();
1011 }
1012 
1013 
1015 {
1016  CRef<CSeq_loc> new_loc = s_OffsetLocation(feat.GetLocation(), offset, newId);
1017  CRef<CSeq_feat> shifted_feat(new CSeq_feat());
1018  shifted_feat->Assign(feat);
1019  shifted_feat->SetLocation(*new_loc);
1020 
1021  if (feat.GetData().IsCdregion()) {
1022  s_Shift_CDSCodeBreaks(shifted_feat, offset, newId);
1023  }
1025  s_Shift_tRNAAntiCodon(shifted_feat, offset, newId);
1026  }
1027  return shifted_feat;
1028 }
1029 
1031 {
1032  CCdregion& cds = feat->SetData().SetCdregion();
1033  if (cds.IsSetCode_break()) {
1035  if ((*it)->IsSetLoc()) {
1036  const CSeq_loc& codebreak = (*it)->GetLoc();
1037  CRef<CSeq_loc> new_codebreak = s_OffsetLocation(codebreak, offset, targetId);
1038  (*it)->SetLoc(*new_codebreak);
1039  }
1040  }
1041  }
1042 }
1043 
1045 {
1046  if (!feat->GetData().GetRna().IsSetExt()) {
1047  return;
1048  }
1049  CRNA_ref::C_Ext& ext = feat->SetData().SetRna().SetExt();
1050  if (ext.IsTRNA() && ext.GetTRNA().IsSetAnticodon()) {
1051  const CSeq_loc& anticodon = ext.GetTRNA().GetAnticodon();
1052  CRef<CSeq_loc> new_anticodon = s_OffsetLocation(anticodon, offset, targetId);
1053  ext.SetTRNA().SetAnticodon(*new_anticodon);
1054  }
1055 }
1056 
1058 {
1059  CRef<CSeq_loc> shiftedLoc(new CSeq_loc);
1060  shiftedLoc->Assign(sourceLoc);
1061 
1062  CSeq_loc_I loc_it(*shiftedLoc);
1063  for (; loc_it; ++loc_it) {
1064  if (targetId) {
1065  loc_it.SetSeq_id(*targetId);
1066  }
1067  loc_it.SetFrom(loc_it.GetRange().GetFrom() + offset);
1068  loc_it.SetTo(loc_it.GetRange().GetTo() + offset);
1069  }
1070 
1071  shiftedLoc->Assign(*loc_it.MakeSeq_loc());
1072  return shiftedLoc;
1073 }
1074 
1075 
1077 {
1079  sel.SetLimitTSE(m_OldBsh.GetTSE_Handle());
1080  for (CFeat_CI feat_it(m_OldBsh, sel); feat_it; ++feat_it) {
1081  if (x_ShouldRemoveFeature(*feat_it)) {
1082  continue;
1083  }
1084 
1085  CRef<CSeq_feat> mapped_feat = x_MappedFeature_ChangeId(*feat_it);
1086  // if features originally spanned the whole sequence, they should also span the whole sequence
1087  if (feat_it->GetLocation().IsInt()) {
1088  const TSeqPos start = feat_it->GetLocation().GetStart(eExtreme_Positional);
1089  const TSeqPos stop = feat_it->GetLocation().GetStop(eExtreme_Positional);
1090  if (start == 0 && stop == m_OldBsh.GetBioseqLength() - 1) {
1091  mapped_feat->SetLocation().SetInt().SetTo(x_GetNewSeqLength() - 1);
1092  }
1093  }
1094  if (mapped_feat) {
1095  m_MappedOldFeats.push_back(mapped_feat);
1096  }
1097 
1098  }
1099 }
1100 
1102 {
      // Old features: align the old sequence against the new one and map every kept
      // feature through the first alignment returned.
1103  auto align_OldNew = x_FormNewAlignment(m_OldBsh);
1104  CSeq_align::TDim oldseqRow = FindRowInAlignment(m_OldBsh, align_OldNew[0].GetObject());
1106  sel.SetLimitTSE(m_OldBsh.GetTSE_Handle());
1107  for (CFeat_CI feat_it(m_OldBsh, sel); feat_it; ++feat_it) {
1108  if (x_ShouldRemoveFeature(*feat_it)) {
1109  continue;
1110  }
      // Make the feature's location id agree with the alignment row before mapping.
1111  x_ChangeIDInFeature(*feat_it, align_OldNew[0].GetObject(), oldseqRow);
1112  CRef<CSeq_feat> mapped_feat = x_MappedFeature_ThroughAlign(*feat_it, align_OldNew[0].GetObject());
1113  if (mapped_feat) {
1114  m_MappedOldFeats.push_back(mapped_feat);
1115  }
1116  }
1117 
1118  // form alignment A(upd|new)
1119  if (m_Params.m_ImportFeatures) {
1120  auto align_UpdNew = x_FormNewAlignment(m_UpdBsh);
1122  sel.SetLimitTSE(m_UpdBsh.GetTSE_Handle());
1123  for (CFeat_CI feat_it(m_UpdBsh, sel); feat_it; ++feat_it) {
      // Features that cannot be mapped (null result) are not imported.
1124  CRef<CSeq_feat> import_feat = x_MappedFeature_ThroughAlign(*feat_it, align_UpdNew[0].GetObject());
1125  if (import_feat) {
1126  m_ImportUpdFeats.push_back(import_feat);
1127  }
1128  }
1130  }
1131 }
1132 
1134 {
1135  // make a new entry (seq or set) based on the old bioseq
      // m_NewEntry starts as a copy of the old entry; x_PrepareNewEntry() then strips it
      // and installs newInst.
1138  m_NewEntry->Assign(*(oldSeh.GetCompleteSeq_entry()));
1139 
1140  x_PrepareNewEntry(m_NewEntry, newInst);
1141 }
1142 
1144 {
      // Recursively rewrites `entry`: a bioseq gets newInst, loses its annotations and
      // ids and receives a copy of m_NewId; a set loses its annotations and protein members.
1145  // for nucleotide sequence, remove original seq-inst and all annotations
1146  if (entry->IsSeq()) {
1147  entry->SetSeq().ResetInst();
1148  entry->SetSeq().SetInst().Assign(newInst);
1149  entry->SetSeq().ResetAnnot();
1150  // remove all Ids
1151  // add a new, temporary ID
1152  entry->SetSeq().ResetId();
1153  CRef<CSeq_id> newid(new CSeq_id);
1154  newid->Assign(*m_NewId);
1155  entry->SetSeq().SetId().push_back(newid);
1156  }
      // NOTE(review): this tests m_NewEntry, not `entry`; during recursion the two differ.
      // Presumably `entry->IsSet()` was intended - confirm.
1157  else if (m_NewEntry->IsSet()) {
1158  // remove annotations from the set level
1159  entry->SetSet().ResetAnnot();
1160  CBioseq_set::TSeq_set& seqset = entry->SetSet().SetSeq_set();
1161  CBioseq_set::TSeq_set::iterator it = seqset.begin();
1162  while (it != seqset.end()) {
1163  // remove proteins from the set
1164  if ((*it)->IsSeq() && (*it)->GetSeq().IsAa()) {
      // erase() returns the next iterator, so the iterator is not advanced on this path.
1165  it = seqset.erase(it);
1166  }
1167  else {
1168  x_PrepareNewEntry(*it, newInst);
1169  ++it;
1170  }
1171  }
1172  }
1173 }
1174 
1176 {
      // Picks the entry handle that represents the old sequence:
      //  - a protein: its own entry;
      //  - a non-protein whose parent set has class nuc-prot: that set's entry;
      //  - otherwise: the bioseq's own entry.
1177  _ASSERT(m_OldBsh);
1178  CSeq_entry_Handle oldSeh;
1179 
1180  if (m_OldBsh.IsProtein())
1181  return m_OldBsh.GetSeq_entry_Handle();
1182 
1183  CBioseq_set_Handle oldSetSeh = m_OldBsh.GetParentBioseq_set();
1184  if (oldSetSeh &&
1185  oldSetSeh.IsSetClass() &&
1186  oldSetSeh.GetClass() == CBioseq_set::eClass_nuc_prot) {
1187  oldSeh = oldSetSeh.GetParentEntry();
1188  }
1189  else {
1190  oldSeh = m_OldBsh.GetSeq_entry_Handle();
1191  }
1192 
1193  return oldSeh;
1194 }
1195 
1197 {
1199  return true;
1200  }
1201 
1203  _ASSERT(updseqRow > -1);
      // Location covered by the alignment on the update-sequence row.
1204  CRef<CSeq_loc> align_loc = m_Align->CreateRowSeq_loc(updseqRow);
1206 
1207  sequence::ECompare comp = sequence::Compare(feat->GetLocation(), *align_loc,
1209 
      // A feature that does not overlap the aligned region (or only abuts it) is rejected;
      // "<subtype name> <location label>" is recorded in m_NotImportedFeats.
1210  if (comp == sequence::eNoOverlap || comp == sequence::eAbutting) {
1211  string subtype = CSeqFeatData::SubtypeValueToName(feat->GetData().GetSubtype());
1212  string label;
1213  feat->GetLocation().GetLabel(&label);
1214  subtype.append(" " + label);
1215  m_NotImportedFeats.push_back(subtype);
1216  return false;
1217  }
1218  return true;
1219 }
1220 
1222 {
1224  return true;
1226  return false;
1227  }
1228 
1229  _ASSERT(m_Align);
1231  _ASSERT(oldseqRow > -1);
      // Extent of the alignment on the old-sequence row.
1232  const TSeqPos aln_start = m_Align->GetSeqStart(oldseqRow);
1233  const TSeqPos aln_stop = m_Align->GetSeqStop(oldseqRow);
1234 
      // Positional extremes of the feature.
1235  const TSeqPos start = fh.GetLocation().GetStart(eExtreme_Positional);
1236  const TSeqPos stop = fh.GetLocation().GetStop(eExtreme_Positional);
1237 
      // The two returns below are exact complements of each other.
      // NOTE(review): a feature ending exactly at aln_start counts as outside, while one
      // starting exactly at aln_stop counts as inside - confirm the asymmetry is intended.
1238  switch (m_Params.m_FeatRemoveOption) {
1240  // remove if feature is inside the alignment
1241  return (stop > aln_start && start <= aln_stop);
1243  // remove if feature is outside the alignment
1244  return (stop <= aln_start || start > aln_stop);
1245  default:
1246  break;
1247  }
1248  return false;
1249 }
1250 
1252 {
      // Builds a copy of the feature whose location, tRNA anticodon and CDS code-break
      // locations all carry m_NewId; coordinates are left as they were.
1253  CRef<CSeq_loc> mappedLoc(new CSeq_loc);
1254  mappedLoc->Assign(orig_fh.GetLocation());
1255  mappedLoc->SetId(*m_NewId);
1256 
1257  CRef<CSeq_feat> mappedFeature(new CSeq_feat);
1258  mappedFeature->Assign(*(orig_fh.GetSeq_feat()));
1259  mappedFeature->ResetLocation();
1260  mappedFeature->ResetPartial();
1261 
      // The partial flag is re-derived from the re-identified location.
1262  mappedFeature->SetLocation().Assign(*mappedLoc);
1263  if (mappedLoc->IsPartialStart(eExtreme_Biological) || mappedLoc->IsPartialStop(eExtreme_Biological)) {
1264  mappedFeature->SetPartial(true);
1265  }
1266 
1267  if (mappedFeature->GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA) {
1268  CRNA_ref& rna = mappedFeature->SetData().SetRna();
1269  if (rna.IsSetExt() &&
1270  rna.GetExt().IsTRNA() &&
1271  rna.GetExt().GetTRNA().IsSetAnticodon()) {
1272 
1273  CRef<CSeq_loc> new_anticodon(new CSeq_loc);
1274  new_anticodon->Assign(rna.GetExt().GetTRNA().GetAnticodon());
1275  new_anticodon->SetId(*m_NewId);
1276  rna.SetExt().SetTRNA().SetAnticodon(*new_anticodon);
1277  }
1278  }
1279  else if (mappedFeature->GetData().IsCdregion()) {
1280  CCdregion& cds = mappedFeature->SetData().SetCdregion();
1281  if (cds.IsSetCode_break()) {
1283  if ((*it)->IsSetLoc()) {
1284  CRef<CSeq_loc> new_codebreak(new CSeq_loc);
1285  new_codebreak->Assign((*it)->GetLoc());
1286  new_codebreak->SetId(*m_NewId);
1287  (*it)->SetLoc(*new_codebreak);
1288  }
1289  }
1290  }
1291  }
1292 
1293  return mappedFeature;
1294 }
1295 
1297 {
      // Builds a copy of the feature with its location (and tRNA anticodon / CDS code-break
      // locations) mapped onto m_NewId through `align`. Returns an empty CRef when the
      // main location cannot be mapped.
1298 
1299  CScope& scope = orig_fh.GetScope();
1300  bool is_circular = m_OldBsh.IsSetInst_Topology() && m_OldBsh.GetInst_Topology() == CSeq_inst::eTopology_circular;
1301  CRef<CSeq_loc> mappedLoc = s_MapLocation(orig_fh.GetLocation(), *m_NewId, align, is_circular, scope, x_GetNewSeqLength());
1302  // NULL location are transformed into an [0, 0] interval (done, internally)
1303 
1304  if (!mappedLoc) {
1305  return CRef<CSeq_feat>();
1306  }
1307 
1308  CRef<CSeq_feat> mappedFeature(new CSeq_feat);
1309  mappedFeature->Assign(*(orig_fh.GetSeq_feat()));
1310  mappedFeature->ResetLocation();
1311  mappedFeature->ResetPartial();
1312 
      // The partial flag is re-derived from the mapped location.
1313  mappedFeature->SetLocation().Assign(*mappedLoc);
1314  if (mappedLoc->IsPartialStart(eExtreme_Biological) || mappedLoc->IsPartialStop(eExtreme_Biological)) {
1315  mappedFeature->SetPartial(true);
1316  }
1317 
1318  if (mappedFeature->GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA) {
1319  CRNA_ref& rna = mappedFeature->SetData().SetRna();
1320  if (rna.IsSetExt() &&
1321  rna.GetExt().IsTRNA() &&
1322  rna.GetExt().GetTRNA().IsSetAnticodon()) {
1323 
1324  const CSeq_loc& anticodon = rna.GetExt().GetTRNA().GetAnticodon();
      // NOTE(review): unlike mappedLoc above, this result is dereferenced without a null check.
1325  CRef<CSeq_loc> new_anticodon = s_MapLocation(anticodon, *m_NewId, align, is_circular, scope, x_GetNewSeqLength());
1326  rna.SetExt().SetTRNA().SetAnticodon(*new_anticodon);
1327  }
1328  }
1329  else if (mappedFeature->GetData().IsCdregion()) {
1330  CCdregion& cds = mappedFeature->SetData().SetCdregion();
1331  if (cds.IsSetCode_break()) {
1333  if ((*it)->IsSetLoc()) {
1334  const CSeq_loc& codebreak = (*it)->GetLoc();
      // NOTE(review): same unchecked dereference as for the anticodon.
1335  CRef<CSeq_loc> new_codebreak = s_MapLocation(codebreak, *m_NewId, align, is_circular, scope, x_GetNewSeqLength());
1336  (*it)->SetLoc(*new_codebreak);
1337  }
1338  }
1339  }
1340  }
1341 
1342  return mappedFeature;
1343 }
1344 
1346 {
      // Nothing to do for an invalid feature handle or a negative alignment row.
1347  if (!fh || row < 0) {
1348  return;
1349  }
1350 
      // Only the Seq-id *type* (choice) is compared, not the id value.
1351  CSeq_id::E_Choice loc_id_type = fh.GetLocationId().Which();
1352  CSeq_id::E_Choice align_id_type = align.GetSeq_id(row).Which();
1353 
1354  if (loc_id_type != align_id_type) {
1355  // replace the location id in the feature, it should match the alignment Id
1357  copy->Assign(*fh.GetSeq_feat());
1358 
1359  const CSeq_id& alignId = align.GetSeq_id(row);
1360  copy->SetLocation().SetId(alignId);
      // Nested locations (tRNA anticodon, CDS code-breaks) get the same id.
1361  if (copy->GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA) {
1362  CRNA_ref& rna = copy->SetData().SetRna();
1363  if (rna.IsSetExt() &&
1364  rna.GetExt().IsTRNA() &&
1365  rna.GetExt().GetTRNA().IsSetAnticodon()) {
1366  rna.SetExt().SetTRNA().SetAnticodon().SetId(alignId);
1367  }
1368  }
1369  else if (copy->GetData().IsCdregion()) {
1370  CCdregion& cds = copy->SetData().SetCdregion();
1371  if (cds.IsSetCode_break()) {
1373  if ((*it)->IsSetLoc()) {
1374  (*it)->SetLoc().SetId(alignId);
1375  }
1376  }
1377  }
1378  }
1379 
1381  }
1382 }
1383 
1384 vector<CConstRef<objects::CSeq_align> > CSequenceUpdater::x_FormNewAlignment(const CBioseq_Handle& subject)
1385 {
      // Aligns `subject` against the newly built sequence. The new entry is added to the
      // scope of m_OldBsh only for the duration of the alignment run.
      // Throws CSeqUpdateException(eFeatAdjust) when a CException is caught.
1387 
1388  // add New Sequence to the scope of m_OldBsh and m_UpdBsh
1389  CSeq_entry_Handle newSeh = m_OldBsh.GetScope().AddTopLevelSeqEntry(*m_NewEntry);
1390  CBioseq_Handle newBsh = newSeh.GetBioseqHandle(*m_NewId);
1391 
1392  try {
1393  auto align = sequpd::RunBlast2Seq(subject, newBsh, true);
1394 
1395  // remove new sequence from the original scope
1396  newSeh.GetTopLevelEntry().GetEditHandle().Remove();
1397 
      // Callers index element 0 of the result, so exactly one alignment is expected.
1398  _ASSERT(align.size() == 1);
1399  return align;
1400  }
1401  catch (const CException& e) {
      // NOTE(review): on this path the temporary entry added above is not removed from
      // the scope - confirm whether cleanup is needed here as well.
1402  ERR_POST(e.what());
1403  NCBI_THROW(CSeqUpdateException, eFeatAdjust,
1404  "Could not form alignment between the subject sequence (old or update) and newly formed sequence");
1405  }
1406 }
1407 
1408 
1410 {
1412  _ASSERT(row > -1);
1413 
1414  // make a new seq-align, where the id corresponding to bsh is changed to m_NewId
      // m_Align itself is left untouched; the edit is made on a copy.
1415  CRef<CSeq_align> align(new CSeq_align);
1416  align->Assign(*m_Align);
1417 
      // NOTE(review): the segments are accessed as a dense-seg - presumably m_Align is
      // always a Dense-seg alignment; confirm.
1418  CDense_seg& denseg = align->SetSegs().SetDenseg();
1419  vector<CRef<CSeq_id> >& ids = denseg.SetIds();
1420 
      // A fresh id object is installed instead of modifying the existing one in place.
1421  ids[row].Reset(new CSeq_id);
1422  ids[row]->Assign(*m_NewId);
1423 
1424  return ConstRef(align.GetPointer());
1425 }
1426 
1428 {
      // Nothing to reconcile when either feature list is empty.
1429  if (m_MappedOldFeats.empty() || m_ImportUpdFeats.empty() ||
1431  return;
1432  }
1433 
      // Every old feature is compared against the imported features; the duplicate
      // handling depends on m_Params.m_FeatImportOption.
1434  TFeatList::iterator old_it = m_MappedOldFeats.begin();
1435 
1436  while (old_it != m_MappedOldFeats.end()) {
1437  TFeatList::iterator imp_it = m_ImportUpdFeats.begin();
1438  bool deleteOld(false);
      // The inner scan stops as soon as the old feature has been marked for deletion.
1439  while (imp_it != m_ImportUpdFeats.end() && !deleteOld) {
1440  if (AreFeaturesDuplicates(**old_it, **imp_it)) {
      // erase() already yields the next element, hence `continue` without ++imp_it.
1442  imp_it = m_ImportUpdFeats.erase(imp_it);
1443  continue;
1444  }
1445  else if (m_Params.m_FeatImportOption == SUpdateSeqParams::eFeatUpdateAllReplaceDups) { // keep the newly imported feature
1446  deleteOld = true;
1447  }
1448  else if (m_Params.m_FeatImportOption == SUpdateSeqParams::eFeatUpdateAllMergeDups) { // merge the two features
      // The fused feature takes the imported feature's slot; the old one is dropped.
1449  CRef<CSeq_feat> fusedFeat = x_FuseFeatures(**old_it, **imp_it);
1450  deleteOld = true;
1451  (*imp_it).Swap(fusedFeat);
1452  }
1453  } else {
1454  ++imp_it;
1455  }
1456  }
1457 
1458  if (deleteOld) {
1459  old_it = m_MappedOldFeats.erase(old_it);
1460  }
1461  else {
1462  ++old_it;
1463  }
1464  }
1465 }
1466 
1467 
1469 {
      // The fused feature starts as a copy of feat_upd; data from feat_old is merged into it.
1470  CRef<CSeq_feat> fusedFeat(new CSeq_feat);
1471  fusedFeat->Assign(feat_upd);
1472 
1473  // merge common fields
1474  FuseCommonFeatureFields(*fusedFeat, feat_old);
1475 
1476  // type-specific data
      // NOTE(review): each case reads feat_old's data with the getter of fusedFeat's type,
      // so both features are presumably expected to have the same data type - confirm.
1477  switch (fusedFeat->GetData().Which()) {
1478  case CSeqFeatData::e_Gene: {
1479  CGene_ref& fusedGene = fusedFeat->SetData().SetGene();
1480  const CGene_ref& gene = feat_old.GetData().GetGene();
1481  FuseGenes(fusedGene, gene);
1482  break;
1483  }
1484  case CSeqFeatData::e_Cdregion: {
1485  x_FuseProtFeatsForCDS(*fusedFeat, feat_old);
1486  break;
1487  }
1488  case CSeqFeatData::e_Prot: {
1489  FuseProteins(*fusedFeat, feat_old);
1490  break;
1491  }
1492  case CSeqFeatData::e_Rna: {
1493  CRNA_ref& fusedRna = fusedFeat->SetData().SetRna();
1494  const CRNA_ref& rna = feat_old.GetData().GetRna();
      // RNA names are only considered when both features carry a name extension.
1495  if (fusedRna.IsSetExt() && fusedRna.GetExt().IsName() &&
1496  rna.IsSetExt() && rna.GetExt().IsName()) {
1497  string orig_value = fusedRna.GetExt().GetName();
1498  string value = rna.GetExt().GetName();
1499  if (!NStr::EqualNocase(orig_value, value) &&
1501  fusedRna.SetExt().SetName(orig_value);
1502  }
1503  }
1504  break;
1505  }
1506  case CSeqFeatData::e_Region: {
1507  string origRegion = fusedFeat->GetData().GetRegion();
1508  string region = feat_old.GetData().GetRegion();
1509  if (!NStr::EqualNocase(origRegion, region) &&
1511  fusedFeat->SetData().SetRegion(origRegion);
1512  }
1513  break;
1514  }
1515  default:
1516  break;
1517  }
1518  return fusedFeat;
1519 }
1520 
1521 void CSequenceUpdater::x_FuseProtFeatsForCDS(const CSeq_feat& fusedFeat, const CSeq_feat& feat_old)
1522 {
1523  if (!fusedFeat.GetData().IsCdregion() || !feat_old.GetData().IsCdregion()) {
1524  return;
1525  }
1526  if (!fusedFeat.IsSetProduct() || !feat_old.IsSetProduct()) {
1527  return;
1528  }
1529 
1530  CScope& scope = m_OldBsh.GetScope();
1531  CBioseq_Handle fused_protsh = scope.GetBioseqHandle(fusedFeat.GetProduct());
1532  CBioseq_Handle old_protsh = scope.GetBioseqHandle(feat_old.GetProduct());
1533 
1534  TFeatList old_protfeats, upd_protfeats;
1535  for (CFeat_CI prot_it(old_protsh); prot_it; ++prot_it) {
1536  CRef<CSeq_feat> new_feat(new CSeq_feat);
1537  new_feat->Assign(*prot_it->GetSeq_feat());
1538  old_protfeats.push_back(new_feat);
1539  }
1540 
1541  for (CFeat_CI prot_it(fused_protsh); prot_it; ++prot_it) {
1542  CRef<CSeq_feat> new_feat(new CSeq_feat);
1543  new_feat->Assign(*prot_it->GetSeq_feat());
1544  upd_protfeats.push_back(new_feat);
1545  }
1546 
1547  TFeatList::iterator old_it = old_protfeats.begin();
1548  bool found_duplicate = false;
1549  while (old_it != old_protfeats.end()) {
1550  TFeatList::iterator imp_it = upd_protfeats.begin();
1551  bool deleteOld(false);
1552  while (imp_it != upd_protfeats.end() && !deleteOld) {
1553  if (AreProteinFeaturesDuplicates(**old_it, **imp_it)) {
1554  CRef<CSeq_feat> fusedFeat = x_FuseFeatures(**old_it, **imp_it);
1555  deleteOld = true;
1556  (*imp_it).Swap(fusedFeat);
1557  found_duplicate = true;
1558  } else {
1559  ++imp_it;
1560  }
1561  }
1562 
1563  if (deleteOld) {
1564  old_it = old_protfeats.erase(old_it);
1565  }
1566  else {
1567  ++old_it;
1568  }
1569  }
1570 
1571  CSeq_annot_Handle ah = DefaultGetAppropriateFeatureTable(fused_protsh);
1572  CSeq_annot_EditHandle aeh(ah);
1573 
1574  if (!found_duplicate) {
1575  // attach the old protein features to the fused protein sequence
1576  ITERATE(TFeatList, feat_it, old_protfeats) {
1577  aeh.AddFeat(**feat_it);
1578  }
1579  }
1580  else {
1581  // first remove all features
1582  aeh.Remove();
1583 
1584  // attach the new features
1585  CRef<CSeq_annot> newAnnot(new CSeq_annot);
1586  NON_CONST_ITERATE(TFeatList, feat_it, old_protfeats) {
1587  CRef<CSeq_feat> feat = *feat_it;
1588  // should have the same ID ?
1589  newAnnot->SetData().SetFtable().push_back(feat);
1590  }
1591 
1592  NON_CONST_ITERATE(TFeatList, feat_it, upd_protfeats) {
1593  CRef<CSeq_feat> feat = *feat_it;
1594  newAnnot->SetData().SetFtable().push_back(feat);
1595  }
1596 
1597  CBioseq_EditHandle esh(fused_protsh);
1598  esh.AttachAnnot(*newAnnot);
1599  }
1600 }
1601 
1603  {
      // Gathers all non-CDS features into one annotation and sets has_cds to true when
      // a coding region is encountered. Returns an empty CRef when no non-CDS feature
      // was found.
1604  CRef<CSeq_annot> seqAnnot(new CSeq_annot);
1605  bool has_non_cds = false;
1607  CRef<CSeq_feat> feat = *feat_it;
1608  if (feat->IsSetData() && feat->GetData().IsCdregion()) {
1609  has_cds = true;
1610  }
1611  else {
1612  seqAnnot->SetData().SetFtable().push_back(feat);
1613  has_non_cds = true;
1614  }
1615  }
1616 
1618  // feature has NewId that needs to be changed
1619  CRef<CSeq_feat> feat = *feat_it;
1620  if (feat->IsSetData() && feat->GetData().IsCdregion()) {
1621  has_cds = true;
1622  }
1623  else {
1624  seqAnnot->SetData().SetFtable().push_back(feat);
1625  has_non_cds = true;
1626  }
1627  }
1628 
1629  if (has_non_cds) {
1630  return seqAnnot;
1631  }
1632 
1633  return CRef<CSeq_annot>();
1634  }
1635 
1636 
1638 {
      // Applies x_RemoveNewIDInFeature() to each feature visited by the two loops below.
1640  x_RemoveNewIDInFeature(*feat_it);
1641  }
1643  x_RemoveNewIDInFeature(*feat_it);
1644  }
1645 
1646 }
1647 void CSequenceUpdater::x_AttachFeaturesToNewSeq(bool create_general_only)
1648 {
      // Attaches the mapped old features and the imported features to m_NewEntry:
      // non-CDS features go onto the bioseq, coding regions (with their protein
      // products) onto the enclosing nuc-prot set, which is created when missing.
      // create_general_only is forwarded unchanged to the CDS product helpers.
1650  if (m_MappedOldFeats.empty() && m_ImportUpdFeats.empty())
1651  return;
1652 
      // The new entry is edited inside a private scope.
1653  CScope newScope(*CObjectManager::GetInstance());
1654  newScope.AddDefaults();
1655  CSeq_entry_Handle newSeh = newScope.AddTopLevelSeqEntry(*m_NewEntry);
1656 
      // First bioseq of the same molecule type as the old sequence.
1657  CBioseq_Handle newBsh;
1658  CBioseq_CI b_iter(newSeh, m_OldBsh.GetBioseqMolType());
1659  if (b_iter) {
1660  newBsh = *b_iter;
1661  }
1662 
1663  // features has NewId that needs to be changed
1665 
1666  bool has_cds = false;
1667  CRef<CSeq_annot> seqAnnot = x_GetSeqAnnot_WithoutCDS(has_cds);
      // NOTE(review): newBsh stays unset when b_iter found nothing, yet it is used here
      // without a check - confirm that case cannot occur.
1668  CBioseq_EditHandle ebsh = newBsh.GetEditHandle();
1669  if (seqAnnot) {
1670  ebsh.AttachAnnot(*seqAnnot);
1671  }
1672 
1673  if (!has_cds) {
1674  return;
1675  }
1676 
1677  // attach coding regions to the set
1678  CSeq_entry_Handle parentSeh = newBsh.GetSeq_entry_Handle();
1679  CSeq_entry_EditHandle eh = parentSeh.GetEditHandle();
      // Prefer an already existing nuc-prot parent set.
1680  if (!eh.IsSet()) {
1681  CBioseq_set_Handle nucParent = eh.GetParentBioseq_set();
1682  if (nucParent && nucParent.IsSetClass() &&
1683  nucParent.GetClass() == CBioseq_set::eClass_nuc_prot) {
1684  eh = nucParent.GetParentEntry().GetEditHandle();
1685  }
1686  }
1687 
      // Still a lone bioseq: convert its entry into a nuc-prot set.
1688  if (!eh.IsSet()) {
1689  eh.ConvertSeqToSet();
1690  eh.SetSet().SetClass(CBioseq_set::eClass_nuc_prot);
1691 
1692  // move all source and pub descriptors from Bioseq to Nuc-prot set
1693  CBioseq_CI bi(eh, m_OldBsh.GetBioseqMolType());
1694  CBioseq_EditHandle nuc_h = bi->GetEditHandle();
1695  CSeq_descr::Tdata& seq_descr = nuc_h.SetDescr();
1696  CSeq_descr::Tdata::iterator desc_it = seq_descr.begin();
1697  while (desc_it != seq_descr.end()) {
1698  if ((*desc_it)->IsSource() || (*desc_it)->IsPub()) {
      // A copy is added to the set before the original is erased from the bioseq.
1699  CRef<objects::CSeqdesc> cpy(new objects::CSeqdesc());
1700  cpy->Assign(**desc_it);
1701  eh.AddSeqdesc(*cpy);
1702  desc_it = seq_descr.erase(desc_it);
1703  }
1704  else {
1705  ++desc_it;
1706  }
1707  }
1708  }
1709 
      // All coding regions are collected into a single annotation on the set.
1710  CRef<CSeq_annot> setAnnot(new CSeq_annot);
1711  if (!m_Params.m_UpdateProteins) {
1712  // proteins do not need to be retranslated
1714  CRef<CSeq_feat> feat = *feat_it;
1715  if (feat->IsSetData() && feat->GetData().IsCdregion()) {
1716  x_AdjustOldCDSProduct(feat, eh);
1717  setAnnot->SetData().SetFtable().push_back(feat);
1718  }
1719  }
1720 
1722  CRef<CSeq_feat> feat = *feat_it;
1723  if (feat->IsSetData() && feat->GetData().IsCdregion()) {
1724  x_ImportCDSProduct(feat, eh, newBsh, create_general_only);
1725  setAnnot->SetData().SetFtable().push_back(feat);
1726  }
1727  }
1728  }
1729  else {
1730  // retranslate coding regions and update protein sequences
1732  CRef<CSeq_feat> feat = *feat_it;
1733  if (feat->IsSetData() && feat->GetData().IsCdregion()) {
1734  x_RetranslateOldCDSProduct(feat, eh, newBsh, create_general_only);
1735  setAnnot->SetData().SetFtable().push_back(feat);
1736  }
1737  }
1738 
1740  CRef<CSeq_feat> feat = *feat_it;
1741  if (feat->IsSetData() && feat->GetData().IsCdregion()) {
1742  x_RetranslateImportedCDSProduct(feat, eh, newBsh, create_general_only);
1743  setAnnot->SetData().SetFtable().push_back(feat);
1744  }
1745  }
1746  }
1747 
1748  eh.AttachAnnot(*setAnnot);
1749 
1750 
1751 }
1752 
1754 {
      // Attaches to `eh` a copy of the CDS product whose length and sequence data come
      // from a fresh translation against newBsh. A CDS without a product, with
      // "RNA editing" in its exception text, or with an unresolvable product is left alone.
1755  if (!cds->IsSetProduct()) {
1756  return;
1757  }
1758 
1759  if (cds->IsSetExcept_text() && NStr::Find(cds->GetExcept_text(), "RNA editing") != string::npos) {
1760  return;
1761  }
1762 
      // The product is looked up in the scope of the old sequence.
1763  CScope& origScope = m_OldBsh.GetScope();
1764  CBioseq_Handle psh = origScope.GetBioseqHandle(cds->GetProduct());
1765  if (!psh) {
1766  return;
1767  }
1768 
1769  CSeq_entry_Handle prot_eh = psh.GetSeq_entry_Handle();
1770  CRef<CSeq_entry> new_protein(new CSeq_entry());
1771  new_protein->Assign(*(prot_eh.GetCompleteSeq_entry()));
1772  // copy of the original protein, keeping the original ids - always
1773 
      // Only length and sequence data are taken from the retranslated protein.
1774  CRef<CSeq_entry> retranslated_protein = ncbi::CreateTranslatedProteinSequence(cds, newBsh, create_general_only);
1775  CSeq_inst& retranslated_inst = retranslated_protein->SetSeq().SetInst();
1776  CSeq_inst& new_inst = new_protein->SetSeq().SetInst();
1777 
1778  new_inst.SetLength(retranslated_inst.SetLength());
1779  new_inst.SetSeq_data(retranslated_inst.SetSeq_data());
1780  SetMolinfoForProtein(new_protein,
1783 
      // No new id is passed, so the protein keeps its ids.
1784  x_AdjustProteinFeature(new_protein);
1785  eh.AttachEntry(*new_protein);
1786 }
1787 
1789 {
      // Attaches an unchanged copy of the CDS product to `eh`. Nothing happens when the
      // CDS has no product or the product cannot be resolved in the old sequence's scope.
1790  if (!cds->IsSetProduct()) {
1791  return;
1792  }
1793 
1794  // don't change anything on the existing protein
1795  CScope& origScope = m_OldBsh.GetScope();
1796  CBioseq_Handle protein = origScope.GetBioseqHandle(cds->GetProduct());
1797  if (!protein) {
1798  return;
1799  }
1800 
1801  // attach the protein to the sequence:
1802  CSeq_entry_Handle proth = protein.GetSeq_entry_Handle();
1803  if (proth) {
1804  CConstRef<CSeq_entry> prot_entry = proth.GetCompleteSeq_entry();
1805  if (prot_entry) {
      // A copy is attached; the original entry is not touched.
1806  CRef<CSeq_entry> new_protein(new CSeq_entry());
1807  new_protein->Assign(*prot_entry);
1808  eh.AttachEntry(*new_protein);
1809  }
1810  }
1811 }
1812 
1814  CBioseq_Handle& newBsh, bool create_general_only)
1815 {
      // Attaches to `eh` a copy of the imported CDS product with retranslated length and
      // sequence data. Unless m_Params.m_KeepProteinId is set, the protein also gets a
      // newly generated id, which is written into the CDS product as well.
1816  if (!cds->IsSetProduct()) {
1817  return;
1818  }
1819  if (cds->IsSetExcept_text() && NStr::Find(cds->GetExcept_text(), "RNA editing") != string::npos) {
1820  return;
1821  }
1822  CScope& origScope = m_OldBsh.GetScope();
1823  CBioseq_Handle psh = origScope.GetBioseqHandle(cds->GetProduct());
1824  if (!psh) {
1825  return;
1826  }
1827 
1828  CSeq_entry_Handle prot_eh = psh.GetSeq_entry_Handle();
1829  CRef<CSeq_entry> new_protein(new CSeq_entry());
1830  new_protein->Assign(*(prot_eh.GetCompleteSeq_entry()));
1831  // copy of the original protein, keeping the original ids
1832 
1833  CRef<CSeq_entry> retranslated_protein = ncbi::CreateTranslatedProteinSequence(cds, newBsh, create_general_only);
1834  CSeq_inst& retranslated_inst = retranslated_protein->SetSeq().SetInst();
1835  CSeq_inst& new_inst = new_protein->SetSeq().SetInst();
1836 
1837  new_inst.SetLength(retranslated_inst.SetLength());
1838  new_inst.SetSeq_data(retranslated_inst.SetSeq_data());
1839  SetMolinfoForProtein(new_protein,
1842 
      // NOTE(review): parentSeh and seh are not used by any statement shown here.
1843  CSeq_entry_Handle parentSeh = newBsh.GetSeq_entry_Handle();
1844  if (!m_Params.m_KeepProteinId) {
1845  CSeq_entry_Handle seh = m_OldBsh.GetTopLevelEntry();
1846  int offset = 1;
1847  string id_label;
1848  CRef<CSeq_id> product_id = objects::edit::GetNewProtId(newBsh, offset, id_label, create_general_only);
1849  newBsh.GetScope().RemoveFromHistory(*product_id);
1850  x_AdjustProteinFeature(new_protein, product_id);
1851 
1852  // change product id in CDS
1853  cds->SetProduct().SetWhole().Assign(*product_id);
1854  }
1855  else {
1856  x_AdjustProteinFeature(new_protein);
1857  }
1858 
1860  eh.AttachEntry(*new_protein);
1861 
1862 }
1863 
1865  CBioseq_Handle& newBsh, bool create_general_only)
1866 {
      // Attaches a copy of the imported CDS product to `eh` without retranslating it.
      // Unless m_Params.m_KeepProteinId is set, the copy gets a newly generated id,
      // which is written into the CDS product as well.
1867  if (!cds->IsSetProduct()) {
1868  return;
1869  }
1870 
1871  CScope& origScope = m_OldBsh.GetScope();
1872  CBioseq_Handle protein = origScope.GetBioseqHandle(cds->GetProduct());
1873  if (!protein) {
1874  return;
1875  }
      // NOTE(review): parentSeh (and seh below) are not used by any statement shown here.
1876  CSeq_entry_Handle parentSeh = newBsh.GetSeq_entry_Handle();
1877  // attach the protein to the sequence:
1878  CSeq_entry_Handle proth = protein.GetSeq_entry_Handle();
1879  if (proth) {
1880  CConstRef<CSeq_entry> prot_entry = proth.GetCompleteSeq_entry();
1881  if (prot_entry) {
1882  CRef<CSeq_entry> new_protein(new CSeq_entry());
1883  new_protein->Assign(*prot_entry);
1884 
1885  if (!m_Params.m_KeepProteinId) {
1886  CSeq_entry_Handle seh = m_OldBsh.GetTopLevelEntry();
1887  int offset = 1;
1888  string id_label;
1889  CRef<CSeq_id> product_id = objects::edit::GetNewProtId(newBsh, offset, id_label, create_general_only);
1890  // reset scope history for unresolved seq_id
1891  newBsh.GetScope().RemoveFromHistory(*product_id);
1892  x_UpdateProteinID(new_protein, product_id);
1893 
1894  // change product id in CDS
1895  cds->SetProduct().SetWhole().Assign(*product_id);
1896  }
1897 
1898  // remove RefGeneTracking user-objects and create-date descriptors from imported proteins
1900  eh.AttachEntry(*new_protein);
1901  }
1902  }
1903 }
1904 
1905 
1907 {
      // Re-points the feature (location, tRNA anticodon, CDS code-breaks) at an id of
      // the old sequence: its local id when there is one, otherwise the best-ranked id.
1908  CRef<CSeq_id> bestId = FindBestChoice(m_OldBsh.GetCompleteBioseq()->GetId(), CSeq_id::BestRank);
1909  // if there is a local ID, change it to that one
1910  const CSeq_id* lcl_id = m_OldBsh.GetCompleteBioseq()->GetLocalId();
1911  if (lcl_id) {
1912  bestId.Reset(new CSeq_id);
1913  bestId->Assign(*lcl_id);
1914  }
1915 
1916  _ASSERT(bestId);
1917  feat->SetLocation().SetId(*bestId);
1918 
1919  if (feat->GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA) {
1920  CRNA_ref& rna = feat->SetData().SetRna();
1921  if (rna.IsSetExt() &&
1922  rna.GetExt().IsTRNA() &&
1923  rna.GetExt().GetTRNA().IsSetAnticodon()) {
1924  rna.SetExt().SetTRNA().SetAnticodon().SetId(*bestId);
1925  }
1926  }
1927  else if (feat->GetData().IsCdregion()) {
1928  CCdregion& cds = feat->SetData().SetCdregion();
1929  if (cds.IsSetCode_break()) {
1931  if ((*it)->IsSetLoc()) {
1932  (*it)->SetLoc().SetId(*bestId);
1933  }
1934  }
1935  }
1936  }
1937 }
1938 
1940 {
      // Walks the descriptors of `protein` and erases selected ones in place.
1941  EDIT_EACH_SEQDESC_ON_SEQDESCR(desc_it, protein->SetDescr()) {
1942  switch ((*desc_it)->Which()) {
1944  ERASE_SEQDESC_ON_SEQDESCR(desc_it, protein->SetDescr());
1945  break;
1946  case CSeqdesc::e_User:
1947  {
      // Only user objects without a class whose string type is exactly
      // "RefGeneTracking" (case-sensitive) are erased.
1948  CUser_object& user_obj = (*desc_it)->SetUser();
1949  if (!user_obj.IsSetClass()
1950  && user_obj.IsSetType() && user_obj.GetType().IsStr()
1951  && NStr::EqualCase(user_obj.GetType().GetStr(), "RefGeneTracking")) {
1952  ERASE_SEQDESC_ON_SEQDESCR(desc_it, protein->SetDescr());
1953  }
1954  break;
1955  }
1956  default:
1957  break;
1958  }
1959  }
1960 }
1961 
1962 
1964 {
      // Recursively replaces the ids of every bioseq under `entry` with copies of the
      // ids of the old sequence.
1965 
1966  if (entry->IsSeq()) {
1967  // remove original local ID
1968  entry->SetSeq().ResetId();
1969  // copy ids from Old Sequence
1970  ITERATE(CBioseq::TId, it, m_OldBsh.GetCompleteBioseq()->GetId()) {
1971  CRef<CSeq_id> new_id(new CSeq_id);
1972  new_id->Assign((**it));
1973  entry->SetSeq().SetId().push_back(new_id);
1974  }
1975  }
      // NOTE(review): this tests m_NewEntry, not `entry`; during recursion the two differ.
      // Presumably `entry->IsSet()` was intended - confirm.
1976  else if (m_NewEntry->IsSet()) {
1977  CBioseq_set::TSeq_set& seqset = entry->SetSet().SetSeq_set();
1978  CBioseq_set::TSeq_set::iterator it = seqset.begin();
1979  while (it != seqset.end()) {
1980  x_FixIDInNewEntry(*it);
1981  ++it;
1982  }
1983  }
1984 }
1985 
1987 {
1988  // updates the ID but also updates the location of protein features
      // Only a non-null entry holding a single non-nucleotide bioseq is processed.
1989  if (!protein || protein->IsSet() || protein->GetSeq().IsNa()) {
1990  return;
1991  }
1992 
      // newId may be null, in which case the existing ids are kept.
1993  if (newId) {
1994  protein->SetSeq().ResetId();
1995  protein->SetSeq().SetId().push_back(newId);
1996  }
1997 
1998  const TSeqPos length = protein->GetSeq().GetLength();
1999  EDIT_EACH_SEQANNOT_ON_BIOSEQ(annot_it, protein->SetSeq()) {
2000  CSeq_annot& annot = **annot_it;
2001  if (!annot.IsFtable()) continue;
2002  EDIT_EACH_FEATURE_ON_ANNOT(feat_it, annot) {
2003  CSeq_feat& feat = **feat_it;
2004  if (newId) {
2005  feat.SetLocation().SetId(*newId);
2006  }
      // Prot features without a "processed" value are ended at the last residue.
      // NOTE(review): SetInt() is applied without checking that the location is an
      // interval, and length - 1 presumably relies on length > 0 - confirm.
2007  if (feat.IsSetData() && feat.GetData().IsProt() && !feat.GetData().GetProt().IsSetProcessed()) {
2008  feat.SetLocation().SetInt().SetTo(length - 1);
2009  }
2010  }
2011  }
2012 }
2013 
2015 {
      // Replaces the ids of the protein bioseq with newId and re-points the location of
      // every feature in its feature tables at newId. Does nothing for a null entry,
      // a set, a nucleotide, or a null newId.
2016  if (!protein || protein->IsSet() || protein->GetSeq().IsNa() || !newId) {
2017  return;
2018  }
2019 
2020  protein->SetSeq().ResetId();
2021  protein->SetSeq().SetId().push_back(newId);
2022 
2023  EDIT_EACH_SEQANNOT_ON_BIOSEQ(annot_it, protein->SetSeq()) {
2024  CSeq_annot& annot = **annot_it;
2025  if (!annot.IsFtable()) continue;
2026  EDIT_EACH_FEATURE_ON_ANNOT(prot_feat, annot) {
2027  (*prot_feat)->SetLocation().SetId(*newId);
2028  }
2029  }
2030 }
2031 
2033 {
      // The new entry is edited inside a private scope.
2034  CScope newScope(*CObjectManager::GetInstance());
2035  newScope.AddDefaults();
2036  CSeq_entry_Handle newSeh = newScope.AddTopLevelSeqEntry(*m_NewEntry);
2037 
      // First bioseq of the same molecule type as the old sequence.
2038  CBioseq_Handle newBsh;
2039  CBioseq_CI b_iter(newSeh, m_OldBsh.GetBioseqMolType());
2040  if (b_iter) {
2041  newBsh = *b_iter;
2042  }
2043 
2044  CSeq_entry_Handle seh = newBsh.GetSeq_entry_Handle();
      // Descriptors are edited on the enclosing nuc-prot set when there is one.
2046  if (!eh.IsSet()) {
2047  CBioseq_set_Handle nucParent = eh.GetParentBioseq_set();
2048  if (nucParent && nucParent.IsSetClass() &&
2049  nucParent.GetClass() == CBioseq_set::eClass_nuc_prot) {
2050  eh = nucParent.GetParentEntry().GetEditHandle();
2051  }
2052  }
2053 
2054  CConstRef<CSeqdesc> changedSeqdesc;
2055  CSeq_entry_Handle seh_for_desc;
2056  CRef<CSeqdesc> changedORadded_citsub = CCitSubUpdater::s_GetCitSubForUpdatedSequence(newBsh, m_CitSubMessage, changedSeqdesc, seh_for_desc);
2057  if (changedORadded_citsub &&
2058  changedSeqdesc &&
2060 
      // An existing descriptor was changed: swap it for the updated one.
2061  eh.RemoveSeqdesc(*changedSeqdesc);
2062  eh.AddSeqdesc(*changedORadded_citsub);
2063  }
2064  else {
      // Otherwise the Cit-sub is derived from the old sequence and added, if one is returned.
2065  changedORadded_citsub.Reset(CCitSubUpdater::s_GetCitSubForUpdatedSequence(m_OldBsh, m_CitSubMessage, changedSeqdesc, seh_for_desc).GetPointer());
2066  if (changedORadded_citsub) {
2067  eh.AddSeqdesc(*changedORadded_citsub);
2068  }
2069  }
2070 }
2071 
2073 {
      // NOTE(review): `cmd` is not used by any statement shown here; the command
      // returned is the single CCmdChangeSeqEntry created below.
2074  CRef<CCmdComposite> cmd(new CCmdComposite("Swap old sequence with new sequence"));
2076  CIRef<IEditCommand> exch_cmd(new CCmdChangeSeqEntry(oldSeh, m_NewEntry));
2077  return exch_cmd;
2078 }
2079 
2081 {
      // Logs, at Info level and as ASN.1 text, every Seq-id of m_NewEntry: the ids of
      // each direct bioseq member for a set, or the ids of the bioseq itself.
2083 
2084  if (m_NewEntry->IsSet()) {
2085  FOR_EACH_SEQENTRY_ON_SEQSET(e_it, m_NewEntry->GetSet()) {
      // Only direct bioseq members are visited; nested sets are not descended into.
2086  if ((*e_it)->IsSeq()) {
2087  for (auto&& it : (*e_it)->GetSeq().GetId()) {
2088  LOG_POST(Info << MSerial_AsnText << *it);
2089  }
2090  }
2091  }
2092  }
2093  else if (m_NewEntry->IsSeq()) {
2094  for (auto&& it : m_NewEntry->GetSeq().GetId()) {
2095  LOG_POST(Info << MSerial_AsnText << *it);
2096  }
2097  }
2098 
2099 }
2100 
2102 {
      // Acts only when a Cit-sub message has been recorded.
2103  if (!m_CitSubMessage.empty()) {
2105  }
2106 }
2107 
2109 {
      // Returns "Reverse complemented <label of the best-ranked old id>" when m_Reversed
      // is set, and an empty string otherwise or when no id is found.
2110  if (!m_Reversed) {
2111  return kEmptyStr;
2112  }
2113 
2114  CRef<CSeq_id> bestId = FindBestChoice(m_OldBsh.GetCompleteBioseq()->GetId(), CSeq_id::BestRank);
2115  if (bestId) {
2116  string best_seqid;
2117  bestId->GetLabel(&best_seqid);
2118  return "Reverse complemented " + best_seqid;
2119  }
2120  return kEmptyStr;
2121 }
2122 
2124 {
      // Writes nothing when every feature was imported.
2125  if (m_NotImportedFeats.empty()) {
2126  return;
2127  }
2128 
2130  string updseq_name;
2131  best_idh.GetSeqId()->GetLabel(&updseq_name, CSeq_id::eContent);
2132 
2134  _ASSERT(updseqRow > -1);
      // Label of the aligned range on the update-sequence row, used in the header line.
2135  CRef<CSeq_loc> align_loc = m_Align->CreateRowSeq_loc(updseqRow);
2136  string align_label;
2137  align_loc->GetLabel(&align_label);
2138 
      // One header line, then one line per feature that was not imported.
2139  out << "Features from " << updseq_name;
2140  out << " were not imported as they were outside the " << align_label << " alignment range:\n";
2141  for (auto&& it : m_NotImportedFeats) {
2142  out << it;
2143  out << "\n";
2144  }
2145 }
2146 
2147 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
ENa_strand Reverse(ENa_strand s)
Definition: Na_strand.hpp:90
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
bool SameOrientation(ENa_strand a, ENa_strand b)
Definition: Na_strand.hpp:83
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_EditHandle –.
CBioseq_Handle –.
CBioseq_set_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
bool IsNa(void) const
Definition: Bioseq.cpp:345
CCdregion –.
Definition: Cdregion.hpp:66
static CRef< objects::CSeqdesc > s_GetCitSubForUpdatedSequence(const objects::CBioseq_Handle &bsh, string &message, CConstRef< objects::CSeqdesc > &changedSeqdesc, objects::CSeq_entry_Handle &seh)
Generates/Creates a Cit-sub publication that will be added to the sequence.
static const char * sm_ChangeExistingCitSub
void AddCommand(IEditCommand &command)
CFeat_CI –.
Definition: feat_ci.hpp:64
CIUPACaa –.
Definition: IUPACaa.hpp:66
CIUPACna –.
Definition: IUPACna.hpp:66
void AddInterval(const CSeq_interval &ival)
for convenience
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
Sequence update exception class.
CSeqVector –.
Definition: seq_vector.hpp:65
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
TDim CheckNumRows(void) const
Validators.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
CSeq_annot_CI –.
CSeq_annot_Handle –.
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
CSeq_entry_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStart(ESeqLocExtremes ext) const
bool IsPartialStart(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
void SetPartialStart(bool val, ESeqLocExtremes ext)
void SetPartialStop(bool val, ESeqLocExtremes ext)
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:593
CSeq_loc_Mapper_Base –.
void AddSeqLoc(const CSeq_loc &other)
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
static void s_Shift_tRNAAntiCodon(CRef< objects::CSeq_feat > feat, const TSeqPos &offset, const objects::CSeq_id *targetId=NULL)
void Patch(bool create_general_only)
Definition: update_seq.cpp:711
void x_RetranslateOldCDSProduct(CRef< objects::CSeq_feat > cds, objects::CSeq_entry_EditHandle &eh, objects::CBioseq_Handle &newBsh, bool create_general_only)
static void s_Shift_CDSCodeBreaks(CRef< objects::CSeq_feat > feat, const TSeqPos &offset, const objects::CSeq_id *targetId=NULL)
static CRef< objects::CSeq_feat > s_OffsetFeature(const objects::CSeq_feat &feat, const TSeqPos offset, const objects::CSeq_id *newId=NULL)
Offsets the location of the feature.
bool IsAlignmentOK() const
Definition: update_seq.cpp:543
SUpdateSeqParams m_Params
Definition: update_seq.hpp:201
void ExtendOneEndOfSequence(bool create_general_only)
Definition: update_seq.cpp:722
objects::CBioseq_Handle m_OldBsh
Definition: update_seq.hpp:198
CIRef< IEditCommand > x_SwapOldWithNewSeq()
Creates command that swaps the old sequence with the new one.
static CRef< objects::CSeq_inst > s_ExtendOneEndOfSequence(const objects::CBioseq_Handle &bsh, const string &extension, SUpdateSeqParams::ESequenceUpdateType update_type)
Extends the 5' or 3' end of a NA sequence (no alignment is necessary).
Definition: update_seq.cpp:953
CConstRef< objects::CSeq_id > m_NewId
Temporary seq-id of the new seq-entry.
Definition: update_seq.hpp:206
void x_FixIDInNewEntry(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_feat > x_MappedFeature_ChangeId(const objects::CSeq_feat_Handle &orig_fh)
Changes only the location ID of the feature.
void x_AdjustProteinFeature(CRef< objects::CSeq_entry > protein, CRef< objects::CSeq_id > newId=CRef< objects::CSeq_id >())
string m_CitSubMessage
Definition: update_seq.hpp:216
void x_ImportCDSProduct(CRef< objects::CSeq_feat > cds, objects::CSeq_entry_EditHandle &eh, objects::CBioseq_Handle &newBsh, bool create_general_only)
static CRef< objects::CSeq_loc > s_OffsetLocation(const objects::CSeq_loc &sourceLoc, const TSeqPos &offset, const objects::CSeq_id *targetId=NULL)
CConstRef< objects::CSeq_align > m_Align
Definition: update_seq.hpp:200
vector< string > m_NotImportedFeats
List of features that were not imported as they were outside the alignment range.
Definition: update_seq.hpp:219
void ShowCitSubMessage(void) const
void x_PrintNewEntryIds()
vector< CConstRef< objects::CSeq_align > > x_FormNewAlignment(const objects::CBioseq_Handle &subject)
Generates alignment between subject_bsh and the new sequence (query)
bool CheckParameters() const
Definition: update_seq.cpp:548
CRef< objects::CSeq_inst > x_UpdateSeqInst(const string &upd_str)
Definition: update_seq.cpp:983
CRef< objects::CSeq_inst > x_ReplaceSequence()
Definition: update_seq.cpp:775
bool IsUpdateSequenceRaw() const
Definition: update_seq.cpp:538
bool x_ShouldRemoveFeature(const objects::CSeq_feat_Handle &fh)
CRef< objects::CSeq_annot > x_GetSeqAnnot_WithoutCDS(bool &has_cds)
TFeatList m_ImportUpdFeats
Definition: update_seq.hpp:213
CRef< objects::CSeq_entry > m_NewEntry
The new seq-entry.
Definition: update_seq.hpp:204
void x_RemoveDescriptorsFromImportedProducts(CRef< objects::CSeq_entry > protein)
Removes RefGeneTracking user-object and create-date descriptors from imported cds products.
void x_MakeNewEntry(const objects::CSeq_inst &newInst)
Creates the new seq-entry. It is not added to the scope, yet.
void x_RetranslateImportedCDSProduct(CRef< objects::CSeq_feat > cds, objects::CSeq_entry_EditHandle &eh, objects::CBioseq_Handle &newBsh, bool create_general_only)
void x_MapOldAndImportedFeatsThroughNewAlign()
void x_AddCitSubToUpdatedSequence()
Attaches a cit-sub to the New sequence.
static string s_GetValidExtension(const string &extension)
Definition: update_seq.cpp:934
void SetUpdateSeqParams(const SUpdateSeqParams &params)
Definition: update_seq.cpp:524
bool IsOldSequenceOK() const
Definition: update_seq.cpp:529
const objects::CBioseq_Handle & m_UpdBsh
Definition: update_seq.hpp:199
void x_UpdateProteinID(CRef< objects::CSeq_entry > protein, CRef< objects::CSeq_id > newId)
void x_ChangeIDInFeature(objects::CSeq_feat_Handle fh, const objects::CSeq_align &align, objects::CSeq_align::TDim row)
Replaces the location ID in the feature to match the alignment ID corresponding to the given row.
TSeqPos x_GetNewSeqLength()
Definition: update_seq.cpp:623
void x_FixID_AttachFeatures(bool create_general_only)
Definition: update_seq.cpp:767
CRef< objects::CSeq_inst > x_ExtendOneEnd()
Definition: update_seq.cpp:879
objects::CSeq_entry_Handle x_GetOldBseq_EntryHandle(void)
string GetRevCompReport() const
CRef< objects::CSeq_feat > x_FuseFeatures(const objects::CSeq_feat &feat1, const objects::CSeq_feat &feat2)
Merges duplicate features.
void x_RemoveNewIDInFeature(CRef< objects::CSeq_feat > feat)
CSequenceUpdater(const CUpdateSeq_Input &updseq_in, const SUpdateSeqParams &params)
Definition: update_seq.cpp:488
void x_HandleDuplicateFeatures()
Decides which duplicate features will end up on the new sequence.
static CRef< objects::CSeq_inst > s_UpdateSeqInst(const objects::CBioseq_Handle &bsh, const string &upd_str)
Definition: update_seq.cpp:988
void NoChange(bool create_general_only)
Definition: update_seq.cpp:640
void Replace(bool create_general_only)
Definition: update_seq.cpp:669
list< CRef< objects::CSeq_feat > > TFeatList
Definition: update_seq.hpp:208
bool x_ShouldImportFeature(const objects::CSeq_feat_Handle &fh)
Determines whether the feature from update sequence should be imported to the new one.
void x_FuseProtFeatsForCDS(const objects::CSeq_feat &fusedFeat, const objects::CSeq_feat &feat_old)
Merges protein sequence features for duplicate coding regions.
void GetNotImportedFeatsReport(CNcbiOstream &out) const
TFeatList m_MappedOldFeats
List of features, originally belonging to the Old Sequence and update sequence, respectively.
Definition: update_seq.hpp:212
CRef< CCmdComposite > Update(bool create_general_only)
Main function responsible for updating the old sequence with the update sequence.
Definition: update_seq.cpp:564
void x_ReplaceIdInOldFeatures()
void x_AdjustOldCDSProduct(CRef< objects::CSeq_feat > cds, objects::CSeq_entry_EditHandle &eh)
CRef< objects::CSeq_feat > x_MappedFeature_ThroughAlign(const objects::CSeq_feat_Handle &orig_fh, const objects::CSeq_align &align)
Maps feature's location through the alignment.
void x_PrepareNewEntry(CRef< objects::CSeq_entry > entry, const objects::CSeq_inst &newInst)
void x_SetOldIDInFeatures()
CConstRef< objects::CSeq_align > x_GetNewAlign_Replace(const objects::CBioseq_Handle &bsh)
void x_AttachFeaturesToNewSeq(bool create_general_only)
Removes newId in features and stores them in two seq-annots: one that will be attached to the sequenc...
static objects::CSeq_align::TDim FindRowInAlignment(const objects::CBioseq_Handle &bsh, const objects::CSeq_align &align)
Definition: update_seq.cpp:611
CRef< objects::CSeq_inst > x_PatchSequence()
Definition: update_seq.cpp:818
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Sets up the old and the update sequences, and generates the alignment between them.
bool IsReadyForUpdate(void) const
static bool s_IsDeltaWithFarPointers(const objects::CBioseq &bseq)
static bool s_IsDeltaWithNoGaps(const objects::CBioseq &bseq)
Stores parameters regarding the type of sequence update, on how to handle existing features and on ho...
EFeatUpdateType m_FeatImportOption
ESequenceUpdateType m_SeqUpdateOption
TProtUpdFlags m_UpdateProteins
objects::CSeqFeatData::ESubtype m_FeatImportType
Defines the imported feature subtype.
bool m_AddCitSub
flag to attach a citation with the current date (false)
EFeatRemoveType m_FeatRemoveOption
bool m_ImportFeatures
Flag to indicate that features from the update sequence will be imported.
bool m_KeepProteinId
flag to update protein IDs, default is to update (false) - relevant only to imported protein IDs
@ eSeqUpdateReplace
replace old sequence with update sequence
@ eSeqUpdateExtend3
extend the 3' end of old sequence with the update
@ eSeqUpdatePatch
patch old sequence with update in the aligned region
@ eSeqUpdateExtend5
extend the 5' end of old sequence with the update
static vector< CConstRef< objects::CSeq_align > > RunBlast2Seq(const objects::CBioseq_Handle &subject, const objects::CBioseq_Handle &query, bool accept_atleast_one, ICanceled *canceled=nullptr)
Definition: sequpd.cpp:351
void SetMolinfoForProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
CRef< objects::CSeq_entry > CreateTranslatedProteinSequence(CRef< objects::CSeq_feat > cds, objects::CBioseq_Handle nuc_h, bool create_general_only, int *offset=nullptr)
std::ofstream out("events_result.xml")
main entry point for tests
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
#define false
Definition: bool.h:36
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
EDialogReturnValue NcbiInfoBox(const string &message, const string &title="Info")
specialized Message Box function for reporting general information messages
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:774
CSeq_id::E_Choice Which(void) const
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
CRef< CSeq_loc > MakeSeq_loc(EMakeType make_type=eMake_CompactType) const
return constructed CSeq_loc with all changes
Definition: Seq_loc.cpp:2946
void SetFrom(TSeqPos from)
Set the range from position.
Definition: Seq_loc.cpp:2818
void SetTo(TSeqPos to)
Set the range to position.
Definition: Seq_loc.cpp:2829
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetSeq_id(const CSeq_id &id)
Set seq_id of the current location.
Definition: Seq_loc.hpp:713
int Compare(const CSeq_loc &loc) const
Definition: Seq_loc.cpp:590
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
Definition: Seq_loc.cpp:858
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
ECompare
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eAbutting
Abutting seq-locs.
@ eNoOverlap
CSeq_locs do not overlap or abut.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
void RemoveFromHistory(const CTSE_Handle &tse, EActionIfLocked action=eKeepIfLocked)
Remove single TSE from the scope's history.
Definition: scope.cpp:362
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
virtual CSeq_id_Handle GetLocationId(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &v) const
void SetDescr(TDescr &v) const
CBioseq_set_EditHandle GetParentBioseq_set(void) const
Get parent bioseq-set edit handle.
void Remove(void) const
Remove current annot.
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
TSeqPos GetBioseqLength(void) const
bool IsAa(void) const
void Remove(void) const
Remove this Seq-entry from parent, or scope if it's top level Seq-entry.
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
TSet ConvertSeqToSet(TClass set_class=CBioseq_set::eClass_not_set) const
Convert the entry from Bioseq to Bioseq-set.
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
virtual const CSeq_loc & GetLocation(void) const
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from the TSE of this Seq-entry.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CScope & GetScope(void) const
Get scope this handle belongs to.
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsSet(void) const
bool IsNa(void) const
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
bool AddSeqdesc(CSeqdesc &v) const
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
const TInst & GetInst(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & SetLimitTSE(const CTSE_Handle &limit)
Limit annotations to those from the TSE only.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
void SetCoding(TCoding coding)
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TObjectType & GetObject(void) const
Get object.
Definition: ncbiobj.hpp:1697
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5325
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
static const char label[]
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
type of object within class. Check if a value has been assigned to Type data member.
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
bool IsSetClass(void) const
endeavor which designed this object. Check if a value has been assigned to Class data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TType & GetType(void) const
Get the Type member data.
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:538
bool IsSetProcessed(void) const
Check if a value has been assigned to Processed data member.
Definition: Prot_ref_.hpp:513
const TPub & GetPub(void) const
Get the variant data.
Definition: Pub_set_.hpp:386
list< CRef< CPub > > TPub
Definition: Pub_set_.hpp:159
bool IsPub(void) const
Check if variant Pub is selected.
Definition: Pub_set_.hpp:380
TTRNA & SetTRNA(void)
Select the variant.
Definition: RNA_ref_.cpp:140
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
Definition: Trna_ext_.hpp:649
bool IsTRNA(void) const
Check if variant TRNA is selected.
Definition: RNA_ref_.hpp:498
bool IsSetAnticodon(void) const
location of anticodon. Check if a value has been assigned to Anticodon data member.
Definition: Trna_ext_.hpp:637
void SetAnticodon(TAnticodon &value)
Assign a value to Anticodon data member.
Definition: Trna_ext_.cpp:158
void SetExt(TExt &value)
Assign a value to Ext data member.
Definition: RNA_ref_.cpp:211
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
Definition: RNA_ref_.hpp:604
const TName & GetName(void) const
Get the variant data.
Definition: RNA_ref_.hpp:484
const TExt & GetExt(void) const
Get the Ext member data.
Definition: RNA_ref_.hpp:616
const TTRNA & GetTRNA(void) const
Get the variant data.
Definition: RNA_ref_.cpp:134
bool IsName(void) const
Check if variant Name is selected.
Definition: RNA_ref_.hpp:478
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
void ResetPartial(void)
Reset Partial data member.
Definition: Seq_feat_.hpp:955
bool IsSetData(void) const
the specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
E_Choice Which(void) const
Which variant is currently selected.
bool IsProt(void) const
Check if variant Prot is selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TRegion & GetRegion(void) const
Get the variant data.
const TCit & GetCit(void) const
Get the Cit member data.
Definition: Seq_feat_.hpp:1240
void SetCit(TCit &value)
Assign a value to Cit data member.
Definition: Seq_feat_.cpp:170
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
list< CRef< CCode_break > > TCode_break
Definition: Cdregion_.hpp:111
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
explain if except=TRUE. Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
Definition: Cdregion_.hpp:739
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetCit(void) const
citations for this feature. Check if a value has been assigned to Cit data member.
Definition: Seq_feat_.hpp:1228
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
void ResetLocation(void)
Reset Location data member.
Definition: Seq_feat_.cpp:122
const TRna & GetRna(void) const
Get the variant data.
bool IsSetProduct(void) const
product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsSetCode_break(void) const
individual exceptions. Check if a value has been assigned to Code_break data member.
Definition: Cdregion_.hpp:721
@ e_Region
named region (globin locus)
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
void SetFrom(TFrom value)
Assign a value to From data member.
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_point_.hpp:350
const Tdata & Get(void) const
Get the member data.
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Definition: Seq_point_.hpp:331
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
void SetStrand(TStrand value)
Assign a value to Strand data member.
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Int
from to
Definition: Seq_loc_.hpp:101
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
void ResetAnnot(void)
Reset Annot data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void ResetId(void)
Reset Id data member.
Definition: Bioseq_.cpp:54
void ResetAnnot(void)
Reset Annot data member.
Definition: Bioseq_.cpp:91
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
void ResetInst(void)
Reset Inst data member.
Definition: Bioseq_.cpp:77
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
void ResetExt(void)
Reset Ext data member.
Definition: Seq_inst_.cpp:142
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const CharType(& source)[N]
Definition: pointer.h:1149
Useful/utility classes and methods.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
The Object manager core.
Utility macros and typedefs for exploring NCBI objects from pub.asn.
Utility macros and typedefs for exploring NCBI objects from seq.asn.
#define EDIT_EACH_FEATURE_ON_ANNOT
Definition: seq_macros.hpp:434
#define EDIT_EACH_SEQDESC_ON_SEQDESCR(Itr, Var)
Definition: seq_macros.hpp:660
#define EDIT_EACH_SEQANNOT_ON_BIOSEQ(Itr, Var)
Definition: seq_macros.hpp:266
#define ERASE_SEQDESC_ON_SEQDESCR(Itr, Var)
ERASE_SEQDESC_ON_SEQDESCR.
Definition: seq_macros.hpp:670
Utility macros and typedefs for exploring NCBI objects from seqset.asn.
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define row(bind, expected)
Definition: string_bind.c:73
@ eExistingText_append_semi
bool AddValueToString(string &str, const string &value, EExistingText existing_text)
Add text to an existing string, using the "existing_text" directive to combine new text with existing...
SAnnotSelector –.
static string subject
#define _ASSERT
USING_SCOPE(objects)
static const char * kNewLocalID
Definition: update_seq.cpp:486
#define FUSE_STRINGFIELDS(Var1, Var2, Field)
Definition: update_seq.cpp:381
#define FUSE_LISTFIELDS(Var1, Var2, Type, Field)
Definition: update_seq.cpp:401
#define FUSE_VECTORFIELDS(Var1, Var2, Type, Field)
Definition: update_seq.cpp:407
#define FUSE_BOOLEANFIELDS(Var1, Var2, Field)
Definition: update_seq.cpp:390
Modified on Wed Apr 17 13:10:57 2024 by modify_doxy.py rev. 669887