NCBI C++ ToolKit
seq_entry_edit.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_entry_edit.cpp 101189 2023-11-14 16:24:35Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mati Shomrat, Jie Chen, NCBI
27 *
28 * File Description:
29 * High level Seq-entry edit, for meaningful combination of Seq-entries.
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
35 #include <objects/pub/Pub.hpp>
37 #include <objects/seq/Bioseq.hpp>
38 #include <objects/seq/Pubdesc.hpp>
40 #include <objects/seq/Seq_ext.hpp>
41 #include <objects/seq/Seg_ext.hpp>
45 #include <objmgr/align_ci.hpp>
46 #include <objmgr/bioseq_handle.hpp>
48 #include <objmgr/feat_ci.hpp>
49 #include <objmgr/scope.hpp>
50 #include <objmgr/seq_entry_ci.hpp>
51 #include <objmgr/seq_annot_ci.hpp>
52 #include <objmgr/seq_descr_ci.hpp>
53 #include <objmgr/seqdesc_ci.hpp>
56 #include <objmgr/graph_ci.hpp>
57 #include <objmgr/seq_vector.hpp>
58 #include <objmgr/util/sequence.hpp>
63 
65 
66 #include <set>
67 #include <sstream>
68 #include <map>
69 
73 
74 namespace {
75  // This works just like a map except that when you iterate over it,
76  // it goes by order of initial insertion.
77  // We use private inheritance because map's destructor is non-virtual
78  // Maybe this can be put in a shared place and others can use it,
79  // but it would need more cleanup first.
// Map wrapper that also records every key the first time it is inserted,
// so callers can retrieve the keys in insertion order through
// GetKeysInOriginalOrder().
// NOTE(review): the declarations that make value_type, iterator, find, end
// and size usable here are presumably on listing lines not visible in this
// view (84-86, 88-91) -- confirm against the full file.
80  template<typename Key, typename Data, typename Compare = less<Key>, typename Alloc = allocator<pair<const Key,Data> > >
81  class CMapWithOriginalOrderingIteration : private map<Key, Data, Compare, Alloc>
82  {
83  public:
87  // "begin" deliberately omitted
92  // add more "using" statements if you find more things from map<> that
93  // you want to use. Make sure to keep consistency, though.
94  // For example, don't just use "using" on "erase". Implement
95  // erase to be a wrapper over map's erase and make sure to
96  // update m_keysInOriginalOrder, etc.
97 
98  // we override insert so we can keep track of ordering
99  // of keys
    // Returns the underlying map's insert result unchanged; the key is
    // appended to m_keysInOriginalOrder only when the insertion took place.
100  pair<iterator, bool> insert(const value_type& x)
101  {
102  pair<iterator, bool> result = map<Key, Data, Compare, Alloc>::insert(x);
103  if( result.second ) {
104  m_keysInOriginalOrder.push_back( x.first );
105  }
106  return result;
107  }
108 
    // Returns the value stored under k, inserting a default-constructed
    // Data through insert() above when k is not present yet.
109  Data &
110  operator[](const Key & k)
111  {
112  iterator find_iter = find(k);
113  if( find_iter != end() ) {
114  // already in map, just return it
115  return find_iter->second;
116  }
117 
118  // not in map, so add
119  pair<iterator, bool> result = insert(
120  value_type(k, Data()) );
121  _ASSERT( result.second );
122  return result.first->second;
123  }
124 
125  typedef vector<Key> TKeyVec;
    // Keys in the order they were first inserted; asserts that the key
    // vector and the map hold the same number of elements.
126  const TKeyVec & GetKeysInOriginalOrder(void) const {
127  _ASSERT( m_keysInOriginalOrder.size() == size() );
128  return m_keysInOriginalOrder;
129  }
130 
131  private:
    // one entry per successful insert(), in call order
132  TKeyVec m_keysInOriginalOrder;
133  };
134 
// Builds a lookup table with one slot per CSeqdesc choice value (array size
// is CSeqdesc::E_ChoiceStopper::e_MaxChoice). Every slot starts at kMax_Char;
// each value listed in init_seqdesc_sortmap then receives its position in
// that list.
// NOTE(review): the entries of init_seqdesc_sortmap are on listing lines not
// visible in this view (138-162).
135  constexpr auto ConstructSortMap()
136  {
137  auto init_seqdesc_sortmap = ct::make_array({
163  });
164  std::array<unsigned char, CSeqdesc::E_ChoiceStopper::e_MaxChoice> _sorted{};
165  static_assert(init_seqdesc_sortmap.size() <= _sorted.size());
    // default rank for every choice that is not listed
166  for (auto& it: _sorted)
167  it = kMax_Char;
168 
    // listed choices get increasing ranks in list order
169  unsigned char index=0;
170  for (auto rec: init_seqdesc_sortmap)
171  {
172  _sorted[rec] = index;
173  ++index;
174  }
175  return _sorted;
176  };
177 
// Table produced by ConstructSortMap(); read by CompareSeqdesc::mapit.
178  static constexpr
179  auto seqdesc_sortmap = ConstructSortMap();
180 
181  struct CompareSeqdesc
182  {
183  static char mapit(CSeqdesc::E_Choice c)
184  {
185  if (c<0 || c>=seqdesc_sortmap.size())
186  return kMax_Char;
187  return seqdesc_sortmap[c];
188  }
189 
190  bool operator()(const CRef<CSeqdesc>& l, const CRef<CSeqdesc>& r) const
191  {
192  return mapit(l->Which()) < mapit(r->Which());
193  }
194  };
195 }
196 
// Walks the delta-sequence elements of seq_hl and returns the element whose
// [offset, offset+len) interval contains pos; that element's starting offset
// is written to left_endpoint. Returns an empty CConstRef when seq_hl fails
// the precondition checks or no element covers pos (left_endpoint is then
// left untouched).
// NOTE(review): one line of the precondition (listing line 200) is not
// visible in this view.
197 CConstRef <CDelta_seq> GetDeltaSeqForPosition(const unsigned pos, const CBioseq_Handle seq_hl, CScope* scope, unsigned& left_endpoint)
198 {
199  if (!seq_hl || !seq_hl.IsNa() || !seq_hl.IsSetInst_Repr()
201  || !seq_hl.GetInst().CanGetExt()
202  || !seq_hl.GetInst().GetExt().IsDelta()) {
203  return CConstRef <CDelta_seq>();
204  }
205 
    // running start coordinate of the current element
206  unsigned offset = 0;
207 
    // NOTE(review): len is assigned only for literal and loc elements; any
    // other element kind would reuse the previous element's length, and the
    // signed len is mixed with unsigned offset/pos below -- confirm intended.
208  int len = 0;
209  ITERATE (list <CRef <CDelta_seq> >, it, seq_hl.GetInst_Ext().GetDelta().Get()) {
210  if ((*it)->IsLiteral()) {
211  len = (*it)->GetLiteral().GetLength();
212  } else if ((*it)->IsLoc()) {
213  len = sequence::GetLength((*it)->GetLoc(), scope);
214  }
215  if (pos >= offset && pos < offset + len) {
216  left_endpoint = offset;
217  return (*it);
218  } else {
219  offset += len;
220  }
221  }
222  return CConstRef <CDelta_seq>();
223 };
224 
226 {
    // NOTE(review): the signature line of this function is not visible in
    // this view; the body reads a pointer-like `delta` to a delta-seq element.
    // Returns true only for a non-loc element whose literal has Seq-data of
    // the gap kind; a loc element or any other literal yields false.
227  if (delta->IsLoc()) return false;
228  if (delta->GetLiteral().CanGetSeq_data() && delta->GetLiteral().GetSeq_data().IsGap()){
229  return true;
230  }
231  else return false;
232 };
233 
// Reports whether the sequence iterator is in a gap at the position just
// before the feature's start (From of the location's total range) or at any
// later position. Returns false for an invalid handle or when the feature
// starts at position 0.
// NOTE(review): seq_vec is declared on a listing line not visible in this
// view (241).
234 bool Does5primerAbutGap(const CSeq_feat& feat, CBioseq_Handle seq_hl)
235 {
236  if (!seq_hl) return false;
237 
238  unsigned start = feat.GetLocation().GetTotalRange().GetFrom(); // Positional
239  if (!start) return false;
240 
242  unsigned i=0;
    // NOTE(review): the scan does not stop after start-1; a gap anywhere at
    // or after that position makes the function return true -- confirm
    // whether only the abutting position was meant to be tested.
243  for (CSeqVector_CI it = seq_vec.begin(); it; ++ it, i++) {
244  if (i < (start - 1) ) continue;
245  if (it.IsInGap()) return true;
246  }
247  return false;
248 };
249 
// Reports whether the sequence iterator is in a gap at the position just
// after the feature's stop (To of the location's total range) or at any later
// position. Returns false for an invalid handle or when stop is at or beyond
// the last position of seq_vec.
// NOTE(review): seq_vec is declared on a listing line not visible in this
// view (256).
250 bool Does3primerAbutGap(const CSeq_feat& feat, CBioseq_Handle seq_hl)
251 {
252  if (!seq_hl) return false;
253 
254  unsigned stop = feat.GetLocation().GetTotalRange().GetTo(); // positional
255 
257  if (stop >= seq_vec.size() - 1) return false;
258  unsigned i=0;
    // NOTE(review): the scan continues to the end of the sequence, so a gap
    // anywhere at or after stop+1 yields true -- confirm whether only the
    // abutting position was meant to be tested.
259  for (CSeqVector_CI it = seq_vec.begin(); it; ++ it, i++) {
260  if (i < (stop + 1) ) continue;
261  if (it.IsInGap()) return true;
262  }
263  return false;
264 };
265 
267 (CBioseq_set_EditHandle& parts,
268  CBioseq_EditHandle& seq)
269 {
    // NOTE(review): the line carrying this function's name is not visible in
    // this view; presumably s_AddBioseqToPartsSet, judging by its callers.
    // Moves seq into parts via TakeBioseq() after verifying that every
    // Bioseq entry visited in parts has the same molecule type as seq.
    // Throws CEditException(eInvalid) on a molecule-type mismatch.
270  _ASSERT(parts && seq);
272 
273  CSeq_inst::TMol seq_mol = seq.GetInst_Mol();
274  // Test that the new part has the same molecular type as other parts
275  for ( CSeq_entry_CI it(parts); it; ++it ) {
276  if ( it->IsSeq() && it->GetSeq().GetInst_Mol() != seq_mol ) {
277  NCBI_THROW(CEditException, eInvalid,
278  "Unable to add part due to conflicting molecular types");
279  }
280  }
281 
282  parts.TakeBioseq(seq);
283 }
284 
285 
287 (const CBioseq_EditHandle& seg,
288  const CBioseq_EditHandle& part)
289 {
    // NOTE(review): the line carrying this function's name is not visible in
    // this view; presumably s_AddPartToSegmentedBioseq, judging by its callers.
    // Appends a whole-sequence location for part's Seq-id to seg's segment
    // list: a fresh CSeq_ext is filled with copies of the existing segment
    // references (if any), the new location is appended, and the result is
    // installed on seg with SetInst_Ext().
290  _ASSERT(seg && part);
292 
293  // add a new reference to part in the segmented bioseq
294  // NB: temporary implementation, should be done through CSeqMap when
295  // this feature become available.
296  CRef<CSeq_id> id(new CSeq_id);
297  id->Assign(*part.GetSeqId());
298  CRef<CSeq_loc> loc(new CSeq_loc);
299  loc->SetWhole(*id);
300  // create a new CSeq_ext object
301  CRef<CSeq_ext> ext(new CSeq_ext);
302  // copy content of existing CSeq_ext object into new one.
303  CSeg_ext::Tdata& segs = ext->SetSeg().Set();
304  if ( seg.CanGetInst_Ext() ) {
305  copy(seg.GetInst_Ext().GetSeg().Get().begin(),
306  seg.GetInst_Ext().GetSeg().Get().end(),
307  back_inserter(segs));
308  }
309  // add reference to the new part
310  segs.push_back(loc);
311  // set the new one as the ext object
312  seg.SetInst_Ext(*ext);
313 }
314 
315 
317 (CBioseq_set_EditHandle& segset,
318  CBioseq_EditHandle& part)
319 {
    // NOTE(review): the function-name line and the declaration of `parts`
    // are on listing lines not visible in this view; the name is presumably
    // s_AddBioseqToSegset, judging by its caller.
    // Scans segset's entries for a set of class eClass_parts and for a
    // Bioseq with representation eRepr_seg (the master), then adds `part`
    // to the parts set and references it from the master.
    // Throws CEditException(eInvalid) when either component is missing.
321  CBioseq_EditHandle master;
322  for ( CSeq_entry_I it(segset); it; ++it ) {
323  if ( it->IsSet() &&
324  it->GetSet().GetClass() == CBioseq_set::eClass_parts ) {
325  parts = it->SetSet();
326  } else if ( it->IsSeq() &&
327  it->GetSeq().GetInst_Repr() == CSeq_inst::eRepr_seg ) {
328  master = it->SetSeq();
329  }
330  }
331 
332  if ( !parts || !master ) {
333  NCBI_THROW(CEditException, eInvalid, "Missing a component from segset");
334  }
335 
336  // add part to parts set
337  s_AddBioseqToPartsSet(parts, part);
    // and reference it from the segmented master
338  s_AddPartToSegmentedBioseq(master, part);
339 }
340 
341 
342 // -- AddSeqEntryToSeqEntry
343 
345 (const CSeq_entry_Handle& target,
346  const CSeq_entry_Handle& insert)
347 {
    // NOTE(review): the function-name line is not visible in this view;
    // presumably AddSeqEntryToSeqEntry, per the section comment above it.
    // Dispatches on the kinds of the two entries:
    //   Seq + Seq -> AddBioseqToBioseq
    //   Set + Seq -> AddBioseqToBioseqSet
    // Invalid handles and every other combination are silently ignored.
348  if ( !target || !insert ) {
349  return;
350  }
351 
352  if ( target.IsSeq() && insert.IsSeq() ) {
353  AddBioseqToBioseq(target.GetSeq(), insert.GetSeq());
354  } else if ( target.IsSet() && insert.IsSeq() ) {
355  AddBioseqToBioseqSet(target.GetSet(), insert.GetSeq());
356  }
357 }
358 
359 
360 // -- AddBioseqToBioseq
361 
362 // Create a nuc-prot set containing the two bioseqs
// Moves prot into the set obtained from nuc's parent entry.
// Asserts that nuc is a nucleotide and prot a protein.
// NOTE(review): the initializer of nuc_prot is on a listing line not visible
// in this view (370), so how the set is produced cannot be confirmed here.
363 static void s_AddProtToNuc(const CBioseq_EditHandle& nuc, const CBioseq_EditHandle& prot)
364 {
365  _ASSERT(CSeq_inst::IsNa(nuc.GetInst_Mol()));
366  _ASSERT(CSeq_inst::IsAa(prot.GetInst_Mol()));
367 
368  CSeq_entry_EditHandle nuc_entry = nuc.GetParentEntry();
369  CSeq_entry_EditHandle::TSet nuc_prot =
371  prot.MoveTo(nuc_prot);
372 }
373 
374 
376 {
    // NOTE(review): the signature line of this function is not visible in
    // this view.
    // Returns a newly allocated CSeq_id built from "lcl|segset_" followed by
    // a function-static counter that is incremented on every call.
    // NOTE(review): no synchronization around the counter is visible here --
    // confirm whether concurrent callers are possible.
377  static size_t count = 0;
378 
379  return new CSeq_id("lcl|segset_" + NStr::NumericToString(++count));
380 }
381 
382 
383 // The two bioseqs are parts of a segset
385 {
    // NOTE(review): the signature and the declarations of segset, parts and
    // id are on listing lines not visible in this view (384, 388-389,
    // 392-393, 401, 405); the comments below cover only the visible
    // statements.
    // Combines two Bioseqs of the same molecule type under a segmented
    // master: `add` is taken into `parts`, a new CBioseq carrying `id` is
    // attached to `segset` at index 0, and both `to` and `add` are
    // referenced as segments of that master.
386  _ASSERT(to.GetInst_Mol() == add.GetInst_Mol());
387 
390  _ASSERT(segset.IsSet());
391 
394  parts.TakeBioseq(add);
395 
396  // the segmented bioseq
397  // NB: Code for creating the segmented bioseq should change
398  // when the functionality is provided by object manager
399  CRef<CBioseq> seq(new CBioseq);
400  _ASSERT(seq);
402  seq->SetId().push_back(id);
403  CBioseq_EditHandle master = segset.AttachBioseq(*seq, 0);
404 
    // the master inherits the molecule type; its length is the sum of both parts
406  master.SetInst_Mol(to.GetInst_Mol());
407  master.SetInst_Length(to.GetInst_Length() + add.GetInst_Length());
408 
409  s_AddPartToSegmentedBioseq(master, to);
410  s_AddPartToSegmentedBioseq(master, add);
411 }
412 
413 
415 (const CBioseq_Handle& to,
416  const CBioseq_Handle& add)
417 {
    // NOTE(review): the function-name line, the declaration of to_mol and
    // the statements inside both branches are on listing lines not visible
    // in this view (414, 422, 427, 430).
    // Does nothing for invalid handles. Otherwise it branches on the
    // molecule types: protein added to nucleotide, or two Bioseqs of equal
    // molecule type; any other combination falls through without action.
418  if ( !to || !add ) {
419  return;
420  }
421 
423  CBioseq_Handle::TInst_Mol add_mol = add.GetInst_Mol();
424 
425  // adding a protein to a nucleotide
426  if ( CSeq_inst::IsNa(to_mol) && CSeq_inst::IsAa(add_mol) ) {
428  } else if ( to_mol == add_mol ) {
429  // these are two parts of a segset
431  }
432 }
433 
434 
435 // -- AddBioseqToBioseqSet
436 
437 // A nuc-prot associates one or more proteins with a single
438 // nucleotide.
439 
441 (CBioseq_set_EditHandle& nuc_prot,
442  CBioseq_EditHandle& seq)
443 {
    // NOTE(review): the function-name line is not visible in this view;
    // presumably s_AddBioseqToNucProtSet, judging by its caller.
    // A protein is simply moved into the set. For anything else the set is
    // searched for the first nucleotide Bioseq entry: if one exists, seq is
    // combined with it through AddBioseqToBioseq(); otherwise seq becomes
    // the set's first entry.
444  _ASSERT(nuc_prot && seq);
446 
447 
448  if ( CSeq_inst::IsAa(seq.GetInst_Mol()) ) {
449  // if the new bioseq is a protein simply add it
450  seq.MoveTo(nuc_prot);
451  } else {
    // stop at the first nucleotide Bioseq, or run off the end
452  CSeq_entry_CI it(nuc_prot);
453  while ( it ) {
454  if ( it->IsSeq() &&
455  CSeq_inst::IsNa(it->GetSeq().GetInst_Mol()) ) {
456  break;
457  }
458  ++it;
459  }
460  if ( it ) {
461  AddBioseqToBioseq(it->GetSeq(), seq);
462  } else {
463  seq.MoveTo(nuc_prot, 0); // add the nucleotide as the first entry
464  }
465  }
466 }
467 
468 
470 {
    // NOTE(review): the signature and every `case` label of the switch are
    // on listing lines not visible in this view, so which set class selects
    // which branch cannot be confirmed here.
    // Returns without action when either handle, or either edit handle, is
    // invalid. Otherwise it dispatches on the set's class to one of the
    // s_AddBioseqTo* helpers, to a no-op branch, or (default) simply moves
    // the Bioseq into the set.
471  if ( !set || !seq ) {
472  return;
473  }
474 
475  CBioseq_EditHandle seq_edit = seq.GetEditHandle();
476  CBioseq_set_EditHandle set_edit = set.GetEditHandle();
477  if ( !seq_edit || !set_edit ) {
478  return;
479  }
480 
481  switch ( set_edit.GetClass() ) {
483  s_AddBioseqToNucProtSet(set_edit, seq_edit);
484  break;
486  s_AddBioseqToSegset(set_edit, seq_edit);
487  break;
489  break;
491  s_AddBioseqToPartsSet(set_edit, seq_edit);
492  break;
494  break;
496  break;
498  break;
500  break;
502  break;
504  break;
506  break;
508  break;
510  break;
511 
518  default:
519  // just move the bioseq to the set
520  seq_edit.MoveTo(set_edit);
521  break;
522  }
523 }
524 
525 
526 bool IsSeqDescInList(const CSeqdesc& desc, const CSeq_descr& set)
527 {
528  for (auto it: set.Get()) {
529  if (it->Equals(desc)) {
530  return true;
531  } else if (it->IsPub() &&
532  desc.IsPub() &&
533  it->GetPub().GetPub().SameCitation(desc.GetPub().GetPub())) {
534  return true;
535  }
536  }
537  return false;
538 }
539 
540 
541 void AddSeqdescToSeqDescr(const CSeqdesc& desc, CSeq_descr& seq_descr)
542 {
543  CRef<CSeqdesc> d(new CSeqdesc());
544  d->Assign(desc);
545  seq_descr.Set().push_back(d);
546 }
547 
548 
549 void AddSeqdescToBioseq(const CSeqdesc& desc, CBioseq& seq)
550 {
551  if (!seq.IsSetDescr() || !IsSeqDescInList(desc, seq.GetDescr())) {
552  AddSeqdescToSeqDescr(desc, seq.SetDescr());
553  }
554 }
555 
556 
558 {
    // NOTE(review): the signature line is not visible in this view;
    // presumably AddSeqdescToBioseqSet(desc, set), judging by its callers.
    // Adds a copy of desc to the set's descriptors unless the set already
    // has descriptors and an equivalent one is found among them.
559  if (!set.IsSetDescr() || !IsSeqDescInList(desc, set.GetDescr())) {
560  AddSeqdescToSeqDescr(desc, set.SetDescr());
561  }
562 }
563 
564 
566 {
    // NOTE(review): the signature, the rest of the class test at listing
    // lines 573-574 and the loop header at listing line 578 are not visible
    // in this view.
    // Adds desc to a Bioseq entry directly. For a set entry, desc is added
    // to the set itself when the (partly invisible) class condition holds;
    // otherwise the function recurses into the set's members and falls back
    // to adding desc to the set when no member accepted it.
    // Returns true when desc was handed to at least one Bioseq or set.
567  bool rval = false;
568  if (entry.IsSeq()) {
569  AddSeqdescToBioseq(desc, entry.SetSeq());
570  rval = true;
571  } else if (entry.IsSet()) {
572  if (entry.GetSet().IsSetClass() &&
575  AddSeqdescToBioseqSet(desc, entry.SetSet());
576  rval = true;
577  } else if (entry.GetSet().IsSetSeq_set()) {
579  rval |= AddSeqdescToSeqEntryRecursively(**it, desc);
580  }
    // nothing below accepted the descriptor: attach it to this set
581  if (!rval) {
582  AddSeqdescToBioseqSet(desc, entry.SetSet());
583  rval = true;
584  }
585  }
586  }
587  return rval;
588 }
589 
590 
592 {
    // NOTE(review): the signature and the loop header at listing lines
    // 600-601 are not visible in this view.
    // Builds a new Seq-entry from the entries of `submit`: a single entry is
    // copied as is, several entries are copied into the Seq_set of a new set
    // entry. When the submit block has a citation, a pub descriptor holding
    // a copy of it is added to the result. Returns an empty CRef when
    // `submit` has no entries.
    // NOTE(review): `entry` is allocated before the early-return check, so
    // the allocation is wasted on the empty path.
593  CRef<CSeq_entry> entry(new CSeq_entry());
594  if (!submit.IsEntrys() || submit.GetData().GetEntrys().size() == 0) {
595  return CRef<CSeq_entry>();
596  }
597 
598  // Copy Seq-entry data from Seq-submit to new Seq-entry
599  if (submit.GetData().GetEntrys().size() > 1) {
602  CRef<CSeq_entry> e(new CSeq_entry());
603  e->Assign(**it);
604  entry->SetSet().SetSeq_set().push_back(e);
605  }
606  } else {
607  entry->Assign(*(submit.GetData().GetEntrys().front()));
608  }
609 
610  // Create cit-sub pub for Seq-entry
611  if (submit.IsSetSub() && submit.GetSub().IsSetCit()) {
612  CRef<CPub> pub(new CPub());
613  pub->SetSub().Assign(submit.GetSub().GetCit());
614  CRef<CSeqdesc> pdesc(new CSeqdesc());
615  pdesc->SetPub().SetPub().Set().push_back(pub);
616  if (entry->IsSeq()) {
617  AddSeqdescToBioseq(*pdesc, entry->SetSeq());
618  } else {
619  AddSeqdescToSeqEntryRecursively(*entry, *pdesc);
620  }
621  }
622  return entry;
623 }
624 
625 
626 static bool s_IsSingletonSet(
627  const CBioseq_set_Handle & bioseq_set )
628 {
629  CSeq_entry_CI direct_child_ci(bioseq_set, CSeq_entry_CI::eNonRecursive );
630  if( ! direct_child_ci ) {
631  // not singleton: has no children
632  return false;
633  }
634  ++direct_child_ci;
635  if( direct_child_ci ) {
636  // not singleton: has more than one child
637  return false;
638  }
639 
640  // not singleton if has any alignment annots
641  CSeq_annot_CI annot_ci( bioseq_set, CSeq_annot_CI::eSearch_entry );
642  for( ; annot_ci; ++annot_ci ) {
643  if( annot_ci->IsAlign() ) {
644  return false;
645  }
646  }
647 
648  // it's a singleton: it passed all tests
649  return true;
650 }
651 
653  const CBioseq_set_Handle & bioseq_set_h )
654 {
    // NOTE(review): the function-name line and the second constructor
    // argument at listing line 659 are not visible in this view; the name is
    // presumably s_PromoteSingletonSetsInSet, judging by its caller.
    // For every child set visited that s_IsSingletonSet() accepts: its
    // descriptors are propagated down (titles are passed as the choices to
    // erase), its annots are taken by its only child, and that child is
    // remembered. Afterwards each remembered child has its parent entry
    // removed and is taken into bioseq_set_h.
655  typedef vector<CSeq_entry_EditHandle> TBioseqSetsToPromote;
656  TBioseqSetsToPromote bioseqSetsToPromote;
657 
658  CSeq_entry_CI direct_child_set_ci( bioseq_set_h,
660  for( ; direct_child_set_ci; ++direct_child_set_ci ) {
661 
662  CBioseq_set_EditHandle direct_child_set_eh =
663  direct_child_set_ci->GetSet().GetEditHandle();
664  if( s_IsSingletonSet(direct_child_set_eh) ) {
665 
666  // get handle to the sets one child
667  CSeq_entry_CI direct_child_direct_child_ci(
668  direct_child_set_eh, CSeq_entry_CI::eNonRecursive );
669  CSeq_entry_EditHandle direct_child_direct_child_eh =
670  direct_child_direct_child_ci->GetEditHandle();
671  ++direct_child_direct_child_ci;
672  _ASSERT( ! direct_child_direct_child_ci );
673 
674  // remove titles, and then other descriptor
675  // types will be moved to the children of this child
676  CSeqdesc_CI::TDescChoices desc_choices_to_erase;
677  desc_choices_to_erase.push_back( CSeqdesc::e_Title );
678  BioseqSetDescriptorPropagateDown( direct_child_set_eh,
679  desc_choices_to_erase );
680 
681  // push down annotation
682  direct_child_direct_child_eh.TakeAllAnnots(
683  direct_child_set_eh.GetParentEntry() );
684 
685  // remember for later because removing now will
686  // confuse the iteration
687  bioseqSetsToPromote.push_back(
688  direct_child_direct_child_eh );
689  }
690  }
691 
692  // perform promotions requested
693  ITERATE( TBioseqSetsToPromote, promoted_set_it, bioseqSetsToPromote ) {
694  _ASSERT( bioseq_set_h.GetSeq_entry_Index( *promoted_set_it ) >= 0 );
    // drop the wrapping singleton set, then re-home its child
695  promoted_set_it->GetParentEntry().Remove();
696  bioseq_set_h.GetEditHandle().TakeEntry( *promoted_set_it );
697  }
698 }
699 
700 // Compares CSerialObjects by their ASN.1 output (with caching).
701 // In the future, it would be better to have something more efficient
702 // than conversion to text ASN.1 (E.g. if CSerialObject gets a
703 // comparison function similar to the Equals function it already has)
// Less-than functor over CConstRef<T>: null references order first, all
// others are ordered by comparing the text ASN.1 each object streams to.
// NOTE(review): the class header (listing line 705) and the declaration of
// m_ObjAsnCache (listing lines 732-733) are not visible in this view.
704 template <class T>
706 public:
707  bool operator()( const CConstRef<T> & lhs,
708  const CConstRef<T> & rhs ) const
709  {
710  // NULL first
711  if( lhs.IsNull() ) {
712  if( rhs.IsNull() ) {
713  return false; // equal
714  } else {
715  // lhs is first
716  return true;
717  }
718  } else if ( rhs.IsNull() ) {
719  // rhs is first
720  return false;
721  }
722 
723  _ASSERT( lhs && rhs );
724 
725  const string & lhs_asn = x_GetAsnText( lhs );
726  const string & rhs_asn = x_GetAsnText( rhs );
727 
728  return lhs_asn < rhs_asn;
729  }
730 
731 private:
734 
735  // retrieves from cache, if possible
    // An empty cached string is treated as "not cached yet": the object is
    // serialized through MSerial_AsnText and the text is stored in the
    // cache slot for obj before being returned.
736  const string & x_GetAsnText( const CConstRef<T> & obj ) const
737  {
738  string & asn_text = m_ObjAsnCache[obj];
739  if( asn_text.empty() ) {
740  // not in cache, so add to cache
741  stringstream asn_strm;
742  asn_strm << MSerial_AsnText << *obj;
743  asn_strm.str().swap( asn_text );
744  }
745 
746  return asn_text;
747  }
748 };
749 
750 
// NOTE(review): the class header and the typedef it wraps are on listing
// lines not visible in this view (752, 755).
751 template<class T>
753  // we just have to declare a class because
754  // we can't have template-parameterized typedefs
756 };
757 
759  const CSeq_entry_Handle & target,
760  const CScope::TBioseqHandles & bioseq_handles )
761 {
    // NOTE(review): the function-name line and listing lines 774, 815, 858,
    // 879, 924 and 954 are not visible in this view; the name is presumably
    // s_MakeGroupsForUniqueValues, judging by its caller.
    //
    // Overview of the visible logic:
    //  1. A new Bioseq-set is attached as the first entry of target's
    //     parent set, or of target's own set when it has no parent.
    //  2. For every handle in bioseq_handles, its parent entry (or that
    //     entry's parent set entry when the partly invisible class test
    //     holds) is moved into the new set; descriptors and annots of the
    //     entry's former parent set are remembered.
    //  3. Remembered descriptors and annots are cloned onto the new set,
    //     skipping any already seen.
    //  4. When target's own set acted as the parent, the remaining siblings
    //     may be wrapped into a second "remainder" set that receives clones
    //     of the parent's descriptors, after which the parent's descriptors
    //     are reset.
    //  5. Singleton sets under the parent are promoted.
762  if( ! target || ! target.IsSet() ) {
763  // can't split something that's not a bioseq-set
764  return;
765  }
766 
767  CBioseq_set_Handle bioseq_set_h = target.GetSet();
768 
769  CBioseq_set_Handle target_parent_h = target.GetParentBioseq_set();
770 
771  CBioseq_set::EClass child_class = (
772  bioseq_set_h.IsSetClass() ?
773  bioseq_set_h.GetClass() :
775 
776  bool child_became_parent = false;
777  if( ! target_parent_h ) {
778  // this set has no parent, so make it the parent set, class GenBank,
779  // and create two new sets using the original set class as members of this set
780  target_parent_h = bioseq_set_h;
781  child_became_parent = true;
782  }
783 
784  // create the bioseq-set that will hold the new ones
785  CBioseq_set_Handle new_bioseq_set;
786  {
787  CRef<CSeq_entry> pEntry( new CSeq_entry );
788  pEntry->SetSet().SetClass( child_class );
789  new_bioseq_set = target_parent_h.GetEditHandle().AttachEntry( *pEntry, 0 ).GetSet();
790  }
791 
792  // as we go along, accumulate the Seq-descrs and annots that we would like to add to
793  // new_bioseq_set.
794  typedef vector< CConstRef<CSeqdesc> > TDescRefVec;
795  TDescRefVec vecDescsToAddToNewBioseqSet;
796 
797  typedef vector< CConstRef<CSeq_annot> > TAnnotRefVec;
798  TAnnotRefVec vecAnnotsToAddToNewBioseqSet;
799 
800  // add SeqEntries for this category here
801  // AddItemListToSet(pBioseqSet, bioseq_handles, TRUE /* for_segregate */ );
802  ITERATE( CScope::TBioseqHandles, bioseq_it, bioseq_handles ) {
803 
804  // If it's directly in a nuc-prot bioseq-set return its seq-entry
805  // because nuc-prot sets need to travel together,
806  // otherwise return the seq-entry that directly contains this bioseq
807  // This is like C toolkit's GetBestTopParentForData
808  CSeq_entry_Handle best_entry_for_bioseq = bioseq_it->GetParentEntry();
809  if( best_entry_for_bioseq &&
810  best_entry_for_bioseq.HasParentEntry() )
811  {
812  CSeq_entry_Handle parent_entry = best_entry_for_bioseq.GetParentEntry();
813  if( parent_entry && parent_entry.IsSet() &&
814  FIELD_EQUALS( parent_entry.GetSet(),
816  {
817  best_entry_for_bioseq = parent_entry;
818  }
819  }
820  if( ! best_entry_for_bioseq ) {
821  continue;
822  }
823 
824  //
825  CBioseq_set_Handle orig_parent = best_entry_for_bioseq.GetParentBioseq_set();
826 
827  if( orig_parent ) {
828 
829  // if new_bioseq_set was of class genbank set,
830  // it can get the orig_parent's class instead
831  if( orig_parent.IsSetClass() &&
832  ( ! new_bioseq_set.IsSetClass() ||
833  new_bioseq_set.GetClass() == CBioseq_set::eClass_genbank ) )
834  {
835  new_bioseq_set.GetEditHandle().SetClass( orig_parent.GetClass() );
836  }
837 
838  // remember the descriptors and annots that we want to add later
839 
840  CSeqdesc_CI desc_ci( orig_parent.GetParentEntry(), CSeqdesc::e_not_set, 1 );
841  for( ; desc_ci; ++desc_ci ) {
842  vecDescsToAddToNewBioseqSet.push_back(
843  CConstRef<CSeqdesc>(&*desc_ci) );
844  }
845 
846  CSeq_annot_CI annot_ci( orig_parent, CSeq_annot_CI::eSearch_entry );
847  for( ; annot_ci; ++annot_ci ) {
848  vecAnnotsToAddToNewBioseqSet.push_back( annot_ci->GetCompleteSeq_annot() );
849  }
850  }
851 
852  // remove from orig_parent (if any) and add to the new_bioseq_set
853  new_bioseq_set.GetEditHandle().TakeEntry( best_entry_for_bioseq.GetEditHandle() );
854  }
855 
856  // add unique descriptors
857  {
    // NOTE(review): descsSeen is declared on a listing line not visible here (858)
859  // add the descriptors already in the destination
860  CSeqdesc_CI dest_desc_ci( new_bioseq_set.GetParentEntry(), CSeqdesc::e_not_set, 1 );
861  for( ; dest_desc_ci; ++dest_desc_ci ) {
862  descsSeen.insert( CConstRef<CSeqdesc>(&*dest_desc_ci) );
863  }
864 
865  ITERATE( TDescRefVec, src_seqdesc_ref_it, vecDescsToAddToNewBioseqSet ) {
866  if( descsSeen.find(*src_seqdesc_ref_it) != descsSeen.end() ) {
867  // skip because it duplicates an earlier one
868  continue;
869  }
870 
871  CRef<CSeqdesc> pNewDesc( SerialClone(**src_seqdesc_ref_it) );
872  new_bioseq_set.GetEditHandle().AddSeqdesc( *pNewDesc );
873  descsSeen.insert( *src_seqdesc_ref_it );
874  }
875  }
876 
877  // add unique annotations
878  {
    // NOTE(review): annotsSeen is declared on a listing line not visible here (879)
880  // add the annots already in the destination
881  CSeq_annot_CI dest_annot_ci( new_bioseq_set, CSeq_annot_CI::eSearch_entry );
882  for( ; dest_annot_ci; ++dest_annot_ci ) {
883  annotsSeen.insert( dest_annot_ci->GetCompleteSeq_annot() );
884  }
885 
886  ITERATE( TAnnotRefVec, src_annot_it, vecAnnotsToAddToNewBioseqSet ) {
887  if( annotsSeen.find(*src_annot_it) != annotsSeen.end() ) {
888  // skip because it duplicates an earlier one
889  continue;
890  }
891 
892  CRef<CSeq_annot> pNewAnnot( SerialClone(**src_annot_it) );
893  new_bioseq_set.GetEditHandle().AttachAnnot( *pNewAnnot );
894  annotsSeen.insert( *src_annot_it );
895  }
896  }
897 
898 
899  if( child_became_parent ) {
900  // get siblings of new_bioseq_set and see if any need to be wrapped into a set
901 
902  // fill in list of siblings
903  vector<CSeq_entry_Handle> siblingVec;
904  {
905  CSeq_entry_CI new_set_sibling_ci( target_parent_h, CSeq_entry_CI::eNonRecursive );
906  // skip the first one, which should equal new_bioseq_set
907  _ASSERT( *new_set_sibling_ci == new_bioseq_set.GetParentEntry() );
908  ++new_set_sibling_ci;
909 
910  for( ; new_set_sibling_ci; ++new_set_sibling_ci ) {
911  siblingVec.push_back(*new_set_sibling_ci);
912  }
913  }
914 
    // a remainder set is needed as soon as one sibling is a bare Bioseq or
    // a set satisfying the class test below (its value is not visible here)
915  bool bNeedsNewSet = false;
916  ITERATE( vector<CSeq_entry_Handle>, sibling_it, siblingVec ) {
917  if( sibling_it->IsSeq() ) {
918  bNeedsNewSet = true;
919  break;
920  }
921 
922  if( sibling_it->IsSet() &&
923  FIELD_EQUALS( sibling_it->GetSet(), Class,
925  {
926  bNeedsNewSet = true;
927  break;
928  }
929  }
930  if( bNeedsNewSet ) {
931  // remaining entries must be put into another set
932  CRef<CSeq_entry> pRemainderEntry( new CSeq_entry );
933  pRemainderEntry->SetSet().SetClass( child_class );
934  CBioseq_set_Handle remainder_bioseq_set =
935  target_parent_h.GetEditHandle().AttachEntry( *pRemainderEntry ).GetSet();
936 
937  ITERATE( vector<CSeq_entry_Handle>, sibling_it, siblingVec ) {
938  if( *sibling_it == remainder_bioseq_set.GetParentEntry() ) {
939  continue;
940  }
941 
942  remainder_bioseq_set.GetEditHandle().TakeEntry( sibling_it->GetEditHandle() );
943  }
944 
945  // take descriptors from parent onto our new remainder set
946  // (we do NOT have to check for uniqueness because remainder_bioseq_set
947  // starts off with no Seq-descrs)
948  CSeq_descr_CI parent_descr_ci(target_parent_h, 1);
949  for( ; parent_descr_ci; ++parent_descr_ci ) {
950  CRef<CSeq_descr> pRemainderSeqDescr( SerialClone(*parent_descr_ci) );
951  remainder_bioseq_set.GetEditHandle().AddSeq_descr( *pRemainderSeqDescr );
952  }
953  // set to genbank set
955  // remove all Seq_descrs
956  target_parent_h.GetEditHandle().ResetDescr();
957  }
958  }
959 
960  s_PromoteSingletonSetsInSet( target_parent_h );
961 }
962 
964  const CSeq_entry_Handle & target,
965  const CScope::TBioseqHandles & bioseq_handles )
966 {
    // NOTE(review): the function-name line is not visible in this view.
    // Groups the given bioseqs under target via s_MakeGroupsForUniqueValues(),
    // then collects target's direct child entries and hands them to
    // DivvyUpAlignments(). Does nothing unless target is a valid set entry.
967  if( ! target || ! target.IsSet() ) {
968  // can't split something that's not a bioseq-set
969  return;
970  }
971 
972  CBioseq_set_Handle bioseq_set_h = target.GetSet();
973 
974 
975  // MakeGroupsForUniqueValues
976  s_MakeGroupsForUniqueValues( target, bioseq_handles );
977 
978  // copy bioseq list alignments
    // note: children are enumerated after the regrouping above
979  TVecOfSeqEntryHandles vecOfSeqEntryHandles;
980  CSeq_entry_CI direct_child_ci(bioseq_set_h, CSeq_entry_CI::eNonRecursive);
981  for( ; direct_child_ci; ++direct_child_ci ) {
982  vecOfSeqEntryHandles.push_back(*direct_child_ci);
983  }
984 
985  // For every direct align under each direct child, figure out where it belongs:
986  // See if it belongs to one of the other children or more than one or none
987  // (for none, it should go to *all* of them, and for more than one, it's destroyed )
988  DivvyUpAlignments(vecOfSeqEntryHandles);
989 }
990 
991 // typedefs used by DivvyUpAlignments and its helper functions
// NOTE(review): TMapDescendentToInputEntry, used by the helpers below, is
// presumably declared on a listing line not visible in this view (992).
// Seq-annot -> destination entry; keys can be listed in insertion order.
993 typedef CMapWithOriginalOrderingIteration< CRef<CSeq_annot>, CSeq_entry_Handle> TMapSeqAnnotToDest;
994 typedef vector<CSeq_annot_Handle> TVecOfSeqAnnotsToErase;
995 
996 typedef vector< CConstRef<CSeq_align> > TAlignVec;
// destination entry -> alignments to place there; keys can be listed in insertion order
997 typedef CMapWithOriginalOrderingIteration<CSeq_entry_Handle, TAlignVec> TMapEntryToAlignVec;
998 
999 // returns true if any align was changed or deleted
1001  const CSeq_align_Handle & align,
1002  const TMapDescendentToInputEntry & mapDescendentToInputEntry,
1003  TMapEntryToAlignVec & mapEntryToAlignVec)
1004 {
    // NOTE(review): the function-name line and the opening of the ITERATE
    // over the dense-diags (listing line 1017) are not visible in this view.
    //
    // align                      alignment whose segments are dense-diags
    // mapDescendentToInputEntry  maps an entry to the input entry it lives under
    // mapEntryToAlignVec         output: destination entry -> alignments to
    //                            place there (unset handle key = "everywhere")
    //
    // Each dense-diag is assigned a destination entry. When all of them
    // share one destination, the original alignment is recorded for it
    // unchanged; otherwise one copy of the alignment is built per
    // destination, holding only that destination's dense-diags.
1005  bool bAnyAlignChanged = false;
1006 
1007  CScope & scope = align.GetScope();
1008  const CSeq_entry_Handle & old_input_entry =
1009  align.GetAnnot().GetParentEntry();
1010 
1011  // figure out which entry each dense diag goes into
1012  typedef vector< CConstRef<CDense_diag> > TDenseDiagVec;
1013  typedef map<CSeq_entry_Handle, TDenseDiagVec > TMapEntryToDenseDiags;
1014  // an unset CSeq_entry_Handle as key means to place them "everywhere".
1015  // all non-deleted dense_diags should be in some value somewhere in this map
1016  TMapEntryToDenseDiags mapEntryToDenseDiags;
1018  dendiag_iter,
1019  align.GetSegs().GetDendiag() )
1020  {
1021  CConstRef<CDense_diag> pDendiag = *dendiag_iter;
1022  if( FIELD_EQUALS(*pDendiag, Dim, 2) &&
1023  pDendiag->IsSetIds() && pDendiag->GetIds().size() == 2 )
1024  {
1025  // figure out which input entry this belongs to
1026  // (empty handle for "all")
1027  // If it belongs to multiple input entries,
1028  // then it belongs *nowhere*, and we set bRemoveDendiag to true
1029  CSeq_entry_Handle dest_input_entry;
1030  bool bRemoveDendiag = false;
1031  ITERATE(CDense_diag_Base::TIds, id_iter, pDendiag->GetIds() ) {
1032  CBioseq_Handle bioseq = scope.GetBioseqHandle(**id_iter);
1033  CSeq_entry_Handle bioseqs_entry = (
1034  bioseq ?
1035  bioseq.GetParentEntry() :
1036  CSeq_entry_Handle() );
1037  TMapDescendentToInputEntry::const_iterator find_input_entry_iter = (
1038  bioseqs_entry ?
1039  mapDescendentToInputEntry.find(bioseqs_entry) :
1040  mapDescendentToInputEntry.end() );
    // ids that resolve to no known input entry do not constrain the destination
1041  if( find_input_entry_iter == mapDescendentToInputEntry.end() ) {
1042  continue;
1043  }
1044 
1045  CSeq_entry_Handle candidate_input_entry =
1046  find_input_entry_iter->second;
1047  _ASSERT(candidate_input_entry);
1048 
1049  // update dest_input_entry based on candidate_input_entry
1050  if( ! dest_input_entry ) {
1051  // not set before, so set it now
1052  dest_input_entry = candidate_input_entry;
1053  } else if( dest_input_entry == candidate_input_entry ) {
1054  // great, matches so far
1055  } else {
1056  // conflict: this align belongs on multiple seq-entries so it
1057  // can't be put anywhere. We note that it should be
1058  // destroyed.
1059  bRemoveDendiag = true;
1060  break;
1061  }
1062  } // <-- ITERATE ids on dendiag
1063 
1064  if( bRemoveDendiag ) {
1065  bAnyAlignChanged = true;
1066  // don't add it to mapEntryToDenseDiags so it will eventually be lost
1067  } else {
1068  if( dest_input_entry != old_input_entry ) {
1069  bAnyAlignChanged = true;
1070  }
1071  mapEntryToDenseDiags[dest_input_entry].push_back(pDendiag);
1072  }
1073  } // <-- if dendiag is of dimensionality 2
1074  else {
1075  // dendiags of other dimensionality stay with same seq-entry
1076  mapEntryToDenseDiags[old_input_entry].push_back(pDendiag);
1077  }
1078  } // <-- ITERATE all dendiags on this alignment
1079 
1080  // first, check if we're in the (hopefully common) easy case of
1081  // "they all move to the same spot"
1082  if( mapEntryToDenseDiags.size() == 1 )
1083  {
1084  CSeq_entry_Handle dest_input_entry =
1085  mapEntryToDenseDiags.begin()->first;
1086  mapEntryToAlignVec[dest_input_entry].push_back( align.GetSeq_align() );
1087  } else {
1088  // each moves to a different spot and some might even be deleted,
1089  // so we will have to copy the original align and break it into pieces
    // (an empty map also lands here: nothing is emitted and the function
    // still reports a change)
1090 
1091  NON_CONST_ITERATE( TMapEntryToDenseDiags,
1092  entry_to_dendiags_iter,
1093  mapEntryToDenseDiags )
1094  {
1095  const CSeq_entry_Handle & dest_input_entry =
1096  entry_to_dendiags_iter->first;
1097  TDenseDiagVec & dendiag_vec = entry_to_dendiags_iter->second;
1098  _ASSERT( ! dendiag_vec.empty() );
1099 
1100  // copy the alignment for this dest_input_entry
1101  CRef<CSeq_align> pNewAlign( new CSeq_align );
1102  pNewAlign->Assign( *align.GetSeq_align() );
1103  pNewAlign->ResetSegs();
1104 
1105  CSeq_align::C_Segs::TDendiag & new_dendiag_vec =
1106  pNewAlign->SetSegs().SetDendiag();
1107  ITERATE( TDenseDiagVec, dendiag_vec_it, dendiag_vec ) {
1108  CRef<CDense_diag> pNewDendiag( new CDense_diag );
1109  pNewDendiag->Assign( **dendiag_vec_it );
1110  new_dendiag_vec.push_back( pNewDendiag );
1111  }
1112 
1113  mapEntryToAlignVec[dest_input_entry].push_back( pNewAlign );
1114  }
1115 
1116  bAnyAlignChanged = true;
1117  }
1118 
1119  return bAnyAlignChanged;
1120 }
1121 
1122 // returns true if any align was changed or deleted
// NOTE(review): the line carrying this function's return type and name
// (listing line 1123) is missing from this listing; only the parameter
// list survives below.
//
// Works out, for one Dense-seg alignment, which input entry each row
// belongs to and records the alignment (or per-entry pieces of it).
//   align                     - handle of the alignment; its segs are read
//                               through GetDenseg().
//   mapDescendentToInputEntry - lookup from a Seq-entry handle to the input
//                               entry it is filed under.
//   mapEntryToAlignVec        - output; alignments destined for each input
//                               entry are appended here. The empty handle
//                               key collects rows whose entry was not found.
1124  const CSeq_align_Handle & align,
1125  const TMapDescendentToInputEntry & mapDescendentToInputEntry,
1126  TMapEntryToAlignVec & mapEntryToAlignVec)
1127 {
1128  bool bAnyAlignChanged = false;
1129 
1130  CScope & scope = align.GetScope();
1131  const CSeq_entry_Handle & old_input_entry =
1132  align.GetAnnot().GetParentEntry();
1133 
1134  typedef vector<CDense_seg::TDim> TRowVec; // each element is a row (index into denseg)
1135  typedef map<CSeq_entry_Handle, TRowVec > TMapInputEntryToDensegRows;
1136  // this will map each input_entry to the rows in the denseg that it should use.
1137  // an empty key means "copy to every input entry"
1138  // (every non-deleted row index should have a value somewhere in mapInputEntryToDensegRows)
1139  TMapInputEntryToDensegRows mapInputEntryToDensegRows;
1140 
1141  // figure out what input entry each row belongs to
1142  const CDense_seg::TIds & ids = align.GetSegs().GetDenseg().GetIds();
1143  for(unsigned iRow = 0; iRow < ids.size(); ++iRow) {
  // resolve the row's Seq-id to a bioseq; an unresolved id yields an
  // empty entry handle, which skips the map lookup below
1144  CBioseq_Handle id_bioseq = scope.GetBioseqHandle(*ids[iRow]);
1145  CSeq_entry_Handle id_bioseq_entry =
1146  ( id_bioseq ?
1147  id_bioseq.GetParentEntry() :
1148  CSeq_entry_Handle() );
1149  TMapDescendentToInputEntry::const_iterator find_input_entry_iter =
1150  ( id_bioseq_entry ?
1151  mapDescendentToInputEntry.find(id_bioseq_entry) :
1152  mapDescendentToInputEntry.end() );
1153  if( find_input_entry_iter == mapDescendentToInputEntry.end() ) {
1154  // row goes to all input entries (filed under the empty handle key)
1155  bAnyAlignChanged = true;
1156  mapInputEntryToDensegRows[CSeq_entry_Handle()].push_back(iRow);
1157  continue;
1158  }
1159 
1160  const CSeq_entry_Handle & id_input_entry =
1161  find_input_entry_iter->second;
1162  _ASSERT(id_input_entry);
1163 
  // a row owned by a different input entry than the one currently
  // holding the alignment means the alignment has to be split/moved
1164  if( id_input_entry != old_input_entry ) {
1165  bAnyAlignChanged = true;
1166  }
1167  mapInputEntryToDensegRows[id_input_entry].push_back(iRow);
1168  }
1169 
1170  if( ! bAnyAlignChanged ) {
1171  // easy case: every row already belongs to the current entry, so the
  // original alignment object is recorded unchanged
1172  mapEntryToAlignVec[old_input_entry].push_back( align.GetSeq_align() );
1173  } else {
1174  // each row may end up in a different seq-entry
1175 
1176  ITERATE(TMapInputEntryToDensegRows,
1177  input_entry_to_denseg_it,
1178  mapInputEntryToDensegRows)
1179  {
1180  const CSeq_entry_Handle & dest_input_entry =
1181  input_entry_to_denseg_it->first;
1182  const TRowVec & rowVec = input_entry_to_denseg_it->second;
1183 
1184  // C++ toolkit has a handy function just for this purpose
1185  // (Note that it doesn't copy scores)
1186  CRef<CDense_seg> pNewDenseg =
1187  align.GetSegs().GetDenseg().ExtractRows(rowVec);
  // copy the whole alignment, then swap in the reduced Dense-seg
1188  CRef<CSeq_align> pNewSeqAlign( new CSeq_align );
1189  pNewSeqAlign->Assign( *align.GetSeq_align() );
1190  pNewSeqAlign->SetSegs().SetDenseg( *pNewDenseg );
1191 
1192  mapEntryToAlignVec[dest_input_entry].push_back( pNewSeqAlign );
1193 
1194  }
1195 
  // NOTE(review): redundant - this branch is only entered when the flag
  // is already true.
1196  bAnyAlignChanged = true;
1197  }
1198 
1199  return bAnyAlignChanged;
1200 }
1201 
// NOTE(review): the line carrying this function's return type and name
// (listing line 1202) is missing from this listing. The body returns no
// value.
//
// Examines one Seq-annot of alignments and, when any alignment has to be
// moved or split, queues per-destination copies of the annot and queues
// the original annot for removal.
//   annot_h                   - annot to examine; ignored unless IsAlign().
//   mapDescendentToInputEntry - passed through to the per-alignment helpers.
//   mapSeqAnnotToDest         - output: new annot -> destination entry
//                               (an empty handle means "all input entries").
//   vecOfSeqAnnotToErase      - output: annots to remove afterwards.
1203  const CSeq_annot_Handle & annot_h,
1204  const TMapDescendentToInputEntry & mapDescendentToInputEntry,
1205  TMapSeqAnnotToDest & mapSeqAnnotToDest,
1206  TVecOfSeqAnnotsToErase & vecOfSeqAnnotToErase )
1207 {
1208  if( ! annot_h.IsAlign() ) {
1209  return;
1210  }
1211 
1212  CConstRef<CSeq_annot> pAnnot = annot_h.GetCompleteSeq_annot();
1213  if( ! pAnnot ) {
1214  // shouldn't happen
1215  return;
1216  }
1217 
1218  // figure out where each align should go. If the key
1219  // is the empty CSeq_entry_Handle, that means it
1220  // goes on all input entries.
1221 
1222  // all non-deleted aligns should be in some value somewhere in this map
1223  TMapEntryToAlignVec mapEntryToAlignVec;
1224 
1225  // any change at all implies that we have to remove annot_h,
1226  // because it's going to be copied (or at least moved)
1227  bool bAnyAlignNeedsChange = false;
1228 
1229  CSeq_entry_Handle old_input_entry = annot_h.GetParentEntry();
1230  CAlign_CI align_ci(annot_h);
1231  for( ; align_ci; ++align_ci) {
1232  CSeq_align_Handle align = align_ci.GetSeq_align_Handle();
1233 
1234  const CSeq_align::TSegs & segs = align.GetSegs();
1235 
1236  if( segs.IsDendiag() ) {
1237 
  // NOTE(review): listing line 1238 (the start of this condition,
  // presumably a call to the Dense-diag helper) is missing.
1239  align, mapDescendentToInputEntry, mapEntryToAlignVec) )
1240  {
1241  bAnyAlignNeedsChange = true;
1242  }
1243 
1244  } else if( segs.IsDenseg() &&
1245  ! RAW_FIELD_IS_EMPTY(segs.GetDenseg(), Ids) )
1246  {
  // NOTE(review): listing line 1247 (the start of this condition,
  // presumably a call to the Dense-seg helper) is missing.
1248  align, mapDescendentToInputEntry, mapEntryToAlignVec) )
1249  {
1250  bAnyAlignNeedsChange = true;
1251  }
1252 
1253  } else {
1254  // other types stay on the same seq-entry
1255  mapEntryToAlignVec[old_input_entry].push_back(align.GetSeq_align());
1256  }
1257  } // <-- ITERATE through alignments on annot
1258 
1259  // check for the (hopefully common) easy case
1260  // where we don't have to move the annot at all
1261  if( ! bAnyAlignNeedsChange ) {
1262  // easy: nothing to do
1263  return;
1264  }
1265 
1266  // use this as a template so we don't have to repeatedly
1267  // copy the whole annot and erase its aligns
1268  CRef<CSeq_annot> pOldAnnotWithNoAligns( new CSeq_annot );
1269  pOldAnnotWithNoAligns->Assign( *annot_h.GetCompleteSeq_annot() );
1270  pOldAnnotWithNoAligns->SetData().SetAlign().clear();
1271 
1272  // for each destination input entry, fill in mapSeqAnnotToDest
1273  // with a copy of seq-annot that just includes the aligns we care about
1274  ITERATE( TMapEntryToAlignVec::TKeyVec,
1275  entry_to_aligns_iter,
1276  mapEntryToAlignVec.GetKeysInOriginalOrder() )
1277  {
1278  const CSeq_entry_Handle & dest_input_entry = *entry_to_aligns_iter;
1279  TAlignVec & aligns_to_copy =
1280  mapEntryToAlignVec.find(dest_input_entry)->second;
1281 
1282  // make copy of annot without aligns, but then
1283  // add the aligns that are relevant to this dest input entry
1284  CRef<CSeq_annot> pNewAnnot( new CSeq_annot );
1285  pNewAnnot->Assign(*pOldAnnotWithNoAligns);
1286  CSeq_annot::C_Data::TAlign & new_aligns =
1287  pNewAnnot->SetData().SetAlign();
1288 
1289  _ASSERT( new_aligns.empty() );
1290 
  // every alignment is deep-copied, so the new annot shares no
  // alignment objects with the annot that is about to be erased
1291  ITERATE( TAlignVec, align_it, aligns_to_copy ) {
1292  CRef<CSeq_align> pNewAlign( new CSeq_align );
1293  pNewAlign->Assign( **align_it );
1294  new_aligns.push_back( pNewAlign );
1295  }
1296 
1297  mapSeqAnnotToDest[pNewAnnot] = dest_input_entry;
1298  }
1299 
1300  // erase the old annot, since we made a copy
1301  vecOfSeqAnnotToErase.push_back(annot_h);
1302 }
1303 
// Redistributes alignment annots among the given input entries.
// Three phases: (1) build a descendant -> input-entry lookup, (2) scan
// every annot under every input entry and collect the requested
// attachments and removals, (3) attach the new annots and then remove the
// superseded ones.
//   vecOfSeqEntryHandles - the input entries; they are edited through
//                          GetEditHandle().
1304 void DivvyUpAlignments(const TVecOfSeqEntryHandles & vecOfSeqEntryHandles)
1305 {
1306  // create a mapping from all descendents of each member of
1307  // vecOfSeqEntryHandles to that member.
1308  TMapDescendentToInputEntry mapDescendentToInputEntry;
1309  ITERATE(TVecOfSeqEntryHandles, input_entry_iter, vecOfSeqEntryHandles) {
1310  // it maps to itself, of course
1311  mapDescendentToInputEntry[*input_entry_iter] = *input_entry_iter;
1312  CSeq_entry_CI descendent_entry_iter(*input_entry_iter, CSeq_entry_CI::eRecursive);
1313  for(; descendent_entry_iter; ++descendent_entry_iter ) {
1314  mapDescendentToInputEntry[*descendent_entry_iter] = *input_entry_iter;
1315  }
1316  }
1317 
1318  // This mapping will hold the destination of each Seq_annot that
1319  // should be moved. An empty destination handle
1320  // means "copy to all members of vecOfSeqEntryHandles"
1321  // (read that carefully: one code path moves and the other copies.)
1322  TMapSeqAnnotToDest mapSeqAnnotToDest;
1323 
1324  // this holds the Seq-annots that we will have to remove.
1325  TVecOfSeqAnnotsToErase vecOfSeqAnnotsToErase;
1326 
1327  ITERATE(TVecOfSeqEntryHandles, input_entry_iter, vecOfSeqEntryHandles) {
1328  const CSeq_entry_Handle & input_entry_h = *input_entry_iter;
1329  CSeq_annot_CI annot_ci(input_entry_h, CSeq_annot_CI::eSearch_entry );
1330  for( ; annot_ci; ++annot_ci ) {
  // NOTE(review): listing line 1331 (the callee name that opens this
  // call, presumably the per-annot helper) is missing.
1332  *annot_ci,
1333  mapDescendentToInputEntry,
1334  mapSeqAnnotToDest,
1335  vecOfSeqAnnotsToErase );
1336  }
1337  }
1338 
1339  // do all the moves and copies that were requested
1340  ITERATE(TMapSeqAnnotToDest::TKeyVec,
1341  annot_move_iter,
1342  mapSeqAnnotToDest.GetKeysInOriginalOrder() )
1343  {
1344  CRef<CSeq_annot> pAnnot = *annot_move_iter;
1345  const CSeq_entry_Handle & dest_entry_h = mapSeqAnnotToDest.find(pAnnot)->second;
1346 
1347  // careful: one code path moves and the other copies.
1348  if( dest_entry_h ) {
1349  dest_entry_h.GetEditHandle().AttachAnnot(*pAnnot);
1350  } else {
1351  // if dest_entry_h is invalid, that means to copy
1352  // the annot to all
1353  ITERATE(TVecOfSeqEntryHandles, input_entry_iter, vecOfSeqEntryHandles) {
  // each input entry gets its own deep copy of the annot
1354  CRef<CSeq_annot> pAnnotCopy( new CSeq_annot );
1355  pAnnotCopy->Assign(*pAnnot);
1356  input_entry_iter->GetEditHandle().AttachAnnot(*pAnnotCopy);
1357  }
1358  }
1359  }
1360 
1361  // erase the annots that were requested to be deleted
  // (done last, after all replacement annots have been attached)
1362  ITERATE( TVecOfSeqAnnotsToErase, annot_iter, vecOfSeqAnnotsToErase ) {
1363  annot_iter->GetEditHandle().Remove();
1364  }
1365 }
1366 
1367 
1369 {
  // NOTE(review): the signature (listing line 1368) is missing from this
  // listing. From its use below, `set` is a Bioseq-set handle.
  //
  // Moves descriptors shared by every direct member of `set` up onto the
  // set: a descriptor qualifies when an Equals() match exists on every
  // member. Title, Molinfo and Source descriptors are never candidates.
1370  if (set.IsEmptySeq_set()) {
1371  return;
1372  }
1373  CConstRef<CBioseq_set> top_set = set.GetCompleteBioseq_set();
  // `master` accumulates the candidate descriptors: seeded from the first
  // member, then narrowed against every later member
1374  CRef<CSeq_descr> master(new CSeq_descr());
1375  bool first = true;
1376  ITERATE(CBioseq_set::TSeq_set, it, top_set->GetSeq_set()) {
1377  if ((*it)->IsSetDescr()) {
1378  if (first) {
1379  ITERATE(CSeq_descr::Tdata, d, (*it)->GetDescr().Get()) {
1380  if (!(*d)->IsTitle() && !(*d)->IsMolinfo() && !(*d)->IsSource()) {
1381  // add to master list
1382  CRef<CSeqdesc> cpy(new CSeqdesc());
1383  cpy->Assign(**d);
1384  master->Set().push_back(cpy);
1385  }
1386  }
1387  first = false;
1388  } else {
1389  // remove from master any descriptor not on member
1390  CSeq_descr::Tdata::iterator d = master->Set().begin();
1391  while (d != master->Set().end()) {
1392  bool found = false;
1393  ITERATE(CSeq_descr::Tdata, s, (*it)->GetDescr().Get()) {
1394  if ((*d)->Equals(**s)) {
1395  found = true;
1396  break;
1397  }
1398  }
1399  if (found) {
1400  ++d;
1401  } else {
  // erase() returns the next iterator, so no increment here
1402  d = master->Set().erase(d);
1403  }
1404  }
1405  }
1406  } else {
  // a member with no descriptors at all means nothing can be common
1407  master->Reset();
1408  break;
1409  }
1410  }
1411  if (master->IsSet() && !master->Set().empty()) {
1412  // copy each descriptor to master, remove from member
  // NOTE(review): listing line 1413 is missing; it presumably declares
  // `etop`, the edit handle that receives the descriptors below.
1414  ITERATE(CSeq_descr::Tdata, d, master->Get()) {
1415  // remove from components
1416  ITERATE(CBioseq_set::TSeq_set, it, top_set->GetSeq_set()) {
1417  if ((*it)->IsSeq()) {
1418  CBioseq_Handle bs = set.GetScope().GetBioseqHandle((*it)->GetSeq());
1419  CBioseq_EditHandle bse(bs);
  // find the first descriptor on this member equal to *d
1420  CBioseq::TDescr::Tdata::iterator di = bse.SetDescr().Set().begin();
1421  while (di != bse.SetDescr().Set().end() && !(*di)->Equals(**d)) {
1422  ++di;
1423  }
1424  if (di != bse.SetDescr().Set().end()) {
1425  bse.RemoveSeqdesc(**di);
1426  }
1427  } else if ((*it)->IsSet()) {
1428  CBioseq_set_Handle bss = set.GetScope().GetBioseq_setHandle((*it)->GetSet());
1429  CBioseq_set_EditHandle bsse(bss);
  // same search as above, for a member that is itself a set
1430  CBioseq_set::TDescr::Tdata::iterator di = bsse.SetDescr().Set().begin();
1431  while (di != bsse.SetDescr().Set().end() && !(*di)->Equals(**d)) {
1432  ++di;
1433  }
1434  if (di != bsse.SetDescr().Set().end()) {
1435  bsse.RemoveSeqdesc(**di);
1436  }
1437  }
1438  }
  // attach a fresh copy of the descriptor to the parent
1439  CRef<CSeqdesc> cpy(new CSeqdesc());
1440  cpy->Assign(**d);
1441  etop.AddSeqdesc(*cpy);
1442  }
1443  }
1444 }
1445 
1446 
// NOTE(review): the line carrying this function's return type and name
// (listing line 1447) is missing from this listing. The body returns no
// value.
//
// Copies the descriptors found on a Bioseq-set's parent entry down to
// each direct child entry, then resets the descriptors on the set.
//   bioseq_set_h      - the set to process; an invalid handle is a no-op.
//   choices_to_delete - descriptor choices that are NOT copied down and
//                       are therefore dropped when the set is reset.
1448  const CBioseq_set_Handle & bioseq_set_h,
1449  const vector<CSeqdesc::E_Choice> &choices_to_delete )
1450 {
1451  if( ! bioseq_set_h ) {
1452  return;
1453  }
1454 
1455  // sort it so we can use binary search on it
1456  CSeqdesc_CI::TDescChoices sorted_choices_to_delete = choices_to_delete;
1457  stable_sort( sorted_choices_to_delete.begin(),
1458  sorted_choices_to_delete.end() );
1459 
1460  // retrieve all the CSeqdescs that we will have to copy
1461  // (if a Seqdesc isn't copied into here, it's implicitly
1462  // deleted )
1463  CConstRef<CSeq_descr> pSeqDescrToCopy;
1464  {
1465  // we have this pSeqDescrWithChosenDescs variable because
1466  // we want pSeqDescrToCopy to be protected
1467  // once it's set
1468  CRef<CSeq_descr> pSeqDescrWithChosenDescs( new CSeq_descr );
  // NOTE(review): the trailing `1` presumably limits the iterator to
  // the set's own level - confirm against CSeqdesc_CI.
1469  CSeqdesc_CI desc_ci( bioseq_set_h.GetParentEntry(), CSeqdesc::e_not_set, 1);
1470  for( ; desc_ci; ++desc_ci ) {
1471  if( ! binary_search( sorted_choices_to_delete.begin(),
1472  sorted_choices_to_delete.end(), desc_ci->Which() ) )
1473  {
1474  // not one of the deleted ones, so add it
1475  pSeqDescrWithChosenDescs->Set().push_back(
1476  CRef<CSeqdesc>( SerialClone(*desc_ci) ) );
1477  }
1478  }
1479  pSeqDescrToCopy = pSeqDescrWithChosenDescs;
1480  }
1481 
1482  // copy to all immediate children
1483  CSeq_entry_CI direct_child_ci( bioseq_set_h, CSeq_entry_CI::eNonRecursive );
1484  for( ; direct_child_ci; ++direct_child_ci ) {
  // NOTE(review): pNewDescr is never used - the AddDescr() call below
  // makes its own clone, so this is a wasted deep copy per child.
1485  CRef<CSeq_descr> pNewDescr( SerialClone(*pSeqDescrToCopy) );
1486  direct_child_ci->GetEditHandle().AddDescr(
1487  *SerialClone(*pSeqDescrToCopy) );
1488  }
1489 
1490  // remove all descs from the parent
1491  bioseq_set_h.GetEditHandle().ResetDescr();
1492 }
1493 
1494 
1496 {
  // NOTE(review): the signature (listing line 1495) is missing from this
  // listing.
  //
  // Maps a label string to a CSeq_id choice, comparing case-insensitively.
  // Recognised labels: "LocalId", "DDBJ", "EMBL", "GenBank", "RefSeq"
  // (-> e_Other) and "General". Anything else yields e_not_set.
1497  if (NStr::EqualNocase(label, "LocalId")) {
1498  return CSeq_id::e_Local;
1499  } else if (NStr::EqualNocase(label, "DDBJ")) {
1500  return CSeq_id::e_Ddbj;
1501  } else if (NStr::EqualNocase(label, "EMBL")) {
1502  return CSeq_id::e_Embl;
1503  } else if (NStr::EqualNocase(label, "GenBank")) {
1504  return CSeq_id::e_Genbank;
1505  } else if (NStr::EqualNocase(label, "RefSeq")) {
1506  return CSeq_id::e_Other;
1507  } else if (NStr::EqualNocase(label, "General")) {
1508  return CSeq_id::e_General;
1509  } else {
1510  return CSeq_id::e_not_set;
1511  }
1512 }
1513 
1514 
1516 {
  // NOTE(review): the signature (listing line 1515) is missing from this
  // listing.
  //
  // Maps a CSeq_id choice to its label string: "LocalId", "DDBJ", "EMBL",
  // "GenBank", "RefSeq" (for e_Other) or "General". Every other choice
  // yields kEmptyStr.
  // The `break` after each `return` is unreachable.
1517  switch (choice) {
1518  case CSeq_id::e_Local:
1519  return "LocalId";
1520  break;
1521  case CSeq_id::e_Ddbj:
1522  return "DDBJ";
1523  break;
1524  case CSeq_id::e_Embl:
1525  return "EMBL";
1526  break;
1527  case CSeq_id::e_Genbank:
1528  return "GenBank";
1529  break;
1530  case CSeq_id::e_Other:
1531  return "RefSeq";
1532  break;
1533  case CSeq_id::e_General:
1534  return "General";
1535  break;
1536  default:
1537  return kEmptyStr;
1538  break;
1539  }
1540 }
1541 
1542 
1544 {
  // NOTE(review): the signature (listing line 1543) is missing from this
  // listing. From its use below, `id` is a Seq-id.
  //
  // Builds the text recorded for a Seq-id:
  //   - local id:  the string itself, or the integer rendered as text;
  //   - DDBJ / EMBL / GenBank / Other / General: id.AsFastaString();
  //   - any other choice: an empty string.
1545  string val;
1546  switch (id.Which()) {
1547  case CSeq_id::e_Local:
1548  if (id.GetLocal().IsStr()) {
1549  val = id.GetLocal().GetStr();
1550  } else if (id.GetLocal().IsId()) {
1551  val = NStr::NumericToString(id.GetLocal().GetId());
1552  }
1553  break;
1554  case CSeq_id::e_Ddbj:
1555  case CSeq_id::e_Embl:
1556  case CSeq_id::e_Genbank:
1557  case CSeq_id::e_Other:
1558  case CSeq_id::e_General:
1559  val = id.AsFastaString();
1560  break;
1561  default:
1562  break;
1563  }
1564  return val;
1565 }
1566 
1567 
1569 {
  // NOTE(review): the signature (listing line 1568) is missing from this
  // listing.
  //
  // Builds a user field whose label is LabelFromType(id.Which()) and whose
  // string data is MakeOriginalLabelForId(id). If either string is blank
  // no field is created and the returned CRef is left empty, so callers
  // must test the result.
1570  CRef<CUser_field> field;
1571 
1572  string label = LabelFromType(id.Which());
1573  string val = MakeOriginalLabelForId(id);
1574  if (!NStr::IsBlank(label) && !NStr::IsBlank(val)) {
1575  field = new CUser_field();
1576  field->SetLabel().SetStr(label);
1577  field->SetData().SetStr(val);
1578  }
1579  return field;
1580 }
1581 
1582 
1584 {
  // NOTE(review): the signature (listing line 1583) is missing from this
  // listing. From its use below, `entry` is a non-const Seq-entry.
  //
  // For every Bioseq under `entry` that has no OriginalId user-object
  // descriptor yet, builds one with a field per Seq-id (see
  // MakeOriginalIdField) and appends it to the Bioseq's descriptors.
  // Recurses through sets.
1585  if (entry.IsSeq()) {
1586  bool need_object = true;
1587  CBioseq& seq = entry.SetSeq();
1588  if (seq.IsSetDescr()) {
  // NOTE(review): listing line 1589 (the loop header that introduces
  // `d`, iterating the descriptors) is missing.
1590  if ((*d)->IsUser()
1591  && (*d)->GetUser().GetObjectType() == CUser_object::eObjectType_OriginalId) {
1592  need_object = false;
1593  break;
1594  }
1595  }
1596  }
1597  if (need_object) {
1598  CRef<CUser_object> obj(new CUser_object());
  // NOTE(review): listing line 1599 is missing; it presumably marks
  // `obj` as an OriginalId object - confirm against the full file.
1600  ITERATE(CBioseq::TId, id, entry.GetSeq().GetId()) {
1601  CRef<CUser_field> field = MakeOriginalIdField(**id);
  // ids with no usable label/value produce an empty CRef; skip them
1602  if (field) {
1603  obj->SetData().push_back(field);
1604  }
1605  }
  // only attach the object when a field was actually added
1606  if (obj->IsSetData()) {
1607  CRef<CSeqdesc> desc(new CSeqdesc());
1608  desc->SetUser(*obj);
1609  seq.SetDescr().Set().push_back(desc);
1610  }
1611  }
1612  } else if (entry.IsSet() && entry.GetSet().IsSetSeq_set()) {
  // NOTE(review): listing line 1613 (the loop header that introduces
  // `s`, iterating the member entries) is missing.
1614  AddLocalIdUserObjects(**s);
1615  }
1616  }
1617 }
1618 
1619 
1620 bool IsMatchingIdMissing(const CUser_field& field, const CBioseq::TId& ids)
1621 {
1622  if (!field.IsSetLabel() || !field.GetLabel().IsStr() ||
1623  NStr::IsBlank(field.GetLabel().GetStr()) ||
1624  !field.IsSetData() || !field.GetData().IsStr() ||
1625  NStr::IsBlank(field.GetData().GetStr())) {
1626  return false;
1627  }
1628  bool found = false;
1629  bool any_type = false;
1630  bool found_mismatch = false;
1631 
1632  CSeq_id::E_Choice choice = TypeFromLabel(field.GetLabel().GetStr());
1633  if (choice == CSeq_id::e_not_set) {
1634  return false;
1635  }
1636 
1637  ITERATE(CBioseq::TId, id_it, ids) {
1638  string expected = MakeOriginalLabelForId(**id_it);
1639  if ((*id_it)->Which() == choice) {
1640  any_type = true;
1641  if (NStr::Equal(field.GetData().GetStr(), expected)) {
1642  found = true;
1643  break;
1644  }
1645  } else if ((*id_it)->Which() == CSeq_id::e_Local) {
1646  if (choice == CSeq_id::e_Ddbj && NStr::StartsWith(expected, "dbj_")) {
1647  found_mismatch = true;
1648  }
1649  if (choice == CSeq_id::e_Embl && NStr::StartsWith(expected, "emb_")) {
1650  found_mismatch = true;
1651  }
1652  if (choice == CSeq_id::e_Genbank && NStr::StartsWith(expected, "gb_")) {
1653  found_mismatch = true;
1654  }
1655  if (choice == CSeq_id::e_Other && NStr::StartsWith(expected, "ref_")) {
1656  found_mismatch = true;
1657  }
1658  }
1659  }
1660  if (!found && (any_type || found_mismatch)) {
1661  return true;
1662  } else {
1663  return false;
1664  }
1665 }
1666 
1667 
1668 bool HasRepairedIDs(const CUser_object& user, const CBioseq::TId& ids)
1669 {
1670  bool rval = false;
1671  if (user.IsSetData()) {
1672  for(auto it: user.GetData()) {
1673  if (IsMatchingIdMissing(*it, ids)) {
1674  rval = true;
1675  break;
1676  }
1677  }
1678  }
1679  return rval;
1680 }
1681 
1682 
// Tell whether any Bioseq under `entry` has an OriginalId user-object
// descriptor for which the (user, ids) overload of HasRepairedIDs reports
// true. Recurses through sets and stops at the first hit.
// Returns false for a Bioseq that lacks descriptors or IDs.
1683 bool HasRepairedIDs(const CSeq_entry& entry)
1684 {
1685  bool rval = false;
1686  if (entry.IsSeq()) {
1687  const CBioseq& seq = entry.GetSeq();
1688  if (seq.IsSetDescr() && seq.IsSetId()) {
  // NOTE(review): listing line 1689 (the loop header that introduces
  // `d`, iterating the descriptors) is missing.
1690  if ((*d)->IsUser()
1691  && (*d)->GetUser().GetObjectType() == CUser_object::eObjectType_OriginalId) {
1692  rval = HasRepairedIDs((*d)->GetUser(), seq.GetId());
1693  if (rval) {
1694  break;
1695  }
1696  }
1697  }
1698  }
1699  } else if (entry.IsSet() && entry.GetSet().IsSetSeq_set()) {
  // NOTE(review): listing line 1700 (the loop header that introduces
  // `s`, iterating the member entries) is missing.
1701  rval = HasRepairedIDs(**s);
1702  if (rval) {
1703  break;
1704  }
1705  }
1706  }
1707  return rval;
1708 }
1709 
1710 
1712 {
  // NOTE(review): the signature (listing line 1711) is missing from this
  // listing. From its use below, `entry` is a non-const Seq-entry and
  // `type` is compared against CUser_object::GetObjectType().
  //
  // Removes every user-object descriptor whose object type equals `type`
  // from `entry`, recursing through sets.
1713  if (entry.IsSeq()) {
1714  CBioseq& seq = entry.SetSeq();
1715  EDIT_EACH_SEQDESC_ON_BIOSEQ(desc_it, seq) {
1716  if ((*desc_it)->IsUser() && (*desc_it)->GetUser().GetObjectType() == type) {
1717  ERASE_SEQDESC_ON_BIOSEQ(desc_it, seq);
1718  }
1719  }
  // do not leave an empty descriptor container behind on the Bioseq
1720  if (seq.IsSetDescr() && seq.GetDescr().Get().empty()) {
1721  seq.ResetDescr();
1722  }
1723  }
1724  else if (entry.IsSet() && entry.GetSet().IsSetSeq_set()) {
1725  CBioseq_set& set = entry.SetSet();
  // NOTE(review): unlike the Bioseq branch, an emptied descriptor
  // container on the set is not reset here.
1726  EDIT_EACH_SEQDESC_ON_SEQSET(desc_it, set) {
1727  if ((*desc_it)->IsUser() && (*desc_it)->GetUser().GetObjectType() == type) {
1728  ERASE_SEQDESC_ON_SEQSET(desc_it, set);
1729  }
1730  }
1731  for (auto& entry_it : entry.SetSet().SetSeq_set()) {
1732  RemoveUserObjectType(*entry_it, type);
1733  }
1734  }
1735 }
1736 
1737 
1739 {
  // NOTE(review): the signature (listing line 1738) is missing from this
  // listing.
  //
  // Records the current IDs as OriginalId user objects, then lets the
  // entry reassign conflicting IDs.
1740  AddLocalIdUserObjects(entry);
1741  entry.ReassignConflictingIds();
1742  if (!edit::HasRepairedIDs(entry)) {
  // NOTE(review): listing line 1743 (the body of this branch) is
  // missing; presumably it removes the OriginalId objects again when no
  // ID was actually changed - confirm against the full file.
1744  }
1745 }
1746 
1747 
1748 void s_AddLiteral(CSeq_inst& inst, const string& element)
1749 {
1750  CRef<CDelta_seq> ds(new CDelta_seq());
1751  ds->SetLiteral().SetSeq_data().SetIupacna().Set(element);
1752  ds->SetLiteral().SetLength(TSeqPos(element.length()));
1753 
1754  inst.SetExt().SetDelta().Set().push_back(ds);
1755 }
1756 
1757 
// Append a gap literal of length n_len to the delta extension of `inst`.
//   inst             - Seq-inst whose delta extension receives the gap.
//   n_len            - gap length.
//   is_unknown       - when true the literal gets a fuzz of eLim_unk.
//   is_assembly_gap  - when true the literal's Seq-data is set to a gap
//                      carrying gap_type, plus the two optional values below.
//   gap_type         - gap type; only used for assembly gaps.
//   linkage          - linkage value; applied only when >= 0.
//   linkage_evidence - linkage-evidence type; one entry is added only
//                      when >= 0.
// NOTE(review): n_len (size_t) is handed to SetLength() without the
// explicit TSeqPos conversion that s_AddLiteral uses.
1758 void s_AddGap(CSeq_inst& inst, size_t n_len, bool is_unknown, bool is_assembly_gap = false, int gap_type = CSeq_gap::eType_unknown, int linkage = -1, int linkage_evidence = -1 )
1759 {
1760  CRef<CDelta_seq> gap(new CDelta_seq());
1761  if (is_assembly_gap)
1762  {
1763  gap->SetLiteral().SetSeq_data().SetGap();
1764  gap->SetLiteral().SetSeq_data().SetGap().SetType(gap_type);
1765  if (linkage >= 0)
1766  {
1767  gap->SetLiteral().SetSeq_data().SetGap().SetLinkage(linkage);
1768  }
1769  if (linkage_evidence >= 0)
1770  {
  // NOTE(review): listing line 1771 is missing; it presumably declares
  // and allocates `link_ev`, the linkage-evidence object used below.
1772  link_ev->SetType(linkage_evidence);
1773  gap->SetLiteral().SetSeq_data().SetGap().SetLinkage_evidence().push_back(link_ev);
1774  }
1775  }
1776  if (is_unknown) {
1777  gap->SetLiteral().SetFuzz().SetLim(CInt_fuzz::eLim_unk);
1778  }
1779  gap->SetLiteral().SetLength(n_len);
1780  inst.SetExt().SetDelta().Set().push_back(gap);
1781 }
1782 
1783 
1784 /// ConvertRawToDeltaByNs
1785 /// A function to convert a raw sequence to a delta sequence, using runs of
1786 /// Ns to determine the gap location. The size of the run of Ns determines
1787 /// whether a gap should be created and whether the gap should be of type
1788 /// known or unknown. Note that if the ranges overlap, unknown gaps will be
1789 /// preferred (allowing the user to create known length gaps for 20-forever,
1790 /// but unknown length gaps for 100, for example).
1791 /// Use a negative number for a maximum to indicate that there is no upper
1792 /// limit.
/// Nothing is changed unless the Seq-inst is raw, has sequence data, and
/// that data is IUPACna, NCBI2na, NCBI4na or NCBI8na. Only upper-case 'N'
/// counts as part of a run.
1793 /// @param inst The Seq-inst to adjust
1794 /// @param min_unknown The minimum number of Ns to be converted to a gap of
1795 /// unknown length
1796 /// @param max_unknown The maximum number of Ns to be converted to a gap of
1797 /// unknown length
1798 /// @param min_known The minimum number of Ns to be converted to a gap of
1799 /// known length
1800 /// @param max_known The maximum number of Ns to be converted to a gap of
1801 /// known length
/// @param is_assembly_gap Passed through to s_AddGap for every gap created
/// @param gap_type Passed through to s_AddGap for every gap created
/// @param linkage Passed through to s_AddGap for every gap created
/// @param linkage_evidence Passed through to s_AddGap for every gap created
1802 ///
1803 /// @return none
// NOTE(review): the line carrying this function's return type, name and
// first parameter (listing line 1804) is missing from this listing.
1805  size_t min_unknown, int max_unknown,
1806  size_t min_known, int max_known,
1807  bool is_assembly_gap, int gap_type, int linkage, int linkage_evidence )
1808 {
1809  // can only convert if starting as raw
1810  if (!inst.IsSetRepr() || inst.GetRepr() != CSeq_inst::eRepr_raw
1811  || !inst.IsSetSeq_data()) {
1812  return;
1813  }
1814 
  // working copy of the residues as IUPACna text
1815  string iupacna;
1816 
1817  switch(inst.GetSeq_data().Which()) {
1818  case CSeq_data::e_Iupacna:
1819  iupacna = inst.GetSeq_data().GetIupacna();
1820  break;
1821  case CSeq_data::e_Ncbi2na:
  // NOTE(review): listing line 1822 (the start of the conversion call
  // that fills `iupacna`) is missing.
1823  0, inst.GetLength(), iupacna, CSeqUtil::e_Iupacna);
1824  break;
1825  case CSeq_data::e_Ncbi4na:
  // NOTE(review): listing line 1826 (start of the conversion call) is
  // missing.
1827  0, inst.GetLength(), iupacna, CSeqUtil::e_Iupacna);
1828  break;
1829  case CSeq_data::e_Ncbi8na:
  // NOTE(review): listing line 1830 (start of the conversion call) is
  // missing.
1831  0, inst.GetLength(), iupacna, CSeqUtil::e_Iupacna);
1832  break;
1833  default:
1834  return;
1835  break;
1836  }
1837 
  // `element` collects residues since the last emitted segment, including
  // the current run of Ns; `n_len` is the length of that trailing run
1838  string element;
1839  size_t n_len = 0;
1840  ITERATE(string, it, iupacna) {
1841  if ((*it) == 'N') {
1842  n_len++;
1843  element += *it;
1844  } else {
1845  if (n_len > 0) {
1846  // decide whether to turn this past run of Ns into a gap
1847  bool is_unknown = false;
1848  bool is_known = false;
1849 
  // unknown-length range is tested first, so it wins on overlap
1850  if (n_len >= min_unknown && (max_unknown < 0 || n_len <= max_unknown)) {
1851  is_unknown = true;
1852  } else if (n_len >= min_known && (max_known < 0 || n_len <= max_known)) {
1853  is_known = true;
1854  }
1855  if (is_unknown || is_known) {
1856  // make literal to contain sequence before gap
1857  if (element.length() > n_len) {
1858  element = element.substr(0, element.length() - n_len);
1859  s_AddLiteral(inst, element);
1860  }
1861  s_AddGap(inst, n_len, is_unknown, is_assembly_gap, gap_type, linkage, linkage_evidence);
1862  element = "";
1863  }
  // a run that did not qualify stays inside `element` as ordinary
  // residues
1864  n_len = 0;
1865  }
1866  element += *it;
1867  }
1868  }
1869 
  // handle whatever is left after the last residue; this repeats the
  // in-loop decision for a run of Ns that reaches the end of the sequence
1870  if (n_len > 0) {
1871  // decide whether to turn this past run of Ns into a gap
1872  bool is_unknown = false;
1873  bool is_known = false;
1874 
1875  if (n_len >= min_unknown && (max_unknown < 0 || n_len <= max_unknown)) {
1876  is_unknown = true;
1877  } else if (n_len >= min_known && (max_known < 0 || n_len <= max_known)) {
1878  is_known = true;
1879  }
1880  if (is_unknown || is_known) {
1881  // make literal to contain sequence before gap
1882  if (element.length() > n_len) {
1883  element = element.substr(0, element.length() - n_len);
1884  s_AddLiteral(inst, element);
1885  }
1886  s_AddGap(inst, n_len, is_unknown, is_assembly_gap, gap_type, linkage, linkage_evidence);
1887  } else {
1888  s_AddLiteral(inst, element);
1889  }
1890  } else {
  // NOTE(review): for an empty sequence this appends an empty literal.
1891  s_AddLiteral(inst, element);
1892  }
1893 
  // NOTE(review): listing line 1894 is missing; presumably it switches the
  // representation to delta - confirm against the full file.
1895  inst.ResetSeq_data();
1896 }
1897 
1898 
1899 /// NormalizeUnknownLengthGaps
1900 /// A function to adjust the length of unknown-length gaps to a specific
1901 /// length (100 by default).
/// A delta element is adjusted when it is a literal with a fuzz set, its
/// length differs from unknown_length, and it has either no Seq-data or gap
/// Seq-data. The Seq-inst length is updated by the same difference.
1902 /// @param inst The Seq-inst to adjust
/// @param unknown_length The length to give each unknown-length gap
1903 ///
1904 /// @return A vector of the adjustments to the sequence,
1905 /// which can be used to fix the locations of features
1906 /// on the sequence.
/// Each adjustment pairs the gap's start position with the signed
/// length difference. An empty vector is returned when the Seq-inst
/// is not a delta sequence or has no extension.
// NOTE(review): the signature (listing line 1907) is missing from this
// listing, so the stated default for unknown_length cannot be confirmed
// here.
1908 {
1909  TLocAdjustmentVector changes;
1910 
1911  // can only adjust if starting as delta sequence
1912  if (!inst.IsSetRepr() || inst.GetRepr() != CSeq_inst::eRepr_delta
1913  || !inst.IsSetExt()) {
1914  return changes;
1915  }
1916 
  // running start position of the current element, in original
  // (pre-adjustment) coordinates
1917  TSeqPos pos = 0;
1918  NON_CONST_ITERATE(CSeq_ext::TDelta::Tdata, it, inst.SetExt().SetDelta().Set()) {
1919  TSeqPos orig_len = 0;
1920  if ((*it)->IsLiteral()) {
1921  if ((*it)->GetLiteral().IsSetLength()) {
1922  orig_len = (*it)->GetLiteral().GetLength();
1923  }
1924  if ((*it)->GetLiteral().IsSetFuzz()
1925  && orig_len != unknown_length
1926  && (!(*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
1927 
  // negative when the gap shrinks, positive when it grows
1928  int diff = unknown_length - orig_len;
1929  (*it)->SetLiteral().SetLength(unknown_length);
1930  changes.push_back(TLocAdjustment(pos, diff));
1931  inst.SetLength(inst.GetLength() + diff);
1932  }
1933  } else if ((*it)->IsLoc()) {
1934  orig_len = (*it)->GetLoc().GetTotalRange().GetLength();
1935  }
1936 
  // advance by the ORIGINAL length, so recorded positions refer to the
  // sequence as it was before any adjustment
1937  pos += orig_len;
1938  }
1939 
1940  return changes;
1941 }
1942 
1943 
// NOTE(review): the line carrying this function's return type, name and
// first parameter (listing line 1944) is missing from this listing. From
// its use below, `bsh` is a Bioseq handle.
//
// Handle-level variant: converts a copy of the Bioseq's Seq-inst with the
// Seq-inst overload of ConvertRawToDeltaByNs, installs it on the Bioseq,
// and then adjusts or removes the features on the Bioseq according to the
// recorded length changes.
// The four range parameters and the four gap parameters are forwarded
// unchanged to the Seq-inst overload.
1945  size_t min_unknown, int max_unknown,
1946  size_t min_known, int max_known,
1947  bool is_assembly_gap, int gap_type, int linkage, int linkage_evidence )
1948 {
  // work on a copy so the Bioseq is only touched once, via SetInst()
1949  CRef<CSeq_inst> inst(new CSeq_inst());
1950  inst->Assign(bsh.GetInst());
1951 
1952  ConvertRawToDeltaByNs(*inst, min_unknown, max_unknown, min_known, max_known, is_assembly_gap, gap_type, linkage, linkage_evidence);
  // NOTE(review): listing line 1953 is missing; it presumably declares
  // `changes` (the adjustment vector used below) - confirm against the
  // full file.
1954  CBioseq_EditHandle beh = bsh.GetEditHandle();
1955  beh.SetInst(*inst);
1956 
1957  if (changes.size() > 0) {
1958  for (CFeat_CI f(bsh); f; ++f) {
1959  CRef<CSeq_feat> cpy(new CSeq_feat());
1960  cpy->Assign(*(f->GetSeq_feat()));
  // apply the adjustments last-to-first, so an adjustment never shifts
  // the position of one that is still to be applied
1961  TLocAdjustmentVector::reverse_iterator it = changes.rbegin();
1962  bool cut = false;
1963  bool trimmed = false;
1964  while (it != changes.rend() && !cut) {
1965  if (it->second < 0) {
  // negative difference: sequence was removed at this position
1966  FeatureAdjustForTrim(*cpy, it->first, it->first - it->second + 1, nullptr, cut, trimmed);
1967  } else {
  // positive difference: sequence was inserted at this position
1968  FeatureAdjustForInsert(*cpy, it->first, it->first + it->second - 1, nullptr);
1969  }
1970  it++;
1971  }
1972  CSeq_feat_EditHandle feh(f->GetSeq_feat_Handle());
  // `cut` is set by FeatureAdjustForTrim; such features are removed
  // rather than replaced
1973  if (cut) {
1974  feh.Remove();
1975  } else {
1976  feh.Replace(*cpy);
1977  }
1978  }
1979  }
1980 
1981 }
1982 
1983 
1984 /// SetLinkageType
1985 /// A function to set the gap type for gaps in a delta sequence, by calling
/// CSeq_gap::ChangeType() on each gap. A delta element counts as a gap
/// when it is a literal with either no Seq-data or gap Seq-data.
1986 /// @param ext The Seq_ext to adjust
1987 /// @param linkage_type The gap type to pass to ChangeType().
1988 ///
1989 /// @return none
1990 void SetLinkageType(CSeq_ext& ext, CSeq_gap::TType linkage_type)
1991 {
  // NOTE(review): listing line 1992 (the loop header that introduces
  // `it`, iterating the delta elements of `ext`) is missing.
1993  if ((*it)->IsLiteral()
1994  && (!(*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
1995  CSeq_gap& gap = (*it)->SetLiteral().SetSeq_data().SetGap();
1996  gap.ChangeType(linkage_type);
1997  }
1998  }
1999 }
2000 
2001 
2002 /// SetLinkageTypeScaffold
2003 /// A special case of SetLinkageType. When type is Scaffold, linkage must be
2004 /// linked and linkage evidence must be provided.
/// Calls CSeq_gap::SetLinkageTypeScaffold() on each gap. A delta element
/// counts as a gap when it is a literal with either no Seq-data or gap
/// Seq-data.
2005 /// @param ext The Seq_ext to adjust
2006 /// @param evidence_type The linkage evidence type to use.
2007 ///
2008 /// @return none
// NOTE(review): the signature (listing line 2009) is missing from this
// listing.
2010 {
  // NOTE(review): listing line 2011 (the loop header that introduces
  // `it`, iterating the delta elements of `ext`) is missing.
2012  if ((*it)->IsLiteral()
2013  && (!(*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
2014  CSeq_gap& gap = (*it)->SetLiteral().SetSeq_data().SetGap();
2015  gap.SetLinkageTypeScaffold(evidence_type);
2016  }
2017  }
2018 }
2019 
2020 
2022 {
  // NOTE(review): the signature (listing line 2021) and the loop header
  // that introduces `it` (listing line 2023) are missing from this
  // listing.
  //
  // Calls CSeq_gap::SetLinkageTypeLinkedRepeat(evidence_type) on each gap.
  // A delta element counts as a gap when it is a literal with either no
  // Seq-data or gap Seq-data.
2024  if ((*it)->IsLiteral()
2025  && (!(*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
2026  CSeq_gap& gap = (*it)->SetLiteral().SetSeq_data().SetGap();
2027  gap.SetLinkageTypeLinkedRepeat(evidence_type);
2028  }
2029  }
2030 }
2031 
2032 
2033 /// AddLinkageEvidence
2034 /// A function to add linkage evidence for gaps in a delta sequence.
2035 /// Note that this function will automatically set the linkage to eLinkage_linked.
/// Calls CSeq_gap::AddLinkageEvidence() on each gap. A delta element
/// counts as a gap when it is a literal with either no Seq-data or gap
/// Seq-data.
2036 /// @param ext The Seq_ext to adjust
2037 /// @param evidence_type The evidence type to use.
2038 ///
2039 /// @return none
// NOTE(review): the signature (listing line 2040) is missing from this
// listing; the linkage change described above happens inside
// CSeq_gap::AddLinkageEvidence, which is not visible here.
2041 {
  // NOTE(review): listing line 2042 (the loop header that introduces
  // `it`, iterating the delta elements of `ext`) is missing.
2043  if ((*it)->IsLiteral()
2044  && (!(*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
2045  CSeq_gap& gap = (*it)->SetLiteral().SetSeq_data().SetGap();
2046  gap.AddLinkageEvidence(evidence_type);
2047  }
2048  }
2049 }
2050 
2051 
2052 /// ResetLinkageEvidence
2053 /// A function to clear linkage evidence for gaps in a delta sequence.
/// The linkage-evidence list is reset on every gap. The linkage value is
/// reset too, except on gaps of type eType_repeat, which take a separate
/// branch. A delta element counts as a gap when it is a literal with
/// either no Seq-data or gap Seq-data.
2054 /// @param ext The Seq_ext to adjust
2055 ///
2056 /// @return none
// NOTE(review): the signature (listing line 2057) is missing from this
// listing.
2058 {
  // NOTE(review): listing line 2059 (the loop header that introduces
  // `it`, iterating the delta elements of `ext`) is missing.
2060  if ((*it)->IsLiteral()
2061  && (!(*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
2062  CSeq_gap& gap = (*it)->SetLiteral().SetSeq_data().SetGap();
2063  if (gap.IsSetType() && gap.GetType() == CSeq_gap::eType_repeat) {
  // NOTE(review): listing line 2064 (what is done to the linkage of a
  // repeat gap) is missing.
2065  } else {
2066  gap.ResetLinkage();
2067  }
2068  gap.ResetLinkage_evidence();
2069  }
2070  }
2071 }
2072 
2073 
2074 /*******************************************************************************
2075 **** HIGH-LEVEL API
2076 ****
2077 **** Trim functions
2078 *******************************************************************************/
2079 
// NOTE(review): the line carrying this function's return type, name and
// first parameter (listing line 2080) is missing from this listing. From
// its use below, `bsh` is a Bioseq handle.
//
// Sanity-checks the inputs of a trim request. Throws CEditException
// (eInvalid) when the Bioseq is not a nucleotide, has no Seq-inst, has a
// zero length, or when any cut has an endpoint at or beyond the sequence
// length. Returns normally otherwise.
//   cuts - the ranges to validate; not modified.
2081  const TCuts& cuts)
2082 {
2083  // Should be a nuc!
2084  if (!bsh.IsNucleotide()) {
2085  NCBI_THROW(CEditException, eInvalid, "Bioseq is not a nucleotide.");
2086  }
2087 
2088  // Cannot get nuc sequence data
2089  if (!bsh.CanGetInst()) {
2090  NCBI_THROW(CEditException, eInvalid, "Cannot get sequence data for nucleotide.");
2091  }
2092 
2093  // Are the cuts within range of sequence length?
2094  TSeqPos nuc_len = 0;
2095  if (bsh.GetInst().CanGetLength()) {
2096  nuc_len = bsh.GetInst().GetLength();
2097  }
2098 
  // NOTE(review): if TSeqPos is unsigned (presumably - confirm), this is
  // effectively a test for zero.
2099  if (nuc_len <= 0) {
2100  stringstream ss;
2101  ss << "Nuc has invalid sequence length = " << nuc_len;
2102  NCBI_THROW(CEditException, eInvalid, ss.str());
2103  }
2104 
2105  TCuts::const_iterator cit;
2106  for (cit = cuts.begin(); cit != cuts.end(); ++cit) {
2107  const TRange& cut = *cit;
2108  TSeqPos cut_from = cut.GetFrom();
2109  TSeqPos cut_to = cut.GetTo();
  // NOTE(review): if TSeqPos is unsigned (presumably - confirm), the two
  // `< 0` tests can never be true; only the upper-bound tests matter.
  // There is no check that cut_from <= cut_to.
2110  if (cut_from < 0 || cut_to < 0 || cut_from >= nuc_len || cut_to >= nuc_len) {
2111  stringstream ss;
2112  ss << "Cut location is invalid = [" << cut_from << " - " << cut_to << "]";
2113  NCBI_THROW(CEditException, eInvalid, ss.str());
2114  }
2115  }
2116 }
2117 
2118 static TRange s_GetRetainedRange(const TCuts& sorted_merged_cuts, TSeqPos seqLength)
2119 {
2120  const auto num_cuts = sorted_merged_cuts.size();
2121  _ASSERT(num_cuts==1 || num_cuts==2); // Should only include terminal cuts
2122 
2123  TRange range;
2124  const auto& first_cut = sorted_merged_cuts[0];
2125  if (num_cuts == 1) { // Need to figure out which end was cut
2126  if (first_cut.GetFrom() == 0) {
2127  range.SetFrom(first_cut.GetTo()+1);
2128  range.SetTo(seqLength-1);
2129  }
2130  else {
2131  range.SetFrom(0);
2132  range.SetTo(first_cut.GetFrom()-1);
2133  }
2134  return range;
2135  }
2136 
2137  // num_cuts==2 case:
2138  _ASSERT(first_cut.GetFrom() > 0);
2139  range.SetTo(first_cut.GetFrom()-1);
2140  range.SetFrom(sorted_merged_cuts[1].GetTo()+1);
2141  return range;
2142 }
2143 
2144 
/// High-level trim of the sequence behind bsh together with the annotation
/// handled below.
///
/// Order of work, as visible in the body: validate the cuts
/// (s_BasicValidation), normalize them (GetSortedCuts), trim a private copy
/// of the Seq-inst (TrimSeqData), rewrite features, Dense-seg alignments and
/// graphs titled "Phrap Quality" / "Phred Quality" / "Gap4", and only then
/// install the trimmed Seq-inst on the bioseq.
///
/// @param cuts                     ranges requested for removal
/// @param internal_cut_conversion  passed through to GetSortedCuts()
///
/// NOTE(review): this listing jumps over the numbered lines 2147, 2168, 2191,
/// 2223, 2229 and 2253. The head of the signature (which declares bsh), the
/// declarations of feat_sel / align_sel / graph_sel, the statement inside the
/// "feature deleted" branch and part of the Dense-seg condition are therefore
/// not visible here.
2145 /// Implementation detail: first trim all associated annotation, then
2146 /// trim sequence data
2148  const TCuts& cuts,
2149  EInternalTrimType internal_cut_conversion)
2150 {
2151  // Check the input data for anomalies
2152  s_BasicValidation(bsh, cuts);
2153 
2154  // Sort the cuts
2155  TCuts sorted_cuts;
2156  GetSortedCuts(bsh, cuts, sorted_cuts, internal_cut_conversion);
2157 
2158  // Trim a copy of seq_inst but don't update the original seq_inst just yet.
2159  // Do the update as the last step after trimming all annotation first.
2160  // I need the trimmed seq_inst when I retranslate a protein sequence.
2161  // Make a copy of seq_inst
2162  CRef<CSeq_inst> copy_inst(new CSeq_inst());
2163  copy_inst->Assign(bsh.GetInst());
2164  // Modify the copy of seq_inst
2165  TrimSeqData(bsh, copy_inst, sorted_cuts);
2166 
2167  // Trim Seq-feat annotation
2169  CFeat_CI feat_ci(bsh, feat_sel);
2170  for (; feat_ci; ++feat_ci) {
2171  // Make a copy of the feature
      // original_feat stays untrimmed; it is used further down to compute
      // the new coding-region frame.
2172  const auto& original_feat = feat_ci->GetOriginalFeature();
2173  CRef<CSeq_feat> copy_feat(new CSeq_feat());
2174  copy_feat->Assign(feat_ci->GetOriginalFeature());
2175 
2176  // Detect complete deletions of feature
2177  bool bFeatureDeleted = false;
2178 
2179  // Detect case where feature was not deleted but merely trimmed
2180  bool bFeatureTrimmed = false;
2181 
2182  // Modify the copy of the feature
2183  bool isPartialStart = false;
2184  bool isPartialStop = false;
2185  TrimSeqFeat(copy_feat, sorted_cuts, bFeatureDeleted, bFeatureTrimmed, isPartialStart, isPartialStop);
2186 
2187  if (bFeatureDeleted) {
2188  // Delete the feature
2189  // If the feature was a cdregion, delete the protein and
2190  // renormalize the nuc-prot set
      // NOTE(review): the statement that performs this (numbered line 2191)
      // is missing from the listing.
2192  }
2193  else
2194  if (bFeatureTrimmed) {
2195  // Further modify the copy of the feature
2196 
2197  // If this feat is a Cdregion, then RETRANSLATE the protein
2198  // sequence AND adjust any protein feature
2199  if ( copy_feat->IsSetData() &&
2200  copy_feat->GetData().Which() == CSeqFeatData::e_Cdregion &&
2201  copy_feat->IsSetProduct() )
2202  {
2203  // Get length of nuc sequence before trimming
2204  TSeqPos original_nuc_len = 0;
2205  if (bsh.GetInst().CanGetLength()) {
2206  original_nuc_len = bsh.GetInst().GetLength();
2207  }
2208 
      // The frame is derived from the untrimmed feature and the range
      // that survives the cuts, then stored on the trimmed copy.
2209  const auto retainedRange = s_GetRetainedRange(sorted_cuts, original_nuc_len);
2210  auto new_frame = sequence::CFeatTrim::GetCdsFrame(original_feat, retainedRange);
2211  copy_feat->SetData().SetCdregion().SetFrame(new_frame);
2212  // Retranslate the coding region using the new nuc sequence
2213  RetranslateCdregion(bsh, isPartialStart, isPartialStop, copy_inst, copy_feat, sorted_cuts);
2214  }
2215 
2216  // Update the original feature with the modified copy
2217  CSeq_feat_EditHandle feat_eh(*feat_ci);
2218  feat_eh.Replace(*copy_feat);
2219  }
2220  }
2221 
2222  // Trim Seq-align annotation
2224  CAlign_CI align_ci(bsh, align_sel);
2225  for (; align_ci; ++align_ci) {
2226  // Only DENSEG type is supported
2227  const CSeq_align& align = *align_ci;
2228  if ( align.CanGetSegs() &&
2230  {
2231  // Make sure mandatory fields are present in the denseg
2232  const CDense_seg& denseg = align.GetSegs().GetDenseg();
2233  if (! (denseg.CanGetDim() && denseg.CanGetNumseg() &&
2234  denseg.CanGetIds() && denseg.CanGetStarts() &&
2235  denseg.CanGetLens()) )
2236  {
2237  continue;
2238  }
2239 
2240  // Make a copy of the alignment
2241  CRef<CSeq_align> copy_align(new CSeq_align());
2242  copy_align->Assign(align_ci.GetOriginalSeq_align());
2243 
2244  // Modify the copy of the alignment
2245  TrimSeqAlign(bsh, copy_align, sorted_cuts);
2246 
2247  // Update the original alignment with the modified copy
2248  align_ci.GetSeq_align_Handle().Replace(*copy_align);
2249  }
2250  }
2251 
2252  // Trim Seq-graph annotation
2254  CGraph_CI graph_ci(bsh, graph_sel);
2255  for (; graph_ci; ++graph_ci) {
2256  // Only certain types of graphs are supported.
2257  // See C Toolkit function GetGraphsProc in api/sqnutil2.c
      // The title comparison below is case-insensitive.
2258  const CMappedGraph& graph = *graph_ci;
2259  if ( graph.IsSetTitle() &&
2260  (NStr::CompareNocase( graph.GetTitle(), "Phrap Quality" ) == 0 ||
2261  NStr::CompareNocase( graph.GetTitle(), "Phred Quality" ) == 0 ||
2262  NStr::CompareNocase( graph.GetTitle(), "Gap4" ) == 0) )
2263  {
2264  // Make a copy of the graph
2265  CRef<CSeq_graph> copy_graph(new CSeq_graph());
2266  copy_graph->Assign(graph.GetOriginalGraph());
2267 
2268  // Modify the copy of the graph
2269  TrimSeqGraph(bsh, copy_graph, sorted_cuts);
2270 
2271  // Update the original graph with the modified copy
2272  graph.GetSeq_graph_Handle().Replace(*copy_graph);
2273  }
2274  }
2275 
2276  // Last step - trim sequence data by updating the original seq_inst with the
2277  // modified copy
2278  bsh.GetEditHandle().SetInst(*copy_inst);
2279 }
2280 
2281 
2282 /*******************************************************************************
2283 **** LOW-LEVEL API
2284 ****
2285 **** Trim functions divided up into trimming separate distinct objects, i.e.,
2286 **** the sequence data itself and all associated annotation.
2287 ****
2288 **** Used by callers who need access to each edited object so that they can
2289 **** pass these edited objects to a command undo/redo framework, for example.
2290 *******************************************************************************/
2291 
/// Ordering predicate for TRange cuts.
///
/// The primary sort key is the end position (GetTo()); the start position
/// (GetFrom()) is only used to break ties. eAscending puts the smallest end
/// first, eDescending the largest end first. The default order is eAscending.
///
/// NOTE(review): numbered line 2323 is missing from this listing; presumably
/// it declares the m_sortorder member that the constructor initializes -
/// confirm against the repository.
2292 /// Helper functor to compare cuts during sorting
2293 class CRangeCmp
2294 {
2295 public:
2296  enum ESortOrder {
2297  eAscending,
2298  eDescending
2299  };
2300 
2301  explicit CRangeCmp(ESortOrder sortorder = eAscending)
2302  : m_sortorder(sortorder) {};
2303 
      // Returns true when a1 must be placed before a2 in the chosen order.
2304  bool operator()(const TRange& a1, const TRange& a2)
2305  {
2306  if (m_sortorder == eAscending) {
2307  if (a1.GetTo() == a2.GetTo()) {
2308  // Tiebreaker
2309  return a1.GetFrom() < a2.GetFrom();
2310  }
2311  return a1.GetTo() < a2.GetTo();
2312  }
2313  else {
2314  if (a1.GetTo() == a2.GetTo()) {
2315  // Tiebreaker
2316  return a1.GetFrom() > a2.GetFrom();
2317  }
2318  return a1.GetTo() > a2.GetTo();
2319  }
2320  };
2321 
2322 private:
2324 };
2325 
2326 
2327 /// Assumes sorted_cuts are sorted in Ascending order!
2328 static void s_MergeCuts(TCuts& sorted_cuts)
2329 {
2330  // Merge abutting and overlapping cuts
2331  TCuts::iterator it;
2332  for (it = sorted_cuts.begin(); it != sorted_cuts.end(); ) {
2333  TRange& cut = *it;
2334  TSeqPos to = cut.GetTo();
2335 
2336  // Does next cut exist?
2337  if ( it+1 != sorted_cuts.end() ) {
2338  TRange& next_cut = *(it+1);
2339  TSeqPos next_from = next_cut.GetFrom();
2340  TSeqPos next_to = next_cut.GetTo();
2341 
2342  if ( next_from <= (to + 1) ) {
2343  // Current and next cuts abut or overlap
2344  // So adjust current cut and delete next cut
2345  cut.SetTo(next_to);
2346  sorted_cuts.erase(it+1);
2347 
2348  // Post condition after erase:
2349  // Since "it" is before the erase, "it" stays valid
2350  // and still refers to current cut
2351  }
2352  else {
2353  ++it;
2354  }
2355  }
2356  else {
2357  // I'm done
2358  break;
2359  }
2360  }
2361 }
2362 
2363 
/// Rewrites every internal cut in place so that it reaches one end of the
/// sequence. A cut counts as internal when it neither starts at position 0
/// nor ends at seq_length-1; terminal cuts are left untouched.
///
/// @param seq_length               length of the sequence the cuts refer to
/// @param internal_cut_conversion  eTrimToClosestEnd extends the cut towards
///        the nearer end (equal distances go to the 3' end),
///        eTrimTo5PrimeEnd extends it to position 0, and any other value
///        extends it to seq_length-1
///
/// NOTE(review): numbered line 2365 (the head of the signature, which
/// declares the cuts container) is missing from this listing.
/// NOTE(review): extending a cut to seq_length-1 changes its end position, so
/// a container that was ordered by end position is not guaranteed to still be
/// ordered after this call.
2364 /// Adjust any internal cuts to terminal cuts
2366  TSeqPos seq_length,
2367  EInternalTrimType internal_cut_conversion)
2368 {
2369  for (TCuts::size_type ii = 0; ii < cuts.size(); ++ii) {
2370  TRange& cut = cuts[ii];
2371  TSeqPos from = cut.GetFrom();
2372  TSeqPos to = cut.GetTo();
2373 
2374  // Is it an internal cut?
2375  if (from != 0 && to != seq_length-1) {
2376  if (internal_cut_conversion == eTrimToClosestEnd) {
2377  // Extend the cut to the closest end
      // Left operand: distance of the cut start from position 0.
      // Right operand: distance of the cut end from the last position.
2378  if (from - 0 < seq_length-1 - to) {
2379  cut.SetFrom(0);
2380  }
2381  else {
2382  cut.SetTo(seq_length-1);
2383  }
2384  }
2385  else
2386  if (internal_cut_conversion == eTrimTo5PrimeEnd) {
2387  // Extend the cut to 5' end
2388  cut.SetFrom(0);
2389  }
2390  else {
2391  // Extend the cut to 3' end
2392  cut.SetTo(seq_length-1);
2393  }
2394  }
2395  }
2396 }
2397 
2398 
/// Normalizes the caller's cuts into the form the trim functions expect.
///
/// @param cuts                     caller-supplied cuts (not modified)
/// @param sorted_cuts              receives the normalized cuts
/// @param internal_cut_conversion  eDoNotTrimInternal keeps only cuts that
///        start at 0 or end at the last position; any other value copies all
///        cuts and lets s_AdjustInternalCutLocations() convert internal ones
///
/// NOTE(review): numbered lines 2405, 2430 and 2447 are missing from this
/// listing; presumably they hold the head of the signature (declaring bsh)
/// and the declarations of the comparators asc and descend - confirm.
/// NOTE(review): in the eDoNotTrimInternal branch entries are appended to
/// sorted_cuts without clearing it first, whereas the other branch overwrites
/// it; a non-empty sorted_cuts passed by the caller would be kept in the
/// former case.
2399 /// 1) Adjust any internal cuts to terminal cuts according to option.
2400 /// 2) Merge abutting and overlapping cuts.
2401 /// 3) Sort the cuts from greatest to least so that sequence
2402 /// data and annotation will be deleted from greatest loc to smallest loc.
2403 /// That way we don't have to adjust coordinate values after
2404 /// each cut.
2406  const TCuts& cuts,
2407  TCuts& sorted_cuts,
2408  EInternalTrimType internal_cut_conversion)
2409 {
2410  if (internal_cut_conversion == eDoNotTrimInternal) {
2411  // Remove internal cuts
2412  for (TCuts::size_type ii = 0; ii < cuts.size(); ++ii) {
2413  const TRange& cut = cuts[ii];
2414  TSeqPos from = cut.GetFrom();
2415  TSeqPos to = cut.GetTo();
2416 
2417  if (from == 0 || to == bsh.GetBioseqLength()-1) {
2418  sorted_cuts.push_back(cut);
2419  }
2420  }
2421  }
2422  else {
2423  sorted_cuts = cuts;
2424  }
2425 
2426  /***************************************************************************
2427  * Adjust internal cuts to terminal cuts
2428  * Merge abutting and overlapping cuts
2429  ***************************************************************************/
      // The ascending sort runs before the internal cuts are adjusted.
2431  sort(sorted_cuts.begin(), sorted_cuts.end(), asc);
2432 
2433  // Adjust internal cuts to terminal cuts
2434  s_AdjustInternalCutLocations(sorted_cuts, bsh.GetBioseqLength(),
2435  internal_cut_conversion);
2436 
2437  // Merge abutting and overlapping cuts
2438  s_MergeCuts(sorted_cuts);
2439 
2440  /***************************************************************************
2441  * Sort the cuts in descending order
2442  ***************************************************************************/
2443  // Sort the ranges from greatest to least so that sequence
2444  // data and annotation will be deleted from greatest loc to smallest loc.
2445  // That way we don't have to adjust coordinate values after
2446  // each delete.
2448  sort(sorted_cuts.begin(), sorted_cuts.end(), descend);
2449 }
2450 
2451 
/// Appends the segment that seqmap_ci currently points at to pDeltaExt as a
/// Delta-seq literal and adds that segment's length to new_length.
///
/// Gap segments start from a copy of the original gap literal when one is
/// available and get an eLim_unk fuzz when the gap length is unknown. Data
/// segments are read from complete_bsh in IUPAC coding, packed with
/// CSeqportUtil::Pack and stored as the literal's Seq-data. Segment types
/// other than eSeqGap and eSeqData fall through the switch without effect.
///
/// @param complete_bsh  handle the sequence data is read from
/// @param seqmap_ci     iterator positioned on the segment to append
/// @param new_length    running total; incremented by the segment length
///
/// NOTE(review): numbered line 2453 (the head of the signature, which
/// declares pDeltaExt) is missing from this listing.
2452 /// Update sequence length
2454  CBioseq_Handle& complete_bsh,
2455  CSeqMap_CI& seqmap_ci,
2456  CSeq_inst_Base::TLength& new_length)
2457 {
2458  switch (seqmap_ci.GetType()) {
2459  case CSeqMap::eSeqGap:
2460  {
2461  // Sequence gaps
2462  const CSeq_inst_Base::TLength uGapLength = seqmap_ci.GetLength();
2463  const bool bIsLengthKnown = !seqmap_ci.IsUnknownLength();
2464  CConstRef<CSeq_literal> pOriginalGapSeqLiteral =
2465  seqmap_ci.GetRefGapLiteral();
2466  CAutoInitRef<CDelta_seq> pDeltaSeq;
2467  CAutoInitRef<CSeq_literal> pNewGapLiteral;
2468  if (pOriginalGapSeqLiteral) {
2469  pNewGapLiteral->Assign(*pOriginalGapSeqLiteral);
2470  }
2471  if (!bIsLengthKnown) {
2472  pNewGapLiteral->SetFuzz().SetLim(CInt_fuzz::eLim_unk);
2473  }
      // The length is always taken from the iterator, overriding whatever
      // the copied literal carried.
2474  pNewGapLiteral->SetLength(uGapLength);
2475  pDeltaSeq->SetLiteral(*pNewGapLiteral);
2476  pDeltaExt->Set().push_back(ncbi::Ref(&*pDeltaSeq));
2477  new_length += uGapLength;
2478  }
2479  break;
2480  case CSeqMap::eSeqData:
2481  {
2482  // Sequence data
2483  string new_data;
2484  CSeqVector seqvec(complete_bsh, CBioseq_Handle::eCoding_Iupac);
2485  seqvec.GetSeqData(seqmap_ci.GetPosition(), seqmap_ci.GetEndPosition(),
2486  new_data);
2487  CRef<CSeq_data> pSeqData(new CSeq_data());
2488  pSeqData->SetIupacna().Set(new_data);
2489  CSeqportUtil::Pack(pSeqData);
2490  CAutoInitRef<CDelta_seq> pDeltaSeq;
2491  pDeltaSeq->SetLiteral().SetLength(seqmap_ci.GetLength());
2492  pDeltaSeq->SetLiteral().SetSeq_data(*pSeqData);
2493  pDeltaExt->Set().push_back(ncbi::Ref(&*pDeltaSeq));
2494  new_length += seqmap_ci.GetLength();
2495  }
2496  break;
2497  }
2498 }
2499 
2500 
/// Builds the trimmed sequence content for bsh and stores it in inst; the
/// bioseq behind bsh itself is not modified here.
///
/// Steps visible below: a temporary copy of the bioseq is added to the scope,
/// the retained coordinate window is derived from the terminal cuts, the
/// window is walked segment by segment (dropping leading and trailing gaps)
/// into a new Delta-ext, the temporary bioseq is removed again, and inst
/// receives the new length plus either raw Seq-data (a single literal) or the
/// Delta-ext.
///
/// @param inst         Seq-inst to overwrite with the trimmed content
/// @param sorted_cuts  terminal cuts
///
/// NOTE(review): numbered lines 2502, 2535, 2537 and 2573 are missing from
/// this listing: the head of the signature (declaring bsh), two arguments of
/// the ResolvedRangeIterator call, and one statement of the "Repr raw"
/// branch.
/// NOTE(review): seqmap_ci_last is only assigned from the second data segment
/// onwards. With exactly one data segment it stays empty, and the copy loop
/// then appears to stop only at the end of the iterator, which would include
/// trailing gaps - confirm whether that is intended.
2501 /// Trim sequence data
2503  CRef<CSeq_inst> inst,
2504  const TCuts& sorted_cuts)
2505 {
2506  // Should be a nuc!
2507  if (!bsh.IsNucleotide()) {
2508  return;
2509  }
2510 
2511  // Add the complete bioseq to scope
2512  CRef<CBioseq> bseq(new CBioseq);
2513  bseq->Assign(*bsh.GetCompleteBioseq());
2514  CScope& scope = bsh.GetTopLevelEntry().GetScope();
2515  CBioseq_Handle complete_bsh = scope.AddBioseq(*bseq);
2516 
2517  // Determine the "good" range sequence coordinates
      // A cut ending at the current right edge pulls the edge in; a cut
      // starting at the current left edge pushes the edge out. Other cuts
      // do not move either edge.
2518  TSeqPos left_pos = 0;
2519  TSeqPos right_pos = inst->GetLength() - 1;
2520  for (const auto& cut : sorted_cuts) {
2521  if (cut.GetTo() == right_pos) {
2522  right_pos = cut.GetFrom() - 1;
2523  }
2524 
2525  if (cut.GetFrom() == left_pos) {
2526  left_pos = cut.GetTo() + 1;
2527  }
2528  }
2529 
2530  // Create a new Delta-ext
2531  CAutoInitRef<CDelta_ext> pDeltaExt;
2532  CSeqMap_CI seqmap_ci = complete_bsh.GetSeqMap().ResolvedRangeIterator(&complete_bsh.GetScope(),
2533  left_pos,
2534  1 + (right_pos - left_pos),
2536  size_t(-1),
2538 
2539  // exclude leading and trailing gaps, but take all gaps between data elements
2540  // so figure out new boundaries - first and last data elements
2541  CSeqMap_CI seqmap_ci_first, seqmap_ci_last;
2542  for (; seqmap_ci; ++seqmap_ci) {
2543  if (CSeqMap::eSeqData == seqmap_ci.GetType())
2544  {
2545  if (! seqmap_ci_first)
2546  { // empty, initialize first time
2547  seqmap_ci_first = seqmap_ci;
2548  }
2549  else
2550  { // update last on every data element
2551  seqmap_ci_last = seqmap_ci;
2552  }
2553  }
2554  }
2555 
2556  // seqmap_ci_first, seqmap_ci_last are both inclusive
      // The loop stops before seqmap_ci_last, so the last data segment is
      // appended separately afterwards.
2557  CSeq_inst_Base::TLength new_length = 0;
2558  for (seqmap_ci = seqmap_ci_first; seqmap_ci && seqmap_ci != seqmap_ci_last; ++seqmap_ci) {
2559  UpdateSeqLength(pDeltaExt, complete_bsh, seqmap_ci, new_length);
2560  }
2561  if (seqmap_ci_last) {
2562  UpdateSeqLength(pDeltaExt, complete_bsh, seqmap_ci_last, new_length);
2563  }
2564 
      // The temporary copy is no longer needed once the segments are read.
2565  scope.RemoveBioseq(complete_bsh);
2566 
2567  // Update sequence repr, length and data
2568  inst->ResetExt();
2569  inst->ResetSeq_data();
2570  inst->SetLength(new_length);
2571  if (pDeltaExt->Set().size() == 1) {
2572  // Repr raw
2574  CRef<CDelta_seq> pDeltaSeq = *pDeltaExt->Set().begin();
2575  CSeq_data& seq_data = pDeltaSeq->SetLiteral().SetSeq_data();
2576  inst->SetSeq_data(seq_data);
2577  }
2578  else {
2579  // Repr delta
2580  inst->SetExt().SetDelta(*pDeltaExt);
2581  }
2582 }
2583 
2584 
/// Derives the first and last retained positions of the sequence from a set
/// of terminal cuts.
///
/// @param sorted_cuts  cuts; only those starting at 0 or ending at the last
///        position of the sequence have an effect
/// @param trim_start   output: first retained position (0 when no cut starts
///        at position 0)
/// @param trim_stop    output: last retained position (length-1 when no cut
///        ends at the last position)
///
/// NOTE(review): numbered line 2585 (the head of the signature, which
/// declares bsh) is missing from this listing.
2586  const TCuts& sorted_cuts,
2587  TSeqPos& trim_start,
2588  TSeqPos& trim_stop)
2589 {
2590  // Set defaults
2591  trim_start = 0;
2592  trim_stop = bsh.GetInst().GetLength() - 1;
2593 
2594  // Assumptions :
2595  // All cuts have been sorted. Internal cuts were converted to terminal.
2596  for (TCuts::size_type ii = 0; ii < sorted_cuts.size(); ++ii) {
2597  const TRange& cut = sorted_cuts[ii];
2598  TSeqPos from = cut.GetFrom();
2599  TSeqPos to = cut.GetTo();
2600 
2601  // Left-side terminal cut. Update trim_start if necessary.
2602  if ( from == 0 ) {
2603  if ( trim_start <= to ) {
2604  trim_start = to + 1;
2605  }
2606  }
2607 
2608  // Right-side terminal cut. Update trim_stop if necessary.
2609  if ( to == bsh.GetInst().GetLength() - 1 ) {
2610  if ( trim_stop >= from ) {
2611  trim_stop = from - 1;
2612  }
2613  }
2614  }
2615 }
2616 
2617 
/// Applies one cut [cut_from, cut_to] to a single interval, in place.
///
/// Depending on how the interval relates to the cut it is left alone (it ends
/// before the cut), reported as completely removed (it lies inside the cut),
/// shifted left by the cut size (it starts after the cut), or shortened (it
/// partially overlaps the cut). Intervals without both from and to set are
/// ignored.
///
/// @param cut_from      first position removed
/// @param cut_to        last position removed
/// @param bCompleteCut  set to true when the interval lies entirely inside
///        the cut; never reset to false here
/// @param bTrimmed      set to true when the interval was shifted or
///        shortened; never reset to false here
///
/// NOTE(review): numbered line 2618 (the head of the signature, which
/// declares interval) is missing from this listing.
2619  TSeqPos cut_from,
2620  TSeqPos cut_to,
2621  bool& bCompleteCut,
2622  bool& bTrimmed)
2623 {
2624  // These are required fields
2625  if ( !(interval->CanGetFrom() && interval->CanGetTo()) ) {
2626  return;
2627  }
2628 
2629  // Feature location
2630  TSeqPos feat_from = interval->GetFrom();
2631  TSeqPos feat_to = interval->GetTo();
2632 
2633  // Size of the cut
2634  TSeqPos cut_size = cut_to - cut_from + 1;
2635 
2636  // Case 1: feature is located completely before the cut
2637  if (feat_to < cut_from)
2638  {
2639  // Nothing needs to be done - cut does not affect feature
2640  return;
2641  }
2642 
2643  // Case 2: feature is completely within the cut
2644  if (feat_from >= cut_from && feat_to <= cut_to)
2645  {
2646  // Feature should be deleted
2647  bCompleteCut = true;
2648  return;
2649  }
2650 
2651  // Case 3: feature is completely past the cut
2652  if (feat_from > cut_to)
2653  {
2654  // Shift the feature by the cut_size
      // A pure shift is also reported through bTrimmed.
2655  feat_from -= cut_size;
2656  feat_to -= cut_size;
2657  interval->SetFrom(feat_from);
2658  interval->SetTo(feat_to);
2659  bTrimmed = true;
2660  return;
2661  }
2662 
2663  /***************************************************************************
2664  * Cases below are partial overlapping cases
2665  ***************************************************************************/
2666  // Case 4: Cut is completely inside the feature
2667  // OR
2668  // Cut is to the "left" side of the feature (i.e., feat_from is
2669  // inside the cut)
2670  // OR
2671  // Cut is to the "right" side of the feature (i.e., feat_to is
2672  // inside the cut)
2673  if (feat_to > cut_to) {
2674  // Left side cut or cut is completely inside feature
2675  feat_to -= cut_size;
2676  }
2677  else {
2678  // Right side cut
2679  feat_to = cut_from - 1;
2680  }
2681 
2682  // Take care of the feat_from from the left side cut case
      // The two statements together leave feat_from equal to cut_from.
2683  if (feat_from >= cut_from) {
2684  feat_from = cut_to + 1;
2685  feat_from -= cut_size;
2686  }
2687 
2688  interval->SetFrom(feat_from);
2689  interval->SetTo(feat_to);
2690  bTrimmed = true;
2691 }
2692 
2693 
/// Applies one cut [from, to] to a Seq-loc, in place.
///
/// Single intervals are handed to s_SeqIntervalDelete(). For lists (packed
/// intervals, mixes) every element is processed, elements that are completely
/// covered by the cut are erased from the list, and bCompleteCut ends up true
/// only when every element was erased. Mix elements are handled by recursion.
/// Other Seq-loc choices are left unchanged.
///
/// @param from          first position removed
/// @param to            last position removed
/// @param bCompleteCut  output: the location is completely covered by the cut
/// @param bTrimmed      output: at least one interval was shifted or
///        shortened
///
/// NOTE(review): numbered lines 2694, 2715, 2717 and 2755 are missing from
/// this listing: the head of the signature (declaring loc), the case label of
/// the "Multiple intervals" branch, and the declarations of intervals and
/// mix.
/// NOTE(review): for an empty list the loop body never runs, so bCompleteCut
/// keeps whatever value the caller passed in.
2695  TSeqPos from,
2696  TSeqPos to,
2697  bool& bCompleteCut,
2698  bool& bTrimmed)
2699 {
2700  // Given a seqloc and a range, cut the seqloc
2701 
2702  switch(loc->Which())
2703  {
2704  // Single interval
2705  case CSeq_loc::e_Int:
2706  {
      // Work on a copy and write it back afterwards.
2707  CRef<CSeq_interval> interval(new CSeq_interval);
2708  interval->Assign(loc->GetInt());
2709  s_SeqIntervalDelete(interval, from, to, bCompleteCut, bTrimmed);
2710  loc->SetInt(*interval);
2711  }
2712  break;
2713 
2714  // Multiple intervals
2716  {
2718  intervals->Assign(loc->GetPacked_int());
2719  if (intervals->CanGet()) {
2720  // Process each interval in the list
2721  CPacked_seqint::Tdata::iterator it;
2722  for (it = intervals->Set().begin();
2723  it != intervals->Set().end(); )
2724  {
2725  // Initial value: assume that all intervals
2726  // will be deleted resulting in bCompleteCut = true.
2727  // Later on if any interval is not deleted, then set
2728  // bCompleteCut = false
2729  if (it == intervals->Set().begin()) {
2730  bCompleteCut = true;
2731  }
2732 
2733  bool bDeleted = false;
2734  s_SeqIntervalDelete(*it, from, to, bDeleted, bTrimmed);
2735 
2736  // Should interval be deleted from list?
2737  if (bDeleted) {
2738  it = intervals->Set().erase(it);
2739  }
2740  else {
2741  ++it;
2742  bCompleteCut = false;
2743  }
2744  }
2745 
2746  // Update the original list
2747  loc->SetPacked_int(*intervals);
2748  }
2749  }
2750  break;
2751 
2752  // Multiple seqlocs
2753  case CSeq_loc::e_Mix:
2754  {
2756  mix->Assign(loc->GetMix());
2757  if (mix->CanGet()) {
2758  // Process each seqloc in the list
2759  CSeq_loc_mix::Tdata::iterator it;
2760  for (it = mix->Set().begin();
2761  it != mix->Set().end(); )
2762  {
2763  // Initial value: assume that all seqlocs
2764  // will be deleted resulting in bCompleteCut = true.
2765  // Later on if any seqloc is not deleted, then set
2766  // bCompleteCut = false
2767  if (it == mix->Set().begin()) {
2768  bCompleteCut = true;
2769  }
2770 
2771  bool bDeleted = false;
2772  s_SeqLocDelete(*it, from, to, bDeleted, bTrimmed);
2773 
2774  // Should seqloc be deleted from list?
2775  if (bDeleted) {
2776  it = mix->Set().erase(it);
2777  }
2778  else {
2779  ++it;
2780  bCompleteCut = false;
2781  }
2782  }
2783 
2784  // Update the original list
2785  loc->SetMix(*mix);
2786  }
2787  }
2788  break;
2789 
2790  // Other choices not supported yet
2791  default:
2792  {
2793  }
2794  break;
2795  }
2796 }
2797 
2798 
/// Applies every cut to the location of a Seq-graph, one cut after the other,
/// via s_SeqLocDelete(). The deleted/trimmed flags reported by
/// s_SeqLocDelete() are not used.
///
/// @param sorted_cuts  cuts to apply, in the order given
///
/// NOTE(review): numbered lines 2799 and 2808 are missing from this listing:
/// the head of the signature (declaring graph) and the declaration of
/// new_loc.
2800  const TCuts& sorted_cuts)
2801 {
2802  for (TCuts::size_type ii = 0; ii < sorted_cuts.size(); ++ii) {
2803  const TRange& cut = sorted_cuts[ii];
2804  TSeqPos from = cut.GetFrom();
2805  TSeqPos to = cut.GetTo();
2806 
2807  if (graph->CanGetLoc()) {
2809  new_loc->Assign(graph->GetLoc());
2810  bool bDeleted = false;
2811  bool bTrimmed = false;
2812  s_SeqLocDelete(new_loc, from, to, bDeleted, bTrimmed);
2813  graph->SetLoc(*new_loc);
2814  }
2815  }
2816 }
2817 
2818 
/// Trims a byte-valued Seq-graph so that it only covers the retained part of
/// the sequence: the value array is cut down to the overlap of the graph
/// range and the retained range, numval is updated, and the graph location is
/// adjusted with s_UpdateSeqGraphLoc(). Graphs whose data is not of the byte
/// type are left unchanged.
///
/// @param graph        graph to modify in place
/// @param sorted_cuts  terminal cuts
///
/// NOTE(review): numbered line 2820 (the head of the signature, which
/// declares bsh) is missing from this listing.
/// NOTE(review): nothing below guards against a graph that lies entirely
/// outside the retained range; in that case copy_stop would end up below
/// copy_start before the values are copied - confirm callers exclude this.
2819 /// Trim Seq-graph annotation
2821  CRef<CSeq_graph> graph,
2822  const TCuts& sorted_cuts)
2823 {
2824  // Get range that original seqgraph data covers
2825  TSeqPos graph_start = graph->GetLoc().GetStart(eExtreme_Positional);
2826  TSeqPos graph_stop = graph->GetLoc().GetStop(eExtreme_Positional);
2827 
2828  // Get range of trimmed sequence
2829  TSeqPos trim_start;
2830  TSeqPos trim_stop;
2831  s_GetTrimCoordinates(bsh, sorted_cuts, trim_start, trim_stop);
2832 
2833  // Determine range over which to copy seqgraph data from old to new
      // i.e. the intersection of [graph_start, graph_stop] and
      // [trim_start, trim_stop].
2834  TSeqPos copy_start = graph_start;
2835  if (trim_start > graph_start) {
2836  copy_start = trim_start;
2837  }
2838  TSeqPos copy_stop = graph_stop;
2839  if (trim_stop < graph_stop) {
2840  copy_stop = trim_stop;
2841  }
2842 
2843  // Copy over seqgraph data values. Handle BYTE type only (see
2844  // C Toolkit's GetGraphsProc function in api/sqnutil2.c)
2845  CSeq_graph::TGraph& dst_data = graph->SetGraph();
2846  if (dst_data.IsByte()) {
2847  // Keep original min, max, axis
2848 
2849  // Copy start/stop values are relative to bioseq coordinate system.
2850  // Change them so that they are relative to the BYTE values container.
2851  copy_start -= graph_start;
2852  copy_stop -= graph_start;
2853 
2854  // Update data values via
2855  // 1) copy over the new range to another container
2856  // 2) swap
2857  CByte_graph::TValues subset;
2858  subset.assign(dst_data.GetByte().GetValues().begin() + copy_start,
2859  dst_data.GetByte().GetValues().begin() + copy_stop + 1);
2860  dst_data.SetByte().SetValues().swap(subset);
2861 
2862  // Update numvals
2863  graph->SetNumval(copy_stop - copy_start + 1);
2864 
2865  // Update seqloc
2866  s_UpdateSeqGraphLoc(graph, sorted_cuts);
2867  }
2868 }
2869 
2870 
/// Finds the Dense-seg segment that contains position pos on the given row.
/// Segments whose start on that row is -1 are skipped.
///
/// @param denseg     alignment to search
/// @param pos        sequence position to look for
/// @param seg        output: index of the matching segment; equals
///        denseg.GetNumseg() when nothing matched
/// @param seg_start  output: start of the matching segment on the row; only
///        written on success
/// @return true when a segment with start <= pos < start + len was found
///
/// NOTE(review): numbered line 2872 is missing from this listing; presumably
/// it declares the row parameter used below - confirm.
/// NOTE(review): pos is a TSeqPos while start and len are TSignedSeqPos, so
/// the range test compares mixed signedness - worth double-checking.
2871 bool s_FindSegment(const CDense_seg& denseg,
2873  TSeqPos pos,
2874  CDense_seg::TNumseg& seg,
2875  TSeqPos& seg_start)
2876 {
2877  for (seg = 0; seg < denseg.GetNumseg(); ++seg) {
      // Starts are stored segment by segment, one entry per row.
2878  TSignedSeqPos start = denseg.GetStarts()[seg * denseg.GetDim() + row];
2879  TSignedSeqPos len = denseg.GetLens()[seg];
2880  if (start != -1) {
2881  if (pos >= start && pos < start + len) {
2882  seg_start = start;
2883  return true;
2884  }
2885  }
2886  }
2887  return false;
2888 }
2889 
2890 
/// Splits the Dense-seg segment that contains pos (on the given row) into two
/// segments at pos. Nothing happens when pos is not covered by any segment or
/// already is the start of its segment. The alignment's Dense-seg is replaced
/// by a newly built one holding one more segment; dim and ids are copied from
/// the old one.
///
/// For rows on the minus strand the two halves swap roles: the first new
/// segment keeps the upper part of the coordinates, the second the lower
/// part.
///
/// @param pos  position at which to split
///
/// NOTE(review): numbered lines 2891 and 2892 are missing from this listing;
/// presumably they hold the head of the signature declaring align and row -
/// confirm.
2893  TSeqPos pos)
2894 {
2895  // Find the segment where pos occurs for the sequence (identified by
2896  // row).
2897  // If pos is not the start of the segment, cut the segment in two, with
2898  // one of the segments using pos as the new start.
2899 
2900 
2901  // Find the segment where pos lies
2902  const CDense_seg& denseg = align->GetSegs().GetDenseg();
2903  CDense_seg::TNumseg foundseg;
2904  TSeqPos seg_start;
2905  if ( !s_FindSegment(denseg, row, pos, foundseg, seg_start) ) {
2906  return;
2907  }
2908 
2909  // Found our segment seg
2910  // If pos falls on segment boundary, do nothing
2911  if (pos == seg_start) {
2912  return;
2913  }
2914 
2915 
2916  // Cut the segment :
2917  // 1) Allocate a new denseg with numseg size = original size + 1
2918  // 2) Copy elements before the cut
2919  // 3) Split segment at pos
2920  // 4) Copy elements after the cut
2921  // 5) Replace old denseg with new denseg
2922 
2923  // Allocate a new denseg with numseg size = original size + 1
2924  CRef<CDense_seg> new_denseg(new CDense_seg);
2925  new_denseg->SetDim( denseg.GetDim() );
2926  new_denseg->SetNumseg( denseg.GetNumseg() + 1 );
2927  ITERATE( CDense_seg::TIds, idI, denseg.GetIds() ) {
2928  CSeq_id *si = new CSeq_id;
2929  si->Assign(**idI);
2930  new_denseg->SetIds().push_back( CRef<CSeq_id>(si) );
2931  }
2932 
2933  // Copy elements (starts, lens, strands) before the cut (up to and including
2934  // foundseg-1 in original denseg)
2935  for (CDense_seg::TNumseg curseg = 0; curseg < foundseg; ++curseg) {
2936  // Copy starts
2937  for (CDense_seg::TDim curdim = 0; curdim < denseg.GetDim(); ++curdim) {
2938  TSeqPos index = curseg * denseg.GetDim() + curdim;
2939  new_denseg->SetStarts().push_back( denseg.GetStarts()[index] );
2940  }
2941 
2942  // Copy lens
2943  new_denseg->SetLens().push_back( denseg.GetLens()[curseg] );
2944 
2945  // Copy strands
2946  if ( denseg.IsSetStrands() ) {
2947  for (CDense_seg::TDim curdim = 0; curdim < denseg.GetDim();
2948  ++curdim)
2949  {
2950  TSeqPos index = curseg * denseg.GetDim() + curdim;
2951  new_denseg->SetStrands().push_back(denseg.GetStrands()[index]);
2952  }
2953  }
2954  }
2955 
2956  // Split segment at pos
2957  // First find the lengths of the split segments, first_len and second_len
      // The strand of the row being cut decides which half gets the part
      // below pos.
2958  TSeqPos first_len, second_len;
2959  TSeqPos index = foundseg * denseg.GetDim() + row;
2960  if ( !denseg.IsSetStrands() || denseg.GetStrands()[index] != eNa_strand_minus )
2961  {
2962  first_len = pos - seg_start;
2963  second_len = denseg.GetLens()[foundseg] - first_len;
2964  }
2965  else {
2966  second_len = pos - seg_start;
2967  first_len = denseg.GetLens()[foundseg] - second_len;
2968  }
2969 
2970  // Set starts, strands, and lens for the split segments (foundseg and foundseg+1)
2971  // Populate foundseg in new denseg
      // Every row is split with the same lengths; gap rows (-1) stay gaps,
      // and each row's own strand decides which half keeps its old start.
2972  for (CDense_seg::TDim curdim = 0; curdim < denseg.GetDim(); ++curdim) {
2973  TSeqPos index = foundseg * denseg.GetDim() + curdim;
2974  if (denseg.GetStarts()[index] == -1) {
2975  new_denseg->SetStarts().push_back(-1);
2976  }
2977  else if (!denseg.IsSetStrands() || denseg.GetStrands()[index] != eNa_strand_minus) {
2978  new_denseg->SetStarts().push_back(denseg.GetStarts()[index]);
2979  }
2980  else {
2981  new_denseg->SetStarts().push_back(denseg.GetStarts()[index] + second_len);
2982  }
2983 
2984  if (denseg.IsSetStrands()) {
2985  new_denseg->SetStrands().push_back(denseg.GetStrands()[index]);
2986  }
2987  }
2988  new_denseg->SetLens().push_back(first_len);
2989  // Populate foundseg+1 in new denseg
2990  for (CDense_seg::TDim curdim = 0; curdim < denseg.GetDim(); ++curdim) {
2991  TSeqPos index = foundseg * denseg.GetDim() + curdim;
2992  if (denseg.GetStarts()[index] == -1) {
2993  new_denseg->SetStarts().push_back(-1);
2994  }
2995  else if (!denseg.IsSetStrands() || denseg.GetStrands()[index] != eNa_strand_minus) {
2996  new_denseg->SetStarts().push_back(denseg.GetStarts()[index] + first_len);
2997  }
2998  else {
2999  new_denseg->SetStarts().push_back(denseg.GetStarts()[index]);
3000  }
3001 
3002  if (denseg.IsSetStrands()) {
3003  new_denseg->SetStrands().push_back(denseg.GetStrands()[index]);
3004  }
3005  }
3006  new_denseg->SetLens().push_back(second_len);
3007 
3008  // Copy elements (starts, lens, strands) after the cut (starting from foundseg+1 in
3009  // original denseg)
3010  for (CDense_seg::TNumseg curseg = foundseg+1; curseg < denseg.GetNumseg(); ++curseg) {
3011  // Copy starts
3012  for (CDense_seg::TDim curdim = 0; curdim < denseg.GetDim(); ++curdim) {
3013  TSeqPos index = curseg * denseg.GetDim() + curdim;
3014  new_denseg->SetStarts().push_back( denseg.GetStarts()[index] );
3015  }
3016 
3017  // Copy lens
3018  new_denseg->SetLens().push_back( denseg.GetLens()[curseg] );
3019 
3020  // Copy strands
3021  if ( denseg.IsSetStrands() ) {
3022  for (CDense_seg::TDim curdim = 0; curdim < denseg.GetDim();
3023  ++curdim)
3024  {
3025  TSeqPos index = curseg * denseg.GetDim() + curdim;
3026  new_denseg->SetStrands().push_back(denseg.GetStrands()[index]);
3027  }
3028  }
3029  }
3030 
3031  // Update
3032  align->SetSegs().SetDenseg(*new_denseg);
3033 }
3034 
3035 
/// Applies the cuts to one row of a Dense-seg alignment, in place.
///
/// The row is the first id that matches bsh's Seq-id; the function returns
/// without changes when no id matches. For each cut the segments are first
/// split at both cut boundaries, then every segment of that row is either
/// left alone (gap, or before the cut), turned into a gap (start -1, when it
/// lies inside the cut) or shifted left by the cut length (after the cut).
/// Other rows and the segment lengths are not modified by the update loop.
///
/// @param align        alignment to modify; must hold a Dense-seg
/// @param sorted_cuts  cuts to apply, in the order given
///
/// NOTE(review): numbered line 3037 (the head of the signature, which
/// declares bsh) is missing from this listing.
3036 /// Trim Seq-align annotation
3038  CRef<CSeq_align> align,
3039  const TCuts& sorted_cuts)
3040 {
3041  // Assumption: only DENSEG type is supported so caller should
3042  // ensure only denseg alignments are passed in.
3043  const CDense_seg& denseg = align->GetSegs().GetDenseg();
3044 
3045  // On which "row" of the denseg does the bsh seqid lie?
3046  const CDense_seg::TIds& ids = denseg.GetIds();
3047  CDense_seg::TDim row = -1;
3048  for (CDense_seg::TIds::size_type rr = 0; rr < ids.size(); ++rr) {
3049  if (ids[rr]->Match( *(bsh.GetSeqId()) )) {
3050  row = rr;
3051  break;
3052  }
3053  }
3054  if ( row < 0 || !denseg.CanGetDim() || row >= denseg.GetDim() ) {
3055  return;
3056  }
3057 
3058  // Make the cuts
3059  for (TCuts::size_type ii = 0; ii < sorted_cuts.size(); ++ii) {
3060  const TRange& cut = sorted_cuts[ii];
3061  TSeqPos cut_from = cut.GetFrom();
3062  TSeqPos cut_to = cut.GetTo();
3063 
      // A cut given with its ends swapped is normalized here.
3064  TSeqPos cut_len = cut_to - cut_from + 1;
3065  if (cut_to < cut_from) {
3066  cut_len = cut_from - cut_to + 1;
3067  cut_from = cut_to;
3068  }
3069 
3070  // Note: row is 0-based
3071 
3072  // May need to cut the segment at both start and stop positions
3073  // if they do not fall on segment boundaries
3074  s_CutDensegSegment(align, row, cut_from);
3075  s_CutDensegSegment(align, row, cut_from + cut_len);
3076 
3077  // Update segment start values for the trimmed sequence row
      // This inner denseg shadows the outer one; it is fetched again
      // because s_CutDensegSegment() may have replaced the Dense-seg.
3078  const CDense_seg& denseg = align->GetSegs().GetDenseg();
3079  for (CDense_seg::TNumseg curseg = 0; curseg < denseg.GetNumseg(); ++curseg) {
3080  TSeqPos index = curseg * denseg.GetDim() + row;
3081  TSignedSeqPos seg_start = denseg.GetStarts()[index];
3082  if (seg_start < 0) {
3083  // This indicates a gap, no change needed
3084  } else if (TSeqPos(seg_start) < cut_from) {
3085  // This is before the cut, no change needed
3086  } else if (TSeqPos(seg_start) >= cut_from &&
3087  TSeqPos(seg_start) + denseg.GetLens()[curseg] <= cut_from + cut_len) {
3088  // This is in the gap, indicate it with a -1
3089  align->SetSegs().SetDenseg().SetStarts()[index] = -1;
3090  } else {
3091  // This is after the cut - subtract the cut_len
3092  align->SetSegs().SetDenseg().SetStarts()[index] -= cut_len;
3093  }
3094  }
3095  }
3096 }
3097 
3098 
/// Marks a feature as partial after trimming.
///
/// @param loc            location the partial ends apply to
/// @param feat           feature whose partial flag is set when either end is
///        partial
/// @param strand         on eNa_strand_minus the two flags are swapped before
///        use
/// @param partial_start  the start was cut (taken by value; swapped locally)
/// @param partial_stop   the stop was cut (taken by value; swapped locally)
///
/// NOTE(review): numbered lines 3106 and 3109 are missing from this listing,
/// so the statements inside the two single-flag branches are not visible;
/// presumably they mark the corresponding end of loc as partial - confirm.
3099 void SetPartial(CSeq_loc& loc, CRef<CSeq_feat> feat, CSeq_loc::TStrand strand, bool partial_start, bool partial_stop)
3100 {
3101  if (strand == eNa_strand_minus) {
3102  swap(partial_start, partial_stop);
3103  }
3104 
3105  if (partial_start) {
3107  }
3108  if (partial_stop) {
3110  }
3111 
3112  if (partial_start || partial_stop) {
3113  feat->SetPartial(true);
3114  }
3115 }
3116 
3117 
/// Applies every cut to the location and the product of a feature, in place.
///
/// For each cut the location is first checked for a partial overlap at either
/// positional end (which sets partial_start / partial_stop), then cut with
/// s_SeqLocDelete(). Once the location has been trimmed, SetPartial() is
/// called with the flags accumulated so far. Processing stops as soon as a
/// cut removes the location completely. The product Seq-loc is cut with the
/// same coordinates; its deleted/trimmed flags are not reported.
///
/// @param feat             feature to modify
/// @param sorted_cuts      cuts to apply, in the order given
/// @param bFeatureDeleted  output: the location was completely removed
/// @param bFeatureTrimmed  output: the location was shifted or shortened
/// @param partial_start    output: a cut removed the positional start
/// @param partial_stop     output: a cut removed the positional stop
///
/// NOTE(review): none of the four flags is reset here; callers are expected
/// to pass them in as false (the visible caller in this file does).
/// NOTE(review): numbered lines 3128 and 3162 are missing from this listing;
/// presumably they declare new_location and new_product - confirm.
3118 /// Trim Seq-feat annotation
3119 void TrimSeqFeat(CRef<CSeq_feat> feat, const TCuts& sorted_cuts, bool& bFeatureDeleted, bool& bFeatureTrimmed, bool& partial_start, bool& partial_stop)
3120 {
3121  for (TCuts::size_type ii = 0; ii < sorted_cuts.size(); ++ii) {
3122  const TRange& cut = sorted_cuts[ii];
3123  TSeqPos from = cut.GetFrom();
3124  TSeqPos to = cut.GetTo();
3125 
3126  // Update Seqloc "feature made from"
3127  if (feat->CanGetLocation()) {
3129  new_location->Assign(feat->GetLocation());
3130 
3131  // check if the cut overlaps feature location, then feature should be marked partial
      // First test: the cut covers the positional start but ends before
      // the positional stop. Second test: the mirror image for the stop.
3132  if (to >= new_location->GetStart(eExtreme_Positional) &&
3133  to < new_location->GetStop(eExtreme_Positional) &&
3134  from <= new_location->GetStart(eExtreme_Positional))
3135  {
3136  partial_start = true;
3137  }
3138  if (from <= new_location->GetStop(eExtreme_Positional) &&
3139  from > new_location->GetStart(eExtreme_Positional) &&
3140  to >= new_location->GetStop(eExtreme_Positional))
3141  {
3142  partial_stop = true;
3143  }
3144  s_SeqLocDelete(new_location, from, to, bFeatureDeleted, bFeatureTrimmed);
3145  feat->SetLocation(*new_location);
3146  if (bFeatureTrimmed) {
3147  auto strand = eNa_strand_unknown;
3148  if (feat->CanGetLocation()) {
3149  strand = feat->GetLocation().GetStrand();
3150  }
3151  SetPartial(feat->SetLocation(), feat, strand, partial_start, partial_stop);
3152  }
3153 
3154  // No need to cut anymore nor update. Feature will be completely deleted.
3155  if (bFeatureDeleted) {
3156  return;
3157  }
3158  }
3159 
3160  // Update Seqloc "product of process"
3161  if (feat->CanGetProduct()) {
3163  new_product->Assign(feat->GetProduct());
3164  bool bProdDeleted = false;
3165  bool bProdTrimmed = false;
3166  s_SeqLocDelete(new_product, from, to, bProdDeleted, bProdTrimmed);
3167  feat->SetProduct(*new_product);
3168  }
3169  }
3170 }
3171 
3172 
/// Removes a feature and, for a coding region with a product, the protein
/// bioseq it points to.
///
/// The protein is looked up through the feature's product Seq-loc in the
/// feature's scope and is only removed when the handle is a protein that has
/// not been removed already. If the protein's parent set then holds exactly
/// one entry, the set is renormalized. The feature itself is removed in
/// every case.
///
/// NOTE(review): numbered lines 3177, 3200 and 3207 are missing from this
/// listing: the signature (which declares feat_h), one term of the
/// "lone nuc" condition, and the statement that performs the
/// renormalization.
3173 /// Secondary function needed after trimming Seq-feat.
3174 /// If the trim completely covers the feature (boolean reference bFeatureDeleted
3175 /// from TrimSeqFeat() returns true), then delete protein sequence and
3176 /// re-normalize nuc-prot set.
3178 {
3179  // First, if the feature is a Cdregion, then delete the protein sequence
3180  CMappedFeat mapped_feat(feat_h);
3181  if ( mapped_feat.IsSetData() &&
3182  mapped_feat.GetData().Which() == CSeqFeatData::e_Cdregion &&
3183  mapped_feat.IsSetProduct() )
3184  {
3185  // Use Cdregion feat.product seqloc to get protein bioseq handle
3186  CBioseq_Handle prot_h =
3187  mapped_feat.GetScope().GetBioseqHandle(mapped_feat.GetProduct());
3188 
3189  // Should be a protein!
3190  if ( prot_h.IsProtein() && !prot_h.IsRemoved() ) {
3191  // Get the protein parent set before you remove the protein
3192  CBioseq_set_Handle bssh = prot_h.GetParentBioseq_set();
3193 
3194  // Delete the protein
3195  CBioseq_EditHandle prot_eh(prot_h);
3196  prot_eh.Remove();
3197 
3198  // If lone nuc remains, renormalize the nuc-prot set
3199  if (bssh && bssh.IsSetClass()
3201  && !bssh.IsEmptySeq_set()
3202  && bssh.GetBioseq_setCore()->GetSeq_set().size() == 1)
3203  {
3204  // Renormalize the lone nuc that's inside the nuc-prot set into
3205  // a nuc bioseq. This call will remove annots/descrs from the
3206  // set and attach them to the seq.
3208  }
3209  }
3210  }
3211 
3212  // Finally, delete the feature
3213  CSeq_feat_EditHandle feat_eh(feat_h);
3214  feat_eh.Remove();
3215 }
3216 
3217 
3218 /// Secondary function needed after trimming Seq-feat.
3219 /// If TrimSeqFeat()'s bFeatureTrimmed returns true, then adjust cdregion frame.
// Re-computes the Cdregion frame of `cds` for every cut in `sorted_cuts`.
//   original_nuc_len - used as "last position + 1" when testing whether a cut
//                      ends at the end of the sequence (minus-strand case);
//                      presumably the nucleotide length before trimming
//   cds              - feature whose Cdregion data is replaced in place
//   sorted_cuts      - cut ranges, visited in index order
// The frame is only changed when the location was 5' partial before cutting
// and the cut touches the 5' end of the sequence for the feature's strand.
3220 void AdjustCdregionFrame(TSeqPos original_nuc_len,
3221  CRef<CSeq_feat> cds,
3222  const TCuts& sorted_cuts)
3223 {
3224  // Get partialness and strand of location before cutting
3225  bool bIsPartialStart = false;
// NOTE(review): eStrand is assigned below, but its declaration is not visible
// in this listing (the numbering skips from 3225 to 3227).
3227  if (cds->CanGetLocation()) {
3228  bIsPartialStart = cds->GetLocation().IsPartialStart(eExtreme_Biological);
3229  eStrand = cds->GetLocation().GetStrand();
3230  }
3231 
3232  for (TCuts::size_type ii = 0; ii < sorted_cuts.size(); ++ii) {
3233  const TRange& cut = sorted_cuts[ii];
3234 
3235  TSeqPos from = cut.GetFrom();
3236  TSeqPos to = cut.GetTo();
3237 
3238  // Adjust Seq-feat.data.cdregion frame
3239  if (cds->CanGetData() &&
3241  cds->GetData().IsCdregion())
3242  {
3243  // Make a copy
3244  CRef<CCdregion> new_cdregion(new CCdregion);
3245  new_cdregion->Assign(cds->GetData().GetCdregion());
3246 
3247  // Edit the copy
// The frame is shifted only when the cut removes the 5' end of a 5'-partial
// feature: a cut ending at the last base for the minus strand, or a cut
// starting at position 0 for any other strand.
3248  if ( (eStrand == eNa_strand_minus &&
3249  to == original_nuc_len - 1 &&
3250  bIsPartialStart)
3251  ||
3252  (eStrand != eNa_strand_minus &&
3253  from == 0 &&
3254  bIsPartialStart) )
3255  {
// A frame value of 0 is treated as frame 1.
3256  TSeqPos old_frame = new_cdregion->GetFrame();
3257  if (old_frame == 0) {
3258  old_frame = 1;
3259  }
3260 
// Subtract the cut length modulo 3, then wrap the result back into 1..3.
// The subtraction is done on unsigned values and converted to a signed
// type, so a "negative" result is detected by the check below.
3261  TSignedSeqPos new_frame = old_frame - ((to - from + 1) % 3);
3262  if (new_frame < 1) {
3263  new_frame += 3;
3264  }
3265  new_cdregion->SetFrame((CCdregion::EFrame)new_frame);
3266  }
3267 
3268  // Update the original
// The copy is written back even when the frame was left unchanged.
3269  cds->SetData().SetCdregion(*new_cdregion);
3270  }
3271 
3272 
3273  }
3274 }
3275 
3276 
3277 /// Secondary function needed after trimming Seq-feat.
3278 /// If TrimSeqFeat()'s bFeatureTrimmed returns true, then make new protein
3279 /// sequence.
3281  CRef<CSeq_feat> cds,
3282  CRef<CSeq_inst> new_inst)
3283 {
3284  CRef<CBioseq> new_protein_bioseq;
3285  if (new_inst->IsSetSeq_data()) {
3286  // Generate new protein sequence data and length
3287  new_protein_bioseq = CSeqTranslator::TranslateToProtein(*cds, new_scope);
3288  if (!new_protein_bioseq) {
3289  // too short to translate
3290  }
3291  else if (new_protein_bioseq->GetInst().GetSeq_data().IsIupacaa())
3292  {
3293  new_inst->SetSeq_data().SetIupacaa().Set(
3294  new_protein_bioseq->GetInst().GetSeq_data().GetIupacaa().Get());
3295  new_inst->SetLength( new_protein_bioseq->GetInst().GetLength() );
3296  }
3297  else if (new_protein_bioseq->GetInst().GetSeq_data().IsNcbieaa())
3298  {
3299  new_inst->SetSeq_data().SetNcbieaa().Set(
3300  new_protein_bioseq->GetInst().GetSeq_data().GetNcbieaa().Get());
3301  new_inst->SetLength( new_protein_bioseq->GetInst().GetLength() );
3302  }
3303  }
3304  return new_protein_bioseq;
3305 }
3306 
3307 
3308 /// Secondary function needed after trimming Seq-feat.
3309 /// If TrimSeqFeat()'s bFeatureTrimmed returns true, then retranslate cdregion.
3311  bool isPartialStart,
3312  bool isPartialStop,
3313  CRef<CSeq_inst> trimmed_nuc_inst,
3314  CRef<CSeq_feat> cds,
3315  const TCuts& sorted_cuts)
3316 {
3317  if ( cds->IsSetData() &&
3318  cds->GetData().Which() == CSeqFeatData::e_Cdregion &&
3319  cds->IsSetProduct() )
3320  {
3321  // In order to retranslate correctly, we need to create a
3322  // new scope with the trimmed sequence data.
3323 
3324  // Keep track of original seqinst
3325  CRef<CSeq_inst> orig_inst(new CSeq_inst());
3326  orig_inst->Assign(nuc_bsh.GetInst());
3327 
3328  // Update the seqinst to the trimmed version, set the scope
3329  // and retranslate
3330  CBioseq_EditHandle bseh = nuc_bsh.GetEditHandle();
3331  bseh.SetInst(*trimmed_nuc_inst);
3332  CScope& new_scope = bseh.GetScope();
3333 
3334  // Use Cdregion.Product to get handle to protein bioseq
3335  CBioseq_Handle prot_bsh = new_scope.GetBioseqHandle(cds->GetProduct());
3336  if (!prot_bsh.IsProtein()) {
3337  return;
3338  }
3339 
3340  // Make a copy
3341  CRef<CSeq_inst> new_inst(new CSeq_inst());
3342  new_inst->Assign(prot_bsh.GetInst());
3343 
3344  // Edit the copy
3345  CRef<CBioseq> new_protein_bioseq =
3346  SetNewProteinSequence(new_scope, cds, new_inst);
3347  if ( !new_protein_bioseq ) {
3348  return;
3349  }
3350 
3351  // Update the original
3352  CBioseq_EditHandle prot_eh = prot_bsh.GetEditHandle();
3353  prot_eh.SetInst(*new_inst);
3354 
3355  // set molinfo completeness
3356  bool partial5 = cds->GetLocation().IsPartialStart(eExtreme_Biological);
3357  bool partial3 = cds->GetLocation().IsPartialStop(eExtreme_Biological);
3359  if (partial5 && partial3) {
3360  completeness = CMolInfo::eCompleteness_no_ends;
3361  }
3362  else
3363  if (partial5) {
3364  completeness = CMolInfo::eCompleteness_no_left;
3365  }
3366  else
3367  if (partial3) {
3368  completeness = CMolInfo::eCompleteness_no_right;
3369  }
3370  bool found = false;
3371  CBioseq::TDescr::Tdata::iterator it;
3372  for (it = prot_eh.SetDescr().Set().begin(); it != prot_eh.SetDescr().Set().end(); ++it) {
3373  if ((*it)->IsMolinfo()) {
3374  found = true;
3375  break;
3376  }
3377  }
3378  if (found) {
3379  // update existing descr
3380  if (!(*it)->SetMolinfo().IsSetCompleteness() && completeness != CMolInfo::eCompleteness_complete) {
3381  (*it)->SetMolinfo().SetCompleteness(completeness);
3382  }
3383  else
3384  if (!(*it)->SetMolinfo().IsSetCompleteness() && completeness == CMolInfo::eCompleteness_complete) {
3385  // do nothing, complete is implied by there being no completeness flag
3386  }
3387  else
3388  {
3389  (*it)->SetMolinfo().SetCompleteness(completeness);
3390  }
3391  }
3392  else {
3393  if (completeness != CMolInfo::eCompleteness_complete) {
3394  // add new descr
3395  CRef<CSeqdesc> desc(new CSeqdesc);
3397  desc->SetMolinfo().SetCompleteness(completeness);
3398  prot_eh.SetDescr().Set().push_back(desc);
3399  }
3400  }
3401 
3402  // If protein feature exists, update it
3404  CFeat_CI prot_feat_ci(prot_bsh, sel);
3405  for ( ; prot_feat_ci; ++prot_feat_ci ) {
3406  // Make a copy
3407  CRef<CSeq_feat> new_feat(new CSeq_feat());
3408  new_feat->Assign(prot_feat_ci->GetOriginalFeature());
3409 
3410  if ( new_feat->CanGetLocation() &&
3411  new_feat->GetLocation().IsInt() &&
3412  new_feat->GetLocation().GetInt().CanGetTo() )
3413  {
3414  // Edit the copy
3415  new_feat->SetLocation().SetInt().SetTo(
3416  new_protein_bioseq->GetLength() - 1);
3417 
3418  // set partial flag
3419  // protein feat location does not have strand so we have to use cds here
3420  // to get the strand
3421  auto strand = eNa_strand_unknown;
3422  if (cds->CanGetLocation()) {
3423  strand = cds->GetLocation().GetStrand();
3424  }
3425  SetPartial(new_feat->SetLocation(), new_feat, strand, isPartialStart, isPartialStop);
3426 
3427  // Update the original
3428  CSeq_feat_EditHandle prot_feat_eh(*prot_feat_ci);
3429  prot_feat_eh.Replace(*new_feat);
3430  }
3431  }
3432 
3433  // Restore the original seqinst
3434  bseh.SetInst(*orig_inst);
3435  }
3436 }
3437 
3438 /*******************************************************************************
3439 **** LOW-LEVEL API
3440 ****
3441 **** Trim functions
3442 *******************************************************************************/
3443 
3444 
3445 // For Unverified descriptors
3447 {
3448  if (!seq.IsSetDescr()) {
3449  return CRef<CSeqdesc>();
3450  }
3451  for(auto it: seq.GetDescr().Get()) {
3452  if (it->IsUser() && it->GetUser().GetObjectType() == CUser_object::eObjectType_Unverified) {
3453  return it;
3454  }
3455  }
3456  return {};
3457 }
3458 
3459 
3461 {
3462  if (!seq.IsSetDescr()) {
3463  return false;
3464  }
3465  for(auto it: seq.GetDescr().Get()) {
3466  if (it->IsUser() && it->GetUser().IsUnverifiedOrganism()) {
3467  return true;
3468  }
3469  }
3470  return false;
3471 }
3472 
3473 
3475 {
3476  if (!seq.IsSetDescr()) {
3477  return false;
3478  }
3479  for(auto it: seq.GetDescr().Get()) {
3480  if (it->IsUser() && it->GetUser().IsUnverifiedFeature()) {
3481  return true;
3482  }
3483  }
3484  return false;
3485 }
3486 
3488 {
3489  if (!seq.IsSetDescr()) {
3490  return false;
3491  }
3492  for(auto it: seq.GetDescr().Get()) {
3493  if (it->IsUser() && it->GetUser().IsUnverifiedMisassembled()) {
3494  return true;
3495  }
3496  }
3497  return false;
3498 }
3499 
3500 
3502 {
3503  if (!seq.IsSetDescr()) {
3504  return false;
3505  }
3506  for(auto it: seq.GetDescr().Get()) {
3507  if (it->IsUser() && it->GetUser().IsUnverifiedContaminant()) {
3508  return true;
3509  }
3510  }
3511  return false;
3512 }
3513 
3514 
3516 {
3517  descr.Set().sort(CompareSeqdesc());
3518 }
3519 
3521 {
3522  if (entry.IsSetDescr())
3523  SortSeqDescr(entry.SetDescr());
3524  if (entry.IsSet())
3525  for (auto& it: entry.SetSet().SetSeq_set())
3526  {
3527  SortSeqDescr(*it);
3528  }
3529 }
3530 
3531 
3532 // For Targeted Locus Sequences
3533 
3534 const string& GetTargetedLocusName(const CGene_ref& gene)
3535 {
3536  if (gene.IsSetLocus()) {
3537  return gene.GetLocus();
3538  }
3539  else {
3540  return kEmptyStr;
3541  }
3542 }
3543 
3544 
3545 const string& GetTargetedLocusName(const CProt_ref& prot)
3546 {
3547  if (prot.IsSetName() &&
3548  prot.GetName().size() > 0) {
3549  return prot.GetName().front();
3550  }
3551  else {
3552  return kEmptyStr;
3553  }
3554 
3555 }
3556 
3557 
3558 string GetTargetedLocusName(const CRNA_ref& rna)
3559 {
3560  return rna.GetRnaProductName();
3561 }
3562 
3563 
// Targeted locus name for a single feature, chosen by the feature's data type.
// Prot, Gene and Rna features delegate to the matching overload; Imp features
// are handled per subtype. Returns an empty string when the feature has no
// data or its type is not handled.
3564 string GetTargetedLocusName(const CSeq_feat& feat)
3565 {
3566  string tln;
3567  if (feat.IsSetData()) {
3568  switch (feat.GetData().Which()) {
3569  case CSeqFeatData::e_Prot:
3570  tln = GetTargetedLocusName(feat.GetData().GetProt());
3571  break;
3572  case CSeqFeatData::e_Gene:
3573  tln = GetTargetedLocusName(feat.GetData().GetGene());
3574  break;
3575  case CSeqFeatData::e_Rna:
3576  tln = GetTargetedLocusName(feat.GetData().GetRna());
3577  break;
3578  case CSeqFeatData::e_Imp:
// NOTE(review): the subtype `case` labels of this inner switch are not
// visible in this listing (the numbering skips 3580 and 3585).
3579  switch (feat.GetData().GetSubtype()) {
// First visible branch: use the feature comment, if any.
3581  if (feat.IsSetComment()) {
3582  tln = feat.GetComment();
3583  }
3584  break;
// Second visible branch: use the first "mobile_element_type" qualifier that
// has a value (qualifier name compared case-insensitively).
3586  if (feat.IsSetQual()) {
3587  for (auto it : feat.GetQual()) {
3588  if (it->IsSetQual() && NStr::EqualNocase(it->GetQual(), "mobile_element_type") && it->IsSetVal()) {
3589  tln = it->GetVal();
// When the value contains ':', keep only the text after the first ':'.
3590  size_t pos = NStr::Find(tln, ":");
3591  if (pos != string::npos) {
3592  tln = tln.substr(pos + 1);
3594  }
3595  break;
3596  }
3597  }
3598  }
3599  break;
3600  default:
3601  break;
3602  }
// No break after the inner switch: control falls through into the outer
// default, which only breaks.
3603  default:
3604  break;
3605  }
3606  }
3607  return tln;
3608 }
3609 
3610 
// Targeted locus name for a coding region, resolved through `scope`.
// The gene found by sequence::GetGeneForFeature() is tried first; if that
// yields a blank name and the coding region has a product, the name is taken
// from a feature `f` instead.
3611 string GetTargetedLocusName(const CSeq_feat& cds, CScope& scope)
3612 {
3613  string tls;
3614  CConstRef <CSeq_feat> gene_for_feat = sequence::GetGeneForFeature(cds, scope);
3615  if (gene_for_feat) {
3616  tls = GetTargetedLocusName(*gene_for_feat);
3617  }
3618  if (NStr::IsBlank(tls) && cds.IsSetProduct()) {
// NOTE(review): the declarations of `prot` and `f` are not visible in this
// listing (the numbering skips 3619 and 3621); presumably `prot` is the
// product bioseq and `f` iterates its features - confirm against the file.
3620  if (prot) {
3622  if (f) {
3623  tls = GetTargetedLocusName(*(f->GetSeq_feat()));
3624  }
3625  }
3626  }
3627  return tls;
3628 }
3629 
3630 
3632 {
3633  CFeat_CI f(seq);
3634  string tls;
3635  bool quit = false;
3636  while (f && !quit) {
3637  switch (f->GetData().Which()) {
3639  tls = GetTargetedLocusName(*(f->GetSeq_feat()), seq.GetScope());
3640  quit = true;
3641  break;
3642  case CSeqFeatData::e_Gene:
3643  tls = GetTargetedLocusName(f->GetData().GetGene());
3644  break;
3645  case CSeqFeatData::e_Rna:
3646  case CSeqFeatData::e_Imp:
3647  tls = GetTargetedLocusName(*(f->GetSeq_feat()));
3648  quit = true;
3649  break;
3650  default:
3651  break;
3652  }
3653  ++f;
3654  }
3655  return tls;
3656 }
3657 
3658 
// Stores `tls` as the targeted locus name in the AutodefOptions user object
// of `seq`. An existing AutodefOptions descriptor is rebuilt with the new
// name; if none is found, a new user descriptor is appended.
3659 void SetTargetedLocusName(CBioseq_Handle seq, const string& tls)
3660 {
3661  bool found = false;
3662  CBioseq_EditHandle bh(seq);
3663  if (bh.GetCompleteBioseq()->IsSetDescr()) {
// NOTE(review): the loop header that declares `it` is not visible in this
// listing (the numbering skips 3664); presumably it iterates the
// descriptors of `bh`.
3665  if ((*it)->IsUser() &&
3666  (*it)->GetUser().GetObjectType() == CUser_object::eObjectType_AutodefOptions) {
// Load the existing options, change only the targeted locus name, and write
// the regenerated user object back over the descriptor.
// NOTE(review): `opts` is a raw new/delete pair; if InitFromUserObject() or
// MakeUserObject() can throw, the object would leak - consider a stack object.
3667  CAutoDefOptions* opts = new CAutoDefOptions();
3668  opts->InitFromUserObject((*it)->GetUser());
3669  opts->SetTargetedLocusName(tls);
3670  CRef<CUser_object> new_obj = opts->MakeUserObject();
3671  delete opts;
3672  (*it)->SetUser().Assign(*new_obj);
3673  found = true;
3674  break;
3675  }
3676  }
3677  }
3678  if (!found) {
// No AutodefOptions descriptor yet: build one from default options plus the
// targeted locus name and append it to the descriptor list.
3679  CAutoDefOptions * opts = new CAutoDefOptions();
3680  opts->SetTargetedLocusName(tls);
3681  CRef<CUser_object> new_obj = opts->MakeUserObject();
3682  delete opts;
3683  CRef<CSeqdesc> new_desc(new CSeqdesc());
3684  new_desc->SetUser().Assign(*new_obj);
3685  bh.SetDescr().Set().push_back(new_desc);
3686  }
3687 }
3688 
3689 
3690 string GetTargetedLocusNameConsensus(const string& tls1, const string& tls2)
3691 {
3692  // This section is used to calculate the parts of a product name that
3693  // are "the same" for use as the name of an alternatively spliced product.
3694  // The common portion of the string must end at a recognized separator,
3695  // such as a space, comma, or dash instead of in the middle of a word.
3696  // The matching portions of the string could occur at the beginning or end
3697  // of the string, or even occasionally at the beginning and end of a
3698  // string, but not as the center of the string with a different beginning
3699  // and ending.
3700  if (NStr::IsBlank(tls1)) {
3701  return tls2;
3702  } else if (NStr::IsBlank(tls2)) {
3703  return tls1;
3704  }
3705 
3706  if (NStr::Equal(tls1, tls2)) {
3707  return tls1;
3708  } else if (NStr::StartsWith(tls1, tls2)) {
3709  return tls2;
3710  } else if (NStr::StartsWith(tls2, tls1)) {
3711  return tls1;
3712  } else if (NStr::EndsWith(tls1, tls2)) {
3713  return tls2;
3714  } else if (NStr::EndsWith(tls2, tls1)) {
3715  return tls1;
3716  }
3717 
3718  vector<string> tokens1;
3719  NStr::Split(tls1, " ", tokens1, NStr::fSplit_Tokenize);
3720  vector<string> tokens2;
3721  NStr::Split(tls2, " ", tokens2, NStr::fSplit_Tokenize);
3722 
3723  size_t t1_pos = 0;
3724  size_t t1_match_start = string::npos;
3725  size_t t1_match_end = 0;
3726  ITERATE(vector<string>, it1, tokens1){
3727  ITERATE(vector<string>, it2, tokens2) {
3728  if (NStr::Equal(*it1, *it2)) {
3729  t1_match_start = t1_pos;
3730  t1_match_end = t1_pos;
3731  ++it1;
3732  ++it2;
3733  while (it1 != tokens1.end() && it2 != tokens2.end() && NStr::Equal(*it1, *it2)) {
3734  ++t1_match_end;
3735  ++it1;
3736  ++it2;
3737  }
3738  break;
3739  }
3740  }
3741  if (t1_match_start != string::npos) {
3742  break;
3743  }
3744  t1_pos++;
3745  }
3746 
3747  if (t1_match_start == string::npos) {
3748  return kEmptyStr;
3749  }
3750 
3751 
3752  size_t start_pos = 0;
3753  string::const_iterator s = tls1.begin();
3754  while (s != tls1.end() && (*s == ' ' || *s == ',' || *s == '-')) {
3755  ++start_pos;
3756  ++s;
3757  }
3758  size_t i = 0;
3759  while (i < t1_match_start) {
3760  start_pos += tokens1[i].length();
3761  for (size_t k = 0; k < tokens1[i].length(); k++) {
3762  ++s;
3763  }
3764  while (s != tls1.end() && (*s == ' ' || *s == ',' || *s == '-')) {
3765  ++start_pos;
3766  ++s;
3767  }
3768  ++i;
3769  }
3770  size_t match_len = tokens1[i].length();
3771  for (size_t k = 0; k < tokens1[i].length(); k++) {
3772  ++s;
3773  }
3774 
3775  ++i;
3776  while (i <= t1_match_end) {
3777  while (s != tls1.end() && (*s == ' ' || *s == ',' || *s == '-')) {
3778  ++match_len;
3779  ++s;
3780  }
3781  match_len += tokens1[i].length();
3782  for (size_t k = 0; k < tokens1[i].length(); k++) {
3783  ++s;
3784  }
3785  ++i;
3786  }
3787 
3788  string consensus = tls1.substr(start_pos, match_len);
3789 
3790  return consensus;
3791 }
3792 
3793 
3794 END_SCOPE(edit)
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAlign_CI –.
Definition: align_ci.hpp:63
CRef< CUser_object > MakeUserObject() const
void InitFromUserObject(const CUser_object &obj)
void SetTargetedLocusName(const string &tls)
CAutoInitRef<>::
void Set(T *object)
Initialize with an existing object.
CBioseq_EditHandle –.
CBioseq_Handle –.
CBioseq_set_EditHandle –.
CBioseq_set_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CCdregion –.
Definition: Cdregion.hpp:66
CConstRef –.
Definition: ncbiobj.hpp:1266
CDelta_seq –.
Definition: Delta_seq.hpp:66
CRef< CDense_seg > ExtractRows(const vector< TDim > &rows) const
Extract specified rows of the alignment, in specified order.
Definition: Dense_seg.cpp:837
CFeat_CI –.
Definition: feat_ci.hpp:64
CGraph_CI –.
Definition: graph_ci.hpp:234
CMappedFeat –.
Definition: mapped_feat.hpp:59
CMappedGraph –.
Definition: graph_ci.hpp:61
Definition: Pub.hpp:56
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
Helper functor to compare cuts during sorting.
bool operator()(const TRange &a1, const TRange &a2)
ESortOrder m_sortorder
CRangeCmp(ESortOrder sortorder=eAscending)
CScope –.
Definition: scope.hpp:92
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
ESubtype GetSubtype(void) const
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
@ e_Ncbi8na
Definition: sequtil.hpp:52
@ e_Iupacna
Definition: sequtil.hpp:47
@ e_Ncbi4na
Definition: sequtil.hpp:50
@ e_Ncbi2na
Definition: sequtil.hpp:48
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_annot_CI –.
CSeq_annot_Handle –.
CSeq_descr_CI –.
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_CI –.
CSeq_entry_Handle –.
CSeq_entry_Handle –.
CSeq_entry_I –.
Definition: Seq_entry.hpp:56
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
void ReassignConflictingIds(void)
Definition: Seq_entry.cpp:548
bool IsSetDescr(void) const
Definition: Seq_entry.cpp:106
CSeq_ext –.
Definition: Seq_ext.hpp:66
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool AddLinkageEvidence(CLinkage_evidence::TType evidence_type)
Definition: Seq_gap.cpp:123
void ChangeType(TType linkage_type)
Definition: Seq_gap.cpp:79
void SetLinkageTypeLinkedRepeat(CLinkage_evidence::TType evidence_type)
Definition: Seq_gap.cpp:114
void SetLinkageTypeScaffold(CLinkage_evidence::TType evidence_type)
Definition: Seq_gap.cpp:105
bool IsAa(void) const
Definition: Seq_inst.hpp:113
bool IsNa(void) const
Definition: Seq_inst.hpp:106
bool IsEntrys(void) const
Definition: Seq_submit.cpp:54
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
void SetObjectType(EObjectType obj_type)
EObjectType
Object Type.
@ eObjectType_AutodefOptions
container_type::const_iterator const_iterator
Definition: map.hpp:53
container_type::iterator iterator
Definition: map.hpp:54
container_type::mapped_type & operator[](const key_type &key)
Definition: map.hpp:171
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
container_type::value_type value_type
Definition: map.hpp:52
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const char si[8][64]
Definition: des.c:146
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static const char * expected[]
Definition: bcp.c:42
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
TPrim & Set(void)
Definition: serialbase.hpp:351
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
Definition: serialbase.hpp:347
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
Definition: Seq_loc.cpp:3280
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3313
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3839
CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Finds gene for feature, but obeys SeqFeatXref directives.
Definition: sequence.cpp:1529
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
void RemoveBioseq(const CBioseq_Handle &seq)
Revoke Bioseq previously added using AddBioseq().
Definition: scope.cpp:382
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
vector< CBioseq_Handle > TBioseqHandles
Definition: scope.hpp:144
CBioseq_set_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
TSeq ConvertSetToSeq(void) const
Do the same as CollapseSet() when sub-entry is of type bioseq.
bool IsNucleotide(void) const
void SetDescr(TDescr &v) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
const TInst_Ext & GetInst_Ext(void) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
void SetDescr(TDescr &v) const
CBioseq_EditHandle MoveTo(const CSeq_entry_EditHandle &entry, int index=-1) const
Move current bioseq into seq-entry.
void SetClass(TClass v) const
void AddDescr(TDescr &v) const
void SetInst_Mol(TInst_Mol v) const
bool IsEmptySeq_set(void) const
Check if the bioseq set is empty.
CScope & GetScope(void) const
Get scope this handle belongs to.
const CSeqFeatData & GetData(void) const
bool CanGetInst(void) const
TSeqPos GetBioseqLength(void) const
void Replace(const CSeq_align &new_obj) const
Replace the Seq-align with new Seq-align object.
TSet GetSet(void) const
bool AddSeqdesc(CSeqdesc &d) const
CSeq_entry_EditHandle TakeEntry(const CSeq_entry_EditHandle &entry, int index=-1) const
Remove seq-entry from its location and attach to current one.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
TSet ConvertSeqToSet(TClass set_class=CBioseq_set::eClass_not_set) const
Convert the entry from Bioseq to Bioseq-set.
void TakeAllAnnots(const CSeq_entry_EditHandle &src_entry) const
Remove all the annotation from seq-entry and attach to current one.
void AddSeq_descr(TDescr &v) const
void SetInst_Length(TInst_Length v) const
void Remove(void) const
Remove the feature from Seq-annot.
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
TInst_Mol GetInst_Mol(void) const
const CSeq_align::TSegs & GetSegs(void) const
bool IsSetProduct(void) const
CConstRef< CSeq_align > GetSeq_align(void) const
Get const reference to current seq-align.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
void SetInst(TInst &v) const
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
CBioseq_set_Handle GetParentBioseq_set(void) const
Get parent bioseq-set handle.
TSeq GetSeq(void) const
bool IsRemoved(void) const
Check if handle points to a removed bioseq.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool IsProtein(void) const
void SetInst_Repr(TInst_Repr v) const
CBioseq_EditHandle TakeBioseq(const CBioseq_EditHandle &seq, int index=-1) const
Remove bioseq from its location and attach to current one.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
TInst_Length GetInst_Length(void) const
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsSetInst_Repr(void) const
bool IsSetClass(void) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &d) const
CScope & GetScope(void) const
Get scope this handle belongs to.
TInst_Repr GetInst_Repr(void) const
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &d) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CBioseq_EditHandle TakeBioseq(const CBioseq_EditHandle &seq, int index=-1) const
Remove bioseq from its location and attach to current one.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_entry_EditHandle GetParentEntry(void) const
Navigate object tree.
bool IsAlign(void) const
bool IsSet(void) const
void SetInst_Ext(TInst_Ext &v) const
CBioseq_EditHandle AttachBioseq(CBioseq &seq, int index=-1) const
Attach an existing bioseq.
const CSeqMap & GetSeqMap(void) const
Get sequence map.
bool HasParentEntry(void) const
Check if current seq-entry has a parent.
bool IsNa(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to the seq-annot.
CSeq_entry_EditHandle GetParentEntry(void) const
Navigate object tree.
bool IsSetData(void) const
CConstRef< CBioseq_set > GetBioseq_setCore(void) const
Return core data for the bioseq-set.
void Replace(const CSeq_graph &new_obj) const
Replace the Seq-graph with new Seq-graph object.
bool CanGetInst_Ext(void) const
int GetSeq_entry_Index(const CSeq_entry_Handle &handle) const
const TInst & GetInst(void) const
bool IsSeq(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
Definition: seq_map_ci.hpp:679
const string & GetTitle(void) const
Definition: graph_ci.hpp:112
bool IsSetTitle(void) const
Definition: graph_ci.hpp:108
const CSeq_align & GetOriginalSeq_align(void) const
Get original alignment.
Definition: align_ci.cpp:225
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
const CSeq_loc & GetProduct(void) const
CSeqMap::ESegmentType GetType(void) const
Definition: seq_map_ci.hpp:651
bool IsUnknownLength(void) const
return true if current segment is a gap of unknown length
Definition: seq_map_ci.cpp:302
const CSeq_graph & GetOriginalGraph(void) const
Get original graph with unmapped location/product.
Definition: graph_ci.hpp:70
vector< CSeqdesc::E_Choice > TDescChoices
Definition: seqdesc_ci.hpp:67
CSeq_align_Handle GetSeq_align_Handle(void) const
Get original alignment handle.
Definition: align_ci.cpp:233
TSeqPos GetPosition(void) const
return position of current segment in sequence
Definition: seq_map_ci.hpp:665
TSeqPos GetLength(void) const
return length of current segment
Definition: seq_map_ci.hpp:672
CConstRef< CSeq_literal > GetRefGapLiteral(void) const
return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap
Definition: seq_map_ci.cpp:292
CSeq_graph_Handle GetSeq_graph_Handle(void) const
Get original graph handle.
Definition: graph_ci.cpp:93
@ eRecursive
Deprecated.
@ eNonRecursive
Deprecated.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const
Iterate segments in the range with specified strand coordinates.
Definition: seq_map.cpp:868
const_iterator begin(void) const
Definition: seq_vector.hpp:298
@ fFindAny
Definition: seq_map.hpp:138
@ fIgnoreUnresolved
Definition: seq_map.hpp:134
@ eSeqData
real sequence data
Definition: seq_map.hpp:98
@ eSeqGap
gap
Definition: seq_map.hpp:97
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
#define kMax_Char
Definition: ncbi_limits.h:174
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
The object itself. Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetLabel(void) const
Field label. Check if a value has been assigned to Label data member.
TData & SetData(void)
Assign a value to Data data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
void SetLabel(TLabel &value)
Assign a value to Label data member.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
const TLabel & GetLabel(void) const
Get the Label member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
TSub & SetSub(void)
Select the variant.
Definition: Pub_.cpp:195
bool CanGetDim(void) const
Check if it is safe to call GetDim method.
Definition: Dense_seg_.hpp:402
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
Definition: Dense_seg_.hpp:568
void ResetSegs(void)
Reset Segs data member.
Definition: Seq_align_.cpp:301
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
bool CanGetNumseg(void) const
Check if it is safe to call GetNumseg method.
Definition: Dense_seg_.hpp:452
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
bool CanGetIds(void) const
Check if it is safe to call GetIds method.
Definition: Dense_seg_.hpp:499
vector< CRef< CSeq_id > > TIds
Definition: Dense_seg_.hpp:106
vector< CRef< CSeq_id > > TIds
Definition: Dense_diag_.hpp:93
bool IsDendiag(void) const
Check if variant Dendiag is selected.
Definition: Seq_align_.hpp:720
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
Definition: Seq_align_.hpp:915
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
const TDendiag & GetDendiag(void) const
Get the variant data.
Definition: Seq_align_.hpp:726
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
bool CanGetStarts(void) const
Check if it is safe to call GetStarts method.
Definition: Dense_seg_.hpp:524
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
list< CRef< CDense_diag > > TDendiag
Definition: Seq_align_.hpp:194
const TStrands & GetStrands(void) const
Get the Strands member data.
Definition: Dense_seg_.hpp:580
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool CanGetLens(void) const
Check if it is safe to call GetLens method.
Definition: Dense_seg_.hpp:549
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1037
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
E_Choice Which(void) const
Which variant is currently selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetPartial(TPartial value)
Assign a value to Partial data member.
Definition: Seq_feat_.hpp:971
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool CanGetData(void) const
Check if it is safe to call GetData method.
Definition: Seq_feat_.hpp:919
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
bool CanGetLocation(void) const
Check if it is safe to call GetLocation method.
Definition: Seq_feat_.hpp:1111
const TCdregion & GetCdregion(void) const
Get the variant data.
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
Definition: Seq_feat_.hpp:1090
const TRna & GetRna(void) const
Get the variant data.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
TFrom GetFrom(void) const
Get the From member data.
bool CanGetTo(void) const
Check if it is safe to call GetTo method.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
void SetFrom(TFrom value)
Assign a value to From data member.
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
bool CanGetFrom(void) const
Check if it is safe to call GetFrom method.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Int
from to
Definition: Seq_loc_.hpp:101
vector< char > TValues
Definition: Byte_graph_.hpp:89
TByte & SetByte(void)
Select the variant.
Definition: Seq_graph_.cpp:159
void SetNumval(TNumval value)
Assign a value to Numval data member.
TValues & SetValues(void)
Assign a value to Values data member.
const TByte & GetByte(void) const
Get the variant data.
Definition: Seq_graph_.cpp:153
void SetGraph(TGraph &value)
Assign a value to Graph data member.
Definition: Seq_graph_.cpp:250
bool CanGetLoc(void) const
Check if it is safe to call GetLoc method.
Definition: Seq_graph_.hpp:863
const TValues & GetValues(void) const
Get the Values member data.
bool IsByte(void) const
Check if variant Byte is selected.
Definition: Seq_graph_.hpp:757
void SetLoc(TLoc &value)
Assign a value to Loc data member.
Definition: Seq_graph_.cpp:224
const TLoc & GetLoc(void) const
Get the Loc member data.
Definition: Seq_graph_.hpp:869
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetClass(TClass value)
Assign a value to Class data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_equiv
a set of equivalent maps or seqs
@ eClass_parts
parts for 2 or 3
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_conset
constructed sequence + parts
@ eClass_wgs_set
whole genome shotgun project
@ eClass_mut_set
set of mutations
@ eClass_pir
converted pir
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gibb
geninfo backbone
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_pdb_entry
a complete PDB entry
@ eClass_genbank
converted genbank
@ eClass_swissprot
converted SWISSPROT
@ eClass_segset
segmented sequence + parts
@ eClass_pub_set
all the seqs from a single publication
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
Definition: MolInfo_.hpp:600
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
const TSeg & GetSeg(void) const
Get the variant data.
Definition: Seq_ext_.cpp:114
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void ResetDescr(void)
Reset Descr data member.
Definition: Bioseq_.cpp:60
bool IsSetSeq_data(void) const
The sequence. Check if a value has been assigned to Seq_data data member.
Definition: Seq_inst_.hpp:805
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
Definition: Seq_data_.hpp:644
void SetPub(TPub &value)
Assign a value to Pub data member.
Definition: Pubdesc_.cpp:72
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
TPub & SetPub(void)
Select the variant.
Definition: Seqdesc_.cpp:362
bool IsIupacaa(void) const
Check if variant Iupacaa is selected.
Definition: Seq_data_.hpp:524
const TIupacna & GetIupacna(void) const
Get the variant data.
Definition: Seq_data_.hpp:510
void SetExt(TExt &value)
Assign a value to Ext data member.
Definition: Seq_inst_.cpp:147
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
Definition: Seq_inst_.hpp:546
TType GetType(void) const
Get the Type member data.
Definition: Seq_gap_.hpp:282
const TPub & GetPub(void) const
Get the variant data.
Definition: Seqdesc_.cpp:356
Tdata & Set(void)
Assign a value to data member.
Definition: Seg_ext_.hpp:170
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
Definition: Seq_inst_.hpp:646
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:826
list< CRef< CSeq_align > > TAlign
Definition: Seq_annot_.hpp:194
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
TDelta & SetDelta(void)
Select the variant.
Definition: Seq_ext_.cpp:186
Tdata & Set(void)
Assign a value to data member.
Definition: Delta_ext_.hpp:170
TIupacna & SetIupacna(void)
Select the variant.
Definition: Seq_data_.hpp:517
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:650
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
void ResetLinkage(void)
Reset Linkage data member.
Definition: Seq_gap_.hpp:322
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
Definition: Seq_gap_.hpp:263
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:838
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:303
E_Choice
Choice variants.
Definition: Seqdesc_.hpp:109
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Seq_descr_.hpp:154
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:453
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
bool IsPub(void) const
Check if variant Pub is selected.
Definition: Seqdesc_.hpp:1096
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
TSeg & SetSeg(void)
Select the variant.
Definition: Seq_ext_.cpp:120
void ResetLinkage_evidence(void)
Reset Linkage_evidence data member.
Definition: Seq_gap_.cpp:80
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
Definition: Seq_data_.hpp:570
TUser & SetUser(void)
Select the variant.
Definition: Seqdesc_.cpp:390
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
Definition: Seq_data_.hpp:550
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seqdesc_.hpp:903
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
Definition: Seq_inst_.hpp:832
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
Definition: Bioseq_.hpp:278
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
const Tdata & Get(void) const
Get the member data.
Definition: Seg_ext_.hpp:164
void SetLinkage(TLinkage value)
Assign a value to Linkage data member.
Definition: Seq_gap_.hpp:338
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
void ResetExt(void)
Reset Ext data member.
Definition: Seq_inst_.cpp:142
const TPub & GetPub(void) const
Get the Pub member data.
Definition: Pubdesc_.hpp:605
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:817
list< CRef< CSeq_loc > > Tdata
Definition: Seg_ext_.hpp:89
const TNcbi8na & GetNcbi8na(void) const
Get the variant data.
Definition: Seq_data_.hpp:590
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
void Reset(void)
Reset data member.
Definition: Seq_descr_.cpp:51
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
void ResetSeq_data(void)
Reset Seq_data data member.
Definition: Seq_inst_.cpp:125
TSeqPos TLength
Definition: Seq_inst_.hpp:147
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_data_.hpp:475
@ eRepr_seg
segmented sequence
Definition: Seq_inst_.hpp:95
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ eCompleteness_complete
complete biological entity
Definition: MolInfo_.hpp:156
@ eCompleteness_no_left
missing 5' or NH3 end
Definition: MolInfo_.hpp:158
@ eCompleteness_no_right
missing 3' or COOH end
Definition: MolInfo_.hpp:159
@ eCompleteness_no_ends
missing both ends
Definition: MolInfo_.hpp:160
@ e_Ncbi2na
2 bit nucleic acid code
Definition: Seq_data_.hpp:106
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_Ncbi8na
8 bit extended nucleic acid code
Definition: Seq_data_.hpp:108
@ e_Ncbi4na
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
@ e_Embl
EMBL specific information.
Definition: Seqdesc_.hpp:127
@ e_Het
cofactor, etc associated but not bound
Definition: Seqdesc_.hpp:132
@ e_Org
if all from one organism
Definition: Seqdesc_.hpp:116
@ e_Num
a numbering system
Definition: Seqdesc_.hpp:118
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
@ e_Update_date
date of last update
Definition: Seqdesc_.hpp:129
@ e_Pub
a reference to the publication
Definition: Seqdesc_.hpp:122
@ e_Pir
PIR specific info.
Definition: Seqdesc_.hpp:120
@ e_Genbank
GenBank specific info.
Definition: Seqdesc_.hpp:121
@ e_Prf
PRF specific information.
Definition: Seqdesc_.hpp:130
@ e_Mol_type
type of molecule
Definition: Seqdesc_.hpp:111
@ e_Sp
SWISSPROT specific info.
Definition: Seqdesc_.hpp:125
@ e_Dbxref
xref to other databases
Definition: Seqdesc_.hpp:126
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Method
sequencing method
Definition: Seqdesc_.hpp:113
@ e_Modelev
model evidence for XM records
Definition: Seqdesc_.hpp:135
@ e_Region
overall region (globin locus)
Definition: Seqdesc_.hpp:123
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Modif
modifiers
Definition: Seqdesc_.hpp:112
@ e_Maploc
map location of this sequence
Definition: Seqdesc_.hpp:119
@ e_Create_date
date entry first created/released
Definition: Seqdesc_.hpp:128
@ e_Title
a title for this sequence
Definition: Seqdesc_.hpp:115
@ e_Pdb
PDB specific information.
Definition: Seqdesc_.hpp:131
@ e_not_set
No variant selected.
Definition: Seqdesc_.hpp:110
@ e_Name
a name for this sequence
Definition: Seqdesc_.hpp:114
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
const TCit & GetCit(void) const
Get the Cit member data.
list< CRef< CSeq_entry > > TEntrys
const TEntrys & GetEntrys(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TSub & GetSub(void) const
Get the Sub member data.
bool IsSetSub(void) const
Check if a value has been assigned to Sub data member.
bool IsSetCit(void) const
Citation for this submission. Check if a value has been assigned to Cit data member.
int i
int len
void FeatureAdjustForInsert(CSeq_feat &feat, TSeqPos from, TSeqPos to, const CSeq_id *seqid)
Definition: loc_edit.cpp:2141
void FeatureAdjustForTrim(CSeq_feat &feat, TSeqPos from, TSeqPos to, const CSeq_id *seqid, bool &bCompleteCut, bool &bAdjusted)
Definition: loc_edit.cpp:2087
vector< CRef< objects::CSeq_align > > TAlignVec
range(_Ty, _Ty) -> range< _Ty >
constexpr auto make_array(T &&a)
constexpr auto sort(_Init &&init)
Definition: fix_pub.hpp:45
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::SIZE size
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static void s_AdjustInternalCutLocations(TCuts &cuts, TSeqPos seq_length, EInternalTrimType internal_cut_conversion)
Adjust any internal cuts to terminal cuts.
void TrimSeqAlign(CBioseq_Handle bsh, CRef< CSeq_align > align, const TCuts &sorted_cuts)
Trim Seq-align annotation.
void SetLinkageType(CSeq_ext &ext, CSeq_gap::TType linkage_type)
SetLinkageType: a function to set the linkage_type for gaps in a delta sequence.
void DivvyUpAlignments(const TVecOfSeqEntryHandles &vecOfSeqEntryHandles)
Call this if the alignments directly under these seq-entries are all jumbled up between each other.
static CSeq_id * s_MakeUniqueLocalId(void)
vector< CSeq_annot_Handle > TVecOfSeqAnnotsToErase
void AddBioseqToBioseq(const CBioseq_Handle &to, const CBioseq_Handle &add)
Attach one Bioseq to another.
bool HasRepairedIDs(const CUser_object &user, const CBioseq::TId &ids)
bool IsDeltaSeqGap(CConstRef< CDelta_seq > delta)
string MakeOriginalLabelForId(const CSeq_id &id)
void TrimSeqData(CBioseq_Handle bsh, CRef< CSeq_inst > inst, const TCuts &sorted_cuts)
Trim sequence data.
void s_AddLiteral(CSeq_inst &inst, const string &element)
CRef< CSeqdesc > FindUnverified(const CBioseq &seq)
static void s_AddBioseqToBioseq(const CBioseq_EditHandle &to, const CBioseq_EditHandle &add)
void ConvertRawToDeltaByNs(CSeq_inst &inst, size_t min_unknown, int max_unknown, size_t min_known, int max_known, bool is_assembly_gap, int gap_type, int linkage, int linkage_evidence)
ConvertRawToDeltaByNs A function to convert a raw sequence to a delta sequence, using runs of Ns to d...
static void s_AddBioseqToSegset(CBioseq_set_EditHandle &segset, CBioseq_EditHandle &part)
static bool s_DivvyUpAlignments_ProcessAnnot_Dendiag(const CSeq_align_Handle &align, const TMapDescendentToInputEntry &mapDescendentToInputEntry, TMapEntryToAlignVec &mapEntryToAlignVec)
static void s_AddBioseqToNucProtSet(CBioseq_set_EditHandle &nuc_prot, CBioseq_EditHandle &seq)
CMapWithOriginalOrderingIteration< CSeq_entry_Handle, TAlignVec > TMapEntryToAlignVec
void TrimSeqGraph(CBioseq_Handle bsh, CRef< CSeq_graph > graph, const TCuts &sorted_cuts)
Trim Seq-graph annotation.
void GetSortedCuts(CBioseq_Handle bsh, const TCuts &cuts, TCuts &sorted_cuts, EInternalTrimType internal_cut_conversion)
1) Adjust any internal cuts to terminal cuts according to option.
bool Does3primerAbutGap(const CSeq_feat &feat, CBioseq_Handle seq_hl)
void SetTargetedLocusName(CBioseq_Handle seq, const string &tls)
void SortSeqDescr(CSeq_descr &descr)
vector< CConstRef< CSeq_align > > TAlignVec
bool IsUnverifiedMisassembled(const CBioseq &seq)
void AddSeqdescToSeqDescr(const CSeqdesc &desc, CSeq_descr &seq_descr)
static void s_AddBioseqToPartsSet(CBioseq_set_EditHandle &parts, CBioseq_EditHandle &seq)
void AddSeqEntryToSeqEntry(const CSeq_entry_Handle &target, const CSeq_entry_Handle &insert)
Attach one Seq-entry to another.
void AddSeqdescToBioseq(const CSeqdesc &desc, CBioseq &seq)
void AddLocalIdUserObjects(CSeq_entry &entry)
Creates a User-object descriptor on every sequence that has a local ID. Contains the original local ID...
CRef< CSeq_entry > SeqEntryFromSeqSubmit(const CSeq_submit &submit)
Create a Seq-entry from a Seq-submit.
void s_AddGap(CSeq_inst &inst, size_t n_len, bool is_unknown, bool is_assembly_gap=false, int gap_type=CSeq_gap::eType_unknown, int linkage=-1, int linkage_evidence=-1)
void ResetLinkageEvidence(CSeq_ext &ext)
ResetLinkageEvidence: a function to clear linkage evidence for gaps in a delta sequence.
map< CSeq_entry_Handle, CSeq_entry_Handle > TMapDescendentToInputEntry
bool s_FindSegment(const CDense_seg &denseg, CDense_seg::TDim row, TSeqPos pos, CDense_seg::TNumseg &seg, TSeqPos &seg_start)
CConstRef< CDelta_seq > GetDeltaSeqForPosition(const unsigned pos, const CBioseq_Handle seq_hl, CScope *scope, unsigned &left_endpoint)
void s_BasicValidation(CBioseq_Handle bsh, const TCuts &cuts)
bool IsSeqDescInList(const CSeqdesc &desc, const CSeq_descr &set)
static bool s_DivvyUpAlignments_ProcessAnnot_Denseg(const CSeq_align_Handle &align, const TMapDescendentToInputEntry &mapDescendentToInputEntry, TMapEntryToAlignVec &mapEntryToAlignVec)
void RemoveUserObjectType(CSeq_entry &entry, CUser_object::EObjectType type)
Removes User-object descriptors of a certain type from the seq-entry.
void TrimSequenceAndAnnotation(CBioseq_Handle bsh, const TCuts &cuts, EInternalTrimType internal_cut_conversion)
Implementation detail: first trim all associated annotation, then trim sequence data.
bool IsUnverifiedOrganism(const CBioseq &seq)
static void s_DivvyUpAlignments_ProcessAnnot(const CSeq_annot_Handle &annot_h, const TMapDescendentToInputEntry &mapDescendentToInputEntry, TMapSeqAnnotToDest &mapSeqAnnotToDest, TVecOfSeqAnnotsToErase &vecOfSeqAnnotToErase)
static void s_UpdateSeqGraphLoc(CRef< CSeq_graph > graph, const TCuts &sorted_cuts)
CRef< CBioseq > SetNewProteinSequence(CScope &new_scope, CRef< CSeq_feat > cds, CRef< CSeq_inst > new_inst)
Secondary function needed after trimming Seq-feat.
void SetLinkageTypeScaffold(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
SetLinkageTypeScaffold A special case of SetLinkageType.
static void s_SeqLocDelete(CRef< CSeq_loc > loc, TSeqPos from, TSeqPos to, bool &bCompleteCut, bool &bTrimmed)
void SetLinkageTypeLinkedRepeat(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
CSeq_id::E_Choice TypeFromLabel(const string &label)
void DeleteProteinAndRenormalizeNucProtSet(const CSeq_feat_Handle &feat_h)
Secondary function needed after trimming Seq-feat.
bool Does5primerAbutGap(const CSeq_feat &feat, CBioseq_Handle seq_hl)
static void s_MakeGroupsForUniqueValues(const CSeq_entry_Handle &target, const CScope::TBioseqHandles &bioseq_handles)
void SegregateSetsByBioseqList(const CSeq_entry_Handle &target, const CScope::TBioseqHandles &bioseq_handles)
Split a Seq-entry, where the second part holds the given bioseqs.
string GetTargetedLocusNameConsensus(const string &tls1, const string &tls2)
CMapWithOriginalOrderingIteration< CRef< CSeq_annot >, CSeq_entry_Handle > TMapSeqAnnotToDest
void SetPartial(CSeq_loc &loc, CRef< CSeq_feat > feat, CSeq_loc::TStrand strand, bool partial_start, bool partial_stop)
void TrimSeqFeat(CRef< CSeq_feat > feat, const TCuts &sorted_cuts, bool &bFeatureDeleted, bool &bFeatureTrimmed, bool &partial_start, bool &partial_stop)
Trim Seq-feat annotation.
static void s_AddProtToNuc(const CBioseq_EditHandle &nuc, const CBioseq_EditHandle &prot)
void RetranslateCdregion(CBioseq_Handle nuc_bsh, bool isPartialStart, bool isPartialStop, CRef< CSeq_inst > trimmed_nuc_inst, CRef< CSeq_feat > cds, const TCuts &sorted_cuts)
Secondary function needed after trimming Seq-feat.
static void s_SeqIntervalDelete(CRef< CSeq_interval > interval, TSeqPos cut_from, TSeqPos cut_to, bool &bCompleteCut, bool &bTrimmed)
const string & GetTargetedLocusName(const CGene_ref &gene)
bool IsUnverifiedContaminant(const CBioseq &seq)
bool IsUnverifiedFeature(const CBioseq &seq)
void s_CutDensegSegment(CRef< CSeq_align > align, CDense_seg::TDim row, TSeqPos pos)
void BioseqSetDescriptorPropagateDown(const CBioseq_set_Handle &bioseq_set_h, const vector< CSeqdesc::E_Choice > &choices_to_delete)
Moves descriptors down to children of the given bioseq-set.
TLocAdjustmentVector NormalizeUnknownLengthGaps(CSeq_inst &inst, TSeqPos unknown_length)
NormalizeUnknownLengthGaps A function to adjust the length of unknown-length gaps to a specific lengt...
void AdjustCdregionFrame(TSeqPos original_nuc_len, CRef< CSeq_feat > cds, const TCuts &sorted_cuts)
Secondary function needed after trimming Seq-feat.
static void s_AddPartToSegmentedBioseq(const CBioseq_EditHandle &seg, const CBioseq_EditHandle &part)
static TRange s_GetRetainedRange(const TCuts &sorted_merged_cuts, TSeqPos seqLength)
void AddLinkageEvidence(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
AddLinkageEvidence A function to add linkage evidence for gaps in a delta sequence.
static void s_PromoteSingletonSetsInSet(const CBioseq_set_Handle &bioseq_set_h)
CRef< CUser_field > MakeOriginalIdField(const CSeq_id &id)
void AddBioseqToBioseqSet(const CBioseq_set_Handle &set, const CBioseq_Handle &seq)
Add a Bioseq to a Bioseq-set.
static bool s_IsSingletonSet(const CBioseq_set_Handle &bioseq_set)
string GenerateTargetedLocusName(CBioseq_Handle seq)
void AddSeqdescToBioseqSet(const CSeqdesc &desc, CBioseq_set &set)
bool AddSeqdescToSeqEntryRecursively(CSeq_entry &entry, CSeqdesc &desc)
void UpdateSeqLength(CAutoInitRef< CDelta_ext > &pDeltaExt, CBioseq_Handle &complete_bsh, CSeqMap_CI &seqmap_ci, CSeq_inst_Base::TLength &new_length)
Update sequence length.
static void s_MergeCuts(TCuts &sorted_cuts)
Assumes sorted_cuts are sorted in Ascending order!
bool IsMatchingIdMissing(const CUser_field &field, const CBioseq::TId &ids)
string LabelFromType(CSeq_id::E_Choice choice)
void BioseqSetDescriptorPropagateUp(CBioseq_set_Handle set)
Moves descriptors up from children of the given bioseq-set if each child has an identical copy of the...
void HandleCollidingIds(CSeq_entry &entry)
static void s_GetTrimCoordinates(CBioseq_Handle bsh, const TCuts &sorted_cuts, TSeqPos &trim_start, TSeqPos &trim_stop)
vector< TLocAdjustment > TLocAdjustmentVector
vector< CSeq_entry_Handle > TVecOfSeqEntryHandles
EInternalTrimType
Any internal cut listed in TCuts will be converted to a terminal cut using one of these options.
@ eTrimToClosestEnd
@ eTrimTo5PrimeEnd
@ eDoNotTrimInternal
vector< TRange > TCuts
pair< TSeqPos, int > TLocAdjustment
#define ERASE_SEQDESC_ON_BIOSEQ(Itr, Var)
ERASE_SEQDESC_ON_BIOSEQ.
Definition: seq_macros.hpp:231
#define EDIT_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
Definition: seq_macros.hpp:221
#define EDIT_EACH_SEQDESC_ON_SEQSET(Itr, Var)
#define ERASE_SEQDESC_ON_SEQSET(Itr, Var)
ERASE_SEQDESC_ON_SEQSET.
#define FIELD_EQUALS(Var, Fld, Value)
FIELD_EQUALS base macro.
#define RAW_FIELD_IS_EMPTY(Var, Fld)
RAW_FIELD_IS_EMPTY base macro.
#define row(bind, expected)
Definition: string_bind.c:73
set< CConstRef< T >, SSerialObjectLessThan< T > > Type
SAnnotSelector –.
map< CConstRef< T >, string > TMapObjToTextAsn
TMapObjToTextAsn m_ObjAsnCache
bool operator()(const CConstRef< T > &lhs, const CConstRef< T > &rhs) const
const string & x_GetAsnText(const CConstRef< T > &obj) const
Definition: type.c:6
#define _ASSERT
else result
Definition: token2.c:20
Modified on Wed Apr 17 13:10:37 2024 by modify_doxy.py rev. 669887