NCBI C++ ToolKit
seq_entry_edit.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_EDIT___SEQ_ENTRY_EDIT__HPP
2 #define OBJTOOLS_EDIT___SEQ_ENTRY_EDIT__HPP
3 
4 /* $Id: seq_entry_edit.hpp 99416 2023-03-24 14:16:03Z stakhovv $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Mati Shomrat, NCBI
30 *
31 * File Description:
32 * High level Seq-entry edit, for meaningful combination of Seq-entries.
33 */
34 #include <corelib/ncbistd.hpp>
36 #include <objects/seq/Seq_gap.hpp>
40 
43 
44 class CSeq_entry_Handle;
45 class CBioseq_Handle;
46 class CBioseq_set_Handle;
47 class CSeq_annot_Handle;
48 
49 
51 
52 
53 /// Attach one Seq-entry to another
54 ///
55 /// @param to
56 /// Seq-entry to change
57 /// @param add
58 /// Seq-entry to add
59 /// @sa
60 /// Other forms of adding the content of one Seq-entry to another.
63 
64 /// Attach one Bioseq to another
65 ///
66 /// This function will add one Bioseq to another if:
67 /// 1. 'to' is a nucleotide and 'add' is a protein. The result
68 /// is a nuc-prot set containing both elements.
69 /// 2. both Bioseqs have the same molecular type. The result
70 /// is a segmented bioseq of which the two bioseqs serve as parts.
71 /// 3. 'to' is a segmented bioseq and 'add' has the same molecular type
72 /// as 'to'. 'add' will be added as a new part of 'to'.
73 /// @param to
74 /// Bioseq to change
75 /// @param add
76 /// Bioseq to add
77 /// @sa
78 /// AddSeqEntryToSeqEntry()
80 void AddBioseqToBioseq(const CBioseq_Handle& to, const CBioseq_Handle& add);
81 
82 /// Add a Bioseq to a Bioseq-set
83 ///
84 /// This function will add the Bioseq to the set if:
85 /// 1. The set is of class 'parts' and the Bioseq has the same
86 /// molecular type as the other parts.
87 /// 2.
88 /// @param bsst
89 /// Bioseq-set to change
90 /// @param seq
91 /// Bioseq to add
92 /// @sa
93 /// AddSeqEntryToSeqEntry()
95 void AddBioseqToBioseqSet(const CBioseq_set_Handle& bsst, const CBioseq_Handle& seq);
96 
97 
98 /// Create a Seq-entry from a Seq-submit
99 /// @param submit
100 /// Seq-submit to create Seq-entry from
103 
104 /// Split a Seq-entry, where the second part holds the given bioseqs.
105 /// There are various complex rules here that may not be obvious at first glance.
106 /// @param target
107 /// The Seq-entry to split
108 /// @param bioseq_handles
109 /// The array of bioseqs that should end up in the second part of the target, with the rest in the first part.
111 void SegregateSetsByBioseqList(const CSeq_entry_Handle & target,
112  const CScope::TBioseqHandles & bioseq_handles );
113 
114 typedef vector<CSeq_entry_Handle> TVecOfSeqEntryHandles;
115 
116 /// Call this if the alignments directly under these seq-entries are
117 /// all jumbled up between each other.
118 /// It will move each Seq-align into the proper location.
119 /// In particular, it looks at all the seq-ids in each seq-align. If
120 /// none of them belong to any member of vecOfSeqEntryHandles, then
121 /// that Seq-align is copied to all members of vecOfSeqEntryHandles.
122 /// If it belongs to only one member of vecOfSeqEntryHandles, then it
123 /// goes there. If the align belongs to more than one, it's destroyed.
124 ///
125 /// @param vecOfSeqEntryHandles
126 /// The Seq-entries we're considering for alignments.
128 void DivvyUpAlignments(const TVecOfSeqEntryHandles & vecOfSeqEntryHandles);
129 
130 /// Moves descriptors down to children of the given bioseq-set. Each child
131 /// gets a copy of all the descriptors. It does NOT check for
132 /// duplicate Seqdescs.
133 ///
134 /// @param bioseq_set_h
135 /// This is the bioseq_set whose descriptors we're moving.
136 /// @param choices_to_delete
137 /// If non-empty, it indicates the types of CSeqdescs to delete instead
138 /// of propagating.
141  const CBioseq_set_Handle & bioseq_set_h,
142  const vector<CSeqdesc::E_Choice> &choices_to_delete =
143  vector<CSeqdesc::E_Choice>() );
144 
145 /// Moves descriptors up from children of the given bioseq-set if each child
146 /// has an identical copy of the descriptor. It does NOT check for
147 /// duplicate Seqdescs. Will not move molinfo, title, or source descriptors.
148 ///
149 /// @param bioseq_set_h
150 /// This is the bioseq_set whose descriptors we're moving.
153 
154 
155 /// Creates a User-object descriptor on every sequence that has a local ID.
156 /// The User-object contains the original local ID.
158 void AddLocalIdUserObjects(CSeq_entry& entry);
159 
160 /// Detects whether colliding IDs were fixed by comparing sequence IDs to
161 /// the contents of the OriginalID User-object descriptor
163 bool HasRepairedIDs(const CSeq_entry& entry);
164 
165 /// Removes User-object descriptors of a certain type from the seq-entry
168 
170 void HandleCollidingIds(CSeq_entry& entry);
171 
172 
175  size_t min_unknown, int max_unknown,
176  size_t min_known, int max_known,
177  bool is_assembly_gap = false, int gap_type = CSeq_gap::eType_unknown, int linkage = -1, int linkage_evidence = -1 );
178 
181  size_t min_unknown, int max_unknown,
182  size_t min_known, int max_known,
183  bool is_assembly_gap = false, int gap_type = CSeq_gap::eType_unknown, int linkage = -1, int linkage_evidence = -1 );
184 
185 typedef pair<TSeqPos, int> TLocAdjustment;
186 typedef vector<TLocAdjustment> TLocAdjustmentVector;
187 
190 
192 void SetLinkageType(CSeq_ext& ext, CSeq_gap::TType linkage_type);
193 
196 
199 
201 void AddLinkageEvidence(CSeq_ext& ext, CLinkage_evidence::TType evidence_type);
202 
205 
207 void SortSeqDescr(CSeq_entry& entry);
208 
210 void SortSeqDescr(CSeq_descr& entry);
211 
212 
213 /*******************************************************************************
214 **** HIGH-LEVEL API
215 ****
216 **** Trim functions
217 *******************************************************************************/
218 
219 /// A list of trim coordinates
221 typedef vector<TRange> TCuts;
222 
223 /// Any internal cut listed in TCuts will be converted to a terminal cut
224 /// using one of these options. The default is eTrimToClosestEnd.
226  eTrimToClosestEnd = 0, // default
230 };
231 
232 /// Trim sequence data and all associated annotation
235  const TCuts& cuts,
236  EInternalTrimType internal_cut_conversion = eTrimToClosestEnd);
237 
238 
239 /*******************************************************************************
240 **** LOW-LEVEL API
241 ****
242 **** Trim functions divided up into trimming separate distinct objects, i.e.,
243 **** the sequence data itself and all associated annotation.
244 ****
245 **** Used by callers who need access to each edited object so that they can
246 **** pass these edited objects to a command undo/redo framework, for example.
247 *******************************************************************************/
248 
249 /// 1) Merge abutting and overlapping cuts.
250 /// 2) Adjust any internal cuts to terminal cuts according to option.
251 /// 3) Sort the cuts from greatest to least so that sequence
252 /// data and annotation will be deleted from greatest loc to smallest loc.
253 /// That way we don't have to adjust coordinate values after
254 /// each cut.
257  const TCuts& cuts,
258  TCuts& sorted_cuts,
259  EInternalTrimType internal_cut_conversion = eTrimToClosestEnd);
260 
261 /// Trim sequence data
263 void TrimSeqData(CBioseq_Handle bsh,
264  CRef<CSeq_inst> inst,
265  const TCuts& sorted_cuts);
266 
267 /// Trim Seq-graph annotation
270  CRef<CSeq_graph> graph,
271  const TCuts& sorted_cuts);
272 
273 /// Trim Seq-align annotation
276  CRef<CSeq_align> align,
277  const TCuts& sorted_cuts);
278 
279 /// Trim Seq-feat annotation
281 void TrimSeqFeat(CRef<CSeq_feat> feat,
282  const TCuts& sorted_cuts,
283  bool& bFeatureDeleted,
284  bool& bFeatureTrimmed,
285  bool& partial_start,
286  bool& partial_stop);
287 
288 /// Secondary function needed after trimming Seq-feat.
289 /// If the trim completely covers the feature (i.e. TrimSeqFeat() set its
290 /// bFeatureDeleted output argument to true), then delete the protein sequence
291 /// and re-normalize the nuc-prot set.
294 
295 /// Secondary function needed after trimming Seq-feat.
296 /// If TrimSeqFeat()'s bFeatureTrimmed returns true, then adjust cdregion frame.
297 NCBI_STD_DEPRECATED("AdjustCdregionFrame() doesn't work and needs to be removed.") NCBI_XOBJEDIT_EXPORT
298 void AdjustCdregionFrame(TSeqPos original_nuc_len,
299  CRef<CSeq_feat> cds,
300  const TCuts& sorted_cuts);
301 
302 /// Secondary function needed after trimming Seq-feat.
303 /// If TrimSeqFeat()'s bFeatureTrimmed returns true, then make new protein
304 /// sequence.
307  CRef<CSeq_feat> cds,
308  CRef<CSeq_inst> new_inst);
309 
310 /// Secondary function needed after trimming Seq-feat.
311 /// If TrimSeqFeat()'s bFeatureTrimmed returns true, then retranslate cdregion.
314  bool partial_start,
315  bool partial_stop,
316  CRef<CSeq_inst> trimmed_nuc_inst,
317  CRef<CSeq_feat> cds,
318  const TCuts& sorted_cuts);
319 
320 /*******************************************************************************
321 **** LOW-LEVEL API
322 ****
323 **** Trim functions
324 *******************************************************************************/
325 
326 
327 // For Unverified descriptors
333 
334 // For TargetedLocusSequences
338 NCBI_XOBJEDIT_EXPORT string GetTargetedLocusNameConsensus(const string& tls1, const string& tls2);
339 
343 
344 #endif /* OBJTOOLS_EDIT___SEQ_ENTRY_EDIT__HPP */
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CBioseq_Handle –.
CBioseq_set_Handle –.
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_ext –.
Definition: Seq_ext.hpp:66
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
EObjectType
Object Type.
Definition: set.hpp:45
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
vector< CBioseq_Handle > TBioseqHandles
Definition: scope.hpp:144
#define NCBI_DEPRECATED
#define NCBI_STD_DEPRECATED(message)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJEDIT_EXPORT
Definition: ncbi_export.h:1291
Definition: fix_pub.hpp:45
void TrimSeqAlign(CBioseq_Handle bsh, CRef< CSeq_align > align, const TCuts &sorted_cuts)
Trim Seq-align annotation.
void SetLinkageType(CSeq_ext &ext, CSeq_gap::TType linkage_type)
SetLinkageType A function to set the linkage_type for gaps in a delta sequence.
void DivvyUpAlignments(const TVecOfSeqEntryHandles &vecOfSeqEntryHandles)
Call this if the alignments directly under these seq-entries are all jumbled up between each other.
void AddBioseqToBioseq(const CBioseq_Handle &to, const CBioseq_Handle &add)
Attach one Bioseq to another.
void TrimSequenceAndAnnotation(CBioseq_Handle bsh, const TCuts &cuts, EInternalTrimType internal_cut_conversion=eTrimToClosestEnd)
Trim sequence data and all associated annotation.
void AddSeqEntryToSeqEntry(const CSeq_entry_Handle &to, const CSeq_entry_Handle &add)
Attach one Seq-entry to another.
void TrimSeqData(CBioseq_Handle bsh, CRef< CSeq_inst > inst, const TCuts &sorted_cuts)
Trim sequence data.
TLocAdjustmentVector NormalizeUnknownLengthGaps(CSeq_inst &inst, TSeqPos unknown_length=100)
NormalizeUnknownLengthGaps A function to adjust the length of unknown-length gaps to a specific lengt...
void AddBioseqToBioseqSet(const CBioseq_set_Handle &bsst, const CBioseq_Handle &seq)
Add a Bioseq to a Bioseq-set.
CRef< CSeqdesc > FindUnverified(const CBioseq &seq)
void RetranslateCdregion(CBioseq_Handle nuc_bsh, bool partial_start, bool partial_stop, CRef< CSeq_inst > trimmed_nuc_inst, CRef< CSeq_feat > cds, const TCuts &sorted_cuts)
Secondary function needed after trimming Seq-feat.
void SortSeqDescr(CSeq_entry &entry)
void TrimSeqGraph(CBioseq_Handle bsh, CRef< CSeq_graph > graph, const TCuts &sorted_cuts)
Trim Seq-graph annotation.
void SetTargetedLocusName(CBioseq_Handle seq, const string &tls)
bool IsUnverifiedMisassembled(const CBioseq &seq)
string GetTargetedLocusName(const CSeq_feat &feat)
vector< TLocAdjustment > TLocAdjustmentVector
void AddLocalIdUserObjects(CSeq_entry &entry)
Creates a User-object descriptor on every sequence that has a local ID Contains the original local ID...
CRef< CSeq_entry > SeqEntryFromSeqSubmit(const CSeq_submit &submit)
Create a Seq-entry from a Seq-submit.
void BioseqSetDescriptorPropagateDown(const CBioseq_set_Handle &bioseq_set_h, const vector< CSeqdesc::E_Choice > &choices_to_delete=vector< CSeqdesc::E_Choice >())
Moves descriptors down to children of the given bioseq-set.
NCBI_XOBJEDIT_EXPORT void AdjustCdregionFrame(TSeqPos original_nuc_len, CRef< CSeq_feat > cds, const TCuts &sorted_cuts)
Secondary function needed after trimming Seq-feat.
bool HasRepairedIDs(const CSeq_entry &entry)
Detects whether colliding IDs were fixed by comparing sequence IDs to the contents of the OriginalID ...
void RemoveUserObjectType(CSeq_entry &entry, CUser_object::EObjectType type)
Removes User-object descriptors of a certain type from the seq-entry.
bool IsUnverifiedOrganism(const CBioseq &seq)
vector< CSeq_entry_Handle > TVecOfSeqEntryHandles
CRef< CBioseq > SetNewProteinSequence(CScope &new_scope, CRef< CSeq_feat > cds, CRef< CSeq_inst > new_inst)
Secondary function needed after trimming Seq-feat.
void SetLinkageTypeScaffold(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
SetLinkageTypeScaffold A special case of SetLinkageType.
void SetLinkageTypeLinkedRepeat(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
void DeleteProteinAndRenormalizeNucProtSet(const CSeq_feat_Handle &feat_h)
Secondary function needed after trimming Seq-feat.
void ConvertRawToDeltaByNs(CSeq_inst &inst, size_t min_unknown, int max_unknown, size_t min_known, int max_known, bool is_assembly_gap=false, int gap_type=CSeq_gap::eType_unknown, int linkage=-1, int linkage_evidence=-1)
ConvertRawToDeltaByNs A function to convert a raw sequence to a delta sequence, using runs of Ns to d...
string GetTargetedLocusNameConsensus(const string &tls1, const string &tls2)
void TrimSeqFeat(CRef< CSeq_feat > feat, const TCuts &sorted_cuts, bool &bFeatureDeleted, bool &bFeatureTrimmed, bool &partial_start, bool &partial_stop)
Trim Seq-feat annotation.
void GetSortedCuts(CBioseq_Handle bsh, const TCuts &cuts, TCuts &sorted_cuts, EInternalTrimType internal_cut_conversion=eTrimToClosestEnd)
1) Merge abutting and overlapping cuts.
void ResetLinkageEvidence(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
bool IsUnverifiedContaminant(const CBioseq &seq)
bool IsUnverifiedFeature(const CBioseq &seq)
EInternalTrimType
Any internal cut listed in TCuts will be converted to a terminal cut using one of these options.
@ eTrimToClosestEnd
@ eTrimTo5PrimeEnd
@ eDoNotTrimInternal
@ eTrimTo3PrimeEnd
void AddLinkageEvidence(CSeq_ext &ext, CLinkage_evidence::TType evidence_type)
AddLinkageEvidence A function to add linkage evidence for gaps in a delta sequence.
CRange< TSeqPos > TRange
A list of trim coordinates.
string GenerateTargetedLocusName(CBioseq_Handle seq)
NCBI_XOBJEDIT_EXPORT void SegregateSetsByBioseqList(const CSeq_entry_Handle &target, const CScope::TBioseqHandles &bioseq_handles)
Split a Seq-entry, where the second part holds the given bioseqs.
void BioseqSetDescriptorPropagateUp(CBioseq_set_Handle set)
Moves descriptors up from children of the given bioseq-set if each child has an identical copy of the...
vector< TRange > TCuts
pair< TSeqPos, int > TLocAdjustment
void HandleCollidingIds(CSeq_entry &entry)
Definition: type.c:6
#define const
Definition: zconf.h:232
Modified on Wed Sep 04 14:58:57 2024 by modify_doxy.py rev. 669887