NCBI C++ ToolKit
seq_loc_mapper.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SEQ_LOC_MAPPER__HPP
2 #define SEQ_LOC_MAPPER__HPP
3 
4 /* $Id: seq_loc_mapper.hpp 87350 2019-08-21 15:03:00Z grichenk $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko
30 *
31 * File Description:
32 * Seq-loc mapper
33 *
34 */
35 
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiobj.hpp>
38 #include <util/range.hpp>
39 #include <util/rangemap.hpp>
46 
47 
50 
51 
52 /** @addtogroup ObjectManagerCore
53  *
54  * @{
55  */
56 
57 
58 class CScope;
59 class CBioseq_Handle;
60 class CSeqMap;
61 class CSeqMap_CI;
62 struct SSeqMapSelector;
63 class CGC_Assembly;
64 class CGC_Sequence;
65 
66 
67 /////////////////////////////////////////////////////////////////////////////
68 ///
69 /// CSeq_loc_Mapper --
70 ///
71 /// Mapping locations and alignments between bioseqs through seq-locs,
72 /// features, alignments or between parts of segmented bioseqs.
73 
74 
76 {
77 public:
79  eSeqMap_Up, ///< map from segments to the top level bioseq
80  eSeqMap_Down ///< map from a segmented bioseq to segments
81  };
82 
83  /// Mapping through a pre-filled CMappingRanges. Source(s) and
84  /// destination(s) are considered as having the same width.
85  /// @param mapping_ranges
86  /// CMappingRanges filled with the desired source and destination
87  /// ranges. Must be a heap object (will be stored in a CRef<>).
88  /// NOTE: If the mapper is used with mixed sequence types, the
89  /// ranges must use genomic coordinates (for ranges on proteins
90  /// multiply all coordinates by 3).
91  /// @param scope
92  /// Optional scope (required only for mapping alignments). If any ids
93  /// from the mapping ranges are not available through this object,
94  /// they must be registered using SetSeqTypeById.
95  /// @sa SetSeqTypeById
96  CSeq_loc_Mapper(CMappingRanges* mapping_ranges,
97  CScope* scope = 0,
99 
100  /// Mapping through a feature, both location and product must be set.
101  /// If scope is set, synonyms are resolved for each source ID.
102  CSeq_loc_Mapper(const CSeq_feat& map_feat,
103  EFeatMapDirection dir,
104  CScope* scope = 0,
106 
107  /// Mapping between two seq_locs. If scope is set, synonyms are resolved
108  /// for each source ID.
110  const CSeq_loc& target,
111  CScope* scope = 0,
113 
114  /// Mapping through an alignment. Need to specify target ID or
115  /// target row of the alignment. Any other ID is mapped to the
116  /// target one. If scope is set, synonyms are resolved for each source ID.
117  /// Only the first row matching target ID is used, all other rows
118  /// are considered source.
119  CSeq_loc_Mapper(const CSeq_align& map_align,
120  const CSeq_id& to_id,
121  CScope* scope = 0,
123  CSeq_loc_Mapper(const CSeq_align& map_align,
124  size_t to_row,
125  CScope* scope = 0,
127  /// Mapping through an alignment using specific source and target ids.
128  /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
129  /// id is ignored.
130  CSeq_loc_Mapper(const CSeq_id& from_id,
131  const CSeq_id& to_id,
132  const CSeq_align& map_align,
133  CScope* scope = 0,
135  /// Mapping through an alignment using specific source and target row numbers.
136  /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
137  /// row is ignored.
138  CSeq_loc_Mapper(size_t from_row,
139  size_t to_row,
140  const CSeq_align& map_align,
141  CScope* scope = 0,
143 
144  /// Mapping between segments and the top level sequence.
145  /// @param target_seq
146  /// Top level bioseq
147  /// @param direction
148  /// Direction of mapping: up (from segments to master) or down.
149  CSeq_loc_Mapper(CBioseq_Handle target_seq,
150  ESeqMapDirection direction,
152 
153  /// Mapping between segments and the top level sequence.
154  /// @param target_seq
155  /// Top level bioseq
156  /// @param direction
157  /// Direction of mapping: up (from segments to master) or down.
158  /// @param selector
159  /// Seq-map selector with additional restrictions (range, strand etc.).
160  /// Some properties of the selector are always adjusted by the mapper.
161  CSeq_loc_Mapper(CBioseq_Handle target_seq,
162  ESeqMapDirection direction,
163  SSeqMapSelector selector,
165 
166  /// Mapping through a seq-map.
167  /// @param seq_map
168  /// Sequence map defining the mapping
169  /// @param direction
170  /// Direction of mapping: up (from segments to master) or down.
171  /// @param top_level_id
172  /// Explicit destination id when mapping up, may be used with
173  /// seq-maps constructed from a seq-loc with multiple ids.
174  CSeq_loc_Mapper(const CSeqMap& seq_map,
175  ESeqMapDirection direction,
176  const CSeq_id* top_level_id = 0,
177  CScope* scope = 0,
179 
180  /// Mapping through a seq-map.
181  /// @param seq_map
182  /// Sequence map defining the mapping
183  /// @param direction
184  /// Direction of mapping: up (from segments to master) or down.
185  /// @param selector
186  /// Seq-map selector with additional restrictions (range, strand etc.).
187  /// Some properties of the selector are always adjusted by the mapper.
188  /// @param top_level_id
189  /// Explicit destination id when mapping up, may be used with
190  /// seq-maps constructed from a seq-loc with multiple ids.
191  CSeq_loc_Mapper(const CSeqMap& seq_map,
192  ESeqMapDirection direction,
193  SSeqMapSelector selector,
194  const CSeq_id* top_level_id = 0,
195  CScope* scope = 0,
197 
198  /// Mapping between segments and the top level sequence limited by depth.
199  /// @param depth
200  /// Mapping depth. Depth of 0 converts synonyms.
201  /// @param top_level_seq
202  /// Top level bioseq
203  /// @param direction
204  /// Direction of mapping: up (from segments to master) or down.
205  CSeq_loc_Mapper(size_t depth,
206  const CBioseq_Handle& top_level_seq,
207  ESeqMapDirection direction,
209 
210  /// Depth-limited mapping through a seq-map.
211  /// @param depth
212  /// Mapping depth. Depth of 0 converts synonyms.
213  /// @param top_level_seq
214  /// Sequence map defining the mapping
215  /// @param direction
216  /// Direction of mapping: up (from segments to master) or down.
217  /// @param top_level_id
218  /// Explicit destination id when mapping up, may be used with
219  /// seq-maps constructed from a seq-loc with multiple ids.
220  CSeq_loc_Mapper(size_t depth,
221  const CSeqMap& top_level_seq,
222  ESeqMapDirection direction,
223  const CSeq_id* top_level_id = 0,
224  CScope* scope = 0,
226 
227  /// Destination of seq-id mapping through a GC-Assembly.
229  eGCA_Genbank, ///< Map to GenBank alias, prefer GI
230  eGCA_GenbankAcc, ///< Map to GenBank alias, prefer acc.ver
231  eGCA_Refseq, ///< Map to RefSeq alias, prefer GI
232  eGCA_RefseqAcc, ///< Map to RefSeq alias, prefer acc.ver
233  eGCA_UCSC, ///< Map to UCSC alias
234  eGCA_Other ///< Map to 'private' seq-id
235  };
236 
237  /// Using CScope for virtual bioseqs created from GC-Assemblies.
238  enum EScopeFlag {
239  eOriginalScope, ///< Put the generated bioseqs into the original scope.
240  eCopyScope ///< Create a new scope for virtual bioseqs. This keeps
241  ///< the original scope clean, but any changes made to
242  ///< it after creating the mapper will not be picked up
243  ///< by the mapper.
244  };
245 
246  /// Initialize the mapper to map through a GC-Assembly
247  /// to the selected alias type.
248  CSeq_loc_Mapper(const CGC_Assembly& gc_assembly,
249  EGCAssemblyAlias to_alias,
250  CScope* scope = 0,
251  EScopeFlag scope_flag = eCopyScope);
252  /// Initialize the mapper to map through deltas from a GC-Assembly.
253  CSeq_loc_Mapper(const CGC_Assembly& gc_assembly,
254  ESeqMapDirection direction,
255  SSeqMapSelector selector,
256  CScope* scope = 0,
257  EScopeFlag scope_flag = eCopyScope,
259 
260  ~CSeq_loc_Mapper(void);
261 
262 protected:
263  // Create CSeq_align_Mapper, add any necessary arguments
264  virtual CSeq_align_Mapper_Base*
265  InitAlignMapper(const CSeq_align& src_align);
266 
267 private:
270 
271  void x_InitializeSeqMap(const CSeqMap& seq_map,
272  const CSeq_id* top_id,
273  ESeqMapDirection direction);
274  void x_InitializeSeqMap(const CSeqMap& seq_map,
275  size_t depth,
276  const CSeq_id* top_id,
277  ESeqMapDirection direction);
278  void x_InitializeSeqMap(const CSeqMap& seq_map,
279  SSeqMapSelector selector,
280  const CSeq_id* top_id,
281  ESeqMapDirection direction);
282  void x_InitializeSeqMap(CSeqMap_CI seg_it,
283  const CSeq_id* top_id,
284  ESeqMapDirection direction);
285  void x_InitializeSeqMapUp(CSeqMap_CI seg_it,
286  const CSeq_id* top_id);
287  void x_InitializeSeqMapDown(CSeqMap_CI seg_it,
288  const CSeq_id* top_id);
289  void x_InitializeSeqMapSingleLevel(CSeqMap_CI seg_it,
290  const CSeq_id* top_id,
291  ESeqMapDirection direction);
292 
293  // Parse GC-Assembly, collect mappings for each seq-id to the
294  // selected alias type.
295  void x_InitGCAssembly(const CGC_Assembly& gc_assembly,
296  EGCAssemblyAlias to_alias);
297  // Parse GC-Sequence, collect mappings for each seq-id to the
298  // selected alias type.
299  void x_InitGCSequence(const CGC_Sequence& gc_seq,
300  EGCAssemblyAlias to_alias);
301 
302  // Creates and adds to the scope a virtual bioseq. If delta is not null,
303  // the bioseq uses it for the instance. Otherwise a virtual bioseq without
304  // length/data is created.
  // NOTE(review): the declaration below takes (synonyms, gc_seq) and has no
  // `delta` parameter, so the text above may describe an earlier signature --
  // confirm against the implementation and update the wording.
305  CBioseq_Handle x_AddVirtualBioseq(const TSynonyms& synonyms,
306  const CGC_Sequence& gc_seq);
307 
308  // Helper function to check for UCSC random chromosomes and populate
309  // the set of synonyms.
310  bool x_IsUCSCRandomChr(const CGC_Sequence& gc_seq,
311  CConstRef<CSeq_id>& chr_id,
312  TSynonyms& synonyms) const;
313 
314 private:
316 };
317 
318 
319 /* @} */
320 
321 
324 
325 #endif // SEQ_LOC_MAPPER__HPP
CBioseq_Handle –.
Storage for multiple mapping ranges.
CScope –.
Definition: scope.hpp:92
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
CSeqMap –.
Definition: seq_map.hpp:93
Class used to map seq-alignments.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeq_loc_Mapper_Base –.
CSeq_loc_Mapper_Options –.
CSeq_loc_Mapper –.
Definition: set.hpp:45
Include a standard set of the NCBI C++ Toolkit most basic headers.
static unsigned char depth[2 *(256+1+29)+1]
CSeq_loc_Mapper & operator=(const CSeq_loc_Mapper &)
EGCAssemblyAlias
Destination of seq-id mapping through a GC-Assembly.
EScopeFlag
Using CScope for virtual bioseqs created from GC-Assemblies.
virtual CSeq_align_Mapper_Base * InitAlignMapper(const CSeq_align &src_align)
CSeq_loc_Mapper(const CSeq_loc_Mapper &)
@ eGCA_Genbank
Map to GenBank alias, prefer GI.
@ eGCA_UCSC
Map to UCSC alias.
@ eGCA_Refseq
Map to RefSeq alias, prefer GI.
@ eGCA_GenbankAcc
Map to GenBank alias, prefer acc.ver.
@ eGCA_RefseqAcc
Map to RefSeq alias, prefer acc.ver.
@ eOriginalScope
Put the generated bioseqs into the original scope.
@ eSeqMap_Up
map from segments to the top level bioseq
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJMGR_EXPORT
Definition: ncbi_export.h:1307
const CharType(& source)[N]
Definition: pointer.h:1149
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
Modified on Tue Apr 16 20:11:17 2024 by modify_doxy.py rev. 669887