NCBI C++ ToolKit
seq_loc_mapper_base.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SEQ_LOC_MAPPER_BASE__HPP
2 #define SEQ_LOC_MAPPER_BASE__HPP
3 
4 /* $Id: seq_loc_mapper_base.hpp 99064 2023-02-08 19:14:27Z ucko $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko
30 *
31 * File Description:
32 * Seq-loc mapper base
33 *
34 */
35 
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiobj.hpp>
38 #include <corelib/ncbi_message.hpp>
39 #include <util/range.hpp>
40 #include <util/rangemap.hpp>
47 
48 
51 
52 
53 /** @addtogroup ObjectManagerCore
54  *
55  * @{
56  */
57 
58 
59 class CSeq_id;
60 class CSeq_loc;
61 class CSeq_interval;
62 class CPacked_seqpnt;
63 class CSeq_loc_CI;
64 class CSeq_feat;
65 class CSeq_align;
67 class CSeq_graph;
69 
70 
71 /// CMappingRange - describes a single interval to interval
72 /// mapping.
74 {
75 public:
77  TSeqPos src_from,
78  TSeqPos src_length,
79  ENa_strand src_strand,
80  CSeq_id_Handle dst_id,
81  TSeqPos dst_from,
82  ENa_strand dst_strand,
83  bool ext_to = false,
84  int frame = 0,
85  TSeqPos src_bioseq_len = kInvalidSeqPos,
86  TSeqPos dst_len = kInvalidSeqPos);
87 
88  /// Check if the id is on the source sequence.
89  bool GoodSrcId(const CSeq_id& id) const;
90  CRef<CSeq_id> GetDstId(void) const;
91  const CSeq_id_Handle& GetDstIdHandle(void) const
92  { return m_Dst_id_Handle; }
93 
96  typedef pair<TFuzz, TFuzz> TRangeFuzz;
97 
98  /// Check if the interval can be mapped through this mapping range.
99  /// Strand direction is checked only if is_set_strand is true.
100  bool CanMap(TSeqPos from,
101  TSeqPos to,
102  bool is_set_strand,
103  ENa_strand strand) const;
104  /// Map a single point
105  TSeqPos Map_Pos(TSeqPos pos) const;
106  /// Map an interval, set fuzz when the mapping truncates the original
107  /// range.
108  TRange Map_Range(TSeqPos from,
109  TSeqPos to,
110  const TRangeFuzz* fuzz = 0) const;
111  /// Map the strand, return true if the destination strand should be
112  /// set (even if it's eNa_strand_unknown -- this may happen if the
113  /// source strand is set to unknown).
114  bool Map_Strand(bool is_set_strand,
115  ENa_strand src,
116  ENa_strand* dst) const;
117  /// Map fuzz if one is set in the original location.
118  TRangeFuzz Map_Fuzz(const TRangeFuzz& fuzz) const;
119 
120 private:
121  // Get new fuzz value when reversing location's strand.
122  CInt_fuzz::ELim x_ReverseFuzzLim(CInt_fuzz::ELim lim) const;
123  void x_Map_Fuzz(TFuzz& fuzz) const;
124 
132  // Whether the mapping reverses the strand or not.
133  // This value can be calculated from source and destination
134  // strands, but is cached for better performance.
135  bool m_Reverse;
136  // Whether to extend the mapped location to the end of
137  // destination range. Used when mapping from a prot to a nuc.
138  // ExtTo is set when both conditions are met:
139  // - the mapping is from a protein to a nucleotide
140  // - the destination interval has partial 'to' (set as fuzz)
141  // ExtTo is used only when the interval to be mapped has
142  // partial 'to' set through the fuzz and the mapped range is
143  // just 1 or 2 bases shorter than the mapping destination.
144  bool m_ExtTo;
145  // Holds the frame shift (0 if none) of the underlying CDS (if any).
146  int m_Frame;
147  // This holds the complete length of the original source bioseq.
148  // Needed to detect whether or not fuzzy edges should be extended to the end.
150  // For example, if the end of a source maps to just before the end of the
151  // dest, then we sometimes extend to the end of the dest, so we do need
152  // to store this, even though it's not needed for the mapping itself.
154  // Group of mapping ranges - used with alignments, e.g. to group
155  // mapped ranges by exon.
156  int m_Group;
157 
158  friend class CSeq_loc_Mapper_Base;
159  //friend class CSeq_loc_Mapper;
160  friend class CMappingRanges;
162  //friend class CSeq_align_Mapper;
163  friend struct CMappingRangeRef_Less;
165 
166 public:
167  // Interface for CPairwiseAln converter: read access to the cached mapping fields, plus get/set of the group.
168  TSeqPos GetSrc_from(void) const { return m_Src_from; } // start of the source interval
169  TSeqPos GetDst_from(void) const { return m_Dst_from; } // start of the destination interval
170  TSeqPos GetLength(void) const { return m_Src_to - m_Src_from; } // NOTE(review): plain difference of the stored ends; if m_Src_to is an inclusive coordinate this is (length - 1) -- confirm against the constructor
171  bool GetReverse(void) const { return m_Reverse; } // cached "mapping reverses the strand" flag
172  int GetGroup(void) const { return m_Group; } // group of mapping ranges this range belongs to
173  void SetGroup(int grp) { m_Group = grp; } // assign this range to group grp
174 };
175 
176 
177 /// Storage for multiple mapping ranges. Stores mappings grouped
178 /// by the source seq-id, then sorted by start coordinate.
180 {
181 public:
182  CMappingRanges(void);
183 
184  // Conversions
190  typedef vector< CRef<CMappingRange> > TSortedMappings;
191 
192  const TIdMap& GetIdMap() const { return m_IdMap; }
193  TIdMap& GetIdMap(void) { return m_IdMap; }
194 
195  /// Add new mapping range to the proper place.
196  void AddConversion(CRef<CMappingRange> cvt);
197  CRef<CMappingRange> AddConversion(CSeq_id_Handle src_id,
198  TSeqPos src_from,
199  TSeqPos src_length,
200  ENa_strand src_strand,
201  CSeq_id_Handle dst_id,
202  TSeqPos dst_from,
203  ENa_strand dst_strand,
204  bool ext_to = false,
205  int frame = 0,
206  TSeqPos dst_total_len = kInvalidSeqPos,
207  TSeqPos src_bioseq_len = kInvalidSeqPos,
208  TSeqPos dst_len = kInvalidSeqPos );
209 
210  /// Get mapping ranges iterator for the given seq-id and range.
211  TRangeIterator BeginMappingRanges(CSeq_id_Handle id,
212  TSeqPos from,
213  TSeqPos to) const;
214 
215  // Overall source and destination orientation. The order of mapped ranges
216  // is reversed if ReverseSrc != ReverseDst (except in some merging modes).
217  void SetReverseSrc(bool value = true) { m_ReverseSrc = value; } // store the source orientation flag
218  bool GetReverseSrc(void) const { return m_ReverseSrc; } // read the source orientation flag
219  void SetReverseDst(bool value = true) { m_ReverseDst = value; } // store the destination orientation flag
220  bool GetReverseDst(void) const { return m_ReverseDst; } // read the destination orientation flag
221 
222 private:
224 
225  // Mapping source and destination orientations
228 };
229 
230 
231 /// Helper class for mapping graphs. Used to collect ranges
232 /// relative to the graph location and adjust mapped graph data
233 /// accordingly.
235 {
236 public:
237  CGraphRanges(void) : m_Offset(0) {}
238 
240  typedef vector<TRange> TGraphRanges;
241 
242  // Offset is relative to the original graph location, indicates
243  // the part of the original location which has been already
244  // mapped (or truncated).
245  TSeqPos GetOffset(void) const { return m_Offset; }
246  void SetOffset(TSeqPos offset) { m_Offset = offset; }
247  void IncOffset(TSeqPos inc) { m_Offset += inc; }
248 
249  const TGraphRanges& GetRanges(void) const { return m_Ranges; }
250 
251  // Add new mapped range. The range is relative to the not yet mapped
252  // part of the original location, so it is shifted by the current offset before being stored. See:
253  // CSeq_loc_Mapper_Base::x_MapNextRange()
254  // CSeq_loc_Mapper_Base::x_MapInterval()
255  void AddRange(const TRange& rg)
256  {
257  if ( rg.Empty() ) { // an empty range contributes nothing
258  return;
259  }
260  TRange offset_rg = rg.IsWhole() ? rg : // a "whole" range is stored as-is, without shifting
261  TRange(rg.GetFrom() + m_Offset, rg.GetTo() + m_Offset); // otherwise both ends are moved by m_Offset
262  m_Ranges.push_back(offset_rg); // keep the individual (shifted) range
263  m_TotalRange.CombineWith(offset_rg); // and fold it into the running total range
264  }
265 
266  const TRange& GetTotalRange(void) const { return m_TotalRange; }
267 
268 private:
272 };
273 
274 
275 /////////////////////////////////////////////////////////////////////////////
276 ///
277 /// CSeq_loc_Mapper_Options --
278 ///
279 /// Options passed to CSeq_loc_Mapper[_Base] constructor.
280 ///
281 
283 {
284 public:
285  typedef int TMapOptions;
286 
289  TMapOptions opts = 0);
291 
292  /// Sequence type, length etc. provider. If any ids from the mapping
293  /// ranges are not available through this object, they should be
294  /// registered using CSeq_loc_Mapper_Base::SetSeqTypeById().
295  IMapper_Sequence_Info* GetMapperSequenceInfo(void) const;
296  CSeq_loc_Mapper_Options& SetMapperSequenceInfo(IMapper_Sequence_Info* seq_info);
297 
298  /// Dense-seg mapping option.
299  /// @sa CSeq_loc_Mapper_Base::fAlign_Dense_seg_TotalRange
300  bool GetAlign_Dense_seg_TotalRange(void) const;
301  CSeq_loc_Mapper_Options& SetAlign_Dense_seg_TotalRange(bool value = true);
302 
303  /// Mapping direction when mapping through a sparse-seg.
304  /// @sa CSeq_loc_Mapper_Base::fAlign_Sparse_ToFirst
305  /// @sa CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond
306  bool GetAlign_Sparse_ToFirst(void) const;
307  bool GetAlign_Sparse_ToSecond(void) const;
308  CSeq_loc_Mapper_Options& SetAlign_Sparse_ToFirst(bool value = true);
309  CSeq_loc_Mapper_Options& SetAlign_Sparse_ToSecond(bool value = true);
310 
311  /// Mapping depth when using a seq-map, a bioseq or a GC-assembly.
312  /// @sa CSeq_loc_Mapper_Base::fMapSingleLevel
313  bool GetMapSingleLevel(void) const;
314  CSeq_loc_Mapper_Options& SetMapSingleLevel(bool value = true);
315 
316  /// Mapped location trimming at sequence end. Off by default.
317  /// @sa CSeq_loc_Mapper_Base::fTrimMappedLocation
318  bool GetTrimMappedLocation(void) const;
319  CSeq_loc_Mapper_Options& SetTrimMappedLocation(bool value = true);
320 
321 private:
322  friend class CSeq_loc_Mapper_Base;
323 
324  IMapper_Sequence_Info& GetSeqInfo(void) const;
325 
326  bool x_IsSetOption(int opt) const;
327  void x_SetOption(int opt, bool enable);
328 
331 };
332 
333 
334 /////////////////////////////////////////////////////////////////////////////
335 ///
336 /// CSeq_loc_Mapper_Base --
337 ///
338 /// Mapping locations and alignments between bioseqs through seq-locs,
339 /// features, alignments or between parts of segmented bioseqs.
340 
342 {
343 public:
344  /// Mapping direction used when initializing the mapper with a feature.
346  eLocationToProduct, ///< Map from the feature's location to product
347  eProductToLocation ///< Map from the feature's product to location
348  };
349 
350  /// Options for interpretations of locations
351  enum EMapOptions {
352  /// Ignore internal dense-seg structure - map each
353  /// dense-seg according to the total ranges involved
354  fAlign_Dense_seg_TotalRange = 1 << 0,
355 
356  /// Flags used to indicate mapping direction when mapping
357  /// through a sparse-seg.
358  fAlign_Sparse_ToFirst = 0, ///< Map to first-id (value 0: has no bit of its own, i.e. it is the state in which fAlign_Sparse_ToSecond is not set)
359  fAlign_Sparse_ToSecond = 1 << 1, ///< Map to second-id
360 
361  /// Flag used when mapping through a seq-map (this includes
362  /// mapping through a bioseq or a GC-assembly). If set, each
363  /// call to Map() goes only one level up or down, unlike normal
364  /// mode which maps from any level as far up/down as possible.
365  /// The result of mapping can be mapped further by making another
366  /// call to Map().
367  fMapSingleLevel = 1 << 2,
368 
369  /// Enable trimming of source/destination ranges at sequence end.
370  /// By default locations can stretch beyond sequence end. With trimming
371  /// enabled the mapper will truncate ranges to fit sequence lengths.
372  fTrimMappedLocation = 1 << 3
373  };
374  typedef int TMapOptions; ///< Presumably a bitwise OR of EMapOptions values (the non-zero enumerators are distinct bits)
375 
376  /// Spliced-seg row indexing constants.
377  enum ESplicedRow {
378  eSplicedRow_Prod = 0,
379  eSplicedRow_Gen = 1
380  };
381 
382  enum FFuzzOption {
383  // used for backwards compatibility with C toolkit's output.
384  // TODO: we should remove this one day since the
385  // normal output is superior.
386  fFuzzOption_CStyle = 1 << 0,
387  // Don't set eLim_tl or eLim_tr and instead set greater than or less
388  // than if appropriate.
389  fFuzzOption_RemoveLimTlOrTr = 1 << 1
390  };
391  typedef int TFuzzOption;
392 
393  /// Mapping through a pre-filled CMappingRanges.
394  /// @param mapping_ranges
395  /// CMappingRanges filled with the desired source and destination
396  /// ranges. Must be a heap object (will be stored in a CRef<>).
397  /// NOTE: If the mapper is used with mixed sequence types, the
398  /// ranges must use genomic coordinates (for ranges on proteins
399  /// multiply all coordinates by 3).
400  /// @param options
401  /// Mapping options which need to be set during mapper initialization.
402  /// @sa CSeq_loc_Mapper_Options
403  CSeq_loc_Mapper_Base(CMappingRanges* mapping_ranges,
405 
406  /// Mapping through a feature, both location and product must be set.
407  CSeq_loc_Mapper_Base(const CSeq_feat& map_feat,
408  EFeatMapDirection dir,
410 
411  /// Mapping between two seq_locs.
413  const CSeq_loc& target,
415 
416  /// Mapping through an alignment. Need to specify target ID or
417  /// target row of the alignment. Any other ID is mapped to the
418  /// target one. Only the first row matching target ID is used,
419  /// all other rows are considered source.
420  CSeq_loc_Mapper_Base(const CSeq_align& map_align,
421  const CSeq_id& to_id,
423  /// Mapping through an alignment using specific source and target ids.
424  /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
425  /// id is ignored.
426  CSeq_loc_Mapper_Base(const CSeq_id& from_id,
427  const CSeq_id& to_id,
428  const CSeq_align& map_align,
430  /// @deprecated Use the version with CSeq_loc_Mapper_Options instead.
432  CSeq_loc_Mapper_Base(const CSeq_align& map_align,
433  const CSeq_id& to_id,
434  TMapOptions opts,
435  IMapper_Sequence_Info* seq_info);
436 
437  /// Sparse alignments require special row indexing since each
438  /// row contains two seq-ids. Use options to specify mapping
439  /// direction.
440  CSeq_loc_Mapper_Base(const CSeq_align& map_align,
441  size_t to_row,
443  /// Mapping through an alignment using specific source and target row numbers.
444  /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
445  /// row is ignored.
446  CSeq_loc_Mapper_Base(size_t from_row,
447  size_t to_row,
448  const CSeq_align& map_align,
450  /// @deprecated Use the version with CSeq_loc_Mapper_Options instead.
452  CSeq_loc_Mapper_Base(const CSeq_align& map_align,
453  size_t to_row,
454  TMapOptions opts,
455  IMapper_Sequence_Info* seq_info);
456 
457  ~CSeq_loc_Mapper_Base(void);
458 
459  void SetFuzzOption( TFuzzOption newOption );
460 
461  /// Intervals' merging mode
462  /// MergeNone and MergeAbutting do not change the order of ranges
463  /// in the destination seq-loc. No ranges will be merged if they
464  /// are separated by any other sub-range.
465  /// MergeContained and MergeAll sort ranges before merging, so that
466  /// any overlapping ranges can be merged. The sorting takes the
467  /// mapped location strand into account.
468  /// NOTE: any merging (except None) is incompatible with collecting
469  /// source ranges.
470  /// @sa IncludeSourceLocs
471 
472  /// No merging
473  CSeq_loc_Mapper_Base& SetMergeNone(void);
474  /// Merge only abutting intervals, keep overlapping
475  CSeq_loc_Mapper_Base& SetMergeAbutting(void);
476  /// Merge only intervals from the same group. Group is created
477  /// for each exon, dense-diag, std-seg and disc sub-alignment.
478  CSeq_loc_Mapper_Base& SetMergeBySeg(void);
479  /// Merge intervals only if one is completely covered by another
480  CSeq_loc_Mapper_Base& SetMergeContained(void);
481  /// Merge any abutting or overlapping intervals
482  CSeq_loc_Mapper_Base& SetMergeAll(void);
483 
484  /// Whether to preserve or remove NULL sub-locations (usually
485  /// indicating gaps) from the result. By default gaps are preserved.
486  CSeq_loc_Mapper_Base& SetGapPreserve(void);
487  CSeq_loc_Mapper_Base& SetGapRemove(void);
488 
489  /// For mapping spliced-segs only: preserve or trim starting/ending
490  /// indels. By default indels are trimmed (only those at the whole
491  /// alignment start and end).
492  CSeq_loc_Mapper_Base& SetTrimSplicedSeg(bool trim);
493 
494  /// Keep ranges which can not be mapped. Does not affect truncation
495  /// of partially mapped ranges. By default non-mapping ranges are
496  /// removed.
497  CSeq_loc_Mapper_Base& KeepNonmappingRanges(void);
498  CSeq_loc_Mapper_Base& TruncateNonmappingRanges(void);
499 
500  /// Check strands before mapping a range. By default strand is not
501  /// checked and a range will be mapped even if its strand does not
502  /// correspond to the strand of the mapping source.
503  CSeq_loc_Mapper_Base& SetCheckStrand(bool value = true);
504 
505  /// When set to 'true', a mapped alignment which has exactly one genomic
506  /// and one protein row is converted to spliced-seg. By default all
507  /// mixed-type alignments are converted to std-seg.
508  CSeq_loc_Mapper_Base& MixedAlignsAsSpliced(bool value = true);
509 
510  /// Include source ranges in the mapped location. If turned
511  /// on, the resulting seq-loc will be an equiv with the
512  /// first sub-loc containing the usual mapped seq-loc, and
513  /// the second one - the set of source locations used in the
514  /// mapping.
515  /// NOTE: this option is incompatible with any merging.
516  /// Merging mode must be set to MergeNone.
517  CSeq_loc_Mapper_Base& IncludeSourceLocs(bool value = true);
518 
519  /// Report source range trimming as an error. If the flag is set,
520  /// any trimming will result in throwing CAnnotMapperException.
521  /// Intended to be used when mapping GC-Assembly aliases.
522  CSeq_loc_Mapper_Base& SetErrorOnPartial(bool value = true);
523 
524  /// Map seq-loc
525  CRef<CSeq_loc> Map(const CSeq_loc& src_loc);
526  /// Take the total range from the location and run it through the mapper.
527  CRef<CSeq_loc> MapTotalRange(const CSeq_loc& seq_loc);
528  /// Map the whole alignment. Searches all rows for ranges
529  /// which can be mapped.
530  CRef<CSeq_align> Map(const CSeq_align& src_align);
531  /// Map a single row of the alignment.
532  CRef<CSeq_align> Map(const CSeq_align& src_align,
533  size_t row);
534  /// Map seq-graph. This will map both location and data.
535  /// The data may be truncated to match the new location.
536  CRef<CSeq_graph> Map(const CSeq_graph& src_graph);
537 
538  /// Flags defining seq-annot mapping options.
540  fAnnotMap_Location = 1 << 0, ///< Map seq-feat locations
541  fAnnotMap_Product = 1 << 1, ///< Map seq-feat products
542  fAnnotMap_Both = fAnnotMap_Location | fAnnotMap_Product,
543 
544  /// Remove annotations which can not be mapped with this mapper.
545  /// If the flag is not set, the original annotation is stored
546  /// in the seq-annot.
547  fAnnotMap_RemoveNonMapping = 1 << 2,
548 
549  /// Throw exception if an annotation can not be mapped.
550  fAnnotMap_ThrowOnFailure = 1 << 3,
551 
552  fAnnotMap_Default = fAnnotMap_Both
553  };
554  typedef int TAnnotMapFlags;
555 
556  /// Result of seq-annot mapping
557  enum EMapResult {
558  /// No annotation was mapped, the input seq-annot is unchanged.
559  eMapped_None = 0,
560  /// Some (not all) annotations were mapped.
562  /// All annotations were mapped, none was removed.
563  eMapped_All
564  };
565 
566  /// Map each object from the Seq-annot and replace the original
567  /// with the mapped one.
568  EMapResult Map(CSeq_annot& annot, TAnnotMapFlags flags = fAnnotMap_Default);
569 
570  /// Check if the last mapping resulted in partial location
571  /// (not all ranges from the original location could be mapped
572  /// to the target).
573  bool LastIsPartial(void);
574 
576 
577  // Collect synonyms for the id, store mapping of each synonym
578  // to the primary id. Returns primary id for the argument or the
579  // argument itself.
580  const CSeq_id_Handle& CollectSynonyms(const CSeq_id_Handle& id) const;
581 
582  // Sequence type - to recalculate coordinates.
583  enum ESeqType {
584  eSeq_unknown = 0, // type not (yet) determined
585  eSeq_nuc = 1,
586  eSeq_prot = 3 // NOTE(review): 1 and 3 look like the coordinate width of one residue in nucleotide units (the pre-filled-ranges constructor asks for protein coordinates multiplied by 3) -- confirm
587  };
588 
589 protected:
590 
591  // Get molecule type for the given id. The default implementation
592  // returns eSeq_unknown. The overridden methods should return
593  // real sequence type. The returned type is stored in the mapper's
594  // cache. The method should not be called directly, use
595  // GetSeqTypeById instead for it uses the cached types.
596  // It's also a good idea to cache the same sequence type for all
597  // synonyms in the overridden method to prevent multiple requests
598  // to GetSeqType.
599  ESeqType GetSeqType(const CSeq_id_Handle& idh) const;
600 
601  // Get sequence length for the given seq-id. Returns kInvalidSeqPos
602  // if the length is unknown (the default behavior).
603  TSeqPos GetSequenceLength(const CSeq_id& id);
604 
605  // Create CSeq_align_Mapper_Base, add any necessary arguments.
606  virtual CSeq_align_Mapper_Base*
607  InitAlignMapper(const CSeq_align& src_align);
608 
609  // Initialize the mapper from a feature. The feature must have
610  // both location and product set, mapping direction is set by
611  // the flag.
612  void x_InitializeFeat(const CSeq_feat& map_feat,
613  EFeatMapDirection dir);
614  // Map between two locations. Optional frame is used by x_InitializeFeat()
615  // only with cd-region features.
616  void x_InitializeLocs(const CSeq_loc& source,
617  const CSeq_loc& target,
618  int src_frame = 0,
619  int dst_frame = 0);
620  // Initialize the mapper from an alignment. Looks for the first
621  // row containing the id and sets it as mapping target. All other
622  // rows become mapping source.
623  void x_InitializeAlign(const CSeq_align& map_align,
624  const CSeq_id& to_id,
625  const CSeq_id* from_id = nullptr);
626  // Recursive version of the above.
627  void x_InitializeAlign(const CSeq_align& map_align,
628  const TSynonyms& to_ids,
629  const TSynonyms* from_ids = nullptr);
630  // Initialize the mapper from an alignment, map to the specified row.
631  void x_InitializeAlign(const CSeq_align& map_align,
632  size_t to_row,
633  size_t from_row = size_t(-1));
634 
635  // Create dummy mapping from the whole destination location to itself.
636  // This will prevent truncation of ranges already on the target.
637  // For some reason (?) the function is used only by CSeq_loc_Mapper,
638  // not CSeq_loc_Mapper_Base, and only when initializing the mapper
639  // from a bioseq handle or a seq-map. When mapping through a feature
640  // or a pair of seq-locs it's not called and ranges on destination
641  // are truncated or preserved the same way as any other non-mapping
642  // ranges.
643  void x_PreserveDestinationLocs(void);
644 
645  // Add new mapping range while initializing the mapper. The function
646  // adjusts starts and lengths according to the used range and strand.
647  void x_NextMappingRange(const CSeq_id& src_id,
648  TSeqPos& src_start,
649  TSeqPos& src_len,
650  ENa_strand src_strand,
651  const CSeq_id& dst_id,
652  TSeqPos& dst_start,
653  TSeqPos& dst_len,
654  ENa_strand dst_strand,
655  const CInt_fuzz* fuzz_from = 0,
656  const CInt_fuzz* fuzz_to = 0,
657  int frame = 0,
658  TSeqPos src_bioseq_len = kInvalidSeqPos);
659 
660  // Add new CMappingRange. This includes collecting all synonyms for the id,
661  // creating a new mapping for each of them and updating the destination
662  // ranges.
663  void x_AddConversion(const CSeq_id& src_id,
664  TSeqPos src_start,
665  ENa_strand src_strand,
666  const CSeq_id& dst_id,
667  TSeqPos dst_start,
668  ENa_strand dst_strand,
669  TSeqPos length,
670  bool ext_right,
671  int frame,
672  TSeqPos src_bioseq_len,
673  TSeqPos dst_length );
674 
675  // Parse and map the seq-loc.
676  void x_MapSeq_loc(const CSeq_loc& src_loc);
677 
678  // Convert collected ranges into a seq-loc and push it into the destination
679  // seq-loc mix. This is done to preserve the original seq-loc structure
680  // when possible (although some optimizations are done - see
681  // x_OptimizeSeq_loc).
682  void x_PushRangesToDstMix(void);
683 
688 
689  // List and map of target ranges to construct target-to-target mapping
690  typedef list<TRange> TDstRanges;
692  typedef vector<TDstIdMap> TDstStrandMap;
693 
694  // Destination locations arranged by ID/range
696  typedef pair<TFuzz, TFuzz> TRangeFuzz;
697 
698  // Structure to hold information about mapped ranges until they are
699  // converted to seq-loc parts.
700  struct SMappedRange {
701  SMappedRange(void) : group(0) {} // default: group 0, other members default-constructed
702  SMappedRange(const TRange& rg,
703  const TRangeFuzz& fz,
704  int grp = 0)
705  : range(rg), fuzz(fz), group(grp) {} // store the range, its fuzz pair and the group
706 
709  int group; // used mostly to group ranges by exon
710 
711  bool operator<(const SMappedRange& rg) const
712  {
713  return range < rg.range; // ordering looks at the range only; fuzz and group are not compared
714  }
715  };
716  typedef list<SMappedRange> TMappedRanges;
717  // Ranges grouped by strand. [0] contains ranges without strand,
718  // [i] where i>0 stands for 'eNa_strand_XXXX + 1'.
719  typedef vector<TMappedRanges> TRangesByStrand;
722 
725 
726 private:
729 
731 
732  enum EMergeFlags {
733  eMergeNone, // no merging
734  eMergeAbutting, // merge only abutting intervals, keep overlapping
735  eMergeContained, // merge if one range is contained in another
736  eMergeBySeg, // merge abutting and overlapping ranges by mapping group
737  eMergeAll // merge both abutting and overlapping intervals
738  };
739  enum EGapFlags {
740  eGapPreserve, // Leave gaps as-is
741  eGapRemove // Remove gaps (NULL seq-locs)
742  };
743 
744  // Check types of all sequences referenced by the location,
745  // calculate the total length of the location, return true
746  // if types are known for all sequences.
747  // Set seqtype to the detected sequence type or to unknown
748  // if the type can not be detected or there are multiple types.
749  bool x_CheckSeqTypes(const CSeq_loc& loc,
750  ESeqType& seqtype,
751  TSeqPos& len);
752  // If x_CheckSeqTypes returns false, it may indicate that some
753  // sequence types could not be detected. In this case the mapper
754  // will attempt to find at least one known type in the location
755  // and force it for all sub-locations with unknown types.
756  // The function will fail if there are different known types in the
757  // same seq-loc.
758  ESeqType x_ForceSeqTypes(const CSeq_loc& loc) const;
759 
760  // In some cases the mapper may fail to detect that both source
761  // and destination locations are on proteins rather than on nucs.
762  // CSeq_align_Mapper_Base may detect this mistake while mapping
763  // an alignment. In this case it will try to change all types to
764  // protein.
765  void x_AdjustSeqTypesToProt(const CSeq_id_Handle& idh);
766 
767  // Get sequence length, try to get the real length for
768  // reverse strand, do not use "whole".
769  TSeqPos x_GetRangeLength(const CSeq_loc_CI& it);
770 
771  // Initialize the mapper from different alignment types.
772  void x_InitAlign(const CDense_diag& diag, size_t to_row, size_t from_row);
773  void x_InitAlign(const CDense_seg& denseg, size_t to_row, size_t from_row);
774  void x_InitAlign(const CStd_seg& sseg, size_t to_row);
775  void x_InitAlign(const CPacked_seg& pseg, size_t to_row, size_t from_row);
776  void x_InitSpliced(const CSpliced_seg& spliced,
777  const TSynonyms& to_ids);
778  void x_InitSpliced(const CSpliced_seg& spliced, ESplicedRow to_row);
779  void x_InitSparse(const CSparse_seg& sparse, size_t to_row);
780 
781  void x_IterateExonParts(const CSpliced_exon::TParts& parts,
782  ESplicedRow to_row,
783  const CSeq_id& gen_id,
784  TSeqPos& gen_start,
785  TSeqPos& gen_len,
786  ENa_strand gen_strand,
787  const CSeq_id& prod_id,
788  TSeqPos& prod_start,
789  TSeqPos& prod_len,
790  ENa_strand prod_strand);
791  void x_AddExonPartsMapping(TSeqPos& mapping_len,
792  ESplicedRow to_row,
793  const CSeq_id& gen_id,
794  TSeqPos& gen_start,
795  TSeqPos& gen_len,
796  ENa_strand gen_strand,
797  const CSeq_id& prod_id,
798  TSeqPos& prod_start,
799  TSeqPos& prod_len,
800  ENa_strand prod_strand);
801  // Helper method to simplify getting exon part length regardless of
802  // its type.
803  static TSeqPos sx_GetExonPartLength(const CSpliced_exon_chunk& part);
804 
805  // Map a single range from source to destination.
806  bool x_MapNextRange(const TRange& src_rg,
807  bool is_set_strand,
808  ENa_strand src_strand,
809  const TRangeFuzz& src_fuzz,
810  TSortedMappings& mappings,
811  size_t cvt_idx,
812  TSeqPos* last_src_to);
813  // Map the interval through all matching mappings.
814  bool x_MapInterval(const CSeq_id& src_id,
815  TRange src_rg,
816  bool is_set_strand,
817  ENa_strand src_strand,
818  TRangeFuzz orig_fuzz);
819  // Set the flag to indicate that the last range was truncated
820  // during mapping.
821  void x_SetLastTruncated(void);
822 
823  // Pushes the location to the destination seq-loc mix.
824  // See also x_PushRangesToDstMix.
825  void x_PushLocToDstMix(CRef<CSeq_loc> loc);
826 
827  // Pushes NULL location to the destination mix (when a range
828  // can not be mapped).
829  void x_PushNullLoc(void);
830 
831  // Map the alignment. If row is NULL, map all rows. Otherwise
832  // map only the selected row.
833  CRef<CSeq_align> x_MapSeq_align(const CSeq_align& src_align,
834  size_t* row);
835 
836  // Get mapped ranges for the given id and strand index.
837  // See TRangesByStrand for strand indexing.
838  TMappedRanges& x_GetMappedRanges(const CSeq_id_Handle& id,
839  size_t strand_idx) const;
840  // Push mapped range to the list of mapped ranges. Try to merge the new
841  // range with the existing ones based on the selected merging mode.
842  void x_PushMappedRange(const CSeq_id_Handle& id,
843  size_t strand_idx,
844  const TRange& range,
845  const TRangeFuzz& fuzz,
846  bool push_reverse,
847  int group);
848  // Store the source range just mapped. Used only if storing source
849  // locations is enabled - see IncludeSourceLocs.
850  void x_PushSourceRange(const CSeq_id_Handle& idh,
851  size_t src_strand,
852  size_t dst_strand,
853  const TRange& range,
854  bool push_reverse);
855 
856  // Convert mapped range data to a seq-loc (point or interval).
857  // Set fuzzes to indicate truncated range if necessary.
858  CRef<CSeq_loc> x_RangeToSeq_loc(const CSeq_id_Handle& idh,
859  TSeqPos from,
860  TSeqPos to,
861  size_t strand_idx,
862  TRangeFuzz rg_fuzz);
863 
864  // Convert all collected and not yet converted mapped ranges to a seq-loc.
865  // May be called multiple times while mapping a complex location and
866  // storing its parts to a destination seq-loc mix (see
867  // x_PushRangesToDstMix).
868  CRef<CSeq_loc> x_GetMappedSeq_loc(void);
869 
870  // For mix locations, we remove fuzz from in-between the parts.
871  void x_StripExtraneousFuzz(CRef<CSeq_loc>& loc) const;
872 
873  // This removes fuzz of type "range" if any.
874  // Don't give this mix locations; it won't do anything.
875  CConstRef<CSeq_loc> x_FixNonsenseFuzz( CConstRef<CSeq_loc> loc_piece ) const;
876 
877  // Try to optimize the mapped location if it's a mix.
878  // The allowed optimizations are:
879  // - empty mix is converted to Null
880  // - if the mix contains a single element, use just this element
881  // - if the mix contains only intervals, convert it to packed-int
882  // When mapping a complex location (e.g. a multi-level mix) each
883  // sub-location is optimized individually.
884  void x_OptimizeSeq_loc(CRef<CSeq_loc>& loc) const;
885 
886  // Returns true if the new mapped range should be added to the
887  // existing mapped ranges in the reverse order (in the front).
888  // If merging is set to contained or all, uses the provided strand
889  // index to check the order of ranges. For all other merging modes
890  // compares the directions of mapping source and target.
891  bool x_ReverseRangeOrder(int str) const;
892 
893  // Map parts of a complex seq-loc.
894  void x_Map_PackedInt_Element(const CSeq_interval& si);
895  void x_Map_PackedPnt_Element(const CPacked_seqpnt& pp, TSeqPos p);
896 
897  // Get main seq-id for a synonym. If no mapping exists, returns the
898  // original id.
899  const CSeq_id_Handle& x_GetPrimaryId(const CSeq_id_Handle& synonym) const;
900 
901  // Check if the id is in the list of synonyms.
902  bool x_IsSynonym(const CSeq_id& id, const TSynonyms& synonyms) const;
903 
906 
908 
909  // How to merge mapped locations.
911  // How to treat gaps (Null sub-locations) if any.
913 
914  // Other mapping options.
915  enum EMiscFlags {  // Each flag below occupies its own bit (1 << n).
916  // Trim leading/trailing indels (gaps) from mapped spliced-seg alignments.
917  fTrimSplicedSegs = 1 << 0,
918  // Whether to keep or discard ranges which can not be mapped.
919  fKeepNonmapping = 1 << 1,
920  // Whether or not to check if the original location is on the same strand
921  // as the mapping source.
922  fCheckStrand = 1 << 2,
923  // Whether to include the source of each mapped range in the mapped seq-loc.
924  fIncludeSrcLocs = 1 << 3,
925  // Prefer spliced-seg for mixed alignments.
926  fMixedAlignsAsSpliced = 1 << 4,
927  // Treat any range truncation as an error (added for mapping to GC-Assembly
928  // aliases).
929  fErrorOnPartial = 1 << 5
930  };
931  typedef int TMiscFlags;
932 
933  bool x_IsSetMiscFlag(EMiscFlags flag) const { return (m_MiscFlags & flag) == flag; }  // True only when every bit of 'flag' is set in m_MiscFlags.
934  void x_SetMiscFlag(EMiscFlags flag, bool value);
935 
937 
938  // Mapped ranges collected from the currently parsed sub-location.
940  // Source locations for all mapped ranges.
942 
943  // Collected ranges for mapped graph. Used to adjust mapped graph data.
945 
946  // Map each synonym to a primary seq-id.
948 
949  // Map each primary seq-id to sequence length.
951 
952 protected:
953  // Storage for sequence types.
955  // Flag indicating if the mapping truncated at least some ranges.
956  bool m_Partial;
957  // Flag indicating if the last range could not be mapped and was
958  // dropped.
960  // Mapping ranges grouped by source id and strand.
962  // Mapped seq-loc
964  // All ranges on the mapping destination.
966  // Current mapping group. Incremented for each mapping sub-location
967  // (e.g. exon).
969  // Control how fuzz is generated and propagated
971  // Misc mapping options
973 
974 public:
975  // Initialize the mapper with default values
977 
978  /// Methods for getting sequence types, use cached types (m_SeqTypes)
979  /// if possible.
980  ESeqType GetSeqTypeById(const CSeq_id_Handle& idh) const;
981  ESeqType GetSeqTypeById(const CSeq_id& id) const;
982  /// Methods for setting sequence types. May be used to populate the
983  /// cache before mapping huge alignments if the types are already
984  /// known. Throw exception if the sequence type is already set to
985  /// a different value.
986  /// NOTE: setting sequence type does not adjust mapping ranges for this
987  /// id. All mapping ranges must use genomic coordinates.
988  void SetSeqTypeById(const CSeq_id_Handle& idh, ESeqType seqtype) const;
989  void SetSeqTypeById(const CSeq_id& id, ESeqType seqtype) const;
990 
991  /// Get sequence width. Return 3 for proteins, 1 for nucleotides and
992  /// unknown sequence types.
993  int GetWidthById(const CSeq_id_Handle& idh) const;
994  int GetWidthById(const CSeq_id& id) const;
995 
996  /// Get mapping ranges.
997  const CMappingRanges& GetMappingRanges(void) const { return *m_Mappings; }
998 
999  /// NOTE: In most cases CollectSynonyms(const CSeq_id_Handle& id) should
1000  /// be used instead, since it takes care of synonym storage and mapping.
1001  /// This method does nothing but store synonyms in the container.
1002  void CollectSynonyms(const CSeq_id_Handle& id, TSynonyms& synonyms) const;
1003  // Check if ranges which can not be mapped should be replaced with NULL
1004  // locations. By default removed ranges are reported using neighbor's fuzz.
1005  // The flag is controlled from environment/registry:
1006  // MAPPER_NONMAPPING_AS_NULL=t
1007  // [Mapper]/Nonmapping_As_Null=t
1008  static bool GetNonMappingAsNull(void);
1009 };
1010 
1011 
1012 /////////////////////////////////////////////////////////////////////////////
1013 ///
1014 /// IMapper_Sequence_Info
1015 ///
1016 /// Interface for providing sequence information to CSeq_loc_Mapper_Base.
1017 /// Returns information about sequence type, length and synonyms.
1018 
1020 {
1021 public:
1024 
1025  /// Get information about sequence type (nuc or prot).
1026  virtual TSeqType GetSequenceType(const CSeq_id_Handle& idh) = 0;
1027 
1028  /// Get sequence length or kInvalidSeqPos.
1029  virtual TSeqPos GetSequenceLength(const CSeq_id_Handle& idh) = 0;
1030 
1031  /// Collect all synonyms for the id including the id itself.
1032  /// Any derived class must add at least the original id to the collection.
1033  virtual void CollectSynonyms(const CSeq_id_Handle& id,
1034  TSynonyms& synonyms) = 0;
1035 };
1036 
1037 
1038 /////////////////////////////////////////////////////////////////////////////
1039 ///
1040 /// CSeq_loc_Mapper_Message
1041 ///
1042 /// Class used to report CSeq_loc_Mapper_Base issues through
1043 /// IMessageListener.
1045 {
1046 public:
1047  CSeq_loc_Mapper_Message(const string& msg,
1048  EDiagSev sev,
1049  int err_code = 0,
1050  int sub_code = 0);
1051  virtual ~CSeq_loc_Mapper_Message(void);
1052 
1053  virtual CSeq_loc_Mapper_Message* Clone(void) const;
1054  virtual void Write(CNcbiOstream& out) const;
1055 
1061  eSeq_graph
1062  };
1063 
1064  /// Check type of the object stored in the message.
1065  EObjectType Which(void) const { return m_ObjType; }
1066 
1067  /// Set seq-loc object (copy into the message).
1068  void SetLoc(const CSeq_loc& loc);
1069  /// Get seq-loc object or null.
1070  const CSeq_loc* GetLoc(void) const;
1071 
1072  /// Set seq-feat object (copy into the message).
1073  void SetFeat(const CSeq_feat& feat);
1074  /// Get seq-feat object or null.
1075  const CSeq_feat* GetFeat(void) const;
1076 
1077  /// Set seq-align object (copy into the message).
1078  void SetAlign(const CSeq_align& align);
1079  /// Get seq-align object or null.
1080  const CSeq_align* GetAlign(void) const;
1081 
1082  /// Set seq-graph object (copy into the message).
1083  void SetGraph(const CSeq_graph& graph);
1084  /// Get seq-graph object or null.
1085  const CSeq_graph* GetGraph(void) const;
1086 
1087  /// Set the stored object to null.
1088  void ResetObject(void);
1089 
1090 private:
1092 
1094 };
1095 
1096 
1098 {
1099  bool operator()(const CRef<CMappingRange>& x,
1100  const CRef<CMappingRange>& y) const;
1101 };
1102 
1103 
1105 {
1106  bool operator()(const CRef<CMappingRange>& x,
1107  const CRef<CMappingRange>& y) const;
1108 };
1109 
1110 
1111 inline
1113  const CRef<CMappingRange>& y) const
1114 {
1115  // Leftmost first
1116  if (x->m_Src_from != y->m_Src_from) {
1117  return x->m_Src_from < y->m_Src_from;
1118  }
1119  // Longest first
1120  if (x->m_Src_to != y->m_Src_to) {
1121  return x->m_Src_to > y->m_Src_to;
1122  }
1123  return x < y;
1124 }
1125 
1126 
1127 inline
1129  const CRef<CMappingRange>& y) const
1130 {
1131  // Rightmost first
1132  if (x->m_Src_to != y->m_Src_to) {
1133  return x->m_Src_to > y->m_Src_to;
1134  }
1135  // Longest first
1136  if (x->m_Src_from != y->m_Src_from) {
1137  return x->m_Src_from < y->m_Src_from;
1138  }
1139  return x > y;
1140 }
1141 
1142 
1143 inline
1144 bool CMappingRange::GoodSrcId(const CSeq_id& id) const  // Check whether 'id' matches the source id of this mapping range.
1145 {
1146  return m_Src_id_Handle == id;  // Compares the stored source id handle against 'id'.
1147 }
1148 
1149 
1150 inline
1152 {
1153  return m_Dst_id_Handle ?
1154  Ref(&const_cast<CSeq_id&>(*m_Dst_id_Handle.GetSeqId())) :
1155  CRef<CSeq_id>(0);
1156 }
1157 
1158 
1159 inline
1161 {
1163  return *this;
1164 }
1165 
1166 
1167 inline
1169 {
1171  return *this;
1172 }
1173 
1174 
1175 inline
1177 {
1179  return *this;
1180 }
1181 
1182 
1183 inline
1185 {
1187  return *this;
1188 }
1189 
1190 
1191 inline
1193 {
1195  return *this;
1196 }
1197 
1198 
1199 inline
1201 {
1203  return *this;
1204 }
1205 
1206 
1207 inline
1209 {
1211  return *this;
1212 }
1213 
1214 
1215 inline
1217 {
1219  return *this;
1220 }
1221 
1222 
1223 inline
1225 {
1227  return *this;
1228 }
1229 
1230 
1231 inline
1233 {
1234  return m_Partial;
1235 }
1236 
1237 
1238 inline
1240 {
1242  return *this;
1243 }
1244 
1245 
1246 inline
1248 {
1250  return *this;
1251 }
1252 
1253 
1254 inline
1256 {
1258  return *this;
1259 }
1260 
1261 
1262 inline
1264 {
1266  return *this;
1267 }
1268 
1269 
1270 inline
1272 {
1274  return *this;
1275 }
1276 
1277 
1278 inline
1280 {
1281  return x_MapSeq_align(src_align, 0);
1282 }
1283 
1284 
1285 inline
1287  size_t row)
1288 {
1289  return x_MapSeq_align(src_align, &row);
1290 }
1291 
1292 
1293 inline
1296 {
1297  CSeq_id_Handle primary_id = CollectSynonyms(idh);
1298  TSeqTypeById::const_iterator it = m_SeqTypes.find(primary_id);
1299  if (it != m_SeqTypes.end()) {
1300  return it->second;
1301  }
1302  return GetSeqType(primary_id);
1303 }
1304 
1305 
1306 inline
1309 {
1311 }
1312 
1313 
1314 inline
1316  ESeqType seqtype) const
1317 {
1319 }
1320 
1321 
1322 inline
1324 {
1325  return (GetSeqTypeById(idh) == eSeq_prot) ? 3 : 1;
1326 }
1327 
1328 
1329 inline
1331 {
1333 }
1334 
1335 
1336 inline
1338  : m_SeqInfo(0), m_Options(0) {}
1339 
1340 inline
1342  TMapOptions opts)
1343  : m_SeqInfo(seq_info), m_Options(opts) {}
1344 
1345 inline
1347  : m_SeqInfo(0), m_Options(opts) {}
1348 
1349 inline
1352 {
1353  return m_SeqInfo;
1354 }
1355 
1356 inline
1359 {
1360  m_SeqInfo = seq_info;
1361  return *this;
1362 }
1363 
1364 inline
1366 {
1368 }
1369 
1370 inline
1373 {
1375  return *this;
1376 }
1377 
1378 inline
1380 {
1382 }
1383 
1384 inline
1386 {
1388 }
1389 
1390 inline
1393 {
1395  return *this;
1396 }
1397 
1398 inline
1401 {
1403  return *this;
1404 }
1405 
1406 inline
1408 {
1410 }
1411 
1412 inline
1415 {
1417  return *this;
1418 }
1419 
1420 inline
1422 {
1424 }
1425 
1426 inline
1429 {
1431  return *this;
1432 }
1433 
1434 inline
1436 {
1437  return (m_Options & opt) != 0;
1438 }
1439 
1440 inline
1441 void CSeq_loc_Mapper_Options::x_SetOption(int opt, bool enable)  // Set (enable == true) or clear (enable == false) the bits of 'opt' in m_Options.
1442 {
1443  if ( enable ) {
1444  m_Options |= opt;  // Turn the requested bit(s) on; other bits are untouched.
1445  }
1446  else {
1447  m_Options &= ~opt;  // Turn the requested bit(s) off; other bits are untouched.
1448  }
1449 }
1450 
1451 
1452 /* @} */
1453 
1454 
1457 
1458 #endif // SEQ_LOC_MAPPER_BASE__HPP
User-defined methods of the data storage class.
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
Definition: base.hpp:115
Helper class for mapping graphs.
CMappingRange - describes a single interval to interval mapping.
Storage for multiple mapping ranges.
TIdMap & GetIdMap(void)
Default IMessage implementation: text and severity only.
CObject –.
Definition: ncbiobj.hpp:180
CPacked_seg –.
Definition: Packed_seg.hpp:66
Class used to map seq-alignments.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper_Base –.
CSeq_loc_Mapper_Message.
CSeq_loc_Mapper_Options –.
CSpliced_exon_chunk –.
IMapper_Sequence_Info.
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: set.hpp:45
char value[7]
Definition: config.c:431
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static const char si[8][64]
Definition: des.c:146
std::ofstream out("events_result.xml")
main entry point for tests
CRange< Position > Map(const CRange< Position > &target, const CRange< Position > &range)
Definition: blast_aux.cpp:826
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
void Write(CObjectOStream &out, TConstObjectPtr object, const CTypeRef &type)
Definition: serial.cpp:55
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
CRef< IMapper_Sequence_Info > m_SeqInfo
pair< TFuzz, TFuzz > TRangeFuzz
CSeq_loc_Mapper_Options & SetMapperSequenceInfo(IMapper_Sequence_Info *seq_info)
CSeq_loc_Mapper_Base & SetErrorOnPartial(bool value=true)
Report source range trimming as an error.
void x_SetMiscFlag(EMiscFlags flag, bool value)
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
bool operator()(const CRef< CMappingRange > &x, const CRef< CMappingRange > &y) const
set< CSeq_id_Handle > TSynonyms
virtual void CollectSynonyms(const CSeq_id_Handle &id, TSynonyms &synonyms)=0
Collect all synonyms for the id including the id itself.
ESeqType GetSeqTypeById(const CSeq_id_Handle &idh) const
Methods for getting sequence types, use cached types (m_SeqTypes) if possible.
TRangeMap::const_iterator TRangeIterator
CRef< CGraphRanges > m_GraphRanges
bool operator<(const SMappedRange &rg) const
int GetGroup(void) const
CSeq_loc_Mapper_Base & TruncateNonmappingRanges(void)
map< CSeq_id_Handle, TRangeMap > TIdMap
EMapResult
Result of seq-annot mapping.
CSeq_id_Handle m_Dst_id_Handle
void AddRange(const TRange &rg)
const CSeq_id_Handle & GetDstIdHandle(void) const
CSeq_align::C_Segs::TDendiag TDendiag
list< SMappedRange > TMappedRanges
const TIdMap & GetIdMap() const
TSeqPos GetLength(void) const
CSeq_loc_Mapper_Options & SetTrimMappedLocation(bool value=true)
ESplicedRow
Spliced-seg row indexing constants.
bool operator()(const CRef< CMappingRange > &x, const CRef< CMappingRange > &y) const
IMapper_Sequence_Info * GetMapperSequenceInfo(void) const
Sequence type, length etc.
CMappingRanges::TRangeIterator TRangeIterator
FAnnotMapFlag
Flags defining seq-annot mapping options.
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)=0
Get sequence length or kInvalidSeqPos.
bool GetTrimMappedLocation(void) const
Mapped location trimming at sequence end.
void SetSeqTypeById(const CSeq_id_Handle &idh, ESeqType seqtype) const
Methods for setting sequence types.
CSeq_loc_Mapper_Base & SetGapPreserve(void)
Whether to preserve or remove NULL sub-locations (usually indicating gaps) from the result.
CRange< TSeqPos > TRange
EFeatMapDirection
Mapping direction used when initializing the mapper with a feature.
vector< TMappedRanges > TRangesByStrand
CRangeMultimap< CRef< CMappingRange >, TSeqPos > TRangeMap
CSeq_loc_Mapper_Options m_MapOptions
const CSeq_id_Handle & CollectSynonyms(const CSeq_id_Handle &id) const
map< CSeq_id_Handle, TDstRanges > TDstIdMap
CSeq_loc_Mapper_Base & SetMergeNone(void)
Intervals' merging mode MergeNone and MergeAbutting do not change the order of ranges in the destinat...
CSeq_loc_Mapper_Base & SetTrimSplicedSeg(bool trim)
For mapping spliced-segs only: preserve or trim starting/ending indels.
EMapOptions
Options for interpretations of locations.
void SetReverseSrc(bool value=true)
CSeq_loc_Mapper_Options & SetAlign_Sparse_ToFirst(bool value=true)
CSeq_align::C_Segs::TStd TStd
bool GoodSrcId(const CSeq_id &id) const
Check if the id is on the source sequence.
TSeqPos GetDst_from(void) const
CRange< TSeqPos > TRange
void SetReverseDst(bool value=true)
CSeq_loc_Mapper_Options & SetAlign_Dense_seg_TotalRange(bool value=true)
EObjectType Which(void) const
Check type of the object stored in the message.
map< CSeq_id_Handle, CSeq_id_Handle > TSynonymMap
vector< CRef< CMappingRange > > TSortedMappings
bool x_IsSetOption(int opt) const
CSeq_loc_Mapper_Base::ESeqType TSeqType
bool GetAlign_Dense_seg_TotalRange(void) const
Dense-seg mapping option.
CSeq_loc_Mapper_Base & SetMergeAbutting(void)
Merge only abutting intervals, keep overlapping.
int GetWidthById(const CSeq_id_Handle &idh) const
Get sequence width.
CMappingRange::TRange TRange
bool GetAlign_Sparse_ToFirst(void) const
Mapping direction when mapping through a sparse-seg.
CMappingRanges::TRangeMap TRangeMap
TSeqPos GetOffset(void) const
CSeq_loc_Mapper_Base & SetMergeContained(void)
Merge intervals only if one is completely covered by another.
bool GetReverseSrc(void) const
void IncOffset(TSeqPos inc)
CSeq_loc_Mapper_Base & operator=(const CSeq_loc_Mapper_Base &)
bool x_IsSetMiscFlag(EMiscFlags flag) const
CSeq_loc_Mapper_Base & SetMergeAll(void)
Merge any abutting or overlapping intervals.
map< CSeq_id_Handle, TSeqPos > TLengthMap
bool LastIsPartial(void)
Check if the last mapping resulted in partial location (not all ranges from the original location cou...
const CMappingRanges & GetMappingRanges(void) const
Get mapping ranges.
bool GetReverseDst(void) const
bool GetReverse(void) const
CRef< CInt_fuzz > TFuzz
TGraphRanges m_Ranges
CSeq_loc_Mapper_Base & MixedAlignsAsSpliced(bool value=true)
When set to 'true' if mapped alignment has exactly one genomic and one protein row,...
CMappingRange::TRange TRange
vector< TRange > TGraphRanges
CRef< CSeq_align > x_MapSeq_align(const CSeq_align &src_align, size_t *row)
virtual TSeqType GetSequenceType(const CSeq_id_Handle &idh)=0
Get information about sequence type (nuc or prot).
CSeq_loc_Mapper_Base & SetCheckStrand(bool value=true)
Check strands before mapping a range.
CRef< CMappingRanges > m_Mappings
const TGraphRanges & GetRanges(void) const
TSeqPos GetSrc_from(void) const
CSeq_loc_Mapper_Base & SetMergeBySeg(void)
Merge only intervals from the same group.
SMappedRange(const TRange &rg, const TRangeFuzz &fz, int grp=0)
TIdMap::const_iterator TIdIterator
ESeqType GetSeqType(const CSeq_id_Handle &idh) const
CMappingRanges::TSortedMappings TSortedMappings
CSeq_id_Handle m_Src_id_Handle
void SetGroup(int grp)
map< CSeq_id_Handle, ESeqType > TSeqTypeById
const TRange & GetTotalRange(void) const
map< CSeq_id_Handle, TRangesByStrand > TRangesById
CSeq_loc_Mapper_Options & SetMapSingleLevel(bool value=true)
CSeq_loc_Mapper_Base & SetGapRemove(void)
bool GetAlign_Sparse_ToSecond(void) const
pair< TFuzz, TFuzz > TRangeFuzz
CSeq_loc_Mapper_Base & KeepNonmappingRanges(void)
Keep ranges which can not be mapped.
void x_SetOption(int opt, bool enable)
CSeq_loc_Mapper_Base(const CSeq_loc_Mapper_Base &)
CSeq_loc_Mapper_Base & IncludeSourceLocs(bool value=true)
Include source ranges in the mapped location.
vector< TDstIdMap > TDstStrandMap
CSeq_loc_Mapper_Options & SetAlign_Sparse_ToSecond(bool value=true)
CRef< CSeq_id > GetDstId(void) const
bool GetMapSingleLevel(void) const
Mapping depth when using a seq-map, a bioseq or a GC-assembly.
void SetOffset(TSeqPos offset)
CSeq_loc_Mapper_Base::TSynonyms TSynonyms
@ eMapped_Some
Some (not all) annotations were mapped.
@ eLocationToProduct
Map from the feature's location to product.
@ fAlign_Dense_seg_TotalRange
Ignore internal dense-seg structure - map each dense-seg according to the total ranges involved.
@ fTrimMappedLocation
Enable trimming of source/destination ranges at sequence end.
@ fMapSingleLevel
Flag used when mapping through a seq-map (this includes mapping through a bioseq or a GC-assembly).
@ fAlign_Sparse_ToSecond
Map to second-id.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
#define NCBI_DEPRECATED
bool Empty(void) const
Definition: range.hpp:148
bool IsWhole(void) const
Definition: range.hpp:284
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NCBI_SEQ_EXPORT
Definition: ncbi_export.h:825
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
ELim
some limit value
Definition: Int_fuzz_.hpp:209
list< CRef< CStd_seg > > TStd
Definition: Seq_align_.hpp:196
list< CRef< CSpliced_exon_chunk > > TParts
list< CRef< CDense_diag > > TDendiag
Definition: Seq_align_.hpp:194
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
int len
range(_Ty, _Ty) -> range< _Ty >
const CharType(& source)[N]
Definition: pointer.h:1149
IMessage/IMessageListener interfaces and basic implementations.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
int GetLoc(const string &acc, const string &pat, CSeq_loc &loc, CScope &scope)
int offset
Definition: replacements.h:160
static const char * str(char *buf, int n)
Definition: stats.c:84
Modified on Thu Nov 30 04:56:06 2023 by modify_doxy.py rev. 669887