NCBI C++ ToolKit
gff3_location_merger.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * $Id: gff3_location_merger.hpp 100597 2023-08-15 14:28:46Z foleyjp $
3  *
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Frank Ludwig
29  *
30  */
31 
32 #ifndef _GFF3_LOCATION_MERGER_HPP_
33 #define _GFF3_LOCATION_MERGER_HPP_
34 
35 #include <corelib/ncbistd.hpp>
37 
40 
41 class CGff2Record;
42 class CReaderListener;
43 
// NOTE(review): the class-head line (listing line 45) is missing from this
// extracted listing. The members below refer to CGff3LocationRecord, which is
// presumably the class being declared here -- confirm against the real header.
44 // ============================================================================
46 // ============================================================================
47 {
48 public:
// Parameters of a constructor whose name line (listing line 49) and last
// parameter (listing line 52) are missing. The member index at the end of this
// listing gives the full signature as
// CGff3LocationRecord(const CGff2Record&, unsigned int, CGff3ReadRecord::SeqIdResolver).
50  const CGff2Record&,
51  unsigned int,
53 
// Presumably the copy constructor's parameter (its name line, listing line 54,
// is missing) -- TODO confirm.
55  const CGff3LocationRecord&);
56 
// Data members. Listing lines 57-60 and 63 are missing, so this is not the
// complete member list (the index also mentions a CCdregion::EFrame mFrame).
61  string mType;
62  size_t mPartNum;
64  string mSeqId;
65 
// Ordering predicate: true when lhs.mPartNum is strictly less than rhs.mPartNum.
66  static bool ComparePartNumbers(
67  const CGff3LocationRecord& lhs,
68  const CGff3LocationRecord& rhs) { return lhs.mPartNum < rhs.mPartNum; };
69 
// Second ordering predicate; only declared here, its definition is not visible
// in this listing.
70  static bool ComparePositions(
71  const CGff3LocationRecord& lhs,
72  const CGff3LocationRecord& rhs);
73 };
74 
// NOTE(review): the class-head line (listing line 76) is missing from this
// extracted listing. The member index at the end of the listing shows a
// constructor CGffIdTrackRecord(const CGff2Record &record), so this is
// presumably class CGffIdTrackRecord -- confirm against the real header.
75 // ============================================================================
77 // ============================================================================
78 {
79 public:
// Constructor (its name line, listing line 80, is missing). Initializes the
// members from the record's NormalizedType(), Id(), SeqStart(), SeqStop()
// and Strand() accessors.
81  const CGff2Record& record):
82  mSeqType(record.NormalizedType()),
83  mSeqId(record.Id()),
84  mSeqStart(record.SeqStart()),
85  mSeqStop(record.SeqStop()),
86  mSeqStrand(record.Strand())
87  {};
88 
// Data members. The declarations of mSeqStart, mSeqStop and mSeqStrand
// (listing lines 91-93) are missing from this listing.
89  string mSeqType;
90  string mSeqId;
94 };
95 
// NOTE(review): the class-head line (listing line 97) is missing from this
// extracted listing; the constructor below shows the class is CGffIdTracker.
96 // ============================================================================
98 // ============================================================================
99 {
100 public:
// Constructor; the listener pointer is optional and defaults to nullptr.
101  CGffIdTracker(CReaderListener* pListener=nullptr);
102 
// Overload taking an explicit id string together with the record.
// Only declared here; behavior is defined elsewhere.
103  void CheckAndIndexRecord(
104  string id,
105  const CGff2Record& record);
106 
// Overload taking only the record. Only declared here.
107  void CheckAndIndexRecord(
108  const CGff2Record& record);
109 
// Only declared here; definition not visible in this listing.
110  void CheckIntegrity();
111 
112 private:
// The private member declarations (listing lines 113-115) are missing. The
// member index at the end of this listing mentions mIds, mParentIds and
// m_pMessageListener, which presumably belong here -- TODO confirm.
116 };
117 
118 
// NOTE(review): the class-head line (listing line 120) is missing from this
// extracted listing. Judging by the file name this is presumably
// class CGff3LocationMerger -- confirm against the real header.
119 // ============================================================================
121 // ============================================================================
122 {
// List of location records. The LOCATION_MAP alias used further down
// (presumably on the missing listing line 124) is not visible here.
123  using LOCATIONS = list<CGff3LocationRecord>;
125 
126 public:
// Constructor parameters (the name line 127 and the parameter on line 129
// are missing). All visible parameters have defaults: flags 0,
// sequenceSize 0, pListener nullptr.
128  unsigned int flags =0,
130  TSeqPos sequenceSize =0,
131  CReaderListener* pListener=nullptr);
132 
// Empties mMapIdToLocations. mSequenceSizes is not touched here.
133  void Reset() {
134  mMapIdToLocations.clear();
135  };
136 
// Setter whose name line (listing line 137) is missing; the member index
// lists it as void SetSequenceSize(const string&, TSeqPos). Stores (or
// overwrites) the size recorded for seqId in mSequenceSizes.
138  const string& seqId,
139  TSeqPos sequenceSize) { mSequenceSizes[seqId] = sequenceSize; }
140 
// The following three methods are only declared here.
141  bool AddRecord(
142  const CGff2Record&);
143 
144  void AddRecordForId(
145  const string&,
146  const CGff2Record&);
147 
148  void VerifyRecordLocation(
149  const CGff2Record&);
150 
// Returns a mutable reference to the internal id-to-locations map.
151  LOCATION_MAP& LocationMap() { return mMapIdToLocations; }
152 
// Declaration only; the remaining parameters (listing lines 155-156) are
// missing from this listing.
153  void GetLocation(
154  const string&,
157 
// Declaration only; the leading parameters (listing lines 159-160) are
// missing from this listing.
158  void MergeLocation(
161  LOCATIONS&);
162 
// Body of an inline accessor whose signature line (listing line 163) is
// missing. Returns the stored size when mSequenceSizes holds exactly one
// entry, and 0 otherwise (including when it is empty or has several).
164  if (mSequenceSizes.size() == 1) {
165  return mSequenceSizes.begin()->second;
166  }
167  return 0;
168  }
169 
// Per-id size lookup; declaration only.
170  TSeqPos GetSequenceSize(
171  const string&) const;
172 
173  void Validate();
174 
175 private:
// Private helpers; all are declarations only.
176  static bool xGetLocationIds(
177  const CGff2Record&,
178  list<string>&);
179 
180  CRef<CSeq_loc> xGetRecordLocation(
181  const CGff3LocationRecord&);
182 
183  static void xSortLocations(
184  LOCATIONS&);
185 
// Data members. Listing lines 187-188 and 190-191 are missing; the member
// index mentions mIdResolver and mSequenceSizes, and the code above uses
// mMapIdToLocations, so those are presumably declared on the missing lines.
186  unsigned int mFlags;
189 
// Listener pointer; initialized to nullptr in-class.
192  CReaderListener* m_pMessageListener=nullptr;
193 };
194 
197 
198 #endif // _GFF3_LOCATION_MERGER_HPP_
map< string, TSeqPos > mSequenceSizes
CGff3ReadRecord::SeqIdResolver mIdResolver
LOCATION_MAP & LocationMap()
void SetSequenceSize(const string &seqId, TSeqPos sequenceSize)
list< CGff3LocationRecord > LOCATIONS
CCdregion::EFrame mFrame
static bool ComparePositions(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
static bool ComparePartNumbers(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
CGff3LocationRecord(const CGff2Record &, unsigned int, CGff3ReadRecord::SeqIdResolver)
CGffIdTrackRecord(const CGff2Record &record)
CGffIdTracker(CReaderListener *pListener=nullptr)
CReaderListener * m_pMessageListener
map< string, list< CGffIdTrackRecord > > mIds
set< string > mParentIds
void CheckAndIndexRecord(string id, const CGff2Record &record)
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based on the given customization flags.
Definition: read_util.cpp:89
CRef –.
Definition: ncbiobj.hpp:618
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
Modified on Thu Dec 07 10:07:35 2023 by modify_doxy.py rev. 669887