NCBI C++ ToolKit
gff3_location_merger.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * $Id: gff3_location_merger.hpp 103113 2024-09-10 14:40:47Z foleyjp $
3  *
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Frank Ludwig
29  *
30  */
31 
32 #ifndef _GFF3_LOCATION_MERGER_HPP_
33 #define _GFF3_LOCATION_MERGER_HPP_
34 
35 #include <corelib/ncbistd.hpp>
37 
40 
41 class CGff2Record;
42 class CReaderListener;
43 
44 // ============================================================================
46 // ============================================================================
47 {
48 public:
50  const CGff2Record&,
51  unsigned int,
53 
58  string mType;
59  size_t mPartNum;
61  string mSeqId;
62 
// "Less than" predicate on the mPartNum field: returns true exactly when
// lhs.mPartNum is smaller than rhs.mPartNum (both are size_t).
// NOTE(review): presumably used as a sort comparator for location lists
// (cf. xSortLocations) -- confirm in the implementation file.
63  static bool ComparePartNumbers(
64  const CGff3LocationRecord& lhs,
65  const CGff3LocationRecord& rhs) { return lhs.mPartNum < rhs.mPartNum; };
66 
// Binary predicate with the same signature as ComparePartNumbers.
// Only declared here; the body is not part of this header listing.
// NOTE(review): the name suggests ordering by sequence position -- which
// fields are compared, and how strand is handled, must be confirmed against
// the definition.
67  static bool ComparePositions(
68  const CGff3LocationRecord& lhs,
69  const CGff3LocationRecord& rhs);
70 };
71 
72 // ============================================================================
74 // ============================================================================
75 {
76 public:
78  const CGff2Record& record):
79  mSeqType(record.NormalizedType()),
80  mSeqId(record.Id()),
81  mSeqStart(record.SeqStart()),
82  mSeqStop(record.SeqStop()),
83  mSeqStrand(record.Strand())
84  {};
85 
86  string mSeqType;
87  string mSeqId;
91 };
92 
93 // ============================================================================
95 // ============================================================================
96 {
97 public:
// Constructor. pListener is optional and defaults to nullptr.
// NOTE(review): not marked explicit, so a CReaderListener* converts
// implicitly to a CGffIdTracker -- confirm that is intended.
98  CGffIdTracker(CReaderListener* pListener=nullptr);
99 
// Overload taking an explicit id alongside the record. Declared only; the
// body is not visible in this listing.
// Note that id is passed by value, so each call copies the string.
// NOTE(review): presumably validates the record against previously seen
// records with the same id and then stores it -- confirm in the .cpp.
100  void CheckAndIndexRecord(
101  string id,
102  const CGff2Record& record);
103 
// Overload taking only the record. Declared only; the body is not visible in
// this listing.
// NOTE(review): presumably derives the id from the record itself and
// forwards to the two-argument overload -- confirm in the .cpp.
104  void CheckAndIndexRecord(
105  const CGff2Record& record);
106 
// Takes no arguments and returns nothing. Declared only.
// NOTE(review): what is checked, and how problems are reported (listener
// vs. exception), is not visible here -- confirm in the .cpp.
107  void CheckIntegrity();
108 
109 private:
113 };
114 
115 
116 // ============================================================================
118 // ============================================================================
119 {
120  using LOCATIONS = list<CGff3LocationRecord>;
122 
123 public:
125  unsigned int flags =0,
127  TSeqPos sequenceSize =0,
128  CReaderListener* pListener=nullptr);
129 
// Empties mMapIdToLocations. This is the only member touched here; in
// particular, entries stored in mSequenceSizes are left as they are.
130  void Reset() {
131  mMapIdToLocations.clear();
132  };
133 
135  const string& seqId,
136  TSeqPos sequenceSize) { mSequenceSizes[seqId] = sequenceSize; }
137 
// Declared only; the body is not visible in this listing.
// NOTE(review): the meaning of the bool result (e.g. "record was accepted")
// is not visible here -- confirm in the .cpp.
138  bool AddRecord(
139  const CGff2Record&);
140 
// Declared only; takes a string and a record, both by const reference.
// NOTE(review): presumably the string is the key under which the record's
// location is filed in the id-to-locations map -- confirm in the .cpp.
141  void AddRecordForId(
142  const string&,
143  const CGff2Record&);
144 
// Declared only; returns nothing, so any failure must be signalled some other
// way.
// NOTE(review): whether that is an exception or a listener message is not
// visible here -- confirm in the .cpp.
145  void VerifyRecordLocation(
146  const CGff2Record&);
147 
// Accessor returning a non-const reference to mMapIdToLocations, so callers
// can read and modify the stored map directly.
148  LOCATION_MAP& LocationMap() { return mMapIdToLocations; }
149 
150  void GetLocation(
151  const string&,
154 
155  void MergeLocation(
158  LOCATIONS&);
159 
161  if (mSequenceSizes.size() == 1) {
162  return mSequenceSizes.begin()->second;
163  }
164  return 0;
165  }
166 
// Const member returning a TSeqPos for the given string. Declared only.
// NOTE(review): presumably looks the id up in mSequenceSizes; the result for
// an unknown id is not visible here -- confirm in the .cpp.
167  TSeqPos GetSequenceSize(
168  const string&) const;
169 
// Takes no arguments and returns nothing. Declared only.
// NOTE(review): what is validated and how failures are reported is not
// visible in this listing -- confirm in the .cpp.
170  void Validate();
171 
172 private:
// Private static helper; declared only.
// NOTE(review): the non-const list<string>& looks like an output parameter
// receiving the ids found for the record, with the bool reporting whether
// any were found -- confirm in the .cpp.
173  static bool xGetLocationIds(
174  const CGff2Record&,
175  list<string>&);
176 
// Private non-static helper returning a CRef<CSeq_loc> for one location
// record. Declared only.
// NOTE(review): whether the returned CRef can be null is not visible here --
// confirm in the .cpp.
177  CRef<CSeq_loc> xGetRecordLocation(
178  const CGff3LocationRecord&);
179 
// Private static helper taking a LOCATIONS (list<CGff3LocationRecord>) by
// non-const reference. Declared only.
// NOTE(review): presumably reorders the list in place, using
// ComparePartNumbers and/or ComparePositions -- confirm in the .cpp.
180  static void xSortLocations(
181  LOCATIONS&);
182 
183  unsigned int mFlags;
186 
189  CReaderListener* m_pMessageListener=nullptr;
190 };
191 
194 
195 #endif // _GFF3_LOCATION_MERGER_HPP_
map< string, TSeqPos > mSequenceSizes
CGff3ReadRecord::SeqIdResolver mIdResolver
LOCATION_MAP & LocationMap()
void SetSequenceSize(const string &seqId, TSeqPos sequenceSize)
list< CGff3LocationRecord > LOCATIONS
CCdregion::EFrame mFrame
static bool ComparePositions(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
static bool ComparePartNumbers(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
CConstRef< CSeq_id > mpGffId
CGff3LocationRecord(const CGff2Record &, unsigned int, CGff3ReadRecord::SeqIdResolver)
CGffIdTrackRecord(const CGff2Record &record)
CGffIdTracker(CReaderListener *pListener=nullptr)
CReaderListener * m_pMessageListener
map< string, list< CGffIdTrackRecord > > mIds
set< string > mParentIds
void CheckAndIndexRecord(string id, const CGff2Record &record)
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based on the given customization flags.
Definition: read_util.cpp:89
CRef –.
Definition: ncbiobj.hpp:618
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
Modified on Fri Sep 20 14:58:26 2024 by modify_doxy.py rev. 669887