98 : m_pMessageListener(pListener)
126 string(
"Bad data line: record ID \"") +
id +
"\" is used multiple times");
133 mapIt =
mIds.emplace(
id, list<CGffIdTrackRecord>()).first;
134 mapIt->second.push_back(trackRecord);
135 if (!parentId.empty()) {
140 auto& recordList = mapIt->second;
142 if (pendingType ==
"exon") {
143 recordList.push_back(trackRecord);
144 if (!parentId.empty()) {
152 auto expectedType = recordList.front().mSeqType;
153 if (pendingType != expectedType) {
157 throw errorDuplicateId;
160 auto pendingSeqId = record.
Id();
161 auto expectedSeqId = recordList.front().mSeqId;
162 if (pendingSeqId != expectedSeqId) {
166 throw errorDuplicateId;
170 if (!parentId.empty()) {
173 recordList.push_back(trackRecord);
188 string(
"Bad data line: Parent \"" + parentId +
189 "\" does not refer to a GFF3 record ID"));
194 throw errorBadParentId;
209 mIdResolver(idResolver),
210 mIdTracker(pListener),
211 m_pMessageListener(pListener)
225 auto seqSize = seqSizeIt->second;
232 string message =
"Bad data line: ";
233 message +=
"feature in-point is outside the containing sequence.";
246 string message =
"Bad data line: ";
247 message +=
"feature is longer than the entire containing sequence.";
278 for (
const auto&
id: ids) {
297 LOCATIONS& locations = existingEntry->second;
299 if (locations.size() == 1 && locations.front().mType ==
"gene") {
303 existingEntry->second.push_front(
location);
325 if (recordType ==
"exon") {
335 for (
auto&
id: ids) {
336 id = record.
Type() +
":" + id;
360 const string& seqId)
const
363 auto sizeIt = mSequenceSizes.find(seqId);
364 if (sizeIt == mSequenceSizes.end()) {
367 return sizeIt->second;
378 if (sequenceSize == 0) {
384 pLocation->
SetInt(*pInterval);
389 if (locRecord.
mStart >= sequenceSize || locRecord. mStop < sequenceSize) {
393 pInterval->
SetTo(locRecord.
mStop % sequenceSize);
395 pLocation->
SetInt(*pInterval);
407 pBottom->
SetTo(sequenceSize - 1);
414 if (locRecord.
mStart >= sequenceSize || locRecord.
mStop < sequenceSize) {
418 pInterval->
SetTo(locRecord.
mStop % sequenceSize);
420 pLocation->
SetInt(*pInterval);
426 pBottom->
SetTo(sequenceSize - 1);
450 if (locations.empty()) {
455 if (locations.size() == 1) {
456 auto& onlyOne = locations.front();
458 frame = onlyOne.mFrame;
462 auto& mix = pSeqLoc->
SetMix();
466 const auto&
front = locations.front();
467 frame =
front.mFrame;
486 for (
const auto&
location: locations) {
bool GetAttribute(const string &, string &) const
void GetLocation(const string &, CRef< CSeq_loc > &, CCdregion::EFrame &)
map< string, TSeqPos > mSequenceSizes
CReaderListener * m_pMessageListener
void MergeLocation(CRef< CSeq_loc > &, CCdregion::EFrame &, LOCATIONS &)
CGff3ReadRecord::SeqIdResolver mIdResolver
LOCATION_MAP mMapIdToLocations
TSeqPos GetSequenceSize(const string &) const
CRef< CSeq_loc > xGetRecordLocation(const CGff3LocationRecord &)
bool AddRecord(const CGff2Record &)
static bool xGetLocationIds(const CGff2Record &, list< string > &)
list< CGff3LocationRecord > LOCATIONS
CGff3LocationMerger(unsigned int flags=0, CGff3ReadRecord::SeqIdResolver=CReadUtil::AsSeqId, TSeqPos sequenceSize=0, CReaderListener *pListener=nullptr)
void VerifyRecordLocation(const CGff2Record &)
void AddRecordForId(const string &, const CGff2Record &)
static void xSortLocations(LOCATIONS &)
static bool ComparePositions(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
static bool ComparePartNumbers(const CGff3LocationRecord &lhs, const CGff3LocationRecord &rhs)
CGff3LocationRecord(const CGff2Record &, unsigned int, CGff3ReadRecord::SeqIdResolver)
CRef< CSeq_id > GetSeqId(TReaderFlags, SeqIdResolver=nullptr) const
const string & Type() const
ENa_strand Strand() const
const string & Id() const
const string & NormalizedType() const
CGffIdTracker(CReaderListener *pListener=nullptr)
CReaderListener * m_pMessageListener
map< string, list< CGffIdTrackRecord > > mIds
void CheckAndIndexRecord(string id, const CGff2Record &record)
const_iterator end() const
const_iterator find(const key_type &key) const
static const char location[]
The NCBI C++ standard methods for dealing with std::string.
unsigned int TSeqPos
Type for sequence locations and lengths.
@ eDiag_Error
Error message.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void SetPacked_int(TPacked_int &v)
void SetNull(void)
Override all setters to incorporate cache invalidation.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
@ eFrame_not_set
not set, code uses one
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
virtual void Reset(void)
Reset the whole object.
void SetStrand(TStrand value)
Assign a value to Strand data member.
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept