NCBI C++ ToolKit
gff3_annot_assembler.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff3_annot_assembler.cpp 88364 2019-11-27 03:06:29Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description: Assemble imported GFF3 feature records into the feature table of a Seq-annot
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbifile.hpp>
35 
44 
46 #include "feat_util.hpp"
47 #include "featid_generator.hpp"
48 #include "gff3_annot_assembler.hpp"
49 
50 #include <assert.h>
51 
54 
55 // ============================================================================
57  CImportMessageHandler& errorReporter):
58 // ============================================================================
59  CFeatAnnotAssembler(errorReporter)
60 {
61 }
62 
63 // ============================================================================
// NOTE(review): the signature line (embedded line 64) is absent from this
// listing. Given the empty body directly after the constructor, this is
// presumably the CGff3AnnotAssembler destructor -- TODO confirm against the
// original file.
65 // ============================================================================
66 {
67 }
68 
69 // ============================================================================
// Dispatches one imported record to the handler that matches the subtype of
// the feature it carries.
// NOTE(review): the qualified-name line (embedded line 71) is missing from
// this listing; the cross-reference at the end of the page names this method
// ProcessRecord(const CFeatImportData&, CSeq_annot&).
//   record_ : import record; treated as a CGff3ImportData.
//   annot   : annotation handed through to the selected handler.
70 void
72  const CFeatImportData& record_,
73  CSeq_annot& annot)
74 // ============================================================================
75 {
// The dynamic type is verified only through assert(); the static_cast that
// follows is unchecked.
76  assert(dynamic_cast<const CGff3ImportData*>(&record_));
77  const CGff3ImportData& record = static_cast<const CGff3ImportData&>(record_);
78 
// Pull out the record's own ID, its parent ID and the feature it carries.
79  auto recordId = record.Id();
80  auto parentId = record.Parent();
81  auto pFeature = record.GetData();
82 
83  switch (pFeature->GetData().GetSubtype()) {
84  default:
85  return xProcessFeatureDefault(recordId, parentId, pFeature, annot);
// NOTE(review): the case labels that select the three handlers below
// (embedded lines 86, 88-93 and 95) are missing from this listing; restore
// them from the original file before building.
87  return xProcessFeatureExon(recordId, parentId, pFeature, annot);
94  return xProcessFeatureRna(recordId, parentId, pFeature, annot);
96  return xProcessFeatureCds(recordId, parentId, pFeature, annot);
97  }
98 }
99 
100 // ============================================================================
101 void
103  const std::string& recordId,
104  const std::string& parentId,
105  CRef<CSeq_feat> pFeature,
106  CSeq_annot& annot)
107 // ============================================================================
108 {
109  string featureType = CSeqFeatData::SubtypeValueToName(
110  pFeature->GetData().GetSubtype());
111  NStr::ToLower(featureType);
112  pFeature->SetId(*mIdGenerator.GetIdFor(featureType));
113  annot.SetData().SetFtable().push_back(pFeature);
114  if (!recordId.empty()) {
115  mFeatureMap.AddFeature(recordId, pFeature);
116  }
117 }
118 
119 
120 // ============================================================================
121 void
123  const std::string& recordId,
124  const std::string& parentId,
125  CRef<CSeq_feat> pFeature,
126  CSeq_annot& annot)
127 // ============================================================================
128 {
129  auto pExistingCds = mFeatureMap.FindFeature(recordId);
130  if (pExistingCds) {
131  // add new piece to existing piece
132  CRef<CSeq_loc> pUpdatedLocation = FeatUtil::AddLocations(
133  pExistingCds->GetLocation(), pFeature->GetLocation());
134  pExistingCds->SetLocation().Assign(*pUpdatedLocation);
135 
136  // update frame if necessary
137  auto cdsStrand = pExistingCds->GetLocation().GetStrand();
138  auto& existingCds = pExistingCds->SetData().SetCdregion();
139  const auto& newCds = pFeature->GetData().GetCdregion();
140  if (cdsStrand == eNa_strand_plus) {
141  auto existingStart =
142  pExistingCds->GetLocation().GetStart(eExtreme_Positional);
143  auto contributedStart =
145  if (existingStart == contributedStart) {
146  existingCds.SetFrame(newCds.GetFrame());
147  }
148  }
149  else if (cdsStrand == eNa_strand_minus) {
150  auto existingStop =
151  pExistingCds->GetLocation().GetStart(eExtreme_Positional);
152  auto contributedStop =
154  if (existingStop == contributedStop) {
155  existingCds.SetFrame(newCds.GetFrame());
156  }
157  }
158  }
159  else {
160  pFeature->SetId(*mIdGenerator.GetIdFor("cds"));
161  annot.SetData().SetFtable().push_back(pFeature);
162  if (!recordId.empty()) {
163  mFeatureMap.AddFeature(recordId, pFeature);
164  }
165  if (!recordId.empty() && !parentId.empty()) {
166  mXrefMap[recordId] = parentId;
167  }
168  }
169 }
170 
171 
172 // ============================================================================
// Handler for RNA records.
// NOTE(review): the qualified-name line (embedded line 174) is missing from
// this listing; the cross-reference at the end of the page names this method
// xProcessFeatureRna.
//   recordId : ID of the record (may be empty).
//   parentId : ID of the parent record (may be empty).
//   pFeature : RNA feature described by the record.
//   annot    : annotation whose feature table receives the feature.
173 void
175  const std::string& recordId,
176  const std::string& parentId,
177  CRef<CSeq_feat> pFeature,
178  CSeq_annot& annot)
179 // ============================================================================
180 {
// Add the feature to the table, make it findable by record ID and remember
// the child -> parent link for the xref pass in FinalizeAnnot.
181  annot.SetData().SetFtable().push_back(pFeature);
182  if (!recordId.empty()) {
183  mFeatureMap.AddFeature(recordId, pFeature);
184  }
185  if (!recordId.empty() && !parentId.empty()) {
186  mXrefMap[recordId] = parentId;
187  }
188 
// Assign a generated "mrna" ID and flag the location as pending, so that
// xProcessFeatureExon overwrites it with the first exon it sees.
189  pFeature->SetId(*mIdGenerator.GetIdFor("mrna"));
190  xMarkLocationPending(*pFeature);
191 
// Exons that were filed under this record's ID before the RNA itself showed
// up are folded into the RNA's location now.
192  vector<CRef<CSeq_feat>> pendingExons;
193  if (!mPendingFeatures.FindPendingFeatures(recordId, pendingExons)) {
194  return;
195  }
196  for (auto pExon: pendingExons) {
197  CRef<CSeq_loc> pUpdatedLocation = FeatUtil::AddLocations(
198  pFeature->GetLocation(), pExon->GetLocation());
199  pFeature->SetLocation().Assign(*pUpdatedLocation);
200  }
// NOTE(review): embedded line 201 is missing from this listing; presumably
// it is the mPendingFeatures.MarkFeaturesDone(recordId) call listed in the
// page's cross-reference -- TODO confirm against the original file.
202 }
203 
204 
205 // ============================================================================
206 void
208  const std::string& recordId,
209  const std::string& parentId,
210  CRef<CSeq_feat> pFeature,
211  CSeq_annot& annot)
212 // ============================================================================
213 {
214  auto pParentRna = mFeatureMap.FindFeature(parentId);
215  if (pParentRna) {
216  if (xIsLocationPending(*pParentRna)) {
217  pParentRna->SetLocation().Assign(pFeature->GetLocation());
218  xUnmarkLocationPending(*pParentRna);
219  }
220  else {
221  CRef<CSeq_loc> pUpdatedLocation = FeatUtil::AddLocations(
222  pParentRna->GetLocation(), pFeature->GetLocation());
223  pParentRna->SetLocation().Assign(*pUpdatedLocation);
224  }
225  }
226  else {
227  mPendingFeatures.AddFeature(parentId, pFeature);
228  }
229 }
230 
231 // ============================================================================
232 void
234  const CAnnotImportData& annotData,
235  CSeq_annot& annot)
236 // ============================================================================
237 {
238  // generate crefs between genes, mRNAs, and coding regions:
239  for (auto entry: mXrefMap) {
240  auto childId = entry.first;
241  auto parentId = entry.second;
242 
243  auto pChild = mFeatureMap.FindFeature(childId);
244  auto pParent = mFeatureMap.FindFeature(parentId);
245  if (!pChild || !pParent) {
246  continue;
247  }
248  pChild->AddSeqFeatXref(pParent->GetId());
249  pParent->AddSeqFeatXref(pChild->GetId());
250 
251  auto itGrandParent = mXrefMap.find(parentId);
252  if (itGrandParent == mXrefMap.end()) {
253  continue;
254  }
255  auto grandParentId = itGrandParent->second;
256  auto pGrandParent = mFeatureMap.FindFeature(grandParentId);
257  if (!pGrandParent) {
258  continue;
259  }
260  pChild->AddSeqFeatXref(pGrandParent->GetId());
261  pGrandParent->AddSeqFeatXref(pChild->GetId());
262  pParent->AddSeqFeatXref(pGrandParent->GetId());
263  pGrandParent->AddSeqFeatXref(pParent->GetId());
264  }
265 
266  // remove any remaining "under construction" markers:
267  auto& ftable = annot.SetData().SetFtable();
268  for (auto& pFeature: ftable) {
269  xUnmarkLocationPending(*pFeature);
270  }
271 }
272 
273 // ============================================================================
274 bool
276  const CSeq_feat& feat)
277 // ============================================================================
278 {
279  if (!feat.IsSetQual()) {
280  return false;
281  }
282  for (const auto& pQual: feat.GetQual()) {
283  if (pQual->IsSetQual() && pQual->GetQual() == "__location_pending") {
284  return true;
285  }
286  }
287  return false;
288 }
289 
290 // ============================================================================
291 void
293  CSeq_feat& feat)
294 // ============================================================================
295 {
296  feat.AddQualifier("__location_pending", "true");
297 }
298 
299 // ============================================================================
300 void
302  CSeq_feat& feat)
303 // ============================================================================
304 {
305  feat.RemoveQualifier("__location_pending");
306 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
CRef< CFeat_id > GetIdFor(const string &recType)
virtual void FinalizeAnnot(const CAnnotImportData &, CSeq_annot &) override
static bool xIsLocationPending(const CSeq_feat &)
CGff3PendingFeatureList mPendingFeatures
static void xUnmarkLocationPending(CSeq_feat &)
void xProcessFeatureDefault(const std::string &, const std::string &, CRef< CSeq_feat >, CSeq_annot &)
void ProcessRecord(const CFeatImportData &, CSeq_annot &) override
CFeatureIdGenerator mIdGenerator
map< string, string > mXrefMap
void xProcessFeatureCds(const std::string &, const std::string &, CRef< CSeq_feat >, CSeq_annot &)
CGff3AnnotAssembler(CImportMessageHandler &)
void xProcessFeatureExon(const std::string &, const std::string &, CRef< CSeq_feat >, CSeq_annot &)
void xProcessFeatureRna(const std::string &, const std::string &, CRef< CSeq_feat >, CSeq_annot &)
static void xMarkLocationPending(CSeq_feat &)
void AddFeature(const std::string &id, CRef< CSeq_feat > pFeature)
CRef< CSeq_feat > FindFeature(const std::string &id)
CRef< CSeq_feat > GetData() const
std::string Id() const
std::string Parent() const
bool FindPendingFeatures(const std::string &id, std::vector< CRef< CSeq_feat >> &features)
void AddFeature(const std::string &id, CRef< CSeq_feat > pFeature)
void MarkFeaturesDone(const std::string &id)
ESubtype GetSubtype(void) const
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
Definition: Seq_feat.cpp:291
void RemoveQualifier(const string &qual_name)
Remove all qualifiers with the given name; do nothing if no such qualifier exists.
Definition: Seq_feat.cpp:315
bool AddSeqFeatXref(const CSeqFeatXref::TId &id)
Definition: Seq_feat.cpp:279
static CRef< CSeq_loc > AddLocations(const CSeq_loc &, const CSeq_loc &)
Definition: feat_util.cpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
USING_SCOPE(objects)
string
Definition: cgiapp.hpp:687
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1135
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
const TCdregion & GetCdregion(void) const
Get the variant data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
#define assert(x)
Definition: srv_diag.hpp:58
#define ftable
Definition: utilfeat.h:37
Modified on Tue Oct 03 02:50:49 2023 by modify_doxy.py rev. 669887