NCBI C++ ToolKit
bed_writer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bed_writer.cpp 95055 2021-10-01 16:39:33Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write bed file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
43 
44 #include <objmgr/scope.hpp>
45 #include <objmgr/annot_ci.hpp>
46 #include <objmgr/feat_ci.hpp>
47 #include <objmgr/mapped_feat.hpp>
48 
54 
57 
58 // ----------------------------------------------------------------------------
60  const CSeq_feat& feature)
61 // ----------------------------------------------------------------------------
62 {
63  string threeFeatType;
64  if (!feature.IsSetId() || !feature.GetId().IsLocal()
65  || !feature.GetId().GetLocal().IsId()) {
66  return false;
67  }
68  if (!CWriteUtil::GetThreeFeatType(feature, threeFeatType)) {
69  return false;
70  }
71  bool assigned = false;
72  if (threeFeatType == "chrom") {
73  mpChrom.Reset(new CSeq_feat);
74  mpChrom->Assign(feature);
75  assigned = true;
76  }
77  if (threeFeatType == "thick") {
78  mpThick.Reset(new CSeq_feat);
79  mpThick->Assign(feature);
80  assigned = true;
81  }
82  if (threeFeatType == "block") {
84  mpBlocks->Assign(feature);
85  assigned = true;
86  }
87  if (!assigned) {
88  return false;
89  }
90  int featId = feature.GetId().GetLocal().GetId();
91  xAddFound(featId);
92  if (!feature.IsSetXref()) {
93  return true;
94  }
95  for (CRef<CSeqFeatXref> pXref: feature.GetXref()) {
96  if (!pXref->IsSetId() || !pXref->GetId().IsLocal() ||
97  !pXref->GetId().GetLocal().IsId()) {
98  continue;
99  }
100  int featId = pXref->GetId().GetLocal().GetId();
101  xAddAll(featId);
102  }
103  return true;
104 };
105 
106 // ----------------------------------------------------------------------------
108 // ----------------------------------------------------------------------------
109 {
110  return (mFeatsFound.size() == mFeatsAll.size());
111 }
112 
113 // ----------------------------------------------------------------------------
114 bool
116  CBedFeatureRecord& bedRecord) const
117 // ----------------------------------------------------------------------------
118 {
119  bedRecord = CBedFeatureRecord();
120  if (!mpChrom) {
121  return false;
122  }
123  if (!bedRecord.SetLocation(mpChrom->GetLocation())) {
124  return false;
125  }
126  if (!bedRecord.SetName(mpChrom->GetData())) {
127  return false;
128  }
129  int score;
130  if (!CWriteUtil::GetThreeFeatScore(*mpChrom, score)) {
131  score = 0;
132  }
133  if (!bedRecord.SetScore(score)) {
134  return false;
135  }
136  if (mpThick) {
137  if (!bedRecord.SetThick(mpThick->GetLocation())) {
138  return false;
139  }
140  }
141  else {
142  if (!bedRecord.SetNoThick(mpChrom->GetLocation())) {
143  return false;
144  }
145  }
146  string color;
147  if (CWriteUtil::GetThreeFeatRgb(*mpChrom, color)) {
148  if (!bedRecord.SetRgb(color)) {
149  return false;
150  }
151  }
152  if (mpBlocks) {
153  if (!bedRecord.SetBlocks(
154  mpChrom->GetLocation(), mpBlocks->GetLocation())) {
155  return false;
156  }
157  }
158  return true;
159 }
160 
161 // ----------------------------------------------------------------------------
162 bool
164  int featId)
165 //
166 // Expectation: featId is not listed as found already
167 // ----------------------------------------------------------------------------
168 {
169  vector<int>::iterator it = std::find(
170  mFeatsFound.begin(), mFeatsFound.end(), featId);
171  if (it != mFeatsFound.end()) {
172  return false;
173  }
174  mFeatsFound.push_back(featId);
175  return xAddAll(featId);
176 }
177 
178 // ----------------------------------------------------------------------------
179 bool
181  int featId)
182 // ----------------------------------------------------------------------------
183 {
184  vector<int>::iterator it = std::find(
185  mFeatsAll.begin(), mFeatsAll.end(), featId);
186  if (it == mFeatsAll.end()) {
187  mFeatsAll.push_back(featId);
188  }
189  return true;
190 }
191 
192 // ----------------------------------------------------------------------------
194  const CSeq_feat& feature)
195 // ----------------------------------------------------------------------------
196 {
197  RECORD_IT it = xFindExistingRecord(feature);
198  if (it == mRecords.end()) {
199  RECORD_IT addIt = xAddRecord(feature);
200  return (addIt != mRecords.end());
201  }
202  else {
203  return it->AddFeature(feature);
204  }
205 }
206 
207 // ----------------------------------------------------------------------------
209  const CSeq_feat& feature)
210 // ----------------------------------------------------------------------------
211 {
212  RECORD_IT it = xFindExistingRecord(feature);
213  if (it == mRecords.end()) {
214  return false;
215  }
216  return (it->IsRecordComplete());
217 }
218 
219 // ----------------------------------------------------------------------------
220 bool
222  const CSeq_feat& feature,
223  CBedFeatureRecord& bedRecord)
224 // ----------------------------------------------------------------------------
225 {
226  RECORD_IT it = xFindExistingRecord(feature);
227  if (it == mRecords.end()) {
228  return false;
229  }
230  if (!it->GetBedFeature(bedRecord)) {
231  return false;
232  }
233  mRecords.erase(it);
234  return true;
235 }
236 
237 // ----------------------------------------------------------------------------
238 bool
240  CBedFeatureRecord& bedRecord)
241 // ----------------------------------------------------------------------------
242 {
243  if (mRecords.empty()) {
244  return false;
245  }
246  RECORD_IT it = mRecords.end() - 1;
247  if (!it->GetBedFeature(bedRecord)) {
248  return false;
249  }
250  mRecords.erase(it);
251  return true;
252 }
253 
254 // ----------------------------------------------------------------------------
257  const CSeq_feat& feature)
258 // ----------------------------------------------------------------------------
259 {
260  if (!feature.IsSetId() || !feature.GetId().IsLocal()
261  || !feature.GetId().GetLocal().IsId()) {
262  return mRecords.end();
263  }
264  int featId = feature.GetId().GetLocal().GetId();
265  for (RECORD_IT it = mRecords.begin(); it != mRecords.end(); ++it) {
266  vector<int>::iterator iit = std::find(
267  it->mFeatsAll.begin(), it->mFeatsAll.end(), featId);
268  if (iit != it->mFeatsAll.end()) {
269  return it;
270  }
271  }
272  return mRecords.end();
273 }
274 
275 // ----------------------------------------------------------------------------
278  const CSeq_feat& feature)
279 // ----------------------------------------------------------------------------
280 {
281  CThreeFeatRecord threeFeatRecord;
282  if (!threeFeatRecord.AddFeature(feature)) {
283  return mRecords.end();
284  }
285  mRecords.push_back(threeFeatRecord);
286  return (mRecords.end() - 1);
287 }
288 
289 // ----------------------------------------------------------------------------
291  CScope& scope,
292  CNcbiOstream& ostr,
293  unsigned int colCount,
294  unsigned int uFlags ) :
295 // ----------------------------------------------------------------------------
296  CWriterBase(ostr, uFlags),
297  m_Scope(scope),
298  m_colCount(colCount)
299 {
300  // the first three columns are mandatory
301  if (m_colCount < 3) {
302  m_colCount = 3;
303  }
304 };
305 
306 // ----------------------------------------------------------------------------
308 // ----------------------------------------------------------------------------
309 {
310 };
311 
312 // ----------------------------------------------------------------------------
313 
314 // ----------------------------------------------------------------------------
// CBedWriter::WriteAnnot (the line carrying the function name, listing line
// 315, is absent from this listing).
//
// Writes a track line for the annotation, followed by its features.
// The two string parameters are unnamed and unused.
// Returns false if no track record can be built from the annotation;
// otherwise returns whatever xWriteTrackedAnnot reports.
316  const CSeq_annot& annot,
317  const string&,
318  const string& )
319 // ----------------------------------------------------------------------------
320 {
// Every call starts from a column count of 6, replacing the value the
// constructor stored.
321  m_colCount = 6;
// A user object with an integer field "NCBI_BED_COLUMN_COUNT" among the
// annotation's descriptors overrides that default. If several descriptors
// carry the field, the last one wins.
// NOTE(review): the value is stored as-is; the "at least 3 columns" floor the
// constructor applies is not re-applied here, and the field's int is
// converted to unsigned - confirm that callers never supply a small or
// negative count.
322  if( annot.CanGetDesc() ) {
323  ITERATE(CAnnot_descr::Tdata, DescIter, annot.GetDesc().Get()) {
324  const CAnnotdesc& desc = **DescIter;
325  if(desc.IsUser()) {
326  if(desc.GetUser().HasField("NCBI_BED_COLUMN_COUNT")) {
327  CConstRef< CUser_field > field = desc.GetUser().GetFieldRef("NCBI_BED_COLUMN_COUNT");
328  if(field && field->CanGetData() && field->GetData().IsInt()) {
329  m_colCount = field->GetData().GetInt();
330  }
331  }
332  }
333  }
334  }
335 
// The track line is written before any feature.
336  CBedTrackRecord track;
337  if ( ! track.Assign(annot) ) {
338  return false;
339  }
340  track.Write(m_Os);
341 
// NOTE(review): listing lines 342 and 344 are absent. `sah` is not declared
// in the visible text; presumably the annotation is added to m_Scope to
// obtain the handle and removed again after writing - confirm against the
// repository copy of this file.
343  bool result = xWriteTrackedAnnot(track, sah);
345  return result;
346 }
347 
348 // ----------------------------------------------------------------------------
350  CSeq_entry_Handle seh,
351  const string& strAssemblyName,
352  const string& strAssemblyAccession )
353 // ----------------------------------------------------------------------------
354 {
355  CBedTrackRecord track;
356 
357  SAnnotSelector sel;
358  for (CAnnot_CI aci(seh, sel); aci; ++aci) {
359  auto sah = *aci;
360  if (track.Assign(*sah.GetCompleteSeq_annot()) ) {
361  track.Write(m_Os);
362  }
363 
364  if (!xWriteTrackedAnnot(track, sah)) {
365  return false;
366  }
367  }
368  return true;
369 }
370 
371 // ----------------------------------------------------------------------------
// CBedWriter::xWriteTrackedAnnot (the line carrying the function name,
// listing line 372, is absent from this listing).
//
// Writes all features of one annotation, root features first and each root
// followed by its descendants. Returns false as soon as any feature fails to
// write.
373  const CBedTrackRecord& track,
374  const CSeq_annot_Handle& sah)
375 // ----------------------------------------------------------------------------
376 {
// The manager is local to this call, so three-feature records left incomplete
// at the end of the annotation are discarded without being written.
377  CThreeFeatManager threeFeatManager;
// The format is decided once per annotation, not per feature.
378  bool isThreeFeatData = CWriteUtil::IsThreeFeatFormat(*sah.GetSeq_annotCore());
// NOTE(review): listing line 379 is absent. `sel` is not declared in the
// visible text; presumably it is obtained from SetAnnotSelector() - confirm
// against the repository copy of this file.
380  CFeat_CI pMf(sah, sel);
381  feature::CFeatTree featTree(pMf);
382  vector<CMappedFeat> vRoots = featTree.GetRootFeatures();
// Roots are put into the order defined by CWriteUtil::CompareFeatures before
// anything is written.
383  std::sort(vRoots.begin(), vRoots.end(), CWriteUtil::CompareFeatures);
384  for (auto pit = vRoots.begin(); pit != vRoots.end(); ++pit) {
// mRoot is a copy that is never read afterwards; the calls below use *pit.
385  CMappedFeat mRoot = *pit;
386  if (isThreeFeatData) {
387  if (!xWriteFeaturesThreeFeatData(threeFeatManager, featTree, *pit)) {
388  return false;
389  }
390  }
391  else {
392  if (!xWriteFeaturesTracked(track, featTree, *pit)) {
393  return false;
394  }
395  }
396  }
397  return true;
398 }
399 
400 // ----------------------------------------------------------------------------
// CBedWriter::xWriteFeaturesThreeFeatData (the line carrying the function
// name, listing line 401, is absent from this listing).
//
// Feeds one feature into the three-feature manager and, once the record that
// feature belongs to is complete, writes that record as a BED line and
// continues with the feature's children.
// Returns false if the feature is rejected by the manager or the BED line
// cannot be written. Throws (code eInterrupted) if processing was canceled.
402  CThreeFeatManager& threeFeatManager,
403  feature::CFeatTree& featTree,
404  const CMappedFeat& mf)
405 // ----------------------------------------------------------------------------
406 {
407  CBedFeatureRecord bedRecord;
408 
// NOTE(review): listing line 411, the exception class argument of NCBI_THROW,
// is absent from this listing - confirm against the repository copy.
409  if (IsCanceled()) {
410  NCBI_THROW(
412  eInterrupted,
413  "Processing terminated by user");
414  }
415  const CSeq_feat& feature = mf.GetOriginalFeature();
416  if (!threeFeatManager.AddFeature(feature)) {
417  return false;
418  }
// An incomplete record is not an error: nothing is written yet, and the
// children of this feature are not visited from here.
419  if (!threeFeatManager.IsRecordComplete(feature)) {
420  return true;
421  }
// A complete record that cannot be converted is skipped and reported as
// success; the manager keeps the record in that case.
422  if (!threeFeatManager.ProcessRecord(feature, bedRecord)) {
423  return true;
424  }
425  if (!bedRecord.Write(m_Os, m_colCount)) {
426  return false;
427  }
428  return xWriteChildrenThreeFeatData(threeFeatManager, featTree, mf);
429 }
430 
431 // ----------------------------------------------------------------------------
433  CThreeFeatManager& threeFeatManager,
434  feature::CFeatTree& featTree,
435  const CMappedFeat& mf)
436  // ----------------------------------------------------------------------------
437 {
438  vector<CMappedFeat> vChildren;
439  featTree.GetChildrenTo(mf, vChildren);
440  for (auto cit = vChildren.begin(); cit != vChildren.end(); ++cit) {
441  CMappedFeat mChild = *cit;
442  if (!xWriteFeaturesThreeFeatData(threeFeatManager, featTree, mChild)) {
443  return false;
444  }
445  if (!xWriteChildrenThreeFeatData(threeFeatManager, featTree, mChild)) {
446  return false;
447  }
448  }
449  return true;
450 }
451 
452 // ----------------------------------------------------------------------------
454  const CBedTrackRecord& track,
455  feature::CFeatTree& featTree,
456  const CMappedFeat& mf)
457  // ----------------------------------------------------------------------------
458 {
459  CBedFeatureRecord record;
460  if (!record.AssignName(mf)) {
461  return false;
462  }
463  if (!record.AssignDisplayData(mf, track.UseScore())) {
464  // feature did not contain display data ---
465  // Is there any alternative way to populate some of the bed columns?
466  // For now, keep going, emit at least the locations ...
467  }
468 
469  CRef<CSeq_loc> pPackedInt(new CSeq_loc(CSeq_loc::e_Mix));
470  pPackedInt->Add(mf.GetLocation());
471  CWriteUtil::ChangeToPackedInt(*pPackedInt);
472 
473  if (!pPackedInt->IsPacked_int() || !pPackedInt->GetPacked_int().CanGet()) {
474  // nothing to do
475  return true;
476  }
477 
478  const list<CRef<CSeq_interval> >& sublocs = pPackedInt->GetPacked_int().Get();
479  list<CRef<CSeq_interval> >::const_iterator it;
480  for (it = sublocs.begin(); it != sublocs.end(); ++it) {
481  if (!record.AssignLocation(m_Scope, **it) || !record.Write(m_Os, m_colCount)) {
482  return false;
483  }
484  }
485  return xWriteChildrenTracked(track, featTree, mf);
486 }
487 
488 // ----------------------------------------------------------------------------
490  const CBedTrackRecord& track,
491  feature::CFeatTree& featTree,
492  const CMappedFeat& mf)
493  // ----------------------------------------------------------------------------
494 {
495  vector<CMappedFeat> vChildren;
496  featTree.GetChildrenTo(mf, vChildren);
497  for (auto cit = vChildren.begin(); cit != vChildren.end(); ++cit) {
498  CMappedFeat mChild = *cit;
499  if (!xWriteFeaturesTracked(track, featTree, mChild)) {
500  return false;
501  }
502  if (!xWriteChildrenTracked(track, featTree, mChild)) {
503  return false;
504  }
505  }
506  return true;
507 }
508 
509 
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
USING_SCOPE(objects)
CAnnot_CI –.
Definition: annot_ci.hpp:59
CAnnotdesc –.
Definition: Annotdesc.hpp:66
Encapsulation of the BED feature record.
bool AssignName(const CMappedFeat &)
bool Write(CNcbiOstream &, unsigned int)
bool AssignLocation(CScope &, const CSeq_interval &)
bool AssignDisplayData(const CMappedFeat &, bool)
Encapsulation of a track line in the BED file format.
bool UseScore() const
bool Assign(const CSeq_annot &)
bool Write(CNcbiOstream &)
CBedWriter(CScope &scope, CNcbiOstream &ostr, unsigned int colCount=12, unsigned int flags=fNormal)
Constructor.
Definition: bed_writer.cpp:290
unsigned int m_colCount
Definition: bed_writer.hpp:196
bool xWriteChildrenThreeFeatData(CThreeFeatManager &, feature::CFeatTree &, const CMappedFeat &)
Definition: bed_writer.cpp:432
bool xWriteFeaturesThreeFeatData(CThreeFeatManager &, feature::CFeatTree &, const CMappedFeat &)
Definition: bed_writer.cpp:401
virtual ~CBedWriter()
Definition: bed_writer.cpp:307
bool xWriteFeaturesTracked(const CBedTrackRecord &, feature::CFeatTree &, const CMappedFeat &)
Definition: bed_writer.cpp:453
bool WriteAnnot(const CSeq_annot &, const string &="", const string &="") override
Write a raw Seq-annot to the internal output stream.
Definition: bed_writer.cpp:315
bool xWriteTrackedAnnot(const CBedTrackRecord &, const CSeq_annot_Handle &)
Definition: bed_writer.cpp:372
bool xWriteChildrenTracked(const CBedTrackRecord &, feature::CFeatTree &, const CMappedFeat &)
Definition: bed_writer.cpp:489
CScope & m_Scope
Definition: bed_writer.hpp:195
bool WriteSeqEntryHandle(CSeq_entry_Handle seh, const string &asmblyName="", const string &asmblyAccession="") override
Write a Seq-entry handle to the internal output stream.
Definition: bed_writer.cpp:349
CFeat_CI –.
Definition: feat_ci.hpp:64
bool IsCanceled() const
Definition: writer.hpp:62
CMappedFeat –.
Definition: mapped_feat.hpp:59
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
bool ProcessRecord(const CSeq_feat &, CBedFeatureRecord &)
Definition: bed_writer.cpp:221
RECORD_IT xAddRecord(const CSeq_feat &)
Definition: bed_writer.cpp:277
RECORD_IT xFindExistingRecord(const CSeq_feat &)
Definition: bed_writer.cpp:256
RECORDS::iterator RECORD_IT
Definition: bed_writer.hpp:79
bool AddFeature(const CSeq_feat &)
Definition: bed_writer.cpp:193
bool GetAnyRecord(CBedFeatureRecord &)
Definition: bed_writer.cpp:239
bool IsRecordComplete(const CSeq_feat &)
Definition: bed_writer.cpp:208
bool AddFeature(const CSeq_feat &)
Definition: bed_writer.cpp:59
bool xAddFound(int)
Definition: bed_writer.cpp:163
bool GetBedFeature(CBedFeatureRecord &) const
Definition: bed_writer.cpp:115
bool IsRecordComplete() const
Definition: bed_writer.cpp:107
CRef< CSeq_feat > mpChrom
Definition: bed_writer.hpp:66
CRef< CSeq_feat > mpThick
Definition: bed_writer.hpp:67
vector< int > mFeatsAll
Definition: bed_writer.hpp:69
CRef< CSeq_feat > mpBlocks
Definition: bed_writer.hpp:68
bool xAddAll(int)
Definition: bed_writer.cpp:180
vector< int > mFeatsFound
Definition: bed_writer.hpp:70
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
static void ChangeToPackedInt(CSeq_loc &loc)
Definition: write_util.cpp:622
static bool CompareFeatures(const CMappedFeat &lhs, const CMappedFeat &rhs)
static bool GetThreeFeatType(const CSeq_feat &, string &)
static bool GetThreeFeatRgb(const CSeq_feat &, string &)
static bool IsThreeFeatFormat(const CSeq_annot &)
static bool GetThreeFeatScore(const CSeq_feat &, int &)
Defines and provides stubs for a general interface to a variety of file formatters.
Definition: writer.hpp:81
virtual SAnnotSelector & SetAnnotSelector(void)
Definition: writer.hpp:246
CNcbiOstream & m_Os
Definition: writer.hpp:267
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3875
void RemoveSeq_annot(const CSeq_annot_Handle &annot)
Revoke Seq-annot previously added using AddSeq_annot().
Definition: scope.cpp:388
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
Definition: scope.cpp:538
CConstRef< CSeq_annot > GetSeq_annotCore(void) const
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TData & GetData(void) const
Get the Data member data.
bool CanGetData(void) const
Check if it is safe to call GetData method.
bool IsInt(void) const
Check if variant Int is selected.
TInt GetInt(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
const Tdata & Get(void) const
Get the member data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
bool CanGet(void) const
Check if it is safe to call Get method.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
const Tdata & Get(void) const
Get the member data.
const TUser & GetUser(void) const
Get the variant data.
Definition: Annotdesc_.cpp:184
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Seq_annot_.hpp:852
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
Definition: Seq_annot_.hpp:846
bool IsUser(void) const
Check if variant User is selected.
Definition: Annotdesc_.hpp:561
list< CRef< CAnnotdesc > > Tdata
n background color
constexpr auto sort(_Init &&init)
SAnnotSelector –.
else result
Definition: token2.c:20
#define const
Definition: zconf.h:230
Modified on Thu Dec 07 10:09:11 2023 by modify_doxy.py rev. 669887