NCBI C++ ToolKit
bed_feature_record.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bed_feature_record.cpp 93574 2021-04-30 16:19:19Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write bed file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 
41 
42 #include <objmgr/mapped_feat.hpp>
43 #include <objmgr/util/sequence.hpp>
44 
48 
51 
52 #define BUMP( x, y ) if ( x < (y) ) x = (y)
53 
54 // ----------------------------------------------------------------------------
56 // ----------------------------------------------------------------------------
57  m_uColumnCount( 0 ),
58  m_strChrom( "." ),
59  m_strChromStart( "." ),
60  m_strChromEnd( "." ),
61  m_strName( "." ),
62  m_strScore( "0" ),
63  m_strStrand( "." ),
64  m_strThickStart( "." ),
65  m_strThickEnd( "." ),
66  m_strItemRgb( "." ),
67  m_strBlockCount( "." ),
68  m_strBlockSizes( "." ),
69  m_strBlockStarts( "." )
70 {
71 }
72 
73 // ----------------------------------------------------------------------------
75 // ----------------------------------------------------------------------------
76 {
77 }
78 
79 // ----------------------------------------------------------------------------
80 const CGene_ref&
82  const CMappedFeat& mf)
83 // ----------------------------------------------------------------------------
84 {
85  static const CGene_ref noRef;
86  if (mf.GetData().IsGene()) {
87  return mf.GetData().GetGene();
88  }
89  if (mf.IsSetXref()) {
90  const auto& xrefs = mf.GetXref();
91  for (auto it = xrefs.begin(); it != xrefs.end(); ++it) {
92  const auto& xref = **it;
93  if (xref.CanGetData() && xref.GetData().IsGene()) {
94  return xref.GetData().GetGene();
95  }
96  }
97  }
99  if (gene && gene.IsSetData() && gene.GetData().IsGene()) {
100  return gene.GetData().GetGene();
101  }
102  return noRef;
103 }
104 
105 
106 // ----------------------------------------------------------------------------
108  const CMappedFeat& mf)
109 // ----------------------------------------------------------------------------
110 {
111  //auto featType = mf.GetFeatSubtype();
112  //if (featType == CSeqFeatData::eSubtype_gene) {
113  // cerr << "";
114  //}
115  if (mf.GetData().IsRegion()) {
116  m_strName = mf.GetData().GetRegion();
117  std::transform(m_strName.begin(), m_strName.end(), m_strName.begin(),
118  [](char c) -> char { return std::isspace(c) ? '_' : c; });
119  return true;
120  }
121  const auto& geneRef = sGetClosestGeneRef(mf);
122  if (geneRef.IsSetLocus()) {
123  m_strName = geneRef.GetLocus();
124  return true;
125  }
126  if (geneRef.IsSetLocus_tag()) {
127  m_strName = geneRef.GetLocus_tag();
128  return true;
129  }
130 
131  // try harder only if the given feature itself is a gene:
132  if (!mf.GetData().IsGene()) {
133  return true;
134  }
135  if (geneRef.IsSetDesc()) {
136  m_strName = geneRef.GetDesc();
137  }
138  if (geneRef.IsSetSyn()) {
139  m_strName = geneRef.GetSyn().front();
140  return true;
141  }
142  return true;
143 }
144 
145 // ----------------------------------------------------------------------------
147  const CMappedFeat& mf,
148  bool bUseScore )
149 // ----------------------------------------------------------------------------
150 {
151  if ( ! mf.GetData().IsUser() ) {
152  return false;
153  }
154  const CUser_object& uo = mf.GetData().GetUser();
155  if ( ! uo.IsSetType() || ! uo.GetType().IsStr() ||
156  uo.GetType().GetStr() != "Display Data" ) {
157  return false;
158  }
159  const vector< CRef< CUser_field > >& fields = uo.GetData();
160  vector< CRef< CUser_field > >::const_iterator it = fields.begin();
161  for ( ; it != fields.end(); ++it ) {
162  if ( ! (*it)->CanGetLabel() || ! (*it)->GetLabel().IsStr() ) {
163  continue;
164  }
165  string strLabel = (*it)->GetLabel().GetStr();
166  if ( strLabel == "name" ) {
167  BUMP( m_uColumnCount, 4 );
168  if ( (*it)->IsSetData() && (*it)->GetData().IsStr() ) {
169  m_strName = (*it)->GetData().GetStr();
170  }
171  continue;
172  }
173  if ( strLabel == "score" && ! bUseScore ) {
174  BUMP( m_uColumnCount, 5 );
175  if ( (*it)->IsSetData() && (*it)->GetData().IsInt() ) {
176  m_strScore = NStr::UIntToString( (*it)->GetData().GetInt() );
177  }
178  continue;
179  }
180  if ( strLabel == "greylevel" && bUseScore ) {
181  BUMP( m_uColumnCount, 5 );
182  if ( (*it)->IsSetData() && (*it)->GetData().IsInt() ) {
183  m_strScore = NStr::UIntToString( (*it)->GetData().GetInt() );
184  }
185  continue;
186  }
187  if ( strLabel == "thickStart" ) {
188  BUMP( m_uColumnCount, 7 );
189  if ( (*it)->IsSetData() && (*it)->GetData().IsInt() ) {
190  m_strThickStart = NStr::UIntToString( (*it)->GetData().GetInt());
191  }
192  continue;
193  }
194  if ( strLabel == "thickEnd" ) {
195  BUMP( m_uColumnCount, 8 );
196  if ( (*it)->IsSetData() && (*it)->GetData().IsInt() ) {
197  m_strThickEnd = NStr::UIntToString( (*it)->GetData().GetInt()+1);
198  }
199  continue;
200  }
201  if ( strLabel == "itemRGB" ) {
202  BUMP( m_uColumnCount, 9 );
203  if ( (*it)->IsSetData() && (*it)->GetData().IsInt() ) {
204  m_strItemRgb = NStr::UIntToString( (*it)->GetData().GetInt() );
205  continue;
206  }
207  if ( (*it)->IsSetData() && (*it)->GetData().IsStr() ) {
208  m_strItemRgb = (*it)->GetData().GetStr();
209  continue;
210  }
211  continue;
212  }
213  if ( strLabel == "blockCount" ) {
214  BUMP( m_uColumnCount, 10 );
215  if ( (*it)->IsSetData() && (*it)->GetData().IsInt() ) {
216  m_strBlockCount = NStr::UIntToString( (*it)->GetData().GetInt() );
217  }
218  continue;
219  }
220  if ( strLabel == "blockSizes" ) {
221  BUMP( m_uColumnCount, 11 );
222  if ( (*it)->IsSetData() && (*it)->GetData().IsStr() ) {
223  m_strBlockSizes = (*it)->GetData().GetStr();
224  }
225  continue;
226  }
227  if ( strLabel == "blockStarts" ) {
228  BUMP( m_uColumnCount, 12 );
229  if ( (*it)->IsSetData() && (*it)->GetData().IsStr() ) {
230  m_strBlockStarts = (*it)->GetData().GetStr();
231  }
232  continue;
233  }
234  }
235  return true;
236 }
237 
238 // ----------------------------------------------------------------------------
240  CScope& scope,
241  const CSeq_interval& interval )
242 // ----------------------------------------------------------------------------
243 {
244  if ( interval.CanGetId() ) {
245  m_strChrom = interval.GetId().GetSeqIdString(true);
246  try {
247  string bestId;
249  CSeq_id_Handle::GetHandle(interval.GetId()), scope, bestId);
250  m_strChrom = bestId;
251  // its OK for this to silently fail.
252  // Seq-ids that dont look up are just meant to be output directly
253  } catch(...) { ; }
254  }
255  if ( interval.IsSetFrom() ) {
257  }
258  if ( interval.IsSetTo() ) {
259  m_strChromEnd = NStr::UIntToString( interval.GetTo() + 1 );
260  }
261  BUMP( m_uColumnCount, 3 );
262  m_strStrand = "+";
263  if (interval.IsSetStrand()) {
264  ENa_strand strand = interval.GetStrand();
265  if (strand == eNa_strand_minus) {
266  m_strStrand = "-";
267  }
268  }
269  return true;
270 }
271 
272 // ----------------------------------------------------------------------------
274  CNcbiOstream& ostr,
275  unsigned int columnCount)
276 // ----------------------------------------------------------------------------
277 {
278  ostr << Chrom();
279  ostr << "\t" << ChromStart();
280  ostr << "\t" << ChromEnd();
281  if ( columnCount >= 4 ) {
282  ostr << "\t" << Name();
283  }
284  if ( columnCount >= 5 ) {
285  ostr << "\t" << Score();
286  }
287  if ( columnCount >= 6 ) {
288  ostr << "\t" << Strand();
289  }
290  if ( columnCount >= 7 ) {
291  ostr << "\t" << ThickStart();
292  }
293  if ( columnCount >= 8 ) {
294  ostr << "\t" << ThickEnd();
295  }
296  if ( columnCount >= 9 ) {
297  ostr << "\t" << ItemRgb();
298  }
299  if ( columnCount >= 10 ) {
300  ostr << "\t" << BlockCount();
301  }
302  if ( columnCount >= 11 ) {
303  ostr << "\t" << BlockSizes();
304  }
305  if ( columnCount >= 12 ) {
306  ostr << "\t" << BlockStarts();
307  }
308  ostr << '\n';
309  return true;
310 }
311 
312 
313 // ----------------------------------------------------------------------------
315  const CSeq_loc& loc)
316 // ----------------------------------------------------------------------------
317 {
318  string bestId;
319  if (!CGenbankIdResolve::Get().GetBestId(loc, bestId)) {
320  return false;
321  }
322  if (loc.IsInt()) {
323  const CSeq_interval& chromInt = loc.GetInt();
324  m_strChrom = bestId;
326  m_strChromEnd = NStr::IntToString(chromInt.GetTo()+1);
327  m_strStrand = "+";
328  if (chromInt.IsSetStrand()) {
329  ENa_strand strand = chromInt.GetStrand();
330  if (strand == eNa_strand_minus) {
331  m_strStrand = "-";
332  }
333  }
334  return true;
335  }
336 
337  if (loc.IsPnt()) {
338  const CSeq_point& chromPt = loc.GetPnt();
339  m_strChrom = bestId;
342  m_strStrand = "+";
343  if (chromPt.IsSetStrand()) {
344  ENa_strand strand = chromPt.GetStrand();
345  if (strand == eNa_strand_minus) {
346  m_strStrand = "-";
347  }
348  }
349  return true;
350  }
351  return false;
352 }
353 
354 // -----------------------------------------------------------------------------
356  const CSeqFeatData& data)
357 // -----------------------------------------------------------------------------
358 {
359  if (data.IsRegion()) {
360  m_strName = data.GetRegion();
361  return true;
362  }
363  if (data.IsGene()) {
364  }
365  return false;
366 }
367 
368 // -----------------------------------------------------------------------------
370  int score)
371 // -----------------------------------------------------------------------------
372 {
373  m_strScore = NStr::IntToString(score);
374  return true;
375 }
376 
377 // ----------------------------------------------------------------------------
379  const CSeq_loc& loc)
380 // ----------------------------------------------------------------------------
381 {
382  if (loc.IsInt()) {
383  const CSeq_interval& thickInt = loc.GetInt();
385  m_strThickEnd = NStr::IntToString(thickInt.GetTo()+1);
386  return true;
387  }
388  if (loc.IsPnt()) {
389  const CSeq_point& thickPoint = loc.GetPnt();
391  m_strThickEnd = NStr::IntToString(thickPoint.GetPoint()+1);
392  return true;
393  }
394  return false;
395 }
396 
397 // -----------------------------------------------------------------------------
399  const CSeq_loc& chrom,
400  const CSeq_loc& blocks)
401 // -----------------------------------------------------------------------------
402 {
403  if (blocks.IsInt()) {
404  return true;
405  }
406  if (blocks.IsPacked_int()) {
407  const list<CRef<CSeq_interval> >& intervals = blocks.GetPacked_int().Get();
408  ENa_strand strand = blocks.GetStrand();
409  int offset = chrom.GetStart(eExtreme_Positional);
410  list<string> blockStarts;
411  list<string> blockSizes;
412 
413  for (auto pInterval: intervals) {
414  const CSeq_interval& interval = *pInterval;
415  if (strand == eNa_strand_minus) {
416  blockStarts.push_front(NStr::NumericToString(interval.GetFrom()-offset));
417  blockSizes.push_front(NStr::NumericToString(interval.GetLength()));
418  }
419  else {
420  blockStarts.push_back(NStr::NumericToString(interval.GetFrom()-offset));
421  blockSizes.push_back(NStr::NumericToString(interval.GetLength()));
422  }
423  }
424  m_strBlockCount = NStr::NumericToString(intervals.size());
425  m_strBlockStarts = NStr::Join(blockStarts, ",");
426  m_strBlockSizes = NStr::Join(blockSizes, ",");
427  return true;
428  }
429  return false;
430 }
431 
432 // -----------------------------------------------------------------------------
434  const CSeq_loc& loc)
435 // -----------------------------------------------------------------------------
436 {
437  if (loc.IsInt()) {
438  const CSeq_interval& chromInt = loc.GetInt();
441  return true;
442  }
443 
444  if (loc.IsPnt()) {
445  const CSeq_point& chromPt = loc.GetPnt();
448  return true;
449  }
450  return false;
451 }
452 
453 // -----------------------------------------------------------------------------
455  const string& color)
456 // -----------------------------------------------------------------------------
457 {
458  if (color == "0 0 0") {
459  m_strItemRgb = "0";
460  return true;
461  }
462  vector<string> rgb;
463  NStr::Split(color, " ", rgb);
464  m_strItemRgb = NStr::Join(rgb, ",");
465  return true;
466 }
467 
468 // -----------------------------------------------------------------------------
469 
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
const CGene_ref & sGetClosestGeneRef(const CMappedFeat &mf)
USING_SCOPE(objects)
#define BUMP(x, y)
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
bool AssignName(const CMappedFeat &)
bool SetThick(const CSeq_loc &)
const string & BlockCount() const
bool SetLocation(const CSeq_loc &)
const string & Chrom() const
bool SetBlocks(const CSeq_loc &, const CSeq_loc &)
const string & ChromEnd() const
bool SetName(const CSeqFeatData &)
const string & ItemRgb() const
const string & ChromStart() const
bool Write(CNcbiOstream &, unsigned int)
const string & Strand() const
bool AssignLocation(CScope &, const CSeq_interval &)
bool AssignDisplayData(const CMappedFeat &, bool)
const string & BlockStarts() const
const string & BlockSizes() const
bool SetRgb(const string &)
const string & ThickEnd() const
const string & Name() const
const string & Score() const
const string & ThickStart() const
bool SetNoThick(const CSeq_loc &)
static CGenbankIdResolve & Get()
bool GetBestId(CSeq_id_Handle, CScope &, string &)
CMappedFeat –.
Definition: mapped_feat.hpp:59
CScope –.
Definition: scope.hpp:92
TSeqPos GetLength(void) const
int offset
Definition: replacements.h:160
char data[12]
Definition: iconv.c:80
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
CMappedFeat GetBestGeneForFeat(const CMappedFeat &feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3443
const CSeq_feat::TXref & GetXref(void) const
const CSeqFeatData & GetData(void) const
bool IsSetXref(void) const
bool IsSetData(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2699
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5103
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
const TType & GetType(void) const
Get the Type member data.
bool IsUser(void) const
Check if variant User is selected.
const TRegion & GetRegion(void) const
Get the variant data.
bool IsGene(void) const
Check if variant Gene is selected.
const TUser & GetUser(void) const
Get the variant data.
const TGene & GetGene(void) const
Get the variant data.
bool IsRegion(void) const
Check if variant Region is selected.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
TFrom GetFrom(void) const
Get the From member data.
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_point_.hpp:350
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool CanGetId(void) const
Check if it is safe to call GetId method.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Definition: Seq_point_.hpp:331
TStrand GetStrand(void) const
Get the Strand member data.
TTo GetTo(void) const
Get the To member data.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
n background color
CConstRef< CSeq_id > GetBestId(const CBioseq &bioseq)
static DP_BlockInfo * blocks
Modified on Fri Sep 20 14:58:29 2024 by modify_doxy.py rev. 669887