NCBI C++ ToolKit
feattree_sample.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feattree_sample.cpp 90014 2020-05-04 17:30:22Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7 
8  * This software/database is a "United States Government Work" under the
9  * terms of the United States Copyright Act. It was written as part of
10  * the author's official duties as a United States Government employee and
11  * thus cannot be copyrighted. This software/database is freely available
12  * to the public for use. The National Library of Medicine and the U.S.
13  * Government have not placed any restriction on its use or reproduction.
14  *
15  * Although all reasonable efforts have been taken to ensure the accuracy
16  * and reliability of the software and data, the NLM and the U.S.
17  * Government do not and cannot warrant the performance or results that
18  * may be obtained by using this software or data. The NLM and the U.S.
19  * Government disclaim all warranties, express or implied, including
20  * warranties of performance, merchantability or fitness for any particular
21  * purpose.
22  *
23  * Please cite the author in any work or product based on this material.
24  *
25  * ===========================================================================
26  *
27  * Authors: David McElhany
28  *
29  * File Description:
30  * Demonstrate using the CFeatTree class. Specifically: create the feature
31  * tree, then iterate over the tree mapping each feature to the best gene for
32  * the feature. In this demo, the output is simple information about the
33  * selected Bioseq, the features and genes found, and the size of the map.
34  * In real applications, something more useful would be done with the map.
35  * The point of this demo is just to show the mechanics of using the feature
36  * tree.
37  *
38  */
39 
#include <ncbi_pch.hpp>

#include <corelib/ncbi_param.hpp>
#include <corelib/ncbiapp.hpp>
#include <corelib/ncbiargs.hpp>
#include <corelib/ncbimisc.hpp>
#include <corelib/ncbistr.hpp>
#include <corelib/ncbistre.hpp>

#include <objects/seqfeat/SeqFeatData.hpp>

#include <objmgr/bioseq_handle.hpp>
#include <objmgr/feat_ci.hpp>
#include <objmgr/object_manager.hpp>
#include <objmgr/scope.hpp>
#include <objmgr/util/create_defline.hpp>
#include <objmgr/util/feature.hpp>
#include <objmgr/util/sequence.hpp>

#include <objtools/data_loaders/genbank/gbloader.hpp>

#include <serial/objistr.hpp>


USING_NCBI_SCOPE;
USING_SCOPE(ncbi::objects);
USING_SCOPE(ncbi::objects::sequence);
70 
71 
72 /////////////////////////////////////////////////////////////////////////////
73 // CFeatTreeSampleApp::
74 
76 {
77 private:
78  virtual void Init(void);
79  virtual int Run(void);
80  virtual void Exit(void);
81 
82  // Helper function to get a unique string for a Seq-feat (the ASN.1 data).
83  static string FeatString(CConstRef<CSeq_feat> seq_feat)
84  {
85  CNcbiOstrstream oss;
86  oss << MSerial_AsnText << *seq_feat;
87  return CNcbiOstrstreamToString(oss);
88  }
89 
93 };
94 
95 
96 /////////////////////////////////////////////////////////////////////////////
97 // Initialize configuration parameters
98 
100 {
101  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
102 
103  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
104  "Demonstrate CFeatTree class.");
105 
106  arg_desc->AddDefaultKey("gi", "gi",
107  "The gi for the Bioseq of interest, e.g. 455025.",
108  CArgDescriptions::eIntId, "455025");
109 
110  arg_desc->AddDefaultKey("from", "from",
111  "The starting position of the range.",
113 
114  arg_desc->AddDefaultKey("to", "to",
115  "The ending position of the range.",
117 
118  // Setup arg.descriptions for this application.
119  SetupArgDescriptions(arg_desc.release());
120 
121  // Get the arguments.
122  const CArgs& args = GetArgs();
123 
124  // Get configuration.
125  m_gi = GI_FROM(TIntId, args["gi"].AsIntId());
126  m_range_from = args["from"].AsInteger();
127  m_range_to = args["to"].AsInteger();
128 }
129 
130 
131 /////////////////////////////////////////////////////////////////////////////
132 // Run the application
133 
135 {
136  // Get access to the object manager.
138 
139  // Register the GenBank data loader with the OM.
141 
142  // Create a new scope and add default loaders (GB loader).
143  CScope scope(*object_manager);
144  scope.AddDefaults();
145 
146  // Create a Seq-id and set it to the specified GI.
147  CSeq_id seq_id;
148  seq_id.SetGi(m_gi);
149 
150  // Get a Bioseq handle for the Seq-id.
151  CBioseq_Handle bioseq_handle = scope.GetBioseqHandle(seq_id);
152  if (bioseq_handle.GetState()) {
153  // print blob state:
154  cout << "Bioseq state: 0x"
155  << hex << bioseq_handle.GetState()
156  << dec << endl;
157  return 1;
158  }
160  cout << "Title: " << gen.GenerateDefline(bioseq_handle) << endl;
161 
162  // Construct the Seq-loc to get features for.
163  CSeq_loc seq_loc;
164  if (m_range_from == 0 && m_range_to == 0) {
165  seq_loc.SetWhole().SetGi(m_gi);
166  cout << "Searching whole bioseq, gi|" << m_gi << " length = "
167  << bioseq_handle.GetBioseqLength() << endl;
168  } else {
169  seq_loc.SetInt().SetId(seq_id);
170  seq_loc.SetInt().SetFrom(m_range_from);
171  seq_loc.SetInt().SetTo(m_range_to);
172  cout << "Searching bioseq interval, gi|" << m_gi << ":"
173  << seq_loc.GetInt().GetFrom() << "-"
174  << seq_loc.GetInt().GetTo() << endl;
175  }
176 
177  // Make a selector to limit features to those of interest.
178  SAnnotSelector sel;
179  sel.SetResolveAll();
180  sel.SetAdaptiveDepth(true);
181 
182  // Exclude SNP's and STS's since they won't add anything interesting
183  // but could significantly degrade performance.
184  sel.ExcludeNamedAnnots("SNP");
185  sel.ExcludeNamedAnnots("STS");
186 
187  // Use a CFeat_CI iterator to iterate through all selected features.
188  CFeat_CI feat_it(CFeat_CI(scope, seq_loc, sel));
189  cout << feat_it.GetSize() << " features found." << endl;
190 
191  // Create the feature tree and add to it the features found by the
192  // feature iterator.
193  feature::CFeatTree feat_tree;
194  feat_tree.AddFeatures(feat_it);
195 
196  // Create some data structures to map features to genes
197  // and track unique genes found.
198  map< string, string > feattree_map;
199  set< string, less<string> > gene_set;
200 
201  // Find the best gene for each feature using CFeatTree.
202  // Loop through all the features, mapping each feature to the best
203  // related gene, if any.
205  for (feat_it.Rewind(); feat_it; ++feat_it) {
206 
207  // Get the underlying Seq-feat.
208  CConstRef<CSeq_feat> seq_feat_ref(feat_it->GetSeq_feat());
209 
210  // Get a unique string representation of this feature.
211  string feat_str(FeatString(seq_feat_ref));
212 
213  // Add this feature and its best gene to the feature map.
214  // First, find the best gene.
215 
216  // Is this feature itself a gene?
217  if (seq_feat_ref->GetData().GetSubtype() ==
219  gene.Reset(seq_feat_ref);
220  } else {
221  // Does this feature have a parent that's a gene?
222  CMappedFeat parent(feat_tree.GetParent(*feat_it,
224  if (parent) {
225  gene.Reset(parent.GetSeq_feat());
226  } else {
227  // No gene found for this feature.
228  continue;
229  }
230  }
231 
232  // Get a unique string representation for the best gene.
233  string gene_str(FeatString(gene));
234 
235  // Now map this feature to its best gene.
236  feattree_map[feat_str] = gene_str;
237  gene_set.insert(gene_str);
238  }
239 
240  // For this demo, just print the size of the map and number of genes.
241  // For real applications, do something interesting with the results.
242  cout << feattree_map.size() << " features mapped to genes." << endl;
243  cout << gene_set.size() << " unique genes found." << endl;
244 
245  return 0;
246 }
247 
248 
249 /////////////////////////////////////////////////////////////////////////////
250 // Cleanup
251 
253 {
254  // Do your after-Run() cleanup here
255 }
256 
257 
258 /////////////////////////////////////////////////////////////////////////////
259 // MAIN
260 
261 int NcbiSys_main(int argc, ncbi::TXChar* argv[])
262 {
263  // Execute main application function
264  return CFeatTreeSampleApp().AppMain(argc, argv);
265 }
size_t GetSize(void) const
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
Class for computing sequences' titles ("definitions").
virtual void Exit(void)
Cleanup on application exit.
static string FeatString(CConstRef< CSeq_feat > seq_feat)
virtual void Init(void)
Initialize the application.
virtual int Run(void)
Run the application.
CFeat_CI –.
Definition: feat_ci.hpp:64
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
size_type size() const
Definition: map.hpp:148
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
size_type size() const
Definition: set.hpp:132
The NCBI C++ standard methods for dealing with std::string.
API (CDeflineGenerator) for computing sequences' titles ("definitions").
USING_SCOPE(ncbi::objects)
int NcbiSys_main(int argc, ncbi::TXChar *argv[])
USING_NCBI_SCOPE
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
Int8 TIntId
Definition: ncbimisc.hpp:999
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eIntId
Convertible to TIntId (int or Int8 depending on NCBI_INT8_GI)
Definition: ncbiargs.hpp:593
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
TBioseqStateFlags GetState(void) const
Get state of the bioseq.
TSeqPos GetBioseqLength(void) const
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
void Rewind(void)
Definition: feat_ci.hpp:239
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
char TXChar
Definition: ncbistr.hpp:172
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
TFrom GetFrom(void) const
Get the From member data.
TGi & SetGi(void)
Select the variant.
Definition: Seq_id_.hpp:896
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
static void hex(unsigned char c)
Definition: mdb_dump.c:56
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Miscellaneous common-use basic types and functionality.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
The Object manager core.
SAnnotSelector –.
Modified on Wed Apr 17 13:08:48 2024 by modify_doxy.py rev. 669887