NCBI C++ ToolKit
regexplocdemo.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: regexplocdemo.cpp 91966 2020-12-17 12:53:47Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Clifford Clausen
27 *
28 * File Description: Demo for CRegexp to CSeq_loc class (CRegexp_loc)
29 *
30 * ===========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
35 
36 #include <corelib/ncbiapp.hpp>
37 #include <corelib/ncbiargs.hpp>
38 #include <corelib/ncbienv.hpp>
39 
42 #include <objmgr/scope.hpp>
43 #include <objmgr/seq_vector.hpp>
44 
45 #include <serial/iterator.hpp>
46 #include <serial/objistr.hpp>
47 #include <serial/objostr.hpp>
48 #include <serial/serial.hpp>
49 
52 
54 public:
56  virtual void Init(void);
57  virtual int Run(void);
58 };
59 
61 {
62  unique_ptr<CArgDescriptions> argDescr(new CArgDescriptions);
63  argDescr->AddKey("a", "accession",
64  "GENBANK accession", CArgDescriptions::eString);
65 
66  argDescr->AddKey("r", "PCRE",
67  "Perl Compatible Regular Expression",
69 
70  argDescr->SetUsageContext(GetArguments().GetProgramBasename(),
71  "Displays CSeq_loc for CRegexp\n", false);
72  SetupArgDescriptions(argDescr.release());
73 }
74 
75 // Display CSeq_loc
77 {
78  unique_ptr<CObjectOStream> los(CObjectOStream::Open(eSerial_AsnText, os));
79  *los << loc;
80  return os;
81 };
82 
83 // Gets raw sequence for input accession, matches pattern to it creating
84 // a CSeq_loc, then displays the CSeq_loc
85 int GetLoc(const string& acc, const string &pat, CSeq_loc &loc, CScope &scope)
86 {
87  CRef<CSeq_id> seq_id;
88  try {
89  seq_id.Reset(new CSeq_id(acc));
90  } catch (CSeqIdException& e) {
91  cerr << "Invalid seq-id: '" << acc << "': " << e.what() << endl;
92  return 1;
93  }
94 
95  CBioseq_Handle bioseq_handle = scope.GetBioseqHandle(*seq_id);
96  if (bioseq_handle) {
97  CSeqVector sv =
99 
100  // Get the raw sequence data and display it
101  string seq;
102  seq.reserve(sv.size());
103  sv.GetSeqData(0, sv.size(), seq);
104  cout << "seq=" << seq << endl;
105 
106  // Set pattern
107  CRegexp_loc rl(pat);
108 
109  // Find matches
110  TSeqPos offset = 0;
111  do {
112  // Get match
113  offset = rl.GetLoc(seq.c_str(), &loc, offset) + 1;
114  cout << offset << endl;
115  // Add seq_id to loc so it will display OK
116  for (CTypeIterator<CSeq_interval> it(Begin(loc)); it; ++it) {
117  it->SetId(*seq_id);
118  }
119  // Display loc
120  cout << loc << endl;
121  } while (offset != 0 && offset < seq.size());
122 
123  return 0;
124  } else {
125  cerr << "Bioseq load FAILED." << endl;
126  return 2;
127  }
128 }
129 
131 {
132  const CArgs& args = GetArgs();
135  CScope scope(*objMgr);
136  scope.AddDefaults();
137  int retCode = 0;
138 
139  CSeq_loc loc;
140  retCode = GetLoc(args["a"].AsString(),
141  args["r"].AsString(),
142  loc,
143  scope);
144  return retCode;
145 }
146 
147 int main(int argc, char** argv)
148 {
149  CRegexpLocApp theApp;
150  return theApp.AppMain(argc, argv, NULL, eDS_Default, 0);
151 }
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
virtual void Init(void)
Initialize the application.
virtual int Run(void)
Run the application.
Class used to convert a PCRE match to a char* sequence into a CSeq_loc.
Definition: regexp_loc.hpp:55
TSeqPos GetLoc(const char *seq, CSeq_loc *loc, TSeqPos offset=0, CRegexp::TMatch flags=0)
Gets a CSeq_loc for PCRE match to char* sequence.
Definition: regexp_loc.cpp:53
CScope –.
Definition: scope.hpp:92
CSeqIdException –.
Definition: Seq_id.hpp:1001
CSeqVector –.
Definition: seq_vector.hpp:65
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:832
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1208
void DisableArgDescriptions(TDisableArgDesc disable=fDisableStdArgs)
Definition: ncbiapp.cpp:1319
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
#define NULL
Definition: ncbistd.hpp:225
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:1004
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
The Object manager core.
header file for creating CSeq_locs from CRegexps.
USING_SCOPE(objects)
int main(int argc, char **argv)
int GetLoc(const string &acc, const string &pat, CSeq_loc &loc, CScope &scope)
CNcbiOstream & operator<<(CNcbiOstream &os, const CSeq_loc &loc)
USING_NCBI_SCOPE
Modified on Fri Sep 20 14:57:02 2024 by modify_doxy.py rev. 669887