NCBI C++ ToolKit
process_defline.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: process_defline.hpp 91317 2020-10-08 18:29:47Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 
34 
35 #ifndef __process_defline__hpp__
36 #define __process_defline__hpp__
37 
38 // ============================================================================
40 // ============================================================================
41  : public CScopedProcess
42 {
43 public:
44  // ------------------------------------------------------------------------
46  // ------------------------------------------------------------------------
47  : CScopedProcess()
48  , m_out (0)
49  , m_flags (0)
56  {};
57 
58  // ------------------------------------------------------------------------
// Constructor that lets the caller choose the processing mode: use_indexing
// is stored in m_do_indexed, which this class later tests to decide between
// IndexedProcess() and UnindexedProcess(). m_out and m_flags start at 0.
// NOTE(review): listing lines 64-67 and 69 (further member initializers) are
// not visible in this extract - confirm against the real header.
59  CDeflineProcess(bool use_indexing)
60  // ------------------------------------------------------------------------
61  : CScopedProcess()
62  , m_out (0)
63  , m_flags (0)
68  , m_do_indexed (use_indexing)
70  {};
71 
72  // ------------------------------------------------------------------------
// Constructor taking both mode switches: use_indexing is stored in
// m_do_indexed and gpipe_mode in m_gpipe_mode. m_out and m_flags start at 0.
// m_gpipe_mode is tested during argument processing; what that branch does is
// on a line not visible in this extract.
// NOTE(review): listing lines 78-81 (further member initializers) are not
// visible in this extract - confirm against the real header.
73  CDeflineProcess(bool use_indexing, bool gpipe_mode)
74  // ------------------------------------------------------------------------
75  : CScopedProcess()
76  , m_out (0)
77  , m_flags (0)
82  , m_do_indexed (use_indexing)
83  , m_gpipe_mode (gpipe_mode)
84  {};
85 
86  // ------------------------------------------------------------------------
88  // ------------------------------------------------------------------------
89  {
90  };
91 
92  // ------------------------------------------------------------------------
94  const CArgs& args )
95  // ------------------------------------------------------------------------
96  {
98 
99  m_out = args["o"] ? &(args["o"].AsOutputFile()) : &cout;
100 
101  string options = args["options"].AsString();
102  if ( options == "ignore_existing" ) {
104  }
105  if ( options == "show_modifiers" ) {
107  }
108  if (m_gpipe_mode) {
111  }
112 
113  string skip = args["skip"].AsString();
114  if ( NStr::Find(skip, "virtual") != NPOS) {
115  m_skip_virtual = true;
116  }
117  if ( NStr::Find(skip, "segmented") != NPOS) {
118  m_skip_segmented = true;
119  }
120  if ( NStr::Find(skip, "nucleotide") != NPOS) {
121  m_skip_nucleotide = true;
122  }
123  if ( NStr::Find(skip, "protein") != NPOS) {
124  m_skip_protein = true;
125  }
126  /*
127  if ( skip == "virtual" ) {
128  m_skip_virtual = true;
129  }
130  if ( skip == "segmented" ) {
131  m_skip_segmented = true;
132  }
133  if ( skip == "both" ) {
134  m_skip_virtual = true;
135  m_skip_segmented = true;
136  }
137  */
138 
139  m_debug = args["debug"];
140 
141  m_timer = CStopWatch();
142  m_timer.Start();
143  };
144 
145  // ------------------------------------------------------------------------
147  // ------------------------------------------------------------------------
148  {
149  }
150 
151  // ------------------------------------------------------------------------
152  virtual void SeqEntryInitialize(
153  CRef<CSeq_entry>& se )
154  // ------------------------------------------------------------------------
155  {
157  };
158 
159  // ------------------------------------------------------------------------
160  void x_FastaSeqIdWrite(const CBioseq& bioseq)
161  // ------------------------------------------------------------------------
162  {
163  string gi_string;
164  string accn_string;
165 
166  FOR_EACH_SEQID_ON_BIOSEQ (sid_itr, bioseq) {
167  const CSeq_id& sid = **sid_itr;
168  TSEQID_CHOICE chs = sid.Which();
169  switch (chs) {
170  case NCBI_SEQID(Gi):
171  {
172  const string str = sid.AsFastaString();
173  gi_string = str;
174  break;
175  }
176  default:
177  break;
178  }
179  }
180 
181  FOR_EACH_SEQID_ON_BIOSEQ (sid_itr, bioseq) {
182  const CSeq_id& sid = **sid_itr;
183  TSEQID_CHOICE chs = sid.Which();
184  switch (chs) {
185  case NCBI_SEQID(Other):
186  case NCBI_SEQID(Genbank):
187  case NCBI_SEQID(Embl):
188  case NCBI_SEQID(Ddbj):
189  case NCBI_SEQID(Tpg):
190  case NCBI_SEQID(Tpe):
191  case NCBI_SEQID(Tpd):
192  {
193  const string str = sid.AsFastaString();
194  accn_string = str;
195  break;
196  }
197  default:
198  break;
199  }
200  }
201 
202  if (gi_string.empty() || accn_string.empty()) {
203  CSeq_id::WriteAsFasta (*m_out, bioseq);
204  } else {
205  *m_out << gi_string << "|" << accn_string;
206  }
207  }
208 
209  // ------------------------------------------------------------------------
211  // ------------------------------------------------------------------------
212  {
213  try {
215 
216  bool has_segset = false;
217  if (m_skip_segmented) {
219  const CBioseq_set& bss = *bst;
220  if (bss.IsSetClass()) {
221  CBioseq_set::EClass mclass = bss.GetClass();
222  if (mclass == CBioseq_set::eClass_segset ||
223  mclass == CBioseq_set::eClass_parts) {
224  has_segset = true;
225  }
226  }
227  }
228  }
229 
231  const CBioseq& bioseq = *bit;
232 
233  bool okay = true;
234  if (m_skip_virtual) {
235  if (bioseq.IsSetInst()) {
236  const CSeq_inst& inst = bioseq.GetInst();
237  if (inst.IsSetRepr()) {
238  TSEQ_REPR repr = inst.GetRepr();
239  if (repr == CSeq_inst::eRepr_virtual) {
240  okay = false;
241  }
242  }
243  }
244  }
245  if (m_skip_segmented) {
246  if (has_segset) {
247  okay = false;
248  }
249  if (bioseq.IsSetInst()) {
250  const CSeq_inst& inst = bioseq.GetInst();
251  if (inst.IsSetRepr()) {
252  TSEQ_REPR repr = inst.GetRepr();
253  if (repr == CSeq_inst::eRepr_seg) {
254  okay = false;
255  }
256  }
257  }
258  /*
259  CSeq_entry* se;
260  se = bioseq.GetParentEntry();
261  while (se) {
262  if (se->IsSet()) {
263  const CBioseq_set& seqset = se->GetSet();
264  if (seqset.IsSetClass()) {
265  CBioseq_set::EClass mclass = seqset.GetClass();
266  if (mclass == CBioseq_set::eClass_segset ||
267  mclass == CBioseq_set::eClass_parts) {
268  okay = false;
269  }
270  }
271  }
272  se = se->GetParentEntry();
273  }
274  */
275  }
276  if (m_skip_nucleotide) {
277  if (bioseq.IsSetInst()) {
278  const CSeq_inst& inst = bioseq.GetInst();
279  if (inst.IsSetMol()) {
280  TSEQ_MOL mol = inst.GetMol();
281  if (mol == CSeq_inst::eMol_dna ||
282  mol == CSeq_inst::eMol_rna ||
283  mol == CSeq_inst::eMol_na) {
284  okay = false;
285  }
286  }
287  }
288  }
289  if (m_skip_protein) {
290  if (bioseq.IsSetInst()) {
291  const CSeq_inst& inst = bioseq.GetInst();
292  if (inst.IsSetMol()) {
293  TSEQ_MOL mol = inst.GetMol();
294  if (mol == CSeq_inst::eMol_aa) {
295  okay = false;
296  }
297  }
298  }
299  }
300 
301  if (okay) {
302  const string& title = gen.GenerateDefline (bioseq, *m_scope, m_flags);
303 
304  *m_out << ">";
305  x_FastaSeqIdWrite (bioseq);
306  *m_out << " ";
307  *m_out << title << endl;
308  ++m_objectcount;
309  }
310  }
311  }
312  catch (CException& e) {
313  ERR_POST(Error << "error processing seqentry: " << e.what());
314  }
315  };
316 
317  // ------------------------------------------------------------------------
319  // ------------------------------------------------------------------------
320  {
321  try {
323 
325  const CBioseq& bioseq = *bit;
326 
327  bool okay = true;
328  if (m_skip_virtual) {
329  if (bioseq.IsSetInst()) {
330  const CSeq_inst& inst = bioseq.GetInst();
331  if (inst.IsSetRepr()) {
332  TSEQ_REPR repr = inst.GetRepr();
333  if (repr == CSeq_inst::eRepr_virtual) {
334  okay = false;
335  }
336  }
337  }
338  }
339  if (m_skip_segmented) {
340  CSeq_entry* se;
341  se = bioseq.GetParentEntry();
342  if (se) {
343  se = se->GetParentEntry();
344  if (se) {
345  if (se->IsSet()) {
346  const CBioseq_set& seqset = se->GetSet();
347  if (seqset.IsSetClass()) {
348  CBioseq_set::EClass mclass = seqset.GetClass();
349  if (mclass == CBioseq_set::eClass_segset ||
350  mclass == CBioseq_set::eClass_parts) {
351  okay = false;
352  }
353  }
354  }
355  }
356  }
357  }
358 
359  if (okay) {
360  const string& title = gen.GenerateDefline (bioseq, *m_scope, m_flags);
361 
362  *m_out << ">";
363  x_FastaSeqIdWrite (bioseq);
364  *m_out << " ";
365  *m_out << title << endl;
366  ++m_objectcount;
367  }
368  }
369  }
370  catch (CException& e) {
371  ERR_POST(Error << "error processing seqentry: " << e.what());
372  }
373  };
374 
375  // ------------------------------------------------------------------------
377  // ------------------------------------------------------------------------
378  {
379  if (m_do_indexed) {
380  IndexedProcess ();
381  } else {
382  UnindexedProcess ();
383  }
384  };
385 
386 protected:
395  bool m_debug;
397 };
398 
399 #endif
CArgs –.
Definition: ncbiargs.hpp:379
CSeq_entry * GetParentEntry(void) const
Definition: Bioseq.hpp:174
Class for computing sequences' titles ("definitions").
void x_FastaSeqIdWrite(const CBioseq &bioseq)
CDeflineGenerator::TUserFlags m_flags
CDeflineProcess(bool use_indexing, bool gpipe_mode)
virtual void SeqEntryInitialize(CRef< CSeq_entry > &se)
CNcbiOstream * m_out
void ProcessInitialize(const CArgs &args)
CDeflineProcess(bool use_indexing)
void ProcessInitialize(const CArgs &args)
virtual void SeqEntryInitialize(CRef< CSeq_entry > &se)
CSeq_entry_Handle m_topseh
CRef< CScope > m_scope
CRef< CSeq_entry > m_entry
Definition: process.hpp:114
unsigned int m_objectcount
Definition: process.hpp:108
Definition: Seq_entry.hpp:56
CSeq_entry * GetParentEntry(void) const
Definition: Seq_entry.hpp:131
CStopWatch –.
Definition: ncbitime.hpp:1938
API (CDeflineGenerator) for computing sequences' titles ("definitions").
#define false
Definition: bool.h:36
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
virtual void WriteAsFasta(ostream &out) const
Implement serializable interface.
Definition: Seq_id.cpp:2164
int TUserFlags
Binary "OR" of EUserFlags.
@ fGpipeMode
Use GPipe defaults.
@ fShowModifiers
Show key-value pair modifiers (e.g. "[organism=Homo sapiens]")
@ fIgnoreExisting
Generate fresh titles unconditionally.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
@ eClass_parts
parts for 2 or 3
@ eClass_segset
segmented sequence + parts
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
ERepr
representation class
Definition: Seq_inst_.hpp:91
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
Definition: Seq_inst_.hpp:546
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
Definition: Seq_inst_.hpp:593
bool IsSetInst(void) const
Check if a value has been assigned to the Inst data member (the sequence data).
Definition: Bioseq_.hpp:324
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
@ eRepr_seg
segmented sequence
Definition: Seq_inst_.hpp:95
@ eRepr_virtual
no seq data
Definition: Seq_inst_.hpp:93
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
Definition: seq_macros.hpp:308
#define NCBI_SEQID(Type)
Convenience macros for NCBI objects.
#define VISIT_ALL_SEQSETS_WITHIN_SEQENTRY(Itr, Var)
VISIT_ALL_SEQSETS_WITHIN_SEQENTRY.
#define VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY(Itr, Var)
VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY.
Modified on Tue Apr 23 07:39:08 2024 by modify_doxy.py rev. 669887