NCBI C++ ToolKit
process_seqvector.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 
33 #ifndef __process_seqvector__hpp__
34 #define __process_seqvector__hpp__
35 
36 // ============================================================================
38 // ============================================================================
39  : public CScopedProcess
40 {
41 public:
42  // ------------------------------------------------------------------------
44  // ------------------------------------------------------------------------
45  : CScopedProcess()
46  , m_out( 0 )
47  {};
48 
49  // ------------------------------------------------------------------------
51  // ------------------------------------------------------------------------
52  {
53  };
54 
55  // ------------------------------------------------------------------------
57  const CArgs& args )
58  // ------------------------------------------------------------------------
59  {
61 
62  m_out = args["o"] ? &(args["o"].AsOutputFile()) : &cout;
63  m_gap_mode = args["gap-mode"].AsString();
64 
65  m_debug = args["debug"];
66 
67  m_timer = CStopWatch();
68  m_timer.Start();
69  };
70 
71  // ------------------------------------------------------------------------
73  // ------------------------------------------------------------------------
74  {
75  }
76 
77  // ------------------------------------------------------------------------
78  virtual void SeqEntryInitialize(
79  CRef<CSeq_entry>& se )
80  // ------------------------------------------------------------------------
81  {
83  };
84 
/// Ways a sequence gap can be rendered in the FASTA output.
/// SeqEntryProcess() selects one of these from the "gap-mode" argument
/// string ("one-dash", "dashes", "letters", "count").
enum EGapType {
    /// A single dash, followed by a line break.
    eGT_one_dash,

    /// Multiple inline dashes.
    eGT_dashes,

    /// Multiple inline Ns or Xs as appropriate (default).
    eGT_letters,

    /// >?N or >?unk100, as appropriate.
    /// NOTE(review): the gap branch visible in SeqEntryProcess() skips the
    /// gap but writes nothing for this mode - confirm whether that is
    /// intended.
    eGT_count
};
91 
92  // ------------------------------------------------------------------------
93  void x_FsaSeqIdWrite(const CBioseq& bioseq)
94  // ------------------------------------------------------------------------
95  {
96  string gi_string;
97  string accn_string;
98 
99  FOR_EACH_SEQID_ON_BIOSEQ (sid_itr, bioseq) {
100  const CSeq_id& sid = **sid_itr;
101  TSEQID_CHOICE chs = sid.Which();
102  switch (chs) {
103  case NCBI_SEQID(Gi):
104  {
105  const string str = sid.AsFastaString();
106  gi_string = str;
107  break;
108  }
109  default:
110  break;
111  }
112  }
113 
114  FOR_EACH_SEQID_ON_BIOSEQ (sid_itr, bioseq) {
115  const CSeq_id& sid = **sid_itr;
116  TSEQID_CHOICE chs = sid.Which();
117  switch (chs) {
118  case NCBI_SEQID(Other):
119  case NCBI_SEQID(Genbank):
120  case NCBI_SEQID(Embl):
121  case NCBI_SEQID(Ddbj):
122  case NCBI_SEQID(Tpg):
123  case NCBI_SEQID(Tpe):
124  case NCBI_SEQID(Tpd):
125  {
126  const string str = sid.AsFastaString();
127  accn_string = str;
128  break;
129  }
130  default:
131  break;
132  }
133  }
134 
135  if (gi_string.empty() || accn_string.empty()) {
136  CSeq_id::WriteAsFasta (*m_out, bioseq);
137  } else {
138  *m_out << gi_string << "|" << accn_string;
139  }
140  }
141 
142  // ------------------------------------------------------------------------
144  // ------------------------------------------------------------------------
145  {
146  try {
148 
150  const CBioseq& bioseq = *bit;
151  // !!! NOTE CALL TO OBJECT MANAGER !!!
152  const CBioseq_Handle& bsh = m_scope->GetBioseqHandle (bioseq);
153 
154  const string& title = gen.GenerateDefline (bsh, 0);
155 
156  *m_out << ">";
157  x_FsaSeqIdWrite (bioseq);
158  *m_out << " ";
159  *m_out << title << endl;
160 
162 
163  EGapType gap_type = eGT_letters;
164  if ( m_gap_mode == "one-dash" ) {
165  gap_type = eGT_one_dash;
166  } else if ( m_gap_mode == "dashes" ) {
167  gap_type = eGT_dashes;
168  } else if ( m_gap_mode == "letters" ) {
169  gap_type = eGT_letters;
170  } else if ( m_gap_mode == "count" ) {
171  gap_type = eGT_count;
172  }
173 
174  int pos = 0;
175  for ( CSeqVector_CI sv_iter(sv); (sv_iter); ++sv_iter ) {
176  if ( gap_type != eGT_letters && sv_iter.GetGapSizeForward() > 0 ) {
177  int gap_len = sv_iter.SkipGap();
178  if ( gap_type == eGT_one_dash ) {
179  *m_out << "-\n";
180  pos = 0;
181  } else if ( gap_type == eGT_dashes ) {
182  while ( gap_len > 0 ) {
183  *m_out << "-";
184  pos++;
185  if ( pos >= 60 ) {
186  pos = 0;
187  *m_out << endl;
188  }
189  gap_len--;
190  }
191  }
192  // *m_out << endl << "Gap length " << NStr::IntToString (gap_len) << endl;
193  // pos = 0;
194  } else {
195  CSeqVector::TResidue res = *sv_iter;
196  *m_out << res;
197  pos++;
198  if ( pos >= 60 ) {
199  pos = 0;
200  *m_out << endl;
201  }
202  }
203  }
204 
205  *m_out << endl;
206  }
207  }
208  catch (CException& e) {
209  ERR_POST(Error << "error processing seqentry: " << e.what());
210  }
211  };
212 
213 protected:
215  string m_gap_mode;
216  bool m_debug;
218 };
219 
220 #endif
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
Class for computing sequences' titles ("definitions").
void ProcessInitialize(const CArgs &args)
virtual void SeqEntryInitialize(CRef< CSeq_entry > &se)
CSeq_entry_Handle m_topseh
CRef< CScope > m_scope
CRef< CSeq_entry > m_entry
Definition: process.hpp:114
@ eGT_dashes
Multiple inline dashes.
@ eGT_one_dash
A single dash, followed by a line break.
@ eGT_count
>?N or >?unk100, as appropriate.
@ eGT_letters
Multiple inline Ns or Xs as appropriate (default).
virtual void SeqEntryInitialize(CRef< CSeq_entry > &se)
void x_FsaSeqIdWrite(const CBioseq &bioseq)
void ProcessInitialize(const CArgs &args)
CSeqVector –.
Definition: seq_vector.hpp:65
CStopWatch –.
Definition: ncbitime.hpp:1938
static const char * str(char *buf, int n)
Definition: stats.c:84
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
virtual void WriteAsFasta(ostream &out) const
Implement serializable interface.
Definition: Seq_id.cpp:2164
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
unsigned char TResidue
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
Definition: seq_macros.hpp:308
#define NCBI_SEQID(Type)
@NAME Convenience macros for NCBI objects
#define VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY(Itr, Var)
VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY.
Modified on Sun Apr 21 03:38:35 2024 by modify_doxy.py rev. 669887