NCBI C++ ToolKit
stream_source.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: stream_source.cpp 102068 2024-03-25 18:55:35Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbifile.hpp>
34 #include <util/file_manifest.hpp>
35 #include <util/stream_source.hpp>
36 
38 
40 
42  const string &prefix,
43  const string &description,
44  bool is_mandatory)
45 {
46  arg_desc.SetCurrentGroup("Input Options for " + prefix);
47  if (prefix == "input") {
48  arg_desc.AddDefaultKey("input", "InputFile",
49  "Stream of " + description,
51  "-");
52  arg_desc.AddAlias("i", "input");
53  } else {
54  if (is_mandatory) {
55  arg_desc.AddKey(prefix, "InputFile",
56  "Stream of " + description,
58  }
59  else {
60  arg_desc.AddOptionalKey(prefix, "InputFile",
61  "Stream of " + description,
63  }
64  }
65 
66  arg_desc.AddOptionalKey(prefix + "-path", "InputPath",
67  "Path to " + description,
69  arg_desc.AddOptionalKey(prefix + "-mask", "FileMask",
70  "File pattern to search for " + description,
72  arg_desc.SetDependency(prefix + "-mask",
74  prefix + "-path");
75 
76  arg_desc.AddOptionalKey(prefix + "-manifest", "InputFile",
77  "File containing a list of files containing " + description,
79 
80  arg_desc.SetDependency(prefix,
82  prefix + "-manifest");
83 
84  arg_desc.SetDependency(prefix,
86  prefix + "-path");
87 
88  arg_desc.SetDependency(prefix + "-manifest",
90  prefix + "-path");
91 
92  if (prefix == "input") {
93  arg_desc.AddAlias("I", "input-manifest");
94  }
95 
97 }
98 
99 vector<string> CInputStreamSource::RecreateInputArgs(const CArgs& args, const string &prefix)
100 {
101  vector<string> result;
102  if (args[prefix + "-path"].HasValue()) {
103  result.push_back("-" + prefix + "-path");
104  result.push_back(args[prefix + "-path"].AsString());
105  if (args[prefix + "-mask"]) {
106  result.push_back("-" + prefix + "-mask");
107  result.push_back(args[prefix + "-mask"].AsString());
108  }
109  }
110  else if (args[prefix + "-manifest"].HasValue()) {
111  result.push_back("-" + prefix + "-manifest");
112  result.push_back(args[prefix + "-manifest"].AsString());
113  }
114  else {
115  result.push_back("-" + prefix);
116  result.push_back(args[prefix].AsString());
117  }
118  return result;
119 }
120 
122 {
123  return s_InputStreamSourcePrefixes.count(prefix) ? true : false;
124 }
125 
127  : m_Istr(NULL), m_CurrIndex(0)
128 {
129 }
130 
131 
133  : m_Istr(NULL), m_CurrIndex(0)
134 {
135  InitArgs(args, prefix);
136 }
137 
138 
140 {
141 }
142 
143 void CInputStreamSource::InitArgs(const CArgs& args, const string &prefix)
144 {
145  m_Args.Assign(args);
146  m_Prefix = prefix;
147 
148  if (m_Args[prefix + "-path"].HasValue()) {
149  string path = m_Args[prefix + "-path"].AsString();
150  string mask;
151  if (m_Args[prefix + "-mask"]) {
152  mask = m_Args[prefix + "-mask"].AsString();
153  }
155  }
156  else if (m_Args[prefix + "-manifest"].HasValue()) {
157  InitManifest(m_Args[prefix + "-manifest"].AsString());
158  }
159  else if (m_Args[prefix].HasValue() && m_Args[prefix].AsString() == "-") {
160  /// NOTE: this is ignored if either -input-path or -input-mask is
161  /// provided
162  InitStream(m_Args[prefix].AsInputFile(), m_Args[prefix].AsString());
163  }
164  else if (m_Args[prefix].HasValue()) {
165  /// Input file; init as input file, so it can be opened multiple times
166  InitFile(m_Args[prefix].AsString());
167  }
168 }
169 
170 
171 /// Initialize from a given stream which is the sole content.
172 /// As precondition, expect that the stream is in a good condition
173 /// prior to being handed off to consumers.
174 ///
175 void CInputStreamSource::InitStream(CNcbiIstream& istr, const string& fname)
176 {
177  if (m_Istr || m_CurrIndex < m_Files.size()) {
179  "CInputStreamSource::InitManifest(): "
180  "attempt to init already initted class");
181  }
182  if (! istr) {
184  "CInputStreamSource::InitStream(): "
185  "stream is bad");
186  }
187  m_Files.clear();
188  m_Istr = &istr;
189  m_CurrFile = fname;
190  m_CurrIndex = 0;
191 }
192 
193 
194 /// Initialize from a single file path.
195 ///
196 void CInputStreamSource::InitFile(const string& file_path)
197 {
198  if (m_Istr || m_CurrIndex < m_Files.size()) {
200  "CInputStreamSource::InitFile(): "
201  "attempt to init already initted class");
202  }
203 
204  /**
205  * commented out: this breaks stream processing
206  if ( !CFile(file_path).Exists() ) {
207  NCBI_THROW(CException, eUnknown,
208  "input file " + file_path + " does not exist");
209  }
210  **/
211 
212  m_Files.clear();
213  m_Files.push_back(file_path);
214  Rewind();
215 }
216 
217 
218 /// Initialize from a manifest file.
219 ///
220 /// @see CFileManifest
221 void CInputStreamSource::InitManifest(const string& manifest)
222 {
223  if (m_Istr || m_CurrIndex < m_Files.size()) {
225  "CInputStreamSource::InitManifest(): "
226  "attempt to init already initted class");
227  }
228 
229  m_Files.clear();
230  CFileManifest src(manifest);
231  vector<string> all(src.GetAllFilePaths());
232  std::copy( all.begin(), all.end(), std::back_inserter(m_Files));
233 
234  _TRACE("Added " << m_Files.size() << " files from input manifest");
235 
236  Rewind();
237 }
238 
239 
240 /// Initialize from a file search path
241 ///
/// @param file_path  Directory used as the single search root; its existence
///                   is tested with CDir::Exists().
/// @param file_mask  File-name pattern; when empty, "*" is used instead.
///
/// The file list is cleared and refilled, then Rewind() is called.
///
// NOTE(review): this listing has lost four lines (246, 253, 268 and 271 in
// the numbering shown at the start of each line): the statement openers in
// front of the two error messages and the opener and tail of the call that
// receives the path/mask iterator ranges. Restore them from the repository
// copy; the comments here describe only what is still visible.
242 void CInputStreamSource::InitFilesInDirSubtree(const string& file_path,
243  const string& file_mask)
244 {
// Refuse to re-initialize while a stream is held or files are still queued.
245  if (m_Istr || m_CurrIndex < m_Files.size()) {
// NOTE(review): "atemmpt" below is a typo for "attempt"; it is runtime text
// and therefore left untouched in this documentation-only pass.
247  "CInputStreamSource::InitFilesInDirSubtree(): "
248  "atemmpt to init already initted class");
249  }
250 
// The search root must be an existing directory.
251  CDir d(file_path);
252  if ( !d.Exists() ) {
254  "input directory " + file_path + " does not exist");
255  }
256 
// Wrap the single root in a vector so it can be passed as an iterator range.
257  vector<string> paths;
258  paths.push_back(file_path);
259 
// Likewise for the mask; an empty mask falls back to "*".
260  vector<string> masks;
261  if ( !file_mask.empty() ) {
262  masks.push_back(file_mask);
263  } else {
264  masks.push_back("*");
265  }
266 
267  m_Files.clear();
// Arguments of the file-search call whose first and last lines are missing
// from this listing (see the note above the signature).
269  paths.begin(), paths.end(),
270  masks.begin(), masks.end(),
272  _TRACE("Added " << m_Files.size() << " files from input path");
273 
274  Rewind();
275 }
276 
277 
279 {
280  if (m_Istr) {
281  if (fname) {
282  *fname = m_CurrFile;
283  }
284  return *m_Istr;
285  }
286 
287  if (m_IstrOwned.get()) {
288  if (fname) {
289  *fname = m_CurrFile;
290  }
291  return *m_IstrOwned;
292  }
293 
294  NCBI_THROW(CException, eUnknown, "All input streams consumed");
295 }
296 
298 {
299  if (m_Istr) {
300  return *m_Istr;
301  }
302 
303  if (m_IstrOwned.get()) {
304  return *m_IstrOwned;
305  }
306 
307  NCBI_THROW(CException, eUnknown, "All input streams consumed");
308 }
309 
310 
312 {
313  return GetStream();
314 }
315 
316 
318 {
319  // The next stream can be held in either of two places. Clear both.
320 
321  // Clear first place.
322  if (m_Istr) {
323  if (m_Istr->bad()) {
324  // Check that the stream, at the end, didn't go bad as might
325  // happen if there was a disk read error. On the other hand,
326  // it is OK if failbit is set, so ignore that; e.g. getline sets
327  // failbit at the last line if it has a terminator.
329  "CInputStreamSource::operator++(): "
330  "Unknown error in input stream, "
331  "which is in a bad state after use");
332  }
333  m_Istr = NULL;
334  }
335 
336  // Clear second place.
337  if (m_IstrOwned.get()) {
338  if (m_IstrOwned->bad()) {
339  // Same check as for m_Istr.
340  string msg("CInputStreamSource::operator++(): "
341  "Unknown error reading file, "
342  "which is in a bad state after use: ");
344  }
345  m_IstrOwned.reset();
346  }
347 
348  // The current filename currently applies to only the first source,
349  // but someday might apply to others, so clear it here rather than
350  // inside the above conditionals.
351  m_CurrFile.erase();
352 }
353 
355 {
356  if (m_CurrIndex < m_Files.size()) {
359  }
360 }
361 
363 {
364  if (is->fail()) {
365  // Do not provide to clients with streams that are already
366  // known not to be good (fail, meaning badbit or failbit).
367  string msg("CInputStreamSource: File is not accessible: ");
369  }
370  m_IstrOwned.reset(is);
371 }
372 
374 {
375  x_Reset();
376  x_OpenNextFile();
377  return *this;
378 }
379 
381 {
382  x_Reset();
383 
384  m_CurrIndex = index;
385  x_OpenNextFile();
386  return *this;
387 }
388 
390 {
391  m_CurrIndex = 0;
392  ++(*this);
393  return *this;
394 }
395 
397 {
398  return m_CurrFile;
399 }
400 
401 size_t CInputStreamSource::GetCurrentStreamIndex(size_t* count) const
402 {
403  if (count) {
404  *count = m_Files.size();
405  }
406  return m_CurrIndex;
407 }
408 
409 CInputStreamSource::operator bool() const
410 {
411  // The stream contains data if it references a stream (given on input)
412  // owns a stream (extracted from a manifest), or still has a non-empty
413  // queued list of files.
414  return (m_Istr || m_IstrOwned.get() || m_CurrIndex < m_Files.size());
415 }
416 
417 
418 
420 
ncbi::TMaskedQueryRegions mask
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CDir –.
Definition: ncbifile.hpp:1696
vector< string > GetAllFilePaths() const
Returns all the file paths referenced by the manifest.
class CInputStreamSource encapsulates details of how we supply applications with input data through s...
void InitManifest(const string &manifest)
Initialize from a manifest file.
CInputStreamSource & operator++()
Advance to the next stream in the class.
CInputStreamSource & JumpToFile(unsigned index)
Jump to specified file in input list.
virtual void x_OpenNextFile()
vector< string > m_Files
static bool HaveArgsForPrefix(const string &prefix="input")
Check whether the arguments for a specific prefix have been set up in this application.
size_t GetCurrentStreamIndex(size_t *count=nullptr) const
Returns the current file index and the total number of files.
string GetCurrentFileName(void) const
Returns the current file name.
CNcbiIstream * m_Istr
virtual ~CInputStreamSource()
void x_OpenOwnedStream(CNcbiIstream *is)
static vector< string > RecreateInputArgs(const CArgs &args, const string &prefix="input")
Get the standard input arguments that are present in args so we can pass them on to some other progra...
CNcbiIstream & operator*()
Dereferencing the stream class returns the current stream.
CInputStreamSource()
Default ctor This ctor leaves the stream source empty.
CNcbiIstream & GetStream(void)
Access the current stream.
CInputStreamSource & Rewind(void)
Resets the iterator to the first stream in the class.
static void SetStandardInputArgs(CArgDescriptions &arg_desc, const string &prefix="input", const string &description="data to process", bool is_mandatory=false)
Supply a standard set of arguments via argument descriptions to an application.
void InitFilesInDirSubtree(const string &file_path, const string &file_mask=kEmptyStr)
Initialize from a file search path.
unique_ptr< CNcbiIstream > m_IstrOwned
virtual void InitArgs(const CArgs &args, const string &prefix="input")
Initialize from a set of arguments.
void InitStream(CNcbiIstream &istr, const string &fname=kEmptyStr)
Initialize from a stream No ownership is claimed by this class - lifetime management of the stream is...
void InitFile(const string &file_path)
Initialize from a single file path.
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
#define true
Definition: bool.h:35
#define bool
Definition: bool.h:34
void SetDependency(const string &arg1, EDependency dep, const string &arg2)
Define a dependency.
Definition: ncbiargs.cpp:2618
void AddKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for mandatory key.
Definition: ncbiargs.cpp:2412
void AddAlias(const string &alias, const string &arg_name)
Add argument alias.
Definition: ncbiargs.cpp:2557
CArgs & Assign(const CArgs &other)
Copy contents of another object into this one.
Definition: ncbiargs.cpp:1770
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
Definition: ncbiargs.cpp:2427
void SetCurrentGroup(const string &group)
Set current arguments group name.
Definition: ncbiargs.cpp:2632
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
Definition: ncbiargs.cpp:2442
@ eRequires
One argument requires another.
Definition: ncbiargs.hpp:956
@ eExcludes
One argument excludes another.
Definition: ncbiargs.hpp:957
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void FindFiles(TPathIterator path_begin, TPathIterator path_end, const vector< string > &masks, TFindFunc &find_func, TFindFiles flags=fFF_Default)
Generic algorithm for file search.
Definition: ncbifile.hpp:3146
virtual bool Exists(void) const
Check if directory "dirname" exists.
Definition: ncbifile.hpp:4066
@ fFF_Recursive
descend into sub-dirs
Definition: ncbifile.hpp:3013
@ fFF_File
find files
Definition: ncbifile.hpp:3009
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
static const char * prefix[]
Definition: pcregrep.c:405
static set< string > s_InputStreamSourcePrefixes
else result
Definition: token2.c:20
Modified on Wed Jul 24 17:18:15 2024 by modify_doxy.py rev. 669887