NCBI C++ ToolKit
blast_vdb_cmd.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_vdb_cmd.cpp 101147 2023-11-06 15:02:33Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Amelia Fong
27  *
28 ** @file blast_vdb_cmd.cpp
29  * Command line tool to get vdb info.
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiapp.hpp>
34 #include <objmgr/util/sequence.hpp>
40 
42 USING_SCOPE(blast);
43 
45 
46 /// The application class
48 {
49 public:
50  /** @inheritDoc */
54  }
55 private:
56  /** @inheritDoc */
57  virtual void Init();
58  /** @inheritDoc */
59  virtual int Run();
60 
61  /// Initializes the application's data members
62  void x_InitApplicationData();
63 
64  /// Get vdb util
66 
67  /// Prints the BLAST database information (e.g.: handles -info command line
68  /// option)
70 
71  /// Processes all requests except printing the BLAST database information
72  /// @return 0 on success; 1 if some sequences were not retrieved
74 
75  /// Print vdb paths
76  int x_PrintVDBPaths(bool recursive);
77 
78  /// Resolve vdb paths
79  void x_GetFullPaths();
80 
81  /// Retrieve the queries from the command line arguments
82  vector<string> x_GetQueries();
83 
84  string x_FormatRuntime(const CStopWatch& sw) const;
85 
86  void x_AddCmdOptions();
87 
88  // Store all db names
89  string m_allDbs;
90  string m_origDbs;
91  bool m_isRef;
95 };
96 
97 
98 CBlastVdbCmdApp::CBlastVdbCmdApp(): m_allDbs(kEmptyStr), m_origDbs(kEmptyStr), m_isRef(false), m_includeFilteredReads(false) {
100  version->SetVersionInfo(new CBlastVersion());
102  m_StopWatch.Start();
103  if (m_UsageReport.IsEnabled()) {
105  m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blast_vdb_cmd");
106  }
107  }
108 
109 /** Class to extract FASTA (as returned by the blast_sra library) from SRA
110  * data.
111  *
112  * Inspired by the CSeqFormatter class
113  */
115 public:
116  CVdbFastaExtractor(CRef<CVDBBlastUtil> sraobj, CNcbiOstream& out, const string & fmt_spec,
117  TSeqPos line_width = 80);
118 
119  void Write(CRef<CSeq_id> seqid);
120  void Write(CRef<CBioseq> bioseq, int oid);
121  void DumpAll();
122 
123 private:
126  const string m_FmtSpec;
128  /// Vector of offsets where the replacements will take place
129  vector<string> m_Seperators;
130  /// Vector of convertor objects
131  vector<char> m_ReplTypes;
133  bool m_LoadSeq;
134 
135 };
136 
138  : m_VdbBlastDB(sraobj), m_Out(out), m_FmtSpec(fmt_spec), m_LineWidth(line_width), m_FastaOnly(false), m_LoadSeq(false)
139 {
140  string sp = kEmptyStr;
141  for (SIZE_TYPE i = 0; i < m_FmtSpec.size(); i++) {
142  if (m_FmtSpec[i] == '%') {
143  if ( m_FmtSpec[i+1] == '%') {
144  // remove the escape character for '%'
145  i++;
146  sp += m_FmtSpec[i];
147  continue;
148  }
149  i++;
150  m_ReplTypes.push_back(m_FmtSpec[i]);
151  m_Seperators.push_back(sp);
152  sp = kEmptyStr;
153  }
154  else {
155  sp += m_FmtSpec[i];
156  }
157  }
158  m_Seperators.push_back(sp);
159 
160  if (m_ReplTypes.empty() || (m_ReplTypes.size() + 1 != m_Seperators.size())) {
161  NCBI_THROW(CBlastException, eInvalidOptions,
162  "Invalid format specification");
163  }
164 
165  for (unsigned int i=0; i < m_ReplTypes.size(); i++) {
166  if(m_ReplTypes[i] == 'f') {
167  m_FastaOnly = true;
168  m_LoadSeq = true;
169  break;
170  }
171  else if(m_ReplTypes[i] == 's') {
172  m_LoadSeq = true;
173  }
174  }
175 }
176 
178 {
180  if (bioseq.Empty()) {
181  ERR_POST("Failed to find Bioseq for '" + seqid->AsFastaString() + "'");
182  return;
183  }
184 
185  if (m_FastaOnly) {
186  CFastaOstream fasta(m_Out);
187  fasta.SetWidth(m_LineWidth);
189  fasta.Write(*bioseq);
190  }
191  else {
192  int oid = -1;
193  Write(bioseq, oid);
194  }
195 }
196 
198 {
199  if ((!bioseq->IsSetInst()) || (!bioseq->GetInst().IsSetSeq_data())) {
200  ERR_POST("Bioseq constains no sequence data");
201  return;
202  }
203  const CSeq_inst & si = bioseq->GetInst();
204 
205  for(unsigned int i =0; i < m_ReplTypes.size(); i++) {
206  m_Out << m_Seperators[i];
207  switch (m_ReplTypes[i]) {
208  case 's':
209  {
210  const CSeq_data & d = si.GetSeq_data();
211  string sa = "N/A";
212  if (d.IsIupacna()) {
213  sa = d.GetIupacna().Get();
214  }
215  m_Out << sa;
216  break;
217  }
218  case 'a':
219  {
220  const CSeq_id * id = bioseq->GetFirstId();
221  m_Out << id->GetSeqIdString(true);
222  break;
223  }
224  case 'i':
225  {
226  const CSeq_id * id = bioseq->GetFirstId();
227  m_Out << id->AsFastaString() ;
228  break;
229  }
230  case 'o':
231  {
232  if (oid == -1) {
233  CRef<CSeq_id> cid(const_cast<CSeq_id *> (bioseq->GetFirstId()));
234  oid = (int) m_VdbBlastDB->GetOIDFromVDBSeqId(cid);
235  }
237  break;
238  }
239  case 't':
240  {
241  string t = "N/A";
242  if(bioseq->IsSetDescr() && bioseq->GetDescr().IsSet()) {
243  CRef<CSeqdesc> descTitle = bioseq->GetDescr().Get().front();
244  t = descTitle->GetTitle();
245  }
246  m_Out << t;
247  break;
248  }
249  case 'l':
250  {
251  string l = "N/A";
252  if(si.IsSetLength()){
253  l = NStr::NumericToString(si.GetLength());
254  }
255  m_Out << l;
256  break;
257  }
258  default:
259  CNcbiOstrstream os;
260  os << "Unrecognized format specification: '%" << m_ReplTypes[i] << "'";
262  }
263  }
264  m_Out << m_Seperators.back();
265  m_Out << endl;
266 }
267 
270  BlastSeqSrcGetSeqArg seq_arg = { '\0' };
272  if (m_FastaOnly) {
273  CFastaOstream fasta(m_Out);
274  fasta.SetWidth(m_LineWidth);
276  while ((seq_arg.oid = BlastSeqSrcIteratorNext(seqsrc, itr)) != BLAST_SEQSRC_EOF) {
277  if (seq_arg.oid == BLAST_SEQSRC_ERROR) {
278  ERR_POST("Iterator returns BLAST_SEQSRC_ERROR");
279  exit(1);
280  }
282  if (bioseq.Empty()) {
283  ERR_POST("Empty Bioseq");
284  exit(1);
285  }
286  fasta.Write(*bioseq);
287  }
288  }
289  else {
290  while ((seq_arg.oid = BlastSeqSrcIteratorNext(seqsrc, itr)) != BLAST_SEQSRC_EOF) {
291  if (seq_arg.oid == BLAST_SEQSRC_ERROR) {
292  ERR_POST("Iterator returns BLAST_SEQSRC_ERROR");
293  exit(1);
294  }
296  if (bioseq.Empty()) {
297  ERR_POST("Empty Bioseq");
298  continue;
299  }
300  Write(bioseq, seq_arg.oid);
301  }
302 
303  }
304  }
305 
306 string s_GetCSRADBs(const string & db_list, string & not_csra_list) {
307  vector<string> dbs;
308  string csra_list = kEmptyStr;
309  not_csra_list = kEmptyStr;
310  NStr::Split(db_list, " ", dbs);
311  for(unsigned int i=0; i < dbs.size(); i++) {
312  if(CVDBBlastUtil::IsCSRA(dbs[i])) {
313  csra_list += dbs[i] + " ";
314  }
315  else {
316  not_csra_list += dbs[i] + " ";
317  }
318  }
319  return csra_list;
320 }
321 
322 vector<string>
324 {
325  const CArgs& args = GetArgs();
326  vector<string> retval;
327 
328  if (args["entry"].HasValue()) {
329 
330  static const string kDelim(",");
331  const string& entry = args["entry"].AsString();
332 
333  if (entry.find(kDelim[0]) != string::npos) {
334  vector<string> tokens;
335  NStr::Split(entry, kDelim, tokens);
336  retval.swap(tokens);
337  } else {
338  retval.push_back(entry);
339  }
340 
341  } else if (args["entry_batch"].HasValue()) {
342 
343  CNcbiIstream& input = args["entry_batch"].AsInputFile();
344  retval.reserve(256); // arbitrary value
345  while (input) {
346  string line;
347  NcbiGetlineEOL(input, line);
348  if ( !line.empty() ) {
349  retval.push_back(line);
350  }
351  }
352  } else {
353  NCBI_THROW(CInputException, eInvalidInput,
354  "Must specify query type: one of 'entry', or 'entry_batch'");
355  }
356 
357  if (retval.empty()) {
358  NCBI_THROW(CInputException, eInvalidInput,
359  "Entry not found in BLAST database");
360  }
361 
362  return retval;
363 }
364 
365 int
367 {
368  const CArgs& args = GetArgs();
369  CNcbiOstream& out = args["out"].AsOutputFile();
370 
371  bool errors_found = false;
372 
373  /* Special case: full db dump */
374  if (args["entry"].HasValue() && args["entry"].AsString() == "all") {
375  try {
377  CVdbFastaExtractor seq_fmt(util, out, args["outfmt"].AsString(), args["line_length"].AsInteger());
378  seq_fmt.DumpAll();
379  } catch (const CException& e) {
380  ERR_POST(Error << e.GetMsg());
381  errors_found = true;
382  } catch (...) {
383  ERR_POST(Error << "Failed to retrieve requested item");
384  errors_found = true;
385  }
386  return errors_found ? 1 : 0;
387  }
388 
389  vector<string> queries = x_GetQueries();
390  _ASSERT( !queries.empty() );
391 
393  CVdbFastaExtractor seq_fmt(util, out, args["outfmt"].AsString(), args["line_length"].AsInteger());
394 
395  CRef<CVDBBlastUtil> util_csra = x_GetVDBBlastUtil(true);
396  CVdbFastaExtractor * seq_fmt_csra = NULL;
397  if(util_csra.NotEmpty()) {
398  seq_fmt_csra = new CVdbFastaExtractor(util_csra, out, args["outfmt"].AsString(), args["line_length"].AsInteger());
399  }
400 
401  NON_CONST_ITERATE(vector<string>, itr, queries) {
402  try {
403  CRef<CSeq_id> seq_id;
404  try {
405  seq_id.Reset(new CSeq_id(*itr));
406  } catch (const CException & e) {
407  *itr = "SRA:" + *itr;
408  seq_id.Reset(new CSeq_id(*itr));
409  }
410  switch (CVDBBlastUtil::VDBIdType(*seq_id)) {
413  seq_fmt.Write(seq_id);
414  break;
417  {
418  if(seq_fmt_csra == NULL) {
419  NCBI_THROW(CInputException, eInvalidInput, *itr + ": CSRA ref seq id for non CSRA db");
420  }
421  seq_fmt_csra->Write(seq_id);
422  }
423  break;
424  default :
425  NCBI_THROW(CInputException, eInvalidInput, *itr + " is not a valid SRA, CSRA ref or WGS id");
426  break;
427  }
428 
429  } catch (const CException& e) {
430  ERR_POST(e.GetMsg());
431  errors_found = true;
432  } catch (...) {
433  ERR_POST("Failed to retrieve requested item");
434  errors_found = true;
435  }
436 
437  }
438  if(seq_fmt_csra != NULL) {
439  delete seq_fmt_csra;
440  }
441  return errors_found ? 1 : 0;
442 }
443 
444 string
446 {
447  return sw.AsSmartString();
448 }
449 
450 void
452 {
453  const CArgs& args = GetArgs();
454  string strAllRuns;
455  if (args["db"]) {
456  strAllRuns = args["db"].AsString();
457 
458  } else {
459  CNcbiIstream& in = args["dbs_file"].AsInputFile();
460  string line;
461  while (NcbiGetline(in, line, "\n")) {
462  if (line.empty()) {
463  continue;
464  }
465  strAllRuns += line + " ";
466  }
467  }
468  list<string> tmp;
469  NStr::Split(strAllRuns, "\n\t ", tmp, NStr::fSplit_Tokenize);
470  m_origDbs = NStr::Join(tmp, " ");
471  if (args["ref"]) {
472  m_isRef = true;
473  }
474  else {
475  m_isRef = false;
476  }
477 
478  if (args["include_filtered_reads"]) {
479  m_includeFilteredReads = true;
480  }
481 }
482 
483 void
485 {
486  vector<string> vdbs;
487  vector<string> vdb_alias;
488  vector<string> db_alias;
489  CVDBAliasUtil::FindVDBPaths(m_origDbs, false, vdbs, &db_alias, &vdb_alias, true, true);
490 
491  m_allDbs = NStr::Join(vdbs, " ");
492 }
493 
496 {
498  if (isCSRA) {
499  string not_csra_list = kEmptyStr;
500  string csra_list = s_GetCSRADBs(m_allDbs, not_csra_list);
501  if(csra_list == kEmptyStr) {
502  return util;
503  }
504 
505  CStopWatch sw;
506  sw.Start();
507  util.Reset(new CVDBBlastUtil(csra_list, true, true, m_includeFilteredReads));
508  sw.Stop();
509  LOG_POST(Info << "PERF: blast_vdb library csra initialization: " << x_FormatRuntime(sw));
510  }
511  else {
512  CStopWatch sw;
513  sw.Start();
514  util.Reset(new CVDBBlastUtil(m_allDbs, true, false, m_includeFilteredReads));
515  sw.Stop();
516  LOG_POST(Info << "PERF: blast_vdb library initialization: " << x_FormatRuntime(sw));
517  }
518  return util;
519 }
520 
521 void s_PrintStr(const string & str, unsigned int line_width, CNcbiOstream & out)
522 {
523  list<string> print_str;
524  NStr::Wrap(str, line_width, print_str);
525  ITERATE(list<string>, itr, print_str) {
526  out << *itr << endl;
527  }
528 }
529 
530 int
532 {
533  const string kLetters("bases");
534  const CArgs& args = GetArgs();
535  CNcbiOstream& out = args["out"].AsOutputFile();
536  unsigned int line_width = args["line_length"].AsInteger();
537  Uint8 num_seqs(0), length(0), max_seq_length(0), av_seq_length(0);
538  Uint8 ref_num_seqs(0), ref_length(0);
539  string not_csra_dbs = kEmptyStr;
540  string csra_dbs = s_GetCSRADBs(m_allDbs, not_csra_dbs);
541  CStopWatch sw;
542  sw.Start();
543  CVDBBlastUtil::GetVDBStats(m_allDbs, num_seqs, length, max_seq_length, av_seq_length);
544  if(csra_dbs != kEmptyStr) {
545  CVDBBlastUtil::GetVDBStats(csra_dbs, ref_num_seqs, ref_length, true);
546  }
547  sw.Stop();
548 
549  // Print basic database information
550  out << "Database(s): ";
551  if(m_origDbs.size() > line_width) {
552  out << endl;
553  s_PrintStr(m_origDbs, line_width, out);
554  }
555  else {
556  out << m_origDbs << endl;
557  }
558 
559  out << "Database(s) Full Path: ";
560  if(m_allDbs.size() > line_width) {
561  out << endl;
562  s_PrintStr(m_allDbs, line_width, out);
563  }
564  else {
565  out << m_allDbs << endl;
566  }
567  out << "\t" << NStr::ULongToString(num_seqs, kFlags) << " sequences; ";
568  out << NStr::ULongToString(length, kFlags) << " total " << kLetters << " (unclipped)" << endl;
569  out << "\tLongest sequence: " << NStr::ULongToString(max_seq_length, kFlags) << " " << kLetters << endl;
570  out << "\tAverage sequence: " << NStr::ULongToString(av_seq_length, kFlags) << " " << kLetters << endl;
571 
572  if(csra_dbs != kEmptyStr) {
573  if(not_csra_dbs != kEmptyStr) {
574  out << "CSRA Database(s): ";
575  if(csra_dbs.size() > line_width) {
576  out << endl;
577  s_PrintStr(csra_dbs, line_width, out);
578  }
579  else {
580  out << csra_dbs << endl;
581  }
582  }
583  out << "\t" << NStr::ULongToString(ref_num_seqs, kFlags) << " ref sequences; ";
584  out << NStr::ULongToString(ref_length, kFlags) << " total ref " << kLetters << endl;
585  }
586 
587  LOG_POST(Info << "PERF: Get all BLASTDB metadata: " << x_FormatRuntime(sw));
588  return 0;
589 }
590 
591 int
593 {
594  const CArgs& args = GetArgs();
595  CNcbiOstream& out = args["out"].AsOutputFile();
596  vector<string> vdbs;
597  vector<string> vdb_alias;
598  vector<string> db_alias;
599 
600  CStopWatch sw;
601  sw.Start();
602  CVDBAliasUtil::FindVDBPaths(m_origDbs, false, vdbs, &db_alias, &vdb_alias, recursive, true);
603  sw.Stop();
604 
605  // Print basic database information
606  out << "VDB(s): ";
607  if(vdbs.empty()) {
608  out << "None" << endl;
609  }
610  else {
611  out << endl;
612  ITERATE(vector<string>, itr, vdbs)
613  out << *itr << endl;
614  }
615 
616  if(recursive) {
617  out << "VDB Alias File(s): ";
618  if(vdb_alias.empty()) {
619  out << "None" << endl;
620  }
621  else {
622  out << endl;
623  ITERATE(vector<string>, itr, vdb_alias)
624  out << *itr << endl;
625  }
626 
627  out << "Blats DB Alias File(s): ";
628  if(db_alias.empty()) {
629  out << "None" << endl;
630  }
631  else {
632  out << endl;
633  ITERATE(vector<string>, itr, db_alias)
634  out << *itr << endl;
635  }
636  }
637  LOG_POST(Info << "PERF: Get Paths : " << x_FormatRuntime(sw));
638  return 0;
639 }
640 
642 {
644 
645  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
646 
647  // Specify USAGE context
648  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "BLAST-VDB Cmd");
649 
650  // SRA-related parameters
651  arg_desc->SetCurrentGroup("VDB 'BLASTDB' options");
652  arg_desc->AddKey("db", "VDB_ACCESSIONS",
653  "List of whitespace-separated VDB accessions",
655  arg_desc->AddKey("dbs_file", "Input_File_with_VDB_ACCESSIONS",
656  "File with a newline delimited list of VDB Run accessions",
658  arg_desc->SetDependency("db", CArgDescriptions::eExcludes, "dbs_file");
659  // The format specifiers below should be handled in
660  // CSeqFormatter::x_Builder
661  arg_desc->AddDefaultKey("outfmt", "format",
662  "Output format, where the available format specifiers are:\n"
663  "\t\t%f means sequence in FASTA format\n"
664  "\t\t%s means sequence data (without defline)\n"
665  "\t\t%a means accession\n"
666  "\t\t%o means ordinal id (OID)\n"
667  "\t\t%i means sequence id\n"
668  "\t\t%t means sequence title\n"
669  "\t\t%l means sequence length\n"
670  "\tFor every format except '%f', each line of output will "
671  "correspond\n\tto a sequence.\n",
673 
674  arg_desc->SetCurrentGroup("Retrieval options");
675  arg_desc->AddOptionalKey("entry", "sequence_identifier",
676  "Comma-delimited search string(s) of sequence identifiers"
677  ":\n\te.g.: 'gnl|SRR|SRR066117.18823.2', or 'all' "
678  "to select all\n\tsequences in the database",
680  arg_desc->AddOptionalKey("entry_batch", "input_file",
681  "Input file for batch processing (Format: one entry per line)",
683  arg_desc->SetDependency("entry_batch", CArgDescriptions::eExcludes, "entry");
684  arg_desc->AddDefaultKey("line_length", "number", "Line length for output",
686  NStr::IntToString(80));
687  arg_desc->SetConstraint("line_length",
689 
690  arg_desc->AddFlag("include_filtered_reads", "Include Filtered reads", true);
691  arg_desc->SetDependency("include_filtered_reads", CArgDescriptions::eExcludes, "info");
692 
693  const char* exclusions[] = { "entry", "entry_batch"};
694  for (size_t i = 0; i < sizeof(exclusions)/sizeof(*exclusions); i++) {
695  arg_desc->SetDependency(exclusions[i], CArgDescriptions::eExcludes, "info");
696  }
697 
698  arg_desc->AddFlag("info", "Print VDB information", true);
699  arg_desc->AddFlag("ref",
700  "Dump reference seqs", true);
701  arg_desc->SetDependency("ref", CArgDescriptions::eExcludes, "info");
702  arg_desc->SetDependency("ref", CArgDescriptions::eExcludes, "entry_batch");
703  arg_desc->SetDependency("ref", CArgDescriptions::eExcludes, "include_filtered_reads");
704 
705  arg_desc->AddFlag("paths", "Get top level paths", true);
706  arg_desc->AddFlag("paths_all", "Get all vdb and alias paths", true);
707  const char* exclude_paths[] = { "scan_uncompressed", "scan_compressed", "info", "entry", "entry_batch", "include_filtered_reads"};
708  for (size_t i = 0; i < sizeof(exclude_paths)/sizeof(*exclude_paths); i++) {
709  arg_desc->SetDependency("paths", CArgDescriptions::eExcludes, exclude_paths[i]);
710  arg_desc->SetDependency("paths_all", CArgDescriptions::eExcludes, exclude_paths[i]);
711  }
712  arg_desc->SetCurrentGroup("Output configuration options");
713  arg_desc->AddDefaultKey("out", "output_file", "Output file name",
715 
716  SetupArgDescriptions(arg_desc.release());
717 }
718 
720 {
721  int status = 0;
722  const CArgs& args = GetArgs();
723 
725  SetDiagPostPrefix("blast_vdb_cmd");
727  try {
728  if(args["paths"].HasValue()) {
729  status = x_PrintVDBPaths(false);
730  return status;
731  }
732  if(args["paths_all"].HasValue()) {
733  status = x_PrintVDBPaths(true);
734  return status;
735  }
736 
737  x_GetFullPaths();
738  if (args["info"]) {
740  }
741  else if (args["entry"].HasValue() || args["entry_batch"].HasValue()) {
742  status = x_ProcessSearchRequest();
743  }
744  } catch (const CException& e) {
745  LOG_POST(Error << "VDB Blast error: " << e.GetMsg()); \
746  status = 1;
747  } catch (const exception& e) {
748  status = 1;
749  } catch (...) {
750  LOG_POST(Error << "Unknown exception!");
751  status = 1;
752  }
753  x_AddCmdOptions();
755  return status;
756 }
757 
759 {
760  const CArgs & args = GetArgs();
761  if (args["info"]) {
763  }
764  else if(args["entry"].HasValue() || args["entry_batch"].HasValue()) {
766  if (args["entry"].HasValue() && args["entry"].AsString() == "all") {
768  }
769  }
770  if(args["outfmt"].HasValue()) {
771  m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString());
772  }
773 
774  if (m_origDbs != kEmptyStr) {
776  }
777 }
778 
779 
780 #ifndef SKIP_DOXYGEN_PROCESSING
781 int main(int argc, const char* argv[] /*, const char* envp[]*/)
782 {
783  return CBlastVdbCmdApp().AppMain(argc, argv, 0, eDS_Default, "");
784 }
785 #endif /* SKIP_DOXYGEN_PROCESSING */
Interface for converting sources of sequence data into blast sequence input.
Auxiliary classes/functions for BLAST input library.
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
Definition: blast_seqsrc.h:291
Int4 BlastSeqSrcIteratorNext(const BlastSeqSrc *seq_src, BlastSeqSrcIterator *itr)
Increments the BlastSeqSrcIterator.
Definition: blast_seqsrc.c:425
BlastSeqSrcIterator * BlastSeqSrcIteratorNewEx(unsigned int chunk_sz)
Allocate and initialize an iterator over a BlastSeqSrc.
Definition: blast_seqsrc.c:387
#define BLAST_SEQSRC_EOF
No more sequences available.
Definition: blast_seqsrc.h:292
BLAST usage report api.
void s_PrintStr(const string &str, unsigned int line_width, CNcbiOstream &out)
string s_GetCSRADBs(const string &db_list, string &not_csra_list)
USING_SCOPE(blast)
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
static const NStr::TNumToStringFlags kFlags
Class to constrain the values of an argument to those greater than or equal to the value specified in...
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
Defines BLAST error codes (user errors included)
void AddParam(EUsageParams p, int val)
The application class.
CStopWatch m_StopWatch
string x_FormatRuntime(const CStopWatch &sw) const
int x_PrintVDBPaths(bool recursive)
Print vdb paths.
void x_InitApplicationData()
Initializes the application's data members.
virtual void Init()
@inheritDoc
CBlastUsageReport m_UsageReport
CBlastVdbCmdApp()
@inheritDoc
int x_ProcessSearchRequest()
Processes all requests except printing the BLAST database information.
CRef< CVDBBlastUtil > x_GetVDBBlastUtil(bool isCSRA)
Get vdb util.
int x_PrintBlastDatabaseInformation()
Prints the BLAST database information (e.g.
vector< string > x_GetQueries()
Retrieve the queries from the command line arguments.
void x_GetFullPaths()
Resolve vdb paths.
virtual int Run()
@inheritDoc
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Definition: version.hpp:53
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
Definition: sequence.hpp:770
Defines user input exceptions.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CStopWatch –.
Definition: ncbitime.hpp:1938
static void FindVDBPaths(const string &dbname, bool isProtein, vector< string > &paths, vector< string > *db_alias_list=NULL, vector< string > *vdb_alias_list=NULL, bool recursive=true, bool expand_links=true, bool verify_dbs=true)
Get the list of vdb names.
Definition: vdbalias.cpp:620
CVDBBlastUtil.
BlastSeqSrc * GetSRASeqSrc()
Return the stored SRA BlastSeqSrc object.
Uint4 GetOIDFromVDBSeqId(CRef< objects::CSeq_id > seqId)
Get the ordinal number (OID) for the given SRA sequence.
static void GetVDBStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, bool getRefStats=false)
Function to get around the OID (blastseqsrc) limit, so that a number of sequences greater than int4 can be returned.
CRef< objects::CBioseq > CreateBioseqFromVDBSeqId(CRef< objects::CSeq_id > seqId)
Construct a Bioseq object for the given SRA sequence.
static bool IsCSRA(const string &db_name)
CRef< objects::CBioseq > CreateBioseqFromOid(Uint8 oid)
static IDType VDBIdType(const CSeq_id &id)
Class to extract FASTA (as returned by the blast_sra library) from SRA data.
CRef< CVDBBlastUtil > m_VdbBlastDB
void Write(CRef< CSeq_id > seqid)
const string m_FmtSpec
vector< char > m_ReplTypes
Vector of convertor objects.
CNcbiOstream & m_Out
CVdbFastaExtractor(CRef< CVDBBlastUtil > sraobj, CNcbiOstream &out, const string &fmt_spec, TSeqPos line_width=80)
vector< string > m_Seperators
Vector of offsets where the replacements will take place.
void Print(const CCompactSAMApplication::AlignInfo &ai)
static const char si[8][64]
Definition: des.c:146
std::ofstream out("events_result.xml")
main entry point for tests
#define false
Definition: bool.h:36
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1174
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1312
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1184
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ eExcludes
One argument excludes another.
Definition: ncbiargs.hpp:957
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define NULL
Definition: ncbistd.hpp:225
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
Definition: ncbidiag.cpp:6097
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
const TPrim & Get(void) const
Definition: serialbase.hpp:347
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
Definition: sequence.cpp:2727
void SetWidth(TSeqPos width)
Definition: sequence.cpp:3456
void SetAllFlags(TFlags flags)
Definition: sequence.hpp:858
@ fNoExpensiveOps
don't try too hard to find titles
Definition: sequence.hpp:780
@ fKeepGTSigns
don't convert '>' to '_' in title
Definition: sequence.hpp:777
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
int TNumToStringFlags
Bitwise OR of "ENumToStringFlags".
Definition: ncbistr.hpp:266
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static string ULongToString(unsigned long value, TNumToStringFlags flags=0, int base=10)
Convert unsigned long to string.
Definition: ncbistr.hpp:5150
static void Wrap(const string &str, SIZE_TYPE width, IWrapDest &dest, TWrapFlags flags, const string *prefix, const string *prefix1)
Definition: ncbistr.cpp:5347
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
void Stop(void)
Suspend the timer.
Definition: ncbitime.hpp:2793
string AsSmartString(CTimeSpan::ESmartStringPrecision precision, ERound rounding, CTimeSpan::ESmartStringZeroMode zero_mode=CTimeSpan::eSSZ_SkipZero) const
Transform elapsed time to "smart" string.
Definition: ncbitime.hpp:2851
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
#define CVersion
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
Definition: Seq_inst_.hpp:805
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
const TIupacna & GetIupacna(void) const
Get the variant data.
Definition: Seq_data_.hpp:510
const TTitle & GetTitle(void) const
Get the variant data.
Definition: Seqdesc_.hpp:1032
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:303
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Seq_descr_.hpp:154
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
bool IsIupacna(void) const
Check if variant Iupacna is selected.
Definition: Seq_data_.hpp:504
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
static CStopWatch sw
exit(2)
static int input()
int i
static int version
Definition: mdb_load.c:29
EIPRangeType t
Definition: ncbi_localip.c:101
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
std::istream & in(std::istream &in_, double &x_)
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Definition: blast_seqsrc.h:257
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: blast_seqsrc.h:259
Complete type definition of Blast Sequence Source Iterator.
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
#define _ASSERT
Defines database alias file access classes.
Modified on Wed May 01 14:20:20 2024 by modify_doxy.py rev. 669887