138 : m_VdbBlastDB(sraobj), m_Out(
out), m_FmtSpec(fmt_spec), m_LineWidth(line_width), m_FastaOnly(
false), m_LoadSeq(
false)
162 "Invalid format specification");
180 if (bioseq.
Empty()) {
189 fasta.
Write(*bioseq);
200 ERR_POST(
"Bioseq constains no sequence data");
221 m_Out <<
id->GetSeqIdString(
true);
227 m_Out <<
id->AsFastaString() ;
252 if(
si.IsSetLength()){
260 os <<
"Unrecognized format specification: '%" <<
m_ReplTypes[
i] <<
"'";
278 ERR_POST(
"Iterator returns BLAST_SEQSRC_ERROR");
282 if (bioseq.
Empty()) {
286 fasta.
Write(*bioseq);
292 ERR_POST(
"Iterator returns BLAST_SEQSRC_ERROR");
296 if (bioseq.
Empty()) {
311 for(
unsigned int i=0;
i < dbs.size();
i++) {
313 csra_list += dbs[
i] +
" ";
316 not_csra_list += dbs[
i] +
" ";
326 vector<string> retval;
330 static const string kDelim(
",");
331 const string& entry = args[
"entry"].AsString();
333 if (entry.find(kDelim[0]) != string::npos) {
334 vector<string> tokens;
338 retval.push_back(entry);
341 }
else if (args[
"entry_batch"].
HasValue()) {
348 if ( !line.empty() ) {
349 retval.push_back(line);
354 "Must specify query type: one of 'entry', or 'entry_batch'");
357 if (retval.empty()) {
359 "Entry not found in BLAST database");
371 bool errors_found =
false;
374 if (args[
"entry"].
HasValue() && args[
"entry"].AsString() ==
"all") {
386 return errors_found ? 1 : 0;
398 seq_fmt_csra =
new CVdbFastaExtractor(util_csra,
out, args[
"outfmt"].AsString(), args[
"line_length"].AsInteger());
407 *itr =
"SRA:" + *itr;
413 seq_fmt.
Write(seq_id);
418 if(seq_fmt_csra ==
NULL) {
421 seq_fmt_csra->
Write(seq_id);
433 ERR_POST(
"Failed to retrieve requested item");
438 if(seq_fmt_csra !=
NULL) {
441 return errors_found ? 1 : 0;
456 strAllRuns = args[
"db"].AsString();
465 strAllRuns += line +
" ";
478 if (args[
"include_filtered_reads"]) {
487 vector<string> vdb_alias;
488 vector<string> db_alias;
523 list<string> print_str;
525 ITERATE(list<string>, itr, print_str) {
533 const string kLetters(
"bases");
536 unsigned int line_width = args[
"line_length"].AsInteger();
537 Uint8 num_seqs(0), length(0), max_seq_length(0), av_seq_length(0);
538 Uint8 ref_num_seqs(0), ref_length(0);
550 out <<
"Database(s): ";
559 out <<
"Database(s) Full Path: ";
574 out <<
"CSRA Database(s): ";
575 if(csra_dbs.size() > line_width) {
580 out << csra_dbs << endl;
597 vector<string> vdb_alias;
598 vector<string> db_alias;
608 out <<
"None" << endl;
612 ITERATE(vector<string>, itr, vdbs)
617 out <<
"VDB Alias File(s): ";
618 if(vdb_alias.empty()) {
619 out <<
"None" << endl;
623 ITERATE(vector<string>, itr, vdb_alias)
627 out <<
"Blats DB Alias File(s): ";
628 if(db_alias.empty()) {
629 out <<
"None" << endl;
633 ITERATE(vector<string>, itr, db_alias)
648 arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
"BLAST-VDB Cmd");
651 arg_desc->SetCurrentGroup(
"VDB 'BLASTDB' options");
652 arg_desc->AddKey(
"db",
"VDB_ACCESSIONS",
653 "List of whitespace-separated VDB accessions",
655 arg_desc->AddKey(
"dbs_file",
"Input_File_with_VDB_ACCESSIONS",
656 "File with a newline delimited list of VDB Run accessions",
661 arg_desc->AddDefaultKey(
"outfmt",
"format",
662 "Output format, where the available format specifiers are:\n"
663 "\t\t%f means sequence in FASTA format\n"
664 "\t\t%s means sequence data (without defline)\n"
665 "\t\t%a means accession\n"
666 "\t\t%o means ordinal id (OID)\n"
667 "\t\t%i means sequence id\n"
668 "\t\t%t means sequence title\n"
669 "\t\t%l means sequence length\n"
670 "\tFor every format except '%f', each line of output will "
671 "correspond\n\tto a sequence.\n",
674 arg_desc->SetCurrentGroup(
"Retrieval options");
675 arg_desc->AddOptionalKey(
"entry",
"sequence_identifier",
676 "Comma-delimited search string(s) of sequence identifiers"
677 ":\n\te.g.: 'gnl|SRR|SRR066117.18823.2', or 'all' "
678 "to select all\n\tsequences in the database",
680 arg_desc->AddOptionalKey(
"entry_batch",
"input_file",
681 "Input file for batch processing (Format: one entry per line)",
684 arg_desc->AddDefaultKey(
"line_length",
"number",
"Line length for output",
687 arg_desc->SetConstraint(
"line_length",
690 arg_desc->AddFlag(
"include_filtered_reads",
"Include Filtered reads",
true);
693 const char* exclusions[] = {
"entry",
"entry_batch"};
694 for (
size_t i = 0;
i <
sizeof(exclusions)/
sizeof(*exclusions);
i++) {
698 arg_desc->AddFlag(
"info",
"Print VDB information",
true);
699 arg_desc->AddFlag(
"ref",
700 "Dump reference seqs",
true);
705 arg_desc->AddFlag(
"paths",
"Get top level paths",
true);
706 arg_desc->AddFlag(
"paths_all",
"Get all vdb and alias paths",
true);
707 const char* exclude_paths[] = {
"scan_uncompressed",
"scan_compressed",
"info",
"entry",
"entry_batch",
"include_filtered_reads"};
708 for (
size_t i = 0;
i <
sizeof(exclude_paths)/
sizeof(*exclude_paths);
i++) {
712 arg_desc->SetCurrentGroup(
"Output configuration options");
713 arg_desc->AddDefaultKey(
"out",
"output_file",
"Output file name",
747 }
catch (
const exception& e) {
766 if (args[
"entry"].
HasValue() && args[
"entry"].AsString() ==
"all") {
780 #ifndef SKIP_DOXYGEN_PROCESSING
781 int main(
int argc,
const char* argv[] )
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
Int4 BlastSeqSrcIteratorNext(const BlastSeqSrc *seq_src, BlastSeqSrcIterator *itr)
Increments the BlastSeqSrcIterator.
BlastSeqSrcIterator * BlastSeqSrcIteratorNewEx(unsigned int chunk_sz)
Allocate and initialize an iterator over a BlastSeqSrc.
#define BLAST_SEQSRC_EOF
No more sequences available.
void s_PrintStr(const string &str, unsigned int line_width, CNcbiOstream &out)
string s_GetCSRADBs(const string &db_list, string &not_csra_list)
int main(int argc, const char *argv[])
static const NStr::TNumToStringFlags kFlags
Class to constrain the values of an argument to those greater than or equal to the value specified in...
const CSeq_id * GetFirstId() const
Defines BLAST error codes (user errors included)
void AddParam(EUsageParams p, int val)
string x_FormatRuntime(const CStopWatch &sw) const
int x_PrintVDBPaths(bool recursive)
Print vdb paths.
void x_InitApplicationData()
Initializes the application's data members.
virtual void Init()
@inheritDoc
CBlastUsageReport m_UsageReport
CBlastVdbCmdApp()
@inheritDoc
int x_ProcessSearchRequest()
Processes all requests except printing the BLAST database information.
CRef< CVDBBlastUtil > x_GetVDBBlastUtil(bool isCSRA)
Get vdb util.
int x_PrintBlastDatabaseInformation()
Prints the BLAST database information (e.g.
vector< string > x_GetQueries()
Retrieve the queries from the command line arguments.
void x_GetFullPaths()
Resolve vdb paths.
virtual int Run()
@inheritDoc
bool m_includeFilteredReads
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
FASTA-format output; see also ReadFasta in <objtools/readers/fasta.hpp>
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
static void FindVDBPaths(const string &dbname, bool isProtein, vector< string > &paths, vector< string > *db_alias_list=NULL, vector< string > *vdb_alias_list=NULL, bool recursive=true, bool expand_links=true, bool verify_dbs=true)
Get the list of vdb names.
BlastSeqSrc * GetSRASeqSrc()
Return the stored SRA BlastSeqSrc object.
Uint4 GetOIDFromVDBSeqId(CRef< objects::CSeq_id > seqId)
Get the ordinal number (OID) for the given SRA sequence.
static void GetVDBStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, bool getRefStats=false)
Function to get around the OID (blastseqsrc) limit, so that a number of seqs > Int4 can be returned.
CRef< objects::CBioseq > CreateBioseqFromVDBSeqId(CRef< objects::CSeq_id > seqId)
Construct a Bioseq object for the given SRA sequence.
static bool IsCSRA(const string &db_name)
CRef< objects::CBioseq > CreateBioseqFromOid(Uint8 oid)
static IDType VDBIdType(const CSeq_id &id)
void Print(const CCompactSAMApplication::AlignInfo &ai)
static const char si[8][64]
std::ofstream out("events_result.xml")
main entry point for tests
static const char * str(char *buf, int n)
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
CVersionInfo GetVersion(void) const
Get the program version information.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
@ eDiag_Warning
Warning message.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
void Info(CExceptionArgs_Base &args)
const TPrim & Get(void) const
const string AsFastaString(void) const
virtual void Write(const CSeq_entry_Handle &handle, const CSeq_loc *location=0)
Unspecified locations designate complete sequences; non-empty custom titles override the usual title ...
void SetWidth(TSeqPos width)
void SetAllFlags(TFlags flags)
@ fNoExpensiveOps
don't try too hard to find titles
@ fKeepGTSigns
don't convert '>' to '_' in title
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint64_t Uint8
8-byte (64-bit) unsigned integer
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, char delim, string::size_type *count=NULL)
Read from "is" to "str" up to the delimiter symbol "delim" (or EOF)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
NCBI_NS_STD::string::size_type SIZE_TYPE
int TNumToStringFlags
Bitwise OR of "ENumToStringFlags".
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static string ULongToString(unsigned long value, TNumToStringFlags flags=0, int base=10)
Convert unsigned long to string.
static void Wrap(const string &str, SIZE_TYPE width, IWrapDest &dest, TWrapFlags flags, const string *prefix, const string *prefix1)
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ fWithCommas
Use commas as thousands separator.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Stop(void)
Suspend the timer.
string AsSmartString(CTimeSpan::ESmartStringPrecision precision, ERound rounding, CTimeSpan::ESmartStringZeroMode zero_mode=CTimeSpan::eSSZ_SkipZero) const
Transform elapsed time to "smart" string.
void Start(void)
Start the timer.
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
const TInst & GetInst(void) const
Get the Inst member data.
const TIupacna & GetIupacna(void) const
Get the variant data.
const TTitle & GetTitle(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
bool IsIupacna(void) const
Check if variant Iupacna is selected.
unsigned int
A callback function used to compare two keys in a database.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
std::istream & in(std::istream &in_, double &x_)
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Complete type definition of Blast Sequence Source Iterator.
Complete type definition of Blast Sequence Source ADT.
Defines database alias file access classes.