58 #ifndef SKIP_DOXYGEN_PROCESSING
69 bool have_seqid =
false;
77 m_LogFile <<
"Seq-id " << seqid->AsFastaString()
78 <<
" contains id " <<
id->AsFastaString() << endl;
83 m_LogFile <<
"WARNING: multiple GIs discovered; gi[0] = "
87 m_LogFile <<
"Seq-id " << seqid->AsFastaString()
89 <<
id->GetGi() << endl;
93 }
else if ((! have_seqid) && (id->
Which() == seqid->Which())) {
94 m_LogFile <<
"Remote: Resolving <" << seqid->AsFastaString()
95 <<
"> to <" <<
id->AsFastaString() <<
">" << endl;
101 <<
"Warning: Resolution still does not provide version."
120 ITERATE(vector<string>,
id, ids) {
135 bool specific =
false;
150 m_LogFile <<
"Did not recognize id: \"" << *
id <<
"\"" << endl;
158 m_LogFile <<
"Found numerical GI:" << gi << endl;
195 bool found(
false),
done(
false);
197 ITERATE(vector<int>, oid, oids) {
204 size_t pos =
S.find(acc);
206 if (pos != string::npos) {
207 size_t endpos = pos + acc.size();
209 bool start_okay = (pos == 0 ||
S[pos-1] ==
'|');
210 bool end_okay = ((endpos ==
S.size()) ||
214 if (start_okay && end_okay) {
218 if (
done || (! found)) {
283 m_LogFile <<
"Duplication from source DB; duplicated "
284 <<
count <<
" sequences in " <<
t <<
" seconds." << endl;
292 if ((! bs->CanGetInst()) || bs->GetInst().CanGetSeq_data() ||
293 ! bs->GetInst().CanGetExt() || ! bs->GetInst().GetExt().IsDelta()) {
297 if (bs->GetInst().CanGetMol() &&
301 oss <<
id->AsFastaString() <<
": Protein delta sequences are not supported.";
307 const CDelta_ext & dext = bs->GetInst().GetExt().GetDelta();
312 typedef list< CRef< CDelta_seq > >
TItems;
319 if (bs->GetInst().CanGetLength()) {
320 seq8na.reserve(bs->GetInst().GetLength());
326 if(((**item).IsLoc()) && ((**item).GetLoc().IsNull())) {
327 seq8na.append(1, 0x0f);
339 "Part of the delta sequence, including its length, is un-available.");
361 "Unhandled type of sequence data encountered.");
371 int length = seq8na.size();
386 bs2->
SetId() = bs->GetId();
389 if (bs->IsSetDescr()) {
395 inst->
SetSeq_data().SetNcbi4na().Set().swap(seq4na);
402 if (bs->IsSetAnnot()) {
410 "Bioseq must have Seq-data or "
411 "Delta containing only literals.");
437 headers->RemoveGIs();
444 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
445 (!defined(NCBI_COMPILER_MIPSPRO)) )
461 gis.push_back((*id)->GetGi());
470 objects::CSeqVector * sv,
484 if(bs->GetInst().CanGetSeq_data())
499 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
500 (!defined(NCBI_COMPILER_MIPSPRO)) )
501 const list< CRef<CSeq_id> > & ids = bs->GetId();
523 m_LogFile <<
"Caught exception for query: "
524 << seqid.AsFastaString() << endl
543 m_LogFile <<
"Could not find entry for: "
544 << seqid.AsFastaString() << endl;
551 m_LogFile <<
"-- REMOTE: Found sequence "
552 << seqid.AsFastaString() << endl;
560 bool found_all =
true;
565 for(
i = 0;
i < num_gis;
i++) {
574 m_LogFile <<
" not found locally; adding remotely." << endl;
585 m_LogFile <<
" found locally; not adding remotely." << endl;
591 for(
i = 0;
i < num_seqids;
i++) {
601 m_LogFile <<
" not found locally; adding remotely." << endl;
605 string acc = gi_list.
GetKey<
string>(
i);
611 m_LogFile <<
" found locally; not adding remotely." << endl;
618 m_LogFile <<
"Adding sequences from remote source; added "
619 <<
count <<
" sequences in " <<
t <<
" seconds." << endl;
635 for(
i = 0;
i < num_gis;
i++) {
642 <<
" was not resolvable." << endl;
649 <<
" found locally." << endl;
655 for(
i = 0;
i < num_seqids;
i++) {
663 <<
" was not resolvable." << endl;
671 <<
" found locally." << endl;
676 m_LogFile <<
"Could not resolve " << unresolved <<
" IDs." << endl;
705 : m_FastaReader(
NULL)
732 TFlags
flags = (TFlags) iflags;
743 if (!
value.empty()) {
745 catch (
const exception&) {}
763 static const string kKeyword(
"m_Pos = ");
766 string pos(
"unknown");
768 start += kKeyword.size();
769 pos =
string(e.what()).substr(start, end-start);
771 string msg = e.GetMsg();
772 const string extra_string(
"CFastaReader: ");
774 msg.erase(0, extra_string.size());
807 string bioseq_id(
"Unknown");
810 const list< CRef<CSeq_id> > & ids = bs->
GetId();
815 bool skip_this =
false;
816 for(list<
CRef<CSeq_id> >::const_iterator it = ids.begin(); it != ids.end(); it++ ){
817 if( it->NotEmpty() ){
819 if(
info == CSeq_id::EAccessionInfo::eAcc_gi ){
820 check_gi = (*it)->GetGi();
821 if( check_gi > max_gi32_val )
829 m_LogFile <<
"Ignoring gi '" << check_gi <<
"' as it has value larger then " << 0xFFFFFFFF<< endl;
836 if (! ids.empty() && ids.front().NotEmpty()) {
837 bioseq_id.assign(ids.front()->AsFastaString());
846 for (
auto& it: bss->
SetId()) {
848 if (!it->IsLocal() && !it->IsGi() &&
852 string label = it->GetSeqIdString(
true);
868 "Sequences longer than 2,147,483,647 bases are not supported. Offending sequence is " + bioseq_id);
872 m_LogFile <<
"Ignoring sequence '" << bioseq_id
873 <<
"' as it has no sequence data" << endl;
879 m_LogFile <<
"Adding bioseq from fasta; first id is: '" << bioseq_id
897 m_LogFile <<
"Adding sequences from FASTA; added "
898 <<
count <<
" sequences in " <<
t <<
" seconds." << endl;
911 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
912 (!defined(NCBI_COMPILER_MIPSPRO)) )
915 vector<string> all_names;
921 for(
int i = 0;
i < (
int) all_names.size();
i++) {
922 string name = all_names[
i];
926 if (name ==
"BlastDb/MaskData") {
939 ITERATE(StringPairMap, iter, meta) {
943 in2out[in_id] = out_id;
948 vector<CTempString> column_blobs;
949 vector<int> column_ids;
967 _ASSERT(column_blobs.size() == column_ids.size());
969 if (sequence.
empty()) {
971 "Error in raw data: no sequence");
976 "Error in raw data: "
977 "protein db cannot with ambiguities");
980 if (deflines.
Empty()) {
982 "Error in raw data: no headers provided");
991 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
992 (!defined(NCBI_COMPILER_MIPSPRO)) )
993 for(
int i = 0;
i < (
int)column_ids.size();
i++) {
994 int in_id = column_ids[
i];
995 if (in_id == mask_id)
continue;
997 if (column_blobs[
i].
size() == 0)
1002 int out_id = in2out[in_id];
1014 const list< CRef<CSeq_id> > & ids = (*defline)->GetSeqid();
1016 if ((*id)->IsGi()) {
1017 gis.push_back((*id)->GetGi());
1023 mask_data.insert(mask_data.end(), rng.begin(), rng.end());
1029 if (!mask_data.
empty()) {
1044 m_LogFile <<
"Adding sequences from raw db source; added "
1045 <<
count <<
" sequences in " <<
t <<
" seconds." << endl;
1055 if (dir_name.empty()) {
1062 string msg(
"Failed to create directory '" + d.
GetName() +
"'");
1067 string msg(
"You do not have write permissions on '" +
1074 const string & title,
1083 bool scan_bioseq_4_cfastareader_usrobj)
1084 : m_IsProtein (is_protein),
1085 m_KeepLinks (
false),
1086 m_KeepMbits (
false),
1087 m_KeepLeafs (
false),
1089 m_LogFile (*logfile),
1095 m_LongIDs (long_seqids),
1096 m_FoundMatchingMasks(
false),
1097 m_SkipCopyingGis(
false),
1098 m_SkipLargeGis(
true),
1100 m_ScanBioseq4CFastaReaderUsrObjct(scan_bioseq_4_cfastareader_usrobj)
1104 m_LogFile <<
"\n\nBuilding a new DB, current time: "
1107 m_LogFile <<
"New DB name: " << output_dbname << endl;
1108 m_LogFile <<
"New DB title: " << title << endl;
1109 const string mol_type(is_protein ?
"Protein" :
"Nucleotide");
1110 m_LogFile <<
"Sequence type: " << mol_type << endl;
1112 m_LogFile <<
"Deleted existing " << mol_type
1113 <<
" BLAST database named " << output_dbname << endl;
1137 const string & title,
1147 bool scan_bioseq_4_cfastareader_usrobj)
1148 : m_IsProtein (is_protein),
1149 m_KeepLinks (
false),
1150 m_KeepMbits (
false),
1151 m_KeepLeafs (
false),
1153 m_LogFile (*logfile),
1158 m_ParseIDs (parse_seqids),
1159 m_LongIDs (long_seqids),
1160 m_FoundMatchingMasks(
false),
1161 m_SkipCopyingGis(
false),
1162 m_SkipLargeGis(
true),
1163 m_ScanBioseq4CFastaReaderUsrObjct(scan_bioseq_4_cfastareader_usrobj)
1167 m_LogFile <<
"\n\nBuilding a new DB, current time: "
1170 m_LogFile <<
"New DB name: " << output_dbname << endl;
1171 m_LogFile <<
"New DB title: " << title << endl;
1172 const string mol_type(is_protein ?
"Protein" :
"Nucleotide");
1173 m_LogFile <<
"Sequence type: " << mol_type << endl;
1175 m_LogFile <<
"Deleted existing " << mol_type
1176 <<
" BLAST database named " << output_dbname << endl;
1196 scan_bioseq_4_cfastareader_usrobj));
1207 ERR_POST(
Error <<
"No sequences matched any of the masks provided.\n"
1208 <<
"Please ensure that the -parse_seqids option is used "
1209 <<
"in the\nfiltering program as well as makeblastdb.");
1212 ERR_POST(
Error <<
"No sequences matched any of the taxids provided.");
1265 m_LogFile <<
"Keep Linkouts: " << (keep_links ?
"T" :
"F") << endl;
1273 m_LogFile <<
"Keep MBits: " << (keep_mbits ?
"T" :
"F") << endl;
1283 m_LogFile <<
"Keep Leaf Taxids: " << (keep_taxids ?
"T" :
"F") << endl;
1296 bool success =
AddIds(ids);
1304 success = success && success2;
1311 m_LogFile <<
"Total time to build database: "
1312 <<
t <<
" seconds.\n" << endl;
1324 bool success =
true;
1373 if (this_oid != -1) {
1374 if (seen_it.
find(this_oid) == seen_it.
end()) {
1375 seen_it[this_oid] = this_gi;
1378 <<
" is duplicate of GI "
1379 << seen_it[this_oid]
1401 bool success =
true;
1411 if (success ==
false)
1430 }
catch (exception& e) {
1441 bool success =
true;
1443 vector<string> vols;
1444 vector<string> files;
1451 _ASSERT(vols.empty() == files.empty());
1459 ITERATE(vector<string>, iterf, files) {
1462 _TRACE(
"Deleting " << *iterf);
1469 if (close_exception) {
1471 close_exception->
GetMsg());
1480 objects::CBlast_def_line& defline,
1483 vector<string>& keys
1490 if (!
key->empty()) {
1492 if (item != leafs.
end()) {
1494 taxids.
insert(item->second.begin(), item->second.end());
1506 defline.SetLeafTaxIds(tv);
1509 defline.SetLeafTaxIds(tv);
1520 vector<string> & keys)
1531 if (item != bitmap.
end()) {
1533 value |= item->second;
1538 list<int> & linkv = (is_memb
1539 ? defline.SetMemberships()
1540 : defline.SetLinks());
1546 if (linkv.empty()) {
1547 linkv.push_back(
value);
1549 linkv.front() |=
value;
1554 defline.ResetMemberships();
1556 defline.ResetLinks();
1565 vector<string> keys;
1585 const string & options,
1586 const string & name)
1588 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
1589 (!defined(NCBI_COMPILER_MIPSPRO)) )
1598 const string & description,
1599 const string & options)
1601 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
1602 (!defined(NCBI_COMPILER_MIPSPRO)) )
static CConstRef< CBioseq > s_FixBioseqDeltas(CConstRef< objects::CBioseq > bs)
static void s_SetDeflineLeafs(objects::CBlast_def_line &defline, TIdToLeafs &leafs, bool keep_old, vector< string > &keys)
static void s_SetDeflineBits(objects::CBlast_def_line &defline, TIdToBits &bitmap, bool keep_old, bool is_memb, vector< string > &keys)
Code to build a database given various sources of sequence data.
TSeqPos GetLength(void) const
`Blob' Class for SeqDB (and WriteDB).
void WriteRaw(const char *begin, int size)
Write raw data to the blob (moving the write pointer).
void Clear()
Clear all owned data and reference an empty string.
Implementation of ILineReader for IReader.
bool AddSequences(IBioseqSource &src, bool add_pig=false)
Add sequences from an IBioseqSource object.
bool m_IsProtein
True for a protein database, false for nucleotide.
bool AddFasta(CNcbiIstream &fasta_file)
Add sequences from a file containing FASTA data.
int m_DeflineCount
Defline count.
bool m_KeepLinks
True to keep linkout bits from source dbs, false to discard.
bool m_SkipLargeGis
If set to true, skip GIs with value > 0x7FFFFFFF.
TIdToBits m_Id2Links
Table of linkout bits to apply to sequences.
bool m_FoundMatchingMasks
If true, there were sequences whose IDs matched those in the provided masking locations (via SetMaskD...
static void CreateDirectories(const string &dbname)
Create Directory for blast db.
bool x_ReportUnresolvedIds(const CInputGiList &gi_list) const
Write log messages for any unresolved IDs.
bool m_UseRemote
Whether to use remote resolution and sequence fetching.
void x_EditHeaders(CRef< objects::CBlast_def_line_set > headers)
Modify deflines with linkout and membership bits and taxids.
objects::CScope & x_GetScope()
Get a scope for remote loading of objects.
void SetMaskDataSource(IMaskDataSource &ranges)
Specify an object mapping Seq-id to subject masking data.
void SetMembBits(const TLinkoutMap &membbits, bool keep_mbits)
Specify a membership bit lookup object.
bool m_ParseIDs
If true, string IDs found in FASTA input will be parsed as Seq-ids.
int RegisterMaskingAlgorithm(EBlast_filter_program program, const string &options, const string &name="")
Define a masking algorithm.
int m_OIDCount
Number of OIDs stored in this database.
void SetSourceDb(const string &src_db_name)
Specify source database(s) via the database name(s).
bool m_ScanBioseq4CFastaReaderUsrObjct
bool m_KeepMbits
True to keep membership bits from source dbs, false to discard.
CRef< CTaxIdSet > m_Taxids
Set of TaxIDs configured to apply to sequences.
CRef< objects::CScope > m_Scope
Sequence scope, used for remote fetching.
CRef< objects::CObjectManager > m_ObjMgr
Object manager, used for remote fetching.
ostream & m_LogFile
Logfile.
bool m_Verbose
If true, more detailed log messages will be produced.
bool m_KeepLeafs
True to keep leaf taxids from source dbs, false to discard.
bool x_EndBuild(bool erase, const CException *close_exception)
bool m_SkipCopyingGis
If set to true, when copying BLASTDBs, skip the GIs.
CRef< CWriteDB > m_OutputDb
Database being produced here.
CRef< CSeqDBExpert > m_SourceDb
Database for duplicating sequences locally (-sourcedb option.)
void SetTaxids(CTaxIdSet &taxids)
Specify a mapping of sequence ids to taxonomic ids.
CRef< CInputGiList > x_ResolveGis(const vector< string > &ids)
Resolve various input IDs (as strings) to GIs.
void x_SetLinkAndMbit(CRef< objects::CBlast_def_line_set > headers)
Store linkout (now deprecated) and membership bits in provided headers.
CRef< IMaskDataSource > m_MaskData
Subject masking data.
bool x_EditAndAddBioseq(CConstRef< objects::CBioseq > bs, objects::CSeqVector *sv, bool add_pig=false)
Modify a Bioseq as needed and add it to the database.
bool m_LongIDs
If true, use long sequence ids (database|accession)
bool Build(const vector< string > &ids, CNcbiIstream *fasta_file)
Build the database.
void x_AddOneRemoteSequence(const objects::CSeq_id &seqid, bool &found, bool &error)
Fetch a sequence from the remote service and add it to the db.
void x_AddMasksForSeqId(const list< CRef< CSeq_id > > &ids)
Add the masks for the Seq-id(s) (usually just one) to the database being created.
TIdToLeafs m_Id2Leafs
Table of leaf taxids to apply to sequences.
void x_ResolveRemoteId(CRef< objects::CSeq_id > &seqid, TGi &gi)
Resolve an ID remotely.
void x_DupLocal()
Duplicate IDs from local databases.
TIdToBits m_Id2Mbits
Table of membership bits to apply to sequences.
bool x_ResolveFromSource(const string &acc, CRef< objects::CSeq_id > &id)
Determine if this string ID can be found in the source database.
void StartBuild()
Start building a new database.
bool EndBuild(bool erase=false)
Finish building a new database.
void SetLinkouts(const TLinkoutMap &linkouts, bool keep_links)
Specify a linkout bit lookup object.
void SetMaxFileSize(Uint8 max_file_size)
Set the maximum size of database component files.
void x_AddPig(CRef< objects::CBlast_def_line_set > headers)
Add pig if id can be extracted from the deflines.
bool AddIds(const vector< string > &ids)
Add the specified sequences from the source database.
void SetMaskLetters(const string &mask_letters)
Specify letters to mask out of protein sequence data.
bool x_AddRemoteSequences(CInputGiList &gi_list)
Duplicate IDs from local databases.
CBuildDatabase(const string &dbname, const string &title, bool is_protein, CWriteDB::TIndexType indexing, bool use_gi_mask, ostream *logfile, bool long_seqids=false, EBlastDbVersion dbver=eBDB_Version4, bool limit_defline=false, Uint8 oid_masks=EOidMaskType::fNone, bool scan_bioseq_4_cfastareader_usrobj=true)
Constructor.
void SetLeafTaxIds(const TIdToLeafs &taxids, bool keep_taxids)
Specify a leaf-taxids object.
CFastaBioseqSource(CNcbiIstream &fasta_file, bool is_protein, bool parse_ids, bool long_ids)
virtual CConstRef< CBioseq > GetNext()
Get a Bioseq object if there are any more to get.
CFastaReader * m_FastaReader
CRef< ILineReader > m_LineReader
Base class for reading FASTA sequences.
Thrown on an attempt to access wrong choice variant.
This represents a set of masks for a given sequence.
bool empty() const
Redefine empty to mean no elements or none of its elements being empty.
static CNcbiApplication * Instance(void)
Singleton method.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Raw Sequence and Ambiguity Data.
int GetNumGis() const
Get the number of GIs in the array.
const SGiOid & GetGiOid(int index) const
Access an element of the array.
int GetNumSis() const
Get the number of Seq-ids in the array.
const SSiOid & GetSiOid(int index) const
Access an element of the array.
T GetKey(int index) const
const string & GetDBNameList() const
Get list of database names.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
ESeqType GetSequenceType() const
Returns the type of database opened - protein or nucleotide.
string GetTitle() const
Returns the database title.
void AccessionToOids(const string &acc, vector< int > &oids) const
Translate an Accession to a list of OIDs.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
string GetDate() const
Returns the construction date of the database.
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
@Seq_descr.hpp User-defined methods of the data storage class.
void FixTaxId(CRef< objects::CBlast_def_line_set > deflines)
Check that each defline has the specified taxid; if not, replace the defline and set the taxid.
bool HasEverFixedId() const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
int CreateUserColumn(const string &title)
Set up a user-defined CWriteDB column.
void SetMaxFileSize(Uint8 sz)
Set maximum size for output files.
@ eProtein
Protein database.
@ eNucleotide
Nucleotide database.
int RegisterMaskAlgorithm(EBlast_filter_program program, const string &options=string(), const string &name=string())
Register a type of filtering data found in this database.
void AddColumnMetaData(int col_id, const string &key, const string &value)
Add meta data to a user-defined column.
int FindColumn(const string &title) const
Find an existing column.
void ListFiles(vector< string > &files)
List Filenames.
CBlastDbBlob & SetBlobData(int column_id)
Add blob data to a user-defined column.
void SetMaskData(const CMaskedRangesVector &ranges, const vector< TGi > &gis)
Set filtering data for a sequence.
void SetPig(int pig)
Set the PIG to be used for the sequence.
void AddSequence(const CBioseq &bs)
Add a sequence as a CBioseq.
EIndexType
Whether and what kind of indices to build.
@ eDefault
Like eFullIndex but also build a numeric Trace ID index.
@ eSparseIndex
Use only simple accessions in the string index.
void SetMaskedLetters(const string &masked)
Set letters that should not be used in sequences.
void ListVolumes(vector< string > &vols)
List Volumes.
int TIndexType
Bitwise OR of "EIndexType".
static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids=true, bool long_ids=false, bool scan_bioseq_4_cfastareader_usrobj=false)
Extract Deflines From Bioseq.
void SetDeflines(const CBlast_def_line_set &deflines)
Set the deflines to be used for the sequence.
void Close()
Close the Database.
Interface to a source of Bioseq objects.
virtual CConstRef< objects::CBioseq > GetNext()=0
Get a Bioseq object if there are any more to get.
@ eProblem_ModifierFoundButNoneExpected
@ eProblem_TooManyAmbiguousResidues
An interface providing lookups of mask-data by Seq-id.
virtual CMaskedRangesVector & GetRanges(const list< CRef< CSeq_id > > &id)=0
Get ranges of masking data for the given Seq-ids.
Interface to a source of raw sequence data.
virtual const map< string, string > & GetColumnMetaData(int id)=0
Get metadata for the column with the specified Column ID.
virtual bool GetNext(CTempString &sequence, CTempString &ambiguities, CRef< objects::CBlast_def_line_set > &deflines, vector< SBlastDbMaskData > &mask_ranges, vector< int > &column_ids, vector< CTempString > &column_blobs)=0
Get a raw sequence.
virtual void GetColumnNames(vector< string > &names)=0
Get the names of all columns defined by this sequence source.
virtual int GetColumnId(const string &name)=0
Get the column ID for a column mentioned by name.
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
Operators to edit gaps in sequences.
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_EXCEPTION_VAR(name, exception_class, err_code, message)
Create an instance of the exception to be thrown later.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
#define NCBI_RETHROW(prev_exception, exception_class, err_code, message)
Generic macro to re-throw an exception.
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
string GetDir(EIfEmptyPath mode=eIfEmptyPath_Current) const
Get the directory component for this directory entry.
bool CreatePath(TCreateFlags flags=fCreate_Default) const
Create the directory path recursively possibly more than one at a time.
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
bool CheckAccess(TMode access_mode) const
Check access rights.
virtual bool Exists(void) const
Check if directory "dirname" exists.
string GetName(void) const
Get the base entry name with extension (if any).
@ eIfEmptyPath_Empty
Return empty string.
@ fWrite
Write permission.
#define MSerial_AsnText
I/O stream manipulators.
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
EFlags
Note on fAllSeqIds: some databases (notably nr) have merged identical sequences, joining their deflin...
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
void SetMaxIDLength(Uint4 max_len)
If this is set, an exception will be thrown if a Sequence ID exceeds the given length.
void IgnoreProblem(ILineError::EProblem problem)
@ fNoParseID
Generate an ID (whole defline -> title)
@ fQuickIDCheck
Just check local IDs' first characters.
@ fRequireID
Reject deflines that lack IDs.
@ fDisableNoResidues
If no residues found do not raise an error.
@ fForceType
Force specified type regardless of accession.
@ fParseRawID
Try to identify raw accessions.
@ fAssumeNuc
Assume nucs unless accns indicate otherwise.
@ fParseGaps
Make a delta sequence if gaps found.
@ fAssumeProt
Assume prots unless accns indicate otherwise.
const string AsFastaString(void) const
EAccessionInfo
For IdentifyAccession (below)
static int BestRank(const CRef< CSeq_id > &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
vector< CSeq_id_Handle > TIds
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TBioseqStateFlags GetState(void) const
Get state of the bioseq.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint64_t Uint8
8-byte (64-bit) unsigned integer
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
size_type size(void) const
Return the length of the represented array.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
@ eCurrent
Use current time. See also CCurrentTime.
@ eStart
Start timer immediately after creating.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
static const char label[]
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
bool IsSetOther_info(void) const
In proteins this stores the PIG, in nucleotides this stores the "origin GIs" (if one sequence is desc...
const Tdata & Get(void) const
Get the member data.
list< CRef< CBlast_def_line > > Tdata
const TOther_info & GetOther_info(void) const
Get the Other_info member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsGi(void) const
Check if variant Gi is selected.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
TId & SetId(void)
Assign a value to Id data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
bool IsNcbi4na(void) const
Check if variant Ncbi4na is selected.
TLength GetLength(void) const
Get the Length member data.
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
void SetInst(TInst &value)
Assign a value to Inst data member.
bool CanGetId(void) const
Check if it is safe to call GetId method.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
void SetRepr(TRepr value)
Assign a value to Repr data member.
bool CanGetSeq_data(void) const
Check if it is safe to call GetSeq_data method.
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
void SetLength(TLength value)
Assign a value to Length data member.
bool IsGap(void) const
Check if variant Gap is selected.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
bool IsNcbi2na(void) const
Check if variant Ncbi2na is selected.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eRepr_raw
continuous sequence
@ eMol_na
just a nucleic acid
@ e_Literal
a piece of sequence
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
bool CheckAccession(const string &acc, TGi &gi, CRef< objects::CSeq_id > &seqid, bool &specific)
void GetDeflineKeys(const objects::CBlast_def_line &defline, vector< string > &keys)
Get all keys for a defline.
void MapToLMBits(const TLinkoutMap &gilist, TIdToBits &gi2links)
Read a set of GI lists, each a vector of strings, and combine the bits into the resulting linkbits ma...
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Defines unified interface to application:
CSeqDB::ESeqType ParseMoleculeTypeString(const string &str)
Convert a string to a CSeqDB ESeqType object.
bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)
Deletes all files associated with a BLAST database.
EBlastDbVersion
BLAST database version.
Defines `expert' version of CSeqDB interfaces.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
TGi gi
The GI or 0 if unknown.
int oid
The OID or -1 if unknown.
int oid
The OID or -1 if unknown.
static bool ambig(char c)
Defines BLAST database construction classes.