158 virtual void Init(
void);
159 virtual int Run(
void);
160 virtual void Exit(
void);
167 unsigned delta_level,
251 template <
typename T,
typename Consumer>
287 arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
288 "Seq-id-to-ASN-cache converter");
290 arg_desc->AddDefaultKey(
"i",
"InputFile",
291 "FASTA file to process",
294 arg_desc->AddOptionalKey(
"input-manifest",
"Manifest",
295 "Manifest file listing FASTA files to process, "
299 arg_desc->AddOptionalKey(
"submit-block-template",
"Manifest",
300 "Manifest file with template",
303 arg_desc->AddDefaultKey(
"ifmt",
"InputFormat",
304 "Format of input data",
308 arg_desc->SetConstraint(
"ifmt",
317 arg_desc->AddOptionalKey(
"taxid",
"Taxid",
318 "Taxid of input FASTA sequences",
321 arg_desc->AddOptionalKey(
"taxid-table",
"TaxidTable",
322 "Table of taxids for individual sequences",
324 arg_desc->AddOptionalKey(
"taxid-table-manifest",
"TaxidTableManifest",
325 "Manifest of taxid tables",
327 arg_desc->SetDependency(
"taxid-table-manifest",
329 arg_desc->AddDefaultKey(
"taxid-column",
"TaxidColumn",
330 "column in taxid table with taxid",
335 arg_desc->AddOptionalKey(
"molinfo",
"Molinfo",
336 "Type of molecule that sequences represent",
341 molinfo_options->
Allow(it->first);
343 arg_desc->SetConstraint(
"molinfo", molinfo_options);
345 arg_desc->AddOptionalKey(
"biosource",
"Biosource",
346 "genome source of sequences",
352 "taxid-table-manifest");
356 "taxid-table-manifest");
361 biosource_options->
Allow(it->first);
363 arg_desc->SetConstraint(
"biosource", biosource_options);
365 arg_desc->AddDefaultKey(
"inst-mol",
"InstMol",
366 "Value for Seq.inst.mol",
371 inst_mol_options->
Allow(it->first);
373 arg_desc->SetConstraint(
"inst-mol", inst_mol_options);
375 arg_desc->AddOptionalKey(
"uniprot-source-table",
"UniprotSourceTable",
376 "Table of uniprot source for individual sequences",
378 arg_desc->AddOptionalKey(
"uniprot-source-table-manifest",
379 "UniprotSourceTableManifest",
380 "Manifest of uniprot source tables",
382 arg_desc->SetDependency(
"uniprot-source-table-manifest",
384 arg_desc->AddDefaultKey(
"uniprot-source-column",
"UniprotSourceColumn",
385 "column in uniprot source table with uniprot source",
388 arg_desc->AddKey(
"cache",
"OutputFile",
389 "Path to the cache directory",
392 arg_desc->AddDefaultKey(
"oseq-ids",
"OutputFile",
393 "Seq-ids that were added to the cache",
397 arg_desc->AddDefaultKey(
"seq-id-type",
"SeqIdType",
398 "If sequence has several seq-ids, which one to choose",
400 arg_desc->SetConstraint(
"seq-id-type",
405 arg_desc->AddFlag(
"no-title",
406 "For FASTA input, don't put a title on the Bioseq");
408 arg_desc->AddOptionalKey(
"max-fasta-id",
"MaxFastaIDLength",
409 "For FASTA input, maximum ID size, overriding "
410 "CSeq_id-defined limits",
413 arg_desc->AddOptionalKey(
"id-prefix",
"FASTAIdPrefix",
414 "For FASTA input with local ids, add this prefix to each id",
417 arg_desc->AddOptionalKey(
"strip-annots-and-inst-mol",
"StripAnnotsAndInstMol",
418 "Comma-separated list of molecule classes of instances - Seq.inst.mol - to strip.",
421 arg_desc->AddFlag(
"split-sequences",
422 "Split group of sequences packaged together. Applicable to asn(b)-seq-entry format.");
424 arg_desc->AddFlag(
"extract-delta",
425 "Extract and index delta-seq far-pointers");
426 arg_desc->SetDependency(
"split-sequences",
429 arg_desc->AddOptionalKey(
"delta-level",
"RecursionLevel",
430 "Number of levels to descend when retrieving "
431 "items in delta sequences",
433 arg_desc->SetDependency(
"delta-level",
436 arg_desc->AddFlag(
"resume",
"Resume interrupted previous execution");
438 arg_desc->AddFlag(
"non-exclusive",
439 "Can run this cache process in parallel with other "
440 "tasks; use this if writing to a dedicated cache rather "
441 "than the build's standard cache. Ignored by "
442 "application, but provides information to action node");
445 arg_desc->SetCurrentGroup(
"Default application arguments");
462 }
else if (entry.
IsSeq()) {
463 const objects::CBioseq& bioseq = entry.
GetSeq();
477 bool updated =
false;
479 for(
auto& orig_desc: descs) {
480 if(new_desc->
Which() != orig_desc->Which() )
continue;
481 switch ( orig_desc->Which() ) {
495 orig_desc->Assign(*new_desc);
502 descs.push_back(new_desc);
530 if (
GetArgs()[
"max-fasta-id"]) {
533 objects::CGPipeMessageListener messageListener;
534 while ( !reader.
AtEOF() ) {
537 ostr_seqids <<
"#Clean wrapup\n";
539 "trapped signal, exiting");
546 if ((*id_it)->IsLocal()) {
547 if ((*id_it)->GetLocal().IsStr()) {
548 (*id_it)->SetLocal().SetStr()
549 .insert(0,
GetArgs()[
"id-prefix"].AsString());
552 (*id_it)->GetLocal().GetId());
553 (*id_it)->SetLocal().SetStr(
554 GetArgs()[
"id-prefix"].AsString() + str_id);
576 bool molinfo_found=
false;
577 bool source_found=
false;
579 switch ( desc->Which() ) {
583 if( ! desc->GetMolinfo().IsSetCompleteness()
587 if( ! desc->GetMolinfo().IsSetBiomol()
600 desc->SetSource().SetGenome(
m_Genome );
604 ! desc->GetSource().IsSetOrg() || ! desc->GetSource().GetOrg().IsSetOrgname()
607 desc->SetSource().SetOrg().Assign(*org_data.
orgref);
617 if(!source_found && org_data.
biosource) {
629 if ((*desc)->IsTitle()) {
638 uniprot_source_comment->
SetComment(
"Uniprot Source: "
640 descs.push_back(uniprot_source_comment);
659 ostr_seqids << idh << endl;
662 if (count % 100000 == 0) {
672 void CPrimeCacheApplication::x_Process_SRA(
CNcbiIstream& istr,
692 for ( ; iter; ++iter) {
702 entry->SetSeq().SetInst().SetMol(
m_InstMol);
705 entry->SetSeq().SetDescr().Set().push_back(
m_MolInfo);
707 if (
m_Orgs.begin()->second.biosource) {
708 entry->SetSeq().SetDescr().Set().push_back(
709 m_Orgs.begin()->second.biosource);
713 entry->SetSeq().SetDescr().Set().push_back(*desc);
718 ostr_seqids <<
"#Clean wrapup\n";
720 "trapped signal, exiting");
735 ostr_seqids << idh << endl;
741 if (count % 100000 == 0) {
745 ostr_seqids <<
"#Completed run " << acc << endl;
761 if ( args[
"split-sequences"] ) {
778 while ( !is->EndOfData() ) {
781 "trapped signal, exiting");
785 is->ResetLocalHooks();
801 while ( !is->EndOfData() ) {
805 "trapped signal, exiting");
839 for (
CBioseq_CI bioseq_it(seh); bioseq_it; ++bioseq_it) {
841 if ( trimmed_bioseqs.
empty() || !trimmed_bioseqs.count(idh) ) {
842 ostr_seqids << idh <<
'\n';
850 if (count % 100000 == 0) {
851 LOG_POST(
Error <<
"Cache Seq-entry: processed " << count <<
" entries...");
855 LOG_POST(
Error <<
"Cache Seq-entry: done, cached " << count <<
" items");
864 if (line.empty() || line[0] ==
'#') {
873 unsigned delta_level,
894 "failed to retrieve sequence for id: " + idh.
AsString());
921 for (
CBioseq_CI bioseq_it(seh); bioseq_it; ++bioseq_it) {
923 if ( trimmed_bioseqs.
empty() || !trimmed_bioseqs.count(idh) ) {
924 if (delta_level == 0) {
925 ostr_seqids << idh <<
'\n';
934 if (count % 100000 == 0) {
948 if ( seq_entry.
IsSet() ) {
950 list<CRef<CSeq_entry> >& coll = bset.
SetSeq_set();
968 else if ( seq_entry.
IsSeq() ) {
992 timestamp_(
CTime(
CTime::eCurrent).GetTimeT()),
1006 if (!parent_->m_StripInstMol.empty()) {
1007 if (
false == parent_->x_StripSeqEntry(scope, *entry, trimmed_bioseqs)) {
1016 parent_->m_MainChunk.OpenForWrite(parent_->m_CachePath);
1017 size_t offset = parent_->m_MainChunk.GetOffset();
1018 parent_->m_MainChunk.Write(blob);
1019 size_t size = parent_->m_MainChunk.GetOffset() -
offset;
1020 Uint4 chunk_id = parent_->m_MainChunk.GetChunkSerialNum();
1023 parent_->x_ExtractAndIndex(*entry, timestamp_, chunk_id,
offset,
size);
1027 for (
CBioseq_CI bioseq_it(seh); bioseq_it; ++bioseq_it) {
1029 (*ostr_seqids_) << idh <<
'\n';
1033 if (count_ % 100000 == 0) {
1034 LOG_POST(
Error <<
" processed " << count_ <<
" entries...");
1053 string ifmt = args[
"ifmt"].AsString();
1055 if ((args[
"taxid"] || args[
"taxid-table"] || args[
"taxid-table-manifest"] ||
1056 args[
"molinfo"] || args[
"biosource"] || args[
"submit-block-template"])
1057 && ifmt !=
"fasta" && ifmt !=
"csra")
1060 "metadata parameters only allowed with fasta or SRA input");
1062 if ((args[
"uniprot-source-table"] || args[
"uniprot-source-table-manifest"])
1066 "uniprot source parameters only allowed with fasta input");
1068 if (args[
"resume"] && ifmt !=
"fasta" && ifmt !=
"csra")
1071 "Resume only supported with fasta or SRA input");
1074 if (args[
"taxid"] || args[
"taxid-table"] || args[
"taxid-table-manifest"]) {
1075 if (args[
"taxid"]) {
1079 if (args[
"taxid-table"]) {
1080 taxids_source.
InitStream(args[
"taxid-table"].AsInputFile());
1081 }
else if (args[
"taxid-table-manifest"]) {
1082 taxids_source.
InitManifest(args[
"taxid-table-manifest"].AsString());
1084 unsigned col = args[
"taxid-column"].AsInteger() - 1;
1085 for (; taxids_source; ++taxids_source) {
1088 if (line.empty() || line[0] ==
'#') {
1091 vector<string> tokens;
1101 TTaxId taxid = seq_taxid.second;
1102 if (
m_Orgs.count(taxid)) {
1109 "failed to find Org-ref for taxid " +
1114 m_Orgs[taxid].biosource->SetSource().SetOrg().Assign(*ref);
1116 m_Orgs[taxid].orgref->Assign(*ref);
1120 if (args[
"uniprot-source-table"] || args[
"uniprot-source-table-manifest"]) {
1121 unsigned col = args[
"uniprot-source-column"].AsInteger() - 1;
1123 if (args[
"uniprot-source-table"]) {
1124 uniprot_sources_source.
InitStream(args[
"uniprot-source-table"].AsInputFile());
1125 }
else if (args[
"uniprot-source-table-manifest"]) {
1126 uniprot_sources_source.
InitManifest(args[
"uniprot-source-table-manifest"].AsString());
1128 for (; uniprot_sources_source; ++uniprot_sources_source) {
1131 if (line.empty() || line[0] ==
'#') {
1134 vector<string> tokens;
1137 . AsString()] = tokens[col];
1147 m_InstMol = sm_InstMolTypes.find(args[
"inst-mol"].AsString().c_str())->second;
1149 if (args[
"biosource"]) {
1150 if (!
m_Orgs.begin()->second.biosource) {
1153 m_Orgs.begin()->second.biosource->SetSource().SetGenome(
1154 sm_GenomeTypes.find(args[
"biosource"].AsString().c_str())->second);
1155 m_Genome = sm_GenomeTypes.find(args[
"biosource"].AsString().c_str())->second;
1159 if (args[
"molinfo"]) {
1162 sm_BiomolTypes.find(args[
"molinfo"].AsString().c_str())->second);
1164 if (args[
"submit-block-template"]) {
1166 CNcbiIstream& istr_manifest = args[
"submit-block-template"].AsInputFile();
1167 unique_ptr<CObjectIStream> is
1169 while ( !is->EndOfData() ) {
1170 if ( !submit_block ) {
1172 *is >> *submit_block;
1178 switch (desc->
Which()) {
1218 bool resuming_from_clean_wrapup =
false;
1219 if (args[
"resume"]) {
1220 CFile output_file(args[
"oseq-ids"].AsString());
1221 if (!output_file.
Exists()) {
1223 "Can't resums; " + output_file.
GetPath() +
" not found");
1230 if (!line.empty() && line[0] !=
'#') {
1233 previous_execution_runs.
insert(line.substr(15));
1235 if ((resuming_from_clean_wrapup = (line ==
"#Clean wrapup"))) {
1250 ostr <<
"#interrupted-execution\n";
1252 ostr <<
"#Completed run " << run <<
'\n';
1256 id.
GetSeqId()->GetGeneral().GetTag().IsStr())
1258 string tag =
id.GetSeqId()->GetGeneral().GetTag().GetStr();
1266 ostr <<
"#Clean wrapup\n";
1268 ostr <<
"#" << args[
"seq-id-type"].AsString() <<
"-id" << endl;
1269 m_id_type = args[
"seq-id-type"].AsString() ==
"canonical"
1272 if (args[
"strip-annots-and-inst-mol"]) {
1273 list<string> mol_types;
1274 NStr::Split(args[
"strip-annots-and-inst-mol"].AsString(),
string(
","), mol_types, 0);
1275 vector<string> unknown_mols;
1276 for (list<string>::const_iterator mol = mol_types.cbegin(); mol != mol_types.cend(); ++mol) {
1279 if ( record != sm_InstMolTypes.end() ) {
1283 unknown_mols.push_back(
key);
1287 if (!unknown_mols.empty()) {
1289 oss <<
"Unknown molecule classes: [";
1290 for (vector<string>::const_iterator
i = unknown_mols.cbegin();
i != unknown_mols.end(); ++
i ) {
1293 oss <<
"]. Valid classes: [aa, dna, na, other, rna]";
1300 if (args[
"delta-level"]) {
1306 if (args[
"input-manifest"]) {
1307 CNcbiIstream& istr = args[
"input-manifest"].AsInputFile();
1311 if (line.empty() || line[0] ==
'#') {
1316 if (ifmt ==
"ids") {
1319 else if (ifmt ==
"fasta") {
1322 #ifdef HAVE_NCBI_VDB
1323 else if (ifmt ==
"csra") {
1324 x_Process_SRA(is, ostr);
1327 else if (ifmt ==
"asn-seq-entry") {
1330 else if (ifmt ==
"asnb-seq-entry") {
1335 "unhandled input format");
1341 if (ifmt ==
"ids") {
1344 else if (ifmt ==
"fasta") {
1347 #ifdef HAVE_NCBI_VDB
1348 else if (ifmt ==
"csra") {
1349 x_Process_SRA(istr, ostr);
1352 else if (ifmt ==
"asn-seq-entry") {
1355 else if (ifmt ==
"asnb-seq-entry") {
1360 "unhandled input format");
1382 ITERATE (CBioseq::TInst::TExt::TDelta::Tdata, iter,
1386 for ( ; id_iter; ++id_iter) {
1408 int main(
int argc,
const char* argv[])
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t IndexABioseq(const objects::CBioseq &bioseq, CAsnIndex &index, CAsnIndex::TTimestamp timestamp, CAsnIndex::TChunkId chunk_id, CAsnIndex::TOffset offset, CAsnIndex::TSize size)
This is a simple BDB structure holding information about a given accession and its indexed location.
const CSeq_id * GetFirstId() const
void Pack(const CSeq_entry &entry)
void OpenForWrite(const std::string &root_path="")
void Write(const CCache_blob &cache_blob)
unsigned int GetChunkSerialNum() const
Base class for reading FASTA sequences.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
static void AddArguments(CArgDescriptions &arg_desc)
CConstRef< objects::COrg_ref > GetOrgRef(TTaxid taxid)
CObjectEnum(const CObjectEnum &)
CObjectEnum & operator=(const CObjectEnum &)
void SkipObject(CObjectIStream &istr, const CObjectTypeInfo &info)
CObjectEnum(Consumer consumer)
CNcbiOstream * ostr_seqids_
CRef< CObjectManager > om_
CPrimeCacheApplication * parent_
CCacheBioseq(CPrimeCacheApplication *p, CNcbiOstream *ostr)
void operator()(CBioseq &bseq)
void x_ExtractDelta(CBioseq_Handle bsh, set< CSeq_id_Handle > &delta_ids)
bool x_StripSeqEntry(CScope &scope, CSeq_entry &entry, set< CSeq_id_Handle > &trimmed_bioseqs)
void x_UpsertDescriptor(list< CRef< CSeqdesc > > &descs, CRef< CSeqdesc > new_desc)
void x_Process_Ids(const set< CSeq_id_Handle > &ids, CNcbiOstream &ostr_seqids, unsigned delta_level, size_t count)
void x_Read_Ids(CNcbiIstream &istr, set< CSeq_id_Handle > &ids)
map< string, TTaxId > m_SequenceTaxids
void x_CacheSeqEntry(CNcbiIstream &istr, CNcbiOstream &ostr_seqids, ESerialDataFormat serial_fmt, set< CSeq_id_Handle > &delta_ids, size_t &count)
virtual void Init(void)
Initialize the application.
void x_Process_SeqEntry(CNcbiIstream &istr, CNcbiOstream &ostr_seqids, ESerialDataFormat serial_fmt, set< CSeq_id_Handle > &delta_ids, size_t &count)
sequence::EGetIdType m_id_type
void x_ExtractAndIndex(const CSeq_entry &entry, CAsnIndex::TTimestamp timestamp, CAsnIndex::TChunkId chunk_id, CAsnIndex::TOffset offset, CAsnIndex::TSize size)
CSeq_inst::EMol m_InstMol
void x_SplitAndCacheSeqEntry(CNcbiIstream &istr, CNcbiOstream &ostr_seqids, ESerialDataFormat serial_fmt)
map< TTaxId, SOrgData > m_Orgs
CBioSource::EGenome m_Genome
set< CSeq_id_Handle > m_CachedIds
CSeqIdChunkFile m_SeqIdChunk
list< CRef< CSeqdesc > > m_other_descs
map< string, string > m_SequenceUniprotSources
CRef< CSeqdesc > m_MolInfo
set< string > m_PreviousExecutionRuns
set< CSeq_inst::EMol > m_StripInstMol
virtual int Run(void)
Run the application.
virtual void Exit(void)
Cleanup on application exit.
void x_Process_Fasta(CNcbiIstream &istr, CNcbiOstream &ostr_seqids)
set< CSeq_id_Handle > m_PreviousExecutionIds
void OpenForWrite(const std::string &root_path="")
void Write(const objects::CBioseq::TId &seq_ids)
const CSeq_descr & GetDescr(void) const
bool IsSetDescr(void) const
Skip hook for a standalone object.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
TBase::const_iterator const_iterator
Template class for iteration on objects of class C (non-modifiable version)
iterator_bool insert(const value_type &val)
Operators to edit gaps in sequences.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
#define TAX_ID_FROM(T, value)
CArgAllow_Strings * Allow(const string &value)
Add allowed string values.
@ eRequires
One argument requires another.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
@ fAppend
Open file in append mode.
void SetCacheSize(unsigned int cache_size)
Set Berkeley DB memory cache size for the file (default is 256K).
void Open(const string &filename, EOpenMode open_mode, bool support_dirty_read=false, unsigned rec_len=0)
Open file with specified access mode.
@ eReadWriteCreate
read-write, create if it doesn't exist
void PrintRequestStop(void)
Print request stop message (for request-driven applications)
CDiagContext & GetDiagContext(void)
Get diag context instance.
void PrintRequestStart(const string &message)
Print request start message (for request-driven applications)
static CRequestContext & GetRequestContext(void)
Shortcut to CDiagContextThreadData::GetThreadData().GetRequestContext()
void SetRequestStatus(int status)
const CStopWatch & GetRequestTimer(void) const
Request execution timer.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
bool CreatePath(TCreateFlags flags=fCreate_Default) const
Create the directory path recursively possibly more than one at a time.
virtual bool Exists(void) const
Check if directory "dirname" exists.
const string & GetPath(void) const
Get entry path.
virtual bool Exists(void) const
Check existence of file.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
long TFlags
binary OR of EFlags
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
void SetMaxIDLength(Uint4 max_len)
If this is set, an exception will be thrown if a Sequence ID exceeds the given length.
@ fRequireID
Reject deflines that lack IDs.
@ fAddMods
Parse defline mods and add to SeqEntry.
@ fNoUserObjs
Don't save raw deflines in User-objects.
@ fForceType
Force specified type regardless of accession.
@ fAssumeNuc
Assume nucs unless accns indicate otherwise.
@ fAssumeProt
Assume prots unless accns indicate otherwise.
@ fDisableParseRange
No ranges in seq-ids. Ranges part of seq-id instead.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
void SetLocalSkipHook(CObjectIStream &stream, CSkipObjectHook *hook) const
Set local (for the specified stream) skip hook.
void ReadObject(const CObjectInfo &object)
Read child object.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders. Add object to the scope with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TInst & GetInst(void) const
void Reset(void)
Reset reference object.
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
time_t GetTimeT(void) const
Get time in time_t format.
void Start(void)
Start the timer.
@ eCurrent
Use current time. See also CCurrentTime.
static bool IsSignaled(TSignalMask signals=eSignal_Any)
Check that any of specified signals is received.
static void TrapSignals(TSignalMask signals)
Sets interrupt signal handling.
@ eSignal_TERM
Termination.
void SetTimestamp(TTimestamp value)
Assign a value to Timestamp data member.
const TOrg & GetOrg(void) const
Get the Org member data.
EGenome
biological context
@ e_General
for other databases
const TSeq & GetSeq(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
void ResetAnnot(void)
Reset Annot data member.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void ResetDescr(void)
Reset Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
void ResetDescr(void)
Reset Descr data member.
const TInst & GetInst(void) const
Get the Inst member data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
void ResetAnnot(void)
Reset Annot data member.
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
const Tdata & Get(void) const
Get the member data.
list< CRef< CSeq_id > > TId
TMol GetMol(void) const
Get the Mol member data.
TComment & SetComment(void)
Select the variant.
bool IsDelta(void) const
Check if variant Delta is selected.
void SetInst(TInst &value)
Assign a value to Inst data member.
const TExt & GetExt(void) const
Get the Ext member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
EMol
molecule class in living organism
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDelta & GetDelta(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
TMolinfo & SetMolinfo(void)
Select the variant.
bool CanGetInst(void) const
Check if it is safe to call GetInst method.
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_genomic_mRNA
reported a mix of genomic and cdna sequence
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ eBiomol_other_genetic
other genetic material
@ e_Embl
EMBL specific information.
@ e_Update_date
date of last update
@ e_Pir
PIR specific info.
@ e_Genbank
GenBank specific info.
@ e_Prf
PRF specific information.
@ e_Sp
SWISSPROT specific info.
@ e_Molinfo
info on the molecule and techniques
@ e_Create_date
date entry first created/released
@ e_Title
a title for this sequence
@ e_Pdb
PDB specific information.
@ e_Name
a name for this sequence
@ e_Source
source of materials, includes Org-ref
@ eMol_na
just a nucleic acid
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
Setup interrupt signal handling.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
SStaticPair< const char *, const CSeq_inst::EMol > TInstMolTypeKey
CStaticPairArrayMap< const char *, const CBioSource::EGenome, PCase > TGenomeTypeMap
CStaticPairArrayMap< const char *, const CSeq_inst::EMol, PCase > TInstMolTypeMap
SStaticPair< const char *, const CBioSource::EGenome > TGenomeTypeKey
DEFINE_STATIC_ARRAY_MAP(TBiomolTypeMap, sm_BiomolTypes, db_biomol_type_name_to_enum)
static const TBiomolTypeKey db_biomol_type_name_to_enum[]
int main(int argc, const char *argv[])
CStaticPairArrayMap< const char *, const CMolInfo::EBiomol, PCase > TBiomolTypeMap
static const TInstMolTypeKey db_inst_mol_type_name_to_enum[]
SStaticPair< const char *, const CMolInfo::EBiomol > TBiomolTypeKey
static const TGenomeTypeKey db_genome_type_name_to_enum[]
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
Defines CRequestContext class for NCBI C++ diagnostic API.
CRef< objects::CObjectManager > om
CRef< CSeqdesc > biosource
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...