102 using namespace ncbi;
113 template<
class TLoader>
115 : m_loader{
info.GetLoader() } {}
127 CSeq_submit::GetTypeInfo(), CSeq_entry::GetTypeInfo(), CSeq_annot::GetTypeInfo(),
128 CSeq_feat::GetTypeInfo(), CBioSource::GetTypeInfo(), CPubdesc::GetTypeInfo(),
129 CBioseq_set::GetTypeInfo(), CBioseq::GetTypeInfo(), CSeqdesc::GetTypeInfo(),
136 entry->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_dna);
137 entry->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
138 entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
139 entry->SetSeq().SetInst().SetLength(60);
142 id->SetLocal().SetStr(
"good");
143 entry->SetSeq().SetId().push_back(
id);
146 mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_genomic);
147 entry->SetSeq().SetDescr().Set().push_back(mdesc);
155 mAppConfig(appConfig)
233 in.ReadClassMember(member);
254 unique_ptr<CNcbiIstream> hold_stream;
257 if (!fname.empty()) {
259 hold_stream = make_unique<CNcbiIfstream>(fname, ios::binary);
260 InputStream = hold_stream.get();
279 hold_stream.release();
280 hold_stream.reset(decompress);
281 InputStream = hold_stream.get();
284 format =
fg.GuessFormatAndContent(contentInfo);
287 unique_ptr<CObjectIStream> objectStream;
293 hold_stream.release();
307 msgHandler.
Write(ignoreInferences);
314 os <<
"Unable to read invalid ASN.1";
320 os <<
": " <<
mpIstr->GetPosition();
323 os <<
": unexpected end of file";
360 LOG_POST_XX(Corelib_App, 1,
"FAILURE: Record is not a batch Seq-submit, do not use -a u to process.");
383 LOG_POST_XX(Corelib_App, 1,
"FAILURE: Record is not a batch Bioseq-set, do not use -a t to process.");
458 se->SetSet(*bioseqset);
495 for (
auto& se: ss->
SetData().SetEntrys() ) {
568 if (asninfo==
nullptr) {
570 if (content.size() == 1) {
571 asninfo = *content.begin();
574 if (asninfo==
nullptr) {
581 auto obj_info =
mpIstr->Read(asninfo);
582 serial.
Reset(
static_cast<CSerialObject*
>(obj_info.GetObjectPtr()));
592 string asn_type = asninfo->
GetName();
593 bool unhandledType{
false};
597 if (asn_type ==
"Seq-submit") {
599 }
else if (asn_type ==
"Seq-entry") {
601 }
else if (asn_type ==
"Seq-annot") {
603 }
else if (asn_type ==
"Seq-feat") {
605 }
else if (asn_type ==
"BioSource") {
607 }
else if (asn_type ==
"Pubdesc") {
609 }
else if (asn_type ==
"Bioseq-set") {
611 }
else if (asn_type ==
"Bioseq") {
613 }
else if (asn_type ==
"Seqdesc") {
616 unhandledType =
true;
622 string errstr = e.
GetMsg();
637 const string& loader_name,
644 if (!loader_name.empty())
651 if (scope->
Exists(seq_id_h)) {
670 pSubmit->SetSub().Assign(*pSubmitBlock);
671 pSubmit->SetData().SetEntrys().push_back(pEntry);
698 auto& reader = process.GetReader();
700 auto info = edit::CHugeAsnDataLoader::RegisterInObjectManager(
703 CAutoRevoker autorevoker(
info);
708 if (
const auto& topIds = reader.GetTopIds(); !topIds.empty()) {
745 if (!process.ReadNextBlob())
749 catch (
const edit::CHugeFileException& e) {
750 if (e.GetErrCode() == edit::CHugeFileException::eDuplicateSeqIds)
773 const string& loader_name,
782 _this->
ValidateAsync(loader_name, pSubmitBlock, seqid, msgHandler);
787 string errstr = e.
GetMsg();
801 auto& reader = process.GetReader();
802 auto writer_task = std::async([
this, &ignoreInferences, &msgHandler] {
if(msgHandler.
InvokeWrite()){ msgHandler.Write(ignoreInferences); } });
806 auto topids_task = std::async(std::launch::async, [
this, &val_queue, &loader_name, &reader, &msgHandler]()
808 auto pSubmitBlock = reader.GetSubmitBlock();
809 for (
auto seqid : reader.GetTopIds())
811 auto fut = std::async(std::launch::async, ValidateWorker,
812 this, loader_name, pSubmitBlock, seqid, std::ref(msgHandler));
814 val_queue.push_back(std::move(fut));
817 val_queue.push_back({});
823 auto result = val_queue.pop_front();
830 auto exit_data =
result.get();
844 const string& loader_name,
845 edit::CHugeFileProcess& process,
848 auto& reader = process.GetReader();
850 for (
auto seqid : reader.GetTopIds())
852 auto pSubmitBlock = reader.GetSubmitBlock();
883 if (asninfo == CBioseq_set::GetTypeInfo()) {
890 if (asninfo == CSeq_submit::GetTypeInfo()) {
891 const auto commandLineOptions =
m_Options;
906 LOG_POST_XX(Corelib_App, 1,
"FAILURE: Record is neither a Seq-submit nor Bioseq-set; do not use -batch to process.");
914 unique_ptr<IMessageHandler> pMsgHandler;
922 result.mReported += pMsgHandler->GetNumReported();
934 unique_ptr<edit::CHugeFileProcess> mpHugeFileProcess;
936 if (filename.empty())
939 auto huge_reader =
Ref(
new edit::CHugeAsnReader());
945 mpHugeFileProcess.reset(
new edit::CHugeFileProcess(huge_reader.GetPointer()));
948 asninfo = mpHugeFileProcess->GetFile().m_content;
951 mpHugeFileProcess.reset();
957 mpIstr = mpHugeFileProcess->GetReader().MakeObjStream(0);
974 LOG_POST_XX(Corelib_App, 1,
"FAILURE: Unable to process invalid ASN.1 file " + filename);
995 string errstr = e.
GetMsg();
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eErr_GENERIC_InvalidAsn
@ eErr_GENERIC_DuplicateIDs
@ eErr_INTERNAL_Exception
void ProcessPubdesc(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
void ValidateBlobAsync(const string &loader_name, edit::CHugeFileProcess &process, IMessageHandler &msgHandler)
bool ValidateTraditionally(TTypeInfo asninfo, IMessageHandler &msgHandler)
void ValidateBlobSequential(const string &loader_name, edit::CHugeFileProcess &process, IMessageHandler &msgHandler)
void ReportReadFailure(const CException *p_exception, IMessageHandler &msgHandler)
void ProcessSeqAnnot(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
shared_ptr< SValidatorContext > m_pContext
static CThreadExitData ValidateWorker(CAsnvalThreadState *_this, const string &loader_name, CConstRef< CSubmit_block > pSubmitBlock, CConstRef< CSeq_id > seqid, IMessageHandler &msgHandler)
void ProcessBioSource(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
void ReadClassMember(CObjectIStream &in, const CObjectInfo::CMemberIterator &member, IMessageHandler &msgHandler)
std::list< CConstRef< CValidError > > m_eval
void ProcessSeqEntry(CSeq_entry &se, IMessageHandler &msgHandler)
CHugeFileValidator::TGlobalInfo m_GlobalInfo
void ProcessBioseq(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
CRef< CScope > BuildScope() const
bool ValidateBatchMode(TTypeInfo asninfo, IMessageHandler &msgHandler)
CAsnvalThreadState(const CAppConfig &, SValidatorContext::taxupdate_func_t taxon)
void ProcessBioseqset(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
void ProcessSeqSubmit(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
void ValidateOneHugeFile(edit::CHugeFileProcess &process, IMessageHandler &msgHandler)
CThreadExitData ValidateOneFile(const string &infilename, CNcbiOstream &ostr)
void ProcessSSMReleaseFile(IMessageHandler &msgHandler)
const CAppConfig & mAppConfig
void ProcessSeqFeat(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
unique_ptr< CObjectIStream > OpenFile(TTypeInfo &asn_info, const string &filename) const
void ProcessBSSReleaseFile(IMessageHandler &msgHandler)
std::atomic< size_t > m_Reported
void ValidateInput(TTypeInfo asninfo, IMessageHandler &msgHandler)
void ValidateAsync(const string &loader_name, CConstRef< CSubmit_block > pSubmitBlock, CConstRef< CSeq_id > seqid, IMessageHandler &msgHandler) const
void ProcessSeqDesc(CRef< CSerialObject > serial, IMessageHandler &msgHandler)
void ValidateOneHugeBlob(edit::CHugeFileProcess &process, IMessageHandler &msgHandler)
CRef< CObjectManager > m_ObjMgr
unique_ptr< CObjectIStream > mpIstr
TChanges BasicCleanup(CSeq_entry &se, Uint4 options=0)
void SetScope(CScope *scope)
void ReportGlobalErrors(const TGlobalInfo &globalInfo, IValidError &errors) const
void UpdateValidatorContext(const TGlobalInfo &globalInfo, SValidatorContext &context) const
static void RegisterReaderHooks(CObjectIStream &objStream, SGlobalInfo &m_GlobalInfo)
void ReportPostErrors(const SValidatorContext &context, IValidError &errors) const
Reading (iterating through) elements of containers (SET OF, SEQUENCE OF).
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Root class for all serialization exceptions.
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
static void SetSuppressedCodes(const CUser_object &user, TCodes &errCodes)
CRef< CValidError > Validate(const CSeq_entry &se, CScope *scope=nullptr, Uint4 options=0)
virtual void Write(bool ignoreInferences=true)=0
virtual bool InvokeWrite() const =0
void AddValidErrItem(EDiagSev sev, unsigned int ec, const string &msg, const string &desc, const CSerialObject &obj, const string &acc, const int ver, const string &location=kEmptyStr, const int seq_offset=0) override
virtual void RequestStop()=0
Include a standard set of the NCBI C++ Toolkit most basic headers.
static void cleanup(void)
static const struct type types[]
@ eTakeOwnership
An object can take ownership of another.
@ eNoOwnership
No ownership is assumed.
EMethod
Compression/decompression methods.
@ eNone
no compression method (copy "as is")
@ eGZipFile
.gz file (including concatenated files)
@ fDefault
Use algorithm-specific defaults.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST_XX(error_name, err_subcode, message)
@ eDiag_Fatal
Fatal error – guarantees exit (or abort).
@ eDiag_Critical
Critical error message.
void Error(CExceptionArgs_Base &args)
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
@ eEOF
Unexpected end-of-file.
void ResetHistory(EActionIfLocked action=eKeepIfLocked)
Clean all unused TSEs from the scope's cache and release the memory.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
bool RevokeDataLoader(CDataLoader &loader)
Revoke previously registered data loader.
bool Exists(const CSeq_id &id)
Check existence of sequence with this id.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
const TId & GetId(void) const
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
@ eNocase
Case insensitive compare.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
@ eStart
Start timer immediately after creating.
const string & GetName(void) const
Get name of this type.
ENcbiOwnership
Ownership relations between objects.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
bool IsEntrys(void) const
Check if variant Entrys is selected.
Definition of all error codes used in corelib (xncbi.lib).
Magic spell ;-) needed for some weird compilers... very empiric.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
std::istream & in(std::istream &in_, double &x_)
void SetLocalReadHook(const CObjectTypeInfo &obj_type_info, CObjectIStream &ostr, _Func _func)
C++ I/O stream wrappers to compress/decompress data on-the-fly.
int CumulativeInferenceCount
function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref > > &list)> taxupdate_func_t
const set< TTypeInfo > s_known_types
static void s_StartWrite(IMessageHandler &msgHandler, bool ignoreInferences=false)
static CRef< objects::CSeq_entry > s_BuildGoodSeq()
CFileContentInfoGenbank mInfoGenbank
const int InferenceAccessionCutoff