54 #define _TRACE(arg) ((void)0)
69 _TRACE(
"Resolving identifier: " << identifier);
72 _TRACE(
"Trying to get it as node value (IMacroVar) from m_MacroRep: " <<
m_MacroRep);
78 _TRACE(
"Trying to get it as an RT Var");
82 _TRACE(
"Successfully resolved as node value");
86 _TRACE(
"Trying to get it via ResolveIdentToSimple() from m_DataIter: " <<
m_DataIter);
92 _TRACE(
"Successfully resolved via ResolveIdentToSimple() from m_DataIter");
96 _TRACE(
"All attempts to resolve failed");
100 _TRACE(
"Identifier resolved successfully: " << res);
108 _TRACE(
"Looking for: " << identifier);
113 string sRTVarRoot, sRTVarFields;
123 _TRACE(
"set reference from node: " << rt_var <<
" to node: " << &v);
133 _TRACE(
"resolved subfields of run-time vars");
137 _TRACE(
"set reference from node: " << rt_var <<
" to node: " << &
val);
143 _TRACE(
"set reference from node: " << rt_var <<
" to node: " << &
val);
151 _TRACE(
"got value using iterator in form of objects (from tmpObj)");
155 _TRACE(
"got simple value using iterator");
161 _TRACE(
"RT Var not found");
169 _TRACE(
"Looking for: " << name);
190 _TRACE(
"Looking for: " << name);
215 _TRACE(
"Looking for (whether it exists) :" << name);
257 "Function '" + name +
"' not implemented", &qnode);
276 string message(
"Function ");
278 message +=
" failed";
293 catch (
const std::exception& e) {
334 string name = iter.first;
339 do_funcs.push_back(name);
342 where_funcs.push_back(name);
345 do_funcs.push_back(name);
346 where_funcs.push_back(name);
689 while (!line_reader->
AtEOF()) {
702 vector<CRef<CMacroRep>> parsed_vec;
707 while (parser.
Parse(
false)) {
709 parsed_vec.push_back(mr);
721 lib.reserve(lib.size() + parsed_vec.size());
722 lib.insert(lib.end(), parsed_vec.begin(), parsed_vec.end());
735 while (!line_reader->
AtEOF()) {
748 macro_list.resize(0);
752 while (parser.
Parse(
false)) {
754 macro_list.push_back(mr);
761 macro_list.resize(0);
775 if (macro_text.empty()) {
788 string msg = (ex.GetMsg().empty()) ? ex.what() : ex.GetMsg();
811 exec_info.
m_ErrorMessage =
"Missing FOR EACH statement or incorrect selector specified or incorrect NA provided";
814 if (throw_on_error) {
817 msg =
"Missing FOR EACH statement, or unsupported selector";
820 msg =
"Incorrect NA provided or missing FOR EACH statement or unsupported selector specified";
828 CMacroResolver resolver(macro_rep, data_iter, CmdComposite, context, stat);
829 bool evaluateDo =
true;
835 while (!data_iter->IsEnd()) {
845 "Wrong type of computed WHERE clause",
tree->GetQueryTree());
858 data_iter->BuildEditedObject();
863 if (data_iter->IntendToDelete()) {
864 data_iter->RunDeleteCommand(CmdComposite);
867 if (data_iter->IsModified()) {
868 data_iter->RunEditCommand(CmdComposite);
876 int nr_nonmatched = 0;
877 const string msg = data_iter->GetUnMatchedTableEntries(nr_nonmatched);
891 oss <<
"[Error] Line " << exec_info.
m_Line <<
", Pos " << exec_info.
m_Column;
919 exec_info.
m_ErrorMessage =
"Missing FOR EACH statement or incorrect selector specified or incorrect NA provided";
922 if (throw_on_error) {
925 msg =
"Missing FOR EACH statement, or unsupported selector";
928 msg =
"Incorrect NA provided or missing FOR EACH statement or unsupported selector specified";
937 CMacroResolver resolver(macro_rep, data_iter, CmdComposite, func_context, *stat);
938 bool evaluateDo =
true;
948 while (!data_iter->IsEnd()) {
960 "Wrong type of computed WHERE clause",
tree->GetQueryTree());
973 data_iter->BuildEditedObject();
978 if (data_iter->IntendToDelete()) {
979 data_iter->RunDeleteCommand(CmdComposite);
982 if (data_iter->IsModified()) {
983 data_iter->RunEditCommand(CmdComposite);
1002 oss <<
"[Error] Line " << exec_info.
m_Line <<
", Pos " << exec_info.
m_Column;
1048 if (thread_count == 1) {
1054 bool is_data_bioseq = data.
GetTSE().IsSeq();
1055 if (!is_data_bioseq) {
1057 exec_info.
m_ErrorMessage =
"For parallel execution, top seq-entry is expected to be a bioseq";
1063 if (!is_feat_selector) {
1065 exec_info.
m_ErrorMessage =
"For parallel execution, FOR EACH selector is expected to be a feature selector";
1077 vector<future<stringstream>> futures(thread_count);
1078 vector<thread> threads(thread_count);
1083 vector<CRef<CMacroStat>>
stats;
1084 stats.reserve(thread_count);
1085 for (
unsigned i = 0;
i < thread_count; ++
i) {
1089 vector<CRef<CMacroRep>> reps_for_threads;
1090 reps_for_threads.reserve(thread_count);
1091 for (
unsigned i = 0;
i < thread_count; ++
i) {
1103 for (
unsigned i = 0;
i < thread_count; ++
i) {
1107 if (start == total_range.
GetFrom()) {
1111 LOG_POST(
Info <<
"Thread " <<
i <<
": [" << start <<
", " << stop <<
"]");
1116 LOG_POST(
Info <<
"Thread " <<
i <<
": (" << start <<
", " << stop <<
"]");
1121 futures[
i] = task.get_future();
1123 threads[
i] = thread(std::move(task), cref(reps_for_threads[
i].GetObject()), cref(data), feat_int,
1124 ref(CmdComposite), ref(
stats[
i]), throw_on_error);
1131 stringstream result_stream;
1133 for (
auto&& it : futures) {
1134 result_stream << it.get().str();
1139 *ostream << result_stream.str();
1141 for (
auto&& stat_it :
stats) {
1142 auto report = stat_it->GetMacroReport();
1143 if (!report.GetLog().empty()) {
1144 LOG_POST(
Info << report.GetName() <<
":\n" << report.GetLog());
1166 unsigned thread_count = 0;
1178 LOG_POST(
Info <<
"Starting to get number of SNP Features");
1181 if (feat_nrs < 100'000) {
1184 else if (feat_nrs <= 5'000'000) {
1190 LOG_POST(
Info <<
"Number of SNP Features: " << feat_nrs);
1191 LOG_POST(
Info <<
"Number of threads to be used is " << thread_count);
1195 LOG_POST(
Info <<
"Number of threads to be used is " << thread_count);
1202 return thread_count;
Subclass of the IQueryParseUserObject which is held as the user-defined object in each CQueryParseNod...
The following asn-selectors are defined to be used in the FOR EACH statement:
class CMacroExecException
class CMacroExecException
Subclass of CQueryExec that adds: 1) Macro identifiers resolution 2) Where clause evaluation.
Class provides macro language interface for bulk bio-editing.
Class for parsed macro representation.
CMacroResolver Variable and function resolver Provides interfaces for:
CMacroStat - collecting statistics about a single macro.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Query tree and associated utility methods.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
definition of a Culling tree
Base class for any user function that performs editing operations on ASN.1 data.
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
static const int chunk_size
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define DIAG_COMPILE_INFO
Make compile time diagnostic information object to use in CNcbiDiag and CException.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
void Info(CExceptionArgs_Base &args)
static const char * sm_BsrcForSeq
static CTempString GetFuncName()
static const char * sm_Stop
vector< CRef< CMacroRep > > TMacroLibrary
void GetFunctionNames(CMacroParser::TFunctionNamesList &where_funcs, CMacroParser::TFunctionNamesList &do_funcs) const
static const char * sm_Automatic
static CTempString GetFuncName()
static const char * sm_Contain
static CTempString GetFuncName()
static CTempString GetFuncName()
CRef< CMacroCmdComposite > m_CmdComposite
SMacroInfo m_ParsingInfo
Status, error message related to parsing a macro.
virtual void ResetTmpRTVarObjects()
static const char * sm_Lower
static CTempString GetFuncName()
IMacroBioDataIter * CreateIterator(const string &selector, const string &named_annot, const TSeqRange &range, CNcbiOstream *ostr=nullptr) const
Create specific iterator as a subclass of IMacroBioDataIter.
bool Parse(bool bSingleMacroMode=true, CQueryExec *exec=NULL)
Parse the macro and build its representation Function throws CMacroException if parsing is unsuccessf...
static const char * sm_BsrcForSeqdesc
static const char * sm_PubIssue
static const char * sm_SeqNa
class CMacroFunction_LocationSeqType IS_SEQ_AA(["location"]) - returns true if the sequence identifie...
static CTempString GetFuncName()
EScopeEnum GetFuncScope()
bool GetNodeValue(const string &name, IQueryMacroUserObject &v) const
Function fills in IQueryMacroUserObject-derived node out of stored variable.
void AddToReport(const string &report)
function gradually builds the log report
void x_SetFunctions(CMacroParser &parser)
Initialize do/where lists of functions in parser.
void x_InitSetOfBuiltInFunctions()
void GatherUnMatchedTableEntries(const string &report, const int count)
Used when applying a table to the entry: it stores the values and number of table entries that were n...
static CTempString GetFuncName()
static stringstream s_ExecuteInParallel(const CMacroRep &macro_rep, const CMacroBioData &data, const SFeatInterval &feat_interval, CRef< CMacroCmdComposite > CmdComposite, CRef< CMacroStat > &stat, bool throw_on_error)
static const char * sm_SeqForFeat
static CTempString GetFuncName()
void ThrowCMacroExecException(const CDiagCompileInfo &info, CMacroExecException::EErrCode code, const string &message, const CQueryParseTree::TNode *treeNode, const CException *previous=nullptr)
Throws CMacroExecException with the specified message and error location from the TNode.
void SetSource(const char *sMacroText)
Reset the parser and sets macro text to be parsed.
virtual bool ResolveIdentifier(const string &identifier, CMQueryNodeValue &val, const CQueryParseTree::TNode *parent)
Resolve identifier and store the value in @val.
virtual ~IMacroFunctionContext()
static CTempString GetFuncName()
void SetError(const string &message, Uint4 line, Uint4 column)
static const char * sm_FunctionName
class CMacroFunction_SetPubField SetPub_Date(year, month, day, season, hour, minute,...
bool m_Status
status of the activity (parsing or execution)
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
const string & GetForEachString() const
Return "for each" string.
static CTempString GetFuncName()
static const char * sm_FunctionName
class CMacroFunction_ApplyPublication SetPub_Sub(author_field_name, author_field_value) Apply new pub...
static CTempString GetFuncName()
static CTempString GetFuncName()
void EvaluateTree(CQueryParseTree &Qtree, IResolver &resolver, bool query_tree, bool case_sensitive=false)
Evaluates tree.
static const char * sm_InList
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_PubJournal
static const char * sm_Equal
static const char * sm_Last
const string & GetErrorReport() const
static const char * sm_SeqForDescr
class CMacroFunction_Sequence_For_Seqdesc SEQUENCE_FOR_SEQDESC(field_name) SEQUENCE_FOR_SEQFEAT(field...
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
CMacroRep * DetachMacroRep()
Detach macro representation for further processing. Caller is responsible for deletion of returned obje...
static const char * sm_PartialStart
class CMacroFunction_LocPartialTest - tests whether the location is 5'/3' partial ISPARTIALSTART() - ...
list< SResolvedField > TObs
static CTempString GetFuncName()
static const char * sm_SeqAa
Uint4 m_Column
location of problem within the macro script
static CTempString GetFuncName()
static const char * sm_PubSerialNumber
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_MolinfoForFeat
bool GetSimpleTypeValue(CObjectInfo &oi, const string &field_name, CMQueryNodeValue &value)
Get single node data from the node specified by parameter of type CObjectInfo and additionally field ...
static const char * sm_UpperAll
static const char * sm_FuncVolume
class CMacroFunction_SetPubVolIssuePages SetPubVolume(newValue, existing_text, delimiter,...
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
const TObs & GetObjects() const
unsigned x_GetThreadCount(const CMacroRep &macro_rep, const CMacroBioData &data)
static const char * sm_PubAuthors
void LogStart(const string &macro_name)
log start of macro execution
CIRef< IMacroBioDataIter > m_DataIter
bool ResolveIdentToObjects(const CObjectInfo &oi, const string &identifier, CMQueryNodeValue &v)
Resolve name to the list of objects.
static CTempString GetFuncName()
static CTempString GetFuncName()
string m_ErrorMessage
error message
static CTempString GetFuncName()
static const char * sm_FromStart
class CMacroFunction_LocationDistConstraint DISTFROMSTART() [>][=][<] distance DISTFROMSTOP()
static const char * sm_PartialStop
static CTempString GetFuncName()
CConstIRef< IMacroFunctionContext > m_FuncContext
unsigned GetColumnNo(void) const
Returns the column number where error occurred.
static CTempString GetFuncName()
static CTempString GetFuncName()
bool ReadAndParseMacros(const string &filename, vector< CRef< CMacroRep >> &macro_list)
Parse a file containing macros and store them in a vector.
bool Exec(const CMacroRep &macro_rep, const CMacroBioData &data, CRef< CMacroCmdComposite > CmdComposite, bool throw_on_error=false, CNcbiOstream *ostream=nullptr)
Execute a macro.
static const char * sm_PunctAll
static const char * sm_Punct
static const char * sm_Upper
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
void LogStop(bool status, const string &err_message)
log end of macro execution this includes time of execution, status, count of changed qualifiers and a...
CQueryParseTree * GetAssignmentWhereClause(int index) const
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_SNP
EType GetDataType() const
static CTempString GetFuncName()
static const char * sm_First
CMacroFunction_FirstItem FIRSTOF(objects) - returns the first item from the list of objects LASTOF(ob...
CQueryParseTree::TFunctionNames TFunctionNamesList
Type for the list of functions in Where/Do clauses.
static const char * sm_FunctionName
void SetFunctionNames(const TFunctionNamesList &wh_funcs, const TFunctionNamesList &do_funcs)
initializes lists of known functions that should be recognized by the parser when going through WHERE...
objects::CSeq_entry_Handle GetTSE() const
static CTempString GetFuncName()
static CTempString GetFuncName()
const TSeqRange & GetSeqRange() const
Return sequence range.
static const char * sm_BsrcForFeat
const string & GetNamedAnnot() const
Return "from" named annotation.
static CTempString GetFuncName()
static CTempString GetFuncName()
static bool s_IsFeatSelector(const string &selector)
virtual bool GetTmpRTVarObject(const string &name, CObjectInfo &oi)
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_PubVolume
virtual bool ExistRTVar(const string &name)
Return true if the run-time variable, identified by its name, is defined.
static CTempString GetFuncName()
static CTempString GetFuncName()
virtual CQueryParseTree * GetAssignmentWhereClause(int index) const
Return the "where" sub-query from the DO-DONE section identified by the index in the main parsed tree...
const string & GetName() const
Return macro name.
static CTempString GetFuncName()
virtual CRef< CMQueryNodeValue > GetOrCreateRTVar(const string &name)
Get or create run-time variable.
static CTempString GetFuncName()
CMacroStat m_MacroStat
Most recent executed macro statistics.
bool IsNotSetType() const
Check/get functions result from the top node after calculation.
static CTempString GetFuncName()
CRef< CMQueryNodeValue > x_LocateRTVar(const string &identifier)
Return data associated with the RT variable based on its name.
static CTempString GetFuncName()
static CTempString GetFuncName()
CConstRef< CMacroRep > m_MacroRep
bool ResolveIdentToSimple(const CObjectInfo &oi, const string &identifier, CMQueryNodeValue &v)
Resolve name to simple types value.
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_FuncPages
static CTempString GetFuncName()
static const char * sm_PubStatus
static CTempString GetFuncName()
void SetNestedState(ENestedFunc type)
const string & GetFuncReport() const
Function extracts statistic from the object.
static CTempString GetFuncName()
static CMacroEngineParallel::SMacroInfo s_ExecuteSingleThread(const CMacroRep &macro_rep, const CMacroBioData &data, CRef< CMacroCmdComposite > CmdComposite, CConstIRef< IMacroFunctionContext > context, CMacroStat &stat, bool throw_on_error, CNcbiOstream *ostream)
static CTempString GetFuncName()
static CTempString GetFuncName()
bool x_AppendToLibrary(const string &filename, CMacroLib::TMacroLibrary &lib)
static CTempString GetFuncName()
static const char * sm_PubTitle
class CMacroFunction_PubFields PUB_TITLE(), PUB_ISSUE(), PUB_AFFIL(subfield) - returns a list of CObj...
static CTempString GetFuncName()
static CTempString GetFuncName()
CIRef< IMacroFunctionContext > m_EngineFuncContext
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_End
TBuiltInFunctionsMap m_BuiltInFunctions
unsigned GetLineNo(void) const
Returns the line number where error occurred.
static CTempString GetFuncName()
static const char * sm_LowerAll
virtual void CallFunction(const string &name, CQueryParseTree::TNode &qnode)
Function calls the function specified by name, passing args as parameters and it can update nv as a re...
static const char * sm_PubPMID
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_PubDate
static const char * sm_PubAffil
CMacroRep * Parse(const string &macro_text)
Parse the macro script into its binary representation.
static CTempString GetFuncName()
CQueryParseTree * GetDoTree() const
Return "do" clause.
static CTempString GetFuncName()
static const char * sm_BsrcForMolinfo
class CMacroFunction_GetSeqdesc BIOSOURCE_FOR_MOLINFO(field_name) or BIOSOURCE_FOR_MOLINFO(container,...
static const char * sm_PubCit
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_FromStop
static CTempString GetFuncName()
void Reset()
reset the macro name, qualifier and iteration counts
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_Start
static const char * sm_FunctionName
EditRelatedFeatureQual(feat_type, field_name, find_text, repl_text, location, case_sensitive,...
TTempRTVarsMap m_TempRTVars
bool GetBoolValue() const
static CTempString GetFuncName()
static const char * sm_PubPages
static CTempString GetFuncName()
IEditMacroFunction * ResolveFunctionName(const string &name) const
Return pointer to macro function identified by its name.
bool AppendToLibrary(const string &filename, CMacroLib::TMacroLibrary &lib)
Parse a file containing macros and append the results to the map that stores these macros.
static CTempString GetFuncName()
static CTempString GetFuncName()
CQueryParseTree * GetWhereClause() const
Return "where" clause.
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_FuncIssue
static CTempString GetFuncName()
void AddToErrorReport(const string &error)
store encountered errors
static CTempString GetFuncName()
static const char * sm_PubClass
const string & GetSource() const
bool x_ResolveRTVar(const string &identifier, CMQueryNodeValue &val, const CQueryParseTree::TNode *parent)
Return the value of Run-Time (RT) variable.
virtual void AddTmpRTVarObject(const string &name, CObjectInfo &oi)
used together with the Assignment Operator (CMQueryFunctionAssignment)
static const char * sm_Start
class CMacroFunction_LocEnd Start() and Stop() - return the positional extremes of a location
static CTempString GetFuncName()
void x_InitSetOfBuiltInFunctions()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
const string & GetThreadCount() const
Return number of threads.
static const char * sm_MolinfoForBsrc
@ eFunctionNotImplemented
static size_t EstimateSNPCount(const TSeqRange &range, const string &sAnnotName, CBioseq_Handle &Handle)
CTempString GetCurrentLine(void) const
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
TSeqPos GetBioseqLength(void) const
TObjectType * GetNCPointerOrNull(void) const THROWS_NONE
Get pointer value.
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
TObjectType & GetNCObject(void) const
Get object.
unsigned pos
Position in the src line.
unsigned line
Src line number.
position_type GetLength(void) const
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
const TValue & GetValue(void) const
Return node's value.
const TTreeType * GetParent(void) const
Get node's parent.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
@ ePublication_field_affiliation
@ ePublication_field_pages
@ ePublication_field_pmid
@ ePublication_field_pub_class
@ ePublication_field_issue
@ ePublication_field_date
@ ePublication_field_title
@ ePublication_field_serial_number
@ ePublication_field_authors
@ ePublication_field_volume
@ ePublication_field_journal
@ eSeqtype_constraint_prot
@ eSeqtype_constraint_nuc
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
Lightweight interface for getting lines of data with minimal memory copying.
Functions that resolve field names described in asn format.
Interface class for macro function implementation.
Functions used in the DO/DONE section affecting the top seq-entry.
Source location (points to the position in the original src) All positions are 0 based.