NCBI C++ ToolKit
macro_engine_parallel.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: macro_engine_parallel.cpp 47559 2023-09-01 19:52:26Z lavr $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Anatoly Osipov, Dmitry Rudnev, Andrea Asztalos
27  *
28  * File Description: Macro engine for macro processing
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
49 #include <gui/objutils/snp_gui.hpp>
50 #include <util/line_reader.hpp>
51 #include <future>
52 
53 #undef _TRACE
54 #define _TRACE(arg) ((void)0)
55 
56 /** @addtogroup GUI_MACRO_SCRIPTS_UTIL
57  *
58  * @{
59  */
60 
62 BEGIN_SCOPE(macro)
64 
65 // CMacroResolver
66 bool CMacroResolver::ResolveIdentifier(const string& identifier, CMQueryNodeValue& val, const CQueryParseTree::TNode* parent)
67 {
68  _TRACE("<<<<");
69  _TRACE("Resolving identifier: " << identifier);
70  bool res = false;
71 
72  _TRACE("Trying to get it as node value (IMacroVar) from m_MacroRep: " << m_MacroRep);
73  if (!res && m_MacroRep) {
74  res = m_MacroRep->GetNodeValue(identifier, val);
75  }
76 
77  if (!res) {
78  _TRACE("Trying to get it as an RT Var");
79  res = x_ResolveRTVar(identifier, val, parent);
80  }
81  else
82  _TRACE("Successfully resolved as node value");
83 
84  if (!res) {
85  // resolving variables like WHERE biomol = "peptide", example Autofix_008
86  _TRACE("Trying to get it via ResolveIdentToSimple() from m_DataIter: " << m_DataIter);
87  if (m_DataIter) {
88  CObjectInfo obj = m_DataIter->GetEditedObject();
89  res = ResolveIdentToSimple(obj, identifier, val);
90  }
91  if (res)
92  _TRACE("Successfully resolved via ResolveIdentToSimple() from m_DataIter");
93  }
94 
95  if (!res) {
96  _TRACE("All attempts to resolve failed");
97  val.SetDataType(CMQueryNodeValue::eNotSet);
98  }
99 
100  _TRACE("Identifier resolved successfully: " << res);
101  _TRACE(">>>>");
102  return res;
103 }
104 
105 bool CMacroResolver::x_ResolveRTVar(const string& identifier, CMQueryNodeValue& val, const CQueryParseTree::TNode* parent)
106 {
107  _TRACE("<<<<");
108  _TRACE("Looking for: " << identifier);
109  bool isOK(false);
110 
111  // check for a "." inside the RTVar name --
112  // it may mean that a field of the var is required
113  string sRTVarRoot, sRTVarFields;
114  NStr::SplitInTwo(identifier, ".", sRTVarRoot, sRTVarFields);
115  CRef<CMQueryNodeValue> rt_var = x_LocateRTVar(sRTVarRoot);
116 
117  if (rt_var) {
118  CObjectInfo tmpObj;
119  if (!GetTmpRTVarObject(sRTVarRoot, tmpObj)) {
120  _TRACE("datatype of rt_var: " << rt_var->GetDataType());
121  if (NStr::IsBlank(sRTVarFields)) { // EditStringQual(o, find_text, ...) where o = Resolve("org.taxname");
122  val.SetRef(rt_var);
123  _TRACE("set reference from node: " << rt_var << " to node: " << &v);
124  isOK = true;
125  }
126  else { // EditStringQual("o.taxname", find_text, ...), where o = Resolve("org");
127  if (rt_var->AreObjects()) {
128  const CMQueryNodeValue::TObs& objs = rt_var->GetObjects();
129  if (!objs.empty()) {
130  ITERATE(CMQueryNodeValue::TObs, obj_it, objs) {
131  isOK |= ResolveIdentToObjects(obj_it->field, sRTVarFields, val);
132  }
133  _TRACE("resolved subfields of run-time vars");
134  }
135  else { // if there are no resolved objects
136  val.SetRef(rt_var);
137  _TRACE("set reference from node: " << rt_var << " to node: " << &val);
138  isOK = true;
139  }
140  }
141  else if (rt_var->IsNotSet()) { // when the run-time variable is not set
142  val.SetRef(rt_var);
143  _TRACE("set reference from node: " << rt_var << " to node: " << &val);
144  isOK = true;
145  }
146  }
147  }
148  else if (m_DataIter) { // used in the WHERE clause
149  if (parent && parent->GetValue().GetType() == CQueryParseNode::eFunction) { // EQUALS(o, match_text)
150  isOK = ResolveIdentToObjects(tmpObj, sRTVarFields, val);
151  _TRACE("got value using iterator in form of objects (from tmpObj)");
152  }
153  else { // o.subtype (= "isolate")
154  isOK = GetSimpleTypeValue(tmpObj, sRTVarFields, val);
155  _TRACE("got simple value using iterator");
156  }
157  }
158  _TRACE(">>>> " << isOK);
159  return isOK;
160  }
161  _TRACE("RT Var not found");
162  _TRACE(">>>> false");
163  return false;
164 }
165 
167 {
168  _TRACE("<<<<");
169  _TRACE("Looking for: " << name);
170  if (name.empty())
171  return CRef<CMQueryNodeValue>();
172 
173  auto rt_var = x_LocateRTVar(name);
174  if (rt_var) {
175  _TRACE("found!");
176  _TRACE(">>>>");
177  return rt_var;
178  }
179 
181  m_RTVars[name] = node;
182  _TRACE("created as node: " << node.GetPointerOrNull());
183  _TRACE(">>>>");
184  return node;
185 }
186 
188 {
189  _TRACE("<<<<");
190  _TRACE("Looking for: " << name);
191  /*
192  #ifdef _DEBUG
193  _TRACE("Current m_RTVars snapshot:");
194  ITERATE(TRTVarsMap, iRTVars, m_RTVars) {
195  _TRACE(iRTVars->first);
196  }
197  #endif
198  */
199  auto it = m_RTVars.find(name);
200  if (it != m_RTVars.end()) {
201  _TRACE("found!");
202  _TRACE(">>>>");
203  return (*it).second;
204  }
205  else {
206  _TRACE("not found!");
207  _TRACE(">>>>");
208  return CRef<CMQueryNodeValue>();
209  }
210 }
211 
212 bool CMacroResolver::ExistRTVar(const string& name)
213 {
214  _TRACE("<<<<");
215  _TRACE("Looking for (whether it exists) :" << name);
216  auto it = m_RTVars.find(name);
217  if (it != m_RTVars.end()) {
218  _TRACE("found!");
219  return true;
220  }
221 
222  _TRACE("not found!");
223  return false;
224 }
225 
226 void CMacroResolver::AddTmpRTVarObject(const string& name, CObjectInfo& oi)
227 {
228  m_TempRTVars[name] = oi;
229 }
230 
231 bool CMacroResolver::GetTmpRTVarObject(const string& name, CObjectInfo& oi)
232 {
233  auto it = m_TempRTVars.find(name);
234  if (it != m_TempRTVars.end()) {
235  _TRACE("found!");
236  _TRACE(">>>> true");
237  oi = it->second;
238  return true;
239  }
240  else {
241  _TRACE("not found!");
242  _TRACE(">>>> false");
243  return false;
244  }
245 }
246 
248 {
250 }
251 
/// Look up a macro function by name and invoke it on the given parse-tree node.
///
/// The function object is obtained from m_FuncContext and called with the
/// command composite, the data iterator and the node itself.
///
/// NOTE(review): several statements of this function are absent from this
/// listing (the body of the "not implemented" branch, both branches of the
/// parent-type check, the tails of three catch handlers and the body of the
/// m_MacroStat block), so only the visible behavior is described here.
///
/// @param name   name of the function to call
/// @param qnode  parse-tree node the function is applied to; its source
///               location is used to annotate CMacroExecException
252 void CMacroResolver::CallFunction(const string& name, CQueryParseTree::TNode& qnode)
253 {
254  IEditMacroFunction* func = m_FuncContext->ResolveFunctionName(name);
 // an unknown function name is reported with the message built below
255  if (!func)
257  "Function '" + name + "' not implemented", &qnode);
258 
261 
 // the parent node type distinguishes a call made inside a function / FROM
 // context from any other call
262  const CQueryParseTree::TNode* parent = qnode.GetParent();
263  if (parent
264  && (parent->GetValue().GetType() == CQueryParseNode::eFunction // used in the Where clause
265  || parent->GetValue().GetType() == CQueryParseNode::eFrom)) { // used in the rhs of the assignment operator
267  }
268  else {
270  }
271 
272  try {
273  (*func)(m_CmdComposite.GetNCObject(), m_DataIter, qnode);
274  }
275  catch (const CMacroDataException &err) {
 // builds the text "Function <name> failed"; how it is used is not visible here
276  string message("Function ");
277  message += name;
278  message += " failed";
280  }
281  catch (CMacroExecException &error) {
282  // Set the macro position information
283  const CQueryParseNode::SSrcLoc &loc = qnode.GetValue().GetLoc();
284  // SSrcLoc positions are 0 based
285  error.SetLineNo(loc.line + 1);
286  error.SetColumnNo(loc.pos + 1);
 // rethrow the same exception object, now carrying the position
287  throw;
288  }
289  catch (const CException& e) {
290  LOG_POST(Info << "Error in calling function " << name << ": " << e.GetMsg());
292  }
293  catch (const std::exception& e) {
294  LOG_POST(Info << "Error in calling function " << name << ": " << e.what());
296  }
297 
298  if (m_MacroStat) {
301  }
302 }
303 
305 {
306  return m_MacroRep->GetAssignmentWhereClause(index);
307 }
308 
309 
310 // IMacroFunctionContext
312 {
314 
315  for (; it != m_BuiltInFunctions.end(); ++it)
316  delete it->second;
317 }
318 
320 {
321  IEditMacroFunction* func = nullptr;
322 
323  auto it = m_BuiltInFunctions.find(name);
324  if (it != m_BuiltInFunctions.end())
325  func = it->second;
326 
327  return func;
328 }
329 
331  CMacroParser::TFunctionNamesList& do_funcs) const
332 {
333  for (auto& iter : m_BuiltInFunctions) {
334  string name = iter.first;
335  IEditMacroFunction* func = iter.second;
336  if (func) {
337  switch (func->GetFuncScope()) {
339  do_funcs.push_back(name);
340  break;
342  where_funcs.push_back(name);
343  break;
345  do_funcs.push_back(name);
346  where_funcs.push_back(name);
347  break;
348  default:
349  break;
350  }
351  }
352  }
353 }
354 
356 {
357 
358  // Regarding Function Names: where clause function names have to be in uppercase! (limitation of qparse lib)
368  m_BuiltInFunctions.emplace(CMacroFunction_SwapRelatedFeaturesQual_Depr::GetFuncName(), new CMacroFunction_SwapRelatedFeaturesQual_Depr(IEditMacroFunction::eDo));
375  m_BuiltInFunctions.emplace(CMacroFunction_ParseToDBLink::GetFuncName(), new CMacroFunction_ParseToDBLink(IEditMacroFunction::eDo));
378  m_BuiltInFunctions.emplace(CMacroFunction_AddUnverified::GetFuncName(), new CMacroFunction_AddUnverified(IEditMacroFunction::eDo));
379 
381  m_BuiltInFunctions.emplace("AECRParseStringQual", new CMacroFunction_AddParsedText(IEditMacroFunction::eDo)); //deprecated
388  m_BuiltInFunctions.emplace(CMacroFunction_Resolve::GetFuncName(), new CMacroFunction_Resolve(IEditMacroFunction::eDo));
389  m_BuiltInFunctions.emplace(CMacroFunction_ResolveBioSourceQuals::GetFuncName(), new CMacroFunction_ResolveBioSourceQuals(IEditMacroFunction::eDo));
390  m_BuiltInFunctions.emplace(CMacroFunction_ResolveNASeq::GetFuncName(), new CMacroFunction_ResolveNASeq(IEditMacroFunction::eDo));
391  m_BuiltInFunctions.emplace(CMacroFunction_LocalID::GetFuncName(), new CMacroFunction_LocalID(IEditMacroFunction::eDo));
393  m_BuiltInFunctions.emplace(CMacroFunction_SetRnaProduct::GetFuncName(), new CMacroFunction_SetRnaProduct(IEditMacroFunction::eDo));
394  m_BuiltInFunctions.emplace(CMacroFunction_RemoveRnaProduct::GetFuncName(), new CMacroFunction_RemoveRnaProduct(IEditMacroFunction::eDo));
395  m_BuiltInFunctions.emplace(CMacroFunction_TrimStringQual::GetFuncName(), new CMacroFunction_TrimStringQual(IEditMacroFunction::eDo));
397  m_BuiltInFunctions.emplace(CMacroFunction_SetStructCommField::GetFuncName(), new CMacroFunction_SetStructCommField(IEditMacroFunction::eDo));
401  m_BuiltInFunctions.emplace(CMacroFunction_RmvRelFeatQual::GetFuncName(), new CMacroFunction_RmvRelFeatQual(IEditMacroFunction::eDo));
402  m_BuiltInFunctions.emplace(CMacroFunction_SetAutodefOption::GetFuncName(), new CMacroFunction_SetAutodefOption(IEditMacroFunction::eDo));
403 
404  m_BuiltInFunctions.emplace(CMacroFunction_FixSourceQualCaps::GetFuncName(), new CMacroFunction_FixSourceQualCaps(IEditMacroFunction::eDo));
407  m_BuiltInFunctions.emplace(CMacroFunction_FixMouseStrain::GetFuncName(), new CMacroFunction_FixMouseStrain(IEditMacroFunction::eDo));
408  m_BuiltInFunctions.emplace(CMacroFunction_EditSubfield::GetFuncName(), new CMacroFunction_EditSubfield(IEditMacroFunction::eDo));
409  m_BuiltInFunctions.emplace(CMacroFunction_RemoveSubfield::GetFuncName(), new CMacroFunction_RemoveSubfield(IEditMacroFunction::eDo));
410 
411  m_BuiltInFunctions.emplace(CMacroFunction_RemoveDescriptor::GetFuncName(), new CMacroFunction_RemoveDescriptor(IEditMacroFunction::eDo));
412  m_BuiltInFunctions.emplace(CMacroFunction_RemoveSequence::GetFuncName(), new CMacroFunction_RemoveSequence(IEditMacroFunction::eDo));
413  m_BuiltInFunctions.emplace(CMacroFunction_RemoveAlignment::GetFuncName(), new CMacroFunction_RemoveAlignment(IEditMacroFunction::eDo));
414  m_BuiltInFunctions.emplace(CMacroFunction_UnculturedTaxLookup::GetFuncName(), new CMacroFunction_UnculturedTaxLookup(IEditMacroFunction::eDo));
415  m_BuiltInFunctions.emplace(CMacroFunction_CulturedTaxLookup::GetFuncName(), new CMacroFunction_CulturedTaxLookup(IEditMacroFunction::eDo));
416  m_BuiltInFunctions.emplace(CMacroFunction_CorrectGeneticCodes::GetFuncName(), new CMacroFunction_CorrectGeneticCodes(IEditMacroFunction::eDo));
417 
418  m_BuiltInFunctions.emplace(CMacroFunction_RemoveModifier::GetFuncName(), new CMacroFunction_RemoveModifier(IEditMacroFunction::eDo));
421  m_BuiltInFunctions.emplace(CMacroFunction_FixUSAandStatesAbbrev::GetFuncName(), new CMacroFunction_FixUSAandStatesAbbrev(IEditMacroFunction::eDo));
423  m_BuiltInFunctions.emplace(CMacroFunction_RemoveLineageSourceNotes::GetFuncName(), new CMacroFunction_RemoveLineageSourceNotes(IEditMacroFunction::eDo));
424 
425  m_BuiltInFunctions.emplace(CMacroFunction_ChangeSeqInst::GetFuncName(), new CMacroFunction_ChangeSeqInst(IEditMacroFunction::eDo));
426 
427  m_BuiltInFunctions.emplace(CMacroFunction_FixAuthorCaps::GetFuncName(), new CMacroFunction_FixAuthorCaps(IEditMacroFunction::eDo));
433 
435  m_BuiltInFunctions.emplace(CMacroFunction_RmvDuplStructComments::GetFuncName(), new CMacroFunction_RmvDuplStructComments(IEditMacroFunction::eDo));
436  m_BuiltInFunctions.emplace(CMacroFunction_ReorderStructComment::GetFuncName(), new CMacroFunction_ReorderStructComment(IEditMacroFunction::eDo));
438  m_BuiltInFunctions.emplace(CMacroFunction_AddProteinTitles::GetFuncName(), new CMacroFunction_AddProteinTitles(IEditMacroFunction::eDo));
439  m_BuiltInFunctions.emplace(CMacroFunction_SetCodonsRecognized::GetFuncName(), new CMacroFunction_SetCodonsRecognized(IEditMacroFunction::eDo));
441  m_BuiltInFunctions.emplace(CMacroFunction_ToUnknownLengthGap::GetFuncName(), new CMacroFunction_ToUnknownLengthGap(IEditMacroFunction::eDo));
442 
445  m_BuiltInFunctions.emplace(CMacroFunction_DOILookup::GetFuncName(), new CMacroFunction_DOILookup(IEditMacroFunction::eDo));
446  m_BuiltInFunctions.emplace(CMacroFunction_GlobalDOILookup::GetFuncName(), new CMacroFunction_GlobalDOILookup(IEditMacroFunction::eDo));
447  m_BuiltInFunctions.emplace(CMacroFunction_ApplyPmidToEntry::GetFuncName(), new CMacroFunction_ApplyPmidToEntry(IEditMacroFunction::eDo));
448  m_BuiltInFunctions.emplace(CMacroFunction_ApplyDOIToEntry::GetFuncName(), new CMacroFunction_ApplyDOIToEntry(IEditMacroFunction::eDo));
453 
454  m_BuiltInFunctions.emplace(CMacroFunction_AddGeneXref::GetFuncName(), new CMacroFunction_AddGeneXref(IEditMacroFunction::eDo));
456  m_BuiltInFunctions.emplace(CMacroFunction_JoinShorttRNAs::GetFuncName(), new CMacroFunction_JoinShorttRNAs(IEditMacroFunction::eDo));
457  m_BuiltInFunctions.emplace(CMacroFunction_TrimStopFromCompleteCDS::GetFuncName(), new CMacroFunction_TrimStopFromCompleteCDS(IEditMacroFunction::eDo));
458  m_BuiltInFunctions.emplace(CMacroFunction_SynchronizeCDSPartials::GetFuncName(), new CMacroFunction_SynchronizeCDSPartials(IEditMacroFunction::eDo));
459  m_BuiltInFunctions.emplace(CMacroFunction_AdjustConsensusSpliceSites::GetFuncName(), new CMacroFunction_AdjustConsensusSpliceSites(IEditMacroFunction::eDo));
460  m_BuiltInFunctions.emplace(CMacroFunction_RetranslateCDS::GetFuncName(), new CMacroFunction_RetranslateCDS(IEditMacroFunction::eDo));
465  m_BuiltInFunctions.emplace(CMacroFunction_RestoreRNAEditing::GetFuncName(), new CMacroFunction_RestoreRNAEditing(IEditMacroFunction::eDo));
466 
468  m_BuiltInFunctions.emplace(CMacroFunction_RemoveInvalidECNumbers::GetFuncName(), new CMacroFunction_RemoveInvalidECNumbers(IEditMacroFunction::eDo));
469  m_BuiltInFunctions.emplace(CMacroFunction_UpdateReplacedECNumbers::GetFuncName(), new CMacroFunction_UpdateReplacedECNumbers(IEditMacroFunction::eDo));
470  m_BuiltInFunctions.emplace(CMacroFunction_UpdatemRNAProduct::GetFuncName(), new CMacroFunction_UpdatemRNAProduct(IEditMacroFunction::eDo));
471  m_BuiltInFunctions.emplace(CMacroFunction_CopyNameToCDSNote::GetFuncName(), new CMacroFunction_CopyNameToCDSNote(IEditMacroFunction::eDo));
472  m_BuiltInFunctions.emplace(CMacroFunction_RemoveFeature::GetFuncName(), new CMacroFunction_RemoveFeature(IEditMacroFunction::eDo));
474  m_BuiltInFunctions.emplace(CMacroFunction_RemoveDuplFeatures::GetFuncName(), new CMacroFunction_RemoveDuplFeatures(IEditMacroFunction::eDo));
475 
480 
484  m_BuiltInFunctions.emplace(CMacroFunction_DiscrepancyAutofix::GetFuncName(), new CMacroFunction_DiscrepancyAutofix(IEditMacroFunction::eDo));
485  m_BuiltInFunctions.emplace(CMacroFunction_TaxLookup::GetFuncName(), new CMacroFunction_TaxLookup(IEditMacroFunction::eDo));
487  m_BuiltInFunctions.emplace(CMacroFunction_ConvertRawToDeltabyNs::GetFuncName(), new CMacroFunction_ConvertRawToDeltabyNs(IEditMacroFunction::eDo));
488  m_BuiltInFunctions.emplace(CMacroFunction_AddGapFeaturesByNs::GetFuncName(), new CMacroFunction_AddGapFeaturesByNs(IEditMacroFunction::eDo));
490 
501 
509 
513 
523  m_BuiltInFunctions.emplace(CMacroFunction_SetPubCitation::GetFuncName(), new CMacroFunction_SetPubCitation(IEditMacroFunction::eDo));
524  m_BuiltInFunctions.emplace(CMacroFunction_SetSerialNumber::GetFuncName(), new CMacroFunction_SetSerialNumber(IEditMacroFunction::eDo));
528  m_BuiltInFunctions.emplace(CMacroFunction_SetPubPMID::GetFuncName(), new CMacroFunction_SetPubPMID(IEditMacroFunction::eDo));
529  m_BuiltInFunctions.emplace(CMacroFunction_SetPubAuthorMI::GetFuncName(), new CMacroFunction_SetPubAuthorMI(IEditMacroFunction::eDo));
530  m_BuiltInFunctions.emplace(CMacroFunction_RemovePubAuthorMI::GetFuncName(), new CMacroFunction_RemovePubAuthorMI(IEditMacroFunction::eDo));
531  m_BuiltInFunctions.emplace(CMacroFunction_RemovePubAuthors::GetFuncName(), new CMacroFunction_RemovePubAuthors(IEditMacroFunction::eDo));
532 
533  m_BuiltInFunctions.emplace(CMacroFunction_ValueFromTable::GetFuncName(), new CMacroFunction_ValueFromTable(IEditMacroFunction::eDo));
535 
536 
537  // printing functions
538  m_BuiltInFunctions.emplace(CMacroFunction_PrintCSV::GetFuncName(), new CMacroFunction_PrintCSV(IEditMacroFunction::eDo));
539  m_BuiltInFunctions.emplace(CMacroFunction_PrintTSV::GetFuncName(), new CMacroFunction_PrintTSV(IEditMacroFunction::eDo));
540  m_BuiltInFunctions.emplace(CMacroFunction_PrintVerbatim::GetFuncName(), new CMacroFunction_PrintVerbatim(IEditMacroFunction::eDo));
541  m_BuiltInFunctions.emplace(CMacroFunction_PrintBankit::GetFuncName(), new CMacroFunction_PrintBankit(IEditMacroFunction::eDo));
542  m_BuiltInFunctions.emplace(CMacroFunction_PrintLiteral::GetFuncName(), new CMacroFunction_PrintLiteral(IEditMacroFunction::eDo));
543 
546  m_BuiltInFunctions.emplace(CMacroFunction_GeneQual::GetFuncName(), new CMacroFunction_GeneQual(IEditMacroFunction::eBoth));
547  m_BuiltInFunctions.emplace(CMacroFunction_StructVoucherPart::GetFuncName(), new CMacroFunction_StructVoucherPart(IEditMacroFunction::eBoth));
550  m_BuiltInFunctions.emplace(CMacroFunction_GetRnaProduct::GetFuncName(), new CMacroFunction_GetRnaProduct(IEditMacroFunction::eBoth));
551 
552  // SNP related functions
553  m_BuiltInFunctions.emplace(CMacroFunction_VariationType::GetFuncName(), new CMacroFunction_VariationType(IEditMacroFunction::eBoth));
554  m_BuiltInFunctions.emplace(CMacroFunction_RefAllele::GetFuncName(), new CMacroFunction_RefAllele(IEditMacroFunction::eBoth));
555  m_BuiltInFunctions.emplace(CMacroFunction_AltAlleles::GetFuncName(), new CMacroFunction_AltAlleles(IEditMacroFunction::eBoth));
556  m_BuiltInFunctions.emplace(CMacroFunction_Consequence::GetFuncName(), new CMacroFunction_Consequence(IEditMacroFunction::eBoth));
557  m_BuiltInFunctions.emplace(CMacroFunction_SnpID::GetFuncName(), new CMacroFunction_SnpID(IEditMacroFunction::eBoth));
558  m_BuiltInFunctions.emplace(CMacroFunction_VcfSevenCol::GetFuncName(), new CMacroFunction_VcfSevenCol(IEditMacroFunction::eDo));
559 
560  m_BuiltInFunctions.emplace(CMacroFunction_ChoiceType::GetFuncName(), new CMacroFunction_ChoiceType(IEditMacroFunction::eWhere));
561  m_BuiltInFunctions.emplace(CMacroFunction_Features_For_Object::GetFuncName(), new CMacroFunction_Features_For_Object(IEditMacroFunction::eWhere));
562  m_BuiltInFunctions.emplace(CMacroFunction_CDSTranslation::GetFuncName(), new CMacroFunction_CDSTranslation(IEditMacroFunction::eWhere));
563  m_BuiltInFunctions.emplace(CMacroFunction_SeqID::GetFuncName(), new CMacroFunction_SeqID(IEditMacroFunction::eWhere));
564  m_BuiltInFunctions.emplace(CMacroFunction_Accession::GetFuncName(), new CMacroFunction_Accession(IEditMacroFunction::eBoth));
565  m_BuiltInFunctions.emplace(CMacroFunction_InconsistentTaxa::GetFuncName(), new CMacroFunction_InconsistentTaxa(IEditMacroFunction::eWhere));
566  m_BuiltInFunctions.emplace(CMacroFunction_InTable::GetFuncName(), new CMacroFunction_InTable(IEditMacroFunction::eWhere));
567 
574  m_BuiltInFunctions.emplace(CMacroFunction_GetDBLink::GetFuncName(), new CMacroFunction_GetDBLink(IEditMacroFunction::eBoth));
577  m_BuiltInFunctions.emplace(CMacroFunction_GetDBXref::GetFuncName(), new CMacroFunction_GetDBXref(IEditMacroFunction::eBoth));
578 
579  // Location constraints
584  m_BuiltInFunctions.emplace(CMacroFunction_LocationStrand::GetFuncName(), new CMacroFunction_LocationStrand(IEditMacroFunction::eBoth));
585  m_BuiltInFunctions.emplace(CMacroFunction_LocationStrandSymbol::GetFuncName(), new CMacroFunction_LocationStrandSymbol(IEditMacroFunction::eBoth));
591 
594  m_BuiltInFunctions.emplace(CMacroFunction_Contained::GetFuncName(), new CMacroFunction_Contained(IEditMacroFunction::eWhere));
595  m_BuiltInFunctions.emplace(CMacroFunction_GeneType::GetFuncName(), new CMacroFunction_GeneType(IEditMacroFunction::eBoth));
596  m_BuiltInFunctions.emplace(CMacroFunction_Label::GetFuncName(), new CMacroFunction_Label(IEditMacroFunction::eBoth));
597 
598  // Both WHERE and DO clause functions:
599  // String constraints
614 
615  m_BuiltInFunctions.emplace(CMacroFunction_StringLength::GetFuncName(), new CMacroFunction_StringLength(IEditMacroFunction::eBoth));
617  m_BuiltInFunctions.emplace(CMacroFunction_RelatedFeatures::GetFuncName(), new CMacroFunction_RelatedFeatures(IEditMacroFunction::eBoth));
618  m_BuiltInFunctions.emplace(CMacroFunction_StructCommField::GetFuncName(), new CMacroFunction_StructCommField(IEditMacroFunction::eBoth));
619  m_BuiltInFunctions.emplace(CMacroFunction_StructCommDatabase::GetFuncName(), new CMacroFunction_StructCommDatabase(IEditMacroFunction::eBoth));
620  m_BuiltInFunctions.emplace(CMacroFunction_StructCommFieldname::GetFuncName(), new CMacroFunction_StructCommFieldname(IEditMacroFunction::eBoth));
623  m_BuiltInFunctions.emplace(CMacroFunction_IllegalDbXref::GetFuncName(), new CMacroFunction_IllegalDbXref(IEditMacroFunction::eBoth));
624 
625 
626  // Publication fields
640 }
641 
642 
644 {
646  m_BuiltInFunctions.emplace(CMacroFunction_Resolve::GetFuncName(), new CMacroFunction_Resolve(IEditMacroFunction::eDo));
649  m_BuiltInFunctions.emplace(CMacroFunction_LocationStrand::GetFuncName(), new CMacroFunction_LocationStrand(IEditMacroFunction::eBoth));
650  m_BuiltInFunctions.emplace(CMacroFunction_LocationStrandSymbol::GetFuncName(), new CMacroFunction_LocationStrandSymbol(IEditMacroFunction::eBoth));
651  m_BuiltInFunctions.emplace(CMacroFunction_SeqID::GetFuncName(), new CMacroFunction_SeqID(IEditMacroFunction::eWhere));
652  m_BuiltInFunctions.emplace(CMacroFunction_Accession::GetFuncName(), new CMacroFunction_Accession(IEditMacroFunction::eBoth));
653  m_BuiltInFunctions.emplace(CMacroFunction_GeneType::GetFuncName(), new CMacroFunction_GeneType(IEditMacroFunction::eBoth));
654  m_BuiltInFunctions.emplace(CMacroFunction_Label::GetFuncName(), new CMacroFunction_Label(IEditMacroFunction::eBoth));
655 
656  // SNP related functions
657  m_BuiltInFunctions.emplace(CMacroFunction_VariationType::GetFuncName(), new CMacroFunction_VariationType(IEditMacroFunction::eBoth));
658  m_BuiltInFunctions.emplace(CMacroFunction_RefAllele::GetFuncName(), new CMacroFunction_RefAllele(IEditMacroFunction::eBoth));
659  m_BuiltInFunctions.emplace(CMacroFunction_AltAlleles::GetFuncName(), new CMacroFunction_AltAlleles(IEditMacroFunction::eBoth));
660  m_BuiltInFunctions.emplace(CMacroFunction_Consequence::GetFuncName(), new CMacroFunction_Consequence(IEditMacroFunction::eBoth));
661  m_BuiltInFunctions.emplace(CMacroFunction_SnpID::GetFuncName(), new CMacroFunction_SnpID(IEditMacroFunction::eBoth));
662  m_BuiltInFunctions.emplace(CMacroFunction_VcfSevenCol::GetFuncName(), new CMacroFunction_VcfSevenCol(IEditMacroFunction::eDo));
663 
664  // printing functions
665  m_BuiltInFunctions.emplace(CMacroFunction_PrintCSV::GetFuncName(), new CMacroFunction_PrintCSV(IEditMacroFunction::eDo));
666  m_BuiltInFunctions.emplace(CMacroFunction_PrintTSV::GetFuncName(), new CMacroFunction_PrintTSV(IEditMacroFunction::eDo));
667  m_BuiltInFunctions.emplace(CMacroFunction_PrintVerbatim::GetFuncName(), new CMacroFunction_PrintVerbatim(IEditMacroFunction::eDo));
668  m_BuiltInFunctions.emplace(CMacroFunction_PrintBankit::GetFuncName(), new CMacroFunction_PrintBankit(IEditMacroFunction::eDo));
669  m_BuiltInFunctions.emplace(CMacroFunction_PrintLiteral::GetFuncName(), new CMacroFunction_PrintLiteral(IEditMacroFunction::eDo));
670 
671 }
672 
673 ///////////////////////////////////////////////////////////////////////////////
674 /// class CMacroEngineParallel
675 ///
676 
678 {
679  return x_AppendToLibrary(filename, lib);
680 }
681 
683 {
685 
686  string filetext;
687  try {
688  CRef<ILineReader> line_reader(ILineReader::New(filename));
689  while (!line_reader->AtEOF()) {
690  line_reader->ReadLine();
691  filetext += line_reader->GetCurrentLine();
692  filetext += "\n";
693  }
694  }
695  catch (const CException& ex) {
696  ERR_POST(ex.ReportAll());
697  m_ParsingInfo.SetError(string("Problem with reading file:") + filename, 0, 0);
698  return m_ParsingInfo.m_Status;
699  }
700 
701  CMacroParser parser;
702  vector<CRef<CMacroRep>> parsed_vec;
703  try {
704  x_SetFunctions(parser);
705  parser.SetSource(filetext.c_str());
706 
707  while (parser.Parse(false)) {
708  CRef<CMacroRep> mr(parser.DetachMacroRep());
709  parsed_vec.push_back(mr);
710  }
711  m_ParsingInfo.m_Status = true;
712  }
713  catch (const CMacroParseException& ex) {
714  string msg = (ex.GetMsg().empty()) ? ex.what() : ex.GetMsg();
715  m_ParsingInfo.SetError(msg, 0, 0);
716  return m_ParsingInfo.m_Status;
717  }
718 
719  if (m_ParsingInfo.m_Status) {
720  // save the macro in the library only when all steps have been successfully parsed
721  lib.reserve(lib.size() + parsed_vec.size());
722  lib.insert(lib.end(), parsed_vec.begin(), parsed_vec.end());
723  }
724 
725  return m_ParsingInfo.m_Status;
726 }
727 
/// Read a macro file and parse every macro it contains.
///
/// The file is read line by line into one string, with "\n" appended after
/// each line. The text is then parsed repeatedly with parser.Parse(false),
/// and each successful pass contributes one CMacroRep to macro_list.
///
/// @param filename    path of the file to read
/// @param macro_list  emptied before parsing, then filled with the parsed
///                    macros; left empty when a CMacroParseException is caught
/// @return m_ParsingInfo.m_Status, which is set to true once the parse loop
///         finishes without an exception. On the error paths the value is
///         whatever SetError leaves behind (presumably false; confirm).
728 bool CMacroEngineParallel::ReadAndParseMacros(const string& filename, vector<CRef<CMacroRep>>& macro_list)
729 {
731 
732  string filetext;
733  try {
734  CRef<ILineReader> line_reader(ILineReader::New(filename));
735  while (!line_reader->AtEOF()) {
736  line_reader->ReadLine();
737  filetext += line_reader->GetCurrentLine();
738  filetext += "\n";
739  }
740  }
741  catch (const CException& ex) {
 // reading failed: post the full report and record a parsing error
742  ERR_POST(ex.ReportAll());
743  m_ParsingInfo.SetError(string("Problem with reading file:") + filename, 0, 0);
744  return m_ParsingInfo.m_Status;
745  }
746 
747  CMacroParser parser;
748  macro_list.resize(0);
749  try {
750  x_SetFunctions(parser);
751  parser.SetSource(filetext.c_str());
 // one macro is detached from the parser per successful Parse() call
752  while (parser.Parse(false)) {
753  CRef<CMacroRep> mr(parser.DetachMacroRep());
754  macro_list.push_back(mr);
755  }
756 
757  m_ParsingInfo.m_Status = true;
758  }
759  catch (const CMacroParseException& ex) {
760  // if there is an error, none of the macros will be read
761  macro_list.resize(0);
762 
 // prefer the exception's own message, fall back to what() when it is empty
763  string msg = (ex.GetMsg().empty()) ? ex.what() : ex.GetMsg();
764  m_ParsingInfo.SetError(msg, 0, 0);
765  return m_ParsingInfo.m_Status;
766  }
767 
768  return m_ParsingInfo.m_Status;
769 }
770 
/// Parse a single macro given as text.
///
/// @param macro_text  source text of the macro
/// @return the macro representation detached from the parser, or nullptr when
///         the text is empty or the parser throws CMacroParseException. In
///         both failure cases the error is recorded in m_ParsingInfo.
///         NOTE(review): the returned raw pointer presumably has to be taken
///         over by the caller (e.g. wrapped in a CRef); confirm.
771 CMacroRep* CMacroEngineParallel::Parse(const string& macro_text)
772 {
774 
775  if (macro_text.empty()) {
776  m_ParsingInfo.SetError("The macro is empty", 0, 0);
777  return nullptr;
778  }
779 
780  CMacroParser parser;
781  try {
782  x_SetFunctions(parser);
783  parser.SetSource(macro_text.c_str());
784  parser.Parse();
785  return parser.DetachMacroRep();
786  }
787  catch (const CMacroParseException& ex) {
 // prefer the exception's own message, fall back to what() when it is empty
788  string msg = (ex.GetMsg().empty()) ? ex.what() : ex.GetMsg();
789  m_ParsingInfo.SetError(msg, 0, 0);
790  }
791 
792  return nullptr;
793 }
794 
797  bool throw_on_error, CNcbiOstream* ostream)
798 {
800 
801  // Using SFeatInterval for single thread yields slower run times,
802  // so we use the sequence range alone to create the iterator
804  macro_rep.GetForEachString(),
805  macro_rep.GetNamedAnnot(),
806  macro_rep.GetSeqRange(),
807  ostream));
808 
809  if (!data_iter) {
810  exec_info.m_Status = false;
811  exec_info.m_ErrorMessage = "Missing FOR EACH statement or incorrect selector specified or incorrect NA provided";
812  exec_info.m_Line = 0; // TBD: implement error location
813  exec_info.m_Column = 0; // TBD: implement error location
814  if (throw_on_error) {
815  string msg;
816  if (macro_rep.GetNamedAnnot().empty()) {
817  msg = "Missing FOR EACH statement, or unsupported selector";
818  }
819  else {
820  msg = "Incorrect NA provided or missing FOR EACH statement or unsupported selector specified";
821  }
823  }
824  return exec_info;
825  }
826 
827  CMacroExec macro_exec;
828  CMacroResolver resolver(macro_rep, data_iter, CmdComposite, context, stat);
829  bool evaluateDo = true;
830 
831  try {
832  data_iter->Begin();
833  // m_MacroStat.AddToReport("Processing features count: " + NStr::NumericToString(m_DataIter->GetCount()) + "\n");
834  // m_MacroStat.AddToReport("Best description: " + m_DataIter->GetBestDescr() + "\n");
835  while (!data_iter->IsEnd()) {
836  evaluateDo = true;
837 
838  CQueryParseTree* tree = macro_rep.GetWhereClause();
839  if (tree) {
840  evaluateDo = false;
841 
842  macro_exec.EvaluateTree(*tree, resolver, true);
843  if (!macro_exec.IsBoolType() && !macro_exec.IsNotSetType())
845  "Wrong type of computed WHERE clause", tree->GetQueryTree());
846 
847  if (macro_exec.IsBoolType() && macro_exec.GetBoolValue()) {
848  evaluateDo = true;
849  }
850  else {
851  data_iter->Next();
852  }
853  }
854 
855  if (evaluateDo) {
856  tree = macro_rep.GetDoTree();
857  _ASSERT(tree);
858  data_iter->BuildEditedObject();
859  // make all the necessary changes to the asn selector object and
860  // execute commands on the dependent objects
861  macro_exec.EvaluateTree(*tree, resolver, false);
862 
863  if (data_iter->IntendToDelete()) {
864  data_iter->RunDeleteCommand(CmdComposite);
865  }
866  else {
867  if (data_iter->IsModified()) {
868  data_iter->RunEditCommand(CmdComposite);
869  }
870  data_iter->Next();
871  }
872  }
873  }
874  exec_info.m_Status = true;
875 
876  int nr_nonmatched = 0;
877  const string msg = data_iter->GetUnMatchedTableEntries(nr_nonmatched);
878  stat.GatherUnMatchedTableEntries(msg, nr_nonmatched);
879  }
880  catch (const CMacroExecException& ex) {
881  exec_info.m_Status = false;
882 
883  exec_info.m_Line = ex.GetLineNo();
884  exec_info.m_Column = ex.GetColumnNo();
885  exec_info.m_ErrorMessage = ex.GetMsg();
886  if (exec_info.m_ErrorMessage.empty()) {
887  exec_info.m_ErrorMessage = ex.what();
888  }
889 
890  CNcbiOstrstream oss;
891  oss << "[Error] Line " << exec_info.m_Line << ", Pos " << exec_info.m_Column;
892  oss << ": " << exec_info.m_ErrorMessage;
893  exec_info.m_ErrorMessage.assign(CNcbiOstrstreamToString(oss));
894 
895  if (throw_on_error)
896  throw;
897  }
898 
899  return exec_info;
900 }
901 
902 // Designed to work specifically with printing macro functions - read-only operations on the asn data
//
// Worker routine run by each thread started from CMacroEngineParallel::Exec.
// It iterates over the objects selected for 'feat_interval', evaluates the
// WHERE clause (if any) and then the DO tree of 'macro_rep' for every
// matching object, and returns the text accumulated in the local stream 'ss'.
//
// Parameters:
//   macro_rep      - parsed macro; supplies the FOR EACH string, the named annotation,
//                    and the WHERE / DO trees
//   data           - bio data the iterator is created from
//   feat_interval  - interval this worker is responsible for
//   CmdComposite   - passed to the resolver and to RunDeleteCommand / RunEditCommand
//   stat           - per-thread statistics object handed to the resolver
//   throw_on_error - when true, errors are (re)thrown to the caller
//
// Returns the stream 'ss'; its address is handed to the iterator-creation call below.
//
// NOTE(review): this listing is missing several original source lines (906, 911, 930,
// 936, 959). Presumably they declare 'exec_info', start the 'data_iter' creation call,
// throw 'msg', create 'func_context', and start the WHERE-clause throw call - confirm
// against the repository before editing this function.
// NOTE(review): when throw_on_error is false, a failure is recorded only in the local
// 'exec_info', which is never returned; the caller just receives whatever is in 'ss'.
903 static stringstream s_ExecuteInParallel(const CMacroRep& macro_rep, const CMacroBioData& data, const SFeatInterval& feat_interval,
904  CRef<CMacroCmdComposite> CmdComposite, CRef<CMacroStat>& stat, bool throw_on_error)
905 {
907  // This error information is currently not returned to the main thread
908  // In case of an error, an exception is thrown
909 
910  stringstream ss;
912  macro_rep.GetForEachString(),
913  macro_rep.GetNamedAnnot(),
914  feat_interval,
915  &ss));
916 
// No iterator could be created: record the failure and bail out with the (still empty) stream.
917  if (!data_iter) {
918  exec_info.m_Status = false;
919  exec_info.m_ErrorMessage = "Missing FOR EACH statement or incorrect selector specified or incorrect NA provided";
920  exec_info.m_Line = 0; // TBD: implement error location
921  exec_info.m_Column = 0; // TBD: implement error location
922  if (throw_on_error) {
923  string msg;
// The wording of the message depends on whether a named annotation was requested.
924  if (macro_rep.GetNamedAnnot().empty()) {
925  msg = "Missing FOR EACH statement, or unsupported selector";
926  }
927  else {
928  msg = "Incorrect NA provided or missing FOR EACH statement or unsupported selector specified";
929  }
931  }
932  return ss;
933  }
934 
935  CMacroExec macro_exec;
937  CMacroResolver resolver(macro_rep, data_iter, CmdComposite, func_context, *stat);
938  bool evaluateDo = true;
939 
940  // CMacroContextForListing is restricted to non-editing macro functions
941  // If editing macro functions are included, be careful using CmdComposite as this is shared data among threads
942  // For now, CmdComposite is not used
// NOTE(review): CmdComposite is still passed to the resolver above and to
// RunDeleteCommand / RunEditCommand below; those calls are only reached when the
// iterator reports IntendToDelete() or IsModified().
943 
944  try {
945  data_iter->Begin();
946  // m_MacroStat.AddToReport("Processing features count: " + NStr::NumericToString(m_DataIter->GetCount()) + "\n");
947  // m_MacroStat.AddToReport("Best description: " + m_DataIter->GetBestDescr() + "\n");
948  while (!data_iter->IsEnd()) {
// Without a WHERE clause every object is processed by the DO section.
949  evaluateDo = true;
950 
951  CQueryParseTree* tree = macro_rep.GetWhereClause();
952  // the tree would be shared among the threads if we used a single macro_rep for all threads
953  // results of function evaluations are stored in the nodes of the tree (IQueryMacroUserObject)
954  if (tree) {
955  evaluateDo = false;
956 
957  macro_exec.EvaluateTree(*tree, resolver, true);
// A WHERE result that is neither boolean nor 'not set' is reported as an error.
958  if (!macro_exec.IsBoolType() && !macro_exec.IsNotSetType())
960  "Wrong type of computed WHERE clause", tree->GetQueryTree());
961 
962  if (macro_exec.IsBoolType() && macro_exec.GetBoolValue()) {
963  evaluateDo = true;
964  }
965  else {
// WHERE clause not satisfied: skip this object.
966  data_iter->Next();
967  }
968  }
969 
970  if (evaluateDo) {
971  tree = macro_rep.GetDoTree();
972  _ASSERT(tree);
973  data_iter->BuildEditedObject();
974  // make all the necessary changes to the asn selector object and
975  // execute commands on the dependent objects
976  macro_exec.EvaluateTree(*tree, resolver, false);
977 
// Note that the iterator is not advanced with Next() on the delete path.
978  if (data_iter->IntendToDelete()) {
979  data_iter->RunDeleteCommand(CmdComposite);
980  }
981  else {
982  if (data_iter->IsModified()) {
983  data_iter->RunEditCommand(CmdComposite);
984  }
985  data_iter->Next();
986  }
987  }
988  }
989  exec_info.m_Status = true;
990  }
991  catch (const CMacroExecException& ex) {
992  exec_info.m_Status = false;
993 
994  exec_info.m_Line = ex.GetLineNo();
995  exec_info.m_Column = ex.GetColumnNo();
996  exec_info.m_ErrorMessage = ex.GetMsg();
// Fall back to what() when the exception carries no message of its own.
997  if (exec_info.m_ErrorMessage.empty()) {
998  exec_info.m_ErrorMessage = ex.what();
999  }
1000 
// Prefix the message with the error location taken from the exception.
1001  CNcbiOstrstream oss;
1002  oss << "[Error] Line " << exec_info.m_Line << ", Pos " << exec_info.m_Column;
1003  oss << ": " << exec_info.m_ErrorMessage;
1004  exec_info.m_ErrorMessage.assign(CNcbiOstrstreamToString(oss));
1005 
// Rethrow the exception currently being handled.
1006  if (throw_on_error)
1007  throw;
1008  }
1009 
1010  return ss;
1011 }
1012 
1013 
1014 /* Assumptions made for parallel execution:
1015 
1016  - no editing function is used in the DO-DONE section
1017  --> initialize the macroengine object with the set of functions that are acceptable to be used
1018  with parallel execution (CMacroContextForListing)
1019 
1020  - for now, parallel execution is implemented for 'feature ranges' (SV-4624)
1021  [left, right) - interpreted as features that straddle the 'open' boundary of the interval
1022  are NOT included in the interval. ObjectManager's selectors/iterators always include all features
1023  that straddle left and right boundaries of an interval. There is no flag to modify this behavior.
1024  - the top seq-entry needs to be a bioseq
1025  - the FOR EACH selector needs to be a feature selector
1026  - Sequence range (in the form of RANGE [start, stop]) does not necessarily need to be specified
1027 
1028  - parallel execution is explicitly requested in the macro via the DO_P keyword (instead of the Do keyword)
1029 
1030 */
1031 
1032 bool CMacroEngineParallel::Exec(const CMacroRep& macro_rep, const CMacroBioData& data,
1033  CRef<CMacroCmdComposite> CmdComposite, bool throw_on_error, CNcbiOstream* ostream)
1034 {
1035 /*
1036 #ifdef _DEBUG
1037  macro_rep.Print(NcbiCout);
1038 #endif
1039 */
1040 
1041  m_MacroStat.Reset();
1042  m_MacroStat.LogStart(macro_rep.GetName());
1043  SMacroInfo exec_info;
1044 
1045  unsigned thread_count = x_GetThreadCount(macro_rep, data);
1046  _ASSERT(thread_count > 0);
1047 
1048  if (thread_count == 1) {
1049  // By passing 'm_EngineFuncContext' to this function we assure that
1050  // any kind of macros can be executed in a single thread mode
1051  exec_info = s_ExecuteSingleThread(macro_rep, data, CmdComposite, m_EngineFuncContext, m_MacroStat, throw_on_error, ostream);
1052  }
1053  else {
1054  bool is_data_bioseq = data.GetTSE().IsSeq();
1055  if (!is_data_bioseq) {
1056  exec_info.m_Status = false;
1057  exec_info.m_ErrorMessage = "For parallel execution, top seq-entry is expected to be a bioseq";
1058  if (throw_on_error)
1059  throw;
1060  }
1061 
1062  bool is_feat_selector = data.s_IsFeatSelector(macro_rep.GetForEachString());
1063  if (!is_feat_selector) {
1064  exec_info.m_Status = false;
1065  exec_info.m_ErrorMessage = "For parallel execution, FOR EACH selector is expected to be a feature selector";
1066  if (throw_on_error)
1067  throw;
1068  }
1069 
1070  CBioseq_Handle bsh = data.GetTSE().GetSeq();
1071  _ASSERT(bsh);
1072  TSeqRange total_range = macro_rep.GetSeqRange();
1073  if ((total_range.GetFrom() == total_range.GetTo()) && (total_range.GetFrom() == 0)) {
1074  total_range.SetTo(bsh.GetBioseqLength());
1075  }
1076 
1077  vector<future<stringstream>> futures(thread_count);
1078  vector<thread> threads(thread_count); // default threads, with no task attached to them
1079  CJoinThreads joiner(threads);
1080 
1081  // make as many copies of CMacroStat-s and CMacroRep-s as many threads
1082  // otherwise, these are shared data
1083  vector<CRef<CMacroStat>> stats;
1084  stats.reserve(thread_count);
1085  for (unsigned i = 0; i < thread_count; ++i) {
1086  stats.emplace_back(new CMacroStat());
1087  }
1088 
1089  vector<CRef<CMacroRep>> reps_for_threads;
1090  reps_for_threads.reserve(thread_count);
1091  for (unsigned i = 0; i < thread_count; ++i) {
1092  // as CMacroRep and CQueryParseTree classes don't have copy constructors,
1093  // the macro needs to be parsed to create new CMacroRep instances
1094  reps_for_threads.push_back(CRef<CMacroRep>(Parse(macro_rep.GetSource())));
1095  }
1096 
1097  unsigned chunk_size = total_range.GetLength() / thread_count;
1098  TSeqPos start = total_range.GetFrom();
1099 
1100  using TTaskType = stringstream(const CMacroRep&, const CMacroBioData&, const SFeatInterval&,
1102 
1103  for (unsigned i = 0; i < thread_count; ++i) {
1104  TSeqPos stop = min(total_range.GetTo(), start + chunk_size + 1);
1105 
1106  SFeatInterval feat_int(TSeqRange(start, stop));
1107  if (start == total_range.GetFrom()) {
1108  // the first interval is slightly different:
1109  feat_int.left_closed = true;
1110  feat_int.right_closed = true;
1111  LOG_POST(Info << "Thread " << i << ": [" << start << ", " << stop << "]");
1112  }
1113  else {
1114  feat_int.left_closed = false;
1115  feat_int.right_closed = true;
1116  LOG_POST(Info << "Thread " << i << ": (" << start << ", " << stop << "]");
1117  }
1118 
1119  packaged_task<TTaskType> task(s_ExecuteInParallel);
1120 
1121  futures[i] = task.get_future(); // hold the future for the tasks's promise
1122 
1123  threads[i] = thread(std::move(task), cref(reps_for_threads[i].GetObject()), cref(data), feat_int,
1124  ref(CmdComposite), ref(stats[i]), throw_on_error);
1125 
1126  start = stop;
1127  }
1128 
1129  // keep results in a temporary stream in case one of the threads throws an exception
1130  // otherwise, the output file might contain partial information
1131  stringstream result_stream;
1132 
1133  for (auto&& it : futures) {
1134  result_stream << it.get().str();
1135  // a call to get() rethrows the exception
1136  }
1137 
1138  // if no exception was thrown, copy it to the output stream
1139  *ostream << result_stream.str();
1140 
1141  for (auto&& stat_it : stats) {
1142  auto report = stat_it->GetMacroReport();
1143  if (!report.GetLog().empty()) {
1144  LOG_POST(Info << report.GetName() << ":\n" << report.GetLog());
1145  }
1146  }
1147  }
1148 
1149 
1150 #ifdef _DEBUG
1151  m_MacroStat.LogStop(exec_info.m_Status, exec_info.m_ErrorMessage);
1152 #endif
1153  return exec_info.m_Status;
1154 }
1155 
// Body of x_SetFunctions: registers with 'parser' the WHERE and DO function names
// known to the engine's function context (m_EngineFuncContext).
// NOTE(review): the signature line (source line 1156) is missing from this listing;
// presumably it is `void CMacroEngineParallel::x_SetFunctions(CMacroParser& parser)` - confirm.
1157 {
1158  CMacroParser::TFunctionNamesList where_funcs, do_funcs;
// fill both lists from the function context ...
1159  m_EngineFuncContext->GetFunctionNames(where_funcs, do_funcs);
// ... and hand them over to the parser
1160  parser.SetFunctionNames(where_funcs, do_funcs);
1161 }
1162 
// Determine how many threads should be used to execute 'macro_rep' on 'data'.
//
// The thread-count string stored in the macro is either CMacroParser::sm_Automatic
// (compared case-insensitively) or a number:
//   - automatic: for a bioseq top seq-entry with a feature selector, the count is derived
//     from the estimated number of SNP features in the macro's range
//     (< 100,000 -> 1, <= 5,000,000 -> 10, otherwise 16), or is 10 in the else-branch below;
//   - otherwise: the string is converted with NStr::StringToInt.
//
// NOTE(review): source line 1177 is missing from this listing; presumably it is the `if`
// header whose `else` sets thread_count = 10 - confirm against the repository.
// NOTE(review): in automatic mode, when the top seq-entry is not a bioseq or the selector
// is not a feature selector, thread_count keeps its initial value and 0 is returned.
// NOTE(review): StringToInt yields an int that is stored in an unsigned; "0" gives 0 and
// a negative number would wrap around to a very large count.
1163 unsigned CMacroEngineParallel::x_GetThreadCount(const CMacroRep& macro_rep, const CMacroBioData& data)
1164 {
1165  const string& thread_str = macro_rep.GetThreadCount();
1166  unsigned thread_count = 0;
1167 
1168  if (NStr::EqualNocase(thread_str, CMacroParser::sm_Automatic)) {
1169  if (data.GetTSE().IsSeq() && data.s_IsFeatSelector(macro_rep.GetForEachString())) {
1170  CBioseq_Handle bsh = data.GetTSE().GetSeq();
1171  _ASSERT(bsh);
1172  TSeqRange total_range = macro_rep.GetSeqRange();
// a range of [0, 0] is replaced by the bioseq length as upper bound
1173  if ((total_range.GetFrom() == total_range.GetTo()) && (total_range.GetFrom() == 0)) {
1174  total_range.SetTo(bsh.GetBioseqLength());
1175  }
1176 
1178  LOG_POST(Info << "Starting to get number of SNP Features");
1179  size_t feat_nrs = NSnpAnnot::EstimateSNPCount(total_range, macro_rep.GetNamedAnnot(), bsh);
1180 
// choose the thread count from the estimated amount of work
1181  if (feat_nrs < 100'000) {
1182  thread_count = 1;
1183  }
1184  else if (feat_nrs <= 5'000'000) {
1185  thread_count = 10;
1186  }
1187  else {
1188  thread_count = 16;
1189  }
1190  LOG_POST(Info << "Number of SNP Features: " << feat_nrs);
1191  LOG_POST(Info << "Number of threads to be used is " << thread_count);
1192  }
1193  else {
1194  thread_count = 10;
1195  LOG_POST(Info << "Number of threads to be used is " << thread_count);
1196  }
1197  }
1198  }
1199  else {
// explicit thread count given in the macro
1200  thread_count = NStr::StringToInt(thread_str);
1201  }
1202  return thread_count;
1203 }
1204 
1205 END_SCOPE(macro)
1207 
1208 /* @} */
CBioseq_Handle –.
Subclass of the IQueryParseUserObject which is held as the user-defined object in each CQueryParseNod...
Definition: macro_exec.hpp:71
The following asn-selectors are defined to be used in the FOR EACH statement:
class CMacroExecException
Definition: macro_ex.hpp:146
class CMacroExecException
Definition: macro_ex.hpp:196
Subclass of CQueryExec that adds: 1) Macro identifiers resolution 2) Where clause evaluation.
Definition: macro_exec.hpp:319
Macro exception.
Definition: macro_ex.hpp:55
Class provides macro language interface for bulk bio-editing.
Class for parsed macro representation.
Definition: macro_rep.hpp:254
CMacroResolver Variable and function resolver Provides interfaces for:
CMacroStat - collecting statistics about a single macro.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObjectInfo –.
Definition: objectinfo.hpp:597
Query tree and associated utility methods.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
definition of a Culling tree
Definition: ncbi_tree.hpp:100
Base class for any user function that performs editing operations on ASN.1 data.
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
static const int chunk_size
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define DIAG_COMPILE_INFO
Make compile time diagnostic information object to use in CNcbiDiag and CException.
Definition: ncbidiag.hpp:170
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
static const char * sm_BsrcForSeq
static CTempString GetFuncName()
static const char * sm_Stop
vector< CRef< CMacroRep > > TMacroLibrary
Definition: macro_lib.hpp:58
void GetFunctionNames(CMacroParser::TFunctionNamesList &where_funcs, CMacroParser::TFunctionNamesList &do_funcs) const
static const char * sm_Automatic
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
CRef< CMacroCmdComposite > m_CmdComposite
SMacroInfo m_ParsingInfo
Status, error message related to parsing a macro.
virtual void ResetTmpRTVarObjects()
static CTempString GetFuncName()
IMacroBioDataIter * CreateIterator(const string &selector, const string &named_annot, const TSeqRange &range, CNcbiOstream *ostr=nullptr) const
Create specific iterator as a subclass of IMacroBioDataIter.
bool Parse(bool bSingleMacroMode=true, CQueryExec *exec=NULL)
Parse the macro and build its representation Function throws CMacroException if parsing is unsuccessf...
static const char * sm_BsrcForSeqdesc
static const char * sm_PubIssue
static const char * sm_SeqNa
class CMacroFunction_LocationSeqType IS_SEQ_AA(["location"]) - returns true if the sequence identifie...
static CTempString GetFuncName()
EScopeEnum GetFuncScope()
bool GetNodeValue(const string &name, IQueryMacroUserObject &v) const
Function fills in IQueryMacroUserObject-derived node out of stored variable.
Definition: macro_rep.cpp:497
void AddToReport(const string &report)
function gradually builds the log report
void x_SetFunctions(CMacroParser &parser)
Initialize do/where lists of functions in parser.
void GatherUnMatchedTableEntries(const string &report, const int count)
Used when applying a table to the entry: it stores the values and number of table entries that were n...
static CTempString GetFuncName()
static stringstream s_ExecuteInParallel(const CMacroRep &macro_rep, const CMacroBioData &data, const SFeatInterval &feat_interval, CRef< CMacroCmdComposite > CmdComposite, CRef< CMacroStat > &stat, bool throw_on_error)
static const char * sm_SeqForFeat
static CTempString GetFuncName()
void ThrowCMacroExecException(const CDiagCompileInfo &info, CMacroExecException::EErrCode code, const string &message, const CQueryParseTree::TNode *treeNode, const CException *previous=nullptr)
Throws CMacroExecException with the specified message and error location from the TNode.
Definition: macro_ex.hpp:279
void SetSource(const char *sMacroText)
Reset the parser and sets macro text to be parsed.
virtual bool ResolveIdentifier(const string &identifier, CMQueryNodeValue &val, const CQueryParseTree::TNode *parent)
Resolve identifier and store the value in @val.
static CTempString GetFuncName()
void SetError(const string &message, Uint4 line, Uint4 column)
static const char * sm_FunctionName
class CMacroFunction_SetPubField SetPub_Date(year, month, day, season, hour, minute,...
bool m_Status
status of the activity (parsing or execution)
static CTempString GetFuncName()
static CTempString GetFuncName()
const string & GetForEachString() const
Return "for each" string.
Definition: macro_rep.hpp:359
static CTempString GetFuncName()
static const char * sm_FunctionName
class CMacroFunction_ApplyPublication SetPub_Sub(author_field_name, author_field_value) Apply new pub...
void EvaluateTree(CQueryParseTree &Qtree, IResolver &resolver, bool query_tree, bool case_sensitive=false)
Evaluates tree.
Definition: macro_exec.cpp:519
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_PubJournal
const string & GetErrorReport() const
static const char * sm_SeqForDescr
class CMacroFunction_Sequence_For_Seqdesc SEQUENCE_FOR_SEQDESC(field_name) SEQUENCE_FOR_SEQFEAT(field...
static CTempString GetFuncName()
bool AreObjects() const
Definition: macro_exec.hpp:152
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
CMacroRep * DetachMacroRep()
Detach macro representation for futher processing Caller is responsible for deletion of returned obje...
static const char * sm_PartialStart
class CMacroFunction_LocPartialTest - tests whether the location is 5'/3' partial ISPARTIALSTART() - ...
bool IsBoolType() const
Definition: macro_exec.hpp:339
list< SResolvedField > TObs
Definition: macro_exec.hpp:92
static CTempString GetFuncName()
Uint4 m_Column
location of problem within the macro script
static CTempString GetFuncName()
static const char * sm_PubSerialNumber
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_MolinfoForFeat
bool GetSimpleTypeValue(CObjectInfo &oi, const string &field_name, CMQueryNodeValue &value)
Get single node data from the node specified by parameter of type CObjectInfo and additionally field ...
static const char * sm_FuncVolume
class CMacroFunction_SetPubVolIssuePages SetPubVolume(newValue, existing_text, delimiter,...
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
const TObs & GetObjects() const
Definition: macro_exec.hpp:144
unsigned x_GetThreadCount(const CMacroRep &macro_rep, const CMacroBioData &data)
static const char * sm_PubAuthors
void LogStart(const string &macro_name)
log start of macro execution
CIRef< IMacroBioDataIter > m_DataIter
bool ResolveIdentToObjects(const CObjectInfo &oi, const string &identifier, CMQueryNodeValue &v)
Resolve name to the list of objects.
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_FromStart
class CMacroFunction_LocationDistConstraint DISTFROMSTART() [>][=][<] distance DISTFROMSTOP()
static CTempString GetFuncName()
CConstIRef< IMacroFunctionContext > m_FuncContext
unsigned GetColumnNo(void) const
Returns the column number where error occurred.
Definition: macro_ex.hpp:253
static CTempString GetFuncName()
static CTempString GetFuncName()
bool ReadAndParseMacros(const string &filename, vector< CRef< CMacroRep >> &macro_list)
Parse a file containing macros and store them in a vector.
bool Exec(const CMacroRep &macro_rep, const CMacroBioData &data, CRef< CMacroCmdComposite > CmdComposite, bool throw_on_error=false, CNcbiOstream *ostream=nullptr)
Execute a macro.
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
void LogStop(bool status, const string &err_message)
log end of macro execution this includes time of execution, status, count of changed qualifiers and a...
CQueryParseTree * GetAssignmentWhereClause(int index) const
Definition: macro_rep.cpp:280
static CTempString GetFuncName()
bool IsNotSet() const
Definition: macro_exec.hpp:153
static CTempString GetFuncName()
static const char * sm_SNP
EType GetDataType() const
Definition: macro_exec.hpp:122
static CTempString GetFuncName()
static const char * sm_First
CMacroFunction_FirstItem FIRSTOF(objects) - returns the first item from the list of objects LASTOF(ob...
CQueryParseTree::TFunctionNames TFunctionNamesList
Type for the list of functions in Where/Do clauses.
void SetFunctionNames(const TFunctionNamesList &wh_funcs, const TFunctionNamesList &do_funcs)
initializes lists of known functions that should be recognized by the parser when going through WHERE...
objects::CSeq_entry_Handle GetTSE() const
static CTempString GetFuncName()
static CTempString GetFuncName()
const TSeqRange & GetSeqRange() const
Return sequence range.
Definition: macro_rep.hpp:363
static const char * sm_BsrcForFeat
const string & GetNamedAnnot() const
Return "from" named annotation.
Definition: macro_rep.hpp:361
static CTempString GetFuncName()
static CTempString GetFuncName()
static bool s_IsFeatSelector(const string &selector)
virtual bool GetTmpRTVarObject(const string &name, CObjectInfo &oi)
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_PubVolume
virtual bool ExistRTVar(const string &name)
Return true if the run-time variable, identified by its name, is defined.
static CTempString GetFuncName()
static CTempString GetFuncName()
virtual CQueryParseTree * GetAssignmentWhereClause(int index) const
Return the "where" sub-query from the DO-DONE section identified by the index in the main parsed tree...
const string & GetName() const
Return macro name.
Definition: macro_rep.hpp:354
static CTempString GetFuncName()
virtual CRef< CMQueryNodeValue > GetOrCreateRTVar(const string &name)
Get or create run-time variable.
static CTempString GetFuncName()
CMacroStat m_MacroStat
Most recent executed macro statistics.
bool IsNotSetType() const
Check/get functions result from the top node after calculation.
Definition: macro_exec.hpp:338
static CTempString GetFuncName()
CRef< CMQueryNodeValue > x_LocateRTVar(const string &identifier)
Return data associated with the RT variable based on its name.
static CTempString GetFuncName()
static CTempString GetFuncName()
CConstRef< CMacroRep > m_MacroRep
bool ResolveIdentToSimple(const CObjectInfo &oi, const string &identifier, CMQueryNodeValue &v)
Resolve name to simple types value.
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_PubStatus
static CTempString GetFuncName()
void SetNestedState(ENestedFunc type)
const string & GetFuncReport() const
Function extracts statistic from the object.
static CTempString GetFuncName()
static CMacroEngineParallel::SMacroInfo s_ExecuteSingleThread(const CMacroRep &macro_rep, const CMacroBioData &data, CRef< CMacroCmdComposite > CmdComposite, CConstIRef< IMacroFunctionContext > context, CMacroStat &stat, bool throw_on_error, CNcbiOstream *ostream)
static CTempString GetFuncName()
static CTempString GetFuncName()
bool x_AppendToLibrary(const string &filename, CMacroLib::TMacroLibrary &lib)
static CTempString GetFuncName()
static const char * sm_PubTitle
class CMacroFunction_PubFields PUB_TITLE(), PUB_ISSUE(), PUB_AFFIL(subfield) - returns a list of CObj...
static CTempString GetFuncName()
static CTempString GetFuncName()
CIRef< IMacroFunctionContext > m_EngineFuncContext
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
TBuiltInFunctionsMap m_BuiltInFunctions
unsigned GetLineNo(void) const
Returns the line number where error occurred.
Definition: macro_ex.hpp:247
static CTempString GetFuncName()
virtual void CallFunction(const string &name, CQueryParseTree::TNode &qnode)
Function call the function specified by name, passing args as parameters and it can update nv as a re...
static const char * sm_PubPMID
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_PubDate
static const char * sm_PubAffil
CMacroRep * Parse(const string &macro_text)
Parse the macro script into its binary representation.
static CTempString GetFuncName()
CQueryParseTree * GetDoTree() const
Return "do" clause.
Definition: macro_rep.hpp:367
static CTempString GetFuncName()
static const char * sm_BsrcForMolinfo
class CMacroFunction_GetSeqdesc BIOSOURCE_FOR_MOLINFO(field_name) or BIOSOURCE_FOR_MOLINFO(container,...
static const char * sm_PubCit
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
void Reset()
reset the macro name, qualifier and iteration counts
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static const char * sm_FunctionName
EditRelatedFeatureQual(feat_type, field_name, find_text, repl_text, location, case_sensitive,...
TTempRTVarsMap m_TempRTVars
bool GetBoolValue() const
Definition: macro_exec.hpp:340
static CTempString GetFuncName()
static const char * sm_PubPages
IEditMacroFunction * ResolveFunctionName(const string &name) const
Return pointer to macro function identified by its name.
bool AppendToLibrary(const string &filename, CMacroLib::TMacroLibrary &lib)
Parse a file containing macros and append the results to the map that stores these macros.
static CTempString GetFuncName()
CQueryParseTree * GetWhereClause() const
Return "where" clause.
Definition: macro_rep.hpp:365
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
void AddToErrorReport(const string &error)
store encountered errors
static CTempString GetFuncName()
static const char * sm_PubClass
const string & GetSource() const
Definition: macro_rep.hpp:334
bool x_ResolveRTVar(const string &identifier, CMQueryNodeValue &val, const CQueryParseTree::TNode *parent)
Return the value of Run-Time (RT) variable.
virtual void AddTmpRTVarObject(const string &name, CObjectInfo &oi)
used together with the Assignment Operator (CMQueryFunctionAssignment)
static const char * sm_Start
class CMacroFunction_LocEnd Start() and Stop() - return the positional extremes of a location
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
static CTempString GetFuncName()
const string & GetThreadCount() const
Return number of threads.
Definition: macro_rep.hpp:369
static const char * sm_MolinfoForBsrc
static size_t EstimateSNPCount(const TSeqRange &range, const string &sAnnotName, CBioseq_Handle &Handle)
Definition: snp_gui.cpp:139
@ eUnknown
Definition: app_popup.hpp:72
CTempString GetCurrentLine(void) const
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
void ReadLine(void)
Definition: line_reader.hpp:88
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
TSeqPos GetBioseqLength(void) const
TObjectType * GetNCPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1162
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:986
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
unsigned pos
Position in the src line.
unsigned line
Src line number.
@ eFunction
Function.
Definition: query_parse.hpp:91
position_type GetLength(void) const
Definition: range.hpp:158
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define USING_SCOPE(ns)
Use the specified namespace.
Definition: ncbistl.hpp:78
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3550
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
const TValue & GetValue(void) const
Return node's value.
Definition: ncbi_tree.hpp:184
const TTreeType * GetParent(void) const
Get node's parent.
Definition: ncbi_tree.hpp:139
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
@ ePublication_field_affiliation
@ ePublication_field_cit
@ ePublication_field_pages
@ ePublication_field_pmid
@ ePublication_field_pub_class
@ ePublication_field_issue
@ ePublication_field_date
@ ePublication_field_title
@ ePublication_field_serial_number
@ ePublication_field_authors
@ ePublication_field_volume
@ ePublication_field_journal
@ eSeqtype_constraint_prot
@ eSeqtype_constraint_nuc
@ e_Sub
submission
Definition: Pub_.hpp:103
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
int i
Lightweight interface for getting lines of data with minimal memory copying.
#define _TRACE(arg)
Macro exceptions.
Functions that resolve field names described in asn format.
Interface class for macro function implementation.
Functions used in the DO/DONE section affecting the top seq-entry.
Macro parser components.
T min(T x_, T y_)
Source location (points to the position in the original src) All positions are 0 based.
#define _ASSERT
Modified on Wed Nov 29 02:20:26 2023 by modify_doxy.py rev. 669887