NCBI C++ ToolKit
feature_filter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id:
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Liangshou Wu
27  *
28  * File Description:
29  * Implementation for feature filter query execution
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
42 
43 
45 
46 static const char* kVariantQuality = "variant_quality"; // string identifier: the "Variant Quality" named qualifier ("high" when it is empty)
47 static const char* kPilot = "pilot"; // string identifier: the "Pilot" named qualifier
48 static const char* kClinicalAssertion = "clinical_assertion"; // string identifier: clinical significance of a variation feature's phenotype
49 static const char* kConcordant = "concordant"; // bool identifier: a clone feature's concordant flag
50 static const char* kDiscordant = "discordant"; // bool identifier: negation of a clone feature's concordant flag
51 static const char* kSamplesetType = "sampleset_type"; // string identifier: the "sampleset_type" named qualifier of a variation feature
52 static const char* kValidationStatus = "validation_status"; // string identifier: "Status" field of a variation feature's "Validation" extension
53 static const char* kFrom = "from"; // integer identifier: start of the feature location's total range
54 static const char* kTo = "to"; // integer identifier: end of the feature location's total range
55 static const char* kIntronReads = "reads"; // integer identifier: "Total" field of the feature's "Support" extension
56 
57 ///////////////////////////////////////////////////////////////////////////////
58 /// class CFeatureFilter implementation
59 ///
61 { // empty body: no statements are executed here
62 }
63 
64 
65 void CFeatureFilter::Init(const string& filter)
66 { // Prepares this filter from a textual query; an empty query returns immediately and changes nothing.
67  if (filter.empty()) return;
68 
73  AddId("allele_type", CQueryParseNode::eString); // identifiers a query may reference, registered with their value type
74  AddId("variant_type", CQueryParseNode::eString);
83 
84 
85  string filter_str = filter; // work on a copy so the operator words can be rewritten in place
86  NStr::ReplaceInPlace(filter_str, " eq ", "="); // only space-delimited operator words are rewritten to symbols
87  NStr::ReplaceInPlace(filter_str, " gt ", ">");
88  NStr::ReplaceInPlace(filter_str, " ge ", ">=");
89  NStr::ReplaceInPlace(filter_str, " lt ", "<");
90  NStr::ReplaceInPlace(filter_str, " le ", "<=");
91  try {
92  // If query tree parsing is case sensitive, keywords like AND and LIKE
93  // have to be uppercase. For tree queries, case sensitivity will just
94  // refer to string comparisons for each node.
97 
98  //CNcbiOstrstream strstrm;
99  //m_Qtree.Print(strstrm);
100 
101  //// Logging merges lines (even if i call
102  //// UnsetDiagPostFlag(eDPF_PreMergeLines/eDPF_MergeLines);
103  //LOG_POST(Info << "Parsed Query: " << filter_str);
104  //vector<string> arr;
105  //string s = (string)CNcbiOstrstreamToString(strstrm);
106  //NStr::Split(s, "\n", arr);
107  //for (size_t i=0; i<arr.size(); ++i) {
108  // LOG_POST(Info << arr[i]);
109  //}
110 
111  // do preprocessing/initialization/validation of the query tree
114 
115  m_NeedFiltering = true; // reached only if nothing earlier in this try block threw
116 
117  } catch (CFeatFilterQueryException& e) {
118  LOG_POST(Info << "Error parsing query: " << e.GetMsg());
119  return; // m_NeedFiltering is not set on this path and the remainder of Init is skipped
120  }
121 
122  // Logical operators:
128 
129  // Constants:
134 
135  // Comparison operators:
144 
145 }
146 
147 
148 bool CFeatureFilter::Pass(const objects::CMappedFeat* feat)
149 { // Runs the query tree against `feat`; falls through to `return false` when the top-level check fails or an exception below is caught.
150  m_EvalFeat = feat; // remembered so the ResolveIdentifier() overloads can read the feature being tested
151  try {
152  Evaluate(m_Qtree);
154  CSGQueryNodeValue* v =
155  dynamic_cast<CSGQueryNodeValue*>((*top_node)->GetUserObject());
156  _ASSERT(v); // NOTE(review): v is dereferenced right after; if _ASSERT is compiled out a failed cast goes unchecked - confirm
158  v->m_Bool) {
159  return true;
160  }
161 
162  } catch (CQueryParseException&) {
163  // Could avoid flooding log since many may be identical, but
164  // maybe the last one is the one you need to see (for debugging...)
165  // _TRACE("Query execution error: " << e.GetMsg());
166  } catch (CFeatFilterQueryException& ex) {
167  // Could avoid flooding log since many may be identical, but
168  // maybe the last one is the one you need to see (for debugging...)
169  LOG_POST(Info << "Query execution error: " << ex.GetMsg());
170  }
171 
172 
173  return false; // also the result when CQueryParseException was swallowed above without any logging
174 }
175 
176 
178 {
179  m_IdDict[id] = type; // record the value type for this identifier name in the identifier dictionary
180 }
181 
182 
184 {
185  return &m_IdDict; // address of the member dictionary itself, not a copy
186 }
187 
188 
190 {
191  return m_NeedFiltering; // the flag Init() sets to true inside its try block
192 }
193 
194 
196  bool& value) // Boolean identifiers: only clone features are handled; returns false for any other feature type.
197 {
198  if(!m_EvalFeat) {
199  NCBI_THROW(CFeatFilterQueryException, eNullFeature,
200  "Error: NULL feature evaluation");
201 
202  }
203  const objects::CSeq_feat& feat = m_EvalFeat->GetOriginalFeature();
204 
205  if (feat.GetData().IsClone()) {
206  if (identifier == kConcordant) {
207  if (feat.GetData().GetClone().IsSetConcordant()) {
208  value = feat.GetData().GetClone().GetConcordant();
209  } else {
210  value = false; // flag not set: treated as not concordant
211  }
212  } else if (identifier == kDiscordant) {
213  if (feat.GetData().GetClone().IsSetConcordant()) {
214  value = !feat.GetData().GetClone().GetConcordant();
215  } else {
216  value = false; // flag not set: treated as not discordant either
217  }
218  } // NOTE(review): a clone feature with any other identifier reaches `return true` below with `value` never assigned - confirm this is intended
219  } else {
220  return false;
221  }
222 
223  return true;
224 }
225 
226 
228  Int8& value) // Integer identifiers: "len", kFrom, kTo and kIntronReads; returns false when nothing was resolved.
229 {
230  value = 0; // default that remains in place on every `return false` path
231  if (!m_EvalFeat)
232  NCBI_THROW(CFeatFilterQueryException, eNullFeature, "Error: NULL feature evaluation");
233 
234  if (identifier == "len") {
235  const objects::CSeq_loc& loc = m_EvalFeat->GetLocation();
236  if (loc.GetId()) { // the location is used only when GetId() returns non-null
237  value = loc.GetTotalRange().GetLength();
238  return true;
239  }
240  } else if (identifier == kFrom) {
241  const objects::CSeq_loc& loc = m_EvalFeat->GetLocation();
242  if (loc.GetId()) {
243  value = loc.GetTotalRange().GetFrom();
244  return true;
245  }
246  } else if (identifier == kTo) {
247  const objects::CSeq_loc& loc = m_EvalFeat->GetLocation();
248  if (loc.GetId()) {
249  value = loc.GetTotalRange().GetTo();
250  return true;
251  }
252  } else if (identifier == kIntronReads) {
253  for (const auto& iter : m_EvalFeat->GetOriginalFeature().GetExts()) { // NOTE(review): unlike the validation-status lookup in the string overload, there is no IsSetExts() check first - confirm GetExts() is safe when unset
254  if (iter->GetType().IsStr() &&
255  iter->GetType().GetStr() == "Support" &&
256  iter->GetFieldRef("Total") &&
257  !iter->GetFieldRef("Total").IsNull()) { // NOTE(review): looks redundant with the null test on the previous line - confirm
258  const auto& d = iter->GetFieldRef("Total")->GetData();
259  if (d.IsInt())
260  value = d.GetInt();
261  else if (d.IsReal())
262  value = float(d.GetReal() + 0.5); // NOTE(review): +0.5 rounds, but going through `float` cannot represent every integer above 2^24 exactly - large totals may lose precision
263  return true; // the first "Support" extension with a "Total" field wins; value stays 0 if that field is neither int nor real
264  }
265  }
266  }
267  return false;
268 }
269 
270 
273 { // String identifiers: fills `value` and returns true for a handled identifier, false otherwise.
274  if(!m_EvalFeat) {
275  NCBI_THROW(CFeatFilterQueryException, eNullFeature,
276  "Error: NULL feature evaluation");
277  }
278  const objects::CSeq_feat& feat = m_EvalFeat->GetOriginalFeature();
279  if (identifier == kVariantQuality) {
280  value = feat.GetNamedQual("Variant Quality");
281  if (value.empty()) {
282  value = "high"; // an empty qualifier value is reported as "high"
283  }
284  } else if (identifier == kPilot) {
285  value = feat.GetNamedQual("Pilot");
286  } else if (identifier == kClinicalAssertion &&
287  feat.GetData().IsVariation()) { // on a non-variation feature this identifier ends in the final `return false`
288  const objects::CVariation_ref& var = feat.GetData().GetVariation();
289  ITERATE (objects::CVariation_ref::TPhenotype, pnt_iter, var.GetPhenotype()) {
290  if ((*pnt_iter)->CanGetClinical_significance()) {
291  switch ((*pnt_iter)->GetClinical_significance()) {
292  case objects::CPhenotype::eClinical_significance_non_pathogenic:
293  value = "benign";
294  break;
295  case objects::CPhenotype::eClinical_significance_pathogenic:
296  value = "pathogenic";
297  break;
298  case objects::CPhenotype::eClinical_significance_probable_pathogenic:
299  value = "likely_pathogenic";
300  break;
301  case objects::CPhenotype::eClinical_significance_probable_non_pathogenic:
302  value = "likely_benign";
303  break;
304  case objects::CPhenotype::eClinical_significance_unknown:
305  value = "likely_unknown"; // NOTE(review): label differs in form from the others - confirm it matches the expected filter vocabulary
306  break;
307  case objects::CPhenotype::eClinical_significance_untested:
308  value = "not_tested";
309  break;
310  case objects::CPhenotype::eClinical_significance_other:
311  default:
312  value = "other";
313  break;
314  }
315  break; // only the first phenotype that can supply a clinical significance is used
316  }
317  }
318  if (value.empty()) {
319  value = "other"; // no phenotype supplied a significance (assuming `value` was empty on entry)
320  }
321  } else if (identifier == kSamplesetType &&
322  feat.GetData().IsVariation()) {
323  value = feat.GetNamedQual("sampleset_type");
324  } else if (identifier == kValidationStatus && feat.GetData().IsVariation()) {
325  value = "other"; // default when no matching extension is found below
326  if (feat.IsSetExts()) {
327  const objects::CSeq_feat::TExts& exts = feat.GetExts();
328  ITERATE (objects::CSeq_feat::TExts, iter, exts) {
329  if ( (*iter)->GetType().IsStr() &&
330  NStr::EqualNocase((*iter)->GetType().GetStr(), "Validation") &&
331  (*iter)->GetFieldRef("Status") &&
332  (*iter)->GetFieldRef("Status")->GetData().IsStr()) {
333  value = (*iter)->GetFieldRef("Status")->GetData().GetStr();
334  break; // first "Validation" extension with a string "Status" field wins
335  }
336  }
337  }
338  } else {
339  return false; // identifier not handled for this feature
340  }
341  return true;
342 }
343 
344 
346 {
347  return m_IdDict.count(id) > 0; // true when the identifier dictionary holds an entry for `id`
348 }
349 
350 ///////////////////////////////////////////////////////////////////////////////
351 /// class CSGQueryNodeValue implementation
352 ///
354 { // Converts the node's own value to `type`, storing it in the matching m_Int/m_Double/m_Bool/m_String member; returns whether that succeeded.
355  // we only do data type conversion to int, float, bool and string, and
356  // never for a data field node
359  IsDataField()) {
360  return false;
361  }
362 
363  bool success = true;
364  CQueryParseNode::EType src_type = (*m_Node)->GetType(); // type the parser assigned to the node
365  switch (type) {
367  {{ // target stored in m_Int
368  switch (src_type) {
370  m_Int = (*m_Node)->GetInt();
371  break;
373  m_Int = (Int8)(*m_Node)->GetDouble(); // the cast truncates toward zero
374  break;
376  m_Int = (Int8)(*m_Node)->GetBool();
377  break;
378  default:
379  // treat it as a string
380  try {
381  m_Int = NStr::StringToInt8((*m_Node)->GetStrValue());
382  } catch (CException&) {
383  // fail to convert
384  success = false;
385  }
386  break;
387  }
388  }}
389  break;
391  {{ // target stored in m_Double
392  switch (src_type) {
394  m_Double = (double)(*m_Node)->GetInt();
395  break;
397  m_Double = (*m_Node)->GetDouble();
398  break;
400  // no bool to double
401  success = false;
402  break;
403  default:
404  // treat it as a string
405  try {
406  m_Double = NStr::StringToDouble((*m_Node)->GetStrValue());
407  } catch (CException&) {
408  // fail to convert
409  success = false;
410  }
411  break;
412  }
413  }}
414  break;
416  {{ // target stored in m_Bool
417  switch (src_type) {
419  m_Bool = (bool)(*m_Node)->GetInt(); // any non-zero integer becomes true
420  break;
422  // no double to bool
423  success = false;
424  break;
426  m_Bool = (*m_Node)->GetBool();
427  break;
428  default:
429  // treat it as a string
430  try {
431  m_Bool = NStr::StringToBool((*m_Node)->GetStrValue());
432  } catch (CException&) {
433  // fail to convert
434  success = false;
435  }
436  break;
437  }
438  }}
439  break;
441  default:
442  {{ // every remaining target type is stored as text in m_String
443  try {
444  switch (src_type) {
446  m_String = NStr::Int8ToString((*m_Node)->GetInt());
447  break;
449  m_String = NStr::DoubleToString((*m_Node)->GetDouble());
450  break;
452  m_String = NStr::BoolToString((*m_Node)->GetBool());
453  break;
454  default:
455  m_String = (*m_Node)->GetStrValue();
456  break;
457  }
458  } catch (CException&) {
459  // fail to convert
460  success = false;
461  }
462  }}
463  break;
464 
465  }
466 
467  if ( !success ) {
469  } else {
470  SetDataType(type); // the data type is updated only after a successful conversion
471  }
472 
473  return success;
474 }
475 
476 
477 ///////////////////////////////////////////////////////////////////////////////
478 /// class CSGQueryNodePreprocessor implementation
479 ///
481  : m_Dictionary(d) // stores the caller's dictionary pointer; the dictionary itself is not copied
482 {}
483 
486 { // Tree-traversal callback: attaches a CSGQueryNodeValue to each processed node and marks operator nodes and their children valid or invalid.
487  if (delta == 0 || delta == 1) {
488  // If node has children, we skip it and process on the way back
489  if (!tr.IsLeaf()) {
490  return eTreeTraverse;
491  }
492  }
493 
494  CSGQueryNodeValue* v = new CSGQueryNodeValue(&tr); // NOTE(review): presumably owned by the node after SetUserObject() below - confirm, no delete is visible here
495  tr->SetUserObject(v);
496  CQueryParseNode::EType node_type = tr->GetType();
497 
498  if (tr.IsLeaf()) {
499  // a leaf node will be initialized by its parent unless it is
500  // a verified identifier node (one found in the dictionary)
501  const string& id = tr->GetOriginalText();
503  if (iter != m_Dictionary->end()) {
504  v->SetDataType(iter->second); // a known identifier takes the type registered for it
505  v->SetDataField(true);
506  }
507  return eTreeTraverse;
508  }
509 
510  bool valid = false;
511  int child_num = tr.CountNodes(); // checked below against the operand count each operator requires
512 
513  // every non-leaf node has bool type
515 
516  if (node_type == CQueryParseNode::eAnd ||
517  node_type == CQueryParseNode::eOr ||
518  node_type == CQueryParseNode::eSub ||
519  node_type == CQueryParseNode::eXor ||
520  node_type == CQueryParseNode::eNot) {
521  // verify having correct number of child nodes
522  if ( (node_type == CQueryParseNode::eNot && child_num != 1) ||
523  (node_type != CQueryParseNode::eNot && child_num != 2) ) {
524  NCBI_THROW(CFeatFilterQueryException, eWrongArgumentCount,
525  "Incorrect query node count");
526  }
527 
529  while (iter != tr.SubNodeEnd()) {
530  CQueryParseNode& sub_qnode = (*iter)->GetValue();
531  IQueryParseUserObject* uo = sub_qnode.GetUserObject();
532  CSGQueryNodeValue* sub_v = dynamic_cast<CSGQueryNodeValue*>(uo);
533  _ASSERT(sub_v);
534 
535  if ( !sub_v->IsValid() ) {
536  // no need to do further check at all. NOTE(review): this `continue` skips the `++iter` at the bottom of the loop, so an invalid child would be revisited forever - confirm and advance iter first
537  continue;
538  }
539 
540  // All child nodes must be eBoolConst. If not and it is leaf node
541  // with const value, we will do data conversion. Otherwise, we
542  // mark it as invalid.
543  if ((*iter)->IsLeaf() && !sub_v->IsDataField()) {
545  }
546 
547  if (sub_v->GetDataType() != CQueryParseNode::eBoolConst) {
548  sub_v->SetValid(false);
549  } else {
550  // A logic node is valid if at least one of its child nodes
551  // is valid. The invalid children are simply ignored
552  // during query execution.
553  valid = true;
554  }
555  ++iter;
556  }
557  } else if (node_type == CQueryParseNode::eLike ||
558  node_type == CQueryParseNode::eBetween ||
559  node_type == CQueryParseNode::eIn ||
560  node_type == CQueryParseNode::eEQ ||
561  node_type == CQueryParseNode::eGT ||
562  node_type == CQueryParseNode::eGE ||
563  node_type == CQueryParseNode::eLT ||
564  node_type == CQueryParseNode::eLE) {
565 
566  // assume valid first, but if any child node is invalid,
567  // it will be marked as invalid
568  valid = true;
569 
570  // verify having correct number of child nodes
571  if ( (node_type == CQueryParseNode::eBetween && child_num != 3) ||
572  (node_type == CQueryParseNode::eIn && child_num < 2) ||
573  (( (node_type >= CQueryParseNode::eEQ &&
574  node_type <= CQueryParseNode::eLE) ||
575  node_type == CQueryParseNode::eLike ) && child_num != 2)) {
576  valid = false; // has no observable effect: the throw on the next statement leaves the function
577  NCBI_THROW(CFeatFilterQueryException, eWrongArgumentCount,
578  "Incorrect query node count");
579  }
580 
581  // verify all children have the same type as the first (primary) child
583  CSGQueryNodeValue* primary_v =
584  dynamic_cast<CSGQueryNodeValue*>((*iter)->GetValue().GetUserObject());
585  _ASSERT(primary_v);
586 
587  if ( !primary_v->IsDataField() ) {
588  NCBI_THROW(CFeatFilterQueryException, eIncorrectNodeType,
589  "The first argument has to be an indentifier");
590  }
591 
592  CQueryParseNode::EType p_type = primary_v->GetDataType();
593  if (node_type == CQueryParseNode::eLike &&
594  p_type != CQueryParseNode::eString) {
595  valid = false; // LIKE is accepted only when the identifier has string type
596  }
597 
598  if (node_type == CQueryParseNode::eBetween &&
599  p_type == CQueryParseNode::eBoolConst) {
600  valid = false; // BETWEEN is rejected for a bool identifier
601  }
602 
603  while (valid && ++iter != tr.SubNodeEnd()) { // the pre-increment moves past the primary child before the first pass
604  CQueryParseNode& sub_qnode = (*iter)->GetValue();
605  IQueryParseUserObject* uo = sub_qnode.GetUserObject();
606  CSGQueryNodeValue* sub_v = dynamic_cast<CSGQueryNodeValue*>(uo);
607  _ASSERT(sub_v);
608 
609  // All other child nodes must use p_type. If not, we will do data
610  // conversion.
611  if ((*iter)->IsLeaf()) {
612  sub_v->PromoteTo(p_type); // return value is ignored; a failed conversion is caught by the type check below
613  // all other sub_node can't be a data field
614  sub_v->SetDataField(false);
615  }
616 
617  if (sub_v->GetDataType() != p_type) {
618  valid = false;
619  }
620  }
621 
622  // mark all children as invalid if the parent node is invalid
623  if ( !valid ) {
624  for (iter = tr.SubNodeBegin(); iter != tr.SubNodeEnd(); ++iter) {
625  CQueryParseNode& sub_qnode = (*iter)->GetValue();
626  IQueryParseUserObject* uo = sub_qnode.GetUserObject();
627  CSGQueryNodeValue* sub_v = dynamic_cast<CSGQueryNodeValue*>(uo);
628  _ASSERT(sub_v);
629 
630  sub_v->SetValid(false);
631  }
632  }
633  }
634 
635  v->SetValid(valid); // stays false for any non-leaf node type not listed in the two branches above
636 
637  return eTreeTraverse;
638 }
639 
640 
641 ///////////////////////////////////////////////////////////////////////////////
642 /// class CSGQueryFunctionValue
643 ///
645 { // Acts only on valid data-field nodes; all other nodes are left untouched.
646  CSGQueryNodeValue* v =
647  dynamic_cast<CSGQueryNodeValue*>(node->GetUserObject());
648  _ASSERT(v);
649 
650  if (v->IsValid() && v->IsDataField()) {
651  const string& id = node->GetStrValue(); // the node's text is used as the identifier name
652  // update the data field, one case per data type
653  switch (v->GetDataType()) {
656  break;
659  break;
662  break;
665  break;
666  default:
667  break;
668  }
669  }
670 }
671 
672 
673 ///////////////////////////////////////////////////////////////////////////////
674 /// class CSGQueryFunctionLogic
675 ///
677 { // Computes v->m_Bool for a logical operator node from its children's m_Bool values; invalid nodes are skipped.
678  CSGQueryNodeValue* v =
679  dynamic_cast<CSGQueryNodeValue*>(node->GetUserObject());
680  _ASSERT(v);
681 
682  if ( !v->IsValid() ) {
683  return;
684  }
685 
686  v->m_Bool = false; // default result until the operator is evaluated
687 
689  this->MakeArgVector(node, args);
690  CQueryParseNode::EType node_type = node->GetType();
691 
692  // check if it is a binary operator:
693  if (node_type == CQueryParseNode::eAnd ||
694  node_type == CQueryParseNode::eOr ||
695  node_type == CQueryParseNode::eSub ||
696  node_type == CQueryParseNode::eXor) {
697 
698  // Since this is a logical operator, both arguments must be the
699  // boolean result of a previous expression or promotable to
700  // boolean. No need to do type promotion for this.
701 
702  CSGQueryNodeValue* sub_v1 =
703  dynamic_cast<CSGQueryNodeValue*>((*args[0])->GetUserObject());
704  CSGQueryNodeValue* sub_v2 =
705  dynamic_cast<CSGQueryNodeValue*>((*args[1])->GetUserObject());
706  _ASSERT(sub_v1);
707  _ASSERT(sub_v2);
708 
709  // preprocessor guarantees at least one of them is valid
710  if ( !sub_v1->IsValid() ) {
711  v->m_Bool = sub_v2->m_Bool; // an invalid operand is ignored: the result is the other operand's value
712  } else if ( !sub_v2->IsValid() ) {
713  v->m_Bool = sub_v1->m_Bool;
714  } else {
715  switch (node_type) {
717  v->m_Bool = sub_v1->m_Bool && sub_v2->m_Bool;
718  break;
720  v->m_Bool = sub_v1->m_Bool || sub_v2->m_Bool;
721  break;
723  v->m_Bool = sub_v1->m_Bool && !sub_v2->m_Bool; // first operand true and second operand false
724  break;
726  v->m_Bool = sub_v1->m_Bool != sub_v2->m_Bool; // true when exactly one operand is true
727  break;
728  default:
729  break;
730  }
731  }
732 
733  if (node->IsNot()) {
734  v->m_Bool = !v->m_Bool; // applied to the result on all three paths above, including the single-valid-operand ones
735  }
736 
737  } else if (node_type == CQueryParseNode::eNot) {
738  CSGQueryNodeValue* sub_v =
739  dynamic_cast<CSGQueryNodeValue*>((*args[0])->GetUserObject());
740  _ASSERT(sub_v);
741 
742  v->m_Bool = !sub_v->m_Bool; // NOTE(review): unlike the binary case, the operand's IsValid() is not consulted here - confirm
743 
744  } else {
745  NCBI_THROW(CFeatFilterQueryException, eInvalidQuery,
746  "Error: Unexpected logical operand:" + node->GetOriginalText());
747  }
748 }
749 
750 
751 ///////////////////////////////////////////////////////////////////////////////
752 /// class CSGQueryFunctionCompare
753 ///
755 { // Computes v->m_Bool for a comparison node (binary comparisons, LIKE, BETWEEN, IN); invalid nodes are skipped.
756  CSGQueryNodeValue* v =
757  dynamic_cast<CSGQueryNodeValue*>(node->GetUserObject());
758  _ASSERT(v);
759 
760  if ( !v->IsValid() ) {
761  return;
762  }
763 
764  v->m_Bool = false; // default result; kept if no type branch below matches
765 
767  this->MakeArgVector(node, args);
768 
769  // get operation type
770  CQueryParseNode::EType node_type = node->GetType();
771 
772  // handle all the binary comparisons
773  if (node_type == CQueryParseNode::eEQ ||
774  node_type == CQueryParseNode::eGT ||
775  node_type == CQueryParseNode::eGE ||
776  node_type == CQueryParseNode::eLT ||
777  node_type == CQueryParseNode::eLE ||
778  node_type == CQueryParseNode::eLike) {
779 
780  CSGQueryNodeValue* sub_v1 =
781  dynamic_cast<CSGQueryNodeValue*>((*args[0])->GetUserObject());
782  CSGQueryNodeValue* sub_v2 =
783  dynamic_cast<CSGQueryNodeValue*>((*args[1])->GetUserObject());
784  _ASSERT(sub_v1);
785  _ASSERT(sub_v2);
786 
788  switch (node_type) {
791  v->m_Bool = sub_v1->m_Bool == sub_v2->m_Bool; // equality, one branch per operand type
792  } else if (value_type == CQueryParseNode::eIntConst) {
793  v->m_Bool = sub_v1->m_Int == sub_v2->m_Int;
795  v->m_Bool = sub_v1->m_Double == sub_v2->m_Double; // exact floating-point equality, no tolerance
796  } else if (value_type == CQueryParseNode::eString) {
797  v->m_Bool = NStr::EqualNocase(sub_v1->m_String, sub_v2->m_String); // strings compare case-insensitively
798  }
799  if (node->IsNot()) // NOTE(review): IsNot() is checked here and for the mask match below, but no such check is visible in the ordering, BETWEEN or IN branches - confirm
800  v->m_Bool = !v->m_Bool;
801  break;
802 
805  v->m_Bool = NStr::MatchesMask(sub_v1->m_String.c_str(),
806  sub_v2->m_String.c_str(), NStr::eNocase);
807  }
808  if (node->IsNot()) {
809  v->m_Bool = !v->m_Bool;
810  }
811  break;
812 
815  v->m_Bool = sub_v1->m_Bool > sub_v2->m_Bool; // greater-than; for bools, true orders above false
816  } else if (value_type == CQueryParseNode::eIntConst) {
817  v->m_Bool = sub_v1->m_Int > sub_v2->m_Int;
819  v->m_Bool = sub_v1->m_Double > sub_v2->m_Double;
820  } else if (value_type == CQueryParseNode::eString) {
821  int result = NStr::Compare(sub_v1->m_String.c_str(),
822  sub_v2->m_String.c_str(), NStr::eNocase);
823  v->m_Bool = result > 0;
824  }
825  break;
826 
829  v->m_Bool = sub_v1->m_Bool >= sub_v2->m_Bool; // greater-or-equal
830  } else if (value_type == CQueryParseNode::eIntConst) {
831  v->m_Bool = sub_v1->m_Int >= sub_v2->m_Int;
833  v->m_Bool = sub_v1->m_Double >= sub_v2->m_Double;
834  } else if (value_type == CQueryParseNode::eString) {
835  int result = NStr::Compare(sub_v1->m_String.c_str(),
836  sub_v2->m_String.c_str(), NStr::eNocase);
837  v->m_Bool = result >= 0;
838  }
839  break;
840 
843  v->m_Bool = sub_v1->m_Bool < sub_v2->m_Bool; // less-than
844  } else if (value_type == CQueryParseNode::eIntConst) {
845  v->m_Bool = sub_v1->m_Int < sub_v2->m_Int;
847  v->m_Bool = sub_v1->m_Double < sub_v2->m_Double;
848  } else if (value_type == CQueryParseNode::eString) {
849  int result = NStr::Compare(sub_v1->m_String.c_str(),
850  sub_v2->m_String.c_str(), NStr::eNocase);
851  v->m_Bool = result < 0;
852  }
853  break;
854 
857  v->m_Bool = sub_v1->m_Bool <= sub_v2->m_Bool; // less-or-equal
858  } else if (value_type == CQueryParseNode::eIntConst) {
859  v->m_Bool = sub_v1->m_Int <= sub_v2->m_Int;
861  v->m_Bool = sub_v1->m_Double <= sub_v2->m_Double;
862  } else if (value_type == CQueryParseNode::eString) {
863  int result = NStr::Compare(sub_v1->m_String.c_str(),
864  sub_v2->m_String.c_str(), NStr::eNocase);
865  v->m_Bool = result <= 0;
866  }
867  break;
868 
869  default:
870  // We already checked for this with the previous 'if', so this should not happen
871  break;
872  }
873 
874  } else if (node_type == CQueryParseNode::eBetween) { // handle the 'between' operator
875  // Set is_between to true if the value is both >= sub_v2 and <= sub_v3 (bounds inclusive).
876 
877  CSGQueryNodeValue* sub_v1 =
878  dynamic_cast<CSGQueryNodeValue*>((*args[0])->GetUserObject());
879  CSGQueryNodeValue* sub_v2 =
880  dynamic_cast<CSGQueryNodeValue*>((*args[1])->GetUserObject());
881  CSGQueryNodeValue* sub_v3 =
882  dynamic_cast<CSGQueryNodeValue*>((*args[2])->GetUserObject());
883  _ASSERT(sub_v1);
884  _ASSERT(sub_v2);
885  _ASSERT(sub_v3);
886 
888 
889  bool is_between = false;
891  is_between = sub_v1->m_Int >= sub_v2->m_Int;
893  is_between = sub_v1->m_Double >= sub_v2->m_Double;
894  } else if (value_type == CQueryParseNode::eString) {
895  int result = NStr::Compare(sub_v1->m_String.c_str(),
896  sub_v2->m_String.c_str(),
897  NStr::eNocase);
898  is_between = (result >= 0);
899  }
900 
901  // If we passed first test (sub_v1>=sub_v2), try second test (sub_v1<=sub_v3):
902  if (is_between) {
904  is_between = sub_v1->m_Int <= sub_v3->m_Int;
906  is_between = sub_v1->m_Double <= sub_v3->m_Double;
907  } else if (value_type == CQueryParseNode::eString) {
908  int result = NStr::Compare(sub_v1->m_String.c_str(),
909  sub_v3->m_String.c_str(),
910  NStr::eNocase);
911  is_between = (result <= 0);
912  }
913  }
914  v->m_Bool = is_between;
915 
916  } else if (node_type == CQueryParseNode::eIn) { // handle the 'In' operator
917  // Get underlying data and data type. Since there may be any number of operands,
918  // iterate through them and check each time if they need to be promoted.
919  // This allows the user to mix types between the parms, e.g. : dist IN (0.57, "", 0.56)
920  // without throwing an exception.
921  CSGQueryNodeValue* sub_v1 =
922  dynamic_cast<CSGQueryNodeValue*>((*args[0])->GetUserObject());
923  _ASSERT(sub_v1);
925 
926  bool found = false;
927  for (size_t i = 1; i < args.size() && !found; ++i) { // args[0] is the tested value; the loop stops at the first match
928  CSGQueryNodeValue* sub_v2 =
929  dynamic_cast<CSGQueryNodeValue*>((*args[i])->GetUserObject());
930  _ASSERT(sub_v2);
931 
933  found = (sub_v1->m_Bool == sub_v2->m_Bool);
934  } else if (value_type == CQueryParseNode::eIntConst) {
935  found = (sub_v1->m_Int == sub_v2->m_Int);
937  found = (sub_v1->m_Double == sub_v2->m_Double);
938  } else if (value_type == CQueryParseNode::eString) {
939  int result = NStr::Compare(sub_v1->m_String.c_str(),
940  sub_v2->m_String.c_str(),
941  NStr::eNocase);
942  found = (result == 0);
943  }
944  }
945  v->m_Bool = found;
946 
947  } else {
948  NCBI_THROW(CFeatFilterQueryException, eInvalidQuery,
949  "Error - Unhandled comparison operator: " + node->GetOriginalText());
950  }
951 }
952 
953 
955 
956 
957 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Query parser exceptions class CFeatFilterQueryException.
TIdentifierDict m_IdDict
Identifier dictionary.
const TIdentifierDict * GetIdDictionary() const
virtual bool ResolveIdentifier(const std::string &identifier, bool &value)
The following functions search in the feature asn for a value with the name 'identifier' and,...
bool NeedFiltering() const
bool Pass(const objects::CMappedFeat *feat)
CFeatureFilter()
class CFeatureFilter implementation
const objects::CMappedFeat * m_EvalFeat
Evaluated object.
void Init(const string &filter)
CQueryParseTree m_Qtree
void AddId(const string &id, CQueryParseNode::EType type)
virtual bool HasIdentifier(const std::string &id)
Search for 'identifier' in the node's feature list and return true if it exists.
Query parser exceptions.
Query node class.
Definition: query_parse.hpp:79
class CSGQueryFunctionCompare
virtual void Evaluate(CQueryParseTree::TNode &node)
class CSGQueryFunctionCompare
class CSGQueryFunctionLogic
virtual void Evaluate(CQueryParseTree::TNode &node)
class CSGQueryFunctionLogic
class CSGQueryFunctionValue
virtual void Evaluate(CQueryParseTree::TNode &node)
class CSGQueryFunctionValue
class CSGQueryNodePreprocessor
TIdentifierDict * m_Dictionary
CSGQueryNodePreprocessor(TIdentifierDict *d)
class CSGQueryNodePreprocessor implementation
ETreeTraverseCode operator()(CTreeNode< CQueryParseNode > &tr, int delta)
class CQueryNodeValue
Int8 m_Int
Int data, if data was an integer or converted into one.
bool PromoteTo(CQueryParseNode::EType type)
promote this value to the given type.
bool IsDataField() const
Get to indicate if this is a field from the tree or simple string.
bool IsValid() const
void SetDataType(CQueryParseNode::EType dt)
Set/get underlying data type.
double m_Double
Floating point data, if data was a double or converted into one.
std::string m_String
String data, if data came from a string or data field in the tree.
CQueryParseNode::EType GetDataType() const
bool m_Bool
Bool data, if data was a constant boolean or converted into one.
void SetDataField(bool f)
void SetValid(bool f)
definition of a Culling tree
Definition: ncbi_tree.hpp:100
Base class for query node user defined object.
Definition: query_parse.hpp:60
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char * kConcordant
static const char * kValidationStatus
static const char * kTo
static const char * kDiscordant
static const char * kClinicalAssertion
static const char * kSamplesetType
static const char * kVariantQuality
static const char * kIntronReads
static const char * kPilot
static const char * kFrom
#define false
Definition: bool.h:36
#define bool
Definition: bool.h:34
static int type
Definition: getdata.c:31
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
string
Definition: cgiapp.hpp:687
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
CQueryExec * m_QExec
Definition: query_exec.hpp:119
const IQueryParseUserObject * GetUserObject() const
Get user object.
void Parse(const char *query_str, ECase case_sense=eCaseInsensitive, ESyntaxCheck syntax_check=eSyntaxCheck, bool verbose=false, const TFunctionNames &functions=TFunctionNames(0), unsigned line=0, unsigned linePos=0)
Query parser front-end function.
Definition: parser.cpp:368
vector< CQueryParseTree::TNode * > TArgVector
Vector for easy argument access.
Definition: query_exec.hpp:72
const TNode * GetQueryTree() const
EType
Query node type.
Definition: query_parse.hpp:84
virtual bool ResolveIdentifier(const std::string &, bool &)
If query has an identifier, this will resolve it in an application-specific way.
Definition: query_exec.hpp:192
void AddFunc(CQueryParseNode::EType func_type, CQueryFunctionBase *func)
Register function implementation.
Definition: query_exec.cpp:96
void MakeArgVector(CQueryParseTree::TNode &qnode, TArgVector &args)
Created vector of arguments (translate sub-nodes to vector)
Definition: query_exec.cpp:44
virtual void Evaluate(CQueryParseTree &qtree)
Run query tree evaluation.
Definition: query_exec.cpp:110
@ eCaseInsensitive
Case insensitive parsing (AnD)
@ eNotSet
Produced by the (private) default constructor.
Definition: query_parse.hpp:85
@ eFloatConst
Floating point const.
Definition: query_parse.hpp:88
@ eIntConst
Integer const.
Definition: query_parse.hpp:87
@ eBoolConst
Boolean (TRUE or FALSE)
Definition: query_parse.hpp:89
@ eString
String ("free text")
Definition: query_parse.hpp:90
@ eSyntaxCheck
Best possible check for errors.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static bool StringToBool(const CTempString str)
Convert string to bool.
Definition: ncbistr.cpp:2821
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
Definition: ncbistr.hpp:5159
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5187
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
Definition: ncbistr.cpp:793
static bool MatchesMask(CTempString str, CTempString mask, ECase use_case=eCase)
Match "str" against the "mask".
Definition: ncbistr.cpp:389
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387
static const string BoolToString(bool value)
Convert bool to string.
Definition: ncbistr.cpp:2815
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
TNodeList::iterator TNodeList_I
Definition: ncbi_tree.hpp:109
Fun TreeDepthFirstTraverse(TTreeNode &tree_node, Fun func)
Depth-first tree traversal algorithm.
Definition: ncbi_tree.hpp:504
ETreeTraverseCode
Tree traverse code returned by the traverse predicate function.
Definition: ncbi_tree.hpp:51
unsigned int CountNodes(unsigned int depth=1, TCountNodes how=0) const
Count nodes of the tree of which this node is a root.
Definition: ncbi_tree.hpp:999
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
Definition: ncbi_tree.hpp:160
bool IsLeaf() const
Report whether this is a leaf node.
Definition: ncbi_tree.hpp:296
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
Definition: ncbi_tree.hpp:166
@ eTreeTraverse
Keep traversal.
Definition: ncbi_tree.hpp:52
int i
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
Int4 delta(size_t dimension_, const Int4 *score_)
Definition: type.c:6
#define _ASSERT
else result
Definition: token2.c:20
Modified on Fri Jun 14 16:54:19 2024 by modify_doxy.py rev. 669887