NCBI C++ ToolKit
discrepancy_core.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: discrepancy_core.cpp 100215 2023-07-09 00:39:58Z gotvyans $
2  * =========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * =========================================================================
25  *
26  * Authors: Sema Kachalo
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 #include "discrepancy_core.hpp"
32 #include "utils.hpp"
33 #include <algorithm>
34 #include <sstream>
36 #include <objmgr/seqdesc_ci.hpp>
37 #include <objmgr/util/sequence.hpp>
38 #include <serial/objcopy.hpp>
40 #include <util/format_guess.hpp>
41 #include <valarray>
42 
46 
48 {
49 public:
50  static constexpr size_t num_test_cases = static_cast<size_t>(eTestNames::max_test_names);
51  using TArray = std::array<const CDiscrepancyCaseProps**, num_test_cases>;
52 
54 
55 protected:
56  template<size_t i>
57  static constexpr auto xGetProps()
58  {
59 #ifdef NCBI_COMPILER_ANY_CLANG
60 # pragma GCC diagnostic push
61 # pragma GCC diagnostic ignored "-Wundefined-var-template"
62 #endif
64 #ifdef NCBI_COMPILER_ANY_CLANG
65 # pragma GCC diagnostic pop
66 #endif
67  }
68 
69  template<std::size_t... I>
70  static constexpr TArray xAssembleArray(std::index_sequence<I...>)
71  {
72  return {xGetProps<I>()...};
73  }
74 
75 public:
76 
77  static constexpr TArray PopulateTests()
78  {
79  return xAssembleArray(std::make_index_sequence<num_test_cases>{});
80  }
82  {
83  static constexpr TTestNamesSet autofix_names{DISC_AUTOFIX_TESTNAMES};
84  return autofix_names;
85  }
86 
87  static const TAliasMap& GetAliasMap();
88  static const CDiscrepancyCaseProps& GetProps(eTestNames name);
89 };
90 
92 
94 {
95  CCaseRegistry::TAliasMap s_alias_map;
96  for (size_t i=0; i<g_test_registry.size(); ++i) {
97  auto aliases = (**g_test_registry[i]).Aliases;
98  if (aliases)
99  for (auto alias_name: *aliases) {
100  s_alias_map[alias_name] = static_cast<eTestNames>(i);
101  }
102  }
103  return s_alias_map;
104 }
105 
106 
108 {
109  static TAliasMap g_alias_map = xPopulateAliases();
110  return g_alias_map;
111 }
112 
114 {
115  if (name < eTestNames::max_test_names) {
116  auto prop_ref = g_test_registry[static_cast<size_t>(name)];
117  if (prop_ref && *prop_ref) {
118  const CDiscrepancyCaseProps& props = **prop_ref;
119  return props;
120  }
121  }
122  throw std::out_of_range("eTestNames");
123 }
124 
125 std::ostream& operator<<(std::ostream& str, NDiscrepancy::eTestNames name)
126 {
127  str << GetDiscrepancyCaseName(name);
128  return str;
129 }
130 
131 eTestNames GetDiscrepancyCaseName(string_view name)
132 {
133  for (size_t i=0; i<g_test_registry.size(); ++i) {
134  const CDiscrepancyCaseProps& props = **(g_test_registry[i]);
135  if (props.sName == name)
136  return static_cast<eTestNames>(i);
137  }
138 
139  auto it = CCaseRegistry::GetAliasMap().find(name);
140  if (it != CCaseRegistry::GetAliasMap().end())
141  return it->second;
142 
143  if (NStr::StartsWith(name, "DISC_")) {
144  return GetDiscrepancyCaseName(name.substr(5));
145  }
146 
147  return eTestNames::notset;
148 }
149 
150 string_view GetDiscrepancyCaseName(eTestNames name)
151 {
152  return CCaseRegistry::GetProps(name).sName;
153 }
154 
155 vector<string_view> GetDiscrepancyAliases(eTestNames name)
156 {
157  vector<string_view> V;
158  auto alias_names = CCaseRegistry::GetProps(name).Aliases;
159  if (alias_names) {
160  V.reserve(alias_names->size());
161  for (auto rec: *alias_names)
162  {
163  V.push_back(rec);
164  }
165  }
166  return V;
167 }
168 
169 
170 string_view GetDiscrepancyDescr(string_view name)
171 {
173 }
174 
175 string_view GetDiscrepancyDescr(eTestNames name)
176 {
177  return CCaseRegistry::GetProps(name).Descr;
178 }
179 
180 TGroup GetDiscrepancyGroup(eTestNames name)
181 {
182  return CCaseRegistry::GetProps(name).Group;
183 }
184 
185 TGroup GetDiscrepancyGroup(string_view name)
186 {
188 }
189 
190 vector<string> GetDiscrepancyNames(TGroup group)
191 {
192  auto tests = GetDiscrepancyTests(group);
193  vector<string> names; names.reserve(tests.size());
194  for (auto tn: tests) {
195  names.push_back(std::string(GetDiscrepancyCaseName(tn)));
196  }
197 
198  return names;
199 }
200 
202 {
204  if (group == eAutofix) {
206  } else {
207  for (auto rec: g_test_registry)
208  {
209  auto props = *rec;
210  if (props->sName[0] != '_' && (props->Group & group) == group) {
211  names.set(props->Name);
212  }
213  }
214  }
215  return names;
216 }
217 
218 
220 {
221  auto& node = m_Map[name];
222  if (!node)
223  node = Ref(new CReportNode(name));
224  return *node;
225 }
226 
227 
229 {
230  // BIG FILE
231  if (unique && hash.find(&obj) != hash.end()) {
232  return;
233  }
234  list.push_back(CRef<CReportObj>(&obj));
235  hash.insert(&obj);
236 }
237 
238 
240 {
241  for (auto& it : objs) {
242  Add(list, hash, *it, unique);
243  }
244 }
245 
247 {
248  _ASSERT(m_Name == other.m_Name);
249  #if 0
250  if (!m_Name.empty())
251  std::cerr << m_Name
252  << ":" << m_Count << ":" << other.m_Count
253  << ":" << m_Map.size() << ":" << other.m_Map.size()
254  << ":" << m_Objs.size() << ":" << other.m_Objs.size()
255  << ":" << m_Hash.size() << ":" << other.m_Hash.size()
256  << "\n";
257  #endif
258 
259  for (auto it: other.m_Map) {
260  auto& rec = m_Map[it.first];
261  if (rec)
262  rec->Merge(*it.second);
263  else
264  rec = it.second;
265  }
266 
267  m_Count += other.m_Count;
268  m_Objs.insert(m_Objs.end(), other.m_Objs.begin(), other.m_Objs.end());
269  m_Hash.merge(other.m_Hash);
270 
271  return *this;
272 }
273 
275 {
276  m_Map = other->m_Map;
277  m_Objs = other->m_Objs;
278  m_Hash = other->m_Hash;
279  m_Severity = other->m_Severity;
280  m_Autofix = other->m_Autofix;
281  m_Ext = other->m_Ext;
282  m_Summ = other->m_Summ;
283  m_NoRec = other->m_NoRec;
284 }
285 
286 
288 {
289  if (m_Map.size() == 1) {
290  CRef<CReportNode> other = m_Map.begin()->second;
291  Copy(other);
292  return true;
293  }
294  return false;
295 }
296 
297 
299 {
300  TReportObjectList objs = m_Objs;
302  TReportItemList subs;
303  bool autofix = false;
305  string unit;
306  for (const auto& it : m_Map) {
307  CRef<CReportItem> sub = it.second->Export(test, unique);
308  if (severity < it.second->m_Severity) {
309  severity = it.second->m_Severity;
310  }
311  if (severity < sub->GetSeverity()) {
312  severity = sub->GetSeverity();
313  }
314  autofix = autofix || sub->CanAutofix();
315  if (unit.empty()) {
316  unit = sub->GetUnit();
317  }
318  subs.push_back(sub);
319  if (!m_NoRec) {
320  TReportObjectList details = sub->GetDetails();
321  for (auto& ob : details) {
322  Add(objs, hash, *ob, unique);
323  }
324  }
325  }
326  for (auto& ob : objs) {
327  if (ob->CanAutofix()) {
328  static_cast<CDiscrepancyObject&>(*ob).m_Case.Reset(&test);
329  autofix = true;
330  }
331  }
332  string str = m_Name;
334  for (size_t n = NStr::Find(str, "[*"); n != NPOS; n = NStr::Find(str, "[*")) {
335  size_t k = NStr::Find(str, "*]");
336  if (k != NPOS) {
337  str.erase(n, k - n + 2);
338  }
339  else {
340  str.erase(n);
341  }
342  }
343  string msg = str;
344  string xml = str;
345  size_t count = m_Count > 0 ? m_Count : objs.size();
346 
347  NStr::ReplaceInPlace(msg, "[n]", NStr::Int8ToString(count));
348  NStr::ReplaceInPlace(msg, "[n/2]", NStr::Int8ToString(count / 2));
349  NStr::ReplaceInPlace(msg, "[s]", count == 1 ? "" : "s"); // nouns
350  NStr::ReplaceInPlace(msg, "[S]", count == 1 ? "s" : ""); // verbs
351  NStr::ReplaceInPlace(msg, "[is]", count == 1 ? "is" : "are");
352  NStr::ReplaceInPlace(msg, "[does]", count == 1 ? "does" : "do");
353  NStr::ReplaceInPlace(msg, "[has]", count == 1 ? "has" : "have");
354  NStr::ReplaceInPlace(msg, "[(]", "");
355  NStr::ReplaceInPlace(msg, "[)]", "");
356 
357  NStr::ReplaceInPlace(xml, "[n]", "##");
358  NStr::ReplaceInPlace(xml, "[n/2]", "##");
359  NStr::ReplaceInPlace(xml, "[s]", "s");
360  NStr::ReplaceInPlace(xml, "[S]", "");
361  NStr::ReplaceInPlace(xml, "[is]", "are");
362  NStr::ReplaceInPlace(xml, "[does]", "do");
363  NStr::ReplaceInPlace(xml, "[has]", "have");
364  NStr::ReplaceInPlace(xml, "[(]", "");
365  NStr::ReplaceInPlace(xml, "[)]", "");
366 
367  size_t n = str.find("[n]");
368  if (n != string::npos) {
369  str = str.substr(n + 4);
370  }
371  else if ((n = str.find("[n/2]")) != string::npos) {
372  str = str.substr(n + 6);
373  count /= 2;
374  }
375  if (n != string::npos) {
376  if ((n = str.find("[s]")) != string::npos) {
377  unit = str.substr(0, n);
378  }
379  else if (0 == str.find("CDS ")) {
380  unit = "CDS";
381  }
382  else if ((n = str.find("s ")) != string::npos) {
383  unit = str.substr(0, n);
384  }
385  }
386  CRef<CDiscrepancyItem> item(new CDiscrepancyItem(test.GetSName(), m_Name, msg, xml, unit, count));
387  item->m_Autofix = autofix;
388  item->m_Severity = severity;
389  item->m_Ext = m_Ext;
390  item->m_Summ = m_Summ;
391  item->m_Subs = subs;
392  item->m_Objs = objs;
393  return item;
394 }
395 
397 {
399 }
400 
402 {
403  TReportObjectList ret;
405  auto items = GetReport();
406  for (const auto& rep : items) {
407  TReportObjectList objs = rep->GetDetails();
408  for (auto& obj : objs) {
409  CReportNode::Add(ret, hash, *obj);
410  }
411  }
412  return ret;
413 }
414 
415 CRef<CReportItem> CReportItem::CreateReportItem(const string& name, const CReportObj& obj, const string& msg, bool autofix)
416 {
418  string s = msg;
419  NStr::ReplaceInPlace(s, "[(]", "");
420  NStr::ReplaceInPlace(s, "[)]", "");
421  CRef<CDiscrepancyItem> item(new CDiscrepancyItem(test->GetSName(), msg, s, s, kEmptyCStr, 0));
422  item->SetAutofix(autofix);
423 
424  CRef<CReportObj> new_obj = CReportObjFactory::Create(test, obj, autofix);
425  item->SetDetails().push_back(new_obj);
426 
427  return CRef<CReportItem>(item);
428 }
429 
430 CRef<CReportItem> CReportItemFactory::Create(const string& test_name, const string& name, const CReportObj& main_obj, const TReportObjectList& report_objs, bool autofix)
431 {
433  string msg = name;
434  NStr::ReplaceInPlace(msg, "[(]", "");
435  NStr::ReplaceInPlace(msg, "[)]", "");
436 
437  CDiscrepancyItem* disc_item = new CDiscrepancyItem(test->GetSName(), name, msg, msg, kEmptyStr, 0);
438  disc_item->SetAutofix(autofix);
439 
440  CRef<CReportObj> new_obj = CReportObjFactory::Create(test, main_obj, autofix);
441  disc_item->SetDetails().push_back(new_obj);
442  for (const auto& it : report_objs) {
443  disc_item->SetDetails().push_back(it);
444  }
445 
446  return CRef<CReportItem>(disc_item);
447 }
448 
450 {
451  auto disc_obj = dynamic_cast<const CDiscrepancyObject&>(obj);
452  auto ref = disc_obj.RefNode();
453 
454  CDiscrepancyObject* new_obj = CDiscrepancyObject::CreateInternal(ref.GetNCPointerOrNull(), disc_core, autofix);
455  return CRef<CReportObj>(new_obj);
456 }
457 
458 
460 {
461  CDiscrepancyObject* disc_obj = new CDiscrepancyObject(ref);
462  disc_obj->m_Case = disc_core;
463  if (autofix) {
464  disc_obj->m_Fix = ref;
465  }
466  return disc_obj;
467 }
468 
469 // need to rewrite as a DiscrepancyContext method
471 {
472  CDiscrepancyObject* obj = new CDiscrepancyObject(*this);
473  if (fix) {
474  obj->m_Fix.Reset(obj->m_Ref);
475  }
476  obj->m_More = data;
477  return obj;
478 }
479 
481 {
482  try {
483  Visit(context);
484  }
485  catch (const CException& e) {
486  string ss = "EXCEPTION caught: "; ss += e.what();
487  m_Objs[ss];
488  }
489 }
490 
491 
492 std::atomic<bool> CDiscrepancySet::m_Gui = false;
493 
495 
496 
497 string CDiscrepancySet::Format(const string& s, unsigned int count)
498 {
499  string str = s;
502  NStr::ReplaceInPlace(str, "[n/2]", NStr::Int8ToString(count / 2));
503  NStr::ReplaceInPlace(str, "[s]", count == 1 ? "" : "s"); // nouns
504  NStr::ReplaceInPlace(str, "[S]", count == 1 ? "s" : ""); // verbs
505  NStr::ReplaceInPlace(str, "[is]", count == 1 ? "is" : "are");
506  NStr::ReplaceInPlace(str, "[does]", count == 1 ? "does" : "do");
507  NStr::ReplaceInPlace(str, "[has]", count == 1 ? "has" : "have");
508  NStr::ReplaceInPlace(str, "[(]", "");
509  NStr::ReplaceInPlace(str, "[)]", "");
510  for (size_t n = NStr::Find(str, "[*"); n != NPOS; n = NStr::Find(str, "[*")) {
511  size_t k = NStr::Find(str, "*]");
512  if (k != NPOS) {
513  str.erase(n, k - n + 2);
514  }
515  else {
516  str.erase(n);
517  }
518  }
519  return str;
520 }
521 
522 void CDiscrepancyContext::AddTest(string_view name)
523 {
525 }
526 
527 void CDiscrepancyContext::AddTest(eTestNames name)
528 {
529  if (!m_Tests[name].Empty())
530  return;
531 
533  m_Tests[name] = test;
534  m_Enabled.set(test->GetType());
535 
536 #define REGISTER_DISCREPANCY_TYPE(type) \
537  if (test->GetType() == eTestTypes::type) { \
538  auto* p = test.GetPointer(); \
539  m_All_##type.push_back(p); \
540  return; \
541  }
542 
552 
553 }
554 
555 
556 void CDiscrepancyContext::Push(const CSerialObject& root, const string& fname)
557 {
558  if (!fname.empty()) {
559  m_RootNode.Reset(new CParseNode(eFile, 0));
560  m_RootNode->m_Ref->m_Text = fname;
561  }
562  else if (!m_RootNode) {
563  m_RootNode.Reset(new CParseNode(eNone, 0));
564  }
565  m_NodeMap[m_RootNode->m_Ref] = &*m_RootNode;
566  m_CurrentNode.Reset(m_RootNode);
567 
568  auto* pTypeInfo = root.GetThisTypeInfo();
569  if (!pTypeInfo) {
570  NCBI_THROW(CException, eUnknown, "Object has unknown type");
571  }
572 
573 
574  if (pTypeInfo == CBioseq::GetTypeInfo()) {
576  return;
577  }
578 
579  if (pTypeInfo == CBioseq_set::GetTypeInfo()) {
581  return;
582  }
583 
584  if (pTypeInfo == CSeq_entry::GetTypeInfo()) {
586  return;
587  }
588 
589  if (pTypeInfo == CSeq_submit::GetTypeInfo()) {
591  return;
592  }
593 
595  "Unsupported type - " + pTypeInfo->GetName());
596 }
597 
598 
600 {
601  for (auto& tt : m_Tests) {
602  auto& test = *tt.second;
603  test.Summarize();
604  }
605 }
606 
608 {
609  unsigned severity = 0;
610  for (auto& tt : m_Tests) {
611  auto& test = *tt.second;
612  test.Summarize();
613  for (const auto& rep : test.GetReport()) {
614  unsigned sev = rep->GetSeverity();
615  severity = sev > severity ? sev : severity;
616  }
617  }
618  return severity;
619 }
620 
621 
623 {
624  _ASSERT(m_ReportItems.empty());
625  m_Objs.Merge(other.m_Objs);
626 }
627 
628 
630 {
631  auto other_ptr = static_cast<CDiscrepancyProductImpl&>(other);
632  for (auto it: other_ptr.m_Tests) {
633  if (it.second && !it.second->Empty())
634  {
635  auto& current = m_Tests[it.first];
636  if (current && !current->Empty())
637  current->Merge(*it.second);
638  else
639  current = it.second;
640  }
641  }
642 }
643 
644 
#if 0
// Disabled: forwards the string to every test in m_All_STRING via Call().
void CDiscrepancyContext::TestString(const string& str)
{
    for (auto* string_test : m_All_STRING) {
        Call(*string_test, str);
    }
}
#endif
653 
654 
655 CDiscrepancyGroup::CDiscrepancyGroup(const string& name, const string& test)
656  : m_Name(name)
657 {
659 }
660 
662 {
664  for (const auto& it : m_List) {
665  TReportItemList tmp = it->Collect(tests, false);
666  for (const auto& tt : tmp) {
667  out.push_back(tt);
668  }
669  }
670  if (m_Test != eTestNames::notset && tests.find(m_Test) != tests.end()) {
671  TReportItemList tmp = tests[m_Test]->GetReport();
672  for (const auto& tt : tmp) {
673  out.push_back(tt);
674  }
675  tests.erase(m_Test);
676  }
677  if (!m_Name.empty()) {
678  TReportObjectList objs;
681  di->m_Subs = out;
682  bool empty = true;
683  for (const auto& tt : out) {
684  TReportObjectList details = tt->GetDetails();
685  if (!details.empty() || tt->GetCount() > 0) {
686  empty = false;
687  }
688  for (auto& ob : details) {
689  CReportNode::Add(objs, hash, *ob);
690  }
691  if (tt->CanAutofix()) {
692  di->m_Autofix = true;
693  }
694  if (tt->IsInfo()) {
695  di->m_Severity = CDiscrepancyItem::eSeverity_info;
696  }
697  else if (tt->IsFatal()) {
698  di->m_Severity = CDiscrepancyItem::eSeverity_error;
699  }
700  }
701  di->m_Objs = objs;
702  out.clear();
703  if (!empty) {
704  out.push_back(CRef<CReportItem>(di));
705  }
706  }
707  if (all) {
708  for (const auto& it : tests) {
709  TReportItemList list = it.second->GetReport();
710  for (const auto& it2 : list) {
711  out.push_back(it2);
712  }
713  }
714  }
715 
716  return out;
717 }
718 
720 {
721  if (m_CurrentNode->m_Type == eBioseq) {
723  for (const auto& feat : GetAllFeat()) {
724  CollectFeature(feat);
725  }
726  for (auto* test : m_All_SEQUENCE) {
727  test->Call(*this);
728  }
729  for (auto* test : m_All_FEAT) {
730  test->Call(*this);
731  }
732  for (auto* test : m_All_DESC) {
733  test->Call(*this);
734  }
735  if (!m_CurrentNode->m_Pubdescs.empty()) {
736  for (auto* test : m_All_PUBDESC) {
737  test->Call(*this);
738  }
739  for (auto* test : m_All_AUTHORS) {
740  test->Call(*this);
741  }
742  }
743  if (m_CurrentNode->m_Biosource) {
744  for (auto* test : m_All_BIOSRC) {
745  test->Call(*this);
746  }
747  }
748  }
749  else if (IsSeqSet(m_CurrentNode->m_Type)) {
750  for (auto* test : m_All_SEQ_SET) {
751  test->Call(*this);
752  }
753  for (auto* test : m_All_FEAT) {
754  test->Call(*this);
755  }
756  for (auto* test : m_All_DESC) {
757  test->Call(*this);
758  }
759  if (!m_CurrentNode->m_Pubdescs.empty()) {
760  for (auto* test : m_All_PUBDESC) {
761  test->Call(*this);
762  }
763  for (auto* test : m_All_AUTHORS) {
764  test->Call(*this);
765  }
766  }
767  if (m_CurrentNode->m_Biosource) {
768  for (auto* test : m_All_BIOSRC) {
769  test->Call(*this);
770  }
771  }
772  }
773  else if (m_CurrentNode->m_Type == eSubmit) {
774  for (auto* test : m_All_SUBMIT) {
775  test->Call(*this);
776  }
777  if (!m_CurrentNode->m_Authors.empty()) {
778  for (auto* test : m_All_AUTHORS) {
779  test->Call(*this);
780  }
781  }
782  }
783  else if (m_CurrentNode->m_Type == eString) {
784  for (auto* test : m_All_STRING) {
785  test->Call(*this);
786  }
787  }
788  else if (m_CurrentNode->m_Type == eFile) {
789  return;
790  }
791  else if (m_CurrentNode->m_Obj) {
792  ERR_POST(Info << "Tests for "
793  << m_CurrentNode->m_Obj->GetThisTypeInfo()->GetName()
794  << " are not yet implemented...");
795  }
796  else if (m_CurrentNode->m_Parent &&
797  (m_CurrentNode->m_Parent->m_Type != eFile)) {
798  _ASSERT(m_CurrentNode->m_Type == eNone);
799  // Only the root node or a child node of a file
800  // are permitted to have type None
801  NCBI_THROW(CException, eUnknown, "Node has unspecified type");
802  }
803 }
804 
805 
807 {
808  if (m_Type == eBioseq) {
809  size_t brk = m_Text.find('\n');
810  return brk == string::npos ? m_Text : m_Text.substr(0, brk) + " " + m_Text.substr(brk + 1);
811  }
812  else if (IsSeqSet(m_Type)) {
813  switch (m_Type) {
814  case eSeqSet_NucProt:
815  return "np|" + (m_Text.empty() ? "(EMPTY BIOSEQ-SET)" : m_Text);
816  case eSeqSet_SegSet:
817  return "ss|" + (m_Text.empty() ? "(EMPTY BIOSEQ-SET)" : m_Text);
818  default:
819  return m_Text.empty() ? "BioseqSet" : "Set containing " + m_Text;
820  }
821  }
822  else if (m_Type == eSubmit) {
823  return m_Text.empty() ? "Cit-sub" : "Cit-sub for " + m_Text;
824  }
825  else if (m_Type == eSeqDesc) {
826  string label = GetBioseqLabel();
827  return label.empty() ? m_Text : label + ":" + m_Text;
828  }
829  else if (m_Type == eSeqFeat) {
830  return m_Text;
831  }
832  else if (m_Type == eString) {
833  return m_Text;
834  }
835  return CDiscrepancyContext::TypeName(m_Type) + " - coming soon...";
836 }
837 
838 
840 {
841  for (const CRefNode* node = this; node; node = node->m_Parent) {
842  if (node->m_Type == eBioseq) {
843  size_t brk = node->m_Text.find('\n');
844  return brk == string::npos ? kEmptyStr : node->m_Text.substr(0, brk);
845  }
846  if (IsSeqSet(node->m_Type) || node->m_Type == eSubmit) {
847  return node->m_Text;
848  }
849  }
850  return kEmptyStr;
851 }
852 
853 
855  vector<const CRefNode*> A, B;
856  for (const CRefNode* node = static_cast<CDiscrepancyObject&>(*a).m_Ref; node; node = node->m_Parent) {
857  A.push_back(node);
858  }
859  reverse(A.begin(), A.end());
860  for (const CRefNode* node = static_cast<CDiscrepancyObject&>(*b).m_Ref; node; node = node->m_Parent) {
861  B.push_back(node);
862  }
863  reverse(B.begin(), B.end());
864  size_t n = min(A.size(), B.size());
865  for (size_t i = 0; i < n; i++) {
866  if (A[i] != B[i]) {
867  if (A[i]->m_Type == eSeqFeat && B[i]->m_Type != eSeqFeat) {
868  return true;
869  }
870  if (B[i]->m_Type == eSeqFeat && A[i]->m_Type != eSeqFeat) {
871  return false;
872  }
873  if (A[i]->m_Type == eSeqDesc && B[i]->m_Type != eSeqDesc) {
874  return true;
875  }
876  if (B[i]->m_Type == eSeqDesc && A[i]->m_Type != eSeqDesc) {
877  return false;
878  }
879  if (A[i]->m_Index != B[i]->m_Index) {
880  return A[i]->m_Index < B[i]->m_Index;
881  }
882  }
883  }
884  return A.size() == B.size() ? &*a < &*b : A.size() < B.size();
885 }
886 
887 
888 CDiscrepancyItem::CDiscrepancyItem(CDiscrepancyCore& t, const string& s, const string& m, const string& x, const string& o, size_t n)
889  : m_Title(t.GetSName()), m_Str(s), m_Msg(m), m_Xml(x), m_Unit(o), m_Count(n)
890 {}
891 
892 
893 // CDiscrepancyContext
895 {
896  TReportObjectList tofix;
897  map<string, size_t> report;
898  for (const auto& tst : m_Tests) {
899  const TReportItemList& list = tst.second->GetReport();
900  for (const auto& it : list) {
901  for (auto& obj : it->GetDetails()) {
902  if (obj->CanAutofix()) {
903  tofix.push_back(obj);
904  }
905  }
906  }
907  }
908  Autofix(tofix, report);
909  return report;
910 }
911 
913 {
914  TDiscrepancyCaseMap retval;
915  for (auto rec: m_Tests)
916  {
917  retval[rec.first] = rec.second;
918  }
919  return retval;
920 }
921 
static constexpr auto xGetProps()
static const TAliasMap & GetAliasMap()
static const TTestNamesSet & GetAutofixTests()
static constexpr TArray xAssembleArray(std::index_sequence< I... >)
static const CDiscrepancyCaseProps & GetProps(eTestNames name)
std::array< const CDiscrepancyCaseProps **, num_test_cases > TArray
static constexpr TArray PopulateTests()
vector< CDiscrepancyCore * > m_All_PUBDESC
vector< CDiscrepancyCore * > m_All_SUBMIT
vector< CDiscrepancyCore * > m_All_DESC
TDiscrepancyCaseMap GetTests() const override
void AddTest(eTestNames name) override
ct::const_bitset< static_cast< size_t >(eTestTypes::max_num_types), eTestTypes > m_Enabled
TDiscrepancyCoreMap m_Tests
void Push(const CSerialObject &root, const string &fname) override
CRef< CParseNode > m_RootNode
void ParseObject(const CBioseq &root)
vector< CDiscrepancyCore * > m_All_SEQ_SET
CRef< CParseNode > m_CurrentNode
vector< CDiscrepancyCore * > m_All_BIOSRC
unsigned Summarize() override
map< string, size_t > Autofix() override
CSeq_feat_run GetAllFeat()
static string TypeName(EObjType n)
void CollectFeature(const CSeq_feat &feat)
map< const CRefNode *, CParseNode * > m_NodeMap
static bool CompareRefs(CRef< CReportObj > a, CRef< CReportObj > b)
vector< CDiscrepancyCore * > m_All_FEAT
vector< CDiscrepancyCore * > m_All_STRING
vector< CDiscrepancyCore * > m_All_AUTHORS
static bool IsSeqSet(EObjType n)
vector< CDiscrepancyCore * > m_All_SEQUENCE
void Merge(CDiscrepancyCore &other)
virtual void Visit(CDiscrepancyContext &context)=0
TReportObjectList GetObjects() const override
TReportItemList m_ReportItems
const TReportItemList & GetReport() const override
void Call(CDiscrepancyContext &context)
vector< CRef< CDiscrepancyGroup > > m_List
TReportItemList Collect(TDiscrepancyCaseMap &tests, bool all=true) const
CDiscrepancyGroup(const string &name="", const string &test="")
TReportObjectList & SetDetails()
void SetAutofix(bool value)
CDiscrepancyItem(const string &msg)
static CDiscrepancyObject * CreateInternal(CDiscrepancyContext::CRefNode *ref, CRef< CDiscrepancyCore > disc_core, bool autofix)
CRef< CDiscrepancyContext::CRefNode > m_Ref
CReportObj * Clone(bool fix, CConstRef< CObject > data) const
CConstRef< CObject > m_More
CRef< CDiscrepancyContext::CRefNode > m_Fix
CRef< CDiscrepancyCore > m_Case
CDiscrepancyObject(CDiscrepancyContext::CRefNode *ref, CDiscrepancyContext::CRefNode *fix=nullptr, const CObject *more=nullptr)
TDiscrepancyCoreMap m_Tests
void Merge(CDiscrepancyProduct &other) override
static CRef< CDiscrepancySet > New(objects::CScope &scope)
static std::atomic< bool > m_Gui
static string Format(const string &str, unsigned int count)
CRef –.
Definition: ncbiobj.hpp:618
static CRef< CReportItem > Create(const string &test_name, const string &name, const CReportObj &main_obj, const TReportObjectList &report_objs, bool autofix=false)
virtual vector< CRef< CReportItem > > GetSubitems() const =0
virtual string GetUnit() const =0
virtual ESeverity GetSeverity() const =0
virtual TReportObjectList GetDetails() const =0
virtual bool CanAutofix() const =0
static CRef< CReportItem > CreateReportItem(const string &test, const CReportObj &obj, const string &msg, bool autofix=false)
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
CReportNode & Merge(CReportNode &other)
TReportObjectSet m_Hash
CReportNode & operator[](const string &name)
CReportNode()=default
TReportObjectList m_Objs
CRef< CReportItem > Export(CDiscrepancyCore &test, bool unique=true) const
CReportItem::ESeverity m_Severity
void Copy(CRef< CReportNode > other)
static CRef< CReportObj > Create(CRef< CDiscrepancyCore > disc_core, const CReportObj &obj, bool autofix)
CScope –.
Definition: scope.hpp:92
Base class for all serializable objects.
Definition: serialbase.hpp:150
size_type size() const
Definition: map.hpp:148
const_iterator begin() const
Definition: map.hpp:151
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
size_type size() const
Definition: set.hpp:132
vector< CRef< CReportItem > > TReportItemList
vector< CRef< CReportObj > > TReportObjectList
@ eAutofix
unsigned short TGroup
USING_SCOPE(objects)
eTestNames GetDiscrepancyCaseName(string_view name)
string_view GetDiscrepancyDescr(string_view name)
#define REGISTER_DISCREPANCY_TYPE(type)
TGroup GetDiscrepancyGroup(eTestNames name)
vector< string > GetDiscrepancyNames(TGroup group)
static CCaseRegistry::TAliasMap xPopulateAliases()
vector< string_view > GetDiscrepancyAliases(eTestNames name)
TTestNamesSet GetDiscrepancyTests(TGroup group)
std::ostream & operator<<(std::ostream &str, NDiscrepancy::eTestNames name)
static constexpr CCaseRegistry::TArray g_test_registry
#define A(i)
Definition: ecp_curves.c:948
std::ofstream out("events_result.xml")
main entry point for tests
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
static const struct name_t names[]
#define test(a, b, c, d, e)
Definition: numeric.c:170
static const char * str(char *buf, int n)
Definition: stats.c:84
static DbTestEntry * tests
Definition: testodbc.c:388
static char tmp[3200]
Definition: utf8.c:42
static char test_name[128]
Definition: utf8_2.c:34
char data[12]
Definition: iconv.c:80
string
Definition: cgiapp.hpp:687
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
Definition: ncbistr.hpp:5159
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
const char *const kEmptyCStr
Empty "C" string (points to a '\0').
Definition: ncbistr.cpp:68
static const char label[]
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
int i
yy_size_t n
constexpr bool empty(list< Ts... >) noexcept
XML library namespace.
Definition: attributes.hpp:57
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
T min(T x_, T y_)
The Object manager core.
C++ I/O stream wrappers to compress/decompress data on-the-fly.
const std::initializer_list< const char * > * Aliases
Definition: _hash_fun.h:40
#define _ASSERT
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Sat Apr 13 11:49:50 2024 by modify_doxy.py rev. 669887