NCBI C++ ToolKit
discrepancy_stream.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: discrepancy_stream.cpp 99131 2023-02-16 15:52:04Z foleyjp $
2  * =========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * =========================================================================
25  *
26  * Authors: Sema Kachalo
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 #include "discrepancy_core.hpp"
32 #include "utils.hpp"
33 #include <sstream>
35 #include <objmgr/seqdesc_ci.hpp>
36 #include <objmgr/util/sequence.hpp>
37 #include <serial/objcopy.hpp>
39 #include <util/line_reader.hpp>
40 #include <util/format_guess.hpp>
41 
42 
46 
47 
// Nesting level for the debug trace output in this file; the set read hook
// increments it around its nested read and decrements it afterwards.
static size_t offset = 0;

/// Builds the indentation prefix for the debug trace lines (which are
/// currently commented out): two spaces per nesting level.
string Offset()
{
    const size_t width = offset * 2;
    return string(width, ' ');
}
53 
55 {
56 public:
58  void ReadObject(CObjectIStream& stream, const CObjectInfo& passed_info) override
59  {
60  if (m_Context->Skip()) {
62  //cout << Offset() << "Skipping Bioseq_set " << m_Context->m_CurrentNode->m_Index << "\n";
63  m_Context->m_CurrentNode->m_Pos = stream.GetStreamPos();
64  DefaultSkip(stream, passed_info);
65  m_Context->PopNode();
66  }
67  else {
68  bool repeat = m_Context->m_CurrentNode->m_Repeat;
69  m_Context->m_CurrentNode->m_Repeat = false;
70  if (!repeat) {
72  }
73  //cout << Offset() << "Reading " << m_Context->m_CurrentNode->Path() << "\n";
74  offset++;
75  DefaultRead(stream, passed_info);
76  offset--;
77  //cout << Offset() << "Done " << m_Context->m_CurrentNode->Path() << "\n";
78  m_Context->m_CurrentNode->m_Obj.Reset(static_cast<CSerialObject*>(passed_info.GetObjectPtr()));
79  if (!repeat) {
80  m_Context->PopNode();
81  }
82  }
83  }
84 protected:
86 };
87 
88 
90 {
91 public:
93  void ReadObject(CObjectIStream& stream, const CObjectInfo& passed_info) override
94  {
95  if (m_Context->Skip()) {
97  //cout << Offset() << "Skipping Bioseq " << m_Context->m_CurrentNode->m_Index << "\n";
98  m_Context->m_CurrentNode->m_Pos = stream.GetStreamPos();
99  DefaultSkip(stream, passed_info);
100  m_Context->PopNode();
101  }
102  else {
103  bool repeat = m_Context->m_CurrentNode->m_Repeat;
104  m_Context->m_CurrentNode->m_Repeat = false;
105  if (!repeat) {
107  }
108  //cout << Offset() << "Reading " << m_Context->m_CurrentNode->Path() << "\n";
109  DefaultRead(stream, passed_info);
110  //m_Context->m_CurrentNode->m_Obj.Reset((CObject*)passed_info.GetObjectPtr());
111  m_Context->m_CurrentNode->m_Obj.Reset(static_cast<CSerialObject*>(passed_info.GetObjectPtr()));
112  if (!repeat) {
113  m_Context->PopNode();
114  }
115  }
116  }
117 protected:
119 };
120 
121 
123 {
124 public:
126  void ReadClassMember(CObjectIStream& stream, const CObjectInfoMI& passed_info) override
127  {
128  DefaultRead(stream, passed_info);
129  const CBioseq_set::TClass& cl = *(const CBioseq_set::TClass*)passed_info.GetMember().GetObjectPtr();
130  switch (cl) {
133  break;
136  break;
139  break;
142  break;
148  break;
149  default:
150  break;
151  }
152  }
153 
154 protected:
156 };
157 
158 
160 {
161  CRef<CParseNode> new_node(new CParseNode(type, (unsigned)m_CurrentNode->m_Children.size(), m_CurrentNode));
162  m_CurrentNode->m_Children.push_back(new_node);
163  m_CurrentNode.Reset(new_node);
164 }
165 
166 
168 {
169  // Not skipping the first child or nuc-prot children
170  return m_Skip && m_CurrentNode->m_Type == eSeqSet && !m_CurrentNode->m_Repeat && m_CurrentNode->m_Children.size();
171 }
172 
173 
174 void CDiscrepancyContext::ParseStrings(const string& fname)
175 {
176  CNcbiIfstream istr(fname);
177  if (!istr) {
178  NCBI_THROW(CException, eUnknown, "Unable to read " + fname);
179  }
180 
181  m_RootNode.Reset(new CParseNode(eFile, 0));
182  m_RootNode->m_Ref->m_Text = fname;
183  m_CurrentNode.Reset(m_RootNode);
184  CStreamLineReader line_reader(istr);
185  do {
186  PushNode(eString);
187  m_CurrentNode->m_Ref->m_Text = *++line_reader;
188  RunTests();
189  PopNode();
190  }
191  while (!line_reader.AtEOF());
192 }
193 
194 
195 void CDiscrepancyContext::ParseStream(CObjectIStream& stream, const string& fname, bool skip, const string& default_header)
196 {
197  m_Skip = skip;
201 
202  m_RootNode.Reset(new CParseNode(eFile, 0));
203  m_RootNode->m_Ref->m_Text = fname;
204  m_CurrentNode.Reset(m_RootNode);
205 
206  while (true) {
207  string header = stream.ReadFileHeader();
208  if (header.empty()) {
209  header = default_header;
210  }
211  //cout << "Reading " << header << "\n";
212  PushNode(eNone);
213 
214  if (header == CSeq_submit::GetTypeInfo()->GetName()) {
215  PushNode(eSubmit);
218  m_CurrentNode->m_Obj.Reset(ss);
219  PopNode();
220  }
221  else if (header == CSeq_entry::GetTypeInfo()->GetName()) {
224  }
225  else if (header == CBioseq_set::GetTypeInfo()->GetName()) {
228  }
229  else if (header == CBioseq::GetTypeInfo()->GetName()) {
230  CRef<CBioseq> seq(new CBioseq);
232  }
233  else {
234  NCBI_THROW(CException, eUnknown, "Unsupported type " + header + " in " + fname);
235  }
236  CNcbiStreampos position = stream.GetStreamPos();
237  Extend(*m_CurrentNode, stream);
238  if (m_Skip) {
239  stream.SetStreamPos(position);
240  }
241  PopNode();
242  if (stream.EndOfData()) {
243  break;
244  }
245  }
246 }
247 
248 
250 {
251 //cout << "Reading " << node.Path() << "\n";
252  bool load = (node.m_Type == eSeqSet_NucProt && !InGenProdSet(&node)) || (node.m_Type == eSeqSet_GenProd && !InNucProtSet(&node)) || (node.m_Type == eBioseq && !InNucProtSet(&node) && !InGenProdSet(&node));
253  if (load) {
254  CRef<CSeq_entry> se(new CSeq_entry());
255  if (node.m_Type == eBioseq) {
256  se->SetSeq((CBioseq&)*node.m_Obj);
257  }
258  else { // node.m_Type == eSeqSet_NucProt
259  se->SetSet((CBioseq_set&)*node.m_Obj);
260  }
261  auto handle = m_Scope->AddTopLevelSeqEntry(*se);
262  m_FeatTree.Reset(new feature::CFeatTree(handle));
263  }
264  Populate(node);
265 
266  for (size_t i = 0; i < node.m_Children.size(); i++) {
267  CParseNode& item = *node.m_Children[i];
268  if (!item.m_Obj) {
269  stream.SetStreamPos(item.m_Pos);
270  item.m_Repeat = true;
271  m_CurrentNode.Reset(&item);
272  if (item.m_Type == eBioseq) {
273  CRef<CBioseq> seq(new CBioseq);
275  }
276  else if (item.m_Type == eSeqSet) {
279  }
280  }
281  Extend(item, stream);
282  if (node.m_Type != eSeqSet_NucProt && node.m_Type != eSeqSet_GenProd) {
283  node.m_Children[i].Reset();
284  }
285  }
286 
287 //cout << "Running tests on " << node.Path() << " ...\n";
288  m_CurrentNode.Reset(&node);
289  RunTests();
290 
291  if (load) {
292  //m_FeatTree.Reset();
293  m_Scope->ResetDataAndHistory();
294  }
295 }
296 
297 
299 {
301  PushNode(eBioseq);
302  m_CurrentNode->m_Obj.Reset(&root);
303  m_CurrentNode = current;
304 }
305 
306 
308 {
311  if (root.IsSetClass()) {
312  switch (root.GetClass()) {
315  break;
318  break;
321  break;
324  break;
330  break;
331  default:
332  break;
333  }
334  }
335  PushNode(type);
336  m_CurrentNode->m_Obj.Reset(&root);
337  if (root.CanGetSeq_set()) {
338  for (const auto& entry : root.GetSeq_set()) {
339  ParseObject(*entry);
340  }
341  }
342  m_CurrentNode = current;
343 }
344 
345 
347 {
348  if (root.IsSet()) {
349  ParseObject(root.GetSet());
350  }
351  else if (root.IsSeq()) {
352  ParseObject(root.GetSeq());
353  }
354 }
355 
356 
358 {
360  PushNode(eSubmit);
361  m_CurrentNode->m_Obj.Reset(&root);
362  if (root.CanGetData() && root.GetData().IsEntrys()) {
363  for (const auto& entry : root.GetData().GetEntrys()) {
364  ParseObject(*entry);
365  }
366  }
367  m_CurrentNode = current;
368 }
369 
370 
372 {
373  Populate(node);
374  for (auto& item : node.m_Children) {
375  ParseAll(*item);
376  }
377  m_CurrentNode.Reset(&node);
378  RunTests();
379 }
380 
381 
383 {
384  switch (node.m_Type) {
385  case eSeqSet:
386  case eSeqSet_NucProt:
387  case eSeqSet_GenProd:
388  case eSeqSet_SegSet:
389  case eSeqSet_Genome:
390  case eSeqSet_Funny:
391  PopulateSeqSet(node);
392  break;
393  case eBioseq:
394  PopulateBioseq(node);
395  break;
396  case eSubmit:
397  PopulateSubmit(node);
398  break;
399  default:
400  break;
401  }
402 }
403 
404 
406 {
407  const CSeq_submit& sub = dynamic_cast<const CSeq_submit&>(*node.m_Obj);
408  if (sub.IsSetSub()) {
409  if (sub.GetSub().IsSetCit() && sub.GetSub().GetCit().CanGetAuthors()) {
410  const CAuth_list* auth = &sub.GetSub().GetCit().GetAuthors();
411  node.m_Authors.push_back(auth);
412  node.m_AuthorMap[auth] = &node;
413  }
414  }
415 }
416 
417 
419 {
420  const CBioseq& bioseq = dynamic_cast<const CBioseq&>(*node.m_Obj);
421  if (bioseq.CanGetDescr() && bioseq.GetDescr().CanGet()) {
422  for (const auto& desc : bioseq.GetDescr().Get()) {
423  node.AddDescriptor(*desc);
424  if (desc->IsMolinfo()) {
425  node.m_Molinfo.Reset(desc);
426  }
427  else if (desc->IsSource()) {
428  node.m_Biosource.Reset(desc);
429  }
430  else if (desc->IsTitle()) {
431  node.m_Title.Reset(desc);
432  }
433  }
434  }
435  if (bioseq.IsSetAnnot()) {
436  for (const auto& annot : bioseq.GetAnnot()) {
437  if (annot->IsFtable()) {
438  for (const auto& feat : annot->GetData().GetFtable()) {
439  node.AddFeature(*feat);
440  }
441  }
442  }
443  }
444  node.m_BioseqSummary.reset(new CSeqSummary());
445  BuildSeqSummary(bioseq, *node.m_BioseqSummary);
446  string label = node.m_BioseqSummary->Label;
447  node.m_Ref->m_Text = node.m_BioseqSummary->Label + "\n" + node.m_BioseqSummary->GetStats();
448  for (CParseNode* n = node.m_Parent; n; n = n->m_Parent) {
449  if ((!IsSeqSet(n->m_Type) && n->m_Type != eSubmit) || !n->m_Ref->m_Text.empty()) {
450  break;
451  }
452  n->m_Ref->m_Text = n->m_Type == eSeqSet_NucProt || n->m_Type == eSeqSet_SegSet ? node.m_BioseqSummary->Label : label;
453  label = n->m_Ref->GetText();
454  }
455  if (node.m_Biosource) {
456  for (CParseNode* n = node.m_Parent; n && IsSeqSet(n->m_Type); n = n->m_Parent) {
457  if (n->m_Type == eSeqSet_Genome || n->m_Type == eSeqSet_Funny) {
458  n->m_SetBiosources.push_back(node.m_Biosource);
459  }
460  }
461  }
462 }
463 
464 
466 {
467  const CBioseq_set& seqset = dynamic_cast<const CBioseq_set&>(*node.m_Obj);
468  if (seqset.CanGetDescr() && seqset.GetDescr().CanGet()) {
469  for (const auto& desc : seqset.GetDescr().Get()) {
470  node.AddDescriptor(*desc);
471  if (desc->IsMolinfo()) {
472  node.m_Molinfo.Reset(desc);
473  }
474  else if (desc->IsSource()) {
475  node.m_Biosource.Reset(desc);
476  }
477  else if (desc->IsTitle()) {
478  node.m_Title.Reset(desc);
479  }
480  }
481  }
482  if (seqset.IsSetAnnot()) {
483  for (const auto& annot : seqset.GetAnnot()) {
484  if (annot->IsFtable()) {
485  for (const auto& feat : annot->GetData().GetFtable()) {
486  node.AddFeature(*feat);
487  }
488  }
489  }
490  }
491  if (node.m_Biosource) {
492  for (CParseNode* n = &node; n && IsSeqSet(n->m_Type); n = n->m_Parent) {
493  if (n->m_Type == eSeqSet_Genome) {
494  n->m_SetBiosources.push_back(node.m_Biosource);
495  }
496  }
497  }
498 }
499 
500 
502 {
503  auto it = m_NodeMap.find(&ref);
504  if (it != m_NodeMap.end()) {
505  return it->second;
506  }
507  if (ref.m_Parent) {
508  CParseNode* p = FindNode(*ref.m_Parent);
509  if (p) {
510  switch (ref.m_Type) {
511  case eSeqFeat:
512  m_NodeMap[&ref] = p->m_Features[ref.m_Index];
513  break;
514  case eSeqDesc:
515  m_NodeMap[&ref] = p->m_Descriptors[ref.m_Index];
516  break;
517  default:
518  m_NodeMap[&ref] = p->m_Children[ref.m_Index];
519  break;
520  }
521  return m_NodeMap[&ref];
522  }
523  }
524  return nullptr;
525 }
526 
527 
529 {
530  CDiscrepancyObject& p = static_cast<CDiscrepancyObject&>(obj);
531  CParseNode* node = FindNode(alt ? *p.m_Fix : *p.m_Ref);
532  //return node ? dynamic_cast<const CSerialObject*>(&*node->m_Obj) : nullptr;
533  return node ? node->m_Obj.GetPointerOrNull() : nullptr;
534 }
535 
536 
538 {
539  CDiscrepancyObject* p = static_cast<CDiscrepancyObject*>(&obj);
540  CParseNode* node = FindNode(alt ? *p->m_Fix : *p->m_Ref);
541  node->m_Obj.Reset(ser);
542 }
543 
544 
545 void CDiscrepancyContext::ReplaceSeq_feat(CReportObj& obj, const CSeq_feat& old_feat, CSeq_feat& new_feat, bool alt)
546 {
547  if (m_AF_Seq_annot) {
548  auto& ftable = m_AF_Seq_annot->SetData().SetFtable();
549  for (auto& feat : ftable) {
550  if (&*feat == &old_feat) {
551  feat.Reset(&new_feat);
552  }
553  }
554  }
555  else {
556  CSeq_feat_EditHandle feh(GetScope().GetSeq_featHandle(old_feat));
557  feh.Replace(new_feat);
558  }
559  ReplaceObject(obj, &new_feat, alt);
560 }
561 
562 
563 // AUTOFIX ////////////////////////////////////////////////////////////////////////
564 
566 {
567 public:
569  void CopyObject(CObjectStreamCopier& copier, const CObjectTypeInfo& passed_info) override
570  {
572  if (m_Context->CanFixBioseq_set()) {
574  copier.In().ReadObject(m_Context->m_AF_Bioseq_set, passed_info.GetTypeInfo());
576  CRef<CSeq_entry> se(new CSeq_entry());
578  auto handle = m_Context->m_Scope->AddTopLevelSeqEntry(*se);
579  m_Context->m_FeatTree.Reset(new feature::CFeatTree(handle));
581  copier.Out().WriteObject(m_Context->m_AF_Bioseq_set, passed_info.GetTypeInfo());
583  }
584  else {
585  DefaultCopy(copier, passed_info);
586  }
587  m_Context->PopNode();
588  }
589 protected:
591 };
592 
593 
595 {
596 public:
598  void CopyObject(CObjectStreamCopier& copier, const CObjectTypeInfo& passed_info) override
599  {
601  if (m_Context->CanFixBioseq()) {
603  copier.In().ReadObject(m_Context->m_AF_Bioseq, passed_info.GetTypeInfo());
605  CRef<CSeq_entry> se(new CSeq_entry());
607  auto handle = m_Context->m_Scope->AddTopLevelSeqEntry(*se);
608  m_Context->m_FeatTree.Reset(new feature::CFeatTree(handle));
610  copier.Out().WriteObject(m_Context->m_AF_Bioseq, passed_info.GetTypeInfo());
612  }
613  else {
614  DefaultCopy(copier, passed_info);
615  }
616  m_Context->PopNode();
617  }
618 protected:
620 };
621 
622 
624 {
625 public:
627  void CopyObject(CObjectStreamCopier& copier, const CObjectTypeInfo& passed_info) override
628  {
629  if (m_Context->CanFixSeqdesc()) {
631  copier.In().ReadObject(m_Context->m_AF_Seq_descr, passed_info.GetTypeInfo());
633  copier.Out().WriteObject(m_Context->m_AF_Seq_descr, passed_info.GetTypeInfo());
635  }
636  else {
637  DefaultCopy(copier, passed_info);
638  }
639  }
640 protected:
642 };
643 
644 
646 {
647 public:
649  void CopyObject(CObjectStreamCopier& copier, const CObjectTypeInfo& passed_info) override
650  {
651  if (m_Context->CanFixSeq_annot()) {
653  copier.In().ReadObject(m_Context->m_AF_Seq_annot, passed_info.GetTypeInfo());
655  copier.Out().WriteObject(m_Context->m_AF_Seq_annot, passed_info.GetTypeInfo());
657  }
658  else {
659  DefaultCopy(copier, passed_info);
660  }
661  }
662 protected:
664 };
665 
666 
668 {
669 public:
671  void CopyObject(CObjectStreamCopier& copier, const CObjectTypeInfo& passed_info) override
672  {
673  if (m_Context->CanFixSubmit_block()) {
675  copier.In().ReadObject(m_Context->m_AF_Submit_block, passed_info.GetTypeInfo());
677  copier.Out().WriteObject(m_Context->m_AF_Submit_block, passed_info.GetTypeInfo());
679  }
680  else {
681  DefaultCopy(copier, passed_info);
682  }
683  }
684 protected:
686 };
687 
688 
689 unique_ptr<CObjectIStream> OpenUncompressedStream(const string& fname, bool& compressed) // One more copy!!!
690 {
691  unique_ptr<CNcbiIstream> InputStream(new CNcbiIfstream(fname, ios::binary));
693 
695  switch (format) {
696  case CFormatGuess::eGZip: method = CCompressStream::eGZipFile; break;
697  case CFormatGuess::eBZip2: method = CCompressStream::eBZip2; break;
698  case CFormatGuess::eLzo: method = CCompressStream::eLZO; break;
699  default: method = CCompressStream::eNone; break;
700  }
701  compressed = method != CCompressStream::eNone;
702  if (compressed) {
703  InputStream.reset(new CDecompressIStream(*InputStream.release(), method, CCompressStream::fDefault, eTakeOwnership));
704  format = CFormatGuess::Format(*InputStream);
705  }
706 
707  unique_ptr<CObjectIStream> objectStream;
708  switch (format)
709  {
712  objectStream.reset(CObjectIStream::Open(format == CFormatGuess::eBinaryASN ? eSerial_AsnBinary : eSerial_AsnText, *InputStream.release(), eTakeOwnership));
713  break;
714  default:
715  break;
716  }
717  objectStream->SetDelayBufferParsingPolicy(CObjectIStream::eDelayBufferPolicyAlwaysParse);
718  return objectStream;
719 }
720 
721 
722 void CDiscrepancyContext::Autofix(TReportObjectList& tofix, map<string, size_t>& rep, const string& default_header)
723 {
724  if (!tofix.empty()) {
725  sort(tofix.begin(), tofix.end(), CompareRefs);
726  bool in_file = false;
727  for (const CRefNode* node = static_cast<CDiscrepancyObject&>(*tofix[0]).m_Fix; node; node = node->m_Parent) {
728  if (node->m_Type == eFile) in_file = true;
729  }
730  if (!in_file) { // GBench etc. -- all objects already in the scope
731  for (auto& fix : tofix) {
732  CDiscrepancyObject& obj = static_cast<CDiscrepancyObject&>(*fix);
733  CRef<CAutofixReport> result = obj.m_Case->Autofix(&obj, *this);
734  if (result) {
735  rep[result->GetS()] += result->GetN();
736  }
737  }
738  return;
739  }
740 
741  vector<vector<CDiscrepancyObject*>> all_fixes;
742  string current_path;
743  for (auto& fix : tofix) {
744  string path;
745  CDiscrepancyObject& obj = static_cast<CDiscrepancyObject&>(*fix);
746  for (const CRefNode* node = obj.m_Fix; node; node = node->m_Parent) {
747  if (node->m_Type == eFile) {
748  path = node->m_Text;
749  break;
750  }
751  }
752  if (path != current_path) {
753  current_path = path;
754  vector<CDiscrepancyObject*> fixes;
755  all_fixes.push_back(fixes);
756  }
757  all_fixes.back().push_back(&obj);
758  }
759  for (auto& fix : all_fixes) {
760  AutofixFile(fix, default_header);
761  }
762  }
763 }
764 
765 
766 void CDiscrepancyContext::AutofixFile(vector<CDiscrepancyObject*>&fixes, const string& default_header)
767 {
768  string path;
769  for (CRefNode* node = fixes[0]->m_Fix; node; node = node->m_Parent) {
770  if (node->m_Type == eFile) {
771  path = node->m_Text;
772  break;
773  }
774  }
775  bool compressed = false;
776  unique_ptr<CObjectIStream> in = OpenUncompressedStream(path, compressed);
777  cout << "Autofixing " << path << "\n";
778 
779  size_t dot = path.find_last_of('.');
780  if (dot != string::npos) {
781  size_t slash = path.find_last_of("/\\");
782  if (slash != string::npos && slash >= dot) {
783  dot = string::npos;
784  }
785  }
786  string fixed_path = !compressed && (dot != string::npos) ? path.substr(0, dot) + ".autofix" + path.substr(dot) : path + ".autofix.sqn";
787 
788  string header = in->ReadFileHeader();
789  in = OpenUncompressedStream(path, compressed);
790  unique_ptr<CObjectOStream> out(CObjectOStream::Open(eSerial_AsnText, fixed_path));
791  CObjectStreamCopier copier(*in, *out);
792 
793  m_Fixes = &fixes;
794  m_RootNode.Reset(new CParseNode(eFile, 0));
795  m_CurrentNode.Reset(m_RootNode);
796 
802 
803  while (true) {
804  if (header.empty()) {
805  header = default_header;
806  }
807  //cout << "Reading " << header << "\n";
808 
809  PushNode(eNone);
810 
811  if (header == CSeq_submit::GetTypeInfo()->GetName()) {
812  PushNode(eSubmit);
813  copier.Copy(CSeq_submit::GetTypeInfo());
814  PopNode();
815  }
816  else if (header == CSeq_entry::GetTypeInfo()->GetName()) {
817  copier.Copy(CSeq_entry::GetTypeInfo());
818  }
819  else if (header == CBioseq_set::GetTypeInfo()->GetName()) {
820  copier.Copy(CBioseq_set::GetTypeInfo());
821  }
822  else if (header == CBioseq::GetTypeInfo()->GetName()) {
823  copier.Copy(CBioseq::GetTypeInfo());
824  }
825  else {
826  NCBI_THROW(CException, eUnknown, "Unsupported type " + header);
827  }
828  PopNode();
829  if (in->EndOfData()) {
830  break;
831  }
832  else {
833  // this will crash if the file is both compressed and concatenated,
834  // but we are not going to support those
835  CNcbiStreampos position = in->GetStreamPos();
836  header = in->ReadFileHeader();
837  in->SetStreamPos(position);
838  }
839  }
840 }
841 
842 
844 {
845  if (IsSeqSet(refnode.m_Type)) {
846  CRef<CRefNode> A(&refnode);
847  auto B = m_CurrentNode->m_Ref;
848  while (A && B) {
849  if (A->m_Index != B->m_Index) {
850  return false;
851  }
852  A = A->m_Parent;
853  B = B->m_Parent;
854  if (!A && !B) {
855  return true;
856  }
857  }
858  }
859  return false;
860 }
861 
862 
864 {
865  for (auto* fix : *m_Fixes) {
866  if (CanFixBioseq_set(*fix->m_Fix)) {
867  return true;
868  }
869  }
870  return false;
871 }
872 
873 
874 
876 {
877  if (refnode.m_Type == eBioseq) {
878  CRef<CRefNode> A(&refnode);
879  auto B = m_CurrentNode->m_Ref;
880  while (A && B) {
881  if (A->m_Index != B->m_Index) {
882  return false;
883  }
884  A = A->m_Parent;
885  B = B->m_Parent;
886  if (!A && !B) {
887  return true;
888  }
889  }
890  }
891  return false;
892 }
893 
894 
896 {
897  for (auto* fix : *m_Fixes) {
898  if (CanFixBioseq(*fix->m_Fix)) {
899  return true;
900  }
901  }
902  return false;
903 }
904 
905 
907 {
908  if (refnode.m_Type == eSeqFeat) {
909  auto A = refnode.m_Parent;
910  auto B = m_CurrentNode->m_Ref;
911  while (A && B) {
912  if (A->m_Index != B->m_Index) {
913  return false;
914  }
915  A = A->m_Parent;
916  B = B->m_Parent;
917  if (!A && !B) {
918  return true;
919  }
920  }
921  }
922  return false;
923 }
924 
925 
927 {
928  for (auto* fix : *m_Fixes) {
929  if (CanFixFeat(*fix->m_Fix)) {
930  return true;
931  }
932  }
933  return false;
934 }
935 
936 
938 {
939  if (refnode.m_Type == eSeqDesc) {
940  auto A = refnode.m_Parent;
941  auto B = m_CurrentNode->m_Ref;
942  while (A && B) {
943  if (A->m_Index != B->m_Index) {
944  return false;
945  }
946  A = A->m_Parent;
947  B = B->m_Parent;
948  if (!A && !B) {
949  return true;
950  }
951  }
952  }
953  return false;
954 }
955 
956 
958 {
959  for (auto* fix : *m_Fixes) {
960  if (CanFixDesc(*fix->m_Fix)) {
961  return true;
962  }
963  }
964  return false;
965 }
966 
967 
969 {
970  if (refnode.m_Type == eSubmit && m_CurrentNode->m_Ref->m_Type == eSubmit) {
971  CRef<CRefNode> A(&refnode);
972  auto B = m_CurrentNode->m_Ref;
973  while (A && B) {
974  if (A->m_Index != B->m_Index) {
975  return false;
976  }
977  A = A->m_Parent;
978  B = B->m_Parent;
979  if (!A && !B) {
980  return true;
981  }
982  }
983  }
984  return false;
985 }
986 
987 
989 {
990  for (auto* fix : *m_Fixes) {
991  if (CanFixSubmit_block(*fix->m_Fix)) {
992  return true;
993  }
994  }
995  return false;
996 }
997 
998 
1000 {
1001  if (m_AF_Seq_annot->IsFtable()) {
1002  for (auto& feat : m_AF_Seq_annot->GetData().GetFtable()) {
1003  m_CurrentNode->AddFeature(*feat);
1004  }
1005  }
1006 
1007  for (auto* fix : *m_Fixes) {
1008  if (CanFixFeat(*fix->m_Fix) && fix->m_Fix->m_Index < m_CurrentNode->m_Features.size()) {
1009  m_NodeMap[&*fix->m_Fix] = m_CurrentNode->m_Features[fix->m_Fix->m_Index];
1010  CRef<CAutofixReport> result = (*fix->m_Case).Autofix(fix, *this);
1011  }
1012  }
1013 }
1014 
1015 
1017 {
1018  if (m_AF_Seq_descr->CanGet()) {
1019  for (auto& desc : m_AF_Seq_descr->Get()) {
1020  m_CurrentNode->AddDescriptor(*desc);
1021  }
1022  }
1023 
1024  for (auto* fix : *m_Fixes) {
1025  if (CanFixDesc(*fix->m_Fix) && fix->m_Fix->m_Index < m_CurrentNode->m_Descriptors.size()) {
1026  m_NodeMap[&*fix->m_Fix] = m_CurrentNode->m_Descriptors[fix->m_Fix->m_Index];
1027  CRef<CAutofixReport> result = (*fix->m_Case).Autofix(fix, *this);
1028  }
1029  }
1030 }
1031 
1032 
1034 {
1035  CRef<CParseNode> sblock(new CParseNode(eSubmitBlock, 0));
1036  //sblock->m_Obj.Reset(static_cast<CSerialObject*>(&*m_AF_Submit_block));
1037  sblock->m_Obj.Reset(m_AF_Submit_block.GetPointer());
1038 
1039  for (auto* fix : *m_Fixes) {
1040  if (CanFixSubmit_block(*fix->m_Fix)) {
1041  m_NodeMap[&*fix->m_Fix] = sblock;
1042  CRef<CAutofixReport> result = (*fix->m_Case).Autofix(fix, *this);
1043  }
1044  }
1045 }
1046 
1047 
1049 {
1050  const CBioseq_set* bss = static_cast<const CBioseq_set*>(&*m_CurrentNode->m_Obj);
1051  if (bss->CanGetDescr() && bss->GetDescr().CanGet()) {
1052  for (auto& desc : bss->GetDescr().Get()) {
1053  m_CurrentNode->AddDescriptor(*desc);
1054  }
1055  }
1056  if (bss->IsSetAnnot()) {
1057  for (auto& annot : bss->GetAnnot()) {
1058  if (annot->IsFtable()) {
1059  for (auto& feat : annot->GetData().GetFtable()) {
1060  m_CurrentNode->AddFeature(*feat);
1061  }
1062  }
1063  }
1064  }
1065 
1066  for (auto& se : bss->GetSeq_set()) {
1067  if (se->IsSet()) {
1069  m_CurrentNode->m_Obj.Reset(&se->GetSet());
1071  }
1072  else {
1074  m_CurrentNode->m_Obj.Reset(&se->GetSeq());
1075  AutofixBioseq();
1076  }
1077  PopNode();
1078  }
1079 
1080  for (auto* fix : *m_Fixes) {
1081  if (CanFixBioseq_set(*fix->m_Fix)) {
1082  m_NodeMap[&*fix->m_Fix] = m_CurrentNode;
1083  CRef<CAutofixReport> result = (*fix->m_Case).Autofix(fix, *this);
1084  }
1085  }
1086 }
1087 
1088 
1090 {
1091  const CBioseq* bs = static_cast<const CBioseq*>(&*m_CurrentNode->m_Obj);
1092  if (bs->CanGetDescr() && bs->GetDescr().CanGet()) {
1093  for (auto& desc : bs->GetDescr().Get()) {
1094  m_CurrentNode->AddDescriptor(*desc);
1095  }
1096  }
1097  if (bs->IsSetAnnot()) {
1098  for (auto& annot : bs->GetAnnot()) {
1099  if (annot->IsFtable()) {
1100  for (auto& feat : annot->GetData().GetFtable()) {
1101  m_CurrentNode->AddFeature(*feat);
1102  }
1103  }
1104  }
1105  }
1106 
1107  for (auto* fix : *m_Fixes) {
1108  if (CanFixBioseq(*fix->m_Fix)) {
1109  m_NodeMap[&*fix->m_Fix] = m_CurrentNode;
1110  CRef<CAutofixReport> result = (*fix->m_Case).Autofix(fix, *this);
1111  }
1112  }
1113 }
1114 
1115 
#define static
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CCopyHook_Bioseq_set(CDiscrepancyContext *context)
CDiscrepancyContext * m_Context
void CopyObject(CObjectStreamCopier &copier, const CObjectTypeInfo &passed_info) override
CDiscrepancyContext * m_Context
void CopyObject(CObjectStreamCopier &copier, const CObjectTypeInfo &passed_info) override
CCopyHook_Bioseq(CDiscrepancyContext *context)
void CopyObject(CObjectStreamCopier &copier, const CObjectTypeInfo &passed_info) override
CCopyHook_Seq_annot(CDiscrepancyContext *context)
CDiscrepancyContext * m_Context
CDiscrepancyContext * m_Context
void CopyObject(CObjectStreamCopier &copier, const CObjectTypeInfo &passed_info) override
CCopyHook_Seq_descr(CDiscrepancyContext *context)
void CopyObject(CObjectStreamCopier &copier, const CObjectTypeInfo &passed_info) override
CCopyHook_Submit_block(CDiscrepancyContext *context)
CDiscrepancyContext * m_Context
Copy hook for a standalone object.
Definition: objhook.hpp:254
CDecompressIStream –.
friend class CReadHook_Bioseq_set
BIG FILE.
CRef< CBioseq > m_AF_Bioseq
friend class CReadHook_Bioseq_set_class
vector< CDiscrepancyObject * > * m_Fixes
CRef< CSeq_descr > m_AF_Seq_descr
void PopulateSeqSet(CParseNode &node)
CRef< objects::CScope > m_Scope
void Populate(CParseNode &node)
CParseNode * FindNode(const CRefNode &obj)
bool CanFixDesc(CRefNode &refnode)
CRef< CBioseq_set > m_AF_Bioseq_set
friend class CCopyHook_Seq_annot
CRef< CParseNode > m_RootNode
void AutofixFile(vector< CDiscrepancyObject * > &fixes, const string &default_header)
void ParseObject(const CBioseq &root)
CRef< feature::CFeatTree > m_FeatTree
friend class CCopyHook_Seq_descr
void ReplaceSeq_feat(CReportObj &obj, const CSeq_feat &old_feat, CSeq_feat &new_feat, bool alt=false)
CRef< CParseNode > m_CurrentNode
const CSerialObject * FindObject(CReportObj &obj, bool alt=false) override
objects::CScope & GetScope() const
map< string, size_t > Autofix() override
friend class CCopyHook_Bioseq_set
friend class CCopyHook_Bioseq
bool CanFixFeat(CRefNode &refnode)
void BuildSeqSummary(const CBioseq &bs, CSeqSummary &summary)
void PopulateSubmit(CParseNode &node)
void PopulateBioseq(CParseNode &node)
void Extend(CParseNode &node, CObjectIStream &stream)
friend class CReadHook_Bioseq
map< const CRefNode *, CParseNode * > m_NodeMap
friend class CCopyHook_Submit_block
CRef< CSeq_annot > m_AF_Seq_annot
static bool InNucProtSet(const CParseNode *node)
static bool CompareRefs(CRef< CReportObj > a, CRef< CReportObj > b)
void ReplaceObject(CReportObj &obj, CSerialObject *, bool alt=false)
void ParseAll(CParseNode &node)
CRef< CSubmit_block > m_AF_Submit_block
void ParseStrings(const string &fname) override
void ParseStream(CObjectIStream &stream, const string &fname, bool skip, const string &default_header=kEmptyStr) override
static bool IsSeqSet(EObjType n)
virtual CRef< CAutofixReport > Autofix(CDiscrepancyObject *obj, CDiscrepancyContext &context) const =0
CRef< CDiscrepancyContext::CRefNode > m_Ref
CRef< CDiscrepancyContext::CRefNode > m_Fix
CRef< CDiscrepancyCore > m_Case
EFormat
The formats are checked in the same order as declared here.
@ eBZip2
bzip2 compressed file
@ eBinaryASN
Binary ASN.1.
@ eLzo
lzo compressed file
@ eGZip
GNU zip compressed file.
@ eTextASN
Text ASN.1.
static EFormat Format(const string &path, EOnError onerror=eDefault)
Guess file format.
CObjectIStream –.
Definition: objistr.hpp:93
CObjectInfoMI –.
Definition: objectiter.hpp:432
CObjectInfo –.
Definition: objectinfo.hpp:597
CObjectStreamCopier –.
Definition: objcopy.hpp:71
CObjectTypeInfo –.
Definition: objectinfo.hpp:94
Read hook for data member of a containing object (eg, SEQUENCE)
Definition: objhook.hpp:78
void ReadClassMember(CObjectIStream &stream, const CObjectInfoMI &passed_info) override
This method will be called at the appropriate time when the object of requested type is to be read.
CDiscrepancyContext * m_Context
CReadHook_Bioseq_set_class(CDiscrepancyContext *context)
CReadHook_Bioseq_set(CDiscrepancyContext *context)
void ReadObject(CObjectIStream &stream, const CObjectInfo &passed_info) override
This method will be called at the appropriate time when the object of requested type is to be read.
CDiscrepancyContext * m_Context
CReadHook_Bioseq(CDiscrepancyContext *context)
CDiscrepancyContext * m_Context
void ReadObject(CObjectIStream &stream, const CObjectInfo &passed_info) override
This method will be called at the appropriate time when the object of requested type is to be read.
Read hook for a standalone object.
Definition: objhook.hpp:59
CRef –.
Definition: ncbiobj.hpp:618
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
Definition: Seq_entry.hpp:56
CSeq_feat_EditHandle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Base class for all serializable objects.
Definition: serialbase.hpp:150
Simple implementation of ILineReader for i(o)streams.
CSubmit_block –.
Definition: set.hpp:45
vector< CRef< CReportObj > > TReportObjectList
unique_ptr< CObjectIStream > OpenUncompressedStream(const string &fname, bool &compressed)
USING_SCOPE(objects)
string Offset()
static size_t offset
std::ofstream out("events_result.xml")
main entry point for tests
@ eTakeOwnership
An object can take ownership of another.
Definition: ncbi_types.h:136
string
Definition: cgiapp.hpp:687
EMethod
Compression/decompression methods.
Definition: stream_util.hpp:98
@ eLZO
LZO (LZO1X)
@ eNone
no compression method (copy "as is")
Definition: stream_util.hpp:99
@ eGZipFile
.gz file (including concatenated files)
@ fDefault
Use algorithm-specific defaults.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
void DefaultRead(CObjectIStream &in, const CObjectInfoMI &object)
Definition: objhook.cpp:183
void Read(const CObjectInfo &object)
Read object of known type.
Definition: objistr.cpp:952
void DefaultRead(CObjectIStream &in, const CObjectInfo &object)
Default read.
Definition: objhook.cpp:171
void SetLocalReadHook(CObjectIStream &stream, CReadObjectHook *hook) const
Set local (for the specified stream) read hook.
Definition: objectinfo.cpp:366
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
Definition: objectinfo.hpp:762
virtual bool EndOfData(void)
Check if there is still some meaningful data that can be read; in text streams this function will ski...
Definition: objistr.cpp:588
void ReadObject(const CObjectInfo &object)
Read child object.
Definition: objistr.cpp:1097
TObjectPtr GetObjectPtr(void) const
Get pointer to object.
CMemberIterator FindMember(const string &memberName) const
Find class member by its name.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
virtual string ReadFileHeader(void)
Read file header.
Definition: objistr.cpp:1121
CObjectInfo GetMember(void) const
Get class member data.
void SetStreamPos(CNcbiStreampos pos)
Set the current read position in underlying input stream This is the same as istream::seekg()
Definition: objistr.cpp:800
void WriteObject(const CConstObjectInfo &object)
Definition: objostr.cpp:566
CNcbiStreampos GetStreamPos(void) const
Get the current stream position.
Definition: objistr.cpp:790
TTypeInfo GetTypeInfo(void) const
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
CObjectIStream & In(void) const
void DefaultCopy(CObjectStreamCopier &copier, const CObjectTypeInfo &type)
Definition: objhook.cpp:243
void SetLocalCopyHook(CObjectStreamCopier &stream, CCopyObjectHook *hook) const
Set local (for the specified stream) copy hook.
Definition: objectinfo.cpp:437
void DefaultSkip(CObjectIStream &in, const CObjectTypeInfo &object)
Default skip.
Definition: objhook.cpp:177
CObjectOStream & Out(void) const
void SetLocalReadHook(CObjectIStream &stream, CReadClassMemberHook *hook) const
Definition: objectiter.cpp:96
void Copy(const CObjectTypeInfo &type)
Copy data.
Definition: objcopy.cpp:74
@ eDelayBufferPolicyAlwaysParse
Parse always.
Definition: objistr.hpp:522
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1672
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
IO_PREFIX::streampos CNcbiStreampos
Portable alias for streampos.
Definition: ncbistre.hpp:134
static const char label[]
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_sub_.hpp:357
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
Definition: Cit_sub_.hpp:351
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
bool CanGetDescr(void) const
Check if it is safe to call GetDescr method.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
bool CanGetSeq_set(void) const
Check if it is safe to call GetSeq_set method.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_segset
segmented sequence + parts
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
bool CanGet(void) const
Check if it is safe to call Get method.
Definition: Seq_descr_.hpp:160
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:354
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:366
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool CanGetDescr(void) const
Check if it is safe to call GetDescr method.
Definition: Bioseq_.hpp:309
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
const TCit & GetCit(void) const
Get the Cit member data.
const TEntrys & GetEntrys(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TSub & GetSub(void) const
Get the Sub member data.
bool IsSetSub(void) const
Check if a value has been assigned to Sub data member.
bool IsEntrys(void) const
Check if variant Entrys is selected.
bool IsSetCit(void) const
Citation for this submission. Check if a value has been assigned to Cit data member.
bool CanGetData(void) const
Check if it is safe to call GetData method.
int i
yy_size_t n
Lightweight interface for getting lines of data with minimal memory copying.
constexpr auto sort(_Init &&init)
static Format format
Definition: njn_ioutil.cpp:53
std::istream & in(std::istream &in_, double &x_)
The Object manager core.
C++ I/O stream wrappers to compress/decompress data on-the-fly.
CParseNode & AddDescriptor(const CSeqdesc &seqdesc)
vector< CRef< CParseNode > > m_Children
vector< CRef< CParseNode > > m_Features
vector< CRef< CParseNode > > m_Descriptors
CConstRef< CSerialObject > m_Obj
CParseNode & AddFeature(const CSeq_feat &feat)
map< const CAuth_list *, CParseNode * > m_AuthorMap
shared_ptr< CSeqSummary > m_BioseqSummary
vector< const CAuth_list * > m_Authors
Definition: type.c:6
else result
Definition: token2.c:20
#define ftable
Definition: utilfeat.h:37
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Fri May 24 14:57:12 2024 by modify_doxy.py rev. 669887