NCBI C++ ToolKit
subs_collector.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Alexey Dobronadezhdin
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <serial/objistr.hpp>
36 
45 
47 
48 #include "subs_collector.hpp"
49 
50 /////////////////////////////////////////////////
51 // CSubmissionCollector
52 
53 namespace subfuse
54 {
55 
56 using namespace std;
59 
61  m_out(out),
62  m_header_not_set(true),
63  m_cur_offset(0),
64  m_cur_max_id(0)
65 {}
66 
67 static void StartWriting(CObjectOStream& out, const CSeq_submit& seq_submit)
68 {
69  const CTypeInfo* seq_submit_type_info = seq_submit.GetThisTypeInfo();
70  out.WriteFileHeader(seq_submit_type_info);
71 
72  const CClassTypeInfo* seq_submit_class_type =
73  CTypeConverter<CClassTypeInfo>::SafeCast(seq_submit_type_info);
74 
75  out.BeginClass(seq_submit_class_type);
76 
77  out.BeginClassMember(seq_submit_class_type->GetMemberInfo("sub")->GetId());
78  out.WriteObject(&seq_submit.GetSub(), seq_submit.GetSub().GetThisTypeInfo());
79  out.EndClassMember();
80 
81  // start writing 'data'
82  const CMemberInfo* data_info = seq_submit_class_type->GetMemberInfo("data");
83  out.BeginClassMember(data_info->GetId());
84 
85  const CPointerTypeInfo* data_pointer_type = CTypeConverter<CPointerTypeInfo>::SafeCast(data_info->GetTypeInfo());
86  const CChoiceTypeInfo* data_choice_type = CTypeConverter<CChoiceTypeInfo>::SafeCast(data_pointer_type->GetPointedType());
87 
88  out.PushFrame(CObjectStackFrame::eFrameChoice, data_choice_type, &data_info->GetId());
89  out.BeginChoice(data_choice_type);
90 
91  // start writing 'entrys'
92  const CVariantInfo* data_variant_info = data_choice_type->GetVariantInfo("entrys");
93 
94  out.PushFrame(CObjectStackFrame::eFrameChoiceVariant, data_variant_info->GetId());
95  out.BeginChoiceVariant(data_choice_type, data_variant_info->GetId());
96 
97  const CContainerTypeInfo* data_container_type = CTypeConverter<CContainerTypeInfo>::SafeCast(data_variant_info->GetTypeInfo());
98  out.BeginContainer(data_container_type);
99 
100  // start writing 'set'
101  CObjectTypeInfo entry_type_info = CType<CSeq_entry>();
102 
103  out.BeginContainerElement(entry_type_info.GetTypeInfo()); // begins the next seq-entry
104 
105  const CChoiceTypeInfo* entry_choice_type = entry_type_info.GetChoiceTypeInfo();
106  out.BeginChoice(entry_choice_type);
107 
108  // start writing entry 'bioseq-set' of 'genbank' class
109  const CVariantInfo* entry_variant_info = entry_choice_type->GetVariantInfo("set");
110  out.BeginChoiceVariant(entry_choice_type, entry_variant_info->GetId());
111 
112  const CClassTypeInfo* entry_class_type = CTypeConverter<CClassTypeInfo>::SafeCast(entry_variant_info->GetTypeInfo());
113  out.BeginClass(entry_class_type);
114 
115  // writing class 'genbank'
116  CObjectTypeInfo set_type_info = CType<CBioseq_set>();
117 
118  const CMemberInfo* set_class_info = set_type_info.FindMember("class").GetMemberInfo();
120 
121  out.WriteClassMember(set_class_info->GetId(), set_class_info->GetTypeInfo(), &set_class);
122 
123  // start writing 'entrys' inside genbank bioseq-set
124  const CMemberInfo* seqset_class_info = set_type_info.FindMember("seq-set").GetMemberInfo();
125  out.BeginClassMember(seqset_class_info->GetId());
126 
127  const CContainerTypeInfo* seqset_container_type = CTypeConverter<CContainerTypeInfo>::SafeCast(seqset_class_info->GetTypeInfo());
128  out.BeginContainer(seqset_container_type);
129 }
130 
132 {
133  if (entry.IsSet()) {
134 
135  if (entry.SetSet().IsSetSeq_set()) {
136  for (auto cur_entry : entry.SetSet().SetSeq_set()) {
137  AdjustLocalIds(*cur_entry);
138  }
139  }
140  }
141 
142  if (entry.IsSetAnnot()) {
143 
144  for (auto annot : entry.SetAnnot()) {
145  if (annot->IsFtable()) {
146  AdjustLocalIds(*annot);
147  }
148  }
149  }
150 }
151 
153 {
154  _ASSERT(annot.IsFtable() && "annot should be a feature table at this point");
155 
156  if (annot.IsSetData()) {
157 
158  for (auto feat : annot.SetData().SetFtable()) {
159 
160  AdjustLocalIds(*feat);
161  }
162  }
163 }
164 
166 {
167  if (feat.IsSetId() && feat.GetId().IsLocal() && feat.GetId().GetLocal().IsId()) {
168 
169  CObject_id& obj_id = feat.SetId().SetLocal();
170 
171  int id = obj_id.GetId();
172  obj_id.SetId(id + m_cur_offset);
173 
174  if (id > m_cur_max_id) {
175  m_cur_max_id = id;
176  }
177  }
178 
179  if (feat.IsSetXref()) {
180 
181  for (auto xref : feat.SetXref()) {
182 
183  if (xref->IsSetId() && xref->GetId().IsLocal() && xref->GetId().GetLocal().IsId()) {
184 
185  CObject_id& obj_id = xref->SetId().SetLocal();
186 
187  int id = obj_id.GetId();
188  obj_id.SetId(id + m_cur_offset);
189  }
190  }
191  }
192 }
193 
195 {
196  out.BeginContainerElement(entry.GetThisTypeInfo());
197  out.WriteObject(&entry, entry.GetThisTypeInfo());
198  out.EndContainerElement();
199 }
200 
201 bool CSubmissionCollector::ProcessFile(const string& name)
202 {
203  CNcbiIfstream in(name.c_str());
204  unique_ptr<CObjectIStream> obj_in(CObjectIStream::Open(eSerial_AsnText, in, eNoOwnership));
205 
206  bool process = true;
207  while (process) {
208 
209  CSeq_submit seq_submit;
210 
211  // Get seq-submit to validate
212  try {
213 
214  string header = obj_in->ReadFileHeader();
215 
216  if (header != "Seq-submit") {
217  return false;
218  }
219  obj_in->Read(ObjectInfo(seq_submit), CObjectIStream::eNoFileHeader);
220  }
221  catch (CEofException& ) {
222  process = false;
223  continue;
224  }
225  catch (CException&) {
226  return false;
227  }
228 
229  if (m_header_not_set) {
230 
231  StartWriting(m_out, seq_submit);
232  m_header_not_set = false;
233  }
234 
236 
237  if (seq_submit.GetData().IsEntrys()) {
238  NON_CONST_ITERATE(CSeq_submit::TData::TEntrys, entry, seq_submit.SetData().SetEntrys()) {
239 
240  m_cur_max_id = 0;
241  if ((*entry)->IsSet() && (*entry)->GetSet().IsSetClass() && (*entry)->GetSet().GetClass() == CBioseq_set::eClass_genbank) {
242 
243  if ((*entry)->GetSet().IsSetSeq_set()) {
244  NON_CONST_ITERATE(CBioseq_set::TSeq_set, internal_entry, (*entry)->SetSet().SetSeq_set()) {
245 
246  AdjustLocalIds(**internal_entry);
247  cleanup.BasicCleanup(**internal_entry);
248  WriteContainerElement(m_out, **internal_entry);
249  }
250  }
251  }
252  else {
253  AdjustLocalIds(**entry);
254  cleanup.BasicCleanup(**entry);
255  WriteContainerElement(m_out, **entry);
256  }
257 
259  }
260  }
261  }
262 
263  return true;
264 }
265 
267 {
269 
270  out.EndContainer(); // seq-set contains entries
271  out.EndClassMember(); // seq-set
272 
273  out.EndClass(); // set
274  out.EndChoiceVariant(); // set
275  out.EndChoice(); // set
276  out.EndContainerElement(); // set
277 
278  out.EndContainer(); // data contains entries
279  out.EndChoiceVariant(); // entries
280  out.EndChoice(); // entries
281 
282  out.EndClassMember(); // data
283  out.EndClass(); // seq_submit
284 
285  return true;
286 }
287 
288 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CObjectOStream –.
Definition: objostr.hpp:83
CObjectTypeInfo –.
Definition: objectinfo.hpp:94
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
Definition: Seq_entry.hpp:56
bool IsSetAnnot(void) const
Definition: Seq_entry.cpp:165
TAnnot & SetAnnot(void)
Definition: Seq_entry.cpp:195
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Definition: typeinfo.hpp:76
void AdjustLocalIds(CSeq_entry &entry)
bool ProcessFile(const string &path)
CSubmissionCollector(CNcbiOstream &out)
std::ofstream out("events_result.xml")
main entry point for tests
static void cleanup(void)
Definition: ct_dynamic.c:30
#define true
Definition: bool.h:35
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
@ eNoOwnership
No ownership is assumed.
Definition: ncbi_types.h:135
const CMemberId & GetId(void) const
TTypeInfo GetTypeInfo(void) const
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
static const TObjectType * SafeCast(TTypeInfo type)
Definition: serialutil.hpp:76
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
Definition: objectinfo.hpp:762
const CMemberInfo * GetMemberInfo(void) const
CMemberIterator FindMember(const string &memberName) const
Find class member by its name.
TTypeInfo GetTypeInfo(void) const
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
const CChoiceTypeInfo * GetChoiceTypeInfo(void) const
Definition: objectinfo.cpp:67
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
TTypeInfo GetPointedType(void) const
const CMemberInfo * GetMemberInfo(TMemberIndex index) const
const CVariantInfo * GetVariantInfo(TMemberIndex index) const
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
TId & SetId(void)
Select the variant.
Definition: Object_id_.hpp:277
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1314
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Feat_id_.cpp:134
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1296
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Feat_id_.hpp:353
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
Definition: Seq_feat_.hpp:892
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
list< CRef< CSeq_entry > > TSeq_set
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_genbank
converted genbank
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
Definition: Seq_annot_.hpp:861
list< CRef< CSeq_entry > > TEntrys
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
const TSub & GetSub(void) const
Get the Sub member data.
bool IsEntrys(void) const
Check if variant Entrys is selected.
USING_SCOPE(objects)
static void StartWriting(CObjectOStream &out, const CSeq_submit &seq_submit)
static void WriteContainerElement(CObjectOStream &out, const CSeq_entry &entry)
std::istream & in(std::istream &in_, double &x_)
#define _ASSERT
Modified on Thu Apr 25 08:20:51 2024 by modify_doxy.py rev. 669887