NCBI C++ ToolKit
msmerge.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Lewis Y. Geer
27  *
28  * File Description:
29  * Contains code for reading in spectrum data sets.
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the data definition file
34  * 'omssa.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 #include <corelib/ncbistd.hpp>
40 #include <corelib/ncbi_limits.hpp>
41 #include <corelib/ncbifile.hpp>
44 #include <serial/serial.hpp>
45 #include <serial/objistrasn.hpp>
46 #include <serial/objistrasnb.hpp>
47 #include <serial/objostrasn.hpp>
48 #include <serial/objostrasnb.hpp>
49 #include <serial/iterator.hpp>
50 #include <serial/objostrxml.hpp>
52 
53 #include "msmerge.hpp"
54 
55 #include <algorithm>
56 #include <iterator>
57 
60 BEGIN_SCOPE(omssa)
61 
62 
63 void COMSSASearch::CopyCMSSearch(CRef <COMSSASearch> OldSearch)
64 {
65  // SerialAssign(*OldSearch);
66  CConn_MemoryStream memory_stream;
67 
68 {
69  CObjectOStreamAsnBinary omemorystream(memory_stream);
70  omemorystream << *OldSearch;
71  omemorystream.Flush();
72 }
73 
74  CObjectIStreamAsnBinary imemorystream(memory_stream);
75  imemorystream >> *this;
76 }
77 
78 
79 const string COMSSASearch::FileEnding(const ESerialDataFormat FileType) const
80 {
81  if(FileType == eSerial_Xml) return "omx";
82  else if(FileType == eSerial_AsnBinary) return "oms";
83  else if(FileType == eSerial_AsnText) return "omt";
84  return "";
85 }
86 
87 
88 void
90  const CMSSearchSettings& SearchSettings,
91  int& Min,
92  int& Max
93  ) const
94 {
95  if(SearchSettings.GetSettingid() < Min) Min = SearchSettings.GetSettingid();
96  if(SearchSettings.GetSettingid() > Max) Max = SearchSettings.GetSettingid();
97 }
98 
99 void
101  int& Min,
102  int& Max
103  ) const
104 {
107 
108  if(GetRequest().size() == 0)
109  NCBI_THROW(COMSSAException, eMSParseException, "No Search Requests");
110 
111  FindMinMaxForOneSetting((*GetRequest().begin())->GetSettings(), Min, Max);
112  if((*GetRequest().begin())->CanGetMoresettings() && (*GetRequest().begin())->GetMoresettings().CanGet()) {
113  ITERATE(CMSSearchSettingsSet::Tdata, iSettings, (*GetRequest().begin())->GetMoresettings().Get()) {
114  FindMinMaxForOneSetting(**iSettings, Min, Max);
115  }
116  }
117 }
118 
119 
120 void
122  int& Min,
123  int& Max
124  ) const
125 {
128 
129  if(GetRequest().size() == 0)
130  NCBI_THROW(COMSSAException, eMSParseException, "No Search Requests");
131 
132  if((*GetRequest().begin())->GetSpectra().Get().size() == 0)
133  NCBI_THROW(COMSSAException, eMSParseException, "No Spectra");
134 
135  ITERATE(CMSSpectrumset::Tdata, iSpectrum, (*GetRequest().begin())->GetSpectra().Get()) {
136  if((*iSpectrum)->GetNumber() > Max)
137  Max = (*iSpectrum)->GetNumber();
138  if((*iSpectrum)->GetNumber() < Min)
139  Min = (*iSpectrum)->GetNumber();
140  }
141 }
142 
143 void
145 {
146  NON_CONST_ITERATE(CMSSpectrumset::Tdata, iSpectrum, (*OldSearch->SetRequest().begin())->SetSpectra().Set()) {
147  (*SetRequest().begin())->SetSpectra().Set().push_back(*iSpectrum);
148  }
149 }
150 
151 void
153 {
154  NON_CONST_ITERATE(CMSResponse::THitsets, iHits, (*OldSearch->SetResponse().begin())->SetHitsets()) {
155  (*SetResponse().begin())->SetHitsets().push_back(*iHits);
156  }
157 }
158 
159 void
161 {
162  CRef <CMSSearchSettings> OldSettings (&((*OldSearch->SetRequest().begin())->SetSettings()));
163  (*SetRequest().begin())->SetMoresettings().Set().push_back(OldSettings);
164 
165  if((*OldSearch->GetRequest().begin())->CanGetMoresettings() &&
166  (*OldSearch->GetRequest().begin())->GetMoresettings().CanGet()) {
167  NON_CONST_ITERATE(CMSSearchSettingsSet::Tdata, iSettings, (*OldSearch->SetRequest().begin())->SetMoresettings().Set()) {
168  (*SetRequest().begin())->SetMoresettings().Set().push_back(*iSettings);
169  }
170  }
171 }
172 
173 const bool
174 COMSSASearch::CheckLibraryNameAndSize(const string Name, const int Size) const
175 {
176  if(GetRequest().size() == 0)
177  NCBI_THROW(COMSSAException, eMSParseException, "No Search Requests");
178  if(GetResponse().size() == 0)
179  NCBI_THROW(COMSSAException, eMSParseException, "No Search Responses");
180 
181 
182  if((*GetRequest().begin())->GetSettings().GetDb() != Name ||
183  (*GetResponse().begin())->GetDbversion() != Size)
184  return false;
185 
186  if((*GetRequest().begin())->CanGetMoresettings() && (*GetRequest().begin())->GetMoresettings().CanGet()) {
187  ITERATE(CMSSearchSettingsSet::Tdata, iSettings, (*GetRequest().begin())->GetMoresettings().Get()) {
188  if((*iSettings)->GetDb() != Name) return false;
189  }
190  }
191 
192  return true;
193 }
194 
195 void
197  CMSSearchSettings& SearchSettings,
198  const int Min
199  )
200 {
201  SearchSettings.SetSettingid() += Min;
202 }
203 
204 void
206 {
207 
208  RenumberOneSearchSettingId((*SetRequest().begin())->SetSettings(), Min);
209 
210  if((*GetRequest().begin())->CanGetMoresettings() && (*GetRequest().begin())->GetMoresettings().CanGet()) {
211  NON_CONST_ITERATE(CMSSearchSettingsSet::Tdata, iSettings, (*SetRequest().begin())->SetMoresettings().Set()) {
212  RenumberOneSearchSettingId(**iSettings, Min);
213  }
214  }
215 
216  NON_CONST_ITERATE(CMSResponse::THitsets, iHits, (*SetResponse().begin())->SetHitsets()) {
217  (*iHits)->SetSettingid() += Min;
218  }
219 }
220 
221 void
223 {
224 
225 
226  NON_CONST_ITERATE(CMSResponse::THitsets, iHits, (*SetResponse().begin())->SetHitsets()) {
227  (*iHits)->SetNumber() += Min;
228  }
229 
230  NON_CONST_ITERATE(CMSSpectrumset::Tdata, iSpectrum, (*SetRequest().begin())->SetSpectra().Set()) {
231  (*iSpectrum)->SetNumber() += Min;
232  }
233 }
234 
235 
236 void
238 {
239  ITERATE(CMSBioseqSet::Tdata, iMSBioseq, (*SetResponse().begin())->SetBioseqs().Set()) {
240  SetOids().insert((*iMSBioseq)->GetOid());
241  }
242 }
243 
244 
245 void
247 {
248  CRef <CMSBioseq> MSBioseq (new CMSBioseq);
249  MSBioseq->SetSeq(*(const_cast <CBioseq *> (Bioseq.GetPointerOrNull())));
250  MSBioseq->SetOid() = oid;
251  (*SetResponse().begin())->SetBioseqs().Set().push_back(MSBioseq);
252 }
253 
254 void
256 {
257  int Min, Max, OldMin, OldMax;
258 
259  if(GetRequest().size() == 0)
260  NCBI_THROW(COMSSAException, eMSParseException, "No Search Requests");
261  if(GetResponse().size() == 0)
262  NCBI_THROW(COMSSAException, eMSParseException, "No Search Responses");
263 
264 
265  // check library
266  if(!CheckLibraryNameAndSize((*GetRequest().begin())->GetSettings().GetDb(),
267  (*GetResponse().begin())->GetDbversion()))
268  NCBI_THROW(COMSSAException, eMSNoMatchException, "unmatched sequence library");
269 
270  // renumber search settings
271  FindMinMaxSearchSettingId(Min, Max);
272  OldSearch->FindMinMaxSearchSettingId(OldMin, OldMax);
273  OldSearch->RenumberSearchSettingId(Max - OldMin + 1);
274 
275  // copy over search settings
276  CopySettings(OldSearch);
277 
278  // renumber spectra
279  FindMinMaxSpectrumNumber(Min, Max);
280  OldSearch->FindMinMaxSpectrumNumber(OldMin, OldMax);
281  OldSearch->RenumberSpectrumNumber(Max - OldMin + 1);
282 
283  // copy over spectra
284  CopySpectra(OldSearch);
285 
286  // copy over hits
287  CopyHitsets(OldSearch);
288 
289  // look for bioseqs to add
290  PopulateOidList();
291  OldSearch->PopulateOidList();
292 
293  TOid Diff;
294  set_difference(OldSearch->GetOids().begin(), OldSearch->GetOids().end(), GetOids().begin(), GetOids().end(),
295  inserter(Diff, Diff.begin()));
296  ITERATE(TOid, iDiff, Diff) {
297  AppendBioseq(*iDiff, (*OldSearch->GetResponse().begin())->GetBioseqs().GetBioseqByOid(*iDiff));
298  }
299 }
300 
301 
303 {
304  if(DataFormat == eSerial_Xml && os) {
305  // turn on xml schema
306  CObjectOStreamXml *xml_out = dynamic_cast <CObjectOStreamXml *> (os);
307  xml_out->SetReferenceSchema();
308  // turn off names in named integers
309  xml_out->SetWriteNamedIntegersByValue(true);
310  }
311  return os;
312 }
313 
314 
315 
316 END_SCOPE(omssa)
In-memory stream (a la strstream or stringstream)
CMSBioseq –.
Definition: MSBioseq.hpp:66
@MSSearchSettings.hpp User-defined methods of the data storage class.
generic exception class for omssa
Definition: msms.hpp:435
const string FileEnding(const ESerialDataFormat FileType) const
return a file ending based on encoding type
Definition: msmerge.cpp:79
void PopulateOidList(void)
fill in the oid list from existing object
Definition: msmerge.cpp:237
void AppendBioseq(const int oid, CConstRef< CBioseq > Bioseq)
add a bioseq to the bioseq list
Definition: msmerge.cpp:246
void RenumberOneSearchSettingId(CMSSearchSettings &SearchSettings, const int Min)
renumber one search setting
Definition: msmerge.cpp:196
void FindMinMaxForOneSetting(const CMSSearchSettings &SearchSettings, int &Min, int &Max) const
helper function for FindMinMaxSearchSettingId
Definition: msmerge.cpp:89
void AppendSearch(CRef< COMSSASearch > OldSearch)
add a search to this object
Definition: msmerge.cpp:255
const TOid & GetOids(void) const
get the oid list
Definition: msmerge.hpp:202
void FindMinMaxSearchSettingId(int &Min, int &Max) const
find the maximum and minimum search setting id in this search
Definition: msmerge.cpp:100
void RenumberSearchSettingId(const int Min)
renumber search settings by adding a minimum value
Definition: msmerge.cpp:205
void CopySpectra(CRef< COMSSASearch > OldSearch)
copy a spectra from old search into this search
Definition: msmerge.cpp:144
void RenumberSpectrumNumber(const int Min)
renumber spectrum numbers by adding a minimum value
Definition: msmerge.cpp:222
void FindMinMaxSpectrumNumber(int &Min, int &Max) const
find the maximum and minimum spectrum number in this search
Definition: msmerge.cpp:121
const bool CheckLibraryNameAndSize(const string Name, const int Size) const
check for matching library name and size in all search settings
Definition: msmerge.cpp:174
void CopySettings(CRef< COMSSASearch > OldSearch)
copy Settings from old search into new this search
Definition: msmerge.cpp:160
TOid & SetOids(void)
set the oid list
Definition: msmerge.hpp:209
void CopyHitsets(CRef< COMSSASearch > OldSearch)
copy Hitsets from old search into new this search
Definition: msmerge.cpp:152
CObjectIStreamAsnBinary –.
Definition: objistrasnb.hpp:59
CObjectOStreamAsnBinary –.
Definition: objostrasnb.hpp:58
CObjectOStreamXml –.
Definition: objostrxml.hpp:54
CObjectOStream –.
Definition: objostr.hpp:83
CRef –.
Definition: ncbiobj.hpp:618
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
void SetReferenceSchema(bool use_schema=true)
Make generated XML document reference XML schema.
Definition: objostrxml.cpp:128
void SetWriteNamedIntegersByValue(bool set)
Set up writing named integers (in ASN.1 sense) by value only.
void Flush(void)
TObjectType * GetPointerOrNull(void) const THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:1672
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOMSSA_EXPORT
Definition: ncbi_export.h:1355
TSettingid GetSettingid(void) const
Get the Settingid member data.
const TRequest & GetRequest(void) const
Get the Request member data.
Definition: MSSearch_.hpp:204
list< CRef< CMSSpectrum > > Tdata
list< CRef< CMSSearchSettings > > Tdata
const TResponse & GetResponse(void) const
Get the Response member data.
Definition: MSSearch_.hpp:229
list< CRef< CMSBioseq > > Tdata
vector< CRef< CMSHitSet > > THitsets
Definition: MSResponse_.hpp:93
TResponse & SetResponse(void)
Assign a value to Response data member.
Definition: MSSearch_.hpp:235
TRequest & SetRequest(void)
Assign a value to Request data member.
Definition: MSSearch_.hpp:210
void SetSettingid(TSettingid value)
Assign a value to Settingid data member.
CObjectOStream * SetUpOutputFile(CObjectOStream *os, ESerialDataFormat DataFormat)
sets up output file based on DataFormat in particular, initializes xml stream appropriately
Definition: msmerge.cpp:302
const TYPE & Get(const CNamedParameterList *param)
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
T max(T x_, T y_)
T min(T x_, T y_)
Int4 TOid
Ordinal ID in BLAST databases.
Definition: seqdbcommon.hpp:58
Modified on Fri Sep 20 14:57:28 2024 by modify_doxy.py rev. 669887