NCBI C++ ToolKit
pepxml.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Douglas J. Slotta
27  *
28  * File Description:
29  * Code for converting OMSSA to PepXML
30  *
31  */
32 
33 // standard includes
34 #include <ncbi_pch.hpp>
35 #include <objects/seq/Bioseq.hpp>
36 #include <objects/seq/Seq_inst.hpp>
37 #include <util/xregexp/regexp.hpp>
38 
39 #include "pepxml.hpp"
40 #include "omssa.hpp"
41 
44 BEGIN_SCOPE(omssa)
45 
46 // const double PROTON_MASS = 1.007276466;
// Added to an N-terminal fixed/terminal modification mass before it is written
// as mod_nterm_mass or as the terminal_modification mass.
// NOTE(review): value presumably is the average atomic mass of hydrogen in Da — confirm.
47 const double HYDROGEN_MASS = 1.00794;
// Added to a C-terminal fixed/terminal modification mass before it is written
// as mod_cterm_mass or as the terminal_modification mass.
// NOTE(review): value presumably is the average mass of an OH group in Da — confirm.
48 const double OH_MASS = 17.00734;
49 
50 // char const * const kMolNames[5] = {
51 // "not set",
52 // "DNA",
53 // "RNA",
54 // "AA",
55 // "NA"
56 // };
57 
58 string CPepXML::ConvertDouble(double n) {
59  string val = NStr::DoubleToString(n,15);
60  int len = val.length();
61  while (NStr::EndsWith(val,"0")) {
62  val.erase(--len);
63  }
64  if (NStr::EndsWith(val,".")) {
65  val.append("0");
66  }
67  return val;
68 }
69 
70 
// Maps the amino-acid code `in` to a single character and returns it as the
// first character of the local string `out`.
// NOTE(review): listing line 73, the statement that fills `out`, is absent from
// this extract. The cross-reference list for this page mentions a
// Convert(src, src_coding, pos, length, dst, dst_coding) helper together with
// e_Ncbistdaa and e_Ncbieaa, so this presumably converts NCBIstdaa to NCBIeaa —
// confirm against the repository copy before editing.
71 char CPepXML::ConvertAA(char in) {
72  string out;
74  return out[0];
75 }
76 
// Value type used when inserting into a TAAModMap: the int is a position within
// the peptide string and the string is the bracketed mass text (e.g. "[160]")
// appended after the residue at that position.
77 typedef pair<int, string> TAAModPair;
79 
81 
82  CRef<CModification_info> modInfo(new CModification_info); // modification_info is parent mod element. attributes include
83  // modified_peptide and mod_[nc]term_mass
84 
85  TAAModMap modMap;
86  string pep = msHits->GetPepstring();
87  bool hasMod(false);
88 
89  ITERATE(CMSHits::TMods, iMod, msHits->GetMods()) { // iterate through list of modifications
90  int pos = (*iMod)->GetSite();
91  int num = (*iMod)->GetModtype(); // poorly named in OMSSA, is actually MSMod, not MSModType
92  vModSet.insert(num);
93  EMSModType type = Modset->GetModType(num); // aa specific, nterminal, etc.
94  double mdiff = MSSCALE2DBL(Modset->GetModMass(num));
95  char aa = pep[pos];
96  double aaMass = m_aaMassMap.find(aa)->second;
97  double mass = aaMass + mdiff;
98  string iMass = "[" + NStr::IntToString(static_cast<int>(mass)) + "]";
99 
100  modMap.insert(TAAModPair(pos,iMass));
101 
102  CRef<CMod_aminoacid_mass> modaaMass(new CMod_aminoacid_mass); // child tag of modInfo used for aa specific mods
103 
104  switch (type) {
105  case eMSModType_modaa:
106  modaaMass->SetAttlist().SetPosition(pos+1); // fill out subelement mod_aminoacid_mass
107  modaaMass->SetAttlist().SetMass(mass);
108  modInfo->SetMod_aminoacid_mass().push_back(modaaMass);
109  hasMod = true;
110  break;
111  case eMSModType_modn:
112  case eMSModType_modnaa:
113  case eMSModType_modnp:
114  case eMSModType_modnpaa:
115  modInfo->SetAttlist().SetMod_nterm_mass(mass);
116  hasMod = true;
117  break;
118  case eMSModType_modc:
119  case eMSModType_modcaa:
120  case eMSModType_modcp:
121  case eMSModType_modcpaa:
122  modInfo->SetAttlist().SetMod_cterm_mass(mass);
123  hasMod = true;
124  break;
125  default:
126  // perhaps some error handling here
127  break;
128  }
129 
130  }
131 
132  // iterate through peptide looking for aa specific mods. If found, insert mass into peptide string
133 
134  string modPep;
135 
136  for (unsigned int i=0; i<pep.length(); i++) {
137  char p = pep[i];
138  modPep.append(1, p);
140  it = modMap.find(i);
141  if (it != modMap.end()) {
142  modPep.append(it->second); // see if AA has corresponding mod, if so, append mass text to peptide string
143  } else if (m_staticModSet.count(p)>0) { // else if there is a static modification associated with the AA, then
144  // create a mod_aminoacid_mass subelement
146  modaaMass->SetAttlist().SetPosition(i+1);
147  double staticMass = m_aaMassMap.find(p)->second;
148  modaaMass->SetAttlist().SetMass(staticMass);
149  modInfo->SetMod_aminoacid_mass().push_back(modaaMass);
150  hasMod = true;
151  }
152  }
153 
154 // todo: does not return n or c term peptide or protein fixed mods.
155 // to do this, iterate through the mod set for the search and print them out.
156 
157  CMSSearchSettings::TFixed::const_iterator iterF;
158  for (iterF = inOMSSA.GetRequest().front()->GetSettings().GetFixed().begin();
159  iterF != inOMSSA.GetRequest().front()->GetSettings().GetFixed().end(); ++iterF) {
160  int type = Modset->GetModType(*iterF);
161  double mass = MSSCALE2DBL(Modset->GetModMass(*iterF));
162  if (type % 2 != 0) {
163  switch (type) {
164  case eMSModType_modn:
165  case eMSModType_modnaa:
166  case eMSModType_modnp:
167  case eMSModType_modnpaa:
168  modInfo->SetAttlist().SetMod_nterm_mass(mass + HYDROGEN_MASS);
169  hasMod = true;
170  break;
171  case eMSModType_modc:
172  case eMSModType_modcaa:
173  case eMSModType_modcp:
174  case eMSModType_modcpaa:
175  modInfo->SetAttlist().SetMod_cterm_mass(mass + OH_MASS);
176  hasMod = true;
177  break;
178  }
179  }
180  }
181 
182 // only return if we have declared a mod
183  if(!hasMod) return null;
184  modInfo->SetAttlist().SetModified_peptide(modPep);
185 
186  return modInfo;
187 }
188 
189 
// Records one search-level modification setting in the search summary.
//   sSum   - search summary that receives the new modification element(s)
//   Modset - modification definitions queried for type, residues, mass and name
//   modnum - identifier of the modification inside Modset
//   fixed  - true: element is written with variable="N"; false: variable="Y"
// When `type % 2 == 0` one aminoacid_modification is appended per residue
// returned by GetModChar(); otherwise one terminal_modification is appended.
// Side effect: for a fixed residue modification the m_aaMassMap entry of that
// residue is replaced by (previous residue mass + mass difference).
// NOTE(review): listing lines 195, 209 and 221 are absent from this extract, so
// the declarations of `aaMod` and `termMod` (and whatever line 209 did in the
// fixed branch) are not visible here.
190 void CPepXML::ConvertModSetting(CRef<CSearch_summary> sSum, CRef<CMSModSpecSet> Modset, int modnum, bool fixed) {
191  // NB: pepXML does not seem to allow for modification to the terminus of a protein at particular amino acids
192  int type = Modset->GetModType(modnum);
193  if ( type % 2 == 0) { // Must apply to a particular amino acid
 // One element per residue the modification can attach to.
194  for (int i=0; i< Modset->GetModNumChars(modnum); i++) {
196  int modchar = Modset->GetModChar(modnum, i);
197  char aa = ConvertAA(modchar);
198  string aaStr(1, aa);
199  aaMod->SetAttlist().SetAminoacid(aaStr);
200  double mdiff = MSSCALE2DBL(Modset->GetModMass(modnum));
 // NOTE(review): the find() result is dereferenced without comparing it to end();
 // this relies on every residue produced by ConvertAA being present in m_aaMassMap.
201  double aaMass = m_aaMassMap.find(aa)->second;
202  double mass = aaMass + mdiff;
203  aaMod->SetAttlist().SetMassdiff(ConvertDouble(mdiff));
204  aaMod->SetAttlist().SetMass(mass);
205  if (fixed) {
206  aaMod->SetAttlist().SetVariable("N");
 // Replace the stored residue mass with the modified mass, so later lookups
 // of this residue see the fixed modification already applied.
207  m_aaMassMap.erase(aa);
208  m_aaMassMap.insert(TAminoAcidMassPair(aa, aaMass + mdiff));
210  } else {
211  aaMod->SetAttlist().SetVariable("Y");
212  }
 // Only the peptide-terminus residue types set peptide_terminus; the other
 // even types leave the attribute untouched.
213  if (type > 0) {
214  if (type == eMSModType_modnpaa) aaMod->SetAttlist().SetPeptide_terminus("N");
215  if (type == eMSModType_modcpaa) aaMod->SetAttlist().SetPeptide_terminus("C");
216  }
217  aaMod->SetAttlist().SetDescription(Modset->GetUnimodName(modnum));
218  sSum->SetAminoacid_modification().push_back(aaMod);
219  }
220  } else {
 // Terminal modification that is not tied to a specific residue.
222  double mass = MSSCALE2DBL(Modset->GetModMass(modnum));
223  termMod->SetAttlist().SetMassdiff(ConvertDouble(mass));
224  if (fixed) {
225  termMod->SetAttlist().SetVariable("N");
226  } else {
227  termMod->SetAttlist().SetVariable("Y");
228  }
229  termMod->SetAttlist().SetDescription(Modset->GetUnimodName(modnum));
 // terminus is "n"/"c"; protein_terminus is "Y" for the protein-terminus types
 // and "N" for the peptide-terminus types. The reported mass adds HYDROGEN_MASS
 // on the N side and OH_MASS on the C side. There is no default case: any other
 // type leaves terminus, protein_terminus and mass unset.
230  switch (type) {
231  case eMSModType_modn:
232  termMod->SetAttlist().SetTerminus("n");
233  termMod->SetAttlist().SetProtein_terminus("Y");
234  termMod->SetAttlist().SetMass(mass + HYDROGEN_MASS);
235  break;
236  case eMSModType_modnp:
237  termMod->SetAttlist().SetTerminus("n");
238  termMod->SetAttlist().SetProtein_terminus("N");
239  termMod->SetAttlist().SetMass(mass + HYDROGEN_MASS);
240  break;
241  case eMSModType_modc:
242  termMod->SetAttlist().SetTerminus("c");
243  termMod->SetAttlist().SetProtein_terminus("Y");
244  termMod->SetAttlist().SetMass(mass + OH_MASS);
245  break;
246  case eMSModType_modcp:
247  termMod->SetAttlist().SetTerminus("c");
248  termMod->SetAttlist().SetProtein_terminus("N");
249  termMod->SetAttlist().SetMass(mass + OH_MASS);
250  break;
251  }
252  sSum->SetTerminal_modification().push_back(termMod);
253  }
254 }
255 
256 // Parses a spectrum identifier string
257 // SpecID: the string to parse
258 // field: 0 = whole string
259 // 1 = dta file name
260 // 2 = start scan
261 // 3 = end scan
262 // 4 = charge state
263 // 5 = file extension (.dta)
264 // query: string to return if SpecID is not a dta filename
//
// The "field" list above describes the capture groups of the regular expression
// used below; it is not a parameter of this function. Actual parameters:
//   sQuery - spectrum query whose spectrum, start_scan and end_scan attributes are set
//   SpecID - spectrum identifier to parse
//   query  - number used for both start and end scan when none can be parsed from SpecID
//   charge - charge written as the last component of the spectrum attribute
// The spectrum attribute is written as "<file>.<start>.<stop>.<charge>".
// NOTE(review): listing line 270, which declares `RxpLocus`, is absent from this
// extract, so the pattern tested first is not visible here.
265 void CPepXML::ConvertScanID(CRef<CSpectrum_query> sQuery, string SpecID, int query, int charge) {
266  string specFile, startScan, stopScan, dtaCharge;
267 
268  dtaCharge = NStr::IntToString(charge);
269 
 // If RxpLocus matches, the whole identifier is used as the file name and the
 // query number stands in for both scan numbers.
271  if (RxpLocus.IsMatch(SpecID)) {
272  specFile = SpecID;
273  startScan = NStr::IntToString(query);
274  stopScan = startScan;
275  } else {
 // Expected shape: <name>.<start>.<stop>.<number>[.dta], case-insensitive.
 // Only groups 1-3 are read; the fourth number and the suffix are matched
 // but ignored, and the `charge` argument is used instead.
276  CRegexp RxpParse("(.*)\\.(\\d+)\\.(\\d+)\\.(\\d+)(\\.dta)?", CRegexp::fCompile_ignore_case);
277  specFile = RxpParse.GetMatch(SpecID, 0, 1);
278  if (specFile == "") {
 // No file-name group: fall back to the raw identifier.
279  specFile = SpecID;
280  }
281  startScan= RxpParse.GetMatch(SpecID, 0, 2);
282  if (startScan == "") {
 // No scan number in the identifier: use the query number for both ends.
283  startScan = NStr::IntToString(query);
284  stopScan = startScan;
285  } else {
286  stopScan = RxpParse.GetMatch(SpecID, 0, 3);
287  if (stopScan == "") {
288  stopScan = startScan;
289  }
290  }
291  }
292 
293  sQuery->SetAttlist().SetSpectrum(specFile + "." + startScan + "." + stopScan + "." + dtaCharge);
294  sQuery->SetAttlist().SetStart_scan(NStr::StringToInt(startScan));
295  sQuery->SetAttlist().SetEnd_scan(NStr::StringToInt(stopScan));
296 }
297 
299  if (pHit->CanGetAccession()) {
300  return pHit->GetAccession();
301  } else if (pHit->CanGetGi()) {
302  return "gi:" + NStr::NumericToString(pHit->GetGi());
303  }
304  return pHit->GetDefline();
305 }
306 
307 
310  CRef<CMSModSpecSet> Modset,
311  set<int>& variableMods,
312  CMSSearch& inOMSSA)
313 {
314  if (pHitSet->GetHits().empty())
315  return;
316 
317  CMSHitSet::THits::const_iterator iHit;
318  set<int> charges;
319 
320  // First, find all possible charge states
321  for(iHit = pHitSet->GetHits().begin(); iHit != pHitSet->GetHits().end(); iHit++) {
322  charges.insert((*iHit)->GetCharge());
323  }
324 
325 
326  ITERATE(set<int>, iCharge, charges) {
327  iHit = pHitSet->GetHits().begin();
328  int charge = (*iHit)->GetCharge();
329 
330  // advance to the first instance with a matching charge
331  while ( charge != *iCharge ) {
332  iHit++;
333  charge = (*iHit)->GetCharge();
334  }
335 
337  string spectrumID;
338  if(!(pHitSet->GetIds().empty())) {
339  spectrumID = *(pHitSet->GetIds().begin());
340  }
341  //string query = NStr::IntToString(pHitSet->GetNumber());
342 
343  ConvertScanID(sQuery, spectrumID, pHitSet->GetNumber(), charge);
344 
345  //double neutral_precursor_mass = ((*iHit)->GetMass()/m_scale)/charge - (charge * PROTON_MASS);
346  double neutral_precursor_mass = (*iHit)->GetMass()/m_scale;
347  sQuery->SetAttlist().SetPrecursor_neutral_mass(neutral_precursor_mass);
348  sQuery->SetAttlist().SetAssumed_charge(charge);
349  sQuery->SetAttlist().SetIndex(m_index++);
350 
351  // Only one search_result per query (for now)
353 
354  CMSHits::TPephits::const_iterator iPephit;
355  int hitRank = 1;
356  //double prevEValue = (*iHit)->GetEvalue();
357  for( ; iHit != pHitSet->GetHits().end(); iHit++) {
358  // skip this hit if it is not the right charge
359  charge = (*iHit)->GetCharge();
360  if ( charge != *iCharge ) {
361  continue;
362  }
363 
364  // First protein is associated with search_hit, the rest go into alternative_proteins
365  iPephit = (*iHit)->GetPephits().begin();
366  // Each set of MSHits is a search_hit
367  CRef<CSearch_hit> sHit(new CSearch_hit);
368  //if (prevEValue < (*iHit)->GetEvalue()) hitRank++; // This sets those hits with the same score to have the same rank
369  sHit->SetAttlist().SetHit_rank(hitRank);
370  hitRank++; // Arbitrarily advances the rank, ever if the scores are the same
371  sHit->SetAttlist().SetPeptide((*iHit)->GetPepstring());
372  if((*iHit)->CanGetPepstart())
373  sHit->SetAttlist().SetPeptide_prev_aa((*iHit)->GetPepstart());
374  if((*iHit)->CanGetPepstop())
375  sHit->SetAttlist().SetPeptide_next_aa((*iHit)->GetPepstop());
376 
377  sHit->SetAttlist().SetProtein(GetProteinName(*iPephit));
378 
379  sHit->SetAttlist().SetNum_tot_proteins((*iHit)->GetPephits().size());
380  sHit->SetAttlist().SetNum_matched_ions((*iHit)->GetMzhits().size());
381  int tot_num_ions = ((*iHit)->GetPepstring().length()-1) * 2;
382  sHit->SetAttlist().SetTot_num_ions(tot_num_ions);
383  sHit->SetAttlist().SetCalc_neutral_pep_mass((*iHit)->GetTheomass()/m_scale);
384  sHit->SetAttlist().SetMassdiff(ConvertDouble(neutral_precursor_mass - ((*iHit)->GetTheomass())/m_scale));
385  //sHit->SetSearch_hit().SetAttlist().SetNum_tol_term("42"); //skip
386  //sHit->SetSearch_hit().SetAttlist().SetNum_missed_cleavages("42"); //skip
387  sHit->SetAttlist().SetIs_rejected(CSearch_hit::C_Attlist::eAttlist_is_rejected_0);
388  sHit->SetAttlist().SetProtein_descr((*iPephit)->GetDefline());
389  //sHit->SetSearch_hit().SetAttlist().SetCalc_pI("42"); //skip
390  //sHit->SetSearch_hit().SetAttlist().SetProtein_mw("42"); //skip
392  pValue->SetAttlist().SetName("pvalue");
393  pValue->SetAttlist().SetValue(ConvertDouble((*iHit)->GetPvalue()));
395  eValue->SetAttlist().SetName("expect");
396  eValue->SetAttlist().SetValue(ConvertDouble((*iHit)->GetEvalue()));
397  sHit->SetSearch_score().push_back(pValue);
398  sHit->SetSearch_score().push_back(eValue);
399  if ((*iHit)->CanGetScores()) {
400  ITERATE(CMSHits::TScores, iScore, (*iHit)->GetScores()) {
402  score->SetAttlist().SetName((*iScore)->GetName());
403  score->SetAttlist().SetValue(ConvertDouble((*iScore)->GetValue()));
404  sHit->SetSearch_score().push_back(score);
405  }
406  }
407  // Generate alternative_proteins
408  for (iPephit++ ; iPephit != (*iHit)->GetPephits().end(); iPephit++) {
410  altPro->SetAttlist().SetProtein(GetProteinName(*iPephit));
411  altPro->SetAttlist().SetProtein_descr((*iPephit)->GetDefline());
412  //altPro->SetAlternative_protein().SetAttlist().SetNum_tol_term(); //skip
413  //altPro->SetAlternative_protein().SetAttlist().SetProtein_mw(); //skip
414  sHit->SetAlternative_protein().push_back(altPro);
415  }
416  CRef<CModification_info> modInfo = ConvertModifications(*iHit, Modset, variableMods, inOMSSA);
417  if (modInfo) sHit->SetModification_info(*modInfo);
418 
419  sResult->SetSearch_hit().push_back(sHit);
420  }
421  sQuery->SetSearch_result().push_back(sResult);
422  sQueries.push_back(sQuery);
423  }
424 }
425 
426 
// Fills this object from an OMSSA search.
//   inOMSSA  - OMSSA search; only the first request and the first response
//              (front()) are read
//   Modset   - modification definitions, forwarded to ConvertModSetting and
//              ConvertMSHitSet
//   basename - written as base_name of both the run summary and the search summary
//   newname  - written as the summary_xml attribute
// Order of work: scale and residue masses, date/summary attributes, run summary
// with enzyme specificity, search summary (mass types, database, enzymatic
// constraint), fixed modifications, spectrum queries, then variable modifications.
// Side effects visible here: assigns m_scale and m_index; ConvertModSetting with
// fixed=true also rewrites m_aaMassMap entries.
// NOTE(review): listing lines 435, 445, 451, 475, 485, 488-489, 491, 504,
// 507-508 and 510 are absent from this extract. The declarations of `rSum`,
// `cleave` and `sSum`, the loop statement that stores each residue mass, and
// several `case` labels are therefore not visible.
427 void CPepXML::ConvertFromOMSSA(CMSSearch& inOMSSA, CRef <CMSModSpecSet> Modset, string basename, string newname) {
428 
 // Divisor applied to the integer masses stored in hits (see uses of m_scale).
429  m_scale = static_cast<float>(inOMSSA.GetRequest().front()->GetSettings().GetScale());
430 
431  // set up m_aaMassMap for modifications
 // Walks residue codes 0..28 and pairs each converted character with MonoMass[code].
 // NOTE(review): the statement that stores the pair (listing line 435) is missing here.
432  for (int modchar=0; modchar < 29; modchar++) {
433  char aa = ConvertAA(modchar);
434  double aaMass = MonoMass[modchar];
436  }
437 
438 
 // Stamp the document with the current time and the output file name.
439  CTime datetime(CTime::eCurrent);
440  datetime.SetFormat("Y-M-DTh:m:s");
441  this->SetAttlist().SetDate(datetime.AsString());
442  this->SetAttlist().SetSummary_xml(newname);
443 
444  // Create the Run Summary (need to generalize)
 // raw_data_type and raw_data are hard-coded to "raw" and ".mzXML".
446  rSum->SetAttlist().SetBase_name(basename);
447  rSum->SetAttlist().SetRaw_data_type("raw");
448  rSum->SetAttlist().SetRaw_data(".mzXML");
449  EMSEnzymes enzyme = static_cast <EMSEnzymes>(inOMSSA.GetRequest().front()->GetSettings().GetEnzyme());
 // NOTE(review): `enzyme` indexes kEnzymeNames without a range check.
450  string enzymeName = kEnzymeNames[enzyme];
452  rSum->SetSample_enzyme().SetAttlist().SetName(enzymeName);
453 
 // Cleavage specificity: cut residues, sense taken from the first character of
 // GetCleaveSense(), and no_cut="P" when the enzyme checks for proline.
454  CRef<CSpecificity> specificity(new CSpecificity);
455  specificity->SetAttlist().SetCut(cleave->GetCleaveAt());
456  switch (cleave->GetCleaveSense()[0]) {
457  case 'c':
458  case 'C':
459  specificity->SetAttlist().SetSense(CSpecificity::C_Attlist::eAttlist_sense_C);
460  break;
461  case 'n':
462  case 'N':
463  specificity->SetAttlist().SetSense(CSpecificity::C_Attlist::eAttlist_sense_N);
464  break;
465  default:
466  // Should be some sort of error here
 // Only a message is printed; the sense attribute is left unset in this case.
467  cerr << "Hmm, a cleavage with no sense, how odd." << endl;
468  }
469  if (cleave->GetCheckProline()) {
470  specificity->SetAttlist().SetNo_cut("P");
471  }
472  rSum->SetSample_enzyme().SetSpecificity().push_back(specificity);
473 
474  // Create the Search Summary
476  //sSum->SetAttlist().SetBase_name(baseFile.GetName());
477  sSum->SetAttlist().SetBase_name(basename);
478  sSum->SetAttlist().SetSearch_engine("OMSSA");
479  sSum->SetAttlist().SetOut_data_type("n/a");
480  sSum->SetAttlist().SetOut_data("n/a");
481 
 // Precursor mass type: "average" or "monoisotopic"; unknown search types print
 // a message and fall back to monoisotopic.
 // NOTE(review): some case labels of this switch are missing from the extract.
482  EMSSearchType searchType = static_cast <EMSSearchType>(inOMSSA.GetRequest().front()->GetSettings().GetPrecursorsearchtype());
483  //string searchTypeName = kSearchType[searchType];
484  switch (searchType) {
486  sSum->SetAttlist().SetPrecursor_mass_type(CSearch_summary::C_Attlist::eAttlist_precursor_mass_type_average);
487  break;
490  case eMSSearchType_exact:
492  sSum->SetAttlist().SetPrecursor_mass_type(CSearch_summary::C_Attlist::eAttlist_precursor_mass_type_monoisotopic);
493  break;
494  default:
495  // Should be some sort of error here
496  cerr << "Hmm, a typeless search, how odd." << endl;
497  sSum->SetAttlist().SetPrecursor_mass_type(CSearch_summary::C_Attlist::eAttlist_precursor_mass_type_monoisotopic);
498  }
499 
500 
 // Same mapping for the fragment (product ion) mass type.
 // NOTE(review): some case labels of this switch are missing from the extract.
501  searchType = static_cast <EMSSearchType>(inOMSSA.GetRequest().front()->GetSettings().GetProductsearchtype());
502  //searchTypeName = kSearchType[searchType];
503  switch (searchType) {
505  sSum->SetAttlist().SetFragment_mass_type(CSearch_summary::C_Attlist::eAttlist_fragment_mass_type_average);
506  break;
509  case eMSSearchType_exact:
511  sSum->SetAttlist().SetFragment_mass_type(CSearch_summary::C_Attlist::eAttlist_fragment_mass_type_monoisotopic);
512  break;
513  default:
514  // Should be some sort of error here
515  cerr << "Hmm, a typeless search, how odd." << endl;
516  sSum->SetAttlist().SetFragment_mass_type(CSearch_summary::C_Attlist::eAttlist_fragment_mass_type_monoisotopic);
517  }
518  //sSum->SetAttlist().SetFragment_mass_type(searchTypeName);
519  sSum->SetAttlist().SetSearch_id(1); // Should be count based upon search number
520 
521  string dbname = inOMSSA.GetRequest().front()->GetSettings().GetDb();
522  sSum->SetSearch_database().SetAttlist().SetLocal_path(dbname);
523 
 // Database type defaults to 3, which is written as AA; when the response
 // carries bioseqs, the molecule type of the first one is used instead and any
 // value other than 3 is written as NA.
524  int dbtype(3);
525  if(inOMSSA.GetResponse().front()->IsSetBioseqs() && inOMSSA.GetResponse().front()->GetBioseqs().Get().size() > 0)
526  dbtype = inOMSSA.GetResponse().front()->GetBioseqs().Get().front()->GetSeq().GetInst().GetMol();
527  switch (dbtype) {
528  case 3:
529  sSum->SetSearch_database().SetAttlist().SetType(CSearch_database::C_Attlist::eAttlist_type_AA);
530  break;
531  default:
532  sSum->SetSearch_database().SetAttlist().SetType(CSearch_database::C_Attlist::eAttlist_type_NA);
533  }
534 
 // NOTE(review): size_in_db_entries is filled from GetDbversion() — confirm that
 // this is the intended source for an entry count.
535  sSum->SetSearch_database().SetAttlist().SetSize_in_db_entries(inOMSSA.GetResponse().front()->GetDbversion());
536 
537  sSum->SetEnzymatic_search_constraint().SetAttlist().SetEnzyme(enzymeName);
538  sSum->SetEnzymatic_search_constraint().SetAttlist().SetMax_num_internal_cleavages(inOMSSA.GetRequest().front()->GetSettings().GetMissedcleave()); //check this
539  sSum->SetEnzymatic_search_constraint().SetAttlist().SetMin_number_termini(cleave->GetCleaveNum()); //check this
540 
541  // Fixed mods
 // Written immediately; with fixed=true ConvertModSetting also updates
 // m_aaMassMap, so this runs before any hit is converted.
542  CMSSearchSettings::TFixed::const_iterator iterF;
543  for (iterF = inOMSSA.GetRequest().front()->GetSettings().GetFixed().begin();
544  iterF != inOMSSA.GetRequest().front()->GetSettings().GetFixed().end(); ++iterF) {
545  ConvertModSetting(sSum, Modset, *iterF, true);
546  }
547 
548  // Variable mods
549  // Delay processing until all hits are examined, in case the spectral library search
550  // adds extra mods not seen here.
 // Only the identifiers are collected here; the set is passed by reference to
 // ConvertMSHitSet and written out after all hit sets have been converted.
551  set<int> variableMods;
552  CMSSearchSettings::TVariable::const_iterator iterV;
553  for (iterV = inOMSSA.GetRequest().front()->GetSettings().GetVariable().begin();
554  iterV != inOMSSA.GetRequest().front()->GetSettings().GetVariable().end(); ++iterV) {
555  //ConvertModSetting(sSum, Modset, *iterV, false);
556  variableMods.insert(*iterV);
557  }
558 
559  // Now for the Spectrum Queries
 // m_index restarts at 1 for this conversion.
560  CMSResponse::THitsets::const_iterator iHits;
561  m_index = 1;
562  for (iHits = inOMSSA.GetResponse().front()->GetHitsets().begin();
563  iHits != inOMSSA.GetResponse().front()->GetHitsets().end(); iHits++) {
564  //CRef< CMSHitSet > HitSet = *iHits;
565  ConvertMSHitSet(*iHits, rSum->SetSpectrum_query(), Modset, variableMods, inOMSSA);
566  }
567 
 // Emit the variable modification settings collected above.
568  ITERATE(set<int>, iVMod, variableMods) {
569  ConvertModSetting(sSum, Modset, *iVMod, false);
570  }
571 
 // Attach the finished search summary to the run summary, and the run summary
 // to this object.
572  rSum->SetSearch_summary().push_back(sSum);
573  this->SetMsms_run_summary().push_back(rSum);
574 
575 }
576 
577 END_SCOPE(omssa)
char const *const kEnzymeNames[eMSEnzymes_max]
@MSEnzymes.hpp User-defined methods of the data storage class.
Definition: MSEnzymes.hpp:55
#define MSSCALE2DBL(x)
Definition: MSMod.hpp:65
CAlternative_protein –.
CAminoacid_modification –.
static CRef< CCleave > CleaveFactory(const EMSEnzymes enzyme)
Simple factory to return back object for enzyme.
Definition: msms.cpp:545
bool GetCheckProline(void) const
Should we check for proline?
Definition: msms.hpp:696
const string GetCleaveAt(void) const
What are the cleavage chars?
Definition: msms.hpp:684
const char * GetCleaveSense(void) const
Should we check for proline?
Definition: msms.hpp:706
int GetCleaveNum(void) const
Get the number of cleavage chars.
Definition: msms.hpp:878
const char * GetUnimodName(int Mod) const
get unimod name
int GetModNumChars(int Mod) const
get the number of modification AA's
char GetModChar(int Mod, int Number) const
get modification AA's
EMSModType GetModType(int Mod) const
get modification type
int GetModMass(int Mod) const
get modification mass
CMod_aminoacid_mass –.
CModification_info –.
CMsms_run_summary –.
TAminoAcidMassMap m_aaMassMap
Definition: pepxml.hpp:80
float m_scale
Definition: pepxml.hpp:83
CRef< CModification_info > ConvertModifications(CRef< CMSHits > msHits, CRef< CMSModSpecSet > Modset, set< int > &vModSet, CMSSearch &inOMSSA)
Definition: pepxml.cpp:80
void ConvertModSetting(CRef< CSearch_summary > sSum, CRef< CMSModSpecSet > Modset, int modnum, bool fixed)
Definition: pepxml.cpp:190
string ConvertDouble(double n)
Definition: pepxml.cpp:58
char ConvertAA(char in)
Definition: pepxml.cpp:71
string GetProteinName(CRef< CMSPepHit > pHit)
Definition: pepxml.cpp:298
void ConvertFromOMSSA(CMSSearch &inOMSSA, CRef< CMSModSpecSet > Modset, string basename, string newname)
convert OMSSA to PepXML
Definition: pepxml.cpp:427
int m_index
Definition: pepxml.hpp:84
pair< char, double > TAminoAcidMassPair
Definition: pepxml.hpp:77
set< char > m_staticModSet
Definition: pepxml.hpp:81
void ConvertMSHitSet(CRef< CMSHitSet > pHitSet, CMsms_run_summary::TSpectrum_query &sQueries, CRef< CMSModSpecSet > Modset, set< int > &variableMods, CMSSearch &inOMSSA)
Definition: pepxml.cpp:308
void ConvertScanID(CRef< CSpectrum_query > sQuery, string SpecID, int query, int charge)
Definition: pepxml.cpp:265
CRegexp –.
Definition: regexp.hpp:74
CSearch_hit –.
Definition: Search_hit.hpp:66
CSearch_result –.
CSearch_score –.
CSearch_summary –.
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
@ e_Ncbieaa
Definition: sequtil.hpp:57
@ e_Ncbistdaa
Definition: sequtil.hpp:58
CSpecificity –.
Definition: Specificity.hpp:66
CSpectrum_query –.
CTerminal_modification –.
CTime –.
Definition: ncbitime.hpp:296
void erase(iterator pos)
Definition: map.hpp:167
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
std::ofstream out("events_result.xml")
main entry point for tests
#define basename(path)
Definition: replacements.h:116
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
Definition: regexp.cpp:253
CTempString GetMatch(CTempString str, size_t offset=0, size_t idx=0, TMatch flags=fMatch_default, bool noreturn=false)
Get matching pattern and subpatterns.
Definition: regexp.cpp:242
@ fCompile_ignore_case
Definition: regexp.hpp:111
@ fCompile_newline
Definition: regexp.hpp:113
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5181
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5424
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static void SetFormat(const CTimeFormat &fmt)
Set the current time format.
Definition: ncbitime.cpp:1268
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
Definition: ncbitime.cpp:1512
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
list< CRef< CMSScoreSet > > TScores
Definition: MSHits_.hpp:107
const TRequest & GetRequest(void) const
Get the Request member data.
Definition: MSSearch_.hpp:204
EMSSearchType
what type of atomic mass to use
list< CRef< CMSModHit > > TMods
Definition: MSHits_.hpp:101
const TResponse & GetResponse(void) const
Get the Response member data.
Definition: MSSearch_.hpp:229
EMSModType
enumerate modification types
Definition: MSModType_.hpp:64
EMSEnzymes
enumerate enzymes
Definition: MSEnzymes_.hpp:64
@ eMSSearchType_multiisotope
@ eMSSearchType_average
@ eMSSearchType_monon15
@ eMSSearchType_monoisotopic
@ eMSSearchType_exact
@ eMSModType_modn
at the N terminus of a protein
Definition: MSModType_.hpp:66
@ eMSModType_modc
at the C terminus of a protein
Definition: MSModType_.hpp:68
@ eMSModType_modnpaa
at the N terminus of a peptide at particular amino acids
Definition: MSModType_.hpp:71
@ eMSModType_modcpaa
at the C terminus of a peptide at particular amino acids
Definition: MSModType_.hpp:73
@ eMSModType_modnp
at the N terminus of a peptide
Definition: MSModType_.hpp:70
@ eMSModType_modaa
at particular amino acids
Definition: MSModType_.hpp:65
@ eMSModType_modcaa
at the C terminus of a protein at particular amino acids
Definition: MSModType_.hpp:69
@ eMSModType_modcp
at the C terminus of a peptide
Definition: MSModType_.hpp:72
@ eMSModType_modnaa
at the N terminus of a protein at particular amino acids
Definition: MSModType_.hpp:67
TAttlist & SetAttlist(void)
Assign a value to Attlist data member.
TMod_aminoacid_mass & SetMod_aminoacid_mass(void)
Assign a value to Mod_aminoacid_mass data member.
void SetDate(const TDate &value)
Assign a value to Date data member.
void SetAttlist(TAttlist &value)
Assign a value to Attlist data member.
void SetSummary_xml(const TSummary_xml &value)
Assign a value to Summary_xml data member.
list< CRef< CSpectrum_query > > TSpectrum_query
TMsms_run_summary & SetMsms_run_summary(void)
Assign a value to Msms_run_summary data member.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
int i
yy_size_t n
int len
const double MonoMass[]
Definition: msms.hpp:77
std::istream & in(std::istream &in_, double &x_)
const double HYDROGEN_MASS
Definition: pepxml.cpp:47
const double OH_MASS
Definition: pepxml.cpp:48
map< int, string > TAAModMap
Definition: pepxml.cpp:78
pair< int, string > TAAModPair
Definition: pepxml.cpp:77
static string query
Definition: type.c:6
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:58:31 2024 by modify_doxy.py rev. 669887