NCBI C++ ToolKit
omssa.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the authors in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Lewis Y. Geer, Douglas J. Slotta
27 *
28 * File Description:
29 * code to do the ms/ms search and score matches
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <util/miscmath.h>
38 #include <util/compress/bzip2.hpp>
39 
40 
41 
42 #include "SpectrumSet.hpp"
43 #include "omssa.hpp"
44 #include "pepxml.hpp"
45 
46 #include <fstream>
47 #include <string>
48 #include <list>
49 #include <deque>
50 #include <algorithm>
51 
52 #include <math.h>
53 
56 USING_SCOPE(omssa);
57 
58 
59 
60 int
61 CSearchHelper::ReadModFiles(const string& ModFileName,
62  const string& UserModFileName,
63  const string& Path,
64  CRef <CMSModSpecSet> Modset)
65 {
66  CDirEntry DirEntry(Path);
67  string FileName;
68  try {
69  if(ModFileName == "")
70  ERR_POST(Critical << "modification filename is blank!");
71  if(!CDirEntry::IsAbsolutePath(ModFileName))
72  FileName = DirEntry.GetDir() + ModFileName;
73  else FileName = ModFileName;
74  unique_ptr<CObjectIStream>
75  modsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
76  if(modsin->fail()) {
77  ERR_POST(Fatal << "ommsacl: unable to open modification file" <<
78  FileName);
79  return 1;
80  }
81  modsin->Read(ObjectInfo(*Modset));
82  modsin->Close();
83 
84  } catch (NCBI_NS_STD::exception& e) {
85  ERR_POST(Fatal << "Unable to read modification file " <<
86  FileName << " with error " << e.what());
87  }
88 
89  // read in user mod file, if any
90  if(UserModFileName != "") {
91  try {
92  CRef <CMSModSpecSet> UserModset(new CMSModSpecSet);
93  if(!CDirEntry::IsAbsolutePath(UserModFileName))
94  FileName = DirEntry.GetDir() + UserModFileName;
95  else FileName = UserModFileName;
96  unique_ptr<CObjectIStream>
97  usermodsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
98  if(usermodsin->fail()) {
99  ERR_POST(Warning << "ommsacl: unable to open user modification file" <<
100  ModFileName);
101  return 0;
102  }
103  usermodsin->Read(ObjectInfo(*UserModset));
104  usermodsin->Close();
105  Modset->Append(*UserModset);
106  } catch (NCBI_NS_STD::exception& e) {
107  ERR_POST(Fatal << "Unable to read user modification file " <<
108  FileName << " with error " << e.what());
109  }
110  }
111  return 0;
112 }
113 
114 
115 void
116 CSearchHelper::ReadTaxFile(string& Filename, TTaxNameMap& TaxNameMap)
117 {
118  ifstream taxnames(Filename.c_str());
119  string line;
120  list<string> linelist;
121  list<string>::iterator ilist;
122  while(taxnames && !taxnames.eof()) {
123  getline(taxnames, line);
124  linelist.clear();
125  NStr::Split(line, ",", linelist, NStr::fSplit_Tokenize);
126  if(!linelist.empty()) {
127  ilist = linelist.begin();
128  ilist++;
129  TaxNameMap[NStr::StringToInt(*ilist)] = *(linelist.begin());
130  }
131  }
132 }
133 
134 void
136 {
137  if(!xml_out) return;
138  // turn on xml schema
139  xml_out->SetReferenceSchema();
140  // turn off names in named integers
141  xml_out->SetWriteNamedIntegersByValue(true);
142 }
143 
144 
145 
146 int
147 CSearchHelper::ReadFile(const string& Filename,
148  const EMSSpectrumFileType FileType,
149  CMSSearch& MySearch)
150 {
151  CRef <CMSRequest> Request (new CMSRequest);
152  MySearch.SetRequest().push_back(Request);
153 // CRef <CMSResponse> Response (new CMSResponse);
154 // MySearch.SetResponse().push_back(Response);
155 
156  CNcbiIfstream PeakFile(Filename.c_str());
157  if(!PeakFile) {
158  ERR_POST(Fatal <<" omssacl: not able to open spectrum file " <<
159  Filename);
160  return 1;
161  }
162 
163  CRef <CSpectrumSet> SpectrumSet(new CSpectrumSet);
164  (*MySearch.SetRequest().begin())->SetSpectra(*SpectrumSet);
165  return SpectrumSet->LoadFile(FileType, PeakFile);
166 }
167 
168 int
169 CSearchHelper::ReadSearchRequest(const string& Filename,
170  const ESerialDataFormat DataFormat,
171  CMSSearch& MySearch)
172 {
173  CRef <CMSRequest> Request (new CMSRequest);
174  MySearch.SetRequest().push_back(Request);
175 // CRef <CMSResponse> Response (new CMSResponse);
176 // MySearch.SetResponse().push_back(Response);
177 
178  unique_ptr<CObjectIStream>
179  in(CObjectIStream::Open(Filename.c_str(), DataFormat));
180  in->Open(Filename.c_str(), DataFormat);
181  if(in->fail()) {
182  ERR_POST(Warning << "omssacl: unable to search file" <<
183  Filename);
184  return 1;
185  }
186  in->Read(ObjectInfo(*Request));
187  in->Close();
188  return 0;
189 }
190 
191 
192 int
193 CSearchHelper::ReadCompleteSearch(const string& Filename,
194  const ESerialDataFormat DataFormat,
195  bool bz2,
196  CMSSearch& MySearch)
197 {
198  unique_ptr <CNcbiIfstream> raw_in;
199  unique_ptr <CCompressionIStream> compress_in;
200  unique_ptr <CObjectIStream> in;
201 
202  if( bz2 ) {
203  raw_in.reset(new CNcbiIfstream(Filename.c_str()));
204  compress_in.reset( new CCompressionIStream (*raw_in,
207  in.reset(CObjectIStream::Open(DataFormat, *compress_in));
208  }
209  else {
210  in.reset(CObjectIStream::Open(Filename.c_str(), DataFormat));
211  }
212  if(in->fail()) {
213  ERR_POST(Warning << "omssacl: unable to search file" <<
214  Filename);
215  return 1;
216  }
217  in->Read(ObjectInfo(MySearch));
218  in->Close();
219  return 0;
220 }
221 
222 
223 int
225  CConstRef <CMSInFile> InFile,
226  bool* SearchEngineIterative)
227 {
228  string Filename(InFile->GetInfile());
229  EMSSpectrumFileType DataFormat =
230  static_cast <EMSSpectrumFileType> (InFile->GetInfiletype());
231 
232  switch (DataFormat) {
238  return CSearchHelper::ReadFile(Filename, DataFormat, MySearch);
239  break;
241  if(SearchEngineIterative) *SearchEngineIterative = true;
242  return CSearchHelper::ReadCompleteSearch(Filename, eSerial_AsnBinary, false, MySearch);
243  break;
245  if(SearchEngineIterative) *SearchEngineIterative = true;
246  return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, false, MySearch);
247  break;
249  return CSearchHelper::ReadSearchRequest(Filename, eSerial_Xml, MySearch);
250  break;
252  return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, true, MySearch);
253  break;
258  default:
259  break;
260  }
261  return 1; // not supported
262 }
263 
264 
266  const string Filename,
267  ESerialDataFormat FileFormat,
268  bool IncludeRequest,
269  bool bz2)
270 {
271  unique_ptr <CNcbiOfstream> raw_out;
272  unique_ptr <CCompressionOStream> compress_out;
273  unique_ptr <CObjectOStream> txt_out;
274 
275  if( bz2 ) {
276  raw_out.reset(new CNcbiOfstream(Filename.c_str()));
277  compress_out.reset( new CCompressionOStream (*raw_out,
278  new CBZip2StreamCompressor(),
280  txt_out.reset(CObjectOStream::Open(FileFormat, *compress_out));
281  }
282  else {
283  txt_out.reset(CObjectOStream::Open(Filename.c_str(), FileFormat));
284  }
285 
286  if(FileFormat == eSerial_Xml) {
287  CObjectOStreamXml *xml_out = dynamic_cast <CObjectOStreamXml *> (txt_out.get());
289  }
290  if(IncludeRequest)
291  txt_out->Write(ObjectInfo(MySearch));
292  else
293  txt_out->Write(ObjectInfo(**MySearch.SetResponse().begin()));
294 }
295 
296 
297 int
300  CRef <CMSModSpecSet> Modset)
301 {
302  CMSSearchSettings::TOutfiles::const_iterator iOutFile;
303 
304  for(iOutFile = OutFiles.begin(); iOutFile != OutFiles.end(); ++iOutFile) {
305  string Filename((*iOutFile)->GetOutfile());
306  EMSSerialDataFormat DataFormat =
307  static_cast <EMSSerialDataFormat> ((*iOutFile)->GetOutfiletype());
309 
310  unique_ptr <CObjectOStream> txt_out;
311  if(DataFormat == eMSSerialDataFormat_asntext)
312  FileFormat = eSerial_AsnText;
313  if(DataFormat == eMSSerialDataFormat_asnbinary)
314  FileFormat = eSerial_AsnBinary;
315  if(DataFormat == eMSSerialDataFormat_xml)
316  FileFormat = eSerial_Xml;
317  if(DataFormat == eMSSerialDataFormat_xmlbz2)
318  FileFormat = eSerial_Xml;
319 
320  switch (DataFormat) {
325  Filename,
326  FileFormat,
327  (*iOutFile)->GetIncluderequest(),
328  false);
329  break;
332  Filename,
333  FileFormat,
334  (*iOutFile)->GetIncluderequest(),
335  true);
336  break;
338  {
339  CPepXML outPepXML;
340  outPepXML.ConvertFromOMSSA(MySearch, Modset, Filename, Filename);
341  unique_ptr<CObjectOStream> file_out(CObjectOStream::Open(Filename, eSerial_Xml));
342  *file_out << outPepXML;
343  }
344  break;
346  {
347  CNcbiOfstream oscsv;
348  oscsv.open(Filename.c_str());
349  (*MySearch.SetResponse().begin())->PrintCSV(oscsv, Modset);
350  oscsv.close();
351  }
352  break;
354  default:
355  {
356  ERR_POST(Error << "Unknown output file format " << DataFormat);
357  }
358  return 1;
359  break;
360  }
361  }
362  return 0;
363 }
364 
365 void
367 {
368  list <string> ValidError;
369  if(Settings->Validate(ValidError) != 0) {
370  list <string>::iterator iErr;
371  for(iErr = ValidError.begin(); iErr != ValidError.end(); iErr++)
372  ERR_POST(Warning << *iErr);
373  ERR_POST(Fatal << "Unable to validate settings");
374  }
375 }
376 
377 
378 void
380  CRef<CMSSearchSettings> &Settings)
381 {
382  if(FileName != "" ) {
383  try {
384  unique_ptr<CObjectIStream>
385  paramsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
386  if(paramsin->fail()) {
387  ERR_POST(Fatal << "ommsacl: unable to open parameter file" <<
388  FileName);
389  return;
390  }
391  paramsin->Read(ObjectInfo(*Settings));
392  paramsin->Close();
393 
394  } catch (NCBI_NS_STD::exception& e) {
395  ERR_POST(Fatal << "Unable to read parameter file " <<
396  FileName << " with error " << e.what());
397  }
398  }
399 }
400 
401 
402 
403 
404 /////////////////////////////////////////////////////////////////////////////
405 //
406 // CSearch::
407 //
408 // Performs the ms/ms search
409 //
410 
411 
412 CSearch::CSearch(int tNum):
413 UseRankScore(false),
414 Iterative(false),
415 RestrictedSearch(false)
416 {
417  ThreadNum = tNum;
418 }
419 
420 
422 {
423  iSearchGlobal = -1;
424  MaxMZ = 0;
425  SharedPeakSet.Reset(0);
426 }
427 
428 
429 int CSearch::InitBlast(const char *blastdb, bool use_mmap)
430 {
431  if (!blastdb) return 0;
432  rdfp.Reset(new CSeqDB(blastdb, CSeqDB::eProtein,
433  0, 0, use_mmap));
434  numseq = rdfp->GetNumOIDs();
435  return 0;
436 }
437 
438 
439 // create the ladders from sequence
440 
442  int iSearch,
443  int position,
444  int endposition,
445  int *Masses,
446  int iMissed,
447  CAA& AA,
448  int iMod,
449  CMod ModList[],
450  int NumMod)
451 {
453  SetLadderContainer().Begin(Iter);
454  while(Iter != SetLadderContainer().SetLadderMap().end()) {
455  bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
456  GetSettings()->GetNoprolineions().end(),
457  CMSMatchedPeakSetMap::Key2Series(Iter->first)) !=
458  GetSettings()->GetNoprolineions().end();
459  if (!(*(Iter->second))[iMod]->
460  CreateLadder(CMSMatchedPeakSetMap::Key2Series(Iter->first),
462  Sequence,
463  iSearch,
464  position,
465  endposition,
466  Masses[iMissed],
467  MassArray,
468  AA,
469  SetMassAndMask(iMissed, iMod).Mask,
470  ModList,
471  NumMod,
472  *SetSettings(),
473  NoProline
474  )) return 1;
475  SetLadderContainer().Next(Iter);
476  }
477 
478  return 0;
479 }
480 
481 
482 // compare ladders to experiment
// NOTE(review): the line carrying this definition's name and first parameter
// is not present in this view.
// For every ladder series visited by the ladder container iterator, the
// iMod-th ladder is passed to Peaks->CompareSortedRank() together with the
// peak list type selected from the precursor charge. One usedPeaks vector is
// shared by all of those calls. Always returns 0.
484  CMSPeak *Peaks,
485  bool OrLadders,
486  const TMassPeak *MassPeak)
487 {
// NOTE(review): MassPeak is dereferenced here, before the null check a few
// lines below -- confirm that callers never pass a null MassPeak.
488  EMSPeakListTypes Which = Peaks->GetWhich(MassPeak->Charge);
489 
// both limits are 1 when the charge is below Peaks->GetConsiderMult(),
// otherwise (or when MassPeak is null) both stay 0
490  int ChargeLimitLo(0), ChargeLimitHi(0);
491  if (MassPeak) {
492  if(MassPeak->Charge < Peaks->GetConsiderMult()) {
493  ChargeLimitLo = 1;
494  ChargeLimitHi = 1;
495  }
496  else {
497  ChargeLimitLo = 0;
498  ChargeLimitHi = 0;
499  }
500  }
501 
503  SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
// one flag per peak in the selected peak list, all initially false
504  vector<bool> usedPeaks(Peaks->SetPeakLists()[Which]->GetNum(), false);
505  while(Iter != SetLadderContainer().SetLadderMap().end()) {
506  Peaks->CompareSortedRank(*((*(Iter->second))[iMod]), Which, usedPeaks);
507  SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
508  }
509  return 0;
510 }
511 
512 
513 // compare ladders to experiment
515  CMSPeak *Peaks,
516  const TMassPeak *MassPeak)
517 {
518  int ChargeLimitLo(0), ChargeLimitHi(0);
519  if (MassPeak) {
520  if(MassPeak->Charge < Peaks->GetConsiderMult()) {
521  ChargeLimitLo = 1;
522  ChargeLimitHi = 1;
523  }
524  else {
525  ChargeLimitLo = 0;
526  ChargeLimitHi = 0;
527  }
528  }
529 
531  SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
532  while(Iter != SetLadderContainer().SetLadderMap().end()) {
533  if(Peaks->CompareTop(*((*(Iter->second))[iMod]))) return true;
534  SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
535  }
536  return false;
537 }
538 
539 
540 const bool
541 CSearch::ReSearch(const int Number) const
542 {
543  if ( GetSettings()->GetIterativesettings().GetResearchthresh() != 0.0) {
544  // look for hitset
545  CRef <CMSHitSet> HitSet;
546  HitSet = GetResponse()->FindHitSet(Number);
547  if (HitSet.IsNull()) return true;
548  if (HitSet->GetHits().empty()) return true;
549  if ((*HitSet->GetHits().begin())->GetEvalue() <=
550  GetSettings()->GetIterativesettings().GetResearchthresh())
551  return false;
552  else return true;
553  }
554  return true;
555 }
556 
558 {
559  return abs(input);
560 }
561 
562 // loads spectra into peaks
563 //void CSearch::Spectrum2Peak(CMSPeakSet& PeakSet)
565 {
566  CSpectrumSet::Tdata::const_iterator iSpectrum;
567  CMSPeak* Peaks;
568 
569  iSpectrum = GetRequest()->GetSpectra().Get().begin();
570  for (; iSpectrum != GetRequest()->GetSpectra().Get().end(); iSpectrum++) {
571  CRef <CMSSpectrum> Spectrum = *iSpectrum;
572  if (!Spectrum) {
573  ERR_POST(Error << "omssa: unable to find spectrum");
574  return;
575  }
576 
577  // reset charges so that they are absolute values. The charge sign is indicated
578  // by GetSettings()->GetChargehandling().GetNegative()
579  transform(Spectrum->SetCharge().begin(), Spectrum->SetCharge().end(), Spectrum->SetCharge().begin(), PositiveSign);
580 
581  // if iterative search and spectrum should not be re-search, skip
582  if (GetIterative() && !ReSearch(Spectrum->GetNumber()))
583  continue;
584 
585  Peaks = new CMSPeak(GetSettings()->GetHitlistlen());
586  if (!Peaks) {
587  ERR_POST(Error << "omssa: unable to allocate CMSPeak");
588  return;
589  }
590 
591  Peaks->ReadAndProcess(*Spectrum, *GetSettings());
592 #if 0
593  {
594  ofstream os("test.dta");
596  }
597 #endif
598  PeakSet->AddPeak(Peaks);
599 
600  }
601  int Numisotopes(0);
602  if(GetSettings()->CanGetNumisotopes())
603  Numisotopes = GetSettings()->GetNumisotopes();
604  bool Pepppm(false);
605  if(GetSettings()->CanGetPepppm())
606  Pepppm = GetSettings()->GetPepppm();
607  MaxMZ = PeakSet->SortPeaks(MSSCALE2INT(GetSettings()->GetPeptol()),
608  GetSettings()->GetZdep(),
609  Numisotopes, Pepppm, GetSettings()->GetChargehandling().GetNegative());
610 
611 }
612 
613 // compares TMassMasks. Lower m/z first in sort.
615  bool operator() (const TMassMask& x, const TMassMask& y)
616  {
617  if (x.Mass < y.Mass) return true;
618  return false;
619  }
620 };
621 
622 /**
623  * delete variable mods that overlap with fixed mods
624  * @param NumMod the number of modifications
625  * @param ModList modification information
626  */
628  CMod ModList[])
629 {
630  int i, j;
631  for (i = 0; i < NumMod; i++) {
632  // if variable mod
633  if (ModList[i].GetFixed() != 1) {
634  // iterate thru all mods for comparison
635  for (j = 0; j < NumMod; j++) {
636  // if fixed and at same site
637  if (ModList[j].GetFixed() == 1 &&
638  ModList[i].GetSite() == ModList[j].GetSite()) {
639  // mark mod for deletion
640  ModList[i].SetFixed() = -1;
641  }
642  } // j loop
643  } // IsFixed
644  } // i loop
645 
646  // now do the deletion
647  for (i = 0; i < NumMod;) {
648  if (ModList[i].GetFixed() == -1) {
649  NumMod--;
650  // if last mod, then just return
651  if (i == NumMod) return;
652  // otherwise, delete the modification
653  for (j=i; j < NumMod; ++j) {
654  ModList[j] = ModList[j+1];
655  }
656  }
657  else i++;
658  }
659  return;
660 }
661 
662 // update sites and masses for new peptide
664  const char *PepStart[],
665  const char *PepEnd[],
666  int NumMod[],
667  CMod ModList[][MAXMOD],
668  int Masses[],
669  int EndMasses[],
670  int NumModSites[],
671  CRef <CMSModSpecSet> &Modset)
672 {
673  // iterate over missed cleavages
674  int iMissed;
675  // maximum mods allowed
676  //int ModMax;
677  // iterate over mods
678  int iMod;
679 
680 
681  // update the longer peptides to add the new peptide (Missed-1) on the end
682  for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
683  // skip start
684  if (PepStart[iMissed] == (const char *)-1) continue;
685  // reset the end sequences
686  PepEnd[iMissed] = PepEnd[Missed - 1];
687 
688  // update new mod masses to add in any new mods from new peptide
689 
690  // first determine the maximum value for updated mod list
691  //if(NumMod[iMissed] + NumMod[Missed-1] >= MAXMOD)
692  // ModMax = MAXMOD - NumMod[iMissed];
693  //else ModMax = NumMod[Missed-1];
694 
695  // now interate thru the new entries
696  const char *OldSite(0);
697  int NumModSitesCount(0), NumModCount(0);
698  for (iMod = 0; iMod < NumMod[Missed-1]; iMod++) {
699 
700  // don't do more than the maximum number of modifications
701  if (NumModCount + NumMod[iMissed] >= MAXMOD) break;
702 
703  // if n-term peptide mod and not at the start of the peptide, don't copy
704  if ((Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnp ||
705  Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnpaa) &&
706  PepStart[iMissed] != ModList[Missed-1][iMod].GetSite()) {
707  continue;
708  }
709 
710  // if n-term protein mod, don't copy
711  if (Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modn ||
712  Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnaa) {
713  continue;
714  }
715 
716  // copy the mod to the old peptide
717  ModList[iMissed][NumModCount + NumMod[iMissed]] =
718  ModList[Missed-1][iMod];
719 
720  // increment site count if not fixed mod and not the same site
721  if (OldSite != ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite() &&
722  ModList[iMissed][NumModCount + NumMod[iMissed]].GetFixed() != 1) {
723  NumModSitesCount++;
724  OldSite = ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite();
725  }
726 
727  // increment number of mods
728  NumModCount++;
729 
730 
731  }
732 
733  // update old masses
734  Masses[iMissed] += Masses[Missed - 1];
735 
736  // update end masses
737  EndMasses[iMissed] = EndMasses[Missed - 1];
738 
739  // update number of Mods
740  NumMod[iMissed] += NumModCount;
741 
742  // update number of Modification Sites
743  NumModSites[iMissed] += NumModSitesCount;
744  }
745 }
746 
747 
748 /**
749  * count the number of unique sites modified
750  *
751  * @param NumModSites the number of unique mod sites
752  * @param NumMod the number of mods
753  * @param ModList modification information
754  */
755 void CSearch::CountModSites(int &NumModSites,
756  int NumMod,
757  CMod ModList[])
758 {
759  NumModSites = 0;
760  int i;
761  const char *OldSite(0);
762 
763  for (i = 0; i < NumMod; i++) {
764  // skip repeated sites and fixed mods
765  if (ModList[i].GetSite() != OldSite && ModList[i].GetFixed() != 1 ) {
766  NumModSites++;
767  OldSite = ModList[i].GetSite();
768  }
769  }
770 }
771 
772 
773 // create the various combinations of mods
775  const char *PepStart[],
776  int Masses[],
777  int EndMasses[],
778  int NumMod[],
779  int NumMassAndMask[],
780  int NumModSites[],
781  CMod ModList[][MAXMOD]
782  )
783 {
784  // need to iterate thru combinations that have iMod.
785  // i.e. iMod = 3 and NumMod=5
786  // 00111, 01011, 10011, 10101, 11001, 11010, 11100, 01101,
787  // 01110
788  // i[0] = 0 --> 5-3, i[1] = i[0]+1 -> 5-2, i[3] = i[1]+1 -> 5-1
789  // then construct bool mask
790 
791  // holders for calculated modification mask and modified peptide masses
792  unsigned Mask, MassOfMask;
793  // iterate thru active mods
794  int iiMod;
795  // keep track of the number of unique masks created. each corresponds to a ladder
796  int iModCount;
797  // missed cleavage
798  int iMissed;
799  // number of mods to consider
800  int iMod;
801  // positions of mods
802  int ModIndex[MAXMOD];
803 
804  // go thru missed cleaves
805  for (iMissed = 0; iMissed < Missed; iMissed++) {
806  // skip start
807  if (PepStart[iMissed] == (const char *)-1) continue;
808  iModCount = 0;
809 
810  // set up non-modified mass
811  SetMassAndMask(iMissed, iModCount).Mass =
812  Masses[iMissed] + EndMasses[iMissed];
813  SetMassAndMask(iMissed, iModCount).Mask = 0;
814 
815  int NumVariable(NumMod[iMissed]); // number of variable mods
816  int NumFixed;
817  // add in fixed mods
818  for (iMod = 0; iMod < NumMod[iMissed]; iMod++) {
819  if (ModList[iMissed][iMod].GetFixed()) {
820  SetMassAndMask(iMissed, iModCount).Mass += ModList[iMissed][iMod].GetPrecursorDelta();
821  SetMassAndMask(iMissed, iModCount).Mask |= 1 << iMod;
822  NumVariable--;
823  }
824  }
825  iModCount++;
826  NumFixed = NumMod[iMissed] - NumVariable;
827 
828  // go thru number of mods allowed
829 // for(iMod = 0; iMod < NumVariable && iModCount < MaxModPerPep; iMod++) {
830  for (iMod = 0; iMod < NumModSites[iMissed] && iModCount < MaxModPerPep; iMod++) {
831 
832  // initialize ModIndex that points to mod sites
833 
834  // todo: ModIndex must always include fixed mods
835 
836  InitModIndex(ModIndex, iMod, NumMod[iMissed],
837  NumModSites[iMissed], ModList[iMissed]);
838  do {
839 
840  // calculate mass
841  MassOfMask = SetMassAndMask(iMissed, 0).Mass;
842  for (iiMod = 0; iiMod <= iMod; iiMod++ )
843  MassOfMask += ModList[iMissed][ModIndex[iiMod + NumFixed]].GetPrecursorDelta();
844  // make bool mask
845  Mask = MakeBoolMask(ModIndex, iMod + NumFixed);
846  // put mass and mask into storage
847  SetMassAndMask(iMissed, iModCount).Mass = MassOfMask;
848  SetMassAndMask(iMissed, iModCount).Mask = Mask;
849 #if 0
850  printf("NumMod = %d iMod = %d, Mask = \n", NumMod[iMissed], iMod);
851  int iii;
852  for (iii=NumMod[iMissed]-1; iii >= 0; iii--) {
853  if (Mask & 1 << iii) printf("1");
854  else printf("0");
855  }
856  printf("\n");
857 #endif
858  // keep track of the number of ladders
859  iModCount++;
860 
861  } while (iModCount < MaxModPerPep &&
862  CalcModIndex(ModIndex, iMod, NumMod[iMissed], NumFixed,
863  NumModSites[iMissed], ModList[iMissed]));
864  } // iMod
865 
866  // if exact mass, add neutrons as appropriate
867  if (SetSettings()->GetPrecursorsearchtype() == eMSSearchType_exact) {
868  int ii;
869  for (ii = 0; ii < iModCount; ++ii) {
870  SetMassAndMask(iMissed, ii).Mass +=
871  SetMassAndMask(iMissed, ii).Mass /
872  MSSCALE2INT(GetSettings()->GetExactmass()) *
874  }
875  }
876 
877 
878  // sort mask and mass by mass
879  sort(MassAndMask.get() + iMissed*MaxModPerPep, MassAndMask.get() + iMissed*MaxModPerPep + iModCount,
880  CMassMaskCompare());
881  // keep track of number of MassAndMask
882  NumMassAndMask[iMissed] = iModCount;
883 
884  } // iMissed
885 }
886 
887 
888 void CSearch::SetIons(list <EMSIonSeries> & Ions)
889 {
890  if (GetSettings()->GetIonstosearch().size() < 1) {
891  ERR_POST(Fatal << "omssa: at least one ions series to search need to be specified");
892  }
893  CMSSearchSettings::TIonstosearch::const_iterator i;
894  i = GetSettings()->GetIonstosearch().begin();
895  for(; i != GetSettings()->GetIonstosearch().end(); ++i) {
896  Ions.push_back(static_cast <EMSIonSeries> (*i));
897  }
898 }
899 
900 
901 void CSearch::InitLadders(list <EMSIonSeries> & Ions)
902 {
903 
904  int MaxLadderSize = GetSettings()->GetMaxproductions();
905  if (MaxLadderSize == 0) MaxLadderSize = kMSLadderMax;
906 
907  int i;
909  list <EMSIonSeries> ::const_iterator iIons;
910 
911  for (iIons = Ions.begin(); iIons != Ions.end(); ++iIons) {
912  for(i = 1; i <= GetSettings()->GetChargehandling().GetMaxproductcharge(); ++i) {
914  push_back(TSeriesChargePairList::value_type(i, *iIons));
915  }
916  }
918 }
919 
920 
922 {
923  SetOidSet().clear();
924  if (GetSettings()->GetIterativesettings().GetSubsetthresh() != 0.0) {
925  SetRestrictedSearch() = true;
926  GetResponse()->
927  GetOidsBelowThreshold(
928  SetOidSet(),
929  GetSettings()->GetIterativesettings().GetSubsetthresh());
930  }
931 }
932 
933 int CSearch::iSearchGlobal = -1;
934 int CSearch::MaxMZ = 0;
938 DEFINE_STATIC_FAST_MUTEX(PeaksExaminedMutex);
939 
941  CRef <CMSResponse> MyResponseIn,
942  CRef <CMSModSpecSet> Modset,
943  CRef <CMSSearchSettings> SettingsIn,
944  TOMSSACallback Callback,
945  void *CallbackData)
946 {
947  initRequestIn = MyRequestIn;
948  initResponseIn = MyResponseIn;
949  initModset = Modset;
950  initSettingsIn = SettingsIn;
951  initCallback = Callback;
952  initCallbackData = CallbackData;
953 }
954 
955 void* CSearch::Main(void)
956 {
959  initModset,
961  initCallback);
962 
963  return new bool(true);
964 }
965 
966 void CSearch::OnExit(void)
967 {
968 }
969 
971 {
972  initRequestIn = fromObj->initRequestIn;
973  initResponseIn = fromObj->initResponseIn;
974  initModset = fromObj->initModset;
975  initSettingsIn = fromObj->initSettingsIn;
976  initCallback = fromObj->initCallback;
978  UseRankScore = fromObj->UseRankScore;
979  Iterative = fromObj->Iterative;
980  numseq = fromObj->numseq;
981  rdfp = fromObj->rdfp;
982 
983 }
984 
986  CRef <CMSResponse> MyResponseIn,
987  CRef <CMSModSpecSet> Modset,
988  CRef <CMSSearchSettings> SettingsIn,
989  TOMSSACallback Callback,
990  void *CallbackData)
991 {
992  try {
993  SetSettings().Reset(SettingsIn);
994  SetRequest().Reset(MyRequestIn);
995  SetResponse().Reset(MyResponseIn);
996 
997  // force the mass scale settings to what is currently used.
1000 
1001  // set up automatic number of peaks per bin for noise filter
1002  if (GetSettings()->GetSinglenum() == 0) {
1003  SetSettings()->SetSinglenum() = GetSettings()->GetIonstosearch().size();
1004  }
1005  if (GetSettings()->GetDoublenum() == 0) {
1006  SetSettings()->SetDoublenum() = GetSettings()->GetIonstosearch().size();
1007  }
1008 
1010  (GetSettings()->GetEnzyme()));
1011 
1012  // do iterative search setup
1013  if (GetIterative()) {
1014  // check to see if the same sequence library
1015  if (GetResponse()->GetDbversion() != Getnumseq())
1016  ERR_POST(Fatal <<
1017  "number of sequences in search library is not the same as previously searched. Unable to do iterative search.");
1018  // if restricted sequence search
1019  // scan thru hits and make map of oids
1020  MakeOidSet();
1021  }
1022 
1023  // set maximum number of ladders to calculate per peptide
1024  MaxModPerPep = GetSettings()->GetMaxmods();
1026 
1027  list <EMSIonSeries> Ions;
1028  SetIons(Ions);
1029  InitLadders(Ions);
1030 
1032  CAA AA;
1033 
1034  int Missed; // number of missed cleaves allowed + 1
1035  if (GetEnzyme()->GetNonSpecific()) Missed = 1;
1036  else Missed = GetSettings()->GetMissedcleave()+1;
1037 
1038  int iMissed; // iterate thru missed cleavages
1039 
1040  int iSearch, hits;
1041  int endposition, position;
1042 
1043  // initialize fixed mods
1044  FixedMods.Init(GetSettings()->GetFixed(), Modset);
1045  MassArray.Init(FixedMods, GetSettings()->GetProductsearchtype(), Modset);
1047  GetSettings()->GetPrecursorsearchtype(), Modset);
1048  // initialize variable mods and set enzyme to use n-term methionine cleavage
1049  SetEnzyme()->SetNMethionine() =
1050  VariableMods.Init(GetSettings()->GetVariable(), Modset) ||
1052 
1053  const int *IntMassArray = MassArray.GetIntMass();
1054  const int *PrecursorIntMassArray = PrecursorMassArray.GetIntMass();
1055  const char *PepStart[MAXMISSEDCLEAVE];
1056  const char *PepEnd[MAXMISSEDCLEAVE];
1057 
1058  // contains informations on individual mod sites
1059  CMod ModList[MAXMISSEDCLEAVE][MAXMOD];
1060 
1061  int NumMod[MAXMISSEDCLEAVE];
1062  // the number of modification sites. always less than NumMod.
1063  int NumModSites[MAXMISSEDCLEAVE];
1064 
1065 
1066  // calculated masses and masks
1068 
1069  // the number of masses and masks for each peptide
1070  int NumMassAndMask[MAXMISSEDCLEAVE];
1071 
1072  // set up mass array, indexed by missed cleavage
1073  // note that EndMasses is the end mass of peptide, kept separate to allow
1074  // reuse of Masses array in missed cleavage calc
1075  int Masses[MAXMISSEDCLEAVE];
1076  int EndMasses[MAXMISSEDCLEAVE];
1077 
1078  int iMod; // used to iterate thru modifications
1079 
1080  bool SequenceDone; // are we done iterating through the sequences?
1081 
1082  const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
1083  CMSSearchSettings::TTaxids::const_iterator iTax;
1084 
1085  CMSHit NewHit; // a new hit of a ladder to an m/z value
1086  CMSHit *NewHitOut; // copy of new hit
1087 
1088  const TMassPeak *MassPeak; // peak currently in consideration
1089  CMSPeak* Peaks;
1090  CIntervalTree::const_iterator im; // iterates over interval tree
1091 
1092  // iterates over ladders
1093  TLadderMap::iterator Iter;
1094 
1095  {{
1096  CFastMutexGuard guard(PeakSetMutex);
1097  if (SharedPeakSet == null) {
1098  SharedPeakSet = new CMSPeakSet();
1100  }
1101  }}
1102  vector <int> taxids;
1103  vector <int>::iterator itaxids;
1104  bool TaxInfo(false); // check to see if any tax information in blast library
1105  bool iSearchNotDone(true);
1106 
1107  // iterate through sequences
1108  //for (iSearch = 0; rdfp->CheckOrFindOID(iSearch); iSearch++) {
1109  while (iSearchNotDone) {
1110  {{
1111  CFastMutexGuard guard(iSearchMutex);
1112  iSearchGlobal++;
1114  iSearchNotDone = false;
1115  continue;
1116  }
1117  iSearch = iSearchGlobal;
1118  if (iSearch % 10000 == 0) {
1119  if(Callback) Callback(Getnumseq(), iSearch, CallbackData);
1120  }
1121  }}
1122 
1123  // if oid restricted search, check to see if oid is in set
1124  if (GetRestrictedSearch() && SetOidSet().find(iSearch) == SetOidSet().end())
1125  continue;
1126 
1127  if (SetSettings()->IsSetTaxids()) {
1128  rdfp->GetTaxIDs(iSearch, taxids, false);
1129  for (itaxids = taxids.begin(); itaxids != taxids.end(); ++itaxids) {
1130  if (*itaxids == 0) continue;
1131  TaxInfo = true;
1132  for (iTax = Tax.begin(); iTax != Tax.end(); ++iTax) {
1133  if (*itaxids == *iTax) goto TaxContinue;
1134  }
1135  }
1136  continue;
1137  }
1138  TaxContinue:
1139  CSeqDBSequence Sequence(rdfp.GetPointer(), iSearch);
1140  SequenceDone = false;
1141 
1142  // initialize missed cleavage matrix
1143  for (iMissed = 0; iMissed < Missed; iMissed++) {
1144  PepStart[iMissed] = (const char *)-1; // mark start
1145  PepEnd[iMissed] = Sequence.GetData();
1146  Masses[iMissed] = 0;
1147  EndMasses[iMissed] = 0;
1148  NumMod[iMissed] = 0;
1149  NumModSites[iMissed] = 0;
1150 
1151  ModList[iMissed][0].Reset();
1152  }
1153  PepStart[Missed - 1] = Sequence.GetData();
1154 
1155  // if non-specific enzyme, set stop point
1156  if (SetEnzyme()->GetNonSpecific()) {
1157  SetEnzyme()->SetStop() = Sequence.GetData() + SetSettings()->GetMinnoenzyme() - 1;
1158  }
1159 
1160  // iterate thru the sequence by digesting it
1161  while (!SequenceDone) {
1162 
1163 
1164  // zero out no missed cleavage peptide mass and mods
1165  // note that Masses and EndMass are separate to reuse
1166  // masses during the missed cleavage calculation
1167  Masses[Missed - 1] = 0;
1168  EndMasses[Missed - 1] = 0;
1169  NumMod[Missed - 1] = 0;
1170  NumModSites[Missed - 1] = 0;
1171  // init no modification elements
1172  ModList[Missed - 1][0].Reset();
1173 
1174  // calculate new stop and mass
1175  SequenceDone =
1176  SetEnzyme()->CalcAndCut(Sequence.GetData(),
1177  Sequence.GetData() + Sequence.GetLength() - 1,
1178  &(PepEnd[Missed - 1]),
1179  &(Masses[Missed - 1]),
1180  NumMod[Missed - 1],
1181  MAXMOD,
1182  &(EndMasses[Missed - 1]),
1184  ModList[Missed - 1],
1185  IntMassArray,
1186  PrecursorIntMassArray,
1187  Modset,
1188  SetSettings()->GetMaxproductions()
1189  );
1190 
1191  // delete variable mods that overlap with fixed mods
1192  DeleteVariableOverlap(NumMod[Missed - 1],
1193  ModList[Missed - 1]);
1194 
1195  // count the number of unique sites modified
1196  CountModSites(NumModSites[Missed - 1],
1197  NumMod[Missed - 1],
1198  ModList[Missed - 1]);
1199 
1200  UpdateWithNewPep(Missed, PepStart, PepEnd, NumMod, ModList,
1201  Masses, EndMasses, NumModSites, Modset);
1202 
1203  CreateModCombinations(Missed, PepStart, Masses,
1204  EndMasses, NumMod, NumMassAndMask,
1205  NumModSites, ModList);
1206 
1207 
1208  int OldMass; // keeps the old peptide mass for comparison
1209  bool NoMassMatch; // was there a match to the old mass?
1210 
1211  for (iMissed = 0; iMissed < Missed; iMissed++) {
1212  if (PepStart[iMissed] == (const char *)-1) continue; // skip start
1213 
1214  // get the start and stop position, inclusive, of the peptide
1215  position = PepStart[iMissed] - Sequence.GetData();
1216  endposition = PepEnd[iMissed] - Sequence.GetData();
1217 
1218  // init bool for "Has ladder been calculated?"
1219  ClearLadderCalc(NumMassAndMask[iMissed]);
1220 
1221  OldMass = 0;
1222  NoMassMatch = true;
1223 
1224  // go thru total number of mods
1225  for (iMod = 0; iMod < NumMassAndMask[iMissed]; iMod++) {
1226 
1227  // have we seen this mass before?
1228  if (SetMassAndMask(iMissed, iMod).Mass == OldMass &&
1229  NoMassMatch) continue;
1230  NoMassMatch = true;
1231  OldMass = SetMassAndMask(iMissed, iMod).Mass;
1232 
1233  // return peaks where theoretical mass is <= precursor mass + tol
1234  // and >= precursor mass - tol
1235  if (!SetEnzyme()->GetTopDown())
1237  // if top-down enzyme, skip the interval tree match
1238  else
1240 
1241  for (; im; ++im ) {
1242  MassPeak = static_cast <const TMassPeak *> (im.GetValue().GetPointerOrNull());
1243 
1244  Peaks = MassPeak->Peak;
1245  // make sure we look thru other mod masks with the same mass
1246  NoMassMatch = false;
1247 
1248  if (!GetLadderCalc(iMod)) {
1249  if (CreateLadders(Sequence.GetData(),
1250  iSearch,
1251  position,
1252  endposition,
1253  Masses,
1254  iMissed,
1255  AA,
1256  iMod,
1257  ModList[iMissed],
1258  NumMod[iMissed]) != 0) continue;
1259  SetLadderCalc(iMod) = true;
1260  // continue to next sequence if ladders not successfully made
1261  }
1262  else {
1263  TLadderMap::iterator Iter;
1264  SetLadderContainer().Begin(Iter);
1265  while(Iter != SetLadderContainer().SetLadderMap().end()) {
1266  (*(Iter->second))[iMod]->ClearHits();
1267  SetLadderContainer().Next(Iter);
1268  }
1269  }
1270 
1271  if (UseRankScore) {
1272  {{
1273  CFastMutexGuard guard(PeaksExaminedMutex);
1274  Peaks->SetPeptidesExamined(MassPeak->Charge)++;
1275  }}
1276  }
1277  if (CompareLaddersTop(iMod,
1278  Peaks,
1279  MassPeak)
1280  ) {
1281  if (!UseRankScore) {
1282  {{
1283  CFastMutexGuard guard(PeaksExaminedMutex);
1284  Peaks->SetPeptidesExamined(MassPeak->Charge)++;
1285  }}
1286  }
1287  CompareLadders(iMod,
1288  Peaks,
1289  false,
1290  MassPeak);
1291  hits = 0;
1292  SetLadderContainer().Begin(Iter);
1293  while(Iter != SetLadderContainer().SetLadderMap().end()) {
1294  hits += (*(Iter->second))[iMod]->HitCount();
1295  SetLadderContainer().Next(Iter);
1296  }
1297 
1298 
1299  {{
1300  CFastMutexGuard guard(PeakSetMutex);
1301  if (hits >= SetSettings()->GetMinhit()) {
1302  // need to save mods. bool map?
1303  NewHit.SetHits() = hits;
1304  NewHit.SetCharge() = MassPeak->Charge;
1305  // only record if hit kept
1306  if (Peaks->AddHit(NewHit, NewHitOut)) {
1307  NewHitOut->SetStart() = position;
1308  NewHitOut->SetStop() = endposition;
1309  NewHitOut->SetSeqIndex() = iSearch;
1310  NewHitOut->SetExpMass() = MassPeak->Mass;
1311  // record the hits
1312  NewHitOut->
1313  RecordMatches(SetLadderContainer(),
1314  iMod,
1315  Peaks,
1316  SetMassAndMask(iMissed, iMod).Mask,
1317  ModList[iMissed],
1318  NumMod[iMissed],
1319  PepStart[iMissed],
1320  SetSettings()->GetSearchctermproduct(),
1321  SetSettings()->GetSearchb1(),
1322  SetMassAndMask(iMissed, iMod).Mass
1323  );
1324  }
1325  }
1326  }}
1327  } // new addition
1328  } // MassPeak
1329  } //iMod
1330  } // iMissed
1331  if (SetEnzyme()->GetNonSpecific()) {
1332  int NonSpecificMass(Masses[0] + EndMasses[0]);
1333  PartialLoop:
1334 
1335  // check that stop is within bounds
1336  //// upper bound is max precursor mass divided by lightest AA
1337  //// if(enzyme->GetStop() - PepStart[0] < MaxMZ/MonoMass[7]/MSSCALE &&
1338  // upper bound redefined so that minimum mass of existing peptide
1339  // is less than the max precursor mass minus the mass of glycine
1340  // assumes that any mods have positive mass
1341 
1342  // argghh, doesn't work for semi-tryptic, which resets the mass
1343  // need to use different criterion if semi-tryptic and start position was
1344  // moved. otherwise this criterion is OK
1345  if (NonSpecificMass < MaxMZ /*- MSSCALE2INT(MonoMass[7]) */&&
1346  SetEnzyme()->GetStop() < Sequence.GetData() + Sequence.GetLength() - 1 /*-1 added*/ &&
1347  (SetSettings()->GetMaxnoenzyme() == 0 ||
1348  SetEnzyme()->GetStop() - PepStart[0] + 1 < SetSettings()->GetMaxnoenzyme())
1349  ) {
1350  SetEnzyme()->SetStop()++;
1351  NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*(SetEnzyme()->GetStop())]];
1352  }
1353  // reset to new start with minimum size
1354  else if ( PepStart[0] < Sequence.GetData() + Sequence.GetLength() -
1355  SetSettings()->GetMinnoenzyme()) {
1356  PepStart[0]++;
1357  SetEnzyme()->SetStop() = PepStart[0] + SetSettings()->GetMinnoenzyme() - 1;
1358 
1359  // reset mass
1360  NonSpecificMass = 0;
1361  const char *iSeqChar;
1362  for (iSeqChar = PepStart[0]; iSeqChar <= SetEnzyme()->GetStop(); iSeqChar++)
1363  NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*iSeqChar]];
1364  // reset sequence done flag if at end of sequence
1365  SequenceDone = false;
1366  }
1367  else SequenceDone = true;
1368 
1369  // if this is partial tryptic, loop back if one end or the other is not tryptic
1370  // for start, need to check sequence before (check for start of seq)
1371  // for end, need to deal with end of protein case
1372  if (!SequenceDone && SetEnzyme()->GetCleaveNum() > 0 &&
1373  PepStart[0] != Sequence.GetData() &&
1374  SetEnzyme()->GetStop() != Sequence.GetData() + Sequence.GetLength() - 1 /* -1 added */ ) {
1375  if (!SetEnzyme()->CheckCleaveChar(PepStart[0]-1) &&
1377  goto PartialLoop;
1378  }
1379 
1380  PepEnd[0] = PepStart[0];
1381  }
1382  else {
1383  if (!SequenceDone) {
1384  int NumModCount;
1385  const char *OldSite;
1386  int NumModSitesCount;
1387  // get rid of longest peptide and move the other peptides down the line
1388  for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
1389  // move masses to next missed cleavage
1390  Masses[iMissed] = Masses[iMissed + 1];
1391  // don't move EndMasses as they are recalculated
1392 
1393  // move the modification data
1394  NumModCount = 0;
1395  OldSite = 0;
1396  NumModSitesCount = 0;
1397  for (iMod = 0; iMod < NumMod[iMissed + 1]; iMod++) {
1398  // throw away the c term peptide mods as we have a new c terminus
1399  if (Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcp &&
1400  Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcpaa) {
1401  ModList[iMissed][NumModCount] = ModList[iMissed + 1][iMod];
1402  NumModCount++;
1403  // increment mod site count if new site and not fixed mod
1404  if (OldSite != ModList[iMissed + 1][iMod].GetSite() &&
1405  ModList[iMissed + 1][iMod].GetFixed() != 1) {
1406  NumModSitesCount++;
1407  OldSite = ModList[iMissed + 1][iMod].GetSite();
1408  }
1409  }
1410  }
1411  NumMod[iMissed] = NumModCount;
1412  NumModSites[iMissed] = NumModSitesCount;
1413 
1414  // copy starts to next missed cleavage
1415  PepStart[iMissed] = PepStart[iMissed + 1];
1416  }
1417 
1418  // init new start from old stop
1419  PepEnd[Missed-1] += 1;
1420  PepStart[Missed-1] = PepEnd[Missed-1];
1421  }
1422  }
1423 
1424  }
1425 
1426 
1427  }
1428 
1429 
1430  if (GetSettings()->IsSetTaxids() && !TaxInfo)
1431  ERR_POST(Warning <<
1432  "Taxonomically restricted search specified and no matching organisms found in sequence library. Did you use a sequence library with taxonomic information?");
1433 
1434  }
1435  catch (NCBI_NS_STD::exception& e) {
1436  ERR_POST(Info << "Exception caught in CSearch::Search: " << e.what());
1437  throw;
1438  }
1439 
1440  //return PeakSet;
1441 }
1442 
1443 ///
1444 /// Adds modification information to hitset
1445 ///
1446 
1448 {
 // Copies the non-fixed modification records held by MSHit into Hit's
 // modification list, one CMSModHit per record.
 // NOTE(review): the header line for this body (listing line 1447) is absent
 // from this extract. The doc comment above and the use of `Hit` / `MSHit`
 // suggest this is the AddModsToHit routine -- confirm against the original
 // source before relying on the name or the parameter types.
1449  int i;
1450  for (i = 0; i < MSHit->GetNumModInfo(); i++) {
1451  // screen out fixed mods
 // (records whose GetIsFixed() equals 1 are skipped and never reach Hit)
1452  if (MSHit->GetModInfo(i).GetIsFixed() == 1) continue;
 // new reference-held CMSModHit that receives the record's site and mod enum
1453  CRef< CMSModHit > ModHit(new CMSModHit);
1454  ModHit->SetSite() = MSHit->GetModInfo(i).GetSite();
1455  ModHit->SetModtype() = MSHit->GetModInfo(i).GetModEnum() ;
 // appended in record order, so Hit's list follows MSHit's ordering
1456  Hit->SetMods().push_back(ModHit);
1457  }
1458 }
1459 
1460 
1461 ///
1462 /// Adds ion information to hitset
1463 ///
1464 
1466 {
 // Copies every matched-ion record held by MSHit into Hit's m/z hit list,
 // one CMSMZHit per record.
 // NOTE(review): the header line for this body (listing line 1465) is absent
 // from this extract. The doc comment above and the use of `Hit` / `MSHit`
 // suggest this is the AddIonsToHit routine -- confirm against the original
 // source before relying on the name or the parameter types.
1467  int i;
 // the loop bound is MSHit->GetHits(), i.e. the hit count stored on MSHit
1468  for (i = 0; i < MSHit->GetHits(); i++) {
1469  CRef<CMSMZHit> IonHit(new CMSMZHit);
 // transfer ion series, charge, series number and m/z from record i
1470  IonHit->SetIon() = MSHit->GetHitInfo(i).GetIonSeries();
1471  IonHit->SetCharge() = MSHit->GetHitInfo(i).GetCharge();
1472  IonHit->SetNumber() = MSHit->GetHitInfo(i).GetNumber();
1473  IonHit->SetMz() = MSHit->GetHitInfo(i).GetMZ();
1474  Hit->SetMzhits().push_back(IonHit);
1475  }
1476 }
1477 
1478 
1479 ///
1480 /// Makes a string hashed out of the sequence plus mods
1481 ///
1482 
1483 void CSearch::MakeModString(string& seqstring, string& modseqstring, CMSHit *MSHit)
1484 {
1485  int i;
1486  modseqstring = seqstring;
1487  for (i = 0; i < MSHit->GetNumModInfo(); i++) {
1488  modseqstring += NStr::IntToString(MSHit->GetModInfo(i).GetSite()) + ":" +
1489  NStr::IntToString(MSHit->GetModInfo(i).GetModEnum()) + ",";
1490  }
1491 }
1492 
1493 
1495  int Stop,
1496  string &seqstring,
1498 {
 // Rebuilds seqstring as the printable form of residues Start..Stop
 // (both ends inclusive, see the `<=` loop bound) of Sequence.
 // NOTE(review): two lines of this definition are absent from the extract:
 // listing line 1494 (return type, function name and the parameter that the
 // body calls `Start`) and listing line 1497 (the parameter that the body
 // calls `Sequence`). Confirm both against the original source.
1499  int iseq;
 // discard whatever the caller left in the output string
1500  seqstring.erase();
1501 
1502  for (iseq = Start; iseq <= Stop; iseq++) {
 // the raw residue value is used as an index into the UniqueAA table and the
 // looked-up character is appended
 // NOTE(review): no bounds check on Start/Stop against the sequence length is
 // visible here -- presumably guaranteed by callers; verify.
1503  seqstring += UniqueAA[Sequence.GetData()[iseq]];
1504  }
1505 }
1506 
1507 
1509 {
 // Drains PeakSet->GetPeaks() from the front: for each CMSPeak it scores the
 // stored hits, converts the acceptable ones into CMSHits / CMSPepHit records
 // inside a CMSHitSet held by the response object, deletes the CMSPeak, and
 // finally calls WriteBioseqs().
 // NOTE(review): the header line for this body (listing line 1508) is absent
 // from this extract, so the function name, return type and the declaration
 // of `PeakSet` are not visible here. Three statements inside the body are
 // missing as well (listing lines 1523, 1627 and 1644); each is flagged below.
1510 
 // e-value threshold scan range (lo, hi, step) and the reporting cutoff,
 // all read from the search settings
1511  double ThreshStart = GetSettings()->GetCutlo();
1512  double ThreshEnd = GetSettings()->GetCuthi();
1513  double ThreshInc = GetSettings()->GetCutinc();
1514  double Evalcutoff = GetSettings()->GetCutoff();
1515 
1516  CMSPeak* Peaks;
1517 
1518  TScoreList ScoreList;
1519  TScoreList::iterator iScoreList;
1520  CMSHit * MSHit;
1521 
1522  // set the search library version
 // NOTE(review): the statement this comment describes (listing line 1523) is
 // absent from this extract.
1524 
1525  // Reset the oid set for tracking results
1526  SetOidSet().clear();
1527 
 // every path through the loop body ends by deleting the front CMSPeak and
 // popping it, so the loop terminates once the list is empty
1528  while(!PeakSet->GetPeaks().empty()) {
1529  Peaks = *(PeakSet->GetPeaks().begin());
1530 
1531  // add to hitset
1532  CRef< CMSHitSet > HitSet(null);
1533 
1534  // if iterative search, try to find hitset
1535  if (GetIterative()) {
1536  HitSet = SetResponse()->FindHitSet(Peaks->GetNumber());
 // a miss is only warned about; a fresh hitset is created just below
1537  if (HitSet.IsNull())
1538  ERR_POST(Warning << "unable to find matching hitset");
1539  }
1540 
1541  // create a hitset if necessary
1542  if (HitSet.IsNull()) {
1543  HitSet = new CMSHitSet;
 // NOTE(review): on this early return the remaining CMSPeak objects are not
 // deleted and WriteBioseqs() is not called.
1544  if (!HitSet) {
1545  ERR_POST(Error << "omssa: unable to allocate hitset");
1546  return;
1547  }
1548  HitSet->SetNumber(Peaks->GetNumber());
1549  HitSet->SetIds() = Peaks->GetName();
1550  SetResponse()->SetHitsets().push_back(HitSet);
1551  }
 // stamped on both newly created and reused hitsets
1552  HitSet->SetSettingid() = GetSettings()->GetSettingid();
1553 
1554  // if there weren't enough peaks to do a search, note in error status
1555  if (Peaks->GetError() == eMSHitError_notenuffpeaks) {
1556  _TRACE("empty set");
1557  HitSet->SetError(eMSHitError_notenuffpeaks);
1558  ScoreList.clear();
1559  delete *(PeakSet->GetPeaks().begin());
1560  PeakSet->GetPeaks().pop_front();
1561  continue;
1562  }
1563 
 // MinThreshold defaults to ThreshStart; when rank scoring is in use the
 // scan below is skipped and that default is what gets used
1564  double Threshold, MinThreshold(ThreshStart), MinEval(1000000.0L);
1565  if (!UseRankScore) {
1566  // now calculate scores and sort
 // try each threshold and remember the one whose first ScoreList entry has
 // the lowest key
 // NOTE(review): nothing visible here guards against ThreshInc <= 0, which
 // would keep this loop from advancing -- confirm settings validation.
1567  for (Threshold = ThreshStart; Threshold <= ThreshEnd;
1568  Threshold += ThreshInc) {
1569  CalcNSort(ScoreList, Threshold, Peaks);
1570  if (!ScoreList.empty()) {
1571  _TRACE("Threshold = " << Threshold <<
1572  "EVal = " << ScoreList.begin()->first);
1573  }
1574  if (!ScoreList.empty() && ScoreList.begin()->first < MinEval) {
1575  MinEval = ScoreList.begin()->first;
1576  MinThreshold = Threshold;
1577  }
1578  ScoreList.clear();
1579  }
1580  }
 // final scoring pass at the chosen threshold; ScoreList now feeds the output
1581  _TRACE("Min Threshold = " << MinThreshold);
1582  CalcNSort(ScoreList,
1583  MinThreshold,
1584  Peaks);
1585 
1586  // if iterative search, check to see if hitset needs to be replaced
 // replace-threshold == 0: replace when the hitset is empty or the new best
 // score is <= the e-value of the existing first hit;
 // replace-threshold != 0: replace when the new best score is <= threshold.
 // Otherwise the existing hits are kept and this spectrum is dropped.
1587  if (GetIterative() && !ScoreList.empty()) {
1588  if ((GetSettings()->GetIterativesettings().GetReplacethresh() == 0.0 &&
1589  (HitSet->GetHits().empty() ||
1590  ScoreList.begin()->first <= (*HitSet->GetHits().begin())->GetEvalue())) ||
1591  (GetSettings()->GetIterativesettings().GetReplacethresh() != 0.0 &&
1592  ScoreList.begin()->first <= GetSettings()->GetIterativesettings().GetReplacethresh())) {
1593  HitSet->SetHits().clear();
1594  }
1595  else {
1596  ScoreList.clear();
1597  delete *(PeakSet->GetPeaks().begin());
1598  PeakSet->GetPeaks().pop_front();
1599  continue;
1600  }
1601  }
1602 
1603  const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
1604  CMSSearchSettings::TTaxids::const_iterator iTax;
1605 
1606  // keep a list of redundant peptides
 // keyed by the sequence+mods string; the value is the CMSHits already
 // created for that key, so later deflines attach to the same hit
1607  map <string, CMSHits * > PepDone;
1608  int HitNum(0);
1609  // add to hitset by score
 // HitNum is advanced by the for-increment, so it also counts entries that
 // are skipped by the `continue` statements below
1610  for (iScoreList = ScoreList.begin();
1611  iScoreList != ScoreList.end();
1612  ++iScoreList,++HitNum) {
1613 
1614  double Score = iScoreList->first;
1615  if (Score > Evalcutoff)
1616  continue;
 // a reported-hit count of 0 disables the limit
1617  if(GetSettings()->CanGetReportedhitcount())
1618  if(GetSettings()->GetReportedhitcount() != 0 && HitNum >= GetSettings()->GetReportedhitcount())
1619  continue;
1620 
1621  CMSHits * Hit;
1622  CMSPepHit * Pephit;
1623 
1624  MSHit = iScoreList->second;
1625 
1626  CBlast_def_line_set::Tdata::const_iterator iDefLine;
 // NOTE(review): `Hdr` is used below but its declaration (listing line 1627)
 // is absent from this extract.
1628  // scan taxids
1629  for (iDefLine = Hdr->Get().begin();
1630  iDefLine != Hdr->Get().end();
1631  ++iDefLine) {
 // with a taxid restriction, deflines whose taxid is not in Tax are skipped
1632  if (GetSettings()->IsSetTaxids()) {
1633  for (iTax = Tax.begin(); iTax != Tax.end(); iTax++) {
1634  if ((*iDefLine)->GetTaxid() == *iTax) goto TaxContinue2;
1635  }
1636  continue;
1637  }
1638  TaxContinue2:
1639  string seqstring, modseqstring;
1640 
1641  // keep a list of the oids
1642  SetOidSet().insert(MSHit->GetSeqIndex());
1643  // get the sequence
 // NOTE(review): `Sequence` is used below but its declaration (listing line
 // 1644) is absent from this extract.
1645 
1646  string tempstartstop;
1647  CreateSequence(MSHit->GetStart(), MSHit->GetStop(),
1648  seqstring, Sequence);
1649  MakeModString(seqstring, modseqstring, MSHit);
1650 
 // reuse the CMSHits for a peptide+mods key already seen for this spectrum
1651  if (PepDone.find(modseqstring) != PepDone.end()) {
1652  Hit = PepDone[modseqstring];
1653  }
1654  else {
 // ownership passes to the CRef pushed onto HitSet further down; PepDone
 // keeps only a plain pointer
1655  Hit = new CMSHits;
1656  Hit->SetTheomass(MSHit->GetTheoreticalMass());
1657  Hit->SetPepstring(seqstring);
1658  // set the start AA, if there is one
1659  if (MSHit->GetStart() > 0) {
1660  tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStart()-1]];
1661  Hit->SetPepstart(tempstartstop);
1662  }
1663  else Hit->SetPepstart("");
1664 
1665  // set the end AA, if there is one
1666  if (MSHit->GetStop() < Sequence.GetLength() - 1) {
1667  tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStop()+1]];
1668  Hit->SetPepstop(tempstartstop);
1669  }
1670  else Hit->SetPepstop("");
1671 
 // NaN and infinite scores are logged and replaced by kHighEval
1672  if (isnan(Score)) {
1673  ERR_POST(Info << "Not a number in hitset " <<
1674  HitSet->GetNumber() <<
1675  " peptide " << modseqstring);
1676  Score = kHighEval;
1677  }
1678  else if (!finite(Score)) {
1679  ERR_POST(Info << "Infinite number in hitset " <<
1680  HitSet->GetNumber() <<
1681  " peptide " << modseqstring);
1682  Score = kHighEval;
1683  }
1684  Hit->SetEvalue(Score);
 // p-value = score divided by the peptides-examined count for this charge
 // NOTE(review): no guard against a zero count is visible here -- confirm.
1685  Hit->SetPvalue(Score/Peaks->
1686  GetPeptidesExamined(MSHit->
1687  GetCharge()));
1688  Hit->SetCharge(MSHit->GetCharge());
1689  Hit->SetMass(MSHit->GetExpMass());
1690  // insert mods here
1691  AddModsToHit(Hit, MSHit);
1692  // insert ions here
1693  AddIonsToHit(Hit, MSHit);
1694  CRef<CMSHits> hitref(Hit);
1695  HitSet->SetHits().push_back(hitref);
1696  PepDone[modseqstring] = Hit;
1697 
1698  }
1699 
 // one CMSPepHit per accepted defline, attached to the (possibly shared) Hit
1700  Pephit = new CMSPepHit;
1701 
1702  if ((*iDefLine)->CanGetSeqid()) {
1703  // find a gi
 // only the first gi-type id found is recorded
1704  ITERATE(list< CRef<CSeq_id> >, seqid, (*iDefLine)->GetSeqid()) {
1705  if ((**seqid).IsGi()) {
1706  Pephit->SetGi((**seqid).GetGi());
1707  break;
1708  }
1709  }
1710 
 // accession text comes from the id chosen by FindBestChoice with
 // CSeq_id::Score
1711  Pephit->SetAccession(
1712  FindBestChoice((*iDefLine)->GetSeqid(), CSeq_id::Score)->
1713  GetSeqIdString(false));
1714  }
1715 
1716 
1717  Pephit->SetStart(MSHit->GetStart());
1718  Pephit->SetStop(MSHit->GetStop());;
1719  Pephit->SetDefline((*iDefLine)->GetTitle());
1720  Pephit->SetProtlength(Sequence.GetLength());
1721  Pephit->SetOid(MSHit->GetSeqIndex());
1722  CRef<CMSPepHit> pepref(Pephit);
1723  Hit->SetPephits().push_back(pepref);
1724 
1725  }
1726  }
 // done with this spectrum: drop its scores and free the CMSPeak
1727  ScoreList.clear();
1728  delete *(PeakSet->GetPeaks().begin());
1729  PeakSet->GetPeaks().pop_front();
1730  }
1731  // write bioseqs to output
1732  WriteBioseqs();
1733 }
1734 
1735 
1737 {
 // For each oid referenced by `*iOids`, adds a CMSBioseq to the response's
 // bioseq list unless the response already holds one for that oid.
 // NOTE(review): two lines are absent from this extract: the function header
 // (listing line 1736) and the loop header that declares `iOids` (listing
 // line 1738). The closing brace at listing line 1746 belongs to that missing
 // loop. Confirm both against the original source.
1739  CConstRef <CMSBioseq::TSeq> Bioseq(SetResponse()->SetBioseqs().GetBioseqByOid(*iOids));
 // a null reference means no bioseq is stored for this oid yet
1740  if (Bioseq.IsNull()) {
1741  CRef <CMSBioseq> MSBioseq (new CMSBioseq);
 // the sequence is fetched from the sequence library handle rdfp
1742  MSBioseq->SetSeq(*rdfp->GetBioseq(*iOids));
1743  MSBioseq->SetOid() = *iOids;
1744  SetResponse()->SetBioseqs().Set().push_back(MSBioseq);
1745  }
1746  }
1747 }
1748 
1749 
1751  int SeriesCharge,
1752  int Ion,
1753  int minintensity,
1754  int Which,
1755  CMSPeak *Peaks,
1756  int Maxproductions)
1757 {
 // Fills in the matched peaks of one ion series (SeriesCharge, Ion) for Hit,
 // then annotates every matched peak with an expected-ion density and a mass
 // tolerance, and returns the matched-peak set for that series.
 // NOTE(review): the first header line (listing line 1750: return type,
 // function name and the parameter the body calls `Hit`) is absent from this
 // extract, as is the statement that declares `Sequence` (listing line 1789).
1758  int iii;
 // lowmz starts at 0 and is only updated from the second matched peak on
1759  int lowmz(0), highmz;
1760 
 // stop minus start of the hit
 // NOTE(review): if both positions are inclusive this is one less than the
 // residue count -- confirm what FillMatchedPeaks expects.
1761  unsigned Size = Hit.GetStop() - Hit.GetStart();
 // a caller value of 0 selects kMSLadderMax
1762  if (Maxproductions == 0) Maxproductions = kMSLadderMax;
1763 
1764 
1765  // decide if there is any terminal bias
1766  EMSTerminalBias TerminalBias(eMSNoTerminalBias);
1767 
 // cleave offset 1 votes for N-terminal bias, offset 0 for C-terminal bias;
 // once both kinds have been seen the result is eMSBothTerminalBias
1768  for(iii = 0; iii < GetEnzyme()->GetCleaveNum(); ++iii) {
1769  // n term
1770  if(GetEnzyme()->GetCleaveOffset()[iii] == 1 ) {
1771  // check to see if should be biases on both ends
1772  if(TerminalBias == eMSNTerminalBias || TerminalBias == eMSNoTerminalBias)
1773  TerminalBias = eMSNTerminalBias;
1774  else
1775  TerminalBias = eMSBothTerminalBias;
1776  }
1777  // c term
1778  else if (GetEnzyme()->GetCleaveOffset()[iii] == 0 ) {
1779  // check to see if should be biases on both ends
1780  if(TerminalBias == eMSCTerminalBias || TerminalBias == eMSNoTerminalBias)
1781  TerminalBias = eMSCTerminalBias;
1782  else
1783  TerminalBias = eMSBothTerminalBias;
1784  }
1785  }
1786 
1787 //#if 0
1788  // make a copy of the peptide sequence
 // NOTE(review): `Sequence` is used just below but its declaration (listing
 // line 1789) is absent from this extract.
1790  string seqstring;
1791  CreateSequence(Hit.GetStart(),
1792  Hit.GetStop(),
1793  seqstring,
1794  Sequence);
1795 //#endif
 // true when Ion appears in the settings' no-proline ion list
1796  bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
1797  GetSettings()->GetNoprolineions().end(),
1798  Ion) !=
1799  GetSettings()->GetNoprolineions().end();
1800  // fill in the matched ions
1801  Hit.FillMatchedPeaks(SeriesCharge,
1802  Ion,
1803  Size,
1804  minintensity,
1805  false,
1806  TerminalBias,
1807  SeriesCharge*Maxproductions
1808 //#if 0
1809  ,
1810  seqstring,
1811  NoProline
1812 //#endif
1813  );
1814  CMSMatchedPeakSet *MatchPeakSet = Hit.SetIonSeriesMatchMap().SetSeries(SeriesCharge, Ion);
1815  TMatchedPeakSet::iterator bin, prev, next;
1816 
 // each matched peak gets an m/z window [lowmz, highmz]: lowmz is the
 // midpoint to the previous peak (0 for the first), highmz the midpoint to
 // the next peak, or ExpMass/SeriesCharge for the last one
1817  for ( bin = MatchPeakSet->SetMatchedPeakSet().begin(); bin != MatchPeakSet->SetMatchedPeakSet().end(); ++bin) {
1818  // need to go thru match info, not hit info.
 // `prev` is only read here after the first iteration has assigned it
1819  if(bin != MatchPeakSet->SetMatchedPeakSet().begin()) {
1820  lowmz = ((*bin)->GetMZ() + (*prev)->GetMZ())/2;
1821  }
1822  next = bin;
1823  ++next;
1824  if(next != MatchPeakSet->SetMatchedPeakSet().end()) {
1825  highmz = ((*bin)->GetMZ() + (*next)->GetMZ())/2;
1826  }
1827  else highmz = Hit.GetExpMass()/SeriesCharge;
 // expected ions = peaks counted in the window divided by the window width
 // NOTE(review): highmz == lowmz would make the divisor 0.0 -- nothing here
 // rules that out; confirm upstream guarantees.
1828  (*bin)->SetExpIons() =
1829  Peaks->CountMZRange(lowmz,
1830  highmz,
1831  minintensity,
1832  Which) /
1833  (double)(highmz - lowmz);
1834 
 // the product tolerance is divided by the series charge
1835  (*bin)->SetMassTolerance() = (Peaks->GetTol())/SeriesCharge;
1836  prev = bin;
1837  }
1838  return MatchPeakSet;
1839 }
1840 
1841 
1842 
1843 
1845  CMSHit& Hit,
1846  EMSPeakListTypes Which,
1847  int minintensity,
1848  const TSeriesChargePairList::const_iterator &iPairList,
1849  list<CMSMatchedPeakSet *> &Forward,
1850  list<CMSMatchedPeakSet *> &Backward)
1851 {
 // Runs PepCharge for the (charge, series) pair at iPairList and files the
 // resulting matched-peak set under Forward or Backward according to the
 // series' entry in kIonDirection.
 // NOTE(review): the first header line (listing line 1844: return type,
 // function name and the parameter the body calls `Peaks`) is absent from
 // this extract -- confirm against the original source.
1852  CMSMatchedPeakSet * current;
1853 
 // iPairList->first and ->second are passed as the 2nd and 3rd arguments
 // (presumably the charge and the ion series -- verify against PepCharge)
1854  current = PepCharge(Hit,
1855  iPairList->first,
1856  iPairList->second,
1857  minintensity,
1858  Which,
1859  Peaks,
1860  GetSettings()->GetMaxproductions());
1861 
 // direction 1 -> Forward, -1 -> Backward; for any other value the set is
 // stored in neither list
1862  if (kIonDirection[iPairList->second] == 1)
1863  Forward.push_back(current);
1864  else if (kIonDirection[iPairList->second] == -1)
1865  Backward.push_back(current);
1866 }
1867 
1868 
1869 void CSearch::DoubleCompare(list<CMSMatchedPeakSet *> &SingleForward,
1870  list<CMSMatchedPeakSet *> &SingleBackward,
1871  list<CMSMatchedPeakSet *> &Double,
1872  bool DoubleForward)
1873 {
1874  list<CMSMatchedPeakSet *>::iterator iDouble, iFront, iBack;
1875 
1876  for (iDouble = Double.begin(); iDouble != Double.end(); ++iDouble) {
1877 
1878  for(iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
1879  (*iDouble)->Compare(*iFront, DoubleForward);
1880  }
1881 
1882  for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
1883  (*iDouble)->Compare(*iBack, !DoubleForward);
1884  }
1885  }
1886 }
1887 
1888 
1890  double Threshold,
1891  CMSPeak* Peaks
1892  )
1893 {
 // Scores every stored hit of Peaks at the given intensity Threshold and
 // inserts (e-value, pointer-to-hit) pairs into ScoreList. Hits whose
 // Poisson mean is unusable, or whose hit count is below that mean, are
 // left out.
 // NOTE(review): the first header line (listing line 1889: return type,
 // function name and the parameter the body calls `ScoreList`) is absent
 // from this extract -- confirm against the original source.
1894  int iCharges;
1895  int iHitList;
1896  int Tophitnum = GetSettings()->GetTophitnum();
1897 
 // outer loop: one hit list per charge slot; inner loop: the filled part of
 // that list, bounded by GetHitListIndex(iCharges)
1898  for (iCharges = 0; iCharges < Peaks->GetNumCharges(); iCharges++) {
1899 
1900  TMSHitList& HitList = Peaks->GetHitList(iCharges);
1901  for (iHitList = 0; iHitList != Peaks->GetHitListIndex(iCharges);
1902  iHitList++) {
1903 
1904  int tempMass = HitList[iHitList].GetExpMass();
1905  int Charge = HitList[iHitList].GetCharge();
1906  EMSPeakListTypes Which = Peaks->GetWhich(Charge);
1907 
1908  // set up new score
1909 
1910 
1911  // minimum intensity
 // Threshold is applied as a fraction of the maximum intensity of the
 // selected peak list, truncated to int
1912  int minintensity = static_cast <int> (Threshold * Peaks->GetMaxI(Which));
1913 
1914 
1915  TSeriesChargePairList::const_iterator iPairList;
1916  list <CMSMatchedPeakSet *> SingleForward, SingleBackward, DoubleForward, DoubleBackward;
1917 
 // pairs whose first member is 1 go to the Single lists; all other pairs go
 // to the Double lists, but only when Charge >= GetConsiderMult()
1918  for (iPairList = SetLadderContainer().GetSeriesChargePairList().begin();
1919  iPairList != SetLadderContainer().GetSeriesChargePairList().end();
1920  ++iPairList) {
1921 
1922  // charge 1
1923  if (iPairList->first == 1) {
1924  MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
1925  iPairList, SingleForward, SingleBackward);
1926  }
1927  else if (Charge >= Peaks->GetConsiderMult()) {
1928  MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
1929  iPairList, DoubleForward, DoubleBackward);
1930  }
1931  }
1932 
1933  list <CMSMatchedPeakSet *> ::iterator iFront, iBack, iDouble;
1934 
 // the cross-series comparisons run only when the no-correlation setting is 0
1935  if(GetSettings()->GetNocorrelationscore() == 0) {
1936  // do the singly charge comparison
1937  for (iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
1938  for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
1939  (*iFront)->Compare(*iBack, false);
1940  }
1941  }
1942  if (Charge >= Peaks->GetConsiderMult()) {
1943  DoubleCompare(SingleForward, SingleBackward, DoubleForward, true);
1944  DoubleCompare(SingleForward, SingleBackward, DoubleBackward, false);
1945  }
1946  }
1947 
1948 
 // adjust = max delta over the scaled MS/MS tolerance, then clamped to
 // [GetAutomassadjust(), 1.0] (the upper clamp is applied last)
1949  double adjust = HitList[iHitList].GetMaxDelta() /
1950  MSSCALE2INT(GetSettings()->GetMsmstol());
1951  if(adjust < GetSettings()->GetAutomassadjust())
1952  adjust = GetSettings()->GetAutomassadjust();
1953  if(adjust > 1.0)
1954  adjust = 1.0;
 // NOTE(review): GetProbfollowingion() is passed as both the 1st and the 3rd
 // argument, and 19 is a bare literal -- meanings are not visible here;
 // confirm against CalcPoissonMean's declaration.
1955  double a =
1956  HitList[iHitList].CalcPoissonMean(GetSettings()->GetProbfollowingion(),
1957  GetEnzyme()->GetCleaveNum(),
1958  GetSettings()->GetProbfollowingion(),
1959  19,
1960  adjust);
1961 
 // a zero, negative, NaN or infinite mean drops this hit from ScoreList
1962  if (a == 0) {
1963  // threshold probably too high
1964  continue;
1965  }
1966  if (a < 0 ) {
1967  _TRACE("poisson mean is < 0");
1968  continue;
1969  }
1970  else if (isnan(a) || !finite(a)) {
1971  ERR_POST(Info << "poisson mean is NaN or is infinite");
1972  continue;
1973  }
1974 
1975  // keep going if obviously insignificant
1976  if (HitList[iHitList].GetHits() < a) continue;
1977 
1978  double pval; // statistical p-value
1979  int N; // number of peptides
 // peptides examined for this charge plus a pseudocount scaled by
 // (zdep * (Charge - 1) + 1); the sum is stored in an int
1980  N = Peaks->GetPeptidesExamined(Charge) +
1981  (GetSettings()->GetZdep() * (Charge - 1) + 1) *
1982  GetSettings()->GetPseudocount();
1983 
 // without rank scoring the p-value uses the top-hit variant; with it, the
 // plain CalcPvalue on the hit count at this threshold
1984  if (!UseRankScore) {
1985  int High, Low, NumPeaks, NumLo, NumHi;
1986  Peaks->HighLow(High, Low, NumPeaks, tempMass, Charge, Threshold, NumLo, NumHi);
1987 
 // NOTE(review): NumPeaks == 0 would divide by zero here -- no guard is
 // visible; confirm HighLow's guarantees.
1988  double TopHitProb = ((double)Tophitnum)/NumPeaks;
1989  // correct for situation where more tophits than experimental peaks
1990  if (TopHitProb > 1.0) TopHitProb = 1.0;
1991  int numhits = HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which));
1992  double Normal = HitList[iHitList].CalcNormalTopHit(a, TopHitProb);
1993  pval = HitList[iHitList].CalcPvalueTopHit(a, numhits, Normal, TopHitProb);
1994  }
1995  else {
1996  pval = HitList[iHitList].CalcPvalue(a, HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which)));
1997  }
 // rank scoring (unless Poisson-only) multiplies in the rank probability and
 // a fixed factor of 10; a zero M skips that and only logs
1998  if (UseRankScore && !GetPoissonOnly()) {
1999  if (HitList[iHitList].GetM() != 0.0) {
2000  double Perf = HitList[iHitList].CalcRankProb();
2001  _TRACE( "Perf=" << Perf << " pval=" << pval << " N=" << N );
2002  pval *= Perf;
2003  pval *= 10.0; // correction to scales
2004  }
2005  else ERR_POST(Info << "M is zero");
2006  }
 // NOTE(review): the 3e3 factor is a bare literal; its origin is not visible
 // in this block.
2007  double eval = 3e3 * pval * N;
2008 // _TRACE( " pval=" << pval << " eval=" << eval );
 // the stored pointer refers to the element inside HitList, not to a copy
2009  ScoreList.insert(pair<const double, CMSHit *>
2010  (eval, &(HitList[iHitList])));
2011  }
2012  }
2013 }
2014 
2016 {
 // Intentionally empty body.
 // NOTE(review): the header line for this body (listing line 2015) is absent
 // from this extract, so it cannot be told from here which function this
 // belongs to -- restore the header from the original source.
2017 }
2018 
2019 
const char *const UniqueAA
Definition: MSMod.hpp:58
#define MSSCALE2INT(x)
Definition: MSMod.hpp:64
#define MSSCALE
Definition: MSMod.hpp:63
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
Definition: msms.hpp:143
const char *const GetMap(void) const
return the map for translating AA char to AA number
Definition: msms.hpp:160
CBZip2StreamCompressor – bzip2 based compression stream processor.
Definition: bzip2.hpp:516
CBZip2StreamDecompressor – bzip2 based decompression stream processor.
Definition: bzip2.hpp:565
static CRef< CCleave > CleaveFactory(const EMSEnzymes enzyme)
Simple factory to return back object for enzyme.
Definition: msms.cpp:545
const char *& SetStop(void)
Set the enzyme stop value.
Definition: msms.hpp:872
bool GetNonSpecific(void) const
Is this a non-specific search?
Definition: msms.hpp:860
bool CalcAndCut(const char *SeqStart, const char *SeqEnd, const char **PepStart, int *Masses, int &NumMod, int MaxNumMod, int *EndMasses, CMSMod &VariableMods, CMSMod &FixedMods, CMod ModList[], const int *IntCalcMass, const int *PrecursorIntCalcMass, CRef< CMSModSpecSet > &Modset, int Maxproductions)
cleaves the sequence.
Definition: msms.cpp:116
bool & SetNMethionine(void)
Set n-term methionine cleavage.
Definition: msms.hpp:897
const char * GetStop(void) const
Get the enzyme stop value.
Definition: msms.hpp:866
bool CheckCleaveChar(const char *iPepStart) const
is the character given one of the cleavage chars?
Definition: msms.hpp:720
int GetCleaveNum(void) const
Get the number of cleavage chars.
Definition: msms.hpp:878
CConstRef –.
Definition: ncbiobj.hpp:1266
CDirEntry –.
Definition: ncbifile.hpp:262
void Next(TLadderMap::iterator &Iter, TMSCharge BeginCharge=0, TMSCharge EndCharge=0, TMSIonSeries SeriesType=eMSIonTypeUnknown)
iterate over the ladder map over the charge range and series type indicated
Definition: msladder.cpp:310
TSeriesChargePairList & SetSeriesChargePairList(void)
return the list of charge, series type pairs that are used to initialize the maps
Definition: msladder.hpp:550
void Begin(TLadderMap::iterator &Iter, TMSCharge BeginCharge=0, TMSCharge EndCharge=0, TMSIonSeries SeriesType=eMSIonTypeUnknown)
Definition: msladder.cpp:324
void CreateLadderArrays(int MaxModPerPep, int MaxLadderSize)
populate the Ladder Map with arrays based on the ladder
Definition: msladder.cpp:271
const TSeriesChargePairList & GetSeriesChargePairList(void) const
return the list of charge, series type pairs that are used to initialize the maps
Definition: msladder.hpp:557
const TMSNumber GetNumber(void) const
Get the ion series number.
Definition: msscore.hpp:256
const TMSIonSeries GetIonSeries(void) const
Get the ion type.
Definition: msscore.hpp:232
const TMSCharge GetCharge(void) const
Get the ion charge.
Definition: msscore.hpp:244
const TMSMZ GetMZ(void) const
get the m/z value of the peak
Definition: msscore.hpp:145
CMSBioseq –.
Definition: MSBioseq.hpp:66
CMSHitSet –.
Definition: MSHitSet.hpp:66
class to contain preliminary hits.
Definition: mspeak.hpp:175
const int GetSeqIndex(void) const
get blast oid
Definition: mspeak.hpp:389
int & SetStart()
set sequence start
Definition: mspeak.hpp:371
int & SetSeqIndex(void)
set blast oid
Definition: mspeak.hpp:395
const int GetStart(void) const
get sequence start
Definition: mspeak.hpp:365
int CountHits(double Threshold, int MaxI)
return number of hits above threshold
Definition: mspeak.cpp:182
const CMSModInfo & GetModInfo(int n) const
get modification info
Definition: mspeak.hpp:407
const int GetStop(void) const
get sequence stop
Definition: mspeak.hpp:377
int & SetStop(void)
set sequence stop
Definition: mspeak.hpp:383
const int GetNumModInfo(void) const
get size of modification info array
Definition: mspeak.hpp:413
@MSHits.hpp User-defined methods of the data storage class.
Definition: MSHits.hpp:56
CMSMZHit –.
Definition: MSMZHit.hpp:66
static const TMSIonSeries Key2Series(int Key)
convert a key into a series type
Definition: msscore.cpp:240
static const TMSCharge Key2Charge(int Key)
convert a key into a charge
Definition: msscore.cpp:234
CMSMatchedPeakSet * SetSeries(TMSCharge Charge, TMSIonSeries Series)
get a series for modification
Definition: msscore.cpp:213
container for a set of matches
Definition: msscore.hpp:376
TMatchedPeakSet & SetMatchedPeakSet(void)
Set the match info.
Definition: msscore.hpp:442
CMSModHit –.
Definition: MSModHit.hpp:66
const int GetModEnum(void) const
Definition: mspeak.hpp:127
const int GetIsFixed(void) const
Definition: mspeak.hpp:152
const int GetSite(void) const
Definition: mspeak.hpp:139
@MSModSpecSet.hpp User-defined methods of the data storage class.
void Append(const CMSModSpecSet &ModsIn)
concatenates in another CMSModSpecSet
EMSModType GetModType(int Mod) const
get modification type
bool Init(const CMSSearchSettings::TVariable &Mods, CRef< CMSModSpecSet > Modset)
initialize variable mod type array
Definition: Mod.cpp:56
int SortPeaks(int Peptol, int Zdep, int Numisotopes, bool Pepppm, int ChargeSign)
put the pointers into an array sorted by mass
Definition: mspeak.cpp:1253
CIntervalTree & SetIntervalTree(void)
Definition: mspeak.hpp:1397
void AddPeak(CMSPeak *PeakIn)
Definition: mspeak.hpp:1391
TPeakSet & GetPeaks(void)
Definition: mspeak.hpp:1403
class to hold spectral data for filtering and statistical characterization
Definition: mspeak.hpp:648
int CompareSortedRank(CLadder &Ladder, EMSPeakListTypes Which, vector< bool > &usedPeaks)
Compare the ladder and peaks and return back rank statistics.
Definition: mspeak.cpp:443
const EMSHitError GetError(void) const
return any errors in computing on peaks
Definition: mspeak.hpp:1244
const bool CompareTop(CLadder &Ladder)
compares only the top hits
Definition: mspeak.cpp:503
const int GetMaxI(const EMSPeakListTypes Which) const
Get Maximum intensity.
Definition: mspeak.cpp:1216
TMSHitList & GetHitList(const int Index)
Get a hit list.
Definition: mspeak.hpp:1189
const int GetPeptidesExamined(const int ChargeIn) const
return number of peptides examined for each charge state
Definition: mspeak.hpp:1213
const int GetNumber(void) const
get the spectrum number
Definition: mspeak.hpp:1274
void ReadAndProcess(const CMSSpectrum &Spectrum, const CMSSearchSettings &Settings)
Read and process a spectrum set into a CMSPeak.
Definition: mspeak.cpp:548
void HighLow(int &High, int &Low, int &NumPeaks, const int PrecursorMass, const int Charge, const double Threshold, int &NumLo, int &NumHi)
return the lowest culled peak and the highest culled peak less than the precursor mass passed in
Definition: mspeak.cpp:1137
TPeakLists & SetPeakLists(void)
set the peak lists
Definition: mspeak.hpp:1171
const bool AddHit(CMSHit &in, CMSHit *&out)
add hit to hitlist.
Definition: mspeak.cpp:349
const int GetTol(void) const
get the product mass tolerance in Daltons.
Definition: mspeak.hpp:1286
int & SetPeptidesExamined(const int ChargeIn)
set the number of peptides examined for each charge state
Definition: mspeak.hpp:1219
const CMSSpectrum::TIds & GetName(void) const
get the names of the spectrum
Definition: mspeak.hpp:1262
const int CountMZRange(const int StartIn, const int StopIn, const double MinIntensity, const int Which) const
return the number of peaks in a range
Definition: mspeak.cpp:595
const int GetNumCharges(void) const
return number of allowed computed charges
Definition: mspeak.hpp:1317
const int GetHitListIndex(const int Index) const
Get size of hit list.
Definition: mspeak.hpp:1195
const EMSPeakListTypes GetWhich(const int Charge) const
returns the cull array index
Definition: mspeak.hpp:1305
const int GetConsiderMult(void) const
gets min precursor charge to consider multiply charged product ions
Definition: mspeak.hpp:1238
void Write(std::ostream &FileOut, const EMSSpectrumFileType FileType, const EMSPeakListTypes Which) const
Write out a CMSPeak in dta format (useful for debugging)
Definition: mspeak.cpp:757
CMSPepHit –.
Definition: MSPepHit.hpp:66
CRef< CMSHitSet > FindHitSet(const int Number) const
Find hitset with given number.
Definition: MSResponse.cpp:148
int Validate(std::list< std::string > &Error) const
Validate Search Settings. Returns 0 if OK, 1 if not; Error contains explanations.
const double CalcPoissonMean(double ProbTerminal=0.0L, int NumTerminalMasses=2, double ProbDependent=0.0L, int NumUniqueMasses=19, double ToleranceAdjust=1.0L) const
calculate the mean value of the poisson distribution for this match
Definition: msscore.cpp:452
const TMSMZ GetExpMass(void) const
Get the experimental m/z of the spectrum.
Definition: msscore.hpp:822
void FillMatchedPeaks(TMSCharge ChargeIn, TMSIonSeries Series, unsigned Size, TMSIntensity MinIntensity, bool Skipb1, EMSTerminalBias TerminalIon, int Maxproductions, string &Sequence, bool NoProline)
copies hit array into match array and fills in missing peaks; does not fill in exp peak values.
Definition: msscore.cpp:287
const double CalcPvalueTopHit(double Mean, int HitsIn, double Normal, double TopHitProb) const
calculate the p-value using poisson distribution and the top hit prob
Definition: msscore.cpp:554
CMSMatchedPeakSetMap & SetIonSeriesMatchMap(void)
Set map from ion series to CMSMatchedPeakSet *.
Definition: msscore.hpp:924
const CMSBasicMatchedPeak & GetHitInfo(int n) const
Get the hit info at array position n.
Definition: msscore.hpp:906
const int GetHits(void) const
return the size of the HitInfo array
Definition: msscore.hpp:894
const TMSMZ GetMaxDelta(void) const
calc max abs difference between experimental and theoretical mass values
Definition: msscore.cpp:629
const TMSMZ GetTheoreticalMass(void) const
return theoretical mass of the hit
Definition: msscore.hpp:834
TMSCharge & SetCharge()
set the charge
Definition: msscore.hpp:852
const double CalcNormalTopHit(double Mean, double TopHitProb) const
integrate CalcPoissonTopHit over all i
Definition: msscore.cpp:509
TMSMZ & SetExpMass(void)
Set the experimental mass of the spectrum.
Definition: msscore.hpp:828
int & SetHits(void)
set the size of the HitInfo array
Definition: msscore.hpp:900
const double CalcRankProb(void) const
calculate the rank score
Definition: msscore.cpp:585
const TMSCharge GetCharge(void) const
get the charge
Definition: msscore.hpp:846
const double CalcPvalue(double Mean, int HitsIn) const
calculate the p-value using poisson distribution
Definition: msscore.cpp:526
const int *const GetIntMass(void) const
Definition: msms.hpp:127
void Init(const CMSSearchSettings::TProductsearchtype &SearchType)
initialize mass arrays with fixed mods
Definition: msms.cpp:641
contains information for a post translational modification at a particular sequence site
Definition: msms.hpp:172
TSite GetSite(void) const
Get the site position.
Definition: msms.hpp:343
TFixed & SetFixed(void)
set mod state (1 = fixed)
Definition: msms.hpp:424
TEnum GetEnum(void) const
Get mod type.
Definition: msms.hpp:397
void Reset(void)
reset to default values
Definition: msms.hpp:306
TFixed GetFixed(void) const
Is the mod fixed?
Definition: msms.hpp:415
CObjectOStreamXml –.
Definition: objostrxml.hpp:54
void ConvertFromOMSSA(CMSSearch &inOMSSA, CRef< CMSModSpecSet > Modset, string basename, string newname)
convert OMSSA to PepXML
Definition: pepxml.cpp:427
static int SaveAnyFile(CMSSearch &MySearch, CMSSearchSettings::TOutfiles OutFiles, CRef< CMSModSpecSet > Modset)
Write out a complete search.
Definition: omssa.cpp:298
static void ConditionXMLStream(CObjectOStreamXml *xml_out)
correctly set up xml stream
Definition: omssa.cpp:135
static void SaveOneFile(CMSSearch &MySearch, const string Filename, ESerialDataFormat FileFormat, bool IncludeRequest, bool bz2)
Definition: omssa.cpp:265
static int ReadCompleteSearch(const string &Filename, const ESerialDataFormat DataFormat, bool bz2, CMSSearch &MySearch)
Read in a complete search (typically for an iterative search)
Definition: omssa.cpp:193
static void ReadTaxFile(string &Filename, TTaxNameMap &TaxNameMap)
Definition: omssa.cpp:116
static int ReadSearchRequest(const string &Filename, const ESerialDataFormat DataFormat, CMSSearch &MySearch)
Read in an MSRequest.
Definition: omssa.cpp:169
static void ValidateSearchSettings(CRef< CMSSearchSettings > &Settings)
Validates Search Settings.
Definition: omssa.cpp:366
static void CreateSearchSettings(string FileName, CRef< CMSSearchSettings > &Settings)
create search setting object from file or brand new
Definition: omssa.cpp:379
static int ReadFile(const string &Filename, const EMSSpectrumFileType FileType, CMSSearch &MySearch)
Read in a spectrum file.
Definition: omssa.cpp:147
static int LoadAnyFile(CMSSearch &MySearch, CConstRef< CMSInFile > InFile, bool *SearchEngineIterative=0)
Read in any input file.
Definition: omssa.cpp:224
static int ReadModFiles(const string &ModFileName, const string &UserModFileName, const string &Path, CRef< CMSModSpecSet > Modset)
read in modification files.
Definition: omssa.cpp:61
unsigned MakeBoolMask(int *ModIndex, int iMod)
Definition: omssa.hpp:694
CMSMod VariableMods
Definition: omssa.hpp:551
CMSResponse::TOidSet & SetOidSet(void)
get the oidset
Definition: omssa.hpp:856
void SetResult(CRef< CMSPeakSet > PeakSet)
Definition: omssa.cpp:1508
void DeleteVariableOverlap(int &NumMod, CMod ModList[])
delete variable mods that overlap with fixed mods
Definition: omssa.cpp:627
void UpdateWithNewPep(int Missed, const char *PepStart[], const char *PepEnd[], int NumMod[], CMod ModList[][32], int Masses[], int EndMasses[], int NumModSites[], CRef< CMSModSpecSet > &Modset)
update sites and masses for new peptide
Definition: omssa.cpp:663
const bool GetIterative(void) const
Gets iterate search.
Definition: omssa.hpp:814
Int1 & SetLadderCalc(int i)
Set the bit that indicates whether a ladder was calculated.
Definition: omssa.hpp:772
AutoPtr< Int1, ArrayDeleter< Int1 > > LadderCalc
bool array that indicates if the ladders have been calculated
Definition: omssa.hpp:583
void AddModsToHit(CMSHits *Hit, CMSHit *MSHit)
Adds modification information to hitset.
Definition: omssa.cpp:1447
CLadderContainer & SetLadderContainer(void)
set the ladder container
Definition: omssa.hpp:899
bool CalcModIndex(int *ModIndex, int &iMod, int &NumMod, int NumFixed, int NumModSites, CMod CModList[])
Definition: omssa.hpp:715
void * initCallbackData
Definition: omssa.hpp:645
CMSMod FixedMods
Definition: omssa.hpp:552
CRef< CMSResponse > & SetResponse(void)
Set search response.
Definition: omssa.hpp:844
void Spectrum2Peak(CRef< CMSPeakSet > PeakSet)
Definition: omssa.cpp:564
CRef< CSeqDB > rdfp
blast library
Definition: omssa.hpp:546
bool UseRankScore
boolean to turn on rank scoring
Definition: omssa.hpp:604
CConstRef< CMSRequest > GetRequest(void) const
Get search request.
Definition: omssa.hpp:838
virtual void OnExit(void)
Override this to execute finalization code.
Definition: omssa.cpp:966
void WriteBioseqs(void)
write oidset to result
Definition: omssa.cpp:1736
Int1 GetLadderCalc(int i) const
Get the bit that indicates whether a ladder was calculated.
Definition: omssa.hpp:766
int InitBlast(const char *blastdb, bool use_mmap=false)
init blast databases.
Definition: omssa.cpp:429
void CreateModCombinations(int Missed, const char *PepStart[], int Masses[], int EndMasses[], int NumMod[], int NumMassAndMask[], int NumModSites[], CMod ModList[][32])
Definition: omssa.cpp:774
void MatchAndSort(CMSPeak *Peaks, CMSHit &Hit, EMSPeakListTypes Which, int minintensity, const TSeriesChargePairList::const_iterator &iPairList, list< CMSMatchedPeakSet * > &SingleForward, list< CMSMatchedPeakSet * > &SingleBackward)
Creates match ion match lists.
Definition: omssa.cpp:1844
int CompareLadders(int iMod, CMSPeak *Peaks, bool OrLadders, const TMassPeak *MassPeak)
compare ladders to experiment
Definition: omssa.cpp:483
void CopySettings(CRef< CSearch > fromObj)
Definition: omssa.cpp:970
TMassMask & SetMassAndMask(int i, int j)
Set the mask and mass of mod bit array.
Definition: omssa.hpp:784
void Search(CRef< CMSRequest > MyRequestIn, CRef< CMSResponse > MyResponseIn, CRef< CMSModSpecSet > Modset, CRef< CMSSearchSettings > SettingsIn, TOMSSACallback Callback=0, void *CallbackData=0)
Performs the ms/ms search.
Definition: omssa.cpp:985
void ClearLadderCalc(int Max)
Clear the ladder calc array up to max index.
Definition: omssa.hpp:778
CMassArray PrecursorMassArray
Definition: omssa.hpp:550
CConstRef< CCleave > GetEnzyme(void) const
Get search enzyme.
Definition: omssa.hpp:892
bool & SetRestrictedSearch(void)
is this search restricted to the oid set?
Definition: omssa.hpp:874
CRef< CMSRequest > & SetRequest(void)
Set search request.
Definition: omssa.hpp:832
void InitModIndex(int *ModIndex, int &iMod, int NumMod, int NumModSites, CMod ModList[])
Definition: omssa.hpp:654
CConstRef< CMSSearchSettings > GetSettings(void) const
Get search settings.
Definition: omssa.hpp:826
CConstRef< CMSResponse > GetResponse(void) const
Get search response.
Definition: omssa.hpp:850
void AddIonsToHit(CMSHits *Hit, CMSHit *MSHit)
Adds ion information to hitset.
Definition: omssa.cpp:1465
void DoubleCompare(list< CMSMatchedPeakSet * > &SingleForward, list< CMSMatchedPeakSet * > &SingleBackward, list< CMSMatchedPeakSet * > &Double, bool DoubleForward)
Definition: omssa.cpp:1869
CMSMatchedPeakSet * PepCharge(CMSHit &Hit, int SeriesCharge, int Ion, int MinIntensity, int Which, CMSPeak *Peaks, int Maxproductions)
fill out MatchedPeakSet
Definition: omssa.cpp:1750
static void MakeModString(string &seqstring, string &modseqstring, CMSHit *MSHit)
Makes a string hashed out of the sequence plus mods.
Definition: omssa.cpp:1483
const bool GetPoissonOnly(void) const
Gets the scoring to use rank statistics only with Poisson.
Definition: omssa.hpp:802
const CMSResponse::TOidSet & GetOidSet(void) const
get the oidset
Definition: omssa.hpp:862
const bool GetRestrictedSearch(void) const
is this search restricted to the oid set?
Definition: omssa.hpp:880
static int iSearchGlobal
Tracks the iSearch number for all search threads.
Definition: omssa.hpp:629
CRef< CCleave > & SetEnzyme(void)
Set search enzyme.
Definition: omssa.hpp:886
void CountModSites(int &NumModSites, int NumMod, CMod ModList[])
count the number of unique sites modified
Definition: omssa.cpp:755
CMassArray MassArray
Definition: omssa.hpp:549
bool CompareLaddersTop(int iMod, CMSPeak *Peaks, const TMassPeak *MassPeak)
Definition: omssa.cpp:514
void InitLadders(std::list< EMSIonSeries > &Ions)
initialize mass ladders
Definition: omssa.cpp:901
int CreateLadders(const char *Sequence, int iSearch, int position, int endposition, int *Masses, int iMissed, CAA &AA, int iMod, CMod ModList[], int NumMod)
create the ladders from sequence
Definition: omssa.cpp:441
void SetIons(list< EMSIonSeries > &Ions)
set up the ions to use
Definition: omssa.cpp:888
void SetupSearch(CRef< CMSRequest > MyRequestIn, CRef< CMSResponse > MyResponseIn, CRef< CMSModSpecSet > Modset, CRef< CMSSearchSettings > SettingsIn, TOMSSACallback Callback=0, void *CallbackData=0)
Setup the ms/ms search.
Definition: omssa.cpp:940
void CreateSequence(int Start, int Stop, string &seqstring, CSeqDBSequence &Sequence)
Generate a peptide sequence.
Definition: omssa.cpp:1494
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
Definition: omssa.cpp:955
CRef< CMSSearchSettings > & SetSettings(void)
Set search settings.
Definition: omssa.hpp:820
CRef< CMSModSpecSet > initModset
Definition: omssa.hpp:642
void CalcNSort(TScoreList &ScoreList, double Threshold, CMSPeak *Peaks)
calculate the evalues of the top hits and sort
Definition: omssa.cpp:1889
void MakeOidSet(void)
makes map of oid from previous search used in iterative searching
Definition: omssa.cpp:921
static void ResetGlobals(void)
Reset global parameters used in threaded search.
Definition: omssa.cpp:421
TOMSSACallback initCallback
Definition: omssa.hpp:644
const int Getnumseq(void) const
Definition: omssa.hpp:868
int numseq
Definition: omssa.hpp:553
CRef< CMSRequest > initRequestIn
These are so CSearch::Main() can call CSearch::Search() in a threaded run, this requires CSearch::Set...
Definition: omssa.hpp:640
CRef< CMSResponse > initResponseIn
Definition: omssa.hpp:641
int MaxModPerPep
maximum number of mod combinations per peptide
Definition: omssa.hpp:593
bool Iterative
boolean to turn on iterative search
Definition: omssa.hpp:614
AutoPtr< TMassMask, ArrayDeleter< TMassMask > > MassAndMask
contains bit mask of modifications and resulting mass
Definition: omssa.hpp:588
const bool ReSearch(const int Number) const
examines a hitset to see if any good hits
Definition: omssa.cpp:541
static int MaxMZ
maximum m/z value of all spectra precursors used to bound non-specific cleavage searches
Definition: omssa.hpp:599
static CRef< CMSPeakSet > SharedPeakSet
Definition: omssa.hpp:225
CRef< CMSSearchSettings > initSettingsIn
Definition: omssa.hpp:643
int ThreadNum
The threadid number.
Definition: omssa.hpp:634
CSeqDBSequence –.
Definition: seqdb.hpp:1596
CSeqDB.
Definition: seqdb.hpp:161
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Definition: seqdb.cpp:680
@ eProtein
Definition: seqdb.hpp:174
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
Definition: seqdb.cpp:504
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist=false) const
Get taxid for an OID.
Definition: seqdb.cpp:441
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
Definition: seqdb.cpp:728
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
Definition: seqdb.cpp:418
int Mass
Definition: mspeak.hpp:1337
CMSPeak * Peak
Definition: mspeak.hpp:1343
int Charge
Definition: mspeak.hpp:1342
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
void clear()
Definition: map.hpp:309
const_iterator end() const
Definition: map.hpp:292
iterator insert(const value_type &val)
Definition: map.hpp:305
const_iterator begin() const
Definition: map.hpp:291
bool empty() const
Definition: map.hpp:289
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
void clear()
Definition: set.hpp:153
string GetSeqIdString(const CSeq_id &id)
Definition: compartp.cpp:100
string Path(const string &dir, const string &file)
Definition: fileutil.cpp:243
#define false
Definition: bool.h:36
#define bool
Definition: bool.h:34
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:480
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define isnan
Definition: ncbifloat.h:89
void Critical(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1203
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Fatal(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1209
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
#define finite
Define value of finite (Is Finite).
Definition: ncbifloat.h:109
string GetDir(EIfEmptyPath mode=eIfEmptyPath_Current) const
Get the directory component for this directory entry.
Definition: ncbifile.cpp:475
static bool IsAbsolutePath(const string &path)
Check if a "path" is absolute for the current OS.
Definition: ncbifile.cpp:508
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
const_iterator AllIntervals(void) const
const_iterator IntervalsContaining(coordinate_type point) const
reference GetValue(void) const
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
Definition: Seq_id.hpp:772
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
Definition: objectinfo.hpp:762
void SetReferenceSchema(bool use_schema=true)
Make generated XML document reference XML schema.
Definition: objostrxml.cpp:128
void SetWriteNamedIntegersByValue(bool set)
Set up writing named integers (in ASN.1 sense) by value only.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
int8_t Int1
1-byte (8-bit) signed integer
Definition: ncbitype.h:98
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
const Tdata & Get(void) const
Get the member data.
CSearch(void)
Definition: Search.hpp:88
const THits & GetHits(void) const
Get the Hits member data.
Definition: Search_.hpp:587
~CSearch(void)
Definition: omssa.cpp:2015
void SetMass(TMass value)
Assign a value to Mass data member.
Definition: MSHits_.hpp:988
TMods & SetMods(void)
Assign a value to Mods data member.
Definition: MSHits_.hpp:1025
void SetStop(TStop value)
Assign a value to Stop data member.
Definition: MSPepHit_.hpp:609
void SetPvalue(TPvalue value)
Assign a value to Pvalue data member.
Definition: MSHits_.hpp:797
void SetProtlength(TProtlength value)
Assign a value to Protlength data member.
Definition: MSPepHit_.hpp:797
void SetOid(TOid value)
Assign a value to Oid data member.
Definition: MSPepHit_.hpp:844
TMinnoenzyme GetMinnoenzyme(void) const
Get the Minnoenzyme member data.
void SetGi(TGi value)
Assign a value to Gi data member.
Definition: MSPepHit_.hpp:656
void SetAccession(const TAccession &value)
Assign a value to Accession data member.
Definition: MSPepHit_.hpp:696
void SetSinglenum(TSinglenum value)
Assign a value to Singlenum data member.
void SetCharge(TCharge value)
Assign a value to Charge data member.
Definition: MSHits_.hpp:844
void SetTheomass(TTheomass value)
Assign a value to Theomass data member.
Definition: MSHits_.hpp:1201
void SetEvalue(TEvalue value)
Assign a value to Evalue data member.
Definition: MSHits_.hpp:750
EMSSerialDataFormat
Access to EMSSerialDataFormat's attributes (values, names) as defined in spec.
void SetDefline(const TDefline &value)
Assign a value to Defline data member.
Definition: MSPepHit_.hpp:743
void SetPepstring(const TPepstring &value)
Assign a value to Pepstring data member.
Definition: MSHits_.hpp:934
void SetPepstop(const TPepstop &value)
Assign a value to Pepstop data member.
Definition: MSHits_.hpp:1100
void SetStart(TStart value)
Assign a value to Start data member.
Definition: MSPepHit_.hpp:562
void SetBioseqs(TBioseqs &value)
Assign a value to Bioseqs data member.
Definition: MSResponse_.cpp:81
list< CRef< CMSOutFile > > TOutfiles
TResponse & SetResponse(void)
Assign a value to Response data member.
Definition: MSSearch_.hpp:235
TMzhits & SetMzhits(void)
Assign a value to Mzhits data member.
Definition: MSHits_.hpp:906
TNmethionine GetNmethionine(void) const
Get the Nmethionine member data.
void SetDoublenum(TDoublenum value)
Assign a value to Doublenum data member.
TRequest & SetRequest(void)
Assign a value to Request data member.
Definition: MSSearch_.hpp:210
EMSEnzymes
enumerate enzymes
Definition: MSEnzymes_.hpp:64
void SetDbversion(TDbversion value)
Assign a value to Dbversion data member.
EMSSpectrumFileType
Access to EMSSpectrumFileType's attributes (values, names) as defined in spec.
void SetPepstart(const TPepstart &value)
Assign a value to Pepstart data member.
Definition: MSHits_.hpp:1053
void SetScale(TScale value)
Assign a value to Scale data member.
THitsets & SetHitsets(void)
Assign a value to Hitsets data member.
TPephits & SetPephits(void)
Assign a value to Pephits data member.
Definition: MSHits_.hpp:881
void SetScale(TScale value)
Assign a value to Scale data member.
@ eMSSearchType_exact
@ eMSSerialDataFormat_csv
csv (excel)
@ eMSSerialDataFormat_none
@ eMSSerialDataFormat_xml
open XML format
@ eMSSerialDataFormat_asnbinary
open ASN.1 binary format
@ eMSSerialDataFormat_pepxml
pepXML format
@ eMSSerialDataFormat_asntext
open ASN.1 text format
@ eMSSerialDataFormat_xmlbz2
bzip2 XML format
@ eMSModType_modn
at the N terminus of a protein
Definition: MSModType_.hpp:66
@ eMSModType_modnpaa
at the N terminus of a peptide at particular amino acids
Definition: MSModType_.hpp:71
@ eMSModType_modcpaa
at the C terminus of a peptide at particular amino acids
Definition: MSModType_.hpp:73
@ eMSModType_modnp
at the N terminus of a peptide
Definition: MSModType_.hpp:70
@ eMSModType_modcp
at the C terminus of a peptide
Definition: MSModType_.hpp:72
@ eMSModType_modnaa
at the N terminus of a protein at particular amino acids
Definition: MSModType_.hpp:67
@ eMSHitError_notenuffpeaks
not enough peaks to search
Definition: MSHitError_.hpp:68
@ eMSSpectrumFileType_pks
@ eMSSpectrumFileType_mgf
@ eMSSpectrumFileType_sciex
@ eMSSpectrumFileType_omxbz2
bzip2 omx file
@ eMSSpectrumFileType_dtablank
@ eMSSpectrumFileType_pkl
@ eMSSpectrumFileType_omx
xml for iterative search
@ eMSSpectrumFileType_dtaxml
@ eMSSpectrumFileType_dta
@ eMSSpectrumFileType_unknown
@ eMSSpectrumFileType_asc
@ eMSSpectrumFileType_xml
xml MSRequest
@ eMSSpectrumFileType_oms
asn.1 binary for iterative search
static int input()
int i
const int kMSLadderMax
Definition: msladder.hpp:56
const double kNeutron
neutron mass
Definition: msms.hpp:70
EMSPeakListTypes
enum that describes type of peak list
Definition: mspeak.hpp:63
@ eMSPeakListCharge1
Definition: mspeak.hpp:66
EMSTerminalBias
is the peptide statistically biased in any way on either end?
Definition: msscore.hpp:550
@ eMSNTerminalBias
Definition: msscore.hpp:552
@ eMSNoTerminalBias
Definition: msscore.hpp:551
@ eMSBothTerminalBias
Definition: msscore.hpp:554
@ eMSCTerminalBias
Definition: msscore.hpp:553
const int kIonDirection[]
ion direction.
Definition: msscore.hpp:86
EMSIonSeries
enumeration of ion series
Definition: msscore.hpp:68
constexpr auto sort(_Init &&init)
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::SIZE size
#define abs(a)
Definition: ncbi_heapmgr.c:130
unsigned int a
Definition: ncbi_localip.c:102
Prototypes for portable math library (ported from C Toolkit)
std::istream & in(std::istream &in_, double &x_)
USING_SCOPE(objects)
int PositiveSign(int input)
Definition: omssa.cpp:557
USING_NCBI_SCOPE
Definition: omssa.cpp:54
DEFINE_STATIC_FAST_MUTEX(iSearchMutex)
const double kHighEval
Definition: omssa.hpp:83
#define MAXMOD2
Definition: omssa.hpp:77
#define MAXMISSEDCLEAVE
Definition: omssa.hpp:72
void(* TOMSSACallback)(int TotalSeq, int Completed, void *Anything)
progress callback for CSearch
Definition: omssa.hpp:86
#define MAXMOD
Definition: omssa.hpp:75
bool operator()(const TMassMask &x, const TMassMask &y)
Definition: omssa.cpp:615
int Mask
Definition: omssa.hpp:97
int Mass
Definition: omssa.hpp:97
int GetCharge(char code)
#define N
Definition: crc32.c:57
Modified on Wed Apr 24 14:14:58 2024 by modify_doxy.py rev. 669887