60 int
61 CSearchHelper::ReadModFiles(const string& ModFileName,
62  const string& UserModFileName,
63  const string& Path,
64  CRef <CMSModSpecSet> Modset)
65 {
66  CDirEntry DirEntry(Path);
67  string FileName;
68  try {
69  if(ModFileName == "")
70  ERR_POST(Critical << "modification filename is blank!");
71  if(!CDirEntry::IsAbsolutePath(ModFileName))
72  FileName = DirEntry.GetDir() + ModFileName;
73  else FileName = ModFileName;
74  unique_ptr<CObjectIStream>
75  modsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
76  if(modsin->fail()) {
77  ERR_POST(Fatal << "ommsacl: unable to open modification file" <<
78  FileName);
79  return 1;
80  }
81  modsin->Read(ObjectInfo(*Modset));
82  modsin->Close();
84  } catch (NCBI_NS_STD::exception& e) {
85  ERR_POST(Fatal << "Unable to read modification file " <<
86  FileName << " with error " << e.what());
87  }
89  // read in user mod file, if any
90  if(UserModFileName != "") {
91  try {
92  CRef <CMSModSpecSet> UserModset(new CMSModSpecSet);
93  if(!CDirEntry::IsAbsolutePath(UserModFileName))
94  FileName = DirEntry.GetDir() + UserModFileName;
95  else FileName = UserModFileName;
96  unique_ptr<CObjectIStream>
97  usermodsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
98  if(usermodsin->fail()) {
99  ERR_POST(Warning << "ommsacl: unable to open user modification file" <<
100  ModFileName);
101  return 0;
102  }
103  usermodsin->Read(ObjectInfo(*UserModset));
104  usermodsin->Close();
105  Modset->Append(*UserModset);
106  } catch (NCBI_NS_STD::exception& e) {
107  ERR_POST(Fatal << "Unable to read user modification file " <<
108  FileName << " with error " << e.what());
109  }
110  }
111  return 0;
112 }
115 void
116 CSearchHelper::ReadTaxFile(string& Filename, TTaxNameMap& TaxNameMap)
117 {
118  ifstream taxnames(Filename.c_str());
119  string line;
120  list<string> linelist;
121  list<string>::iterator ilist;
122  while(taxnames && !taxnames.eof()) {
123  getline(taxnames, line);
124  linelist.clear();
125  NStr::Split(line, ",", linelist, NStr::fSplit_Tokenize);
126  if(!linelist.empty()) {
127  ilist = linelist.begin();
128  ilist++;
129  TaxNameMap[NStr::StringToInt(*ilist)] = *(linelist.begin());
130  }
131  }
132 }
134 void
136 {
137  if(!xml_out) return;
138  // turn on xml schema
139  xml_out->SetReferenceSchema();
140  // turn off names in named integers
141  xml_out->SetWriteNamedIntegersByValue(true);
142 }
146 int
147 CSearchHelper::ReadFile(const string& Filename,
148  const EMSSpectrumFileType FileType,
149  CMSSearch& MySearch)
150 {
151  CRef <CMSRequest> Request (new CMSRequest);
152  MySearch.SetRequest().push_back(Request);
153 // CRef <CMSResponse> Response (new CMSResponse);
154 // MySearch.SetResponse().push_back(Response);
156  CNcbiIfstream PeakFile(Filename.c_str());
157  if(!PeakFile) {
158  ERR_POST(Fatal <<" omssacl: not able to open spectrum file " <<
159  Filename);
160  return 1;
161  }
163  CRef <CSpectrumSet> SpectrumSet(new CSpectrumSet);
164  (*MySearch.SetRequest().begin())->SetSpectra(*SpectrumSet);
165  return SpectrumSet->LoadFile(FileType, PeakFile);
166 }
168 int
169 CSearchHelper::ReadSearchRequest(const string& Filename,
170  const ESerialDataFormat DataFormat,
171  CMSSearch& MySearch)
172 {
173  CRef <CMSRequest> Request (new CMSRequest);
174  MySearch.SetRequest().push_back(Request);
175 // CRef <CMSResponse> Response (new CMSResponse);
176 // MySearch.SetResponse().push_back(Response);
178  unique_ptr<CObjectIStream>
179  in(CObjectIStream::Open(Filename.c_str(), DataFormat));
180  in->Open(Filename.c_str(), DataFormat);
181  if(in->fail()) {
182  ERR_POST(Warning << "omssacl: unable to search file" <<
183  Filename);
184  return 1;
185  }
186  in->Read(ObjectInfo(*Request));
187  in->Close();
188  return 0;
189 }
192 int
193 CSearchHelper::ReadCompleteSearch(const string& Filename,
194  const ESerialDataFormat DataFormat,
195  bool bz2,
196  CMSSearch& MySearch)
197 {
198  unique_ptr <CNcbiIfstream> raw_in;
199  unique_ptr <CCompressionIStream> compress_in;
200  unique_ptr <CObjectIStream> in;
202  if( bz2 ) {
203  raw_in.reset(new CNcbiIfstream(Filename.c_str()));
204  compress_in.reset( new CCompressionIStream (*raw_in,
207  in.reset(CObjectIStream::Open(DataFormat, *compress_in));
208  }
209  else {
210  in.reset(CObjectIStream::Open(Filename.c_str(), DataFormat));
211  }
212  if(in->fail()) {
213  ERR_POST(Warning << "omssacl: unable to search file" <<
214  Filename);
215  return 1;
216  }
217  in->Read(ObjectInfo(MySearch));
218  in->Close();
219  return 0;
220 }
223 int
225  CConstRef <CMSInFile> InFile,
226  bool* SearchEngineIterative)
227 {
228  string Filename(InFile->GetInfile());
229  EMSSpectrumFileType DataFormat =
230  static_cast <EMSSpectrumFileType> (InFile->GetInfiletype());
232  switch (DataFormat) {
238  return CSearchHelper::ReadFile(Filename, DataFormat, MySearch);
239  break;
241  if(SearchEngineIterative) *SearchEngineIterative = true;
242  return CSearchHelper::ReadCompleteSearch(Filename, eSerial_AsnBinary, false, MySearch);
243  break;
245  if(SearchEngineIterative) *SearchEngineIterative = true;
246  return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, false, MySearch);
247  break;
249  return CSearchHelper::ReadSearchRequest(Filename, eSerial_Xml, MySearch);
250  break;
252  return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, true, MySearch);
253  break;
258  default:
259  break;
260  }
261  return 1; // not supported
262 }
266  const string Filename,
267  ESerialDataFormat FileFormat,
268  bool IncludeRequest,
269  bool bz2)
270 {
271  unique_ptr <CNcbiOfstream> raw_out;
272  unique_ptr <CCompressionOStream> compress_out;
273  unique_ptr <CObjectOStream> txt_out;
275  if( bz2 ) {
276  raw_out.reset(new CNcbiOfstream(Filename.c_str()));
277  compress_out.reset( new CCompressionOStream (*raw_out,
278  new CBZip2StreamCompressor(),
280  txt_out.reset(CObjectOStream::Open(FileFormat, *compress_out));
281  }
282  else {
283  txt_out.reset(CObjectOStream::Open(Filename.c_str(), FileFormat));
284  }
286  if(FileFormat == eSerial_Xml) {
287  CObjectOStreamXml *xml_out = dynamic_cast <CObjectOStreamXml *> (txt_out.get());
289  }
290  if(IncludeRequest)
291  txt_out->Write(ObjectInfo(MySearch));
292  else
293  txt_out->Write(ObjectInfo(**MySearch.SetResponse().begin()));
294 }
297 int
300  CRef <CMSModSpecSet> Modset)
301 {
302  CMSSearchSettings::TOutfiles::const_iterator iOutFile;
304  for(iOutFile = OutFiles.begin(); iOutFile != OutFiles.end(); ++iOutFile) {
305  string Filename((*iOutFile)->GetOutfile());
306  EMSSerialDataFormat DataFormat =
307  static_cast <EMSSerialDataFormat> ((*iOutFile)->GetOutfiletype());
310  unique_ptr <CObjectOStream> txt_out;
311  if(DataFormat == eMSSerialDataFormat_asntext)
312  FileFormat = eSerial_AsnText;
313  if(DataFormat == eMSSerialDataFormat_asnbinary)
314  FileFormat = eSerial_AsnBinary;
315  if(DataFormat == eMSSerialDataFormat_xml)
316  FileFormat = eSerial_Xml;
317  if(DataFormat == eMSSerialDataFormat_xmlbz2)
318  FileFormat = eSerial_Xml;
320  switch (DataFormat) {
325  Filename,
326  FileFormat,
327  (*iOutFile)->GetIncluderequest(),
328  false);
329  break;
332  Filename,
333  FileFormat,
334  (*iOutFile)->GetIncluderequest(),
335  true);
336  break;
338  {
339  CPepXML outPepXML;
340  outPepXML.ConvertFromOMSSA(MySearch, Modset, Filename, Filename);
341  unique_ptr<CObjectOStream> file_out(CObjectOStream::Open(Filename, eSerial_Xml));
342  *file_out << outPepXML;
343  }
344  break;
346  {
347  CNcbiOfstream oscsv;
349  (*MySearch.SetResponse().begin())->PrintCSV(oscsv, Modset);
350  oscsv.close();
351  }
352  break;
354  default:
355  {
356  ERR_POST(Error << "Unknown output file format " << DataFormat);
357  }
358  return 1;
359  break;
360  }
361  }
362  return 0;
363 }
365 void
367 {
368  list <string> ValidError;
369  if(Settings->Validate(ValidError) != 0) {
370  list <string>::iterator iErr;
371  for(iErr = ValidError.begin(); iErr != ValidError.end(); iErr++)
372  ERR_POST(Warning << *iErr);
373  ERR_POST(Fatal << "Unable to validate settings");
374  }
375 }
378 void
380  CRef<CMSSearchSettings> &Settings)
381 {
382  if(FileName != "" ) {
383  try {
384  unique_ptr<CObjectIStream>
385  paramsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
386  if(paramsin->fail()) {
387  ERR_POST(Fatal << "ommsacl: unable to open parameter file" <<
388  FileName);
389  return;
390  }
391  paramsin->Read(ObjectInfo(*Settings));
392  paramsin->Close();
394  } catch (NCBI_NS_STD::exception& e) {
395  ERR_POST(Fatal << "Unable to read parameter file " <<
396  FileName << " with error " << e.what());
397  }
398  }
399 }
404 /////////////////////////////////////////////////////////////////////////////
405 //
406 // CSearch::
407 //
408 // Performs the ms/ms search
409 //
412 CSearch::CSearch(int tNum):
413 UseRankScore(false),
414 Iterative(false),
415 RestrictedSearch(false)
416 {
417  ThreadNum = tNum;
418 }
422 {
423  iSearchGlobal = -1;
424  MaxMZ = 0;
425  SharedPeakSet.Reset(0);
426 }
429 int CSearch::InitBlast(const char *blastdb, bool use_mmap)
430 {
431  if (!blastdb) return 0;
432  rdfp.Reset(new CSeqDB(blastdb, CSeqDB::eProtein,
433  0, 0, use_mmap));
434  numseq = rdfp->GetNumOIDs();
435  return 0;
436 }
439 // create the ladders from sequence
442  int iSearch,
443  int position,
444  int endposition,
445  int *Masses,
446  int iMissed,
447  CAA& AA,
448  int iMod,
449  CMod ModList[],
450  int NumMod)
451 {
453  SetLadderContainer().Begin(Iter);
454  while(Iter != SetLadderContainer().SetLadderMap().end()) {
455  bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
456  GetSettings()->GetNoprolineions().end(),
457  CMSMatchedPeakSetMap::Key2Series(Iter->first)) !=
458  GetSettings()->GetNoprolineions().end();
459  if (!(*(Iter->second))[iMod]->
460  CreateLadder(CMSMatchedPeakSetMap::Key2Series(Iter->first),
462  Sequence,
463  iSearch,
464  position,
465  endposition,
466  Masses[iMissed],
467  MassArray,
468  AA,
469  SetMassAndMask(iMissed, iMod).Mask,
470  ModList,
471  NumMod,
472  *SetSettings(),
473  NoProline
474  )) return 1;
475  SetLadderContainer().Next(Iter);
476  }
478  return 0;
479 }
482 // compare ladders to experiment
// NOTE(review): the first line of the signature (return type, name and, by
// the body's use of it, an iMod parameter) and the declaration of Iter are
// missing from this listing.
// OrLadders is not referenced anywhere in the visible body.
// The function always returns 0.
484  CMSPeak *Peaks,
485  bool OrLadders,
486  const TMassPeak *MassPeak)
487 {
// NOTE(review): MassPeak is dereferenced here, before the "if (MassPeak)"
// null check a few lines below; a null MassPeak would fail on this line.
488  EMSPeakListTypes Which = Peaks->GetWhich(MassPeak->Charge);
// Charge limits passed to the ladder container iteration: both 1 when the
// precursor charge is below Peaks->GetConsiderMult(), otherwise both 0.
490  int ChargeLimitLo(0), ChargeLimitHi(0);
491  if (MassPeak) {
492  if(MassPeak->Charge < Peaks->GetConsiderMult()) {
493  ChargeLimitLo = 1;
494  ChargeLimitHi = 1;
495  }
496  else {
497  ChargeLimitLo = 0;
498  ChargeLimitHi = 0;
499  }
500  }
503  SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
// one flag per peak in the selected peak list, shared by every ladder compared
504  vector<bool> usedPeaks(Peaks->SetPeakLists()[Which]->GetNum(), false);
// compare the iMod-th ladder of each visited series against the peak list
505  while(Iter != SetLadderContainer().SetLadderMap().end()) {
506  Peaks->CompareSortedRank(*((*(Iter->second))[iMod]), Which, usedPeaks);
507  SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
508  }
509  return 0;
510 }
513 // compare ladders to experiment
// NOTE(review): the first line of the signature (return type, name and, by
// the body's use of it, an iMod parameter) and the declaration of Iter are
// missing from this listing.
// Returns true as soon as Peaks->CompareTop() succeeds for the iMod-th
// ladder of any visited series, false if none does.
515  CMSPeak *Peaks,
516  const TMassPeak *MassPeak)
517 {
// Charge limits passed to the ladder container iteration: both 1 when the
// precursor charge is below Peaks->GetConsiderMult(), otherwise both 0
// (also 0 when MassPeak is null).
518  int ChargeLimitLo(0), ChargeLimitHi(0);
519  if (MassPeak) {
520  if(MassPeak->Charge < Peaks->GetConsiderMult()) {
521  ChargeLimitLo = 1;
522  ChargeLimitHi = 1;
523  }
524  else {
525  ChargeLimitLo = 0;
526  ChargeLimitHi = 0;
527  }
528  }
531  SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
532  while(Iter != SetLadderContainer().SetLadderMap().end()) {
// stop at the first ladder that matches
533  if(Peaks->CompareTop(*((*(Iter->second))[iMod]))) return true;
534  SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
535  }
536  return false;
537 }
540 const bool
541 CSearch::ReSearch(const int Number) const
542 {
543  if ( GetSettings()->GetIterativesettings().GetResearchthresh() != 0.0) {
544  // look for hitset
545  CRef <CMSHitSet> HitSet;
546  HitSet = GetResponse()->FindHitSet(Number);
547  if (HitSet.IsNull()) return true;
548  if (HitSet->GetHits().empty()) return true;
549  if ((*HitSet->GetHits().begin())->GetEvalue() <=
550  GetSettings()->GetIterativesettings().GetResearchthresh())
551  return false;
552  else return true;
553  }
554  return true;
555 }
558 {
559  return abs(input);
560 }
562 // loads spectra into peaks
563 //void CSearch::Spectrum2Peak(CMSPeakSet& PeakSet)
565 {
566  CSpectrumSet::Tdata::const_iterator iSpectrum;
567  CMSPeak* Peaks;
569  iSpectrum = GetRequest()->GetSpectra().Get().begin();
570  for (; iSpectrum != GetRequest()->GetSpectra().Get().end(); iSpectrum++) {
571  CRef <CMSSpectrum> Spectrum = *iSpectrum;
572  if (!Spectrum) {
573  ERR_POST(Error << "omssa: unable to find spectrum");
574  return;
575  }
577  // reset charges so that they are absolute values. The charge sign is indicated
578  // by GetSettings()->GetChargehandling().GetNegative()
579  transform(Spectrum->SetCharge().begin(), Spectrum->SetCharge().end(), Spectrum->SetCharge().begin(), PositiveSign);
581  // if iterative search and spectrum should not be re-search, skip
582  if (GetIterative() && !ReSearch(Spectrum->GetNumber()))
583  continue;
585  Peaks = new CMSPeak(GetSettings()->GetHitlistlen());
586  if (!Peaks) {
587  ERR_POST(Error << "omssa: unable to allocate CMSPeak");
588  return;
589  }
591  Peaks->ReadAndProcess(*Spectrum, *GetSettings());
592 #if 0
593  {
594  ofstream os("test.dta");
596  }
597 #endif
598  PeakSet->AddPeak(Peaks);
600  }
601  int Numisotopes(0);
602  if(GetSettings()->CanGetNumisotopes())
603  Numisotopes = GetSettings()->GetNumisotopes();
604  bool Pepppm(false);
605  if(GetSettings()->CanGetPepppm())
606  Pepppm = GetSettings()->GetPepppm();
607  MaxMZ = PeakSet->SortPeaks(MSSCALE2INT(GetSettings()->GetPeptol()),
608  GetSettings()->GetZdep(),
609  Numisotopes, Pepppm, GetSettings()->GetChargehandling().GetNegative());
611 }
613 // compares TMassMasks. Lower m/z first in sort.
615  bool operator() (const TMassMask& x, const TMassMask& y)
616  {
617  if (x.Mass < y.Mass) return true;
618  return false;
619  }
620 };
622 /**
623  * delete variable mods that overlap with fixed mods
624  * @param NumMod the number of modifications
625  * @param ModList modification information
626  */
// NOTE(review): the first line of the signature (return type, name and the
// NumMod parameter) is missing from this listing. NumMod is decremented in
// the body, so it is presumably taken by reference -- confirm against the
// class declaration.
628  CMod ModList[])
629 {
630  int i, j;
// Pass 1: flag every non-fixed entry that shares its site with a fixed one.
// A Fixed value of -1 is used as the temporary "delete" marker.
631  for (i = 0; i < NumMod; i++) {
632  // if variable mod
633  if (ModList[i].GetFixed() != 1) {
634  // iterate thru all mods for comparison
635  for (j = 0; j < NumMod; j++) {
636  // if fixed and at same site
637  if (ModList[j].GetFixed() == 1 &&
638  ModList[i].GetSite() == ModList[j].GetSite()) {
639  // mark mod for deletion
640  ModList[i].SetFixed() = -1;
641  }
642  } // j loop
643  } // IsFixed
644  } // i loop
// Pass 2: remove flagged entries by shifting the tail of the array down.
// i is not advanced after a removal, so the entry moved into slot i is
// examined on the next iteration.
646  // now do the deletion
647  for (i = 0; i < NumMod;) {
648  if (ModList[i].GetFixed() == -1) {
// NumMod is reduced first, so ModList[j+1] below stays within the
// original element count.
649  NumMod--;
650  // if last mod, then just return
651  if (i == NumMod) return;
652  // otherwise, delete the modification
653  for (j=i; j < NumMod; ++j) {
654  ModList[j] = ModList[j+1];
655  }
656  }
657  else i++;
658  }
659  return;
660 }
662 // update sites and masses for new peptide
664  const char *PepStart[],
665  const char *PepEnd[],
666  int NumMod[],
667  CMod ModList[][MAXMOD],
668  int Masses[],
669  int EndMasses[],
670  int NumModSites[],
671  CRef <CMSModSpecSet> &Modset)
672 {
673  // iterate over missed cleavages
674  int iMissed;
675  // maximum mods allowed
676  //int ModMax;
677  // iterate over mods
678  int iMod;
681  // update the longer peptides to add the new peptide (Missed-1) on the end
682  for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
683  // skip start
684  if (PepStart[iMissed] == (const char *)-1) continue;
685  // reset the end sequences
686  PepEnd[iMissed] = PepEnd[Missed - 1];
688  // update new mod masses to add in any new mods from new peptide
690  // first determine the maximum value for updated mod list
691  //if(NumMod[iMissed] + NumMod[Missed-1] >= MAXMOD)
692  // ModMax = MAXMOD - NumMod[iMissed];
693  //else ModMax = NumMod[Missed-1];
695  // now interate thru the new entries
696  const char *OldSite(0);
697  int NumModSitesCount(0), NumModCount(0);
698  for (iMod = 0; iMod < NumMod[Missed-1]; iMod++) {
700  // don't do more than the maximum number of modifications
701  if (NumModCount + NumMod[iMissed] >= MAXMOD) break;
703  // if n-term peptide mod and not at the start of the peptide, don't copy
704  if ((Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnp ||
705  Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnpaa) &&
706  PepStart[iMissed] != ModList[Missed-1][iMod].GetSite()) {
707  continue;
708  }
710  // if n-term protein mod, don't copy
711  if (Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modn ||
712  Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnaa) {
713  continue;
714  }
716  // copy the mod to the old peptide
717  ModList[iMissed][NumModCount + NumMod[iMissed]] =
718  ModList[Missed-1][iMod];
720  // increment site count if not fixed mod and not the same site
721  if (OldSite != ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite() &&
722  ModList[iMissed][NumModCount + NumMod[iMissed]].GetFixed() != 1) {
723  NumModSitesCount++;
724  OldSite = ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite();
725  }
727  // increment number of mods
728  NumModCount++;
731  }
733  // update old masses
734  Masses[iMissed] += Masses[Missed - 1];
736  // update end masses
737  EndMasses[iMissed] = EndMasses[Missed - 1];
739  // update number of Mods
740  NumMod[iMissed] += NumModCount;
742  // update number of Modification Sites
743  NumModSites[iMissed] += NumModSitesCount;
744  }
745 }
748 /**
749  * count the number of unique sites modified
750  *
751  * @param NumModSites the number of unique mod sites
752  * @param NumMod the number of mods
753  * @param ModList modification information
754  */
755 void CSearch::CountModSites(int &NumModSites,
756  int NumMod,
757  CMod ModList[])
758 {
759  NumModSites = 0;
760  int i;
761  const char *OldSite(0);
763  for (i = 0; i < NumMod; i++) {
764  // skip repeated sites and fixed mods
765  if (ModList[i].GetSite() != OldSite && ModList[i].GetFixed() != 1 ) {
766  NumModSites++;
767  OldSite = ModList[i].GetSite();
768  }
769  }
770 }
773 // create the various combinations of mods
775  const char *PepStart[],
776  int Masses[],
777  int EndMasses[],
778  int NumMod[],
779  int NumMassAndMask[],
780  int NumModSites[],
781  CMod ModList[][MAXMOD]
782  )
783 {
784  // need to iterate thru combinations that have iMod.
785  // i.e. iMod = 3 and NumMod=5
786  // 00111, 01011, 10011, 10101, 11001, 11010, 11100, 01101,
787  // 01110
788  // i[0] = 0 --> 5-3, i[1] = i[0]+1 -> 5-2, i[3] = i[1]+1 -> 5-1
789  // then construct bool mask
791  // holders for calculated modification mask and modified peptide masses
792  unsigned Mask, MassOfMask;
793  // iterate thru active mods
794  int iiMod;
795  // keep track of the number of unique masks created. each corresponds to a ladder
796  int iModCount;
797  // missed cleavage
798  int iMissed;
799  // number of mods to consider
800  int iMod;
801  // positions of mods
802  int ModIndex[MAXMOD];
804  // go thru missed cleaves
805  for (iMissed = 0; iMissed < Missed; iMissed++) {
806  // skip start
807  if (PepStart[iMissed] == (const char *)-1) continue;
808  iModCount = 0;
810  // set up non-modified mass
811  SetMassAndMask(iMissed, iModCount).Mass =
812  Masses[iMissed] + EndMasses[iMissed];
813  SetMassAndMask(iMissed, iModCount).Mask = 0;
815  int NumVariable(NumMod[iMissed]); // number of variable mods
816  int NumFixed;
817  // add in fixed mods
818  for (iMod = 0; iMod < NumMod[iMissed]; iMod++) {
819  if (ModList[iMissed][iMod].GetFixed()) {
820  SetMassAndMask(iMissed, iModCount).Mass += ModList[iMissed][iMod].GetPrecursorDelta();
821  SetMassAndMask(iMissed, iModCount).Mask |= 1 << iMod;
822  NumVariable--;
823  }
824  }
825  iModCount++;
826  NumFixed = NumMod[iMissed] - NumVariable;
828  // go thru number of mods allowed
829 // for(iMod = 0; iMod < NumVariable && iModCount < MaxModPerPep; iMod++) {
830  for (iMod = 0; iMod < NumModSites[iMissed] && iModCount < MaxModPerPep; iMod++) {
832  // initialize ModIndex that points to mod sites
834  // todo: ModIndex must always include fixed mods
836  InitModIndex(ModIndex, iMod, NumMod[iMissed],
837  NumModSites[iMissed], ModList[iMissed]);
838  do {
840  // calculate mass
841  MassOfMask = SetMassAndMask(iMissed, 0).Mass;
842  for (iiMod = 0; iiMod <= iMod; iiMod++ )
843  MassOfMask += ModList[iMissed][ModIndex[iiMod + NumFixed]].GetPrecursorDelta();
844  // make bool mask
845  Mask = MakeBoolMask(ModIndex, iMod + NumFixed);
846  // put mass and mask into storage
847  SetMassAndMask(iMissed, iModCount).Mass = MassOfMask;
848  SetMassAndMask(iMissed, iModCount).Mask = Mask;
849 #if 0
850  printf("NumMod = %d iMod = %d, Mask = \n", NumMod[iMissed], iMod);
851  int iii;
852  for (iii=NumMod[iMissed]-1; iii >= 0; iii--) {
853  if (Mask & 1 << iii) printf("1");
854  else printf("0");
855  }
856  printf("\n");
857 #endif
858  // keep track of the number of ladders
859  iModCount++;
861  } while (iModCount < MaxModPerPep &&
862  CalcModIndex(ModIndex, iMod, NumMod[iMissed], NumFixed,
863  NumModSites[iMissed], ModList[iMissed]));
864  } // iMod
866  // if exact mass, add neutrons as appropriate
867  if (SetSettings()->GetPrecursorsearchtype() == eMSSearchType_exact) {
868  int ii;
869  for (ii = 0; ii < iModCount; ++ii) {
870  SetMassAndMask(iMissed, ii).Mass +=
871  SetMassAndMask(iMissed, ii).Mass /
872  MSSCALE2INT(GetSettings()->GetExactmass()) *
874  }
875  }
878  // sort mask and mass by mass
879  sort(MassAndMask.get() + iMissed*MaxModPerPep, MassAndMask.get() + iMissed*MaxModPerPep + iModCount,
880  CMassMaskCompare());
881  // keep track of number of MassAndMask
882  NumMassAndMask[iMissed] = iModCount;
884  } // iMissed
885 }
888 void CSearch::SetIons(list <EMSIonSeries> & Ions)
889 {
890  if (GetSettings()->GetIonstosearch().size() < 1) {
891  ERR_POST(Fatal << "omssa: at least one ions series to search need to be specified");
892  }
893  CMSSearchSettings::TIonstosearch::const_iterator i;
894  i = GetSettings()->GetIonstosearch().begin();
895  for(; i != GetSettings()->GetIonstosearch().end(); ++i) {
896  Ions.push_back(static_cast <EMSIonSeries> (*i));
897  }
898 }
901 void CSearch::InitLadders(list <EMSIonSeries> & Ions)
902 {
904  int MaxLadderSize = GetSettings()->GetMaxproductions();
905  if (MaxLadderSize == 0) MaxLadderSize = kMSLadderMax;
907  int i;
909  list <EMSIonSeries> ::const_iterator iIons;
911  for (iIons = Ions.begin(); iIons != Ions.end(); ++iIons) {
912  for(i = 1; i <= GetSettings()->GetChargehandling().GetMaxproductcharge(); ++i) {
914  push_back(TSeriesChargePairList::value_type(i, *iIons));
915  }
916  }
918 }
922 {
923  SetOidSet().clear();
924  if (GetSettings()->GetIterativesettings().GetSubsetthresh() != 0.0) {
925  SetRestrictedSearch() = true;
926  GetResponse()->
927  GetOidsBelowThreshold(
928  SetOidSet(),
929  GetSettings()->GetIterativesettings().GetSubsetthresh());
930  }
931 }
933 int CSearch::iSearchGlobal = -1;
934 int CSearch::MaxMZ = 0;
938 DEFINE_STATIC_FAST_MUTEX(PeaksExaminedMutex);
941  CRef <CMSResponse> MyResponseIn,
942  CRef <CMSModSpecSet> Modset,
943  CRef <CMSSearchSettings> SettingsIn,
944  TOMSSACallback Callback,
945  void *CallbackData)
946 {
947  initRequestIn = MyRequestIn;
948  initResponseIn = MyResponseIn;
949  initModset = Modset;
950  initSettingsIn = SettingsIn;
951  initCallback = Callback;
952  initCallbackData = CallbackData;
953 }
955 void* CSearch::Main(void)
956 {
959  initModset,
961  initCallback);
963  return new bool(true);
964 }
// Exit hook; this implementation performs no work.
966 void CSearch::OnExit(void)
967 {
968 }
971 {
972  initRequestIn = fromObj->initRequestIn;
973  initResponseIn = fromObj->initResponseIn;
974  initModset = fromObj->initModset;
975  initSettingsIn = fromObj->initSettingsIn;
976  initCallback = fromObj->initCallback;
978  UseRankScore = fromObj->UseRankScore;
979  Iterative = fromObj->Iterative;
980  numseq = fromObj->numseq;
981  rdfp = fromObj->rdfp;
983 }
986  CRef <CMSResponse> MyResponseIn,
987  CRef <CMSModSpecSet> Modset,
988  CRef <CMSSearchSettings> SettingsIn,
989  TOMSSACallback Callback,
990  void *CallbackData)
991 {
992  try {
993  SetSettings().Reset(SettingsIn);
994  SetRequest().Reset(MyRequestIn);
995  SetResponse().Reset(MyResponseIn);
997  // force the mass scale settings to what is currently used.
1001  // set up automatic number of peaks per bin for noise filter
1002  if (GetSettings()->GetSinglenum() == 0) {
1003  SetSettings()->SetSinglenum() = GetSettings()->GetIonstosearch().size();
1004  }
1005  if (GetSettings()->GetDoublenum() == 0) {
1006  SetSettings()->SetDoublenum() = GetSettings()->GetIonstosearch().size();
1007  }
1010  (GetSettings()->GetEnzyme()));
1012  // do iterative search setup
1013  if (GetIterative()) {
1014  // check to see if the same sequence library
1015  if (GetResponse()->GetDbversion() != Getnumseq())
1016  ERR_POST(Fatal <<
1017  "number of sequences in search library is not the same as previously searched. Unable to do iterative search.");
1018  // if restricted sequence search
1019  // scan thru hits and make map of oids
1020  MakeOidSet();
1021  }
1023  // set maximum number of ladders to calculate per peptide
1024  MaxModPerPep = GetSettings()->GetMaxmods();
1027  list <EMSIonSeries> Ions;
1028  SetIons(Ions);
1029  InitLadders(Ions);
1032  CAA AA;
1034  int Missed; // number of missed cleaves allowed + 1
1035  if (GetEnzyme()->GetNonSpecific()) Missed = 1;
1036  else Missed = GetSettings()->GetMissedcleave()+1;
1038  int iMissed; // iterate thru missed cleavages
1040  int iSearch, hits;
1041  int endposition, position;
1043  // initialize fixed mods
1044  FixedMods.Init(GetSettings()->GetFixed(), Modset);
1045  MassArray.Init(FixedMods, GetSettings()->GetProductsearchtype(), Modset);
1047  GetSettings()->GetPrecursorsearchtype(), Modset);
1048  // initialize variable mods and set enzyme to use n-term methionine cleavage
1049  SetEnzyme()->SetNMethionine() =
1050  VariableMods.Init(GetSettings()->GetVariable(), Modset) ||
1053  const int *IntMassArray = MassArray.GetIntMass();
1054  const int *PrecursorIntMassArray = PrecursorMassArray.GetIntMass();
1055  const char *PepStart[MAXMISSEDCLEAVE];
1056  const char *PepEnd[MAXMISSEDCLEAVE];
1058  // contains informations on individual mod sites
1061  int NumMod[MAXMISSEDCLEAVE];
1062  // the number of modification sites. always less than NumMod.
1063  int NumModSites[MAXMISSEDCLEAVE];
1066  // calculated masses and masks
1069  // the number of masses and masks for each peptide
1070  int NumMassAndMask[MAXMISSEDCLEAVE];
1072  // set up mass array, indexed by missed cleavage
1073  // note that EndMasses is the end mass of peptide, kept separate to allow
1074  // reuse of Masses array in missed cleavage calc
1075  int Masses[MAXMISSEDCLEAVE];
1076  int EndMasses[MAXMISSEDCLEAVE];
1078  int iMod; // used to iterate thru modifications
1080  bool SequenceDone; // are we done iterating through the sequences?
1082  const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
1083  CMSSearchSettings::TTaxids::const_iterator iTax;
1085  CMSHit NewHit; // a new hit of a ladder to an m/z value
1086  CMSHit *NewHitOut; // copy of new hit
1088  const TMassPeak *MassPeak; // peak currently in consideration
1089  CMSPeak* Peaks;
1090  CIntervalTree::const_iterator im; // iterates over interval tree
1092  // iterates over ladders
1093  TLadderMap::iterator Iter;
1095  {{
1096  CFastMutexGuard guard(PeakSetMutex);
1097  if (SharedPeakSet == null) {
1098  SharedPeakSet = new CMSPeakSet();
1100  }
1101  }}
1102  vector <int> taxids;
1103  vector <int>::iterator itaxids;
1104  bool TaxInfo(false); // check to see if any tax information in blast library
1105  bool iSearchNotDone(true);
1107  // iterate through sequences
1108  //for (iSearch = 0; rdfp->CheckOrFindOID(iSearch); iSearch++) {
1109  while (iSearchNotDone) {
1110  {{
1111  CFastMutexGuard guard(iSearchMutex);
1112  iSearchGlobal++;
1114  iSearchNotDone = false;
1115  continue;
1116  }
1117  iSearch = iSearchGlobal;
1118  if (iSearch % 10000 == 0) {
1119  if(Callback) Callback(Getnumseq(), iSearch, CallbackData);
1120  }
1121  }}
1123  // if oid restricted search, check to see if oid is in set
1124  if (GetRestrictedSearch() && SetOidSet().find(iSearch) == SetOidSet().end())
1125  continue;
1127  if (SetSettings()->IsSetTaxids()) {
1128  rdfp->GetTaxIDs(iSearch, taxids, false);
1129  for (itaxids = taxids.begin(); itaxids != taxids.end(); ++itaxids) {
1130  if (*itaxids == 0) continue;
1131  TaxInfo = true;
1132  for (iTax = Tax.begin(); iTax != Tax.end(); ++iTax) {
1133  if (*itaxids == *iTax) goto TaxContinue;
1134  }
1135  }
1136  continue;
1137  }
1138  TaxContinue:
1139  CSeqDBSequence Sequence(rdfp.GetPointer(), iSearch);
1140  SequenceDone = false;
1142  // initialize missed cleavage matrix
1143  for (iMissed = 0; iMissed < Missed; iMissed++) {
1144  PepStart[iMissed] = (const char *)-1; // mark start
1145  PepEnd[iMissed] = Sequence.GetData();
1146  Masses[iMissed] = 0;
1147  EndMasses[iMissed] = 0;
1148  NumMod[iMissed] = 0;
1149  NumModSites[iMissed] = 0;
1151  ModList[iMissed][0].Reset();
1152  }
1153  PepStart[Missed - 1] = Sequence.GetData();
1155  // if non-specific enzyme, set stop point
1156  if (SetEnzyme()->GetNonSpecific()) {
1157  SetEnzyme()->SetStop() = Sequence.GetData() + SetSettings()->GetMinnoenzyme() - 1;
1158  }
1160  // iterate thru the sequence by digesting it
1161  while (!SequenceDone) {
1164  // zero out no missed cleavage peptide mass and mods
1165  // note that Masses and EndMass are separate to reuse
1166  // masses during the missed cleavage calculation
1167  Masses[Missed - 1] = 0;
1168  EndMasses[Missed - 1] = 0;
1169  NumMod[Missed - 1] = 0;
1170  NumModSites[Missed - 1] = 0;
1171  // init no modification elements
1172  ModList[Missed - 1][0].Reset();
1174  // calculate new stop and mass
1175  SequenceDone =
1176  SetEnzyme()->CalcAndCut(Sequence.GetData(),
1177  Sequence.GetData() + Sequence.GetLength() - 1,
1178  &(PepEnd[Missed - 1]),
1179  &(Masses[Missed - 1]),
1180  NumMod[Missed - 1],
1181  MAXMOD,
1182  &(EndMasses[Missed - 1]),
1184  ModList[Missed - 1],
1185  IntMassArray,
1186  PrecursorIntMassArray,
1187  Modset,
1188  SetSettings()->GetMaxproductions()
1189  );
1191  // delete variable mods that overlap with fixed mods
1192  DeleteVariableOverlap(NumMod[Missed - 1],
1193  ModList[Missed - 1]);
1195  // count the number of unique sites modified
1196  CountModSites(NumModSites[Missed - 1],
1197  NumMod[Missed - 1],
1198  ModList[Missed - 1]);
1200  UpdateWithNewPep(Missed, PepStart, PepEnd, NumMod, ModList,
1201  Masses, EndMasses, NumModSites, Modset);
1203  CreateModCombinations(Missed, PepStart, Masses,
1204  EndMasses, NumMod, NumMassAndMask,
1205  NumModSites, ModList);
1208  int OldMass; // keeps the old peptide mass for comparison
1209  bool NoMassMatch; // was there a match to the old mass?
1211  for (iMissed = 0; iMissed < Missed; iMissed++) {
1212  if (PepStart[iMissed] == (const char *)-1) continue; // skip start
1214  // get the start and stop position, inclusive, of the peptide
1215  position = PepStart[iMissed] - Sequence.GetData();
1216  endposition = PepEnd[iMissed] - Sequence.GetData();
1218  // init bool for "Has ladder been calculated?"
1219  ClearLadderCalc(NumMassAndMask[iMissed]);
1221  OldMass = 0;
1222  NoMassMatch = true;
1224  // go thru total number of mods
1225  for (iMod = 0; iMod < NumMassAndMask[iMissed]; iMod++) {
1227  // have we seen this mass before?
1228  if (SetMassAndMask(iMissed, iMod).Mass == OldMass &&
1229  NoMassMatch) continue;
1230  NoMassMatch = true;
1231  OldMass = SetMassAndMask(iMissed, iMod).Mass;
1233  // return peaks where theoretical mass is <= precursor mass + tol
1234  // and >= precursor mass - tol
1235  if (!SetEnzyme()->GetTopDown())
1237  // if top-down enzyme, skip the interval tree match
1238  else
1241  for (; im; ++im ) {
1242  MassPeak = static_cast <const TMassPeak *> (im.GetValue().GetPointerOrNull());
1244  Peaks = MassPeak->Peak;
1245  // make sure we look thru other mod masks with the same mass
1246  NoMassMatch = false;
1248  if (!GetLadderCalc(iMod)) {
1249  if (CreateLadders(Sequence.GetData(),
1250  iSearch,
1251  position,
1252  endposition,
1253  Masses,
1254  iMissed,
1255  AA,
1256  iMod,
1257  ModList[iMissed],
1258  NumMod[iMissed]) != 0) continue;
1259  SetLadderCalc(iMod) = true;
1260  // continue to next sequence if ladders not successfully made
1261  }
1262  else {
1263  TLadderMap::iterator Iter;
1264  SetLadderContainer().Begin(Iter);
1265  while(Iter != SetLadderContainer().SetLadderMap().end()) {
1266  (*(Iter->second))[iMod]->ClearHits();
1267  SetLadderContainer().Next(Iter);
1268  }
1269  }
1271  if (UseRankScore) {
1272  {{
1273  CFastMutexGuard guard(PeaksExaminedMutex);
1274  Peaks->SetPeptidesExamined(MassPeak->Charge)++;
1275  }}
1276  }
1277  if (CompareLaddersTop(iMod,
1278  Peaks,
1279  MassPeak)
1280  ) {
1281  if (!UseRankScore) {
1282  {{
1283  CFastMutexGuard guard(PeaksExaminedMutex);
1284  Peaks->SetPeptidesExamined(MassPeak->Charge)++;
1285  }}
1286  }
1287  CompareLadders(iMod,
1288  Peaks,
1289  false,
1290  MassPeak);
1291  hits = 0;
1292  SetLadderContainer().Begin(Iter);
1293  while(Iter != SetLadderContainer().SetLadderMap().end()) {
1294  hits += (*(Iter->second))[iMod]->HitCount();
1295  SetLadderContainer().Next(Iter);
1296  }
1299  {{
1300  CFastMutexGuard guard(PeakSetMutex);
1301  if (hits >= SetSettings()->GetMinhit()) {
1302  // need to save mods. bool map?
1303  NewHit.SetHits() = hits;
1304  NewHit.SetCharge() = MassPeak->Charge;
1305  // only record if hit kept
1306  if (Peaks->AddHit(NewHit, NewHitOut)) {
1307  NewHitOut->SetStart() = position;
1308  NewHitOut->SetStop() = endposition;
1309  NewHitOut->SetSeqIndex() = iSearch;
1310  NewHitOut->SetExpMass() = MassPeak->Mass;
1311  // record the hits
1312  NewHitOut->
1313  RecordMatches(SetLadderContainer(),
1314  iMod,
1315  Peaks,
1316  SetMassAndMask(iMissed, iMod).Mask,
1317  ModList[iMissed],
1318  NumMod[iMissed],
1319  PepStart[iMissed],
1320  SetSettings()->GetSearchctermproduct(),
1321  SetSettings()->GetSearchb1(),
1322  SetMassAndMask(iMissed, iMod).Mass
1323  );
1324  }
1325  }
1326  }}
1327  } // new addition
1328  } // MassPeak
1329  } //iMod
1330  } // iMissed
1331  if (SetEnzyme()->GetNonSpecific()) {
1332  int NonSpecificMass(Masses[0] + EndMasses[0]);
1333  PartialLoop:
1335  // check that stop is within bounds
1336  //// upper bound is max precursor mass divided by lightest AA
1337  //// if(enzyme->GetStop() - PepStart[0] < MaxMZ/MonoMass[7]/MSSCALE &&
1338  // upper bound redefined so that minimum mass of existing peptide
1339  // is less than the max precursor mass minus the mass of glycine
1340  // assumes that any mods have positive mass
1342  // argghh, doesn't work for semi-tryptic, which resets the mass
1343  // need to use different criterion if semi-tryptic and start position was
1344  // moved. otherwise this criterion is OK
1345  if (NonSpecificMass < MaxMZ /*- MSSCALE2INT(MonoMass[7]) */&&
1346  SetEnzyme()->GetStop() < Sequence.GetData() + Sequence.GetLength() - 1 /*-1 added*/ &&
1347  (SetSettings()->GetMaxnoenzyme() == 0 ||
1348  SetEnzyme()->GetStop() - PepStart[0] + 1 < SetSettings()->GetMaxnoenzyme())
1349  ) {
1350  SetEnzyme()->SetStop()++;
1351  NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*(SetEnzyme()->GetStop())]];
1352  }
1353  // reset to new start with minimum size
1354  else if ( PepStart[0] < Sequence.GetData() + Sequence.GetLength() -
1355  SetSettings()->GetMinnoenzyme()) {
1356  PepStart[0]++;
1357  SetEnzyme()->SetStop() = PepStart[0] + SetSettings()->GetMinnoenzyme() - 1;
1359  // reset mass
1360  NonSpecificMass = 0;
1361  const char *iSeqChar;
1362  for (iSeqChar = PepStart[0]; iSeqChar <= SetEnzyme()->GetStop(); iSeqChar++)
1363  NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*iSeqChar]];
1364  // reset sequence done flag if at end of sequence
1365  SequenceDone = false;
1366  }
1367  else SequenceDone = true;
1369  // if this is partial tryptic, loop back if one end or the other is not tryptic
1370  // for start, need to check sequence before (check for start of seq)
1371  // for end, need to deal with end of protein case
1372  if (!SequenceDone && SetEnzyme()->GetCleaveNum() > 0 &&
1373  PepStart[0] != Sequence.GetData() &&
1374  SetEnzyme()->GetStop() != Sequence.GetData() + Sequence.GetLength() - 1 /* -1 added */ ) {
1375  if (!SetEnzyme()->CheckCleaveChar(PepStart[0]-1) &&
1377  goto PartialLoop;
1378  }
1380  PepEnd[0] = PepStart[0];
1381  }
1382  else {
1383  if (!SequenceDone) {
1384  int NumModCount;
1385  const char *OldSite;
1386  int NumModSitesCount;
1387  // get rid of longest peptide and move the other peptides down the line
1388  for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
1389  // move masses to next missed cleavage
1390  Masses[iMissed] = Masses[iMissed + 1];
1391  // don't move EndMasses as they are recalculated
1393  // move the modification data
1394  NumModCount = 0;
1395  OldSite = 0;
1396  NumModSitesCount = 0;
1397  for (iMod = 0; iMod < NumMod[iMissed + 1]; iMod++) {
1398  // throw away the c term peptide mods as we have a new c terminus
1399  if (Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcp &&
1400  Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcpaa) {
1401  ModList[iMissed][NumModCount] = ModList[iMissed + 1][iMod];
1402  NumModCount++;
1403  // increment mod site count if new site and not fixed mod
1404  if (OldSite != ModList[iMissed + 1][iMod].GetSite() &&
1405  ModList[iMissed + 1][iMod].GetFixed() != 1) {
1406  NumModSitesCount++;
1407  OldSite = ModList[iMissed + 1][iMod].GetSite();
1408  }
1409  }
1410  }
1411  NumMod[iMissed] = NumModCount;
1412  NumModSites[iMissed] = NumModSitesCount;
1414  // copy starts to next missed cleavage
1415  PepStart[iMissed] = PepStart[iMissed + 1];
1416  }
1418  // init new start from old stop
1419  PepEnd[Missed-1] += 1;
1420  PepStart[Missed-1] = PepEnd[Missed-1];
1421  }
1422  }
1424  }
1427  }
1430  if (GetSettings()->IsSetTaxids() && !TaxInfo)
1431  ERR_POST(Warning <<
1432  "Taxonomically restricted search specified and no matching organisms found in sequence library. Did you use a sequence library with taxonomic information?");
1434  }
1435  catch (NCBI_NS_STD::exception& e) {
1436  ERR_POST(Info << "Exception caught in CSearch::Search: " << e.what());
1437  throw;
1438  }
1440  //return PeakSet;
1441 }
1443 ///
1444 /// Adds modification information to hitset
1445 ///
/// Walks the modification records stored on MSHit and, for every record
/// that is not flagged as fixed, appends a new CMSModHit (site and
/// modification type) to the mod list of Hit.
/// NOTE(review): the declarator line of this definition is missing from this
/// listing; the body reads the names Hit and MSHit, which matches the
/// AddModsToHit(Hit, MSHit) call made elsewhere in this file -- confirm.
1448 {
1449  int i;
1450  for (i = 0; i < MSHit->GetNumModInfo(); i++) {
1451  // screen out fixed mods
1452  if (MSHit->GetModInfo(i).GetIsFixed() == 1) continue;
      // copy site and modification enum of this non-fixed mod into a new CMSModHit
1453  CRef< CMSModHit > ModHit(new CMSModHit);
1454  ModHit->SetSite() = MSHit->GetModInfo(i).GetSite();
1455  ModHit->SetModtype() = MSHit->GetModInfo(i).GetModEnum() ;
1456  Hit->SetMods().push_back(ModHit);
1457  }
1458 }
1461 ///
1462 /// Adds ion information to hitset
1463 ///
/// For each of the MSHit->GetHits() hit-info records on MSHit, appends a new
/// CMSMZHit to Hit's m/z hit list carrying the record's ion series, charge,
/// ion number and m/z value.
/// NOTE(review): the declarator line of this definition is missing from this
/// listing; the body reads the names Hit and MSHit, which matches the
/// AddIonsToHit(Hit, MSHit) call made elsewhere in this file -- confirm.
1466 {
1467  int i;
1468  for (i = 0; i < MSHit->GetHits(); i++) {
      // one CMSMZHit per matched-ion record, fields copied one-to-one
1469  CRef<CMSMZHit> IonHit(new CMSMZHit);
1470  IonHit->SetIon() = MSHit->GetHitInfo(i).GetIonSeries();
1471  IonHit->SetCharge() = MSHit->GetHitInfo(i).GetCharge();
1472  IonHit->SetNumber() = MSHit->GetHitInfo(i).GetNumber();
1473  IonHit->SetMz() = MSHit->GetHitInfo(i).GetMZ();
1474  Hit->SetMzhits().push_back(IonHit);
1475  }
1476 }
1479 ///
1480 /// Makes a string hashed out of the sequence plus mods
1481 ///
1483 void CSearch::MakeModString(string& seqstring, string& modseqstring, CMSHit *MSHit)
1484 {
1485  int i;
1486  modseqstring = seqstring;
1487  for (i = 0; i < MSHit->GetNumModInfo(); i++) {
1488  modseqstring += NStr::IntToString(MSHit->GetModInfo(i).GetSite()) + ":" +
1489  NStr::IntToString(MSHit->GetModInfo(i).GetModEnum()) + ",";
1490  }
1491 }
// Fills seqstring with the residues at positions Start..Stop (both inclusive)
// of Sequence: seqstring is emptied first, then for each position the
// character UniqueAA[<sequence byte at that position>] is appended.
// NOTE(review): the first line of this signature (function name and the Start
// parameter) and the line declaring Sequence are missing from this listing;
// call sites in this file pass (start, stop, seqstring, Sequence) -- confirm.
1495  int Stop,
1496  string &seqstring,
1498 {
1499  int iseq;
1500  seqstring.erase();
1502  for (iseq = Start; iseq <= Stop; iseq++) {
      // translate the stored sequence byte into its letter via the UniqueAA table
1503  seqstring += UniqueAA[Sequence.GetData()[iseq]];
1504  }
1505 }
// Converts the per-spectrum search results held in PeakSet into hit sets on
// the search response. For each CMSPeak at the front of PeakSet it:
//   1. finds or creates the CMSHitSet for that spectrum,
//   2. picks an intensity threshold and scores/sorts the hits via CalcNSort,
//   3. (iterative search only) decides whether the existing hits are replaced,
//   4. emits one CMSHits per distinct peptide+modification string and one
//      CMSPepHit per matching defline, for hits passing the e-value cutoff.
// Each CMSPeak is deleted and popped from PeakSet once handled, so PeakSet is
// empty on normal return. WriteBioseqs() is called at the end.
// NOTE(review): the declarator line of this definition is missing from this
// listing, as are the declarations of Hdr and Sequence used in the hit loop.
1509 {
1511  double ThreshStart = GetSettings()->GetCutlo();
1512  double ThreshEnd = GetSettings()->GetCuthi();
1513  double ThreshInc = GetSettings()->GetCutinc();
1514  double Evalcutoff = GetSettings()->GetCutoff();
1516  CMSPeak* Peaks;
1518  TScoreList ScoreList;
1519  TScoreList::iterator iScoreList;
1520  CMSHit * MSHit;
1522  // set the search library version
1525  // Reset the oid set for tracking results
1526  SetOidSet().clear();
      // consume PeakSet front to back; every path through the loop body
      // deletes and pops the front element (or returns)
1528  while(!PeakSet->GetPeaks().empty()) {
1529  Peaks = *(PeakSet->GetPeaks().begin());
1531  // add to hitset
1532  CRef< CMSHitSet > HitSet(null);
1534  // if iterative search, try to find hitset
1535  if (GetIterative()) {
1536  HitSet = SetResponse()->FindHitSet(Peaks->GetNumber());
1537  if (HitSet.IsNull())
1538  ERR_POST(Warning << "unable to find matching hitset");
1539  }
1541  // create a hitset if necessary
1542  if (HitSet.IsNull()) {
1543  HitSet = new CMSHitSet;
1544  if (!HitSet) {
1545  ERR_POST(Error << "omssa: unable to allocate hitset");
1546  return;
1547  }
1548  HitSet->SetNumber(Peaks->GetNumber());
1549  HitSet->SetIds() = Peaks->GetName();
1550  SetResponse()->SetHitsets().push_back(HitSet);
1551  }
1552  HitSet->SetSettingid() = GetSettings()->GetSettingid();
1554  // if there weren't enough peaks to do a search, note in error status
1555  if (Peaks->GetError() == eMSHitError_notenuffpeaks) {
1556  _TRACE("empty set");
1557  HitSet->SetError(eMSHitError_notenuffpeaks);
1558  ScoreList.clear();
1559  delete *(PeakSet->GetPeaks().begin());
1560  PeakSet->GetPeaks().pop_front();
1561  continue;
1562  }
      // MinThreshold defaults to ThreshStart, so that value is used as-is
      // when UseRankScore is set and the scan below is skipped
1564  double Threshold, MinThreshold(ThreshStart), MinEval(1000000.0L);
1565  if (!UseRankScore) {
1566  // now calculate scores and sort
      // scan thresholds from ThreshStart to ThreshEnd in steps of ThreshInc and
      // remember the one whose first score-list entry has the smallest key
1567  for (Threshold = ThreshStart; Threshold <= ThreshEnd;
1568  Threshold += ThreshInc) {
1569  CalcNSort(ScoreList, Threshold, Peaks);
1570  if (!ScoreList.empty()) {
1571  _TRACE("Threshold = " << Threshold <<
1572  "EVal = " << ScoreList.begin()->first);
1573  }
1574  if (!ScoreList.empty() && ScoreList.begin()->first < MinEval) {
1575  MinEval = ScoreList.begin()->first;
1576  MinThreshold = Threshold;
1577  }
1578  ScoreList.clear();
1579  }
1580  }
      // final scoring pass at the selected threshold; ScoreList was cleared above
1581  _TRACE("Min Threshold = " << MinThreshold);
1582  CalcNSort(ScoreList,
1583  MinThreshold,
1584  Peaks);
1586  // if iterative search, check to see if hitset needs to be replaced
      // replace the stored hits when either
      //  - the replace threshold is 0 and the hit set is empty or the new first
      //    score is <= the e-value of the first stored hit, or
      //  - the replace threshold is non-zero and the new first score is <= it;
      // otherwise keep the stored hits and move on to the next spectrum
1587  if (GetIterative() && !ScoreList.empty()) {
1588  if ((GetSettings()->GetIterativesettings().GetReplacethresh() == 0.0 &&
1589  (HitSet->GetHits().empty() ||
1590  ScoreList.begin()->first <= (*HitSet->GetHits().begin())->GetEvalue())) ||
1591  (GetSettings()->GetIterativesettings().GetReplacethresh() != 0.0 &&
1592  ScoreList.begin()->first <= GetSettings()->GetIterativesettings().GetReplacethresh())) {
1593  HitSet->SetHits().clear();
1594  }
1595  else {
1596  ScoreList.clear();
1597  delete *(PeakSet->GetPeaks().begin());
1598  PeakSet->GetPeaks().pop_front();
1599  continue;
1600  }
1601  }
1603  const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
1604  CMSSearchSettings::TTaxids::const_iterator iTax;
1606  // keep a list of redundant peptides
      // keyed by the string built by MakeModString; value is the CMSHits already
      // created for that key, so further deflines attach to the same hit
1607  map <string, CMSHits * > PepDone;
1608  int HitNum(0);
1609  // add to hitset by score
      // HitNum is advanced in the loop header, so it also counts entries that
      // are skipped by the `continue` statements below
1610  for (iScoreList = ScoreList.begin();
1611  iScoreList != ScoreList.end();
1612  ++iScoreList,++HitNum) {
1614  double Score = iScoreList->first;
1615  if (Score > Evalcutoff)
1616  continue;
      // a reported-hit count of 0 disables the limit
1617  if(GetSettings()->CanGetReportedhitcount())
1618  if(GetSettings()->GetReportedhitcount() != 0 && HitNum >= GetSettings()->GetReportedhitcount())
1619  continue;
1621  CMSHits * Hit;
1622  CMSPepHit * Pephit;
1624  MSHit = iScoreList->second;
1626  CBlast_def_line_set::Tdata::const_iterator iDefLine;
1628  // scan taxids
      // when taxids are set in the settings, only deflines whose taxid equals
      // one of them are processed; others are skipped
1629  for (iDefLine = Hdr->Get().begin();
1630  iDefLine != Hdr->Get().end();
1631  ++iDefLine) {
1632  if (GetSettings()->IsSetTaxids()) {
1633  for (iTax = Tax.begin(); iTax != Tax.end(); iTax++) {
1634  if ((*iDefLine)->GetTaxid() == *iTax) goto TaxContinue2;
1635  }
1636  continue;
1637  }
1638  TaxContinue2:
1639  string seqstring, modseqstring;
1641  // keep a list of the oids
1642  SetOidSet().insert(MSHit->GetSeqIndex());
1643  // get the sequence
1646  string tempstartstop;
1647  CreateSequence(MSHit->GetStart(), MSHit->GetStop(),
1648  seqstring, Sequence);
1649  MakeModString(seqstring, modseqstring, MSHit);
      // reuse the CMSHits for a peptide+mod string already seen for this spectrum
1651  if (PepDone.find(modseqstring) != PepDone.end()) {
1652  Hit = PepDone[modseqstring];
1653  }
1654  else {
1655  Hit = new CMSHits;
1656  Hit->SetTheomass(MSHit->GetTheoreticalMass());
1657  Hit->SetPepstring(seqstring);
1658  // set the start AA, if there is one
1659  if (MSHit->GetStart() > 0) {
1660  tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStart()-1]];
1661  Hit->SetPepstart(tempstartstop);
1662  }
1663  else Hit->SetPepstart("");
1665  // set the end AA, if there is one
1666  if (MSHit->GetStop() < Sequence.GetLength() - 1) {
1667  tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStop()+1]];
1668  Hit->SetPepstop(tempstartstop);
1669  }
1670  else Hit->SetPepstop("");
      // NaN and infinite scores are logged and replaced by kHighEval
1672  if (isnan(Score)) {
1673  ERR_POST(Info << "Not a number in hitset " <<
1674  HitSet->GetNumber() <<
1675  " peptide " << modseqstring);
1676  Score = kHighEval;
1677  }
1678  else if (!finite(Score)) {
1679  ERR_POST(Info << "Infinite number in hitset " <<
1680  HitSet->GetNumber() <<
1681  " peptide " << modseqstring);
1682  Score = kHighEval;
1683  }
1684  Hit->SetEvalue(Score);
      // p-value is the score divided by the peptides-examined count for the
      // hit's charge.
      // NOTE(review): no guard here against that count being zero -- confirm
      // it cannot be zero at this point
1685  Hit->SetPvalue(Score/Peaks->
1686  GetPeptidesExamined(MSHit->
1687  GetCharge()));
1688  Hit->SetCharge(MSHit->GetCharge());
1689  Hit->SetMass(MSHit->GetExpMass());
1690  // insert mods here
1691  AddModsToHit(Hit, MSHit);
1692  // insert ions here
1693  AddIonsToHit(Hit, MSHit);
      // the CRef pushed into the hit set holds the new CMSHits; PepDone keeps
      // only the raw pointer for lookup
1694  CRef<CMSHits> hitref(Hit);
1695  HitSet->SetHits().push_back(hitref);
1696  PepDone[modseqstring] = Hit;
1698  }
      // one CMSPepHit per accepted defline, attached to the (possibly shared) Hit
1700  Pephit = new CMSPepHit;
1702  if ((*iDefLine)->CanGetSeqid()) {
1703  // find a gi
1704  ITERATE(list< CRef<CSeq_id> >, seqid, (*iDefLine)->GetSeqid()) {
1705  if ((**seqid).IsGi()) {
1706  Pephit->SetGi((**seqid).GetGi());
1707  break;
1708  }
1709  }
1711  Pephit->SetAccession(
1712  FindBestChoice((*iDefLine)->GetSeqid(), CSeq_id::Score)->
1713  GetSeqIdString(false));
1714  }
1717  Pephit->SetStart(MSHit->GetStart());
1718  Pephit->SetStop(MSHit->GetStop());;
1719  Pephit->SetDefline((*iDefLine)->GetTitle());
1720  Pephit->SetProtlength(Sequence.GetLength());
1721  Pephit->SetOid(MSHit->GetSeqIndex());
1722  CRef<CMSPepHit> pepref(Pephit);
1723  Hit->SetPephits().push_back(pepref);
1725  }
1726  }
      // done with this spectrum: drop its scores and release the CMSPeak
1727  ScoreList.clear();
1728  delete *(PeakSet->GetPeaks().begin());
1729  PeakSet->GetPeaks().pop_front();
1730  }
1731  // write bioseqs to output
1732  WriteBioseqs();
1733 }
// For the oid referenced by *iOids, looks for an existing bioseq in the
// response; if none is found, fetches the bioseq from rdfp and appends it to
// the response's bioseq list together with its oid.
// NOTE(review): the declarator line and the loop header that declares and
// advances iOids are missing from this listing; the inner closing brace below
// belongs to that loop. Presumably this is the WriteBioseqs() routine called
// elsewhere in this file, iterating the oid set -- confirm.
1737 {
1739  CConstRef <CMSBioseq::TSeq> Bioseq(SetResponse()->SetBioseqs().GetBioseqByOid(*iOids));
      // only add sequences that are not already present in the response
1740  if (Bioseq.IsNull()) {
1741  CRef <CMSBioseq> MSBioseq (new CMSBioseq);
1742  MSBioseq->SetSeq(*rdfp->GetBioseq(*iOids));
1743  MSBioseq->SetOid() = *iOids;
1744  SetResponse()->SetBioseqs().Set().push_back(MSBioseq);
1745  }
1746  }
1747 }
// Fills in the matched peaks of Hit for one ion series / series charge and
// annotates every matched peak with an expected-ion density and a mass
// tolerance. Returns the CMSMatchedPeakSet obtained from Hit's ion-series
// match map for (SeriesCharge, Ion).
// A Maxproductions value of 0 is replaced by kMSLadderMax.
// NOTE(review): the first line of this signature (return type, function name
// and the Hit parameter) is missing from this listing, as is the declaration
// of Sequence; the call site in this file is
// PepCharge(Hit, charge, ion, minintensity, Which, Peaks, maxproductions).
1751  int SeriesCharge,
1752  int Ion,
1753  int minintensity,
1754  int Which,
1755  CMSPeak *Peaks,
1756  int Maxproductions)
1757 {
1758  int iii;
      // lowmz starts at 0, which is the lower bound used for the first matched peak
1759  int lowmz(0), highmz;
1761  unsigned Size = Hit.GetStop() - Hit.GetStart();
1762  if (Maxproductions == 0) Maxproductions = kMSLadderMax;
1765  // decide if there is any terminal bias
      // each cleavage offset of the enzyme votes for a terminus (1 -> N, 0 -> C);
      // votes for different termini combine into eMSBothTerminalBias
1766  EMSTerminalBias TerminalBias(eMSNoTerminalBias);
1768  for(iii = 0; iii < GetEnzyme()->GetCleaveNum(); ++iii) {
1769  // n term
1770  if(GetEnzyme()->GetCleaveOffset()[iii] == 1 ) {
1771  // check whether the bias should be on both ends
1772  if(TerminalBias == eMSNTerminalBias || TerminalBias == eMSNoTerminalBias)
1773  TerminalBias = eMSNTerminalBias;
1774  else
1775  TerminalBias = eMSBothTerminalBias;
1776  }
1777  // c term
1778  else if (GetEnzyme()->GetCleaveOffset()[iii] == 0 ) {
1779  // check whether the bias should be on both ends
1780  if(TerminalBias == eMSCTerminalBias || TerminalBias == eMSNoTerminalBias)
1781  TerminalBias = eMSCTerminalBias;
1782  else
1783  TerminalBias = eMSBothTerminalBias;
1784  }
1785  }
1787 //#if 0
1788  // make a copy of the peptide sequence
1790  string seqstring;
1791  CreateSequence(Hit.GetStart(),
1792  Hit.GetStop(),
1793  seqstring,
1794  Sequence);
1795 //#endif
      // NoProline is true when Ion appears in the settings' no-proline ion list
1796  bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
1797  GetSettings()->GetNoprolineions().end(),
1798  Ion) !=
1799  GetSettings()->GetNoprolineions().end();
1800  // fill in the matched ions
1801  Hit.FillMatchedPeaks(SeriesCharge,
1802  Ion,
1803  Size,
1804  minintensity,
1805  false,
1806  TerminalBias,
1807  SeriesCharge*Maxproductions
1808 //#if 0
1809  ,
1810  seqstring,
1811  NoProline
1812 //#endif
1813  );
1814  CMSMatchedPeakSet *MatchPeakSet = Hit.SetIonSeriesMatchMap().SetSeries(SeriesCharge, Ion);
1815  TMatchedPeakSet::iterator bin, prev, next;
      // for each matched peak, define an m/z window [lowmz, highmz] bounded by the
      // midpoints to its neighbours and store (peak count in window) / (window width)
1817  for ( bin = MatchPeakSet->SetMatchedPeakSet().begin(); bin != MatchPeakSet->SetMatchedPeakSet().end(); ++bin) {
1818  // need to go thru match info, not hit info.
      // prev is only read from the second iteration on, after it was set below
1819  if(bin != MatchPeakSet->SetMatchedPeakSet().begin()) {
1820  lowmz = ((*bin)->GetMZ() + (*prev)->GetMZ())/2;
1821  }
1822  next = bin;
1823  ++next;
1824  if(next != MatchPeakSet->SetMatchedPeakSet().end()) {
1825  highmz = ((*bin)->GetMZ() + (*next)->GetMZ())/2;
1826  }
      // last peak: upper bound is the experimental mass divided by the series charge
1827  else highmz = Hit.GetExpMass()/SeriesCharge;
      // NOTE(review): highmz == lowmz would make the divisor below zero --
      // confirm that cannot occur for the peak sets produced here
1828  (*bin)->SetExpIons() =
1829  Peaks->CountMZRange(lowmz,
1830  highmz,
1831  minintensity,
1832  Which) /
1833  (double)(highmz - lowmz);
1835  (*bin)->SetMassTolerance() = (Peaks->GetTol())/SeriesCharge;
1836  prev = bin;
1837  }
1838  return MatchPeakSet;
1839 }
// Runs PepCharge for the series-charge/ion pair referenced by iPairList
// (first = series charge, second = ion series, as passed to PepCharge) and
// files the resulting matched peak set by ion direction: appended to Forward
// when kIonDirection[ion] is 1, to Backward when it is -1, and to neither
// list for any other value.
// NOTE(review): the first lines of this signature (return type, function name
// and the Peaks parameter) are missing from this listing; call sites in this
// file pass (Peaks, Hit, Which, minintensity, iPairList, Forward, Backward).
1845  CMSHit& Hit,
1846  EMSPeakListTypes Which,
1847  int minintensity,
1848  const TSeriesChargePairList::const_iterator &iPairList,
1849  list<CMSMatchedPeakSet *> &Forward,
1850  list<CMSMatchedPeakSet *> &Backward)
1851 {
1852  CMSMatchedPeakSet * current;
1854  current = PepCharge(Hit,
1855  iPairList->first,
1856  iPairList->second,
1857  minintensity,
1858  Which,
1859  Peaks,
1860  GetSettings()->GetMaxproductions());
      // sort the result into the list matching the direction of this ion series
1862  if (kIonDirection[iPairList->second] == 1)
1863  Forward.push_back(current);
1864  else if (kIonDirection[iPairList->second] == -1)
1865  Backward.push_back(current);
1866 }
1869 void CSearch::DoubleCompare(list<CMSMatchedPeakSet *> &SingleForward,
1870  list<CMSMatchedPeakSet *> &SingleBackward,
1871  list<CMSMatchedPeakSet *> &Double,
1872  bool DoubleForward)
1873 {
1874  list<CMSMatchedPeakSet *>::iterator iDouble, iFront, iBack;
1876  for (iDouble = Double.begin(); iDouble != Double.end(); ++iDouble) {
1878  for(iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
1879  (*iDouble)->Compare(*iFront, DoubleForward);
1880  }
1882  for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
1883  (*iDouble)->Compare(*iBack, !DoubleForward);
1884  }
1885  }
1886 }
// Scores every stored hit of Peaks at the given intensity Threshold and
// inserts (score, pointer to hit) pairs into ScoreList. For each charge and
// each hit it builds the matched peak sets per ion series, optionally
// cross-compares them, computes a Poisson mean and p-value, and converts that
// to the inserted score as 3e3 * pval * N. Hits whose Poisson mean is zero,
// negative, NaN or infinite, or whose hit count is below the mean, are
// skipped and never reach ScoreList.
// NOTE(review): the first lines of this signature (return type, function name
// and the ScoreList parameter) are missing from this listing; call sites in
// this file pass (ScoreList, Threshold, Peaks).
1890  double Threshold,
1891  CMSPeak* Peaks
1892  )
1893 {
1894  int iCharges;
1895  int iHitList;
1896  int Tophitnum = GetSettings()->GetTophitnum();
1898  for (iCharges = 0; iCharges < Peaks->GetNumCharges(); iCharges++) {
1900  TMSHitList& HitList = Peaks->GetHitList(iCharges);
      // only the first GetHitListIndex(iCharges) entries of HitList are visited
1901  for (iHitList = 0; iHitList != Peaks->GetHitListIndex(iCharges);
1902  iHitList++) {
1904  int tempMass = HitList[iHitList].GetExpMass();
1905  int Charge = HitList[iHitList].GetCharge();
1906  EMSPeakListTypes Which = Peaks->GetWhich(Charge);
1908  // set up new score
1911  // minimum intensity
      // Threshold is applied as a fraction of the maximum intensity of this peak list
1912  int minintensity = static_cast <int> (Threshold * Peaks->GetMaxI(Which));
1915  TSeriesChargePairList::const_iterator iPairList;
1916  list <CMSMatchedPeakSet *> SingleForward, SingleBackward, DoubleForward, DoubleBackward;
      // build matched peak sets: series with charge 1 go to the Single* lists;
      // other series go to the Double* lists, and only when the hit's charge
      // is at least GetConsiderMult()
1918  for (iPairList = SetLadderContainer().GetSeriesChargePairList().begin();
1919  iPairList != SetLadderContainer().GetSeriesChargePairList().end();
1920  ++iPairList) {
1922  // charge 1
1923  if (iPairList->first == 1) {
1924  MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
1925  iPairList, SingleForward, SingleBackward);
1926  }
1927  else if (Charge >= Peaks->GetConsiderMult()) {
1928  MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
1929  iPairList, DoubleForward, DoubleBackward);
1930  }
1931  }
1933  list <CMSMatchedPeakSet *> ::iterator iFront, iBack, iDouble;
      // cross-series comparison is skipped when the no-correlation-score setting is non-zero
1935  if(GetSettings()->GetNocorrelationscore() == 0) {
1936  // do the singly charge comparison
1937  for (iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
1938  for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
1939  (*iFront)->Compare(*iBack, false);
1940  }
1941  }
1942  if (Charge >= Peaks->GetConsiderMult()) {
1943  DoubleCompare(SingleForward, SingleBackward, DoubleForward, true);
1944  DoubleCompare(SingleForward, SingleBackward, DoubleBackward, false);
1945  }
1946  }
      // adjust = max delta of the hit relative to the scaled MS/MS tolerance,
      // clamped to the range [GetAutomassadjust(), 1.0]
1949  double adjust = HitList[iHitList].GetMaxDelta() /
1950  MSSCALE2INT(GetSettings()->GetMsmstol());
1951  if(adjust < GetSettings()->GetAutomassadjust())
1952  adjust = GetSettings()->GetAutomassadjust();
1953  if(adjust > 1.0)
1954  adjust = 1.0;
      // NOTE(review): GetProbfollowingion() is passed as both the first and the
      // third argument -- confirm this is intended
1955  double a =
1956  HitList[iHitList].CalcPoissonMean(GetSettings()->GetProbfollowingion(),
1957  GetEnzyme()->GetCleaveNum(),
1958  GetSettings()->GetProbfollowingion(),
1959  19,
1960  adjust);
1962  if (a == 0) {
1963  // threshold probably too high
1964  continue;
1965  }
1966  if (a < 0 ) {
1967  _TRACE("poisson mean is < 0");
1968  continue;
1969  }
1970  else if (isnan(a) || !finite(a)) {
1971  ERR_POST(Info << "poisson mean is NaN or is infinite");
1972  continue;
1973  }
1975  // keep going if obviously insignificant
      // i.e. skip hits with fewer matches than the Poisson mean
1976  if (HitList[iHitList].GetHits() < a) continue;
1978  double pval; // statistical p-value
1979  int N; // number of peptides
      // peptides examined for this charge plus a pseudocount term that grows
      // with (Charge - 1) scaled by GetZdep(); the sum is stored in an int
1980  N = Peaks->GetPeptidesExamined(Charge) +
1981  (GetSettings()->GetZdep() * (Charge - 1) + 1) *
1982  GetSettings()->GetPseudocount();
1984  if (!UseRankScore) {
1985  int High, Low, NumPeaks, NumLo, NumHi;
1986  Peaks->HighLow(High, Low, NumPeaks, tempMass, Charge, Threshold, NumLo, NumHi);
      // NOTE(review): NumPeaks == 0 is not guarded before this division -- confirm
1988  double TopHitProb = ((double)Tophitnum)/NumPeaks;
1989  // correct for situation where more tophits than experimental peaks
1990  if (TopHitProb > 1.0) TopHitProb = 1.0;
1991  int numhits = HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which));
1992  double Normal = HitList[iHitList].CalcNormalTopHit(a, TopHitProb);
1993  pval = HitList[iHitList].CalcPvalueTopHit(a, numhits, Normal, TopHitProb);
1994  }
1995  else {
1996  pval = HitList[iHitList].CalcPvalue(a, HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which)));
1997  }
      // with rank scoring (and not Poisson-only) the p-value is further multiplied
      // by the rank probability and a fixed factor of 10
1998  if (UseRankScore && !GetPoissonOnly()) {
1999  if (HitList[iHitList].GetM() != 0.0) {
2000  double Perf = HitList[iHitList].CalcRankProb();
2001  _TRACE( "Perf=" << Perf << " pval=" << pval << " N=" << N );
2002  pval *= Perf;
2003  pval *= 10.0; // correction to scales
2004  }
2005  else ERR_POST(Info << "M is zero");
2006  }
      // the inserted key; the pointer refers to the element inside Peaks' hit list
2007  double eval = 3e3 * pval * N;
2008 // _TRACE( " pval=" << pval << " eval=" << eval );
2009  ScoreList.insert(pair<const double, CMSHit *>
2010  (eval, &(HitList[iHitList])));
2011  }
2012  }
2013 }
2016 {
2017 }
