NCBI C++ ToolKit
msms.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: msms.hpp 84663 2018-11-27 18:22:00Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the authors in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Lewis Y. Geer
27  *
28  * File Description:
29  * Helper classes for ms search algorithms
30  *
31  * ===========================================================================
32  */
33 
34 #ifndef MSMS__HPP
35 #define MSMS__HPP
36 
37 #ifdef WIN32
38 #pragma warning(disable:4786)
39 #endif
40 
41 #include <list>
42 #include <iostream>
43 #include <fstream>
44 #include <string>
45 #include <set>
46 #include <deque>
47 #include <map>
51 #include "Mod.hpp"
52 #include "SpectrumSet.hpp"
53 
54 // #include <corelib/ncbistd.hpp>
55 
58 BEGIN_SCOPE(omssa)
59 
60 
61 // non-redundified integer intervals of amino acids
63 
64 // ABCXYZ ion mass calculation constants. See Papayannopoulos, pg 63.
65 
66 /** mass of water */
67 const double kWater = 18.010565;
68 
69 /** neutron mass */
70 const double kNeutron = 1.008664904;
71 
72 // const double AAAbundance[] = {1.0, 0.0758, 1.0, 0.0167, 0.0528, 0.0635, 0.0408, 0.0683, 0.0224, 0.058, 0.0593, 0.0943, 0.0237, 0.0447, 0.0491, 0.0399, 0.0514, 0.0715, 0.0569, 0.0656, 0.0124, 1.0, 0.0318, 1.0, 1.0, 1.0, 0.0};
73 
74 // masses taken from Papayannopoulos, IA, Mass Spectrometry Reviews, 1995, 14, 49-73.
75 // selenocysteine calculated by using cysteine mass and adding difference between Se and S from webelements.
76 // monoisotopic mass
77 const double MonoMass[] = {0.0, 71.03711, 0.0, 103.00919, 115.02694, 129.04259, 147.06841, 57.02147, 137.05891, 113.08406, 128.09496, 113.08406, 131.04049, 114.04293, 97.05276, 128.05858, 156.10111, 87.03203, 101.04768, 99.06841, 186.07931, 0.0, 163.06333, 0.0, 149.903 , 0.0, 113.08406, 237.14776, 0.0 };
78 // average mass
79 const double AverageMass[] = {0.0, 71.08, 0.0, 103.15, 115.09, 129.12, 147.18, 57.05, 137.14, 113.16, 128.17, 113.16, 131.20, 114.10, 97.12, 128.13, 156.19, 87.08, 101.11, 99.13, 186.21, 0.0, 163.18, 0.0, 150.044, 0.0, 113.16, 237.30, 0.0 };
80 // n15 enriched monoisotopic mass
81 const double MonoN15Mass[] = {0.0, 72.034144893, 0.0, 104.006224893, 116.023974893, 130.039624893, 148.065444893, 58.018494893, 140.050014679, 114.081094893, 130.089029786, 114.081094893, 132.037524893, 116.036999786, 98.049794893, 130.052649786, 160.089239572, 88.029064893, 102.044714893, 100.065444893, 188.073379786, 0.0, 164.060364893, 0.0, 150.8964, 0.0, 114.081094893, 240.1388649, 0.0 };
82 
83 
84 // const int AAIntervals[] = { 57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 150, 156, 163, 186 };
85 
86 
87 const double kTermMass[] = {1.007825, 1.007825, 1.007825, 17.00274, 17.00274, 17.00274, 0, 0, 0, 0, 1.007825, 17.00274, 1.007825};
88 const double kIonTypeMass[] = { -27.994915, 0.0, 17.02655, 27.994915, 2.01565, -14.003075, 0, 0, 0, 0, -26.98709, -15.9994, -70.981405};
89 
90 /////////////////////////////////////////////////////////////////////////////
91 //
92 // CMassArray::
93 //
94 // Holds AA indexed mass array
95 //
96 
98 public:
99  CMassArray(void) {};
100 
101  const double * const GetMass(void) const;
102  const int * const GetIntMass(void) const;
103 
104  //! initialize mass arrays with fixed mods
105  void Init(const CMSSearchSettings::TProductsearchtype &SearchType);
106  // initialize mass arrays with fixed mods
107  void Init(const CMSMod &Mods,
108  const CMSSearchSettings::TProductsearchtype &SearchType,
109  CRef <CMSModSpecSet> Modset);
110 private:
111  // inits mass arrays
112  void x_Init(const CMSSearchSettings::TProductsearchtype &SearchType);
113  // masses as doubles
114  double CalcMass[kNumUniqueAA];
115  // mass in scaled integer Daltons
116  int IntCalcMass[kNumUniqueAA];
117  // Se mass is 78.96, S is 32.066
118 };
119 
120 /////////////////// CMassArray inline methods
121 
122 inline const double * const CMassArray::GetMass(void) const
123 {
124  return CalcMass;
125 }
126 
127 inline const int * const CMassArray::GetIntMass(void) const
128 {
129  return IntCalcMass;
130 }
131 
132 /////////////////// end of CMassArray inline methods
133 
134 
135 /////////////////////////////////////////////////////////////////////////////
136 //
137 // CAA::
138 //
139 // lookup table for AA index
140 //
141 
142 // lookup table for reversing an AA character to AA number
144 public:
145  CAA(void);
146 
147  /**
148  * return the map for translating AA char to AA number
149  */
150  const char * const GetMap(void) const;
151 
152 private:
153  char AAMap[256];
154 };
155 
156 
157 /////////////////// CAA inline methods
158 
159 inline
160 const char * const CAA::GetMap(void) const
161 {
162  return AAMap;
163 }
164 
165 /////////////////// end of CAA inline methods
166 
167 /**
168  * contains information for a post translational modification
169  * at a particular sequence site
170  */
171 
173 public:
174 
175  /**
176  * type for a site on a sequence
177  */
178 
179  typedef const char * TSite;
180 
181  /**
182  * type for masses
183  */
184  typedef int TMass;
185 
186  /**
187  * what is the type of the mod?
188  */
189  typedef int TEnum;
190 
191  /**
192  * is the mod fixed?
193  */
194  typedef int TFixed;
195 
196  /**
197  * default constructor
198  */
199  CMod(void);
200 
201  /**
202  * copy constructor
203  */
204  CMod(const CMod &Old);
205 
206  /**
207  * assignment
208  */
209  const CMod& operator= (const CMod& rhs);
210 
211  /**
212  * reset to default values
213  */
214  void Reset(void);
215 
216  /**
217  * Get the site position
218  */
219  TSite GetSite(void) const;
220 
221  /**
222  * Set the site postion
223  */
224  TSite& SetSite(void);
225 
226  /**
227  * Get the mass to be added to the precursor mass
228  */
229  TMass GetPrecursorDelta(void) const;
230 
231  /**
232  * Set the site postion
233  */
234  TMass& SetPrecursorDelta(void);
235 
236  /**
237  * Get the mass to be added to the product mass
238  */
239  TMass GetProductDelta(void) const;
240 
241  /**
242  * Set the site postion
243  */
244  TMass& SetProductDelta(void);
245 
246  /**
247  * Get mod type
248  */
249  TEnum GetEnum(void) const;
250 
251  /**
252  * Set the mod type
253  */
254  TEnum& SetEnum(void);
255 
256  /**
257  * Is the mod fixed?
258  */
259  TFixed GetFixed(void) const;
260 
261  /**
262  * set mod state (1 = fixed)
263  */
264  TFixed& SetFixed(void);
265 
266 private:
267  /**
268  * the position within the peptide of a variable modification
269  */
270  const char *Site;
271 
272  /**
273  * the modification mass for the precursor
274  */
276 
277  /**
278  * the modification mass for the product
279  */
281 
282  /**
283  * the modification type (used for saving for output)
284  */
285  int ModEnum;
286 
287  /**
288  * track fixed mods, 1 == fixed
289  */
290  int IsFixed;
291 };
292 
293 /**
294  * default constructor
295  */
296 inline
298 {
299  Reset();
300 }
301 
302 /**
303  * reset to default values
304  */
305 inline
306 void CMod::Reset(void)
307 {
308  Site = (const char *)-1;
309  PrecursorDelta = 0;
310  ProductDelta = 0;
311  ModEnum = 0;
312  IsFixed = 0;
313 }
314 
315 /**
316  * copy constructor
317  */
318 inline
319 CMod::CMod(const CMod &Old)
320 {
321  *this = Old;
322 }
323 
324 /**
325  * assignment
326  */
327 inline
328 const CMod& CMod::operator= (const CMod& rhs)
329 {
330  Site = rhs.Site;
333  ModEnum = rhs.ModEnum;
334  IsFixed = rhs.IsFixed;
335 
336  return *this;
337 }
338 
339 /**
340  * Get the site position
341  */
342 inline
344 {
345  return Site;
346 }
347 
348 /**
349  * Set the site postion
350  */
351 inline
353 {
354  return Site;
355 }
356 
357 /**
358  * Get the mass to be added to the precursor mass
359  */
360 inline
362 {
363  return PrecursorDelta;
364 }
365 
366 /**
367  * Set the site postion
368  */
369 inline
371 {
372  return PrecursorDelta;
373 }
374 
375 /**
376  * Get the mass to be added to the product mass
377  */
378 inline
380 {
381  return ProductDelta;
382 }
383 
384 /**
385  * Set the site postion
386  */
387 inline
389 {
390  return ProductDelta;
391 }
392 
393 /**
394  * Get mod type
395  */
396 inline
398 {
399  return ModEnum;
400 }
401 
402 /**
403  * Set the mod type
404  */
405 inline
407 {
408  return ModEnum;
409 }
410 
411 /**
412  * Is the mod fixed?
413  */
414 inline
416 {
417  return IsFixed;
418 }
419 
420 /**
421  * set mod state (1 = fixed)
422  */
423 inline
425 {
426  return IsFixed;
427 }
428 
429 
430 
431 /**
432  * generic exception class for omssa
433  */
434 
436  public:
437  /// Error types that subsystem can generate.
438  enum EErrCode {
439  eMSParseException, ///< unable to parse COMSSASearch
440  eMSNoMatchException, ///< unmatched sequence library
441  eMSLadderNotFound ///< ladder not found in CLadderContainer
442  };
443 
444  /// Translate from the error code value to its string representation.
445  virtual const char* GetErrCodeString(void) const override
446  {
447  switch (GetErrCode()) {
448  case eMSParseException: return "unable to parse COMSSASearch";
449  case eMSNoMatchException: return "unmatched sequence library";
450  case eMSLadderNotFound: return "ladder not found in CLadderContainer";
451  default: return CException::GetErrCodeString();
452  }
453  }
454 
455  // Standard exception boilerplate code.
457 };
458 
459 
460 /////////////////////////////////////////////////////////////////////////////
461 //
462 // CCleave::
463 //
464 // Classes for cleaving sequences quickly and computing masses
465 //
466 
467 typedef std::deque <int> TCleave;
468 
470 public:
471  CCleave(void);
472 
473  /**
474  * cleaves the sequence. Note that output is 0 and the positions
475  * of the aa's to be cleaved. Should be interpreted as [0, pos1],
476  * (pos1, pos2], ..., (posn, end]. This weirdness is historical --
477  * the C++ string class uses an identifier for end-of-string and has
478  * no identifier for before start of string.
479  *
480  * @param SeqStart pointer to start of sequence
481  * @param SeqEnd pointer to end of sequence
482  * @param PepStart ** to the start of peptide
483  * @param Masses cumulative masses of peptides
484  * @param NumMod number of variable mods
485  * @param MaxNumMod upper bound on number of variable mods
486  * @param EndMass the end masses of the peptides
487  * @param VariableMods list of variable mods
488  * @param FixedMods list of fixed modifications
489  * @param ModList mod site info
490  * @param IntCalcMass integer AA masses
491  * @param PrecursorIntCalcMass integer precursor masses
492  * @param Modset list of possible mods
493  * @param Maxproductions max number of product ions to calculate
494  *
495  * @return are we at the end of the sequence?
496  */
497  bool CalcAndCut(const char *SeqStart,
498  const char *SeqEnd, // the end, not beyond the end
499  const char **PepStart, // return value
500  int *Masses, // Masses, indexed by miss cleav, mods
501  int& NumMod, // num Mods
502  int MaxNumMod, // max num mods
503  int *EndMasses,
504  CMSMod &VariableMods,
505  CMSMod &FixedMods,
506  CMod ModList[],
507  const int *IntCalcMass, // array of int AA masses
508  const int *PrecursorIntCalcMass, // precursor masses
509  CRef <CMSModSpecSet> &Modset,
510  int Maxproductions
511  );
512 
513 
514  /**
515  * Check to see if we are at a cleavage point
516  * Used by CalcAndCut
517  *
518  * @param iPepStart pointer to location of sequence cursor
519  * @param iSeqStart points to start of the sequence
520  */
521 
522  bool CheckCleave(const char *iPepStart, const char *iSeqStart);
523 
524 
525  /**
526  * is the character given one of the cleavage chars?
527  *
528  * @param iPepStart position in the sequence
529  *
530  */
531  bool CheckCleaveChar(const char *iPepStart) const;
532 
533 
534  void CalcMass(char SeqChar,
535  int *Masses,
536  const int *IntCalcMass
537  );
538 
539  void EndMass(int *Masses
540  );
541 
542  int findfirst(char* Seq, int Pos, int SeqLen);
543 
544  ///
545  /// looks for non-specific ptms
546  ///
547  void CheckNonSpecificMods(EMSModType ModType, // the type of mod
548  CMSMod &VariableMods, // list of mods to look for
549  int& NumMod, // number of mods applied to peptide
550  int MaxNumMod, // maximum mods for a peptide
551  CMod ModList[], // list of mod sites
552  const char *iPepStart, // position in protein
553  bool setfixed,
554  CRef <CMSModSpecSet> &Modset
555  );
556 
557  ///
558  /// looks for amino acid specific ptms
559  ///
560  void CheckAAMods(EMSModType ModType, // the type of mod
561  CMSMod &VariableMods, // list of mods to look for
562  int& NumMod, // number of mods applied to peptide
563  char SeqChar, // the amino acid
564  int MaxNumMod, // maximum mods for a peptide
565  CMod ModList[], // list of mod sites
566  const char *iPepStart, // position in protein
567  bool setfixed,
568  CRef <CMSModSpecSet> &Modset
569  );
570 
571  /**
572  * checks all mods for a particular type
573  */
574  void CheckMods(EMSModType NonSpecificIn, EMSModType Specific,
575  CMSMod &VariableMods, CMSMod &FixedMods,
576  int& NumMod, char SeqChar, int MaxNumMod,
577  CMod ModList[],
578  const char *iPepStart,
579  CRef <CMSModSpecSet> &Modset);
580 
581  /**
582  * Is the enzyme really a top-down search?
583  */
584  bool GetTopDown(void) const;
585 
586  /**
587  * Get the enzyme stop value
588  */
589  const char * GetStop(void) const;
590 
591  /**
592  * Set the enzyme stop value
593  */
594  const char * & SetStop(void);
595 
596  /**
597  * Is this a non-specific search?
598  */
599  bool GetNonSpecific(void) const;
600 
601  /**
602  * Get the number of cleavage chars
603  */
604  int GetCleaveNum(void) const;
605 
606  /**
607  * Get the the cleave offset, 0 = cterm, 1 = nterm
608  */
609  const char * GetCleaveOffset(void) const;
610 
611  /**
612  * Is there n-term methionine cleavage?
613  */
614  bool GetNMethionine(void) const;
615 
616  /**
617  * Set n-term methionine cleavage
618  */
619  bool& SetNMethionine(void);
620 
621  const string GetCleaveAt(void) const;
622 
623  bool GetCheckProline(void) const;
624 
625  const char * GetCleaveSense(void) const;
626 
627 protected:
628  int ProtonMass; // mass of the proton
629  int TermMass; // mass of h2o
631 
632  /**
633  * where to cleave. last two letters are in readdb format, assuming
634  * it uses the UniqueAA alphabet
635  */
636  const char *CleaveAt;
637 
638  /**
639  * what is the cleavage offset
640  */
641  const char *CleaveOffset;
642 
643  /**
644  * How many cleavage characters
645  */
646  int kCleave;
647 
648  /**
649  * TopDown
650  * does this signify a top-down search
651  */
652  bool TopDown;
653 
654  /**
655  * Stop
656  * Stop position for no-enzyme and semi-tryptic searches
657  */
658  const char *Stop;
659 
660  /**
661  * Is this a non-specific search?
662  */
664 
665  /**
666  * Should we apply the proline rule (no cleavage before proline)
667  */
669 
670  /**
671  * n-terminal methionine cleavage
672  */
674 };
675 
676 
677 /////////////////// CCleave inline methods
678 
679 /**
680  * What are the cleavage chars?
681  *
682  */
683 inline
684 const string CCleave::GetCleaveAt(void) const
685 {
686  string out;
688  return out;
689 }
690 
691 /**
692  * Should we check for proline?
693  *
694  */
695 inline
696 bool CCleave::GetCheckProline(void) const
697 {
698  return CheckProline;
699 }
700 
701 /**
702  * Should we check for proline?
703  *
704  */
705 inline
706 const char * CCleave::GetCleaveSense(void) const
707 {
708  if (CleaveOffset[0] == 0) return "C";
709  else return "N";
710 }
711 
712 
713 /**
714  * is the character given one of the cleavage chars?
715  *
716  * @param iPepStart position in the sequence
717  *
718  */
719 inline
720 bool CCleave::CheckCleaveChar(const char *iPepStart) const
721 {
722  int j;
723  for(j = 0; j < kCleave; j++)
724  if(*(iPepStart + CleaveOffset[j]) == CleaveAt[j]) return true;
725  return false;
726 }
727 
728 
729 /**
730  * Check to see if we are at a cleavage point
731  * Used by CalcAndCut
732  *
733  * @param iPepStart pointer to location of sequence cursor
734  */
735 inline
736 bool CCleave::CheckCleave(const char *iPepStart, const char *iSeqStart)
737 {
738  // methionine cleavage
739  // (allowed even if TopDown or NonSpecific)
740  if(iPepStart == iSeqStart && NMethionine && *(iPepStart) == '\x0c') {
741  return true;
742  }
743 
744  if(TopDown) return false; // todo: methionine cleavage allowed
745 
746  if(NonSpecific) {
747  if(iPepStart == GetStop()) return true;
748  return false; // todo: methionine cleavage allowed
749  }
750 
751  // check specific cleave amino acids
752  if(CheckCleaveChar(iPepStart)) {
753  if(CheckProline && *(iPepStart+1) == '\x0e' )
754  return false; // not before proline
755  return true;
756  }
757  return false;
758 }
759 
760 
761 inline
762 void CCleave::CalcMass(char SeqChar,
763  int *Masses,
764  const int *IntCalcMass
765  )
766 {
767  *Masses += IntCalcMass[ReverseAA.GetMap()[SeqChar]];
768 }
769 
770 
771 inline
772 void CCleave::EndMass( int *EndMasses
773  )
774 {
775  *EndMasses = TermMass;
776 }
777 
778 
779 inline
780 void CCleave::CheckAAMods(EMSModType ModType, CMSMod &VariableMods, int& NumMod,
781  char SeqChar, int MaxNumMod, CMod ModList[],
782  const char *iPepStart,
783  bool setfixed,
784  CRef <CMSModSpecSet> &Modset)
785 {
786  // iterator thru mods VariableMods.GetAAMods(ModType)
787  size_t iMods;
788  int iChar;
789 
790  for (iMods = 0;
791  iMods < VariableMods.GetAAMods(ModType).size(); ++iMods) {
792  for (iChar = 0; iChar < Modset->GetModNumChars(VariableMods.GetAAMods(ModType)[iMods]); ++iChar) {
793  if (SeqChar == Modset->GetModChar(VariableMods.GetAAMods(ModType)[iMods], iChar) && NumMod < MaxNumMod) {
794  ModList[NumMod].SetSite() = iPepStart;
795  ModList[NumMod].SetPrecursorDelta() = Modset->GetModMass(VariableMods.GetAAMods(ModType)[iMods]);
796  ModList[NumMod].SetProductDelta() = Modset->GetNeutralLoss(VariableMods.GetAAMods(ModType)[iMods]);
797  ModList[NumMod].SetEnum() = VariableMods.GetAAMods(ModType)[iMods];
798  if (setfixed) ModList[NumMod].SetFixed() = 1;
799  else ModList[NumMod].SetFixed() = 0;
800  NumMod++;
801  }
802  }
803  }
804 }
805 
806 
807 inline
808 void CCleave::CheckNonSpecificMods(EMSModType ModType, CMSMod &VariableMods,
809  int& NumMod, int MaxNumMod,
810  CMod ModList[],
811  const char *iPepStart,
812  bool setfixed,
813  CRef <CMSModSpecSet> &Modset)
814 {
815  // iterator thru mods
816  size_t iMods;
817 
818  for (iMods = 0;
819  iMods < VariableMods.GetAAMods(ModType).size(); ++iMods) {
820  if (NumMod < MaxNumMod) {
821  ModList[NumMod].SetSite() = iPepStart;
822  ModList[NumMod].SetPrecursorDelta() = Modset->GetModMass(VariableMods.GetAAMods(ModType)[iMods]);
823  ModList[NumMod].SetProductDelta() = Modset->GetNeutralLoss(VariableMods.GetAAMods(ModType)[iMods]);
824  ModList[NumMod].SetEnum() = VariableMods.GetAAMods(ModType)[iMods];
825  if (setfixed) ModList[NumMod].SetFixed() = 1;
826  else ModList[NumMod].SetFixed() = 0;
827  NumMod++;
828  }
829  }
830 }
831 
832 inline
833 void CCleave::CheckMods(EMSModType NonSpecificIn, EMSModType Specific,
834  CMSMod &VariableMods, CMSMod &FixedMods,
835  int& NumMod, char SeqChar, int MaxNumMod,
836  CMod ModList[],
837  const char *iPepStart,
838  CRef <CMSModSpecSet> &Modset)
839 {
840  // check non-specific mods
841  CheckNonSpecificMods(NonSpecificIn, VariableMods, NumMod, MaxNumMod, ModList,
842  iPepStart, false, Modset);
843  CheckNonSpecificMods(NonSpecificIn, FixedMods, NumMod, MaxNumMod, ModList,
844  iPepStart, true, Modset);
845  // check specific mods
846  CheckAAMods(Specific, VariableMods, NumMod, SeqChar, MaxNumMod, ModList,
847  iPepStart, false, Modset);
848  // fix
849  CheckAAMods(Specific, FixedMods, NumMod, SeqChar, MaxNumMod, ModList,
850  iPepStart, true, Modset);
851 }
852 
853 inline
854 bool CCleave::GetTopDown(void) const
855 {
856  return TopDown;
857 }
858 
859 inline
860 bool CCleave::GetNonSpecific(void) const
861 {
862  return NonSpecific;
863 }
864 
865 inline
866 const char * CCleave::GetStop(void) const
867 {
868  return Stop;
869 }
870 
871 inline
872 const char * & CCleave::SetStop(void)
873 {
874  return Stop;
875 }
876 
877 inline
878 int CCleave::GetCleaveNum(void) const
879 {
880  return kCleave;
881 }
882 
883 inline
884 const char * CCleave::GetCleaveOffset(void) const
885 {
886  return CleaveOffset;
887 }
888 
889 inline
890 bool CCleave::GetNMethionine(void) const
891 {
892  return NMethionine;
893 }
894 
895 
896 inline
898 {
899  return NMethionine;
900 }
901 
902 
903 /////////////////// end of CCleave inline methods
904 
905 
906 
908 public:
909  CCNBr(void);
910 };
911 
912 
914 public:
915  CFormicAcid(void);
916 };
917 
918 
920 public:
921  CTrypsin(void);
922 };
923 
924 
926 public:
927  CArgC(void);
928 };
929 
930 
932 public:
933  CChymotrypsin(void);
934 };
935 
936 
938 public:
939  CLysC(void);
940 };
941 
942 
944 public:
945  CLysCP(void);
946 };
947 
948 
950 public:
951  CPepsinA(void);
952 };
953 
954 
956 public:
957  CTrypCNBr(void);
958 };
959 
960 
962 public:
963  CTrypChymo(void);
964 };
965 
966 
968 public:
969  CTrypsinP(void);
970 };
971 
972 
973 //! whole protein (no cleavage)
975 public:
976  CWholeProtein(void);
977 };
978 
979 
980 //! Asp-N, Nterm of D
982 public:
983  CAspN(void);
984 };
985 
986 
987 //! Glu-C, Cterm of E
989 public:
990  CGluC(void);
991 };
992 
993 //! Glu-C and Asp-N
995 public:
996  CGluCAspN(void);
997 };
998 
999 
1000 /**
1001  * eMSEnzymes_top_down
1002  * top-down search of ETD spectra
1003  *
1004  */
1005 
1007 public:
1008  CTopDown(void);
1009 };
1010 
1011 
1012 /**
1013  * eMSEnzymes_semi_tryptic
1014  * semi tryptic search (one end of peptide has to be tryptic)
1015  *
1016  */
1017 
1019 public:
1020  CSemiTryptic(void);
1021 };
1022 
1023 
1024 /**
1025  * eMSEnzymes_no_enzyme
1026  * search without enzyme (precursor mass only)
1027  *
1028  */
1029 
1031 public:
1032  CNoEnzyme(void);
1033 };
1034 
1035 
1036 /**
1037  * eMSEnzymes_chymotrypsin_p
1038  * chymotrypsin without proline rule
1039  *
1040  */
1041 
1043 public:
1044  CChymoP(void);
1045 };
1046 
1047 /**
1048  * eMSEnzymes_aspn_de
1049  * Asp-N that cuts at D and E
1050  *
1051  */
1052 
1054 public:
1055  CAspNDE(void);
1056 };
1057 
1058 
1059 /**
1060  * eMSEnzymes_gluc_de
1061  * Glu-C that cuts at D and E
1062  *
1063  */
1064 
1066 public:
1067  CGluCDE(void);
1068 };
1069 
1070 
1071 
1072 /**
1073  * eMSEnzymes_lysn
1074  * Cuts N term of lysine
1075  *
1076  */
1077 
1079 public:
1080  CLysN(void);
1081 };
1082 
1083 /**
1084  * eMSEnzymes_thermolysin_p
1085  * cuts N terminal at A,F,I,L,M or V
1086  *
1087  */
1088 
1090 public:
1091  CThermolysinP(void);
1092 };
1093 
1094 /**
1095  *
1096  * Chymotrypsin, cuts N terminal at A,F,I,L,M or V
1097  *
1098  */
1099 
1101 public:
1102  CSemiChymotrypsin(void);
1103 };
1104 
1105 /**
1106  *
1107  * Glu-C, Cterm of E
1108  *
1109  */
1110 
1112 public:
1113  CSemiGluC(void);
1114 };
1115 
1116 
1117 
1118 ///
1119 /// factory to return back object for enzyme
1120 ///
1121 
1123 {
1124 public:
1125  static CRef <CCleave> CleaveFactory(const EMSEnzymes enzyme);
1126 
1127 };
1128 
1129 
1130 END_SCOPE(omssa)
1133 
1134 #endif
const int kNumUniqueAA
@MSMod.hpp User-defined methods of the data storage class.
Definition: MSMod.hpp:53
Definition: msms.hpp:143
const char *const GetMap(void) const
return the map for translating AA char to AA number
Definition: msms.hpp:160
char AAMap[256]
Definition: msms.hpp:153
Definition: msms.hpp:925
eMSEnzymes_aspn_de Asp-N that cuts at D and E
Definition: msms.hpp:1053
Asp-N, Nterm of D.
Definition: msms.hpp:981
Definition: msms.hpp:907
eMSEnzymes_chymotrypsin_p chymotrypsin without proline rule
Definition: msms.hpp:1042
factory to return back object for enzyme
Definition: msms.hpp:1123
const char *& SetStop(void)
Set the enzyme stop value.
Definition: msms.hpp:872
const char * CleaveOffset
what is the cleavage offset
Definition: msms.hpp:641
bool CheckProline
Should we apply the proline rule (no cleavage before proline)
Definition: msms.hpp:668
void CheckNonSpecificMods(EMSModType ModType, CMSMod &VariableMods, int &NumMod, int MaxNumMod, CMod ModList[], const char *iPepStart, bool setfixed, CRef< CMSModSpecSet > &Modset)
looks for non-specific ptms
Definition: msms.hpp:808
bool CheckCleave(const char *iPepStart, const char *iSeqStart)
Check to see if we are at a cleavage point Used by CalcAndCut.
Definition: msms.hpp:736
bool GetNonSpecific(void) const
Is this a non-specific search?
Definition: msms.hpp:860
const char * Stop
Stop Stop position for no-enzyme and semi-tryptic searches.
Definition: msms.hpp:658
bool NMethionine
n-terminal methionine cleavage
Definition: msms.hpp:673
void CalcMass(char SeqChar, int *Masses, const int *IntCalcMass)
Definition: msms.hpp:762
bool & SetNMethionine(void)
Set n-term methionine cleavage.
Definition: msms.hpp:897
const char * GetCleaveOffset(void) const
Get the the cleave offset, 0 = cterm, 1 = nterm.
Definition: msms.hpp:884
void CheckMods(EMSModType NonSpecificIn, EMSModType Specific, CMSMod &VariableMods, CMSMod &FixedMods, int &NumMod, char SeqChar, int MaxNumMod, CMod ModList[], const char *iPepStart, CRef< CMSModSpecSet > &Modset)
checks all mods for a particular type
Definition: msms.hpp:833
int ProtonMass
Definition: msms.hpp:628
const char * GetStop(void) const
Get the enzyme stop value.
Definition: msms.hpp:866
bool NonSpecific
Is this a non-specific search?
Definition: msms.hpp:663
bool GetCheckProline(void) const
Should we check for proline?
Definition: msms.hpp:696
bool GetNMethionine(void) const
Is there n-term methionine cleavage?
Definition: msms.hpp:890
int TermMass
Definition: msms.hpp:629
void EndMass(int *Masses)
Definition: msms.hpp:772
const string GetCleaveAt(void) const
What are the cleavage chars?
Definition: msms.hpp:684
const char * CleaveAt
where to cleave.
Definition: msms.hpp:636
bool GetTopDown(void) const
Is the enzyme really a top-down search?
Definition: msms.hpp:854
bool CheckCleaveChar(const char *iPepStart) const
is the character given one of the cleavage chars?
Definition: msms.hpp:720
const char * GetCleaveSense(void) const
Should we check for proline?
Definition: msms.hpp:706
int GetCleaveNum(void) const
Get the number of cleavage chars.
Definition: msms.hpp:878
bool TopDown
TopDown does this signify a top-down search.
Definition: msms.hpp:652
int kCleave
How many cleavage characters.
Definition: msms.hpp:646
void CheckAAMods(EMSModType ModType, CMSMod &VariableMods, int &NumMod, char SeqChar, int MaxNumMod, CMod ModList[], const char *iPepStart, bool setfixed, CRef< CMSModSpecSet > &Modset)
looks for amino acid specific ptms
Definition: msms.hpp:780
CAA ReverseAA
Definition: msms.hpp:630
Glu-C and Asp-N.
Definition: msms.hpp:994
eMSEnzymes_gluc_de Glu-C that cuts at D and E
Definition: msms.hpp:1065
Glu-C, Cterm of E.
Definition: msms.hpp:988
Definition: msms.hpp:943
Definition: msms.hpp:937
eMSEnzymes_lysn Cuts N term of lysine
Definition: msms.hpp:1078
int GetNeutralLoss(int Mod) const
get neutral loss
int GetModNumChars(int Mod) const
get the number of modification AA's
char GetModChar(int Mod, int Number) const
get modification AA's
int GetModMass(int Mod) const
get modification mass
Definition: Mod.hpp:61
const TModLists & GetAAMods(EMSModType Type) const
Definition: Mod.hpp:85
double CalcMass[kNumUniqueAA]
Definition: msms.hpp:114
CMassArray(void)
Definition: msms.hpp:99
const int *const GetIntMass(void) const
Definition: msms.hpp:127
const double *const GetMass(void) const
Definition: msms.hpp:122
int IntCalcMass[kNumUniqueAA]
Definition: msms.hpp:116
contains information for a post translational modification at a particular sequence site
Definition: msms.hpp:172
TMass & SetProductDelta(void)
Set the site postion.
Definition: msms.hpp:388
TMass & SetPrecursorDelta(void)
Set the site postion.
Definition: msms.hpp:370
int TMass
type for masses
Definition: msms.hpp:184
int IsFixed
track fixed mods, 1 == fixed
Definition: msms.hpp:290
TSite GetSite(void) const
Get the site position.
Definition: msms.hpp:343
int TFixed
is the mod fixed?
Definition: msms.hpp:194
const char * Site
the position within the peptide of a variable modification
Definition: msms.hpp:270
TMass GetPrecursorDelta(void) const
Get the mass to be added to the precursor mass.
Definition: msms.hpp:361
TFixed & SetFixed(void)
set mod state (1 = fixed)
Definition: msms.hpp:424
CMod(void)
default constructor
Definition: msms.hpp:297
int ProductDelta
the modification mass for the product
Definition: msms.hpp:280
TEnum GetEnum(void) const
Get mod type.
Definition: msms.hpp:397
int TEnum
what is the type of the mod?
Definition: msms.hpp:189
TMass GetProductDelta(void) const
Get the mass to be added to the product mass.
Definition: msms.hpp:379
void Reset(void)
reset to default values
Definition: msms.hpp:306
TSite & SetSite(void)
Set the site postion.
Definition: msms.hpp:352
int PrecursorDelta
the modification mass for the precursor
Definition: msms.hpp:275
int ModEnum
the modification type (used for saving for output)
Definition: msms.hpp:285
TEnum & SetEnum(void)
Set the mod type.
Definition: msms.hpp:406
const char * TSite
type for a site on a sequence
Definition: msms.hpp:179
TFixed GetFixed(void) const
Is the mod fixed?
Definition: msms.hpp:415
const CMod & operator=(const CMod &rhs)
assignment
Definition: msms.hpp:328
eMSEnzymes_no_enzyme search without enzyme (precursor mass only)
Definition: msms.hpp:1030
generic exception class for omssa
Definition: msms.hpp:435
NCBI_EXCEPTION_DEFAULT(COMSSAException, CException)
EErrCode
Error types that subsystem can generate.
Definition: msms.hpp:438
@ eMSLadderNotFound
ladder not found in CLadderContainer
Definition: msms.hpp:441
@ eMSNoMatchException
unmatched sequence library
Definition: msms.hpp:440
@ eMSParseException
unable to parse COMSSASearch
Definition: msms.hpp:439
virtual const char * GetErrCodeString(void) const override
Translate from the error code value to its string representation.
Definition: msms.hpp:445
CObject –.
Definition: ncbiobj.hpp:180
Chymotrypsin, cuts N terminal at A,F,I,L,M or V.
Definition: msms.hpp:1100
Glu-C, Cterm of E.
Definition: msms.hpp:1111
eMSEnzymes_semi_tryptic semi tryptic search (one end of peptide has to be tryptic)
Definition: msms.hpp:1018
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
@ e_Ncbieaa
Definition: sequtil.hpp:57
@ e_Ncbistdaa
Definition: sequtil.hpp:58
eMSEnzymes_thermolysin_p cuts N terminal at A,F,I,L,M or V
Definition: msms.hpp:1089
eMSEnzymes_top_down top-down search of ETD spectra
Definition: msms.hpp:1006
whole protein (no cleavage)
Definition: msms.hpp:974
unsigned short Pos
static void Init(void)
Definition: cursor6.c:76
std::ofstream out("events_result.xml")
main entry point for tests
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
#define EXCEPTION_VIRTUAL_BASE
Do not use virtual base classes in exception declaration at all, because in this case derived class s...
Definition: ncbiexpt.hpp:1388
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOMSSA_EXPORT
Definition: ncbi_export.h:1355
EMSModType
enumerate modification types
Definition: MSModType_.hpp:64
EMSEnzymes
enumerate enzymes
Definition: MSEnzymes_.hpp:64
const double kNeutron
neutron mass
Definition: msms.hpp:70
const double MonoMass[]
Definition: msms.hpp:77
const double kIonTypeMass[]
Definition: msms.hpp:88
std::deque< int > TCleave
Definition: msms.hpp:467
const double AverageMass[]
Definition: msms.hpp:79
const double MonoN15Mass[]
Definition: msms.hpp:81
const int kNumAAIntervals
Definition: msms.hpp:62
const double kWater
mass of water
Definition: msms.hpp:67
const double kTermMass[]
Definition: msms.hpp:87
const struct ncbi::grid::netcache::search::fields::SIZE size
#define const
Definition: zconf.h:230
Modified on Fri Dec 08 08:21:08 2023 by modify_doxy.py rev. 669887