62 const string& UserModFileName,
72 FileName = DirEntry.
GetDir() + ModFileName;
73 else FileName = ModFileName;
74 unique_ptr<CObjectIStream>
77 ERR_POST(
Fatal <<
"ommsacl: unable to open modification file" <<
84 }
catch (NCBI_NS_STD::exception& e) {
86 FileName <<
" with error " << e.what());
90 if(UserModFileName !=
"") {
94 FileName = DirEntry.
GetDir() + UserModFileName;
95 else FileName = UserModFileName;
96 unique_ptr<CObjectIStream>
98 if(usermodsin->fail()) {
105 Modset->
Append(*UserModset);
106 }
catch (NCBI_NS_STD::exception& e) {
107 ERR_POST(
Fatal <<
"Unable to read user modification file " <<
108 FileName <<
" with error " << e.what());
118 ifstream taxnames(Filename.c_str());
120 list<string> linelist;
121 list<string>::iterator ilist;
122 while(taxnames && !taxnames.eof()) {
123 getline(taxnames, line);
126 if(!linelist.empty()) {
127 ilist = linelist.begin();
158 ERR_POST(
Fatal <<
" omssacl: not able to open spectrum file " <<
164 (*MySearch.
SetRequest().begin())->SetSpectra(*SpectrumSet);
165 return SpectrumSet->LoadFile(FileType, PeakFile);
178 unique_ptr<CObjectIStream>
180 in->Open(Filename.c_str(), DataFormat);
198 unique_ptr <CNcbiIfstream> raw_in;
199 unique_ptr <CCompressionIStream> compress_in;
200 unique_ptr <CObjectIStream>
in;
226 bool* SearchEngineIterative)
228 string Filename(InFile->GetInfile());
232 switch (DataFormat) {
241 if(SearchEngineIterative) *SearchEngineIterative =
true;
245 if(SearchEngineIterative) *SearchEngineIterative =
true;
266 const string Filename,
271 unique_ptr <CNcbiOfstream> raw_out;
272 unique_ptr <CCompressionOStream> compress_out;
273 unique_ptr <CObjectOStream> txt_out;
302 CMSSearchSettings::TOutfiles::const_iterator iOutFile;
304 for(iOutFile = OutFiles.begin(); iOutFile != OutFiles.end(); ++iOutFile) {
305 string Filename((*iOutFile)->GetOutfile());
310 unique_ptr <CObjectOStream> txt_out;
320 switch (DataFormat) {
327 (*iOutFile)->GetIncluderequest(),
334 (*iOutFile)->GetIncluderequest(),
342 *file_out << outPepXML;
348 oscsv.open(Filename.c_str());
349 (*MySearch.
SetResponse().begin())->PrintCSV(oscsv, Modset);
356 ERR_POST(
Error <<
"Unknown output file format " << DataFormat);
368 list <string> ValidError;
369 if(Settings->
Validate(ValidError) != 0) {
370 list <string>::iterator iErr;
371 for(iErr = ValidError.begin(); iErr != ValidError.end(); iErr++)
382 if(FileName !=
"" ) {
384 unique_ptr<CObjectIStream>
386 if(paramsin->fail()) {
387 ERR_POST(
Fatal <<
"ommsacl: unable to open parameter file" <<
394 }
catch (NCBI_NS_STD::exception& e) {
396 FileName <<
" with error " << e.what());
415 RestrictedSearch(
false)
431 if (!blastdb)
return 0;
455 bool NoProline = find(
GetSettings()->GetNoprolineions().begin(),
459 if (!(*(Iter->second))[iMod]->
490 int ChargeLimitLo(0), ChargeLimitHi(0);
504 vector<bool> usedPeaks(Peaks->
SetPeakLists()[Which]->GetNum(),
false);
518 int ChargeLimitLo(0), ChargeLimitHi(0);
533 if(Peaks->
CompareTop(*((*(Iter->second))[iMod])))
return true;
543 if (
GetSettings()->GetIterativesettings().GetResearchthresh() != 0.0) {
547 if (HitSet.
IsNull())
return true;
548 if (HitSet->GetHits().empty())
return true;
549 if ((*HitSet->GetHits().begin())->GetEvalue() <=
550 GetSettings()->GetIterativesettings().GetResearchthresh())
566 CSpectrumSet::Tdata::const_iterator iSpectrum;
569 iSpectrum =
GetRequest()->GetSpectra().Get().begin();
570 for (; iSpectrum !=
GetRequest()->GetSpectra().Get().end(); iSpectrum++) {
579 transform(Spectrum->SetCharge().begin(), Spectrum->SetCharge().end(), Spectrum->SetCharge().begin(),
PositiveSign);
594 ofstream os(
"test.dta");
609 Numisotopes, Pepppm,
GetSettings()->GetChargehandling().GetNegative());
631 for (
i = 0;
i < NumMod;
i++) {
633 if (ModList[
i].GetFixed() != 1) {
635 for (j = 0; j < NumMod; j++) {
637 if (ModList[j].GetFixed() == 1 &&
638 ModList[
i].GetSite() == ModList[j].GetSite()) {
647 for (
i = 0;
i < NumMod;) {
648 if (ModList[
i].GetFixed() == -1) {
651 if (
i == NumMod)
return;
653 for (j=
i; j < NumMod; ++j) {
654 ModList[j] = ModList[j+1];
664 const char *PepStart[],
665 const char *PepEnd[],
682 for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
684 if (PepStart[iMissed] == (
const char *)-1)
continue;
686 PepEnd[iMissed] = PepEnd[Missed - 1];
696 const char *OldSite(0);
697 int NumModSitesCount(0), NumModCount(0);
698 for (iMod = 0; iMod < NumMod[Missed-1]; iMod++) {
701 if (NumModCount + NumMod[iMissed] >=
MAXMOD)
break;
706 PepStart[iMissed] != ModList[Missed-1][iMod].GetSite()) {
717 ModList[iMissed][NumModCount + NumMod[iMissed]] =
718 ModList[Missed-1][iMod];
721 if (OldSite != ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite() &&
722 ModList[iMissed][NumModCount + NumMod[iMissed]].GetFixed() != 1) {
724 OldSite = ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite();
734 Masses[iMissed] += Masses[Missed - 1];
737 EndMasses[iMissed] = EndMasses[Missed - 1];
740 NumMod[iMissed] += NumModCount;
743 NumModSites[iMissed] += NumModSitesCount;
761 const char *OldSite(0);
763 for (
i = 0;
i < NumMod;
i++) {
765 if (ModList[
i].GetSite() != OldSite && ModList[
i].GetFixed() != 1 ) {
775 const char *PepStart[],
779 int NumMassAndMask[],
792 unsigned Mask, MassOfMask;
805 for (iMissed = 0; iMissed < Missed; iMissed++) {
807 if (PepStart[iMissed] == (
const char *)-1)
continue;
812 Masses[iMissed] + EndMasses[iMissed];
815 int NumVariable(NumMod[iMissed]);
818 for (iMod = 0; iMod < NumMod[iMissed]; iMod++) {
819 if (ModList[iMissed][iMod].GetFixed()) {
820 SetMassAndMask(iMissed, iModCount).
Mass += ModList[iMissed][iMod].GetPrecursorDelta();
826 NumFixed = NumMod[iMissed] - NumVariable;
830 for (iMod = 0; iMod < NumModSites[iMissed] && iModCount <
MaxModPerPep; iMod++) {
837 NumModSites[iMissed], ModList[iMissed]);
842 for (iiMod = 0; iiMod <= iMod; iiMod++ )
843 MassOfMask += ModList[iMissed][ModIndex[iiMod + NumFixed]].GetPrecursorDelta();
850 printf(
"NumMod = %d iMod = %d, Mask = \n", NumMod[iMissed], iMod);
852 for (iii=NumMod[iMissed]-1; iii >= 0; iii--) {
853 if (Mask & 1 << iii) printf(
"1");
863 NumModSites[iMissed], ModList[iMissed]));
869 for (ii = 0; ii < iModCount; ++ii) {
882 NumMassAndMask[iMissed] = iModCount;
891 ERR_POST(
Fatal <<
"omssa: at least one ions series to search need to be specified");
893 CMSSearchSettings::TIonstosearch::const_iterator
i;
904 int MaxLadderSize =
GetSettings()->GetMaxproductions();
909 list <EMSIonSeries> ::const_iterator iIons;
911 for (iIons = Ions.begin(); iIons != Ions.end(); ++iIons) {
912 for(
i = 1;
i <=
GetSettings()->GetChargehandling().GetMaxproductcharge(); ++
i) {
924 if (
GetSettings()->GetIterativesettings().GetSubsetthresh() != 0.0) {
927 GetOidsBelowThreshold(
929 GetSettings()->GetIterativesettings().GetSubsetthresh());
963 return new bool(
true);
1017 "number of sequences in search library is not the same as previously searched. Unable to do iterative search.");
1027 list <EMSIonSeries> Ions;
1035 if (
GetEnzyme()->GetNonSpecific()) Missed = 1;
1041 int endposition, position;
1083 CMSSearchSettings::TTaxids::const_iterator iTax;
1102 vector <int> taxids;
1103 vector <int>::iterator itaxids;
1104 bool TaxInfo(
false);
1105 bool iSearchNotDone(
true);
1109 while (iSearchNotDone) {
1114 iSearchNotDone =
false;
1118 if (iSearch % 10000 == 0) {
1119 if(Callback) Callback(
Getnumseq(), iSearch, CallbackData);
1129 for (itaxids = taxids.begin(); itaxids != taxids.end(); ++itaxids) {
1130 if (*itaxids == 0)
continue;
1132 for (iTax = Tax.begin(); iTax != Tax.end(); ++iTax) {
1133 if (*itaxids == *iTax)
goto TaxContinue;
1140 SequenceDone =
false;
1143 for (iMissed = 0; iMissed < Missed; iMissed++) {
1144 PepStart[iMissed] = (
const char *)-1;
1145 PepEnd[iMissed] =
Sequence.GetData();
1146 Masses[iMissed] = 0;
1147 EndMasses[iMissed] = 0;
1148 NumMod[iMissed] = 0;
1149 NumModSites[iMissed] = 0;
1151 ModList[iMissed][0].
Reset();
1153 PepStart[Missed - 1] =
Sequence.GetData();
1161 while (!SequenceDone) {
1167 Masses[Missed - 1] = 0;
1168 EndMasses[Missed - 1] = 0;
1169 NumMod[Missed - 1] = 0;
1170 NumModSites[Missed - 1] = 0;
1172 ModList[Missed - 1][0].
Reset();
1178 &(PepEnd[Missed - 1]),
1179 &(Masses[Missed - 1]),
1182 &(EndMasses[Missed - 1]),
1184 ModList[Missed - 1],
1186 PrecursorIntMassArray,
1193 ModList[Missed - 1]);
1198 ModList[Missed - 1]);
1201 Masses, EndMasses, NumModSites, Modset);
1204 EndMasses, NumMod, NumMassAndMask,
1205 NumModSites, ModList);
1211 for (iMissed = 0; iMissed < Missed; iMissed++) {
1212 if (PepStart[iMissed] == (
const char *)-1)
continue;
1215 position = PepStart[iMissed] -
Sequence.GetData();
1216 endposition = PepEnd[iMissed] -
Sequence.GetData();
1225 for (iMod = 0; iMod < NumMassAndMask[iMissed]; iMod++) {
1229 NoMassMatch)
continue;
1244 Peaks = MassPeak->
Peak;
1246 NoMassMatch =
false;
1258 NumMod[iMissed]) != 0)
continue;
1266 (*(Iter->second))[iMod]->ClearHits();
1294 hits += (*(Iter->second))[iMod]->HitCount();
1306 if (Peaks->
AddHit(NewHit, NewHitOut)) {
1308 NewHitOut->
SetStop() = endposition;
1332 int NonSpecificMass(Masses[0] + EndMasses[0]);
1345 if (NonSpecificMass <
MaxMZ &&
1360 NonSpecificMass = 0;
1361 const char *iSeqChar;
1362 for (iSeqChar = PepStart[0]; iSeqChar <=
SetEnzyme()->
GetStop(); iSeqChar++)
1363 NonSpecificMass += PrecursorIntMassArray[AA.
GetMap()[*iSeqChar]];
1365 SequenceDone =
false;
1367 else SequenceDone =
true;
1373 PepStart[0] !=
Sequence.GetData() &&
1380 PepEnd[0] = PepStart[0];
1383 if (!SequenceDone) {
1385 const char *OldSite;
1386 int NumModSitesCount;
1388 for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
1390 Masses[iMissed] = Masses[iMissed + 1];
1396 NumModSitesCount = 0;
1397 for (iMod = 0; iMod < NumMod[iMissed + 1]; iMod++) {
1401 ModList[iMissed][NumModCount] = ModList[iMissed + 1][iMod];
1404 if (OldSite != ModList[iMissed + 1][iMod].GetSite() &&
1405 ModList[iMissed + 1][iMod].
GetFixed() != 1) {
1407 OldSite = ModList[iMissed + 1][iMod].
GetSite();
1411 NumMod[iMissed] = NumModCount;
1412 NumModSites[iMissed] = NumModSitesCount;
1415 PepStart[iMissed] = PepStart[iMissed + 1];
1419 PepEnd[Missed-1] += 1;
1420 PepStart[Missed-1] = PepEnd[Missed-1];
1432 "Taxonomically restricted search specified and no matching organisms found in sequence library. Did you use a sequence library with taxonomic information?");
1435 catch (NCBI_NS_STD::exception& e) {
1436 ERR_POST(
Info <<
"Exception caught in CSearch::Search: " << e.what());
1456 Hit->
SetMods().push_back(ModHit);
1486 modseqstring = seqstring;
1502 for (iseq = Start; iseq <= Stop; iseq++) {
1528 while(!PeakSet->
GetPeaks().empty()) {
1529 Peaks = *(PeakSet->
GetPeaks().begin());
1549 HitSet->SetIds() = Peaks->
GetName();
1552 HitSet->SetSettingid() =
GetSettings()->GetSettingid();
1559 delete *(PeakSet->
GetPeaks().begin());
1564 double Threshold, MinThreshold(ThreshStart), MinEval(1000000.0L);
1567 for (Threshold = ThreshStart; Threshold <= ThreshEnd;
1568 Threshold += ThreshInc) {
1570 if (!ScoreList.
empty()) {
1571 _TRACE(
"Threshold = " << Threshold <<
1572 "EVal = " << ScoreList.
begin()->first);
1574 if (!ScoreList.
empty() && ScoreList.
begin()->first < MinEval) {
1575 MinEval = ScoreList.
begin()->first;
1576 MinThreshold = Threshold;
1581 _TRACE(
"Min Threshold = " << MinThreshold);
1588 if ((
GetSettings()->GetIterativesettings().GetReplacethresh() == 0.0 &&
1589 (HitSet->GetHits().empty() ||
1590 ScoreList.
begin()->first <= (*HitSet->GetHits().begin())->GetEvalue())) ||
1591 (
GetSettings()->GetIterativesettings().GetReplacethresh() != 0.0 &&
1592 ScoreList.
begin()->first <=
GetSettings()->GetIterativesettings().GetReplacethresh())) {
1593 HitSet->SetHits().clear();
1597 delete *(PeakSet->
GetPeaks().begin());
1604 CMSSearchSettings::TTaxids::const_iterator iTax;
1610 for (iScoreList = ScoreList.
begin();
1611 iScoreList != ScoreList.
end();
1612 ++iScoreList,++HitNum) {
1614 double Score = iScoreList->first;
1615 if (Score > Evalcutoff)
1624 MSHit = iScoreList->second;
1626 CBlast_def_line_set::Tdata::const_iterator iDefLine;
1629 for (iDefLine = Hdr->
Get().begin();
1630 iDefLine != Hdr->
Get().end();
1633 for (iTax = Tax.begin(); iTax != Tax.end(); iTax++) {
1634 if ((*iDefLine)->GetTaxid() == *iTax)
goto TaxContinue2;
1639 string seqstring, modseqstring;
1646 string tempstartstop;
1651 if (PepDone.
find(modseqstring) != PepDone.
end()) {
1652 Hit = PepDone[modseqstring];
1674 HitSet->GetNumber() <<
1675 " peptide " << modseqstring);
1678 else if (!
finite(Score)) {
1680 HitSet->GetNumber() <<
1681 " peptide " << modseqstring);
1686 GetPeptidesExamined(MSHit->
1695 HitSet->SetHits().push_back(hitref);
1696 PepDone[modseqstring] = Hit;
1702 if ((*iDefLine)->CanGetSeqid()) {
1705 if ((**seqid).IsGi()) {
1706 Pephit->
SetGi((**seqid).GetGi());
1728 delete *(PeakSet->
GetPeaks().begin());
1743 MSBioseq->SetOid() = *iOids;
1759 int lowmz(0), highmz;
1762 if (Maxproductions == 0) Maxproductions =
kMSLadderMax;
1768 for(iii = 0; iii <
GetEnzyme()->GetCleaveNum(); ++iii) {
1770 if(
GetEnzyme()->GetCleaveOffset()[iii] == 1 ) {
1778 else if (
GetEnzyme()->GetCleaveOffset()[iii] == 0 ) {
1796 bool NoProline = find(
GetSettings()->GetNoprolineions().begin(),
1807 SeriesCharge*Maxproductions
1815 TMatchedPeakSet::iterator bin,
prev,
next;
1820 lowmz = ((*bin)->GetMZ() + (*prev)->GetMZ())/2;
1825 highmz = ((*bin)->GetMZ() + (*next)->GetMZ())/2;
1828 (*bin)->SetExpIons() =
1833 (double)(highmz - lowmz);
1835 (*bin)->SetMassTolerance() = (Peaks->
GetTol())/SeriesCharge;
1838 return MatchPeakSet;
1848 const TSeriesChargePairList::const_iterator &iPairList,
1849 list<CMSMatchedPeakSet *> &Forward,
1850 list<CMSMatchedPeakSet *> &Backward)
1863 Forward.push_back(current);
1865 Backward.push_back(current);
1870 list<CMSMatchedPeakSet *> &SingleBackward,
1871 list<CMSMatchedPeakSet *> &Double,
1874 list<CMSMatchedPeakSet *>::iterator iDouble, iFront, iBack;
1876 for (iDouble = Double.begin(); iDouble != Double.end(); ++iDouble) {
1878 for(iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
1879 (*iDouble)->Compare(*iFront, DoubleForward);
1882 for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
1883 (*iDouble)->Compare(*iBack, !DoubleForward);
1898 for (iCharges = 0; iCharges < Peaks->
GetNumCharges(); iCharges++) {
1904 int tempMass = HitList[iHitList].
GetExpMass();
1905 int Charge = HitList[iHitList].
GetCharge();
1912 int minintensity =
static_cast <int> (Threshold * Peaks->
GetMaxI(Which));
1915 TSeriesChargePairList::const_iterator iPairList;
1916 list <CMSMatchedPeakSet *> SingleForward, SingleBackward, DoubleForward, DoubleBackward;
1923 if (iPairList->first == 1) {
1924 MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
1925 iPairList, SingleForward, SingleBackward);
1928 MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
1929 iPairList, DoubleForward, DoubleBackward);
1933 list <CMSMatchedPeakSet *> ::iterator iFront, iBack, iDouble;
1937 for (iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
1938 for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
1939 (*iFront)->Compare(*iBack,
false);
1943 DoubleCompare(SingleForward, SingleBackward, DoubleForward,
true);
1944 DoubleCompare(SingleForward, SingleBackward, DoubleBackward,
false);
1967 _TRACE(
"poisson mean is < 0");
1971 ERR_POST(
Info <<
"poisson mean is NaN or is infinite");
1976 if (HitList[iHitList].
GetHits() <
a)
continue;
1985 int High, Low, NumPeaks, NumLo, NumHi;
1986 Peaks->
HighLow(High, Low, NumPeaks, tempMass, Charge, Threshold, NumLo, NumHi);
1988 double TopHitProb = ((double)Tophitnum)/NumPeaks;
1990 if (TopHitProb > 1.0) TopHitProb = 1.0;
1991 int numhits = HitList[iHitList].
CountHits(Threshold, Peaks->
GetMaxI(Which));
1996 pval = HitList[iHitList].
CalcPvalue(
a, HitList[iHitList].CountHits(Threshold, Peaks->
GetMaxI(Which)));
1999 if (HitList[iHitList].GetM() != 0.0) {
2001 _TRACE(
"Perf=" << Perf <<
" pval=" << pval <<
" N=" <<
N );
2007 double eval = 3e3 * pval *
N;
2009 ScoreList.
insert(pair<const double, CMSHit *>
2010 (eval, &(HitList[iHitList])));
const char *const UniqueAA
void transform(Container &c, UnaryFunction *op)
const char *const GetMap(void) const
return the map for translating AA char to AA number
CBZip2StreamCompressor – bzip2 based compression stream processor.
CBZip2StreamDecompressor – bzip2 based decompression stream processor.
static CRef< CCleave > CleaveFactory(const EMSEnzymes enzyme)
Simple factory to return back object for enzyme.
const char *& SetStop(void)
Set the enzyme stop value.
bool GetNonSpecific(void) const
Is this a non-specific search?
bool CalcAndCut(const char *SeqStart, const char *SeqEnd, const char **PepStart, int *Masses, int &NumMod, int MaxNumMod, int *EndMasses, CMSMod &VariableMods, CMSMod &FixedMods, CMod ModList[], const int *IntCalcMass, const int *PrecursorIntCalcMass, CRef< CMSModSpecSet > &Modset, int Maxproductions)
cleaves the sequence.
bool & SetNMethionine(void)
Set n-term methionine cleavage.
const char * GetStop(void) const
Get the enzyme stop value.
bool CheckCleaveChar(const char *iPepStart) const
is the character given one of the cleavage chars?
int GetCleaveNum(void) const
Get the number of cleavage chars.
void Next(TLadderMap::iterator &Iter, TMSCharge BeginCharge=0, TMSCharge EndCharge=0, TMSIonSeries SeriesType=eMSIonTypeUnknown)
iterate over the ladder map over the charge range and series type indicated
TSeriesChargePairList & SetSeriesChargePairList(void)
return the list of charge, series type pairs that are used to initialize the maps
void Begin(TLadderMap::iterator &Iter, TMSCharge BeginCharge=0, TMSCharge EndCharge=0, TMSIonSeries SeriesType=eMSIonTypeUnknown)
void CreateLadderArrays(int MaxModPerPep, int MaxLadderSize)
populate the Ladder Map with arrays based on the ladder
const TSeriesChargePairList & GetSeriesChargePairList(void) const
return the list of charge, series type pairs that are used to initialize the maps
const TMSNumber GetNumber(void) const
Get the ion series number.
const TMSIonSeries GetIonSeries(void) const
Get the ion type.
const TMSCharge GetCharge(void) const
Get the ion charge.
const TMSMZ GetMZ(void) const
get the m/z value of the peak
class to contain preliminary hits.
const int GetSeqIndex(void) const
get blast oid
int & SetStart()
set sequence start
int & SetSeqIndex(void)
set blast oid
const int GetStart(void) const
get sequence start
int CountHits(double Threshold, int MaxI)
return number of hits above threshold
const CMSModInfo & GetModInfo(int n) const
get modification info
const int GetStop(void) const
get sequence stop
int & SetStop(void)
set sequence stop
const int GetNumModInfo(void) const
get size of modification info array
@MSHits.hpp User-defined methods of the data storage class.
static const TMSIonSeries Key2Series(int Key)
convert a key into a series type
static const TMSCharge Key2Charge(int Key)
convert a key into a charge
CMSMatchedPeakSet * SetSeries(TMSCharge Charge, TMSIonSeries Series)
get a series for modification
container for a set of matches
TMatchedPeakSet & SetMatchedPeakSet(void)
Set the match info.
const int GetModEnum(void) const
const int GetIsFixed(void) const
const int GetSite(void) const
@MSModSpecSet.hpp User-defined methods of the data storage class.
void Append(const CMSModSpecSet &ModsIn)
concatenates in another CMSModSpecSet
EMSModType GetModType(int Mod) const
get modification type
bool Init(const CMSSearchSettings::TVariable &Mods, CRef< CMSModSpecSet > Modset)
initialize variable mod type array
int SortPeaks(int Peptol, int Zdep, int Numisotopes, bool Pepppm, int ChargeSign)
put the pointers into an array sorted by mass
CIntervalTree & SetIntervalTree(void)
void AddPeak(CMSPeak *PeakIn)
TPeakSet & GetPeaks(void)
class to hold spectral data for filtering and statistical characterization
int CompareSortedRank(CLadder &Ladder, EMSPeakListTypes Which, vector< bool > &usedPeaks)
Compare the ladder and peaks and return back rank statistics.
const EMSHitError GetError(void) const
return any errors in computing on peaks
const bool CompareTop(CLadder &Ladder)
compares only the top hits
const int GetMaxI(const EMSPeakListTypes Which) const
Get Maximum intensity.
TMSHitList & GetHitList(const int Index)
Get a hit list.
const int GetPeptidesExamined(const int ChargeIn) const
return the number of peptides examined for each charge state
const int GetNumber(void) const
get the spectrum number
void ReadAndProcess(const CMSSpectrum &Spectrum, const CMSSearchSettings &Settings)
Read and process a spectrum set into a CMSPeak.
void HighLow(int &High, int &Low, int &NumPeaks, const int PrecursorMass, const int Charge, const double Threshold, int &NumLo, int &NumHi)
return the lowest culled peak and the highest culled peak less than the precursor mass passed in
TPeakLists & SetPeakLists(void)
set the peak lists
const bool AddHit(CMSHit &in, CMSHit *&out)
add hit to hitlist.
const int GetTol(void) const
get the product mass tolerance in Daltons.
int & SetPeptidesExamined(const int ChargeIn)
set the number of peptides examined for each charge state
const CMSSpectrum::TIds & GetName(void) const
get the names of the spectrum
const int CountMZRange(const int StartIn, const int StopIn, const double MinIntensity, const int Which) const
return the number of peaks in a range
const int GetNumCharges(void) const
return number of allowed computed charges
const int GetHitListIndex(const int Index) const
Get size of hit list.
const EMSPeakListTypes GetWhich(const int Charge) const
returns the cull array index
const int GetConsiderMult(void) const
gets min precursor charge to consider multiply charged product ions
void Write(std::ostream &FileOut, const EMSSpectrumFileType FileType, const EMSPeakListTypes Which) const
Write out a CMSPeak in dta format (useful for debugging)
CRef< CMSHitSet > FindHitSet(const int Number) const
Find hitset with given number.
int Validate(std::list< std::string > &Error) const
Validate Search Settings. Returns 0 if OK, 1 if not. Error contains explanations.
const double CalcPoissonMean(double ProbTerminal=0.0L, int NumTerminalMasses=2, double ProbDependent=0.0L, int NumUniqueMasses=19, double ToleranceAdjust=1.0L) const
calculate the mean value of the poisson distribution for this match
const TMSMZ GetExpMass(void) const
Get the experimental m/z of the spectrum.
void FillMatchedPeaks(TMSCharge ChargeIn, TMSIonSeries Series, unsigned Size, TMSIntensity MinIntensity, bool Skipb1, EMSTerminalBias TerminalIon, int Maxproductions, string &Sequence, bool NoProline)
copies hit array into match array and fills in missing peaks; does not fill in exp peak values.
const double CalcPvalueTopHit(double Mean, int HitsIn, double Normal, double TopHitProb) const
calculate the p-value using poisson distribution and the top hit prob
CMSMatchedPeakSetMap & SetIonSeriesMatchMap(void)
Set map from ion series to CMSMatchedPeakSet *.
const CMSBasicMatchedPeak & GetHitInfo(int n) const
Get the hit info at array position n.
const int GetHits(void) const
return the size of the HitInfo array
const TMSMZ GetMaxDelta(void) const
calc max abs difference between experimental and theoretical mass values
const TMSMZ GetTheoreticalMass(void) const
return theoretical mass of the hit
TMSCharge & SetCharge()
set the charge
const double CalcNormalTopHit(double Mean, double TopHitProb) const
integrate CalcPoissonTopHit over all i
TMSMZ & SetExpMass(void)
Set the experimental mass of the spectrum.
int & SetHits(void)
set the size of the HitInfo array
const double CalcRankProb(void) const
calculate the rank score
const TMSCharge GetCharge(void) const
get the charge
const double CalcPvalue(double Mean, int HitsIn) const
calculate the p-value using poisson distribution
const int *const GetIntMass(void) const
void Init(const CMSSearchSettings::TProductsearchtype &SearchType)
initialize mass arrays with fixed mods
contains information for a post translational modification at a particular sequence site
TSite GetSite(void) const
Get the site position.
TFixed & SetFixed(void)
set mod state (1 = fixed)
TEnum GetEnum(void) const
Get mod type.
void Reset(void)
reset to default values
TFixed GetFixed(void) const
Is the mod fixed?
void ConvertFromOMSSA(CMSSearch &inOMSSA, CRef< CMSModSpecSet > Modset, string basename, string newname)
convert OMSSA to PepXML
static int SaveAnyFile(CMSSearch &MySearch, CMSSearchSettings::TOutfiles OutFiles, CRef< CMSModSpecSet > Modset)
Write out a complete search.
static void ConditionXMLStream(CObjectOStreamXml *xml_out)
correctly set up xml stream
static void SaveOneFile(CMSSearch &MySearch, const string Filename, ESerialDataFormat FileFormat, bool IncludeRequest, bool bz2)
static int ReadCompleteSearch(const string &Filename, const ESerialDataFormat DataFormat, bool bz2, CMSSearch &MySearch)
Read in a complete search (typically for an iterative search)
static void ReadTaxFile(string &Filename, TTaxNameMap &TaxNameMap)
static int ReadSearchRequest(const string &Filename, const ESerialDataFormat DataFormat, CMSSearch &MySearch)
Read in an MSRequest.
static void ValidateSearchSettings(CRef< CMSSearchSettings > &Settings)
Validates Search Settings.
static void CreateSearchSettings(string FileName, CRef< CMSSearchSettings > &Settings)
create search setting object from file or brand new
static int ReadFile(const string &Filename, const EMSSpectrumFileType FileType, CMSSearch &MySearch)
Read in a spectrum file.
static int LoadAnyFile(CMSSearch &MySearch, CConstRef< CMSInFile > InFile, bool *SearchEngineIterative=0)
Read in any input file.
static int ReadModFiles(const string &ModFileName, const string &UserModFileName, const string &Path, CRef< CMSModSpecSet > Modset)
read in modification files.
unsigned MakeBoolMask(int *ModIndex, int iMod)
CMSResponse::TOidSet & SetOidSet(void)
get the oidset
void SetResult(CRef< CMSPeakSet > PeakSet)
void DeleteVariableOverlap(int &NumMod, CMod ModList[])
delete variable mods that overlap with fixed mods
void UpdateWithNewPep(int Missed, const char *PepStart[], const char *PepEnd[], int NumMod[], CMod ModList[][32], int Masses[], int EndMasses[], int NumModSites[], CRef< CMSModSpecSet > &Modset)
update sites and masses for new peptide
const bool GetIterative(void) const
Gets the iterative search flag.
Int1 & SetLadderCalc(int i)
Set the bit that indicates whether a ladder was calculated.
AutoPtr< Int1, ArrayDeleter< Int1 > > LadderCalc
bool array that indicates if the ladders have been calculated
void AddModsToHit(CMSHits *Hit, CMSHit *MSHit)
Adds modification information to hitset.
CLadderContainer & SetLadderContainer(void)
set the ladder container
bool CalcModIndex(int *ModIndex, int &iMod, int &NumMod, int NumFixed, int NumModSites, CMod CModList[])
CRef< CMSResponse > & SetResponse(void)
Set search response.
void Spectrum2Peak(CRef< CMSPeakSet > PeakSet)
CRef< CSeqDB > rdfp
blast library
bool UseRankScore
boolean to turn on rank scoring
CConstRef< CMSRequest > GetRequest(void) const
Get search request.
virtual void OnExit(void)
Override this to execute finalization code.
void WriteBioseqs(void)
write oidset to result
Int1 GetLadderCalc(int i) const
Get the bit that indicates whether a ladder was calculated.
int InitBlast(const char *blastdb, bool use_mmap=false)
init blast databases.
void CreateModCombinations(int Missed, const char *PepStart[], int Masses[], int EndMasses[], int NumMod[], int NumMassAndMask[], int NumModSites[], CMod ModList[][32])
void MatchAndSort(CMSPeak *Peaks, CMSHit &Hit, EMSPeakListTypes Which, int minintensity, const TSeriesChargePairList::const_iterator &iPairList, list< CMSMatchedPeakSet * > &SingleForward, list< CMSMatchedPeakSet * > &SingleBackward)
Creates match ion match lists.
int CompareLadders(int iMod, CMSPeak *Peaks, bool OrLadders, const TMassPeak *MassPeak)
compare ladders to experiment
void CopySettings(CRef< CSearch > fromObj)
TMassMask & SetMassAndMask(int i, int j)
Set the mask and mass of mod bit array.
void Search(CRef< CMSRequest > MyRequestIn, CRef< CMSResponse > MyResponseIn, CRef< CMSModSpecSet > Modset, CRef< CMSSearchSettings > SettingsIn, TOMSSACallback Callback=0, void *CallbackData=0)
Performs the ms/ms search.
void ClearLadderCalc(int Max)
Clear the ladder calc array up to max index.
CMassArray PrecursorMassArray
CConstRef< CCleave > GetEnzyme(void) const
Get search enzyme.
bool & SetRestrictedSearch(void)
is this search restricted to the oid set?
CRef< CMSRequest > & SetRequest(void)
Set search request.
void InitModIndex(int *ModIndex, int &iMod, int NumMod, int NumModSites, CMod ModList[])
CConstRef< CMSSearchSettings > GetSettings(void) const
Get search settings.
CConstRef< CMSResponse > GetResponse(void) const
Get search response.
void AddIonsToHit(CMSHits *Hit, CMSHit *MSHit)
Adds ion information to hitset.
void DoubleCompare(list< CMSMatchedPeakSet * > &SingleForward, list< CMSMatchedPeakSet * > &SingleBackward, list< CMSMatchedPeakSet * > &Double, bool DoubleForward)
CMSMatchedPeakSet * PepCharge(CMSHit &Hit, int SeriesCharge, int Ion, int MinIntensity, int Which, CMSPeak *Peaks, int Maxproductions)
fill out MatchedPeakSet
static void MakeModString(string &seqstring, string &modseqstring, CMSHit *MSHit)
Makes a string hashed out of the sequence plus mods.
const bool GetPoissonOnly(void) const
Gets the scoring to use rank statistics only with Poisson.
const CMSResponse::TOidSet & GetOidSet(void) const
get the oidset
const bool GetRestrictedSearch(void) const
is this search restricted to the oid set?
static int iSearchGlobal
Tracks the iSearch number for all search threads.
CRef< CCleave > & SetEnzyme(void)
Set search enzyme.
void CountModSites(int &NumModSites, int NumMod, CMod ModList[])
count the number of unique sites modified
bool CompareLaddersTop(int iMod, CMSPeak *Peaks, const TMassPeak *MassPeak)
void InitLadders(std::list< EMSIonSeries > &Ions)
initialize mass ladders
int CreateLadders(const char *Sequence, int iSearch, int position, int endposition, int *Masses, int iMissed, CAA &AA, int iMod, CMod ModList[], int NumMod)
create the ladders from sequence
void SetIons(list< EMSIonSeries > &Ions)
set up the ions to use
void SetupSearch(CRef< CMSRequest > MyRequestIn, CRef< CMSResponse > MyResponseIn, CRef< CMSModSpecSet > Modset, CRef< CMSSearchSettings > SettingsIn, TOMSSACallback Callback=0, void *CallbackData=0)
Setup the ms/ms search.
void CreateSequence(int Start, int Stop, string &seqstring, CSeqDBSequence &Sequence)
Generate a peptide sequence.
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
CRef< CMSSearchSettings > & SetSettings(void)
Set search settings.
CRef< CMSModSpecSet > initModset
void CalcNSort(TScoreList &ScoreList, double Threshold, CMSPeak *Peaks)
calculate the evalues of the top hits and sort
void MakeOidSet(void)
makes map of oid from previous search used in iterative searching
static void ResetGlobals(void)
Reset global parameters used in threaded search.
TOMSSACallback initCallback
const int Getnumseq(void) const
CRef< CMSRequest > initRequestIn
These are so CSearch::Main() can call CSearch::Search() in a threaded run, this requires CSearch::Set...
CRef< CMSResponse > initResponseIn
int MaxModPerPep
maximum number of mod combinations per peptide
bool Iterative
boolean to turn on iterative search
AutoPtr< TMassMask, ArrayDeleter< TMassMask > > MassAndMask
contains bit mask of modifications and resulting mass
const bool ReSearch(const int Number) const
examines a hitset to see if any good hits
static int MaxMZ
maximum m/z value of all spectra precursors used to bound non-specific cleavage searches
static CRef< CMSPeakSet > SharedPeakSet
CRef< CMSSearchSettings > initSettingsIn
int ThreadNum
The threadid number.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist=false) const
Get taxid for an OID.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
CRef< CBlast_def_line_set > GetHdr(int oid) const
Get the ASN.1 header for the sequence.
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
const_iterator end() const
iterator insert(const value_type &val)
const_iterator begin() const
container_type::iterator iterator
iterator_bool insert(const value_type &val)
string GetSeqIdString(const CSeq_id &id)
string Path(const string &dir, const string &file)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Critical(CExceptionArgs_Base &args)
void Error(CExceptionArgs_Base &args)
void Warning(CExceptionArgs_Base &args)
void Fatal(CExceptionArgs_Base &args)
void Info(CExceptionArgs_Base &args)
#define finite
Define value of finite (Is Finite).
string GetDir(EIfEmptyPath mode=eIfEmptyPath_Current) const
Get the directory component for this directory entry.
static bool IsAbsolutePath(const string &path)
Check if a "path" is absolute for the current OS.
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
@ eSerial_AsnBinary
ASN.1 binary.
const_iterator AllIntervals(void) const
const_iterator IntervalsContaining(coordinate_type point) const
reference GetValue(void) const
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
pair< TObjectPtr, TTypeInfo > ObjectInfo(C &obj)
void SetReferenceSchema(bool use_schema=true)
Make generated XML document reference XML schema.
void SetWriteNamedIntegersByValue(bool set)
Set up writing named integers (in the ASN.1 sense) by value only.
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
int8_t Int1
1-byte (8-bit) signed integer
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
const Tdata & Get(void) const
Get the member data.
const THits & GetHits(void) const
Get the Hits member data.
void SetMass(TMass value)
Assign a value to Mass data member.
TMods & SetMods(void)
Assign a value to Mods data member.
void SetStop(TStop value)
Assign a value to Stop data member.
void SetPvalue(TPvalue value)
Assign a value to Pvalue data member.
void SetProtlength(TProtlength value)
Assign a value to Protlength data member.
void SetOid(TOid value)
Assign a value to Oid data member.
TMinnoenzyme GetMinnoenzyme(void) const
Get the Minnoenzyme member data.
void SetGi(TGi value)
Assign a value to Gi data member.
void SetAccession(const TAccession &value)
Assign a value to Accession data member.
void SetSinglenum(TSinglenum value)
Assign a value to Singlenum data member.
void SetCharge(TCharge value)
Assign a value to Charge data member.
void SetTheomass(TTheomass value)
Assign a value to Theomass data member.
void SetEvalue(TEvalue value)
Assign a value to Evalue data member.
EMSSerialDataFormat
Access to EMSSerialDataFormat's attributes (values, names) as defined in spec.
void SetDefline(const TDefline &value)
Assign a value to Defline data member.
void SetPepstring(const TPepstring &value)
Assign a value to Pepstring data member.
void SetPepstop(const TPepstop &value)
Assign a value to Pepstop data member.
void SetStart(TStart value)
Assign a value to Start data member.
void SetBioseqs(TBioseqs &value)
Assign a value to Bioseqs data member.
list< CRef< CMSOutFile > > TOutfiles
TResponse & SetResponse(void)
Assign a value to Response data member.
TMzhits & SetMzhits(void)
Assign a value to Mzhits data member.
TNmethionine GetNmethionine(void) const
Get the Nmethionine member data.
void SetDoublenum(TDoublenum value)
Assign a value to Doublenum data member.
TRequest & SetRequest(void)
Assign a value to Request data member.
EMSEnzymes
enumerate enzymes
void SetDbversion(TDbversion value)
Assign a value to Dbversion data member.
EMSSpectrumFileType
Access to EMSSpectrumFileType's attributes (values, names) as defined in spec.
void SetPepstart(const TPepstart &value)
Assign a value to Pepstart data member.
void SetScale(TScale value)
Assign a value to Scale data member.
THitsets & SetHitsets(void)
Assign a value to Hitsets data member.
TPephits & SetPephits(void)
Assign a value to Pephits data member.
void SetScale(TScale value)
Assign a value to Scale data member.
@ eMSSerialDataFormat_csv
csv (excel)
@ eMSSerialDataFormat_none
@ eMSSerialDataFormat_xml
open XML format
@ eMSSerialDataFormat_asnbinary
open ASN.1 binary format
@ eMSSerialDataFormat_pepxml
pepXML format
@ eMSSerialDataFormat_asntext
open ASN.1 text format
@ eMSSerialDataFormat_xmlbz2
bzip2 XML format
@ eMSModType_modn
at the N terminus of a protein
@ eMSModType_modnpaa
at the N terminus of a peptide at particular amino acids
@ eMSModType_modcpaa
at the C terminus of a peptide at particular amino acids
@ eMSModType_modnp
at the N terminus of a peptide
@ eMSModType_modcp
at the C terminus of a peptide
@ eMSModType_modnaa
at the N terminus of a protein at particular amino acids
@ eMSHitError_notenuffpeaks
not enough peaks to search
@ eMSSpectrumFileType_pks
@ eMSSpectrumFileType_mgf
@ eMSSpectrumFileType_sciex
@ eMSSpectrumFileType_omxbz2
bzip2 omx file
@ eMSSpectrumFileType_dtablank
@ eMSSpectrumFileType_pkl
@ eMSSpectrumFileType_omx
xml for iterative search
@ eMSSpectrumFileType_dtaxml
@ eMSSpectrumFileType_dta
@ eMSSpectrumFileType_unknown
@ eMSSpectrumFileType_asc
@ eMSSpectrumFileType_xml
xml MSRequest
@ eMSSpectrumFileType_oms
asn.1 binary for iterative search
const double kNeutron
neutron mass
EMSPeakListTypes
enum that describes type of peak list
EMSTerminalBias
is the peptide statistically biased in any way on either end?
const int kIonDirection[]
ion direction.
EMSIonSeries
enumeration of ion series
constexpr auto sort(_Init &&init)
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::SIZE size
Prototypes for portable math library (ported from C Toolkit)
std::istream & in(std::istream &in_, double &x_)
int PositiveSign(int input)
DEFINE_STATIC_FAST_MUTEX(iSearchMutex)
void(* TOMSSACallback)(int TotalSeq, int Completed, void *Anything)
progress callback for CSearch
bool operator()(const TMassMask &x, const TMassMask &y)