NCBI C++ ToolKit
omssa2pepXML.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: omssa2pepXML.cpp 92504 2021-01-26 19:17:00Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Douglas Slotta
27  *
28  * File Description:
29  * Command line utility to convert OMSSA output to the PepXML format
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbiargs.hpp>
37 #include <corelib/ncbifile.hpp>
38 #include <serial/serial.hpp>
39 #include <serial/objistr.hpp>
40 #include <serial/objostr.hpp>
42 #include <util/compress/bzip2.hpp>
43 
44 #include "omssa.hpp"
45 #include "pepxml.hpp"
46 
49 USING_SCOPE(omssa);
50 
51 // Helper function for debugging, might need this again in the future
52 // void PrintModInfo(CRef <CMSModSpecSet> Modset) {
53 // for (unsigned int modNum = 0; modNum < Modset->Get().size(); modNum++) {
54 // cout << MSSCALE2DBL(Modset->GetModMass(modNum)) << "\t";
55 // cout << MSSCALE2DBL(Modset->GetNeutralLoss(modNum)) << "\t";
56 // cout << Modset->GetModNumChars(modNum) << "\t";
57 // for (int i=0; i< Modset->GetModNumChars(modNum); i++) {
58 // cout << ConvertAA(Modset->GetModChar(modNum, i)) << " ";
59 // cout << MonoMass[static_cast <int> (Modset->GetModChar(modNum,i))] << " ";
60 // }
61 // cout << "\t" << Modset->GetModType(modNum) << "\t";
62 // cout << Modset->GetModName(modNum) << endl;
63 // }
64 // }
65 
66 /////////////////////////////////////////////////////////////////////////////
67 // COmssa2pepxmlApplication::
68 
69 
71 {
72 public:
74 private:
75  virtual void Init(void);
76  virtual int Run(void);
77  virtual void Exit(void);
78 };
79 
80 
82  SetVersion(CVersionInfo(2, 1, 4));
83 }
84 
85 /////////////////////////////////////////////////////////////////////////////
86 // Init: declare the input-format flags, the optional -o output key, and the positional input filename
88 {
89  // Create command-line argument descriptions class
90  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
91  arg_desc->PrintUsageIfNoArgs();
92 
93  // Specify USAGE context
94  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
95  "Convert OMSSA ASN.1 and XML output files to PepXML");
96 
97  // Describe the expected command-line arguments
98  arg_desc->AddFlag("xml", "Input file is XML");
99  arg_desc->AddFlag("bz2", "Input file is bzipped XML");
100  arg_desc->AddFlag("asn", "Input file is ASN.1");
101  arg_desc->AddFlag("asntext", "Input file is ASN.1 text");
102 
103  arg_desc->AddOptionalKey("o", "outfile",
104  "filename for pepXML formatted search results",
106 
107  arg_desc->AddPositional
108  ("filename",
109  "The name of the XML file to load",
111 
112  // Setup arg.descriptions for this application
113  SetupArgDescriptions(arg_desc.release());
114 }
115 
116 
117 
118 /////////////////////////////////////////////////////////////////////////////
119 // Run: read the OMSSA search results, convert them to PepXML, and write the output file
121 {
122  CMSSearch inOMSSA;
123  CPepXML outPepXML;
124  ESerialDataFormat format = eSerial_Xml; // assume xml
125 
126  // Get arguments
127  const CArgs& args = GetArgs();
128 
129  string filename = args["filename"].AsString();
130  CFile file(filename);
131 
132  string fullpath, path, base, ext;
133  if (CFile::IsAbsolutePath(file.GetPath())) {
134  fullpath = file.GetPath();
135  } else {
136  fullpath = CFile::CreateAbsolutePath(file.GetPath());
137  }
138  CFile::SplitPath(fullpath, &path, &base, &ext);
139  string basename = path + base;
140  string newname;
141  if (args["o"].HasValue()) {
142  newname = args["o"].AsString();
143  } else {
144  newname = basename + ".pep.xml";
145  }
146 
147  // figure out input file type
148  bool notSet = true;
149  if (args["xml"] || args["bz2"]) {
151  notSet = false;
152  } else if (args["asn"]) {
154  notSet = false;
155  } else if (args["asntext"]) {
157  notSet = false;
158  }
159 
160  if (notSet) { // Not explict, maybe extension gives us a clue?
161  if (ext == ".oms") {
163  } else if (ext == ".omx") {
165  } else if (ext == ".omt") {
167  }
168  }
169  cout << "Reading " << filename << " as ";
170  switch (format) {
171  case eSerial_AsnBinary:
172  cout << "ASN" << endl; break;
173  case eSerial_Xml:
174  cout << "XML" << endl; break;
175  case eSerial_AsnText:
176  cout << "ASN text" << endl; break;
177  default:
178  cout << "Unable to determine type of file" << endl;
179  return 0;
180  }
181 
182  CSearchHelper::ReadCompleteSearch(filename, format, args["bz2"], inOMSSA);
183 
184  if (!inOMSSA.CanGetRequest()) {
185  cout << "Sorry, this file cannot be converted." << endl;
186  cout << "The original search needs to have been executed with the '-w' flag set." << endl;
187  cout << "The search settings are not availiable in this file. Aborting" << endl;
188  return 0;
189  }
190 
192  CSearchHelper::ReadModFiles("mods.xml","usermods.xml",GetProgramExecutablePath(), Modset);
193  Modset->CreateArrays();
194  //PrintModInfo(Modset);
195 
196  outPepXML.ConvertFromOMSSA(inOMSSA, Modset, basename, newname);
197 
198  //CNcbiOfstream out(newname.c_str());
199  //out << MSerial_Xml << outPepXML;
200 
201  unique_ptr<CObjectOStream> oStream(CObjectOStream::Open(newname, eSerial_Xml));
202  CObjectOStreamXml *xml_out = dynamic_cast <CObjectOStreamXml *> (oStream.get());
203  xml_out->SetDefaultSchemaNamespace("http://regis-web.systemsbiology.net/pepXML");
204  xml_out->SetReferenceSchema();
205  *xml_out << outPepXML;
206 
207  return 0;
208 }
209 
210 
211 /////////////////////////////////////////////////////////////////////////////
212 // Cleanup
213 
214 
216 {
217  SetDiagStream ( 0 );
218 }
219 
220 
221 /////////////////////////////////////////////////////////////////////////////
222 // MAIN
223 
224 
225 int main ( int argc, const char* argv[] )
226 {
227  // Execute main application function
228  return COmssa2pepxmlApplication().AppMain ( argc, argv, 0, eDS_Default, 0 );
229 }
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CFile –.
Definition: ncbifile.hpp:1604
@MSModSpecSet.hpp User-defined methods of the data storage class.
void CreateArrays(void)
creates arrays for the existing set
CObjectOStreamXml –.
Definition: objostrxml.hpp:54
virtual void Init(void)
Initialize the application.
virtual int Run(void)
Run the application.
virtual void Exit(void)
Cleanup on application exit.
void ConvertFromOMSSA(CMSSearch &inOMSSA, CRef< CMSModSpecSet > Modset, string basename, string newname)
convert OMSSA to PepXML
Definition: pepxml.cpp:427
static int ReadCompleteSearch(const string &Filename, const ESerialDataFormat DataFormat, bool bz2, CMSSearch &MySearch)
Read in a complete search (typically for an iterative search)
Definition: omssa.cpp:193
static int ReadModFiles(const string &ModFileName, const string &UserModFileName, const string &Path, CRef< CMSModSpecSet > Modset)
read in modification files.
Definition: omssa.cpp:61
CVersionInfo –.
#define basename(path)
Definition: replacements.h:116
const string & GetProgramExecutablePath(EFollowLinks follow_links=eIgnoreLinks) const
Get the application's executable path.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
void SetVersion(const CVersionInfo &version)
Set the version number for the program.
Definition: ncbiapp.cpp:1155
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
Definition: ncbidiag.cpp:8083
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
Definition: ncbifile.cpp:665
static bool IsAbsolutePath(const string &path)
Check if a "path" is absolute for the current OS.
Definition: ncbifile.cpp:508
static void SplitPath(const string &path, string *dir=0, string *base=0, string *ext=0)
Split a path string into its basic components.
Definition: ncbifile.cpp:358
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
@ eSerial_Xml
XML.
Definition: serialdef.hpp:75
@ eSerial_AsnBinary
ASN.1 binary.
Definition: serialdef.hpp:74
void SetReferenceSchema(bool use_schema=true)
Make generated XML document reference XML schema.
Definition: objostrxml.cpp:128
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
Definition: objostr.cpp:126
void SetDefaultSchemaNamespace(const string &schema_ns)
Set default value of namespace name of generated DTD documents.
bool CanGetRequest(void) const
Check if it is safe to call GetRequest method.
Definition: MSSearch_.hpp:198
FILE * file
Magic spell ;-) needed for some weird compilers... very empiric.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
static Format format
Definition: njn_ioutil.cpp:53
USING_SCOPE(ncbi)
int main(int argc, const char *argv[])
Modified on Thu May 02 14:30:11 2024 by modify_doxy.py rev. 669887