NCBI C++ ToolKit
gff_object_loader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff_object_loader.cpp 47479 2023-05-02 13:24:02Z ucko $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Mike DiCuccio, Roman Katargin
27 */
28 
29 
30 #include <ncbi_pch.hpp>
31 
32 #include <util/icanceled.hpp>
33 
34 #include <gui/objutils/label.hpp>
36 
40 
41 #include <util/format_guess.hpp>
42 
46 
47 #include <wx/filename.h>
48 
50 #include <serial/iterator.hpp>
51 
52 
55 
/// Construct a loader for the given GFF/GTF load parameters and input files.
///
/// @param params    Load parameters; copied into m_Params.
/// @param filenames Files to load; copied into m_FileNames.
///
/// The constructor only stores its arguments; no file is opened here.
56 CGffObjectLoader::CGffObjectLoader(const CGffLoadParams& params, const vector<wxString>& filenames)
57  : m_Params(params), m_FileNames(filenames)
58 {
59 }
60 
// NOTE(review): the signature (listing line 61) is missing from this extract;
// presumably this is the body of CGffObjectLoader::GetObjects() - confirm
// against the original file.
// Returns the m_Objects member, which the loading code appends to.
62 {
63  return m_Objects;
64 }
65 
// NOTE(review): the signature (listing line 66) is missing from this extract;
// presumably this is the body of CGffObjectLoader::GetDescription() - confirm
// against the original file.
// Returns a fixed description string for this loading task.
67 {
68  return "Loading GFF/GTF Files";
69 }
70 
// NOTE(review): the signature (listing line 71) is missing from this extract;
// presumably this is the body of CGffObjectLoader::PreExecute() - confirm
// against the original file.
// Does no work and always reports success.
72 {
73  return true;
74 }
75 
// NOTE(review): the signature (listing line 76) is missing from this extract;
// presumably this is the body of CGffObjectLoader::Execute(ICanceled& canceled)
// - confirm against the original file. Several statements inside the body
// (listing lines 82, 85, 89-90, 97, 111, 174) are also absent from the extract.
//
// Reads every file in m_FileNames with a GFF3/GTF/GVF reader, appends each
// resulting annotation (and any trailing FASTA sequences) to m_Objects, and
// records per-file reader messages or exceptions via x_UpdateHTMLResults().
//
// Returns false as soon as `canceled` reports cancellation (objects appended
// before that point stay in m_Objects); returns true otherwise, including when
// individual files failed and were only reported.
77 {
    // Reader flags, built from the "parse seq-ids" parameter.
78  unsigned int flags = 0;
79 
    // NOTE(review): the statements of both cases (listing lines 82 and 85) are
    // missing from this extract; presumably they OR an ID-handling flag into
    // `flags` - confirm against the original file.
80  switch(m_Params.GetParseSeqIds()) {
81  case 1:
83  break;
84  case 2:
86  break;
87  }
88 
    // NOTE(review): listing lines 89-90 are missing; the brace below closes a
    // block whose opening statement is not visible here.
91  }
92 
    // User-supplied annotation name; when empty, a label is computed per
    // annotation inside the loop below, which needs a scope.
93  string annot_name = ToStdString(m_Params.GetNameFeatSet());
94  CRef<CScope> scope;
95  if (annot_name.empty()) {
96  // scope for CLabel::GetLabel
    // NOTE(review): the statement that creates the scope (listing line 97) is
    // missing from this extract.
98  scope->AddDefaults();
99  }
100 
101  ITERATE(vector<wxString>, it, m_FileNames) {
    // Check for cancellation before starting each file.
102  if (canceled.IsCanceled())
103  return false;
104 
    // Declared outside the try block so that the catch handlers below can
    // still hand it to x_UpdateHTMLResults(); it stays null until a reader
    // has been selected.
105  CRef<CErrorContainer> errCont;
106  const wxString& fn = *it;
107 
108  try {
109  CCompressedFile file(fn);
110 
    // NOTE(review): the declaration of `fmt` (listing line 111) is missing
    // from this extract.
    // File-format parameter values 1/2/3 select GFF3/GTF/GVF explicitly;
    // any other value guesses the format from the file, then resets the file
    // object before it is read again.
112  switch (m_Params.GetFileFormat()) {
113  case 1 :
114  fmt = CFormatGuess::eGff3;
115  break;
116  case 2 :
117  fmt = CFormatGuess::eGtf;
118  break;
119  case 3 :
120  fmt = CFormatGuess::eGvf;
121  break;
122  default :
123  fmt = file.GuessFormat();
124  file.Reset();
125  break;
126  }
127 
128  CReaderBase::TAnnots annots;
129 
    // Bare file name (no directory), used as fallback label and in the
    // object description.
130  string fileName = ToStdString(wxFileName(fn).GetFullName());
131  AutoPtr<CReaderBase> reader;
    // GFF2 is rejected with an exception; the CException handler at the end
    // of this loop body reports it and the loop moves on to the next file.
    // Any other unlisted format leaves `reader` null.
132  if (fmt == CFormatGuess::eGff2) {
133  NCBI_THROW(CException, eInvalid, "GFF reader: Unsupported format.");
134  } else if (fmt == CFormatGuess::eGff3) {
135  reader.reset(new CGff3Reader(flags));
136  } else if (fmt == CFormatGuess::eGtf) {
137  reader.reset(new CGtfReader(flags));
138  } else if (fmt == CFormatGuess::eGvf) {
139  reader.reset(new CGvfReader(flags));
140  }
141 
    // No reader was selected: report the file as unsupported and skip it.
142  if (!reader) {
143  x_UpdateHTMLResults(fn, errCont, NcbiEmptyString, "This file is not recognized as a supported GFF format.");
144  continue;
145  }
    // Read all annotations, collecting reader messages in errCont, then
    // report them for this file and release the container.
146  reader->SetCanceler(&canceled);
147  errCont.Reset(new CErrorContainer(100));
148  reader->ReadSeqAnnots(annots, file.GetIstream(), errCont.GetPointer());
149  x_UpdateHTMLResults(fn, errCont);
150  errCont.Reset();
151 
152  NON_CONST_ITERATE(CReaderBase::TAnnots, annot_iter, annots) {
153  if (canceled.IsCanceled())
154  return false;
155 
    // Label priority: user-supplied annotation name, otherwise the label
    // computed by CLabel::GetLabel; if that is empty, the file name.
    // `scope` is non-null here whenever annot_name is empty (see above).
156  string label;
157  if (annot_name.empty())
158  CLabel::GetLabel(**annot_iter, &label, CLabel::eDefault, &*scope);
159  else
160  label = annot_name;
161 
162  if (label.empty())
163  label = fileName;
164 
165  (*annot_iter)->SetNameDesc(label);
166  m_Objects.emplace_back(**annot_iter, label, "File: " + fileName);
167 
168  x_UpdateMap(**annot_iter);
169  }
    // If the reader is a CGff2Reader (or derived from it) and reports that it
    // stopped at sequence data, read the rest of the stream as FASTA.
    // NOTE(review): presumably the GFF3/GTF/GVF readers derive from
    // CGff2Reader, otherwise this branch is unreachable - confirm.
170  auto gff2 = dynamic_cast<CGff2Reader*>(reader.get());
171  if (gff2 && gff2->AtSequenceData()) {
172  if (!scope) {
173  // scope for CSeqFastaReader; only created here when it was not already needed for CLabel::GetLabel
    // NOTE(review): the statement that creates the scope (listing line 174)
    // is missing from this extract.
175  scope->AddDefaults();
176  }
177 
    // NOTE(review): `flags` is the same value passed to the GFF readers
    // above; confirm that its bits are also valid as CFastaReader flags.
178  CSeqFastaReader fasta_reader(*scope, file.GetIstream(), flags | CFastaReader::fQuickIDCheck); //fNoParseID
179  auto seq = fasta_reader.ReadSequences();
    // A null result adds nothing; otherwise the entry is added with its
    // content label and an empty description.
180  if (seq) {
181  string label;
182  seq->GetLabel(&label, CSeq_entry::eContent);
183  m_Objects.emplace_back(*seq, label, "");
184  }
185  }
186  }
    // Failures are reported per file and do not abort the whole load.
187  catch (const CException& e) {
188  x_UpdateHTMLResults(fn, errCont, e.GetMsg());
189  }
190  catch (const exception& e) {
191  x_UpdateHTMLResults(fn, errCont, e.what());
192  }
193  }
194 
195  return true;
196 }
197 
// NOTE(review): the signature (listing line 198) is missing from this extract;
// presumably this is the body of CGffObjectLoader::PostExecute() - confirm
// against the original file.
// Shows the accumulated import errors under a GFF/GTF title, then returns the
// result of the mapping dialog run on the loaded objects.
199 {
200  x_ShowErrorsDlg(wxT("GFF/GTF import errors"));
201  return x_ShowMappingDlg(m_Objects);
202 }
203 
AutoPtr –.
Definition: ncbimisc.hpp:401
CErrorContainer.
EFormat
The formats are checked in the same order as declared here.
@ eGff2
GFF2, CGff2Reader, any GFF-like that doesn't fit the others.
@ eGtf
New GTF, CGtfReader.
@ eGvf
GVF, CGvfReader.
@ eGff3
GFF3, CGff3Reader.
string GetAssemblyAcc() const
void x_CreateMapper(const string &assembly)
void x_UpdateMap(objects::CSeq_annot &annot)
bool x_ShowMappingDlg(IObjectLoader::TObjects &objects)
@ fNumericIdsAsLocal
numeric identifiers are local IDs
Definition: reader_base.hpp:76
@ fAllIdsAsLocal
all identifiers are local IDs
Definition: reader_base.hpp:78
TAnnotList TAnnots
Definition: reader_base.hpp:91
void x_ShowErrorsDlg(const wxString &title)
void x_UpdateHTMLResults(const wxString &object, objects::ILineErrorListener *errCont, const string &exception="", const string &error_msg="", const wxString &objectName=wxT("File:"))
CScope –.
Definition: scope.hpp:92
FASTA sequences reader class that does post-processing: overrides the local IDs; removes the well-known sequences...
CRef< objects::CSeq_entry > ReadSequences(vector< CConstRef< objects::CSeq_id >> *wellknown_ids=nullptr, int max_seqs=kMax_Int, objects::ILineErrorListener *pMessageListener=0)
Read multiple sequences and remove the well-known sequences from the entry.
@ eContent
Definition: Seq_entry.hpp:93
Interface for testing cancellation request in a long lasting operation.
Definition: icanceled.hpp:51
vector< SObject > TObjects
static uch flags
USING_SCOPE(objects)
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
Definition: ncbimisc.hpp:480
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
Definition: ncbiexpt.cpp:342
virtual TObjects & GetObjects()
wxString GetNameFeatSet() const
vector< wxString > m_FileNames
CGffObjectLoader(const CGffLoadParams &params, const vector< wxString > &filenames)
virtual string GetDescription() const
virtual bool Execute(ICanceled &canceled)
virtual bool PostExecute()
CMapAssemblyParams GetMapAssembly() const
int GetParseSeqIds() const
CGffLoadParams m_Params
int GetFileFormat() const
virtual bool PreExecute()
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
@ eDefault
Definition: label.hpp:73
@ fQuickIDCheck
Just check local IDs' first characters.
Definition: fasta.hpp:110
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
virtual bool IsCanceled(void) const =0
#define NcbiEmptyString
Definition: ncbistr.hpp:122
static const char label[]
FILE * file
#define wxT(x)
Definition: muParser.cpp:41
The Object manager core.
static int filenames
Definition: pcregrep.c:172
string ToStdString(const wxString &s)
Definition: wx_utils.hpp:161
Modified on Sun Apr 14 05:29:07 2024 by modify_doxy.py rev. 669887