NCBI C++ ToolKit
idmapper_config.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: idmapper_config.cpp 99721 2023-05-02 19:22:01Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * Implementation of CIdMapperConfig, an ID mapper loaded from a config stream.
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbireg.hpp>
34 
35 // Objects includes
39 
44 
47 
49  const std::string& strContext,
50  bool bInvert,
51  ILineErrorListener* pErrors)
52  : CIdMapper(strContext, bInvert, pErrors)
53 {
    // Stream-taking constructor (the first line of its signature is not
    // present in this listing). The context, inversion flag and error
    // listener have already been forwarded to the CIdMapper base class by the
    // initializer list; the mapping table itself is loaded here from istr.
54  Initialize(istr);
55 }
56 
57 
59  bool bInvert,
60  ILineErrorListener* pErrors)
61  : CIdMapper(strContext, bInvert, pErrors)
62 {
    // Empty body: this overload only forwards its arguments to the CIdMapper
    // base class and does not call Initialize(), so no mappings are loaded
    // by this constructor.
63 }
64 
65 
67 {
    // Initialize(CNcbiIstream& istr): loads the mapping table from a
    // configuration stream. (The signature line and listing lines 72, 77, 89,
    // 119, 129 and 143 are not present in this listing.)
    //
    // Two formats are handled:
    //  1. NCBI registry format, read through CMemoryRegistry.
    //  2. An older line-oriented format, used only when the registry read
    //     throws a CException.
68  string buffer;
69  {{
    // Copy the whole input into a local stream.
    // NOTE(review): listing line 72 is elided; presumably it stores the copied
    // text in `buffer` -- confirm against the real source.
70  CNcbiOstrstream os;
71  NcbiStreamCopy(os, istr);
73  }}
74 
75  CMemoryRegistry reg;
76  try {
    // NOTE(review): `is` is declared on elided listing line 77; presumably an
    // input stream over `buffer` -- confirm.
78  reg.Read(is);
79  }
80  catch (CException& e) {
    // Registry parsing failed: warn, then fall back to the old format.
81  ERR_POST(Warning << "CIdMapperConfig: "
82  "error reading config file in registry format: " << e <<
83  "; trying to read in old format...");
84 
85  //
86  // older config file support
87  // consider dropping this
88  //
    // NOTE(review): the `is` used below is declared on elided listing line 89;
    // presumably a fresh stream over `buffer` -- confirm.
90 
91  string strLine( "" );
    // Tracks the section the reader is currently inside; it starts out equal
    // to the context this mapper was constructed with.
92  string strCurrentContext( m_strContext );
93 
    // NOTE(review): the loop is driven by eof() rather than by the result of
    // NcbiGetlineEOL(); a read failure that does not set eof would not end
    // the loop -- confirm this cannot happen for the stream type used here.
94  while( !is.eof() ) {
95  NcbiGetlineEOL( is, strLine );
96  NStr::TruncateSpacesInPlace( strLine );
97  if ( strLine.empty() || NStr::StartsWith( strLine, "#" ) ) {
98  //blank line or '#' comment: skip
99  continue;
100  }
101  if ( NStr::StartsWith( strLine, "[" ) ) {
102  //start of new build section
103  SetCurrentContext( strLine, strCurrentContext );
104  continue;
105  }
    // Only lines inside the section matching this mapper's own context
    // contribute mappings; lines in other sections are ignored.
106  if ( m_strContext == strCurrentContext ) {
107  AddMapEntry( strLine );
108  }
109  }
110 
111  /// done here
112  return;
113  }
114 
115  ///
116  /// enumerate the fields required for the mapping
117  ///
    // NOTE(review): `entries` is filled on elided listing line 119; presumably
    // with the key names of section m_strContext -- confirm.
118  list<string> entries;
120  NON_CONST_ITERATE (list<string>, iter, entries) {
121  if (*iter == "map_from" ||
122  *iter == "map_to") {
123  /// reserved keys
124  continue;
125  }
    // The key name (*iter) is the id mapped TO; its value is a
    // whitespace-separated list of ids mapped FROM. The split flags are on
    // elided listing line 129.
126  string id_set = reg.Get(m_strContext, *iter);
127  list<string> ids;
128  NStr::Split(id_set, " \t\n\r", ids,
130 
131  ///
132  /// id_from and id_to are naturally reversed, since we use a format
133  /// that contains 'gi| -> aliases' mapping
134  ///
135 
    // Parse the key as a Seq-id; if parsing throws, treat it as a local
    // string id instead.
136  CSeq_id id_to;
137  try {
138  id_to.Set(*iter);
139  }
140  catch (CException&) {
141  id_to.SetLocal().SetStr(*iter);
142  }
    // NOTE(review): `idh_to` is declared on elided listing line 143;
    // presumably the handle for id_to -- confirm.
144 
145  ITERATE (list<string>, id_iter, ids) {
    // Same parse-or-fall-back-to-local treatment for each source token.
146  CSeq_id id_from;
147  try {
148  id_from.Set(*id_iter);
149  }
150  catch (CException&) {
151  id_from.SetLocal().SetStr(*id_iter);
152  }
153  CSeq_id_Handle idh_from = CSeq_id_Handle::GetHandle(id_from);
154 
155  AddMapping(idh_from, idh_to);
156  if (m_bInvert) {
157  /// inversion honors *ONLY* the first token to preserve 1:1
158  /// mapping
159  break;
160  }
161  }
162  }
163 };
164 
165 
167  list<SMappingContext>& contexts)
168 {
    // DescribeContexts: reads istr as an NCBI registry and appends one entry
    // per registry section to `contexts`. (The first signature line and
    // listing line 175 are not present in this listing.)
    // Unlike Initialize(), the registry read here is not wrapped in a
    // try/catch, so an exception thrown by Read() propagates to the caller.
169  CMemoryRegistry reg;
170  reg.Read(istr);
171 
172  list<string> sections;
173  reg.EnumerateSections(&sections);
174  ITERATE (list<string>, iter, sections) {
    // NOTE(review): `ctx` is declared on elided listing line 175; presumably
    // a local SMappingContext -- confirm.
    // The section name becomes the context name; map_from / map_to are read
    // from the keys of the same name in that section.
176  ctx.context = *iter;
177  ctx.map_from = reg.Get(*iter, "map_from");
178  ctx.map_to = reg.Get(*iter, "map_to");
179  contexts.push_back(ctx);
180  }
181 }
182 
183 // ============================================================================
// SetCurrentContext: extracts a section name from an old-format header line
// (Initialize() calls this for lines that start with "[").
// (The line carrying the function name is not present in this listing.)
//
// strLine    - the header line to parse.
// strContext - in/out; overwritten with the section name only when the line
//              splits into exactly one token, otherwise left unchanged.
184 void
186  const string& strLine,
187  string& strContext )
188 // ============================================================================
189 {
    // Split on space, tab, '[', ']', '|' and ':' with adjacent delimiters
    // merged.
190  vector<string> columns;
191  NStr::Split( strLine, " \t[]|:", columns, NStr::fSplit_MergeDelimiters);
192 
193  //sanity check: only a single column remaining
194  if ( columns.size() != 1 ) {
195  return;
196  }
197 
198  strContext = columns[0];
199 };
200 
201 // ============================================================================
// AddMapEntry: parses one data line of the old config format and, if it is
// well formed, registers a source -> target mapping.
// (The line carrying the function name and listing line 208 are not present
// in this listing.)
//
// strLine - the line to parse; malformed lines are silently ignored.
202 void
204  const string& strLine )
205 // ============================================================================
206 {
    // NOTE(review): the call that splits strLine into `columns` is on elided
    // listing line 208; its delimiters and flags are not visible here.
207  vector<string> columns;
209 
210  //sanity check: two or three columns. If three columns, the last better be
211  //integer
212  if ( columns.size() != 2 && columns.size() != 3 ) {
213  return;
214  }
215  if ( columns.size() == 3 ) {
    // The parsed number is discarded; the conversion is used purely as a
    // validity check (a CException means "not an integer").
216  string strLength = columns[2];
217  try {
218  NStr::StringToLong( strLength );
219  }
220  catch( CException& ) {
221  return;
222  }
223  }
224 
    // First column is the source id, second column the target id; the mapping
    // is added only when both handles test true.
225  CSeq_id_Handle hSource = SourceHandle( columns[0] );
226  CSeq_id_Handle hTarget = TargetHandle( columns[1] );
227  if ( hSource && hTarget ) {
228  AddMapping( hSource, hTarget );
229  }
230 };
231 
232 // ============================================================================
// SourceHandle: builds the handle for the source column of an old-format map
// line. (The return type, the function name and listing line 239 are not
// present in this listing.)
//
// strId - source identifier text; it is always wrapped as a local Seq-id.
235  const string& strId )
236 // ============================================================================
237 {
238  CSeq_id source( CSeq_id::e_Local, strId );
    // NOTE(review): the return statement is on elided listing line 239;
    // presumably it returns the handle for `source` -- confirm.
240 };
241 
242 // ============================================================================
// TargetHandle: resolves the target column of an old-format map line to a
// CSeq_id_Handle. (The return type and the function name are not present in
// this listing.)
//
// strId - either a bare GI number or a FASTA-style id string that may hold
//         several '|'-delimited ids.
// Returns the chosen handle, or an empty CSeq_id_Handle when strId is not
// numeric and FASTA parsing yields no ids.
245  const string& strId )
246 // ============================================================================
247 {
248  //maybe it's a straight GI number ...
249  try {
250  CSeq_id target( CSeq_id::e_Gi, NStr::StringToNumeric<TGi>( strId ) );
251  return CSeq_id_Handle::GetHandle( target );
252  }
253  catch( CException& ) {
254  //or, maybe not ...
255  }
256 
257  //if not, assume a fasta string of one or more IDs. If more than one, pick
258  // the best candidate according to the precedence applied in the loop below
259  list< CRef< CSeq_id > > ids;
    // Third argument is allow_partial_failure.
260  CSeq_id::ParseFastaIds( ids, strId, true );
261  if ( ids.empty() ) {
262  //nothing to work with ...
263  return CSeq_id_Handle();
264  }
265 
266  list< CRef< CSeq_id > >::iterator idit;
267  CSeq_id_Handle hTo;
268 
269  for ( idit = ids.begin(); idit != ids.end(); ++idit ) {
270 
271  //we favor GI numbers over everything else. In the absence of a GI number
272  // go for a Genbank accession. If neither is available, we use the first
273  // id we find.
    // Precedence as implemented:
    //  - a GI id returns immediately;
    //  - a Genbank id is assigned unconditionally, so it replaces any earlier
    //    choice and the LAST Genbank id in the list wins;
    //  - any other id type is kept only if nothing has been chosen yet.
274  const CSeq_id& current = **idit;
275  switch ( current.Which() ) {
276  case CSeq_id::e_Gi:
277  return CSeq_id_Handle::GetHandle( current );
278  case CSeq_id::e_Genbank:
279  hTo = CSeq_id_Handle::GetHandle( current );
280  break;
281  default:
282  if ( !hTo ) {
283  hTo = CSeq_id_Handle::GetHandle( current );
284  }
285  break;
286  }
287  }
288 
289  //no GI found: return the best non-GI candidate selected above
290  return hTo;
291 };
292 
294 
void AddMapEntry(const std::string &)
CSeq_id_Handle TargetHandle(const std::string &)
static void DescribeContexts(CNcbiIstream &istr, list< SMappingContext > &contexts)
void Initialize(CNcbiIstream &istr)
void SetCurrentContext(const std::string &, std::string &)
CSeq_id_Handle SourceHandle(const std::string &)
CIdMapperConfig(CNcbiIstream &istr, const std::string &strContext="", bool bInvert=false, ILineErrorListener *pErrors=0)
Constructor specifying the content of the mapping table, mapping context, direction,...
IdMapper base class implementation.
Definition: idmapper.hpp:56
const std::string m_strContext
Definition: idmapper.hpp:108
virtual void AddMapping(const CSeq_id_Handle &from, const CSeq_id_Handle &to)
Add a mapping to the internal mapping table.
Definition: idmapper.cpp:65
const bool m_bInvert
Definition: idmapper.hpp:109
CMemoryRegistry –.
Definition: ncbireg.hpp:584
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CS_CONTEXT * ctx
Definition: t0006.c:12
static const column_t columns[]
Definition: utf8_2.c:22
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
string
Definition: cgiapp.hpp:687
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
Definition: Seq_id.cpp:2603
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
Definition: Seq_id.cpp:2457
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
virtual void EnumerateSections(list< string > *sections, TFlags flags=fAllLayers) const
Enumerate section names.
Definition: ncbireg.cpp:497
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
Definition: ncbireg.cpp:262
virtual void EnumerateEntries(const string &section, list< string > *entries, TFlags flags=fAllLayers) const
Enumerate parameter names for a specified section.
Definition: ncbireg.cpp:514
IRWRegistry * Read(CNcbiIstream &is, TFlags flags=0, const string &path=kEmptyStr)
Read and parse the stream "is", and merge its content with current Registry entries.
Definition: ncbireg.cpp:605
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
bool NcbiStreamCopy(CNcbiOstream &os, CNcbiIstream &is)
Copy the entire contents of stream "is" to stream "os".
Definition: ncbistre.cpp:211
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static long StringToLong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to long.
Definition: ncbistr.cpp:653
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
@ fSplit_Truncate
Definition: ncbistr.hpp:2501
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2498
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Local
local use
Definition: Seq_id_.hpp:95
USING_SCOPE(objects)
const CharType(& source)[N]
Definition: pointer.h:1149
Process information in the NCBI Registry, including working with configuration files.
static pcre_uint8 * buffer
Definition: pcretest.c:1051
static wxAcceleratorEntry entries[3]
Modified on Sun May 19 04:48:12 2024 by modify_doxy.py rev. 669887