NCBI C++ ToolKit
Prot_ref.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: Prot_ref.cpp 85279 2019-01-30 19:15:51Z bollin $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the ASN data definition file
34  * 'seqfeat.asn'.
35  */
36 
37 // standard includes
38 
39 // generated includes
40 #include <ncbi_pch.hpp>
42 
43 // generated classes
44 
45 #include <corelib/ncbimtx.hpp>
46 #include <util/line_reader.hpp>
47 #include <util/util_misc.hpp>
49 
50 #define NCBI_USE_ERRCODE_X Objects_ProtRef
51 
53 
54 BEGIN_objects_SCOPE // namespace ncbi::objects::
55 
56 // destructor
58 {
59 }
60 
61 // Appends a label to "label" based on content
62 void CProt_ref::GetLabel(string* label) const
63 {
64  if (IsSetName() && !GetName().empty()) {
65  *label += *GetName().begin();
66  } else if (IsSetDesc()) {
67  *label += GetDesc();
68  } else if (IsSetDb()) {
69  GetDb().front()->GetLabel(label);
70  }
71 }
72 
73 
76 
// Flag tested by the EC number lookups in this file before they consult the
// EC number maps; it starts out false.
// NOTE(review): the statement that sets it to true is not visible in this
// listing -- presumably done at the end of s_InitializeECNumberMaps; confirm.
79 static bool s_ECNumberMapsInitialized = false;
84 
89 
// Mutex that s_InitializeECNumberMaps holds (through a CFastMutexGuard) while
// it runs.
90 DEFINE_STATIC_FAST_MUTEX(s_ECNumberMutex);
91 
92 #include "ecnum_ambiguous.inc"
93 #include "ecnum_deleted.inc"
94 #include "ecnum_replaced.inc"
95 #include "ecnum_specific.inc"
96 
97 static void s_ProcessECNumberLine(const CTempString& line,
99 {
100  if (status == CProt_ref::eEC_replaced) {
101  SIZE_TYPE tab_pos = line.find('\t');
102  if (tab_pos == NPOS) {
103  ERR_POST_X(1, Warning << "No tab in ecnum_replaced entry " << line
104  << "; disregarding");
105  } else {
106  string lhs(line.substr(0, tab_pos)), rhs(line.substr(tab_pos + 1));
107  s_ECNumberStatusMap[lhs] = status;
108  s_ECNumberReplacementMap[lhs] = rhs;
109  }
110  } else {
111  SIZE_TYPE tab_pos = line.find('\t');
112  if (tab_pos == NPOS) {
113  s_ECNumberStatusMap[line] = status;
114  } else {
115  string lhs(line.substr(0, tab_pos));
116  s_ECNumberStatusMap[lhs] = status;
117  }
118  }
119 }
120 
121 
// Loads one EC number table and passes every entry to s_ProcessECNumberLine
// together with `status`.
//
// dir            - directory expected to hold "ecnum_<name>.txt"; when empty,
//                  no file path is built.
// name           - table name, used in the file name and in log messages.
// fallback       - compiled-in entries, used when the line reader `lr` is
//                  empty.
// fallback_count - number of entries in fallback.
//
// Returns `rval`.
//
// NOTE(review): this listing skips several original lines (numbering gaps at
// 125, 127-128, 132, 134, 147 and 150). The final `status` parameter, the
// declarations of `lr` and `rval`, the call that opens the reader and every
// assignment to `rval` are therefore not visible here -- confirm against the
// repository before relying on this description.
122 static CProt_ref::EECNumberFileStatus s_LoadECNumberTable(const string& dir, const string& name,
123  const char* const *fallback,
124  size_t fallback_count,
126 {
129  string file = kEmptyStr;
130  if ( !dir.empty() ) {
// Path of the external table: <dir>/ecnum_<name>.txt
131  file = CDirEntry::MakePath(dir, "ecnum_" + name, "txt");
133  (CDirEntry::MakePath(dir, "ecnum_" + name, "txt")));
135  }
136  if (lr.Empty()) {
// No reader available: use the built-in table. The message is logged only
// when the NCBI_DEBUG environment variable is set.
137  if (getenv("NCBI_DEBUG")) {
138  LOG_POST("Reading " + name + " EC number data from built-in table");
139  }
// Consumes the fallback array front to back, one entry per iteration.
140  while (fallback_count--) {
141  s_ProcessECNumberLine(*fallback++, status);
142  }
143  } else {
144  if (getenv("NCBI_DEBUG")) {
145  LOG_POST("Reading " + name + " EC number data from " + file);
146  }
// Advances the reader and processes the line it yields, until the reader
// reports end of input.
148  do {
149  s_ProcessECNumberLine(*++*lr, status);
151  } while ( !lr->AtEOF() );
152  }
153  return rval;
154 }
155 
156 
// Populates the EC number maps by loading the four tables (specific,
// ambiguous, replaced, deleted) through s_LoadECNumberTable and storing the
// status each load returns.
//
// NOTE(review): this listing skips original lines 160, 170 and 186. The
// condition guarding the early return, the statement that derives `dir` from
// `file`, and whatever follows the loads are not visible here -- confirm
// against the repository.
157 static void s_InitializeECNumberMaps(void)
158 {
// Held for the rest of the function.
159  CFastMutexGuard GUARD(s_ECNumberMutex);
161  return;
162  }
// `dir` stays empty unless external data files are requested and found;
// an empty `dir` is passed through to s_LoadECNumberTable unchanged.
163  string dir;
164  const char* env_val = NULL;
// External data is considered only when NCBI_ECNUM_USE_DATA_DIR_FIRST is set
// to "TRUE" (compared case-insensitively).
165  env_val = getenv("NCBI_ECNUM_USE_DATA_DIR_FIRST");
166  if (env_val != NULL && NStr::EqualNocase(env_val, "TRUE"))
167  {
168  string file = g_FindDataFile("ecnum_specific.txt");
169  if ( !file.empty() ) {
171  }
172  if (dir.empty()) {
173  LOG_POST("s_InitializeECNumberMaps: reading specific EC Numbers from built-in data.");
174  } else {
175  LOG_POST("s_InitializeECNumberMaps: reading specific EC Numbers from " + file);
176  }
177  }
// LOAD_EC(x) expands to a s_LoadECNumberTable call for table "x": the table
// name is the stringized x, the fallback is the array kECNum_x with its
// element count, and the status is CProt_ref::eEC_x.
178 #define LOAD_EC(x) s_LoadECNumberTable \
179  (dir, #x, kECNum_##x, sizeof(kECNum_##x) / sizeof(*kECNum_##x), \
180  CProt_ref::eEC_##x)
181  s_ECNumSpecificStatus = LOAD_EC(specific);
182  s_ECNumAmbiguousStatus = LOAD_EC(ambiguous);
183  s_ECNumReplacedStatus = LOAD_EC(replaced);
184  s_ECNumDeletedStatus = LOAD_EC(deleted);
185 #undef LOAD_EC
187 }
188 
189 
191 {
192  if ( !s_ECNumberMapsInitialized ) {
194  }
196  if (it == s_ECNumberStatusMap.end()) {
197  return eEC_unknown;
198  } else {
199  return it->second;
200  }
201 }
202 
203 
204 const string& CProt_ref::GetECNumberReplacement(const string& old_ecno)
205 {
206  if ( !s_ECNumberMapsInitialized ) {
208  }
210  = s_ECNumberReplacementMap.find(old_ecno);
211  if (it != s_ECNumberReplacementMap.end()) {
212  // see if this number has also been replaced
213  auto other_it = s_ECNumberReplacementMap.find(it->second);
214  while (other_it != s_ECNumberReplacementMap.end()) {
215  it = other_it;
216  other_it = s_ECNumberReplacementMap.find(it->second);
217  }
218  return it->second;
219  } else {
220  return kEmptyStr;
221  }
222 
223 }
224 
225 
226 bool CProt_ref::IsECNumberSplit(const string& old_ecno)
227 {
228  if (GetECNumberStatus(old_ecno) != eEC_replaced) {
229  return false;
230  }
231 
232  const string& replacement = GetECNumberReplacement(old_ecno);
233  if (NStr::Find(replacement, "\t") != string::npos) {
234  return true;
235  } else {
236  return false;
237  }
238 }
239 
240 
241 // From the INSDC Feature Table Documentation:
242 // Valid values for EC numbers are defined in the list prepared by the
243 // Nomenclature Committee of the International Union of Biochemistry and
244 // Molecular Biology(NC - IUBMB) (published in Enzyme Nomenclature 1992,
245 // Academic Press, San Diego, or a more recent revision thereof).
246 // The format represents a string of four numbers separated by full
247 // stops; up to three numbers starting from the end of the string can
248 // be replaced by dash "." to indicate uncertain assignment.
249 // Symbol "n" can be used in the last position instead of a number
250 // where the EC number is awaiting assignment.Please note that such
251 // incomplete EC numbers are not approved by NC - IUBMB.
252 //
253 // Examples:
254 // 1.1.2.4
255 // 1.1.2.-
256 // 1.1.2.n
257 bool CProt_ref::IsValidECNumberFormat (const string& ecno)
258 {
259  char ch;
260  bool is_ambig;
261  int numdashes;
262  int numdigits;
263  int numperiods;
264  const char *ptr;
265 
266  if (NStr::IsBlank(ecno)) {
267  return false;
268  }
269 
270  is_ambig = false;
271  numperiods = 0;
272  numdigits = 0;
273  numdashes = 0;
274 
275  ptr = ecno.c_str();
276  ch = *ptr;
277  while (ch != '\0') {
278  if (isdigit(ch)) {
279  numdigits++;
280  if (is_ambig) return false;
281  ptr++;
282  ch = *ptr;
283  } else if (ch == '-') {
284  numdashes++;
285  is_ambig = true;
286  ptr++;
287  ch = *ptr;
288  } else if (ch == 'n') {
289  if (numperiods == 3 && numdigits == 0 && isdigit(*(ptr + 1))) {
290  // allow/ignore n in first position of fourth number to not mean ambiguous, if followed by digit */
291  } else {
292  numdashes++;
293  is_ambig = true;
294  }
295  ptr++;
296  ch = *ptr;
297  } else if (ch == '.') {
298  numperiods++;
299  if (numdigits > 0 && numdashes > 0) return false;
300  if (numdigits == 0 && numdashes == 0) return false;
301  if (numdashes > 1) return false;
302  numdigits = 0;
303  numdashes = 0;
304  ptr++;
305  ch = *ptr;
306  } else {
307  ptr++;
308  ch = *ptr;
309  }
310  }
311 
312  if (numperiods == 3) {
313  if (numdigits > 0 && numdashes > 0) return false;
314  if (numdigits > 0 || numdashes == 1) return true;
315  }
316 
317  return false;
318 }
319 
320 
322 {
323  if (!IsSetEc()) {
324  return;
325  }
326  CProt_ref::TEc::iterator it = SetEc().begin();
327  while (it != SetEc().end()) {
328  if (GetECNumberStatus(*it) == eEC_replaced) {
329  string new_val = GetECNumberReplacement(*it);
330  if (!NStr::IsBlank(new_val)) {
331  *it = new_val;
332  }
333  }
334  it++;
335  }
336 
337 }
338 
339 
341 {
342  AutoFixEC();
343  if (!IsSetEc()) {
344  return;
345  }
346  CProt_ref::TEc::iterator it = SetEc().begin();
347  while (it != SetEc().end()) {
348  EECNumberStatus status = GetECNumberStatus(*it);
349  if (status == eEC_deleted ||
350  status == eEC_unknown ||
351  status == eEC_replaced) {
352  it = SetEc().erase(it);
353  } else {
354  it++;
355  }
356  }
357  if (SetEc().empty()) {
358  ResetEc();
359  }
360 }
361 
362 
363 END_objects_SCOPE // namespace ncbi::objects::
364 
366 
367 /* Original file checksum: lines: 61, chars: 1885, CRC32: 4ba9347a */
static bool s_ECNumberMapsInitialized
Definition: Prot_ref.cpp:79
map< string, CProt_ref::EECNumberStatus, PNocase > TECNumberStatusMap
Definition: Prot_ref.cpp:74
static CProt_ref::EECNumberFileStatus s_ECNumAmbiguousStatus
Definition: Prot_ref.cpp:80
static void s_ProcessECNumberLine(const CTempString &line, CProt_ref::EECNumberStatus status)
Definition: Prot_ref.cpp:97
static CProt_ref::EECNumberFileStatus s_ECNumSpecificStatus
Definition: Prot_ref.cpp:83
static CProt_ref::EECNumberFileStatus s_ECNumDeletedStatus
Definition: Prot_ref.cpp:81
DEFINE_STATIC_FAST_MUTEX(s_ECNumberMutex)
static CProt_ref::EECNumberFileStatus s_ECNumReplacedStatus
Definition: Prot_ref.cpp:82
#define LOAD_EC(x)
map< string, string > TECNumberReplacementMap
Definition: Prot_ref.cpp:75
static CProt_ref::EECNumberFileStatus s_LoadECNumberTable(const string &dir, const string &name, const char *const *fallback, size_t fallback_count, CProt_ref::EECNumberStatus status)
Definition: Prot_ref.cpp:122
static TECNumberReplacementMap s_ECNumberReplacementMap
Definition: Prot_ref.cpp:78
static void s_InitializeECNumberMaps(void)
Definition: Prot_ref.cpp:157
static TECNumberStatusMap s_ECNumberStatusMap
Definition: Prot_ref.cpp:77
CDirEntry –.
Definition: ncbifile.hpp:262
static EECNumberFileStatus GetECNumAmbiguousStatus()
Definition: Prot_ref.cpp:85
static EECNumberFileStatus GetECNumSpecificStatus()
Definition: Prot_ref.cpp:88
void AutoFixEC()
Definition: Prot_ref.cpp:321
void RemoveBadEC()
Definition: Prot_ref.cpp:340
EECNumberFileStatus
Enzyme Commission file status.
Definition: Prot_ref.hpp:72
@ eECFile_not_found
File was not found in expected directory.
Definition: Prot_ref.hpp:74
@ eECFile_not_attempted
No attempt has been made to read the file.
Definition: Prot_ref.hpp:73
@ eECFile_not_read
File was found but could not be read.
Definition: Prot_ref.hpp:75
@ eECFile_read
File was read successfully (and is being used instead of the compiled fallback data).
Definition: Prot_ref.hpp:76
void GetLabel(string *label) const
Definition: Prot_ref.cpp:62
static bool IsECNumberSplit(const string &old_ecno)
Definition: Prot_ref.cpp:226
~CProt_ref(void)
Definition: Prot_ref.cpp:57
static bool IsValidECNumberFormat(const string &ecno)
Verify correct form of EC number.
Definition: Prot_ref.cpp:257
static const string & GetECNumberReplacement(const string &old_ecno)
Return a replaced EC number's replacement.
Definition: Prot_ref.cpp:204
static EECNumberFileStatus GetECNumDeletedStatus()
Definition: Prot_ref.cpp:86
EECNumberStatus
Enzyme Commission number status.
Definition: Prot_ref.hpp:63
@ eEC_replaced
Obsolete synonym for some other EC number.
Definition: Prot_ref.hpp:66
@ eEC_unknown
Unrecognized; possibly malformed.
Definition: Prot_ref.hpp:68
@ eEC_deleted
Withdrawn, with no (single?) replacement.
Definition: Prot_ref.hpp:67
static EECNumberFileStatus GetECNumReplacedStatus()
Definition: Prot_ref.cpp:87
static EECNumberStatus GetECNumberStatus(const string &ecno)
Determine an EC number's validity and specificity.
Definition: Prot_ref.cpp:190
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
Definition: ncbifile.cpp:413
static string AddTrailingPathSeparator(const string &path)
Add trailing path separator, if needed.
Definition: ncbifile.cpp:455
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
Definition: tempstr.hpp:655
static const char label[]
bool IsSetDesc(void) const
description (instead of name) Check if a value has been assigned to Desc data member.
Definition: Prot_ref_.hpp:391
const TDb & GetDb(void) const
Get the Db member data.
Definition: Prot_ref_.hpp:500
TEc & SetEc(void)
Assign a value to Ec data member.
Definition: Prot_ref_.hpp:456
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:378
bool IsSetDb(void) const
ids in other dbases Check if a value has been assigned to Db data member.
Definition: Prot_ref_.hpp:488
bool IsSetEc(void) const
E.C.
Definition: Prot_ref_.hpp:438
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:366
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Prot_ref_.hpp:403
void ResetEc(void)
Reset Ec data member.
Definition: Prot_ref_.cpp:76
Definition of all error codes used in objects libraries.
FILE * file
Lightweight interface for getting lines of data with minimal memory copying.
constexpr bool empty(list< Ts... >) noexcept
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
Multi-threading – mutexes; rw-locks; semaphore.
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
Definition: util_misc.cpp:139
Modified on Wed Nov 29 02:15:51 2023 by modify_doxy.py rev. 669887