NCBI C++ ToolKit
sgml_entity.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sgml_entity.cpp 81721 2018-03-28 12:46:32Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mati Shomrat
27  *
28  * File Description:
29  * Functions to Convert SGML to ASCII for Backbone subset SGML
30  */
31 #include <ncbi_pch.hpp>
32 #include <util/sgml_entity.hpp>
33 #include <util/static_map.hpp>
34 
36 
37 // mapping from SGML to ASCII
38 
40 static const TSgmlAsciiPair sc_sgml_entity[] = {
41  { "Agr" , "Alpha" },
42  { "Bgr" , "Beta" },
43  { "Dgr" , "Delta" },
44  { "EEgr", "Eta" },
45  { "Egr" , "Epsilon" },
46  { "Ggr" , "Gamma" },
47  { "Igr" , "Iota" },
48  { "KHgr", "Chi" },
49  { "Kgr" , "Kappa" },
50  { "Lgr" , "Lambda" },
51  { "Mgr" , "Mu" },
52  { "Ngr" , "Nu" },
53  { "OHgr", "Omega" },
54  { "Ogr" , "Omicron" },
55  { "PHgr", "Phi" },
56  { "PSgr", "Psi" },
57  { "Pgr" , "Pi" },
58  { "Rgr" , "Rho" },
59  { "Sgr" , "Sigma" },
60  { "THgr", "Theta" },
61  { "Tgr" , "Tau" },
62  { "Ugr" , "Upsilon" },
63  { "Xgr" , "Xi" },
64  { "Zgr" , "Zeta" },
65  { "agr" , "alpha" },
66  { "amp" , "&" },
67  { "bgr" , "beta" },
68  { "dgr" , "delta" },
69  { "eegr", "eta" },
70  { "egr" , "epsilon" },
71  { "ggr" , "gamma" },
72  { "gt" , ">" },
73  { "igr" , "iota" },
74  { "kgr" , "kappa" },
75  { "khgr", "chi" },
76  { "lgr" , "lambda" },
77  { "lt" , "<" },
78  { "mgr" , "mu" },
79  { "ngr" , "nu" },
80  { "ogr" , "omicron" },
81  { "ohgr", "omega" },
82  { "pgr" , "pi" },
83  { "phgr", "phi" },
84  { "psgr", "psi" },
85  { "rgr" , "rho" },
86  { "sfgr", "s" },
87  { "sgr" , "sigma" },
88  { "tgr" , "tau" },
89  { "thgr", "theta" },
90  { "ugr" , "upsilon" },
91  { "xgr" , "xi" },
92  { "zgr" , "zeta" }
93 };
94 
97 
98 
99 // in place conversion from SGML to ASCII
100 // we replace "&SGML entity; -> "<ASCII>"
101 void Sgml2Ascii(string& sgml)
102 {
103  SIZE_TYPE amp = sgml.find('&');
104 
105  while (amp != NPOS) {
106  SIZE_TYPE semi = sgml.find(';', amp);
107  if (semi != NPOS) {
108  size_t old_len = semi - amp - 1;
109  string ts = sgml.substr(amp + 1, old_len);
110  TSgmlAsciiMap::const_iterator it = sc_SgmlAsciiMap.find(ts.c_str());
111  if (it != sc_SgmlAsciiMap.end()) {
112  size_t new_len = strlen(it->second);
113  sgml[amp] = '<';
114  sgml[semi] = '>';
115  sgml.replace(amp + 1, old_len, it->second);
116  semi = amp + 1 + new_len;
117  }
118  else {
119  semi = amp;
120  }
121  }
122  else {
123  semi = amp;
124  }
125  amp = sgml.find('&', semi + 1);
126  }
127 }
128 
129 
130 // conversion of SGML to ASCII
131 string Sgml2Ascii(const string& sgml)
132 {
133  string result = sgml;
135  return result;
136 }
137 
138 
139 //detecting SGML in string
140 bool ContainsSgml(const string& str)
141 {
142  bool found = false;
143  size_t pos = NStr::Find(str, "&");
144  while (pos != string::npos && !found) {
145  size_t len = 0;
146  const char *end = str.c_str() + pos + 1;
147  while (*end != 0 && isalpha (*end)) {
148  len++;
149  end++;
150  }
151  if (*end == ';' && len > 1) {
152  string match = str.substr(pos + 1, len);
153 
154  TSgmlAsciiMap::const_iterator it = sc_SgmlAsciiMap.begin();
155  while (it != sc_SgmlAsciiMap.end() && !found) {
156  if (NStr::StartsWith(match, it->first)) {
157  found = true;
158  }
159  ++it;
160  }
161  }
162  if (*end == 0) {
163  pos = string::npos;
164  } else if (!found) {
165  pos = NStr::Find(str, "&", pos + len + 1);
166  }
167  }
168  return found;
169 }
170 
171 
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105
TBase::const_iterator const_iterator
Definition: static_map.hpp:109
static const char * str(char *buf, int n)
Definition: stats.c:84
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
int len
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
bool ContainsSgml(const string &str)
DEFINE_STATIC_ARRAY_MAP(TSgmlAsciiMap, sc_SgmlAsciiMap, sc_sgml_entity)
SStaticPair< const char *, const char * > TSgmlAsciiPair
Definition: sgml_entity.cpp:39
void Sgml2Ascii(string &sgml)
Convert SGML entity to ASCII (in place)
CStaticPairArrayMap< const char *, const char *, PCase_CStr > TSgmlAsciiMap
Definition: sgml_entity.cpp:95
static const TSgmlAsciiPair sc_sgml_entity[]
Definition: sgml_entity.cpp:40
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
else result
Definition: token2.c:20
Modified on Sun Apr 28 04:46:55 2024 by modify_doxy.py rev. 669887