NCBI C++ ToolKit
njn_stringutil.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: njn_stringutil.cpp 62325 2014-04-01 19:20:49Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: njn_stringutil.cpp
29 
30 Author: John Spouge
31 
32 Contents:
33 
34 ******************************************************************************/
35 
36 #include <ncbi_pch.hpp>
37 
38 #include "njn_stringutil.hpp"
39 
40 #include <algorithm>
41 #include <string.h>
42 
44 USING_SCOPE(blast);
46 
47 
// Is every symbol in symbols_ alphabetic ?
// symbols_ : NUL-terminated string; an empty string yields true.
bool StringUtil::isAlpha (const char *symbols_)
{
    for (const char *c = symbols_; *c; ++c) {
        // Go through unsigned char: handing a negative plain char to a
        // <ctype.h> classifier is undefined behaviour.
        if (! isalpha (static_cast <unsigned char> (*c))) return false;
    }

    return true;
}
58 
// Is there no white space ?
// symbols_ : NUL-terminated string; an empty string yields true.
bool StringUtil::isNoWhiteSpace (const char *symbols_)
{
    for (const char *c = symbols_; *c; ++c) {
        // Go through unsigned char: handing a negative plain char to a
        // <ctype.h> classifier is undefined behaviour.
        if (isspace (static_cast <unsigned char> (*c))) return false;
    }

    return true;
}
70 
// Is everything white space ?
// An empty string yields true.
bool StringUtil::isAllWhiteSpace (const std::string &symbols_)
{
    for (std::string::const_iterator c = symbols_.begin (); c != symbols_.end (); ++c) {
        // Go through unsigned char: handing a negative plain char to a
        // <ctype.h> classifier is undefined behaviour.
        if (! isspace (static_cast <unsigned char> (*c))) return false;
    }

    return true;
}
79 
// Is every symbol in symbols_ lower-case ?
// symbols_ : NUL-terminated string; an empty string yields true.
// Any symbol that is not a lower-case letter (digit, blank, ...) yields false.
bool StringUtil::isLower (const char *symbols_)
{
    for (const char *c = symbols_; *c; ++c) {
        // Go through unsigned char: handing a negative plain char to a
        // <ctype.h> classifier is undefined behaviour.
        if (! islower (static_cast <unsigned char> (*c))) return false;
    }

    return true;
}
90 
// Is every symbol in symbols_ upper-case ?
// symbols_ : NUL-terminated string; an empty string yields true.
// Any symbol that is not an upper-case letter (digit, blank, ...) yields false.
bool StringUtil::isUpper (const char *symbols_)
{
    for (const char *c = symbols_; *c; ++c) {
        // Go through unsigned char: handing a negative plain char to a
        // <ctype.h> classifier is undefined behaviour.
        if (! isupper (static_cast <unsigned char> (*c))) return false;
    }

    return true;
}
101 
// Converts the NUL-terminated string symbols_ to lower case, in place.
void StringUtil::toLower (char *symbols_)
{
    for (char *i = symbols_; *i; ++i) {
        // tolower () is only defined for unsigned-char values (and EOF), so
        // convert before the call and narrow the int result back explicitly.
        *i = static_cast <char> (tolower (static_cast <unsigned char> (*i)));
    }
}
106 
// Converts the NUL-terminated string symbols_ to upper case, in place.
void StringUtil::toUpper (char *symbols_)
{
    for (char *i = symbols_; *i; ++i) {
        // toupper () is only defined for unsigned-char values (and EOF), so
        // convert before the call and narrow the int result back explicitly.
        *i = static_cast <char> (toupper (static_cast <unsigned char> (*i)));
    }
}
111 
// Are some of the symbols_ replicated ?
//   symbols_          : the symbols to examine (not modified)
//   upperEqualsLower_ : if true, case is ignored, i.e., a == A
// Returns true if at least one symbol occurs more than once.
bool StringUtil::isReplicate (
const std::string &symbols_,
bool upperEqualsLower_)
{
    std::string symbols (symbols_); // working copy: it is folded and sorted below

    if (upperEqualsLower_) {
        for (std::string::iterator i = symbols.begin (); i != symbols.end (); ++i) {
            // toupper () is only defined for unsigned-char values (and EOF).
            *i = static_cast <char> (toupper (static_cast <unsigned char> (*i)));
        }
    }

    // After sorting, equal symbols are neighbours, so one adjacent pair of
    // equal symbols is enough to answer the question.
    std::sort (symbols.begin (), symbols.end ());
    return std::adjacent_find (symbols.begin (), symbols.end ()) != symbols.end ();
}
125 
// Erases initial white-space from the NUL-terminated string symbols_, in place.
// The string is left untouched if it does not start with white-space.
void StringUtil::eraseInitialWhiteSpace (char *symbols_)
{
    const char *first = symbols_;

    // Go through unsigned char: handing a negative plain char to a
    // <ctype.h> classifier is undefined behaviour.
    while (*first && isspace (static_cast <unsigned char> (*first))) ++first;

    if (first == symbols_) return;

    // Source and destination overlap, hence memmove (); the + 1 carries the
    // terminating NUL along.
    memmove (symbols_, first, strlen (first) + 1);
}
144 
// Erases initial white-space from symbols_, in place.
// Works on the std::string directly, so no temporary character buffer has to
// be allocated and released by hand.
void StringUtil::eraseInitialWhiteSpace (std::string &symbols_)
{
    std::string::size_type n = 0;

    // Go through unsigned char: handing a negative plain char to a
    // <ctype.h> classifier is undefined behaviour.
    while (n < symbols_.size () && isspace (static_cast <unsigned char> (symbols_ [n]))) ++n;

    symbols_.erase (0, n);
}
155 
// Erases final white-space from the NUL-terminated string symbols_, in place.
void StringUtil::eraseFinalWhiteSpace (char *symbols_)
{
    size_t len = strlen (symbols_);

    // Walk back over the trailing white-space, then cut the string there.
    // Go through unsigned char: handing a negative plain char to a
    // <ctype.h> classifier is undefined behaviour.
    while (len != 0 && isspace (static_cast <unsigned char> (symbols_ [len - 1]))) --len;

    symbols_ [len] = '\0';
}
163 
// Erases final white-space from symbols_, in place.
// Works on the std::string directly, so no temporary character buffer has to
// be allocated and released by hand.
void StringUtil::eraseFinalWhiteSpace (std::string &symbols_)
{
    std::string::size_type len = symbols_.size ();

    // Go through unsigned char: handing a negative plain char to a
    // <ctype.h> classifier is undefined behaviour.
    while (len != 0 && isspace (static_cast <unsigned char> (symbols_ [len - 1]))) --len;

    symbols_.erase (len);
}
174 
// Erases white-space from the NUL-terminated string symbols_, in place.
//   eraseBlankOnly_ == false : every symbol for which isspace () holds is erased
//   eraseBlankOnly_ == true  : only the blank ' ' is erased
void StringUtil::eraseWhiteSpace (char *symbols_, bool eraseBlankOnly_)
{
    char *out = symbols_;

    for (const char *in = symbols_; *in; ++in) {
        // Go through unsigned char: handing a negative plain char to a
        // <ctype.h> classifier is undefined behaviour.
        const bool drop = eraseBlankOnly_
            ? *in == ' '
            : isspace (static_cast <unsigned char> (*in)) != 0;

        if (! drop) *out++ = *in; // keep the symbol, closing the gap
    }

    *out = '\0';
}
189 
// Erases white-space from symbols_, in place.
//   eraseBlankOnly_ == false : every symbol for which isspace () holds is erased
//   eraseBlankOnly_ == true  : only the blank ' ' is erased
// Works on the std::string directly, so no temporary character buffer has to
// be allocated and released by hand.
void StringUtil::eraseWhiteSpace (std::string &symbols_, bool eraseBlankOnly_)
{
    std::string::size_type kept = 0;

    for (std::string::size_type i = 0; i < symbols_.size (); ++i) {
        const char ch = symbols_ [i];

        // Go through unsigned char: handing a negative plain char to a
        // <ctype.h> classifier is undefined behaviour.
        const bool drop = eraseBlankOnly_
            ? ch == ' '
            : isspace (static_cast <unsigned char> (ch)) != 0;

        if (! drop) symbols_ [kept++] = ch; // keep the symbol, closing the gap
    }

    symbols_.resize (kept);
}
200 
// Removes from the front of symbols_ every leading symbol contained in c_.
//   symbols_ : NUL-terminated string, modified in place
//   c_       : NUL-terminated set of symbols to strip
void StringUtil::eraseInitialChar (char *symbols_, const char *c_)
{
    // strspn () gives the length of the leading run made only of symbols in c_.
    const size_t skip = strspn (symbols_, c_);

    if (skip == 0) return;

    // Source and destination overlap, hence memmove (); the + 1 carries the
    // terminating NUL along.
    memmove (symbols_, symbols_ + skip, strlen (symbols_ + skip) + 1);
}
232 
// Removes from the front of symbols_ every leading symbol contained in c_.
// Works on the std::string directly, so no temporary character buffer has to
// be allocated and released by hand.
void StringUtil::eraseInitialChar (std::string &symbols_, const std::string &c_)
{
    // find_first_not_of () yields npos when every symbol belongs to c_ (or
    // symbols_ is empty); erase (0, npos) then removes the whole string.
    symbols_.erase (0, symbols_.find_first_not_of (c_));
}
243 
// Removes from the end of symbols_ every trailing symbol contained in c_.
//   symbols_ : NUL-terminated string, modified in place
//   c_       : NUL-terminated set of symbols to strip
void StringUtil::eraseFinalChar (char *symbols_, const char *c_)
{
    size_t len = strlen (symbols_);

    // Walk back while the last symbol is a member of c_. The symbols examined
    // are never NUL, so strchr () cannot match the terminator of c_.
    while (len != 0 && strchr (c_, symbols_ [len - 1]) != 0) --len;

    symbols_ [len] = '\0';
}
250 
// Removes from the end of symbols_ every trailing symbol contained in c_.
// Works on the std::string directly, so no temporary character buffer has to
// be allocated and released by hand.
void StringUtil::eraseFinalChar (std::string &symbols_, const std::string &c_)
{
    const std::string::size_type last = symbols_.find_last_not_of (c_);

    if (last == std::string::npos) {
        symbols_.erase (); // every symbol belongs to c_ (or symbols_ is empty)
    } else {
        symbols_.erase (last + 1); // keep everything up to the last survivor
    }
}
261 
// Removes every occurrence of any symbol contained in c_ from symbols_.
//   symbols_ : NUL-terminated string, modified in place
//   c_       : NUL-terminated set of symbols to remove
void StringUtil::eraseChar (char *symbols_, const char *c_)
{
    char *out = symbols_;

    for (const char *in = symbols_; *in; ++in) {
        // *in is never NUL here, so strchr () cannot match the terminator of c_.
        if (strchr (c_, *in) == 0) *out++ = *in; // keep the symbol, closing the gap
    }

    *out = '\0';
}
289 
// Removes every occurrence of any symbol contained in c_ from symbols_.
// Works on the std::string directly, so no temporary character buffer has to
// be allocated and released by hand.
void StringUtil::eraseChar (std::string &symbols_, const std::string &c_)
{
    std::string::size_type kept = 0;

    for (std::string::size_type i = 0; i < symbols_.size (); ++i) {
        const char ch = symbols_ [i];

        if (c_.find (ch) == std::string::npos) symbols_ [kept++] = ch; // keep, closing the gap
    }

    symbols_.resize (kept);
}
300 
// Replaces, in the NUL-terminated string symbols_, each occurrence of cIn_
// by cOut_. The string is modified in place and keeps its length.
void StringUtil::substituteChar (char *symbols_, char cOut_, const char cIn_)
{
    char *cursor = symbols_;

    while (*cursor)
    {
        if (*cursor == cIn_)
        {
            *cursor = cOut_;
        }
        ++cursor;
    }
}
310 
// Replaces all occurrences of cIn_ in symbols_ with cOut_, in place.
// Works on the std::string directly, so no temporary character buffer has to
// be allocated and released by hand.
void StringUtil::substituteChar (std::string &symbols_, char cOut_, char cIn_)
{
    // Note the argument order of std::replace (): old value first, new value second.
    std::replace (symbols_.begin (), symbols_.end (), cIn_, cOut_);
}
321 
// Erases a single carriage return '\r' from the end of the NUL-terminated
// string symbols_, to avoid problems when transferring files between DOS and UNIX.
// A null pointer or an empty string is left alone; a '\r' anywhere but in the
// last position is not touched.
void StringUtil::eraseCarriageReturn (char *symbols_)
{
    if (! symbols_) return;

    const size_t len = strlen (symbols_); // measured once, reused below

    if (len != 0 && symbols_ [len - 1] == '\r') symbols_ [len - 1] = '\0';
}
329 
// Drops one carriage return '\r' from the end of symbols_, if there is one,
// to avoid problems when transferring files between DOS and UNIX.
// An empty string, or one not ending in '\r', is left as it is.
void StringUtil::eraseCarriageReturn (std::string &symbols_)
{
    const std::string::size_type length = symbols_.size ();

    if (length == 0) return;

    if (symbols_ [length - 1] == '\r')
    {
        symbols_.resize (length - 1);
    }
}
337 
338 void StringUtil::whiteSpace2UnderScore (char *symbols_) // puts single underscore between words
339 {
340  assert (strlen (symbols_) != 0);
341 
342  stringstream sstr;
343 
344  sstr.str (symbols_);
345  sstr.clear ();
346 
347  sstr >> skipws;
348 
349  string word;
350 
351  sstr >> word;
352 
353  if (sstr.fail ())
354  {
355  symbols_ [0] = '\0';
356  return;
357  }
358 
359  string str = word;
360 
361  while (sstr >> word)
362  {
363  str += string ("_") + word;
364  }
365 
366  strcpy (symbols_, str.c_str ());
367 }
368 
369 void StringUtil::whiteSpace2UnderScore (std::string &symbols_) // puts single underscore between words
370 {
371  char *str = new char [symbols_.size () + 1];
372 
373  strcpy (str, symbols_.c_str ());
375  symbols_ = str;
376 
377  delete [] str; str = 0;
378 }
379 
380 void StringUtil::split (
381 std::vector <std::string> *strVec_, // vector of pieces
382 const std::string &str_, // input string (can be part of strVec_)
383 const std::string &split_) // split points
384 {
385  assert (strVec_);
386  assert (split_.length () != 0);
387 
388  string str (str_);
389 
390  strVec_->clear ();
391  string::size_type pos0 = 0;
392 
393  for (string::size_type pos = str.find (split_ [0]); pos != string::npos && pos != str.length (); )
394  {
395  if (str.substr (pos, min (split_.length (), str.length () - pos)) != split_) {
396 
397  pos++;
398  continue;
399  }
400 
401  strVec_->push_back (pos == pos0 ? string ("") : str.substr (pos0, pos - pos0));
402  pos += split_.length ();
403  pos0 = pos;
404  }
405 
406  strVec_->push_back (str.length () == pos0 ? string ("") : str.substr (pos0, str.length () - pos0));
407 }
408 
410 const std::string &str_, // input string (can be part of strVec_)
411 const std::string &split_) // split points
412 {
413  std::vector <std::string> strVec;
414 
415  split (&strVec, str_, split_);
416 
417  return strVec.size ();
418 }
string
Definition: cgiapp.hpp:687
int i
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int toupper(Uchar c)
Definition: ncbictype.hpp:73
int islower(Uchar c)
Definition: ncbictype.hpp:66
int isupper(Uchar c)
Definition: ncbictype.hpp:70
T min(T x_, T y_)
USING_SCOPE(blast)
USING_NCBI_SCOPE
void toUpper(char *symbols_)
bool isAllWhiteSpace(const std::string &symbols_)
bool isNoWhiteSpace(const char *symbols_)
void eraseFinalWhiteSpace(char *symbols_)
void eraseCarriageReturn(char *symbols_)
void eraseFinalChar(char *symbols_, const char *c_)
size_t splitCount(const std::string &str_, const std::string &split_)
void eraseInitialWhiteSpace(char *symbols_)
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
bool isAlpha(const char *symbols_)
void eraseInitialChar(char *symbols_, const char *c_)
bool isUpper(const char *symbols_)
void substituteChar(char *symbols_, const char cOut_, const char cIn_)
void whiteSpace2UnderScore(char *symbols_)
void eraseWhiteSpace(char *symbols_, bool eraseBlankOnly_=false)
void toLower(char *symbols_)
bool isLower(const char *symbols_)
void eraseChar(char *symbols_, const char *c_)
bool isReplicate(const std::string &symbols_, bool upperEqualsLower_=false)
#define assert(x)
Definition: srv_diag.hpp:58
static const char * str(char *buf, int n)
Definition: stats.c:84
Modified on Wed Nov 29 02:17:58 2023 by modify_doxy.py rev. 669887