NCBI C++ ToolKit
convert_dates_iso8601.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: convert_dates_iso8601.cpp 102536 2024-05-28 11:19:24Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Alex Kotliarov
27  *
28  * File Description:
29  *
30  * File Description:
31  * Convert dates from an arbitrary format to corresponding ISO 8601.
32  * Match a date string against set of regular expressions
33  * to determine whether the string contains date that can be transformed
34  * into ISO date.
35  *
36  * Copied and adapted from gpipe/common/read_date_iso8601.[ch]pp
37  *
38  */
39 
40 #include <ncbi_pch.hpp>
41 #include <sstream>
42 #include <memory>
43 #include <util/static_map.hpp>
44 #include <util/xregexp/regexp.hpp>
46 
47 
49 
50 
51 // Type definitions
52 
53 typedef string (* TFun_transform)(string const&);
54 typedef pair<string, string> (* TFun_transform_other)(string const&);
55 
56 static const char* kTransform_code_iso8601 = "ISO-8601";
57 static const char* transfrom_code_range_iso8601 = "RANGE|ISO-8601";
58 static const char* kTransform_code_cast_na = "CAST|NA";
59 static const char* kTransform_code_cast_iso8601 = "CAST|ISO-8601";
60 static const char* kTransform_code_range_cast_iso8601 = "RANGE|CAST|ISO-8601";
61 static const char* kTransform_code_no_date = "NODATE";
62 static const char* kTransform_code_cast_ambig = "CAST|YYYY"; // cast ambig date to YYYY
63 
64 
65 // Forward declarations and type definitions
66 
67 class TParse_rule;
68 
69 static pair<string, string> extract_date_iso8601(string const& value,
70  vector<TParse_rule> const& rules,
71  vector<TFun_transform_other> const& range_rules,
72  TFun_transform_other ambig_rule);
73 
74 static vector<TParse_rule> const& get_date_rule_collection();
75 static vector<TFun_transform_other> const& get_date_range_rule_collection();
77 
78 // Collection of functions that transform to ISO 8601
79 static string transform_identity(string const& value);
80 static string transform_missing(string const& value);
81 static string transform_YYYY_mm_DD(string const& value);
82 static string transform_mm_DD_YYYY(string const& value);
83 static string transform_DD_mm_YYYY(string const& value);
84 static string transform_DD_month_YYYY(string const& value);
85 static string transform_DD_month_comma_YYYY(string const& value);
86 static string transform_month_DD_YYYY(string const& value);
87 static string transform_month_YYYY(string const& value);
88 static string transform_YYYY_month(string const& value);
89 static string transform_MM_YYYY(string const& value);
90 static string transform_YYYY_MM(string const& value);
91 static string transform_range_decade(string const& value);
92 static string transform_range_before(string const& value);
93 
94 static pair<string, string> transform_ambiguous_date(string const& value);
95 static pair<string, string> transform_range(string const& value);
96 
97 
98 
99 //////////////////////////////////////////////////////////////////////////////
100 //
101 // API
102 //
103 
104 string ConvertDateTo_iso8601(string const& value)
105 {
106  pair<string, string> result =
111  return result.second;
112 }
113 
114 pair<string, string> ConvertDateTo_iso8601_and_annotate(string const& value)
115 {
120 }
121 
122 
123 //////////////////////////////////////////////////////////////////////////////
124 //
125 // Implementation
126 //
127 
129 {
130 public:
131  TParse_rule(string const& tag,
132  string const& regex,
134  : m_Tag(tag),
136  m_Regexp_s(regex),
137  m_Regexp(new CRegexp(regex))
138  {
139  }
140 
142  : m_Tag(rhs.m_Tag),
144  m_Regexp_s(rhs.m_Regexp_s),
145  m_Regexp( new CRegexp(rhs.m_Regexp_s) )
146  {
147  }
148 
150  {
151  TParse_rule temp(other);
152  Swap(*this, temp);
153  return *this;
154  }
155 
156  string const& GetTag() const { return m_Tag; }
157  string MakeTransform(string const& value) const { return m_Transform(value); }
158  CRegexp& GetRegexp() const { return *m_Regexp; }
159  string const& GetRegexpStr() const { return m_Regexp_s; }
160 
161 private:
163 
164  void Swap(TParse_rule& lhs, TParse_rule& rhs)
165  {
166  using std::swap;
167  swap(lhs.m_Tag, rhs.m_Tag);
168  swap(lhs.m_Transform, rhs.m_Transform);
169  swap(lhs.m_Regexp_s, rhs.m_Regexp_s);
170 
171  shared_ptr<CRegexp> temp(lhs.m_Regexp);
172  lhs.m_Regexp = rhs.m_Regexp;
173  rhs.m_Regexp = temp;
174  }
175 
176  string m_Tag;
178  string m_Regexp_s;
179  shared_ptr<CRegexp> m_Regexp;
180 };
181 
182 
184 {
185 public:
186  enum EErrCode {
187  eAmbigDate
188  };
189 
190  virtual const char* GetErrCodeString(void) const override
191  {
192  switch( GetErrCode() ) {
193  case eAmbigDate:
194  return "eAmbiguousDate";
195  default:
197  }
198  }
199 
201 };
202 
203 
204 pair<string, string> extract_date_iso8601(string const& value,
205  vector<TParse_rule> const & rules,
206  vector<TFun_transform_other> const& range_rules,
207  TFun_transform_other transform_ambiguous_date_fun
208  )
209 {
210  try {
211  for ( auto rule = rules.begin(); rule != rules.end(); ++rule ) {
212  CRegexp& re = rule->GetRegexp();
213  if ( re.IsMatch(value) ) {
214  re.GetMatch(value, 0, 0, CRegexp::fMatch_default, true);
215  string match = re.GetSub(value, 1);
216  return make_pair(rule->GetTag(), rule->MakeTransform(match));
217  }
218  }
219  // Try to match 'range' expressions
220  for ( auto transform = range_rules.begin(); transform != range_rules.end(); ++transform )
221  {
222  pair<string, string> result = (* transform)(value);
223  if ( !result.second.empty() ) {
224  return result;
225  }
226  }
227  }
228  catch ( CAmbiguousDateException& ) {
229  // Try to salvage a year
230  return transform_ambiguous_date_fun(value);
231  }
232 
233  // Unable to extract ISO date; record a miss.
234  return make_pair(kTransform_code_no_date, "");
235 }
236 
237 
238 const char* get_month_code_by_name(string const& month_name)
239 {
240  static const SStaticPair<const char*, const char*> s_month_lookup_table[] =
241  {
242  { "apr", "04" },
243  { "april", "04" },
244  { "aug", "08" },
245  { "august", "08" },
246  { "dec", "12" },
247  { "december", "12" },
248  { "feb", "02" },
249  { "february", "02" },
250  { "jan", "01" },
251  { "january", "01" },
252  { "jul", "07" },
253  { "july", "07" },
254  { "jun", "06" },
255  { "june", "06" },
256  { "mar", "03" },
257  { "march", "03" },
258  { "may", "05" },
259  { "nov", "11" },
260  { "november", "11" },
261  { "oct", "10" },
262  { "october", "10" },
263  { "sep", "09" },
264  { "september", "09" },
265  };
267  DEFINE_STATIC_ARRAY_MAP(TMonthCodeByName, s_MonthLookupTable, s_month_lookup_table);
268 
269  auto it = s_MonthLookupTable.find(month_name.c_str());
270  if ( it == s_MonthLookupTable.end() ) {
271  NCBI_THROW(CException, eUnknown, "Bad month name value '" + month_name + "'");
272  }
273  return it->second;
274 
275 }
276 
277 
278 vector<TParse_rule> const& get_date_rule_collection()
279 {
280  struct TRules {
281  char const* annot_tag;
282  char const* regexp;
284  }
285  rules_table[] =
286  {
288  "^((?:1\\d{3}|2\\d{3}))$",
289  transform_identity}, // YYYY - 1xxx, 2xxx
290 
292  "(?i)^([a-z]+(?:\\s[a-z]+)*)$",
293  transform_missing}, // not determined | unknown | ..
294 
296  "(?i)^((?:na|n[.]a[.]|n/a))$",
298 
300  "^([123]\\d{3}\\-(?:[0][1-9]|[1][012])\\-(?:[0][1-9]|[12][0-9]|[3][01])(?:T(?:[01][0-9]|2[0123])(?:[:][0-5][0-9]){1,2})Z)$",
302 
304  "^([123]\\d{3}\\-(?:[0][1-9]|[1][012])\\-(?:[0][1-9]|[12][0-9]|[3][01]))(?:[T ](?:[01][0-9]|2[0123])(?:[:][0-5][0-9]){1,2})?$",
306 
308  "^([123]\\d{3}\\-(?:[0][1-9]|[1][012]))$",
310 
312  "^([123]\\d{3}/(?:0?[1-9]|[1][012])/(?:0?[1-9]|[12][0-9]|[3][01]))$",
314 
316  "^([123]\\d{3}\\-(?:0?[1-9]|[1][012])\\-(?:0?[1-9]|[12][0-9]|[3][01]))$",
318 
319  // unambiguous dates: day >= 13
321  "(?i)^((?:[1][3-9]|[2][0-9]|[3][012])([-./])(?:0?[1-9]|[1][012])\\2(?:[123]\\d{3}|\\d{2}))(?: (?:0[1-9]|1[012])(?:[:][0-5][0-9]){1,2}(?:[ ]?[AP]M|[ ]?[AP][.]M[.]))?$",
322  transform_DD_mm_YYYY}, // DD-mm-YYYY
323 
324  // date would be ambiguous iff day != month and day <= 12
326  "(?i)^((?:0?[1-9]|[1][012])([-/.])(?:0?[1-9]|[12][0-9]|[3][01])\\2(?:[123]\\d{3}|\\d{2}))(?: (?:0[1-9]|1[012])(?:[:][0-5][0-9]){1,2}(?:[ ]?[AP]M|[ ]?[AP][.]M[.]))?$",
327  transform_mm_DD_YYYY}, // mm/DD/YYYY
328 
330  "(?i)^((?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\\s(?:0?[1-9]|[12][0-9]|[3][01]),?[ ](?:[123]\\d{3}|\\d{2}))$",
331  transform_month_DD_YYYY}, // DD-Month-YYYY
332 
334  "(?i)^((?:0?[1-9]|[12][0-9]|[3][01])([- ])(?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\\2(?:[123]\\d{3}|\\d{2}))$",
335  transform_DD_month_YYYY}, // DD-Month-YYYY
336 
338  "(?i)^((?:0?[1-9]|[12][0-9]|[3][01])[ ](?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?),[ ](?:[123]\\d{3}|\\d{2}))$",
339  transform_DD_month_comma_YYYY}, // DD-Month-YYYY
340 
342  "(?i)^((?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)[-./ ](?:[123]\\d{3}|\\d{2}))$",
343  transform_month_YYYY}, // Month-YY(YY)?
344 
346  "(?i)^((?:[12]\\d{3}|\\d{2})[-./ ](?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?))$",
347  transform_YYYY_month}, // YY(YY)?-Month
348 
350  "^((?:19\\d{2}|2\\d{3})[-/. ](?:0?[1-9]|1[012]))$",
352  },
353 
355  "^((?:0?[1-9]|1[012])[-/. ](?:19\\d{2}|2\\d{3}))$",
357  },
358 
359  {transfrom_code_range_iso8601, // YYYY-MM-DD/YYYY-MM-DD
360  "^((?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\-(?:[0][1-9]|[12][0-9]|[3][01])\\/(?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\-(?:[0][1-9]|[12][0-9]|[3][01]))$",
362 
363  {transfrom_code_range_iso8601, // YYYY-MM/YYYY-MM
364  "^((?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\/(?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012]))$",
366 
367  {transfrom_code_range_iso8601, // YYYY/YYYY
368  "^((?:19\\d{2}|2\\d{3})\\/(?:19\\d{2}|2\\d{3}))$",
370 
372  "^((?:19[0-9]0|2\\d{2}0))s$",
374 
376  "^.*?(?<=before[ ])((?:19\\d{2}|2\\d{3}))$",
378 
380  "^.*?(?<=pre[-])((?:19\\d{2}|2\\d{3}))$",
382 
383  {0, 0, 0}
384  };
385 
386  static CSafeStatic< vector<TParse_rule> > parse_rules;
387 
388  if (parse_rules->empty()) {
389  for (struct TRules* entry = &rules_table[0]; entry->annot_tag != 0; ++entry ) {
390  parse_rules->push_back( TParse_rule(entry->annot_tag, entry->regexp, entry->transform) );
391  }
392  }
393  return parse_rules.Get();
394 }
395 
396 
397 vector<TFun_transform_other> const& get_date_range_rule_collection()
398 {
401  0
402  };
403  static CSafeStatic< vector<TFun_transform_other> > range_rules;
404 
405  if (range_rules->empty()) {
406  for (TFun_transform_other* entry = &table[0]; *entry != 0; ++entry) {
407  range_rules->push_back(*entry);
408  }
409  }
410  return range_rules.Get();
411 }
412 
413 
415 {
417 }
418 
419 pair<string, string> transform_ambiguous_date(string const& value)
420 {
421  // Ambiguous date come in following formats:
422  // MM/DD/YYYY and DD/MM/YYYY
423  //
424  // A date is ambiguous if DD < 13 and DD != MM
425  // In this case we extract just YYYY
426  static CRegexp re("^(?:0?[1-9]|1[012])([-.\\/])(?:0?[1-9]|[12][0-9]|3[01])\\1((?:19\\d{2}|2\\d{3}|\\d{2}))$");
427 
428  if ( re.IsMatch(value) ) {
429  string match= re.GetSub(value, 2);
430  int year = NStr::StringToNumeric<int>(match);
431  if ( year < 100 ) {
432  year = 1900 + ((year > 70) ? year : year + 100);
433  }
434  return make_pair(string(kTransform_code_cast_ambig), NStr::NumericToString<int>(year));
435  }
436  else {
437  return make_pair(kTransform_code_no_date, "");
438  }
439 }
440 
441 pair<string, string> transform_range(string const& value)
442 {
443  CRegexp re("(?i)(?:between(.+?)and(.+?)|^(.+?)\\/(.+?))$");
444 
445  if ( re.IsMatch(value) ) {
446  string lhs;
447  string rhs;
448 
449  if ( !re.GetSub(value, 1).empty() ) {
450  lhs = re.GetSub(value, 1);
451  rhs = re.GetSub(value, 2);
452  }
453  else {
454  lhs = re.GetSub(value, 3);
455  rhs = re.GetSub(value, 4);
456  }
457 
458  lhs = NStr::TruncateSpaces(lhs);
459  rhs = NStr::TruncateSpaces(rhs);
460 
461  vector<TParse_rule> const & rules = get_date_rule_collection();
462 
463  for ( vector<TParse_rule>::const_iterator rule = rules.begin(); rule != rules.end(); ++rule ) {
464  // skip rules that extract ranges
465  if ( rule->GetTag().find("RANGE") == 0 ) {
466  continue;
467  }
468 
469  CRegexp& re_rule = rule->GetRegexp();
470  if ( re_rule.IsMatch(lhs) ) {
471  re_rule.GetMatch(lhs, 0, 0, CRegexp::fMatch_default, true);
472  string match_lhs = re_rule.GetSub(lhs, 1);
473  if ( re_rule.IsMatch(rhs) ) {
474  string match_rhs = re_rule.GetSub(rhs, 1);
475  string result_lhs = rule->MakeTransform(match_lhs);
476  string result_rhs = rule->MakeTransform(match_rhs);
477 
478  string prefix = "RANGE|";
479  if ( rule->GetTag().find("CAST") == string::npos ) {
480  prefix += "CAST|";
481  }
482  string range = result_lhs + "/" + result_rhs;
483  return make_pair(prefix + rule->GetTag(), range);
484 
485  }
486  }
487  }
488  }
489  return make_pair(kTransform_code_no_date, "");
490 }
491 
// Pass-through transform: the matched text is returned unchanged.
string transform_identity(string const& value)
{
    string unchanged(value);
    return unchanged;
}
496 
// Constant transform: whatever text was matched, the result is "missing".
string transform_missing(string const& /*value*/)
{
    return string("missing");
}
501 
502 string transform_YYYY_mm_DD(string const& value)
503 {
504  vector<string> tokens;
505  NStr::Split(value, "-/", tokens);
506 
507  ostringstream oss;
508  oss << tokens[0]
509  << "-"
510  << setfill('0') << setw(2)
511  << NStr::StringToNumeric<int>(tokens[1])
512  << "-"
513  << setw(2)
514  << NStr::StringToNumeric<int>(tokens[2]);
515 
516  return oss.str();
517 }
518 
519 string transform_mm_DD_YYYY(string const& value)
520 {
521  vector<string> tokens;
522  NStr::Split(value, "-/.", tokens);
523 
524  int month = NStr::StringToNumeric<int>(tokens[0]);
525  int day = NStr::StringToNumeric<int>(tokens[1]);
526  int year = NStr::StringToNumeric<int>(tokens[2]);
527 
528  if ( day < 13 && day != month ) {
529  NCBI_THROW(CAmbiguousDateException, eAmbigDate, "Date is ambiguous");
530  }
531 
532  if ( year < 100 ) {
533  year = 1900 + ( ( year > 70 ) ? year : 100 + year );
534  }
535 
536  ostringstream oss;
537  oss << year
538  << "-"
539  << setfill('0') << setw(2)
540  << month
541  << "-"
542  << setw(2)
543  << day;
544 
545  return oss.str();
546 }
547 
548 string transform_DD_mm_YYYY(string const& value)
549 {
550  vector<string> tokens;
551  NStr::Split(value, "-/.", tokens);
552 
553  int day = NStr::StringToNumeric<int>(tokens[0]);
554  int month = NStr::StringToNumeric<int>(tokens[1]);
555  int year = NStr::StringToNumeric<int>(tokens[2]);
556 
557  if ( day < 13 && day != month ) {
558  NCBI_THROW(CAmbiguousDateException, eAmbigDate, "Date is ambiguous");
559  }
560 
561  if ( year < 100 ) {
562  year = 1900 + ( ( year > 70 ) ? year : 100 + year );
563  }
564 
565  ostringstream oss;
566  oss << year
567  << "-"
568  << setfill('0') << setw(2)
569  << month
570  << "-"
571  << setw(2)
572  << day;
573 
574  return oss.str();
575 }
576 
577 string transform_DD_month_YYYY(string const& value)
578 {
579  vector<string> tokens;
580  NStr::Split(value, "- ", tokens);
581 
582  int day = NStr::StringToNumeric<int>(tokens[0]);
583  int year = NStr::StringToNumeric<int>(tokens[2]);
584  if ( year < 100 ) {
585  year = 1900 + ( ( year > 70 ) ? year : 100 + year );
586  }
587 
588  ostringstream oss;
589  oss << year
590  << "-"
591  << get_month_code_by_name(tokens[1])
592  << "-"
593  << setfill('0') << setw(2)
594  << day;
595  return oss.str();
596 }
597 
599 {
600  vector<string> tokens;
601  NStr::Split(value, " ", tokens);
602 
603  string month = tokens[1];
604  size_t pos = month.find_last_of(",");
605  month.erase(pos);
606 
607  int day = NStr::StringToNumeric<int>(tokens[0]);
608  int year = NStr::StringToNumeric<int>(tokens[2]);
609  if ( year < 100 ) {
610  year = 1900 + ( ( year > 70 ) ? year : 100 + year );
611  }
612 
613  ostringstream oss;
614  oss << year
615  << "-"
616  << get_month_code_by_name(month)
617  << "-"
618  << setfill('0') << setw(2)
619  << day;
620 
621  return oss.str();
622 }
623 
624 string transform_month_DD_YYYY(string const& value)
625 {
626 
627  vector<string> tokens;
628  NStr::Split(value, " ", tokens);
629 
630  // handle April 21, 1989 case
631  {{
632  string& day = tokens[1];
633  size_t pos = day.find_last_of(",");
634  if ( pos != std::string::npos ) {
635  day.erase(pos);
636  }
637  }}
638  int day = NStr::StringToNumeric<int>(tokens[1]);
639  int year = NStr::StringToNumeric<int>(tokens[2]);
640  if ( year < 100 ) {
641  year = 1900 + ( ( year > 70 ) ? year : 100 + year );
642  }
643 
644  ostringstream oss;
645  oss << year
646  << "-"
647  << get_month_code_by_name(tokens[0])
648  << "-"
649  << setfill('0') << setw(2)
650  << day;
651 
652  return oss.str();
653 }
654 
655 string transform_month_YYYY(string const& value)
656 {
657  vector<string> tokens;
658  NStr::Split(value, "-/. ", tokens);
659 
660  int year = NStr::StringToNumeric<int>(tokens[1]);
661  if ( year < 100 ) {
662  year = 1900 + ( ( year > 70 ) ? year : 100 + year );
663  }
664  ostringstream oss;
665  oss << year
666  << "-"
667  << get_month_code_by_name(tokens[0]);
668 
669  return oss.str();
670 }
671 
672 string transform_YYYY_month(string const& value)
673 {
674  vector<string> tokens;
675  NStr::Split(value, "/-. ", tokens);
676 
677  int year = NStr::StringToNumeric<int>(tokens[0]);
678  if ( year < 100 ) {
679  year = 1900 + ( ( year > 70 ) ? year : 100 + year );
680  }
681  ostringstream oss;
682  oss << year
683  << "-"
684  << get_month_code_by_name(tokens[1]);
685 
686  return oss.str();
687 }
688 
689 string transform_YYYY_MM(string const& value)
690 {
691 
692  vector<string> tokens;
693  NStr::Split(value, "/-. ", tokens);
694 
695  int month = NStr::StringToNumeric<int>(tokens[1]);
696 
697  ostringstream oss;
698  oss << tokens[0]
699  << "-"
700  << setfill('0') << setw(2)
701  << month;
702 
703  return oss.str();
704 }
705 
706 string transform_MM_YYYY(string const& value)
707 {
708 
709  vector<string> tokens;
710  NStr::Split(value, "/-. ", tokens);
711 
712  int month = NStr::StringToNumeric<int>(tokens[0]);
713 
714  ostringstream oss;
715  oss << tokens[1]
716  << "-"
717  << setfill('0') << setw(2)
718  << month;
719 
720  return oss.str();
721 }
722 
723 string transform_range_decade(string const& value)
724 {
725  int year = NStr::StringToNumeric<int>(value);
726 
727  ostringstream oss;
728  oss << year
729  << "/"
730  << year + 9;
731 
732  return oss.str();
733 }
734 
735 string transform_range_before(string const& value)
736 {
737  int year = NStr::StringToNumeric<int>(value);
738  year -= 1;
739 
740  ostringstream oss;
741  oss << 1900
742  << "/"
743  << year;
744 
745  return oss.str();
746 }
747 
748 
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
NCBI_EXCEPTION_DEFAULT(CAmbiguousDateException, CException)
CRegexp –.
Definition: regexp.hpp:74
CSafeStatic<>::
T & Get(void)
Create the variable if not created yet, return the reference.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105
TParse_rule & operator=(TParse_rule const &other)
void Swap(TParse_rule &lhs, TParse_rule &rhs)
CRegexp & GetRegexp() const
string const & GetRegexpStr() const
string MakeTransform(string const &value) const
string const & GetTag() const
TParse_rule(TParse_rule const &rhs)
TParse_rule(string const &tag, string const &regex, TFun_transform transform)
shared_ptr< CRegexp > m_Regexp
TFun_transform m_Transform
static vector< TParse_rule > const & get_date_rule_collection()
static string transform_month_DD_YYYY(string const &value)
static string transform_identity(string const &value)
static vector< TFun_transform_other > const & get_date_range_rule_collection()
static string transform_MM_YYYY(string const &value)
static string transform_DD_mm_YYYY(string const &value)
static const char * kTransform_code_iso8601
static string transform_range_before(string const &value)
static const char * transfrom_code_range_iso8601
static const char * kTransform_code_cast_iso8601
static pair< string, string > extract_date_iso8601(string const &value, vector< TParse_rule > const &rules, vector< TFun_transform_other > const &range_rules, TFun_transform_other ambig_rule)
static string transform_YYYY_MM(string const &value)
static string transform_DD_month_comma_YYYY(string const &value)
static string transform_YYYY_month(string const &value)
string(* TFun_transform)(string const &)
static const char * kTransform_code_cast_na
pair< string, string >(* TFun_transform_other)(string const &)
const char * get_month_code_by_name(string const &month_name)
static const char * kTransform_code_no_date
static string transform_missing(string const &value)
static string transform_range_decade(string const &value)
static string transform_mm_DD_YYYY(string const &value)
static string transform_month_YYYY(string const &value)
static const char * kTransform_code_cast_ambig
static pair< string, string > transform_ambiguous_date(string const &value)
static string transform_DD_month_YYYY(string const &value)
static const char * kTransform_code_range_cast_iso8601
static string transform_YYYY_mm_DD(string const &value)
static TFun_transform_other get_transform_for_ambiguous_date()
static pair< string, string > transform_range(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:690
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
@ eUnknown
Definition: app_popup.hpp:72
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
Definition: regexp.cpp:253
pair< string, string > ConvertDateTo_iso8601_and_annotate(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601, with annotation.
string ConvertDateTo_iso8601(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601.
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
Definition: regexp.cpp:200
CTempString GetMatch(CTempString str, size_t offset=0, size_t idx=0, TMatch flags=fMatch_default, bool noreturn=false)
Get matching pattern and subpatterns.
Definition: regexp.cpp:242
@ fMatch_default
Definition: regexp.hpp:135
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
static const char * month_name[]
Definition: indx_blk.cpp:188
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
range(_Ty, _Ty) -> range< _Ty >
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const char * tag
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Definition: pcre2_match.c:594
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
Definition: static_set.hpp:888
Template structure SStaticPair is simlified replacement of STL pair<> Main reason of introducing this...
Definition: static_set.hpp:60
else result
Definition: token2.c:20
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
Modified on Fri Sep 20 14:57:40 2024 by modify_doxy.py rev. 669887