70 vector<TParse_rule>
const& rules,
71 vector<TFun_transform_other>
const& range_rules,
106 pair<string, string>
result =
171 shared_ptr<CRegexp> temp(lhs.
m_Regexp);
194 return "eAmbiguousDate";
205 vector<TParse_rule>
const & rules,
206 vector<TFun_transform_other>
const& range_rules,
211 for (
auto rule = rules.begin(); rule != rules.end(); ++rule ) {
212 CRegexp& re = rule->GetRegexp();
216 return make_pair(rule->GetTag(), rule->MakeTransform(
match));
222 pair<string, string>
result = (* transform)(
value);
223 if ( !
result.second.empty() ) {
230 return transform_ambiguous_date_fun(
value);
247 {
"december",
"12" },
249 {
"february",
"02" },
260 {
"november",
"11" },
264 {
"september",
"09" },
269 auto it = s_MonthLookupTable.find(
month_name.c_str());
270 if ( it == s_MonthLookupTable.end() ) {
281 char const* annot_tag;
288 "^((?:1\\d{3}|2\\d{3}))$",
292 "(?i)^([a-z]+(?:\\s[a-z]+)*)$",
296 "(?i)^((?:na|n[.]a[.]|n/a))$",
300 "^([123]\\d{3}\\-(?:[0][1-9]|[1][012])\\-(?:[0][1-9]|[12][0-9]|[3][01])(?:T(?:[01][0-9]|2[0123])(?:[:][0-5][0-9]){1,2})Z)$",
304 "^([123]\\d{3}\\-(?:[0][1-9]|[1][012])\\-(?:[0][1-9]|[12][0-9]|[3][01]))(?:[T ](?:[01][0-9]|2[0123])(?:[:][0-5][0-9]){1,2})?$",
308 "^([123]\\d{3}\\-(?:[0][1-9]|[1][012]))$",
312 "^([123]\\d{3}/(?:0?[1-9]|[1][012])/(?:0?[1-9]|[12][0-9]|[3][01]))$",
316 "^([123]\\d{3}\\-(?:0?[1-9]|[1][012])\\-(?:0?[1-9]|[12][0-9]|[3][01]))$",
321 "(?i)^((?:[1][3-9]|[2][0-9]|[3][012])([-./])(?:0?[1-9]|[1][012])\\2(?:[123]\\d{3}|\\d{2}))(?: (?:0[1-9]|1[012])(?:[:][0-5][0-9]){1,2}(?:[ ]?[AP]M|[ ]?[AP][.]M[.]))?$",
326 "(?i)^((?:0?[1-9]|[1][012])([-/.])(?:0?[1-9]|[12][0-9]|[3][01])\\2(?:[123]\\d{3}|\\d{2}))(?: (?:0[1-9]|1[012])(?:[:][0-5][0-9]){1,2}(?:[ ]?[AP]M|[ ]?[AP][.]M[.]))?$",
330 "(?i)^((?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\\s(?:0?[1-9]|[12][0-9]|[3][01]),?[ ](?:[123]\\d{3}|\\d{2}))$",
334 "(?i)^((?:0?[1-9]|[12][0-9]|[3][01])([- ])(?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\\2(?:[123]\\d{3}|\\d{2}))$",
338 "(?i)^((?:0?[1-9]|[12][0-9]|[3][01])[ ](?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?),[ ](?:[123]\\d{3}|\\d{2}))$",
342 "(?i)^((?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)[-./ ](?:[123]\\d{3}|\\d{2}))$",
346 "(?i)^((?:[12]\\d{3}|\\d{2})[-./ ](?:Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|June?|July?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?))$",
350 "^((?:19\\d{2}|2\\d{3})[-/. ](?:0?[1-9]|1[012]))$",
355 "^((?:0?[1-9]|1[012])[-/. ](?:19\\d{2}|2\\d{3}))$",
360 "^((?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\-(?:[0][1-9]|[12][0-9]|[3][01])\\/(?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\-(?:[0][1-9]|[12][0-9]|[3][01]))$",
364 "^((?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012])\\/(?:19\\d{2}|2\\d{3})\\-(?:0[1-9]|1[012]))$",
368 "^((?:19\\d{2}|2\\d{3})\\/(?:19\\d{2}|2\\d{3}))$",
372 "^((?:19[0-9]0|2\\d{2}0))s$",
376 "^.*?(?<=before[ ])((?:19\\d{2}|2\\d{3}))$",
380 "^.*?(?<=pre[-])((?:19\\d{2}|2\\d{3}))$",
388 if (parse_rules->empty()) {
389 for (
struct TRules* entry = &rules_table[0]; entry->annot_tag != 0; ++entry ) {
390 parse_rules->push_back(
TParse_rule(entry->annot_tag, entry->regexp, entry->transform) );
393 return parse_rules.
Get();
405 if (range_rules->empty()) {
407 range_rules->push_back(*entry);
410 return range_rules.
Get();
426 static CRegexp re(
"^(?:0?[1-9]|1[012])([-.\\/])(?:0?[1-9]|[12][0-9]|3[01])\\1((?:19\\d{2}|2\\d{3}|\\d{2}))$");
430 int year = NStr::StringToNumeric<int>(
match);
432 year = 1900 + ((year > 70) ? year : year + 100);
443 CRegexp re(
"(?i)(?:between(.+?)and(.+?)|^(.+?)\\/(.+?))$");
463 for ( vector<TParse_rule>::const_iterator rule = rules.begin(); rule != rules.end(); ++rule ) {
465 if ( rule->GetTag().find(
"RANGE") == 0 ) {
469 CRegexp& re_rule = rule->GetRegexp();
472 string match_lhs = re_rule.
GetSub(lhs, 1);
474 string match_rhs = re_rule.
GetSub(rhs, 1);
475 string result_lhs = rule->MakeTransform(match_lhs);
476 string result_rhs = rule->MakeTransform(match_rhs);
478 string prefix =
"RANGE|";
479 if ( rule->GetTag().find(
"CAST") == string::npos ) {
482 string range = result_lhs +
"/" + result_rhs;
483 return make_pair(prefix + rule->GetTag(),
range);
504 vector<string> tokens;
510 << setfill(
'0') << setw(2)
511 << NStr::StringToNumeric<int>(tokens[1])
514 << NStr::StringToNumeric<int>(tokens[2]);
521 vector<string> tokens;
524 int month = NStr::StringToNumeric<int>(tokens[0]);
525 int day = NStr::StringToNumeric<int>(tokens[1]);
526 int year = NStr::StringToNumeric<int>(tokens[2]);
528 if ( day < 13 && day != month ) {
533 year = 1900 + ( ( year > 70 ) ? year : 100 + year );
539 << setfill(
'0') << setw(2)
550 vector<string> tokens;
553 int day = NStr::StringToNumeric<int>(tokens[0]);
554 int month = NStr::StringToNumeric<int>(tokens[1]);
555 int year = NStr::StringToNumeric<int>(tokens[2]);
557 if ( day < 13 && day != month ) {
562 year = 1900 + ( ( year > 70 ) ? year : 100 + year );
568 << setfill(
'0') << setw(2)
579 vector<string> tokens;
582 int day = NStr::StringToNumeric<int>(tokens[0]);
583 int year = NStr::StringToNumeric<int>(tokens[2]);
585 year = 1900 + ( ( year > 70 ) ? year : 100 + year );
593 << setfill(
'0') << setw(2)
600 vector<string> tokens;
603 string month = tokens[1];
604 size_t pos = month.find_last_of(
",");
607 int day = NStr::StringToNumeric<int>(tokens[0]);
608 int year = NStr::StringToNumeric<int>(tokens[2]);
610 year = 1900 + ( ( year > 70 ) ? year : 100 + year );
618 << setfill(
'0') << setw(2)
627 vector<string> tokens;
632 string& day = tokens[1];
633 size_t pos = day.find_last_of(
",");
634 if ( pos != std::string::npos ) {
638 int day = NStr::StringToNumeric<int>(tokens[1]);
639 int year = NStr::StringToNumeric<int>(tokens[2]);
641 year = 1900 + ( ( year > 70 ) ? year : 100 + year );
649 << setfill(
'0') << setw(2)
657 vector<string> tokens;
660 int year = NStr::StringToNumeric<int>(tokens[1]);
662 year = 1900 + ( ( year > 70 ) ? year : 100 + year );
674 vector<string> tokens;
677 int year = NStr::StringToNumeric<int>(tokens[0]);
679 year = 1900 + ( ( year > 70 ) ? year : 100 + year );
692 vector<string> tokens;
695 int month = NStr::StringToNumeric<int>(tokens[1]);
700 << setfill(
'0') << setw(2)
709 vector<string> tokens;
712 int month = NStr::StringToNumeric<int>(tokens[0]);
717 << setfill(
'0') << setw(2)
725 int year = NStr::StringToNumeric<int>(
value);
737 int year = NStr::StringToNumeric<int>(
value);
void transform(Container &c, UnaryFunction *op)
virtual const char * GetErrCodeString(void) const override
Get error code interpreted as text.
NCBI_EXCEPTION_DEFAULT(CAmbiguousDateException, CException)
T & Get(void)
Create the variable if not created yet, return the reference.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
TParse_rule & operator=(TParse_rule const &other)
void Swap(TParse_rule &lhs, TParse_rule &rhs)
CRegexp & GetRegexp() const
string const & GetRegexpStr() const
string MakeTransform(string const &value) const
string const & GetTag() const
TParse_rule(TParse_rule const &rhs)
TParse_rule(string const &tag, string const &regex, TFun_transform transform)
shared_ptr< CRegexp > m_Regexp
TFun_transform m_Transform
static vector< TParse_rule > const & get_date_rule_collection()
static string transform_month_DD_YYYY(string const &value)
static string transform_identity(string const &value)
static vector< TFun_transform_other > const & get_date_range_rule_collection()
static string transform_MM_YYYY(string const &value)
static string transform_DD_mm_YYYY(string const &value)
static const char * kTransform_code_iso8601
static string transform_range_before(string const &value)
static const char * transfrom_code_range_iso8601
static const char * kTransform_code_cast_iso8601
static pair< string, string > extract_date_iso8601(string const &value, vector< TParse_rule > const &rules, vector< TFun_transform_other > const &range_rules, TFun_transform_other ambig_rule)
static string transform_YYYY_MM(string const &value)
static string transform_DD_month_comma_YYYY(string const &value)
static string transform_YYYY_month(string const &value)
string(* TFun_transform)(string const &)
static const char * kTransform_code_cast_na
pair< string, string >(* TFun_transform_other)(string const &)
const char * get_month_code_by_name(string const &month_name)
static const char * kTransform_code_no_date
static string transform_missing(string const &value)
static string transform_range_decade(string const &value)
static string transform_mm_DD_YYYY(string const &value)
static string transform_month_YYYY(string const &value)
static const char * kTransform_code_cast_ambig
static pair< string, string > transform_ambiguous_date(string const &value)
static string transform_DD_month_YYYY(string const &value)
static const char * kTransform_code_range_cast_iso8601
static string transform_YYYY_mm_DD(string const &value)
static TFun_transform_other get_transform_for_ambiguous_date()
static pair< string, string > transform_range(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
EErrCode
Error types that an application can generate.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check for the existence of a substring that matches a specified pattern.
pair< string, string > ConvertDateTo_iso8601_and_annotate(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601, with annotation.
string ConvertDateTo_iso8601(string const &value)
Convert dates from an arbitrary format to corresponding ISO 8601.
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
CTempString GetMatch(CTempString str, size_t offset=0, size_t idx=0, TMatch flags=fMatch_default, bool noreturn=false)
Get matching pattern and subpatterns.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static const char * month_name[]
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
range(_Ty, _Ty) -> range< _Ty >
const GenericPointer< typename T::ValueType > T2 value
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
#define DEFINE_STATIC_ARRAY_MAP(Type, Var, Array)
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
C++ wrappers for the Perl-compatible regular expression (PCRE) library.