75 string fromEnv =
env.Get(
"NCBI_GEO_LOC_NAME_FOR_COUNTRY");
77 if (fromEnv ==
"true") {
79 }
else if (fromEnv ==
"false") {
84 string fromConfig = reg.
GetString(
"OrgSubSource",
"UseGeoLocNameForCountry",
"off");
86 if (fromConfig ==
"1" || fromConfig ==
"on" || fromConfig ==
"true" || fromConfig ==
"yes") {
133 replace(name.begin(), name.end(),
'_',
'-');
134 replace(name.begin(), name.end(),
' ',
'-');
143 if (name ==
"insertion-seq") {
145 }
else if (name ==
"plasmid") {
147 }
else if (name ==
"transposon") {
149 }
else if (name ==
"sub-clone") {
163 replace(name.begin(), name.end(),
'_',
'-');
164 replace(name.begin(), name.end(),
' ',
'-');
174 if (name ==
"insertion-seq" ||
176 name ==
"transposon" ||
177 name ==
"sub-clone") {
268 if (month < 1 || month > 12 || day < 1) {
274 }
else if (year > 3000) {
276 }
else if (year < 1538) {
279 CTime month_o(year, month, 1);
291 "collection-date string is blank");
295 if (IsISOFormatDate(
str)) {
296 return GetDateFromISODate(
str);
309 month =
str.substr(0, pos);
310 year =
str.substr(pos + 1);
313 "collection-date string is improperly formatted");
316 day =
str.substr(0, pos);
317 month =
str.substr(pos + 1, pos2 - pos - 1);
318 year =
str.substr(pos2 + 1);
321 "collection-date string is improperly formatted");
332 "collection-date string has invalid month");
342 "collection-date string has invalid day value");
344 }
catch (
const exception& ) {
347 "collection-date string is improperly formatted");
353 "collection-date string is improperly formatted");
359 }
catch (
const exception& ) {
362 "collection-date string is improperly formatted");
372 if (year_val < 1000) {
374 "collection-date year is out of range");
377 if (year_val >= 2100) {
379 "collection-date year is out of range");
382 if (day_val > 0 && month_val > 0 && !IsDayValueOkForMonth(day_val, month_val, year_val)) {
384 "collection-date day is greater than monthly maximum");
417 bool in_future =
false;
418 vector<string> pieces;
420 if (pieces.size() > 2) {
423 ITERATE(vector<string>, it, pieces) {
459 vector<string> pieces;
461 if (pieces.size() > 2) {
464 }
else if (pieces.size() == 2) {
465 bool first_bad =
false;
466 bool first_future =
false;
467 bool second_bad =
false;
468 bool second_future =
false;
471 bad_format = first_bad || second_bad;
473 in_future = first_future || second_future;
486 size_t pos2 =
NStr::Find(date_string,
"-", pos + 1);
487 if (pos2 !=
NPOS && pos != 2) {
508 vector<string> pieces;
510 if (pieces.size() > 2) {
512 }
else if (pieces.size() == 2) {
537 size_t pos2 =
NStr::Find(date_string,
"-", pos + 1);
538 if (pos2 !=
NPOS && pos != 2) {
564 "missing: control sample",
565 "missing: data agreement established pre-2023",
566 "missing: endangered species",
567 "missing: human-identifiable",
568 "missing: lab stock",
569 "missing: sample group",
570 "missing: synthetic construct",
571 "missing: third party data",
581 if (s_Null_CollectionDatesSet.find(date_string.c_str()) != s_Null_CollectionDatesSet.end()) {
584 size_t rval = CheckDateFormat(date_string);
585 if (rval & eDateFormatFlag_bad_format) {
586 problem =
"Collection_date format is not in DD-Mmm-YYYY format";
587 }
else if (rval & eDateFormatFlag_in_future) {
588 problem =
"Collection_date is in the future";
589 }
else if (rval & eDateFormatFlag_out_of_order) {
590 problem =
"Collection_dates are out of order";
603 if (second_pos !=
NPOS) {
606 bool month_ambig =
false;
607 string first_date =
FixDateFormat(orig_date.substr(0, pos),
true, month_ambig);
611 string second_date =
FixDateFormat(orig_date.substr(pos + delim.
length()),
true, month_ambig);
615 string fix = first_date +
"/" + second_date;
622 bool month_ambiguous =
false;
624 string fix =
FixDateFormat(orig_date,
true, month_ambiguous);
625 if (month_ambiguous) {
628 static const char* delimiters[] = {
"/",
" to ",
" and ",
"-",
"_"};
652 if (require_time_zone) {
655 suffix = orig_time.length();
658 if (orig_time.substr(
suffix).length() != 6 ||
661 orig_time[
suffix + 3] !=
':' ||
678 if (!
isdigit((
unsigned char)orig_time[0]) || !
isdigit((
unsigned char)orig_time[1])) {
686 if (hour < 0 || hour > 23) {
694 if (!
isdigit((
unsigned char)orig_time[3]) || !
isdigit((
unsigned char)orig_time[4])) {
699 if (min < 0 || min > 59) {
708 if (!
isdigit((
unsigned char)orig_time[6]) || !
isdigit((
unsigned char)orig_time[7])) {
716 }
else if (sec > 59) {
741 if (cpy.length() != 10 && cpy.length() != 7) {
746 string::const_iterator it = cpy.begin();
747 while (it != cpy.end() && rval) {
748 if (pos == 4 || pos == 7) {
762 if (month < 1 || month > 12) {
765 if (cpy.length() == 10) {
781 string cpy = orig_date;
785 if (time_pos ==
NPOS) {
806 string cpy = orig_date;
809 if (time_pos !=
NPOS) {
810 cpy = cpy.substr(0, time_pos);
818 string cpy = orig_date;
821 if (time_pos ==
NPOS) {
834 string cpy = orig_date;
841 if (cpy.length() > 7) {
854 vector<string> tokens;
855 string token_delimiters =
" ,-/=_.";
857 string cpy = orig_date;
861 bool is_chars =
false;
863 if (token_delimiters.find(*s) !=
NPOS) {
865 tokens.push_back(curr_token);
869 }
else if (is_chars && !
isalpha((
unsigned char)(*s))) {
872 tokens.push_back(curr_token);
878 tokens.push_back(curr_token);
889 tokens.push_back(curr_token);
893 if (tokens.size() > 3) {
894 vector<string>::iterator p = tokens.begin();
895 bool prev_is_number =
isdigit((
unsigned char)(*p)[0]);
896 vector<string>::iterator s = p;
898 while (s != tokens.end()) {
899 if (prev_is_number &&
906 prev_is_number =
false;
910 prev_is_number =
isdigit((
unsigned char)(*p)[0]);
919 bool s_ChooseMonthAndDay(
const string& token1,
const string& token2,
bool month_first,
string& month,
int& day,
bool& month_ambiguous)
924 if (val1 > 12 && val2 > 12) {
927 }
else if (val1 < 13 && val2 < 13) {
934 month_ambiguous =
true;
943 }
else if (val1 < 13) {
959 string orig_date =
test;
968 string reformatted_date;
970 int year = 0, day = 0;
972 size_t num_original_tokens = 0;
974 month_ambiguous =
false;
977 num_original_tokens = tokens.size();
978 if (tokens.size() < 1 || tokens.size() > 3) {
984 vector<string>::iterator it = tokens.begin();
985 while (it != tokens.end()) {
997 }
else if (one_token.length() > 0
998 &&
isdigit((
unsigned char)one_token[0])
1007 }
else if (
isalpha((
unsigned char)one_token[0])) {
1012 if (one_token.length() > 3) {
1013 one_token = one_token.substr(0, 3);
1026 if (this_val <
min) {
1028 }
else if (this_val >
max) {
1042 it = tokens.erase(it);
1048 if (tokens.size() == 0) {
1050 }
else if (tokens.size() > 2) {
1057 if (!
s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
1061 month_ambiguous =
true;
1066 }
else if (tokens.size() == 1) {
1073 if (
val > 0 &&
val < 13) {
1088 if (tokens.size() == 2) {
1101 if (val1 < 10 && !zero_pad_1 && (val2 > 10 || zero_pad_2)) {
1107 }
else if (val2 < 10 && !zero_pad_2 && (val1 > 10 || zero_pad_1)) {
1127 if (!
s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
1144 if (year > 0 && year < 100 && num_original_tokens > 1) {
1149 bool format_bad =
false;
1150 bool in_future =
false;
1158 if (year >= 1000 && year < 2100) {
1161 reformatted_date = month +
"-" + reformatted_date;
1164 if (day_str.length() < 2) {
1165 day_str =
"0" + day_str;
1167 reformatted_date = day_str +
"-" + reformatted_date;
1172 return reformatted_date;
1181 if (tokens.size() != 3) {
1190 ITERATE(vector<string>, it, tokens) {
1199 vector<int> positions;
1200 positions.push_back(0);
1201 positions.push_back(0);
1202 positions.push_back(0);
1205 ITERATE(vector<int>, it, nums) {
1207 if (positions[
eYear] > 0) {
1212 positions[
eYear] = token_pos;
1213 }
else if (*it > 12) {
1214 if (positions[
eDay] > 0) {
1219 positions[
eDay] = token_pos;
1220 }
else if (positions[
eMonth] > 0) {
1225 positions[
eMonth] = token_pos;
1238 bool& lat_in_range,
bool& lon_in_range,
1239 double& lat_value,
double& lon_value)
1241 format_correct =
false;
1242 lat_in_range =
false;
1243 lon_in_range =
false;
1244 precision_correct =
false;
1254 }
else if (sscanf (lat_lon.c_str(),
"%lf %c %lf %c%n", &ns, &lat, &ew, &lon, &processed) != 4
1255 ||
size_t(processed) != lat_lon.length()) {
1257 }
else if ((lat !=
'N' && lat !=
'S') || (lon !=
'E' && lon !=
'W')) {
1264 lat_value = 0.0 - ns;
1269 lon_value = 0.0 - ew;
1273 vector<string> pieces;
1275 if (pieces.size() > 3) {
1279 char reformatted[1000];
1280 sprintf (reformatted,
"%.*lf %c %.*lf %c", precision_lat, ns, lat,
1281 precision_lon, ew, lon);
1283 size_t len = strlen (reformatted);
1285 && (
len == lat_lon.length()
1286 || (
len < lat_lon.length()
1287 && lat_lon[
len] ==
';'))) {
1288 format_correct =
true;
1289 if (ns <= 90 && ns >= 0) {
1290 lat_in_range =
true;
1292 if (ew <= 180 && ew >= 0) {
1293 lon_in_range =
true;
1295 if (precision_lat < 3 && precision_lon < 3) {
1296 precision_correct =
true;
1306 bool format_correct =
false;
1307 bool precision_correct =
false;
1308 bool lat_in_range =
false;
1309 bool lon_in_range =
false;
1310 double lat_value = 0.0;
1311 double lon_value = 0.0;
1313 lat_in_range, lon_in_range,
1314 lat_value, lon_value);
1315 if (!format_correct || !lat_in_range || !lon_in_range || precision_correct) {
1318 vector<string> pieces;
1320 if (pieces.size() > 3) {
1323 if (precision_lat > 4) {
1326 if (precision_lon > 4) {
1330 char reformatted[1000];
1331 sprintf(reformatted,
"%.*lf %c %.*lf %c", precision_lat,
fabs(lat_value), pieces[1].c_str()[0],
1332 precision_lon,
fabs(lon_value), pieces[3].c_str()[0]);
1333 string new_val = reformatted;
1353 for (string::const_iterator
i = old_str.begin();
i != old_str.end(); ++
i)
1358 char c =
static_cast<char>(sym);
1359 if (!
isalpha(c) && !
isdigit(c) && c !=
'.' && c !=
'-' && c !=
'+')
1363 else if (!new_str.empty() &&
1370 if (!
isalpha(c) && !
isdigit(c) && c !=
'.' && c !=
'-' && c !=
'+')
1386 bool is_number =
true;
1387 for (string::const_iterator
i = old_str.begin();
i != old_str.end(); ++
i)
1392 char c =
static_cast<char>(sym);
1393 size_t j = new_str.size();
1394 if (j >= 4 && new_str[j-1] ==
' ' && new_str[j-2] ==
'.' && new_str[j-3] ==
' ' &&
isdigit(new_str[j-4]) &&
isdigit(c))
1402 if (!
isdigit(c) && c !=
'+' && c !=
'-' && c !=
'.' && !
isspace(c)) {
1433 static string s_NormalizeTokens(vector<string> &tokens, vector<double> &numbers, vector<string> &anum, vector<int> &
precision, vector<string> &lat_long, vector<string> &nsew)
1435 vector<string> pattern;
1436 for (
size_t i = 0;
i < tokens.size();
i++)
1438 string &token = tokens[
i];
1443 numbers.push_back(num);
1444 anum.push_back(token);
1445 pattern.push_back(
"1");
1450 =
static_cast<int>(token.length() - token.find(
'.') - 1);
1458 double num0, num1, num2;
1461 numbers.push_back(num0);
1462 anum.push_back(
tmp[0]);
1463 pattern.push_back(
"1");
1465 numbers.push_back(num1);
1466 anum.push_back(
tmp[1]);
1467 pattern.push_back(
"1");
1469 numbers.push_back(num2);
1470 anum.push_back(
tmp[2]);
1471 pattern.push_back(
"1");
1485 pattern.push_back(
"degrees");
1490 pattern.push_back(
"\'");
1495 pattern.push_back(
"\"");
1497 else if (token ==
"," || token ==
":" || token ==
"_" || token ==
"&" || token ==
"." || token ==
";" || token ==
"#" ||
NStr::EqualNocase(token,
"and"))
1502 pattern.push_back(
"lat");
1503 lat_long.push_back(
"lat");
1508 pattern.push_back(
"lat");
1509 lat_long.push_back(
"long");
1513 pattern.push_back(
"N");
1514 nsew.push_back(
"N");
1518 pattern.push_back(
"N");
1519 nsew.push_back(
"S");
1523 pattern.push_back(
"N");
1524 nsew.push_back(
"E");
1528 pattern.push_back(
"N");
1529 nsew.push_back(
"W");
1531 else if (token ==
"NW")
1533 nsew.push_back(
"N");
1534 nsew.push_back(
"W");
1536 else if (token ==
"NE")
1538 nsew.push_back(
"N");
1539 nsew.push_back(
"E");
1541 else if (token ==
"SW")
1543 nsew.push_back(
"S");
1544 nsew.push_back(
"W");
1546 else if (token ==
"SE")
1548 nsew.push_back(
"S");
1549 nsew.push_back(
"E");
1564 if (numbers.size() != 2)
1569 if (lat_long.size() == 2)
1571 if (lat_long.front() ==
"long")
1573 swap(numbers[0], numbers[1]);
1575 if (nsew.size() == 2) {
1576 swap(nsew[0], nsew[1]);
1580 else if (!lat_long.empty())
1585 if (nsew.size() == 2)
1587 if ((nsew[0] ==
"E" || nsew[0] ==
"W") &&
1588 (nsew[1] ==
"N" || nsew[1] ==
"S"))
1590 swap(numbers[0], numbers[1]);
1592 swap(nsew[0], nsew[1]);
1596 numbers[0] =
fabs(numbers[0]);
1598 else if (nsew[0] ==
"S")
1600 if (numbers[0] != 0)
1601 numbers[0] = -
fabs(numbers[0]);
1610 numbers[1] =
fabs(numbers[1]);
1612 else if (nsew[1] ==
"W")
1614 if (numbers[1] != 0)
1615 numbers[1] = -
fabs(numbers[1]);
1624 else if (!nsew.empty())
1629 if (lat_long.empty() && nsew.empty() &&
fabs(numbers[0]) > 90 &&
fabs(numbers[1]) < 90)
1631 swap(numbers[0], numbers[1]);
1634 if (
fabs(numbers[0]) > 90 ||
fabs(numbers[1]) > 180)
1643 vector<string> tokens;
1645 vector<string> lat_long;
1646 vector<string> nsew;
1647 vector<string> anum;
1649 if (pattern.empty())
1654 vector<double> degrees(2, 0);
1655 vector<int> prec(2, 0);
1658 if ( pattern ==
"1 1" ||
1659 pattern ==
"1 N 1 N" ||
1660 pattern ==
"N 1 N 1" ||
1661 pattern ==
"1 degrees N 1 degrees N" ||
1662 pattern ==
"lat 1 lat 1" ||
1663 pattern ==
"1 N lat 1 N lat" ||
1664 pattern ==
"1 degrees N lat 1 degrees N lat")
1666 degrees[0] = numbers[0];
1667 degrees[1] = numbers[1];
1671 else if ((pattern ==
"1 1 \" 1 1 '" ||
1672 pattern ==
"1 degrees 1 \" N 1 degrees 1 ' N")
1673 && numbers[1] < 60 && numbers[3] < 60
1674 && numbers[1] >= 0 && numbers[3] >= 0)
1676 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1677 sign2 = anum[2][0] ==
'-' ? -1 : 1;
1678 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 3600);
1679 degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60);
1683 else if ( (pattern ==
"1 1 ' 1" ||
1684 pattern ==
"1 degrees 1 ' N 1 degrees N")
1688 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1689 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1690 degrees[1] = numbers[2];
1694 else if (pattern ==
"1 1 ' 1 \" 1"
1695 && numbers[1] < 60 && numbers[2] < 60
1696 && numbers[1] >= 0 && numbers[2] >= 0)
1698 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1699 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1700 degrees[1] = numbers[3];
1704 else if ((pattern ==
"1 1 ' 1 \" 1 1 '" ||
1705 pattern ==
"1 1 1 N 1 1 N" ||
1706 pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 ' N")
1707 && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
1708 && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
1710 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1711 sign2 = anum[3][0] ==
'-' ? -1 : 1;
1712 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1713 degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 60);
1717 else if (( pattern ==
"1 1 ' 1 \" 1 1 ' 1 \"" ||
1718 pattern ==
"1 1 ' 1 \" N 1 1 ' 1 \" N" ||
1719 pattern ==
"1 degrees 1 ' 1 \" 1 degrees 1 ' 1 \"" ||
1720 pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \" N" ||
1721 pattern ==
"N 1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \"" ||
1722 pattern ==
"1 degrees 1 ' 1 N 1 degrees 1 ' 1 N" ||
1723 pattern ==
"1 degrees 1 1 N 1 degrees 1 1 N" ||
1724 pattern ==
"1 1 1 N 1 1 1 N")
1725 && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 && numbers[5] < 60
1726 && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0 && numbers[5] >= 0)
1728 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1729 sign2 = anum[3][0] ==
'-' ? -1 : 1;
1730 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1731 degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 60 + numbers[5] / 3600);
1735 else if (( pattern ==
"1 1 ' 1 1 '" ||
1736 pattern ==
"1 1 N 1 1 N" ||
1737 pattern ==
"1 1 ' N 1 1 ' N" ||
1738 pattern ==
"1 degrees 1 ' N 1 degrees 1 ' N" ||
1739 pattern ==
"lat 1 degrees 1 ' N lat 1 degrees 1 ' N" ||
1740 pattern ==
"1 degrees 1 N 1 degrees 1 N" ||
1741 pattern ==
"1 degrees 1 N 1 degrees 1 ' N" ||
1742 pattern ==
"1 degrees 1 ' N 1 degrees 1 N" ||
1743 pattern ==
"N 1 degrees 1 ' N 1 degrees 1" ||
1744 pattern ==
"N 1 degrees 1 ' N 1 degrees 1 '" ||
1745 pattern ==
"N 1 degrees 1 ' N 1 1 '")
1746 && numbers[1] < 60 && numbers[3] < 60
1747 && numbers[1] >= 0 && numbers[3] >= 0)
1749 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1750 sign2 = anum[2][0] ==
'-' ? -1 : 1;
1751 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1752 degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60);
1756 else if ((pattern ==
"1 N 1 1 N" ||
1757 pattern ==
"1 degrees N 1 degrees 1 ' N")
1761 sign2 = anum[1][0] ==
'-' ? -1 : 1;
1762 degrees[0] = numbers[0];
1763 degrees[1] = sign2*(
fabs(numbers[1]) + numbers[2] / 60);
1767 else if ((pattern ==
"1 degrees 1 ' 1 degrees 1 ' 1 \"" ||
1768 pattern ==
"N 1 1 N 1 1 1")
1769 && numbers[1] < 60 && numbers[3] < 60 && numbers[4] < 60
1770 && numbers[1] >= 0 && numbers[3] >= 0 && numbers[4] >= 0)
1772 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1773 sign2 = anum[2][0] ==
'-' ? -1 : 1;
1774 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1775 degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60 + numbers[4] / 3600);
1779 else if (pattern ==
"1 degrees 1 degrees 1 ' 1 \""
1780 && numbers[2] < 60 && numbers[3] < 60
1781 && numbers[2] >= 0 && numbers[3] >= 0)
1783 sign2 = anum[1][0] ==
'-' ? -1 : 1;
1784 degrees[0] = numbers[0];
1785 degrees[1] = sign2*(
fabs(numbers[1]) + numbers[2] / 60 + numbers[3] / 3600);
1789 else if (pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 \" N"
1790 && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
1791 && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
1793 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1794 sign2 = anum[3][0] ==
'-' ? -1 : 1;
1795 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1796 degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 3600);
1805 swap(degrees, numbers);
1813 string north_or_south;
1815 string east_or_west;
1821 if (ch < '0' || ch >
'9') {
1827 lat_lon_stream >> lat;
1828 lat_lon_stream >> north_or_south;
1829 lat_lon_stream >> lon;
1830 lat_lon_stream >> east_or_west;
1831 if( lat_lon_stream.bad() ) {
1835 if( north_or_south !=
"N" && north_or_south !=
"S" ) {
1839 if( east_or_west !=
"E" && east_or_west !=
"W" ) {
1845 size_t len = lat.length();
1846 if (pos + 9 <
len) {
1853 size_t len = lon.length();
1854 if (pos + 9 <
len) {
1859 return lat +
" " + north_or_south +
" " + lon +
" " + east_or_west;
1876 vector<double> numbers;
1880 if (!numbers.empty())
1893 if (lat_value < 0) {
1895 lat_value = -lat_value;
1898 if (lon_value < 0) {
1900 lon_value = -lon_value;
1907 string res = lat +
" " + ns +
" " + lon +
" " + ew;
1916 bool goodmatch =
false;
1922 id->SetGuessCountry(guess->
GetLevel0());
1923 id->SetGuessProvince(guess->
GetLevel1());
1930 guess =
m_LatLonWaterMap->GuessRegionForLatLon(lat_value, lon_value, country);
1940 double landdistance = 0.0;
1941 guess =
m_LatLonCountryMap->FindClosestToLatLon (lat_value, lon_value, 5.0, landdistance);
1944 id->SetClosestCountry(guess->
GetLevel0());
1945 id->SetClosestProvince(guess->
GetLevel1());
1954 double landdistance = 0.0;
1955 guess =
m_LatLonCountryMap->FindClosestToLatLon (lat_value, lon_value, 5.0, landdistance);
1958 id->SetClosestCountry(guess->
GetLevel0());
1959 id->SetClosestProvince(guess->
GetLevel1());
1967 double waterdistance = 0.0;
1968 guess =
m_LatLonWaterMap->FindClosestToLatLon (lat_value, lon_value, 5.0, waterdistance);
1970 id->SetClosestWater(guess->
GetLevel0());
1971 id->SetWaterDistance(
m_LatLonWaterMap->AdjustAndRoundDistance (waterdistance));
1981 double distance = 0.0;
1982 guess =
m_LatLonCountryMap->IsNearLatLon (lat_value, lon_value, 5.0, distance, country, province);
1986 id->SetGuessCountry(country);
1987 id->SetGuessProvince(province);
1994 guess =
m_LatLonWaterMap->IsNearLatLon (lat_value, lon_value, 5.0, distance, country, province);
1997 id->SetClaimedDistance(
m_LatLonWaterMap->AdjustAndRoundDistance (distance));
2009 {
"Adriatic Sea",
"Mediterranean Sea"},
2010 {
"Aegean Sea",
"Mediterranean Sea"},
2011 {
"Alboran Sea",
"Mediterranean Sea"},
2012 {
"Andaman Sea",
"Indian Ocean"},
2013 {
"Arabian Sea",
"Indian Ocean"},
2014 {
"Argentine Sea",
"Atlantic Ocean"},
2015 {
"Ariake Sea",
"Pacific Ocean"},
2016 {
"Baffin Bay",
"Atlantic Ocean"},
2017 {
"Balearic Sea",
"Mediterranean Sea"},
2018 {
"Baltic Sea",
"Atlantic Ocean"},
2019 {
"Barents Sea",
"Arctic Ocean"},
2020 {
"Bay of Bengal",
"Indian Ocean"},
2021 {
"Beaufort Sea",
"Arctic Ocean"},
2022 {
"Bering Sea",
"Pacific Ocean"},
2023 {
"Bismarck Sea",
"Pacific Ocean"},
2024 {
"Black Sea",
"Mediterranean Sea"},
2025 {
"Bohai Sea",
"Pacific Ocean"},
2026 {
"Caribbean Sea",
"Atlantic Ocean"},
2027 {
"Celebes Sea",
"Pacific Ocean"},
2028 {
"Champlain Sea",
"Atlantic Ocean"},
2029 {
"Chilean Sea",
"Pacific Ocean"},
2030 {
"China Seas",
"Pacific Ocean"},
2031 {
"Chukchi Sea",
"Arctic Ocean"},
2032 {
"Coral Sea",
"Pacific Ocean"},
2033 {
"Davis Strait",
"Atlantic Ocean"},
2034 {
"East China Sea",
"Pacific Ocean"},
2035 {
"East Siberian Sea",
"Arctic Ocean"},
2036 {
"English Channel",
"Atlantic Ocean"},
2037 {
"Erythraean Sea",
"Indian Ocean"},
2038 {
"Golfo de California",
"Pacific Ocean"},
2039 {
"Greenland Sea",
"Arctic Ocean"},
2040 {
"Gulf of Mexico",
"Atlantic Ocean"},
2041 {
"Gulf of Thailand",
"Pacific Ocean"},
2042 {
"Gulf of Tonkin",
"Pacific Ocean"},
2043 {
"Hudson Bay",
"Arctic Ocean"},
2044 {
"Ionian Sea",
"Mediterranean Sea"},
2045 {
"Irish Sea",
"Atlantic Ocean"},
2046 {
"Irminger Sea",
"Atlantic Ocean"},
2047 {
"James Bay",
"Atlantic Ocean"},
2048 {
"Java Sea",
"Indian Ocean"},
2049 {
"Kara Sea",
"Arctic Ocean"},
2050 {
"Koro Sea",
"Pacific Ocean"},
2051 {
"Labrador Sea",
"Atlantic Ocean"},
2052 {
"Laccadive Sea",
"Indian Ocean"},
2053 {
"Laptev Sea",
"Arctic Ocean"},
2054 {
"Ligurian Sea",
"Mediterranean Sea"},
2055 {
"Lincoln Sea",
"Arctic Ocean"},
2056 {
"Myrtoan Sea",
"Mediterranean Sea"},
2057 {
"North Sea",
"Atlantic Ocean"},
2058 {
"Norwegian Sea",
"Atlantic Ocean"},
2059 {
"Pechora Sea",
"Arctic Ocean"},
2060 {
"Persian Gulf",
"Indian Ocean"},
2061 {
"Philippine Sea",
"Pacific Ocean"},
2062 {
"Red Sea",
"Indian Ocean"},
2063 {
"Salish Sea",
"Pacific Ocean"},
2064 {
"Sargasso Sea",
"Atlantic Ocean"},
2065 {
"Scotia Sea",
"Southern Ocean"},
2066 {
"Sea of Azov",
"Black Sea"},
2067 {
"Sea of Chiloe",
"Pacific Ocean"},
2068 {
"Sea of Crete",
"Mediterranean Sea"},
2069 {
"Sea of Japan",
"Pacific Ocean"},
2070 {
"Sea of Okhotsk",
"Pacific Ocean"},
2071 {
"Sea of the Hebrides",
"Atlantic Ocean"},
2072 {
"Sea of Zanj",
"Indian Ocean"},
2073 {
"Seas of Greenland",
"Atlantic Ocean"},
2074 {
"Sethusamudram",
"Indian Ocean"},
2075 {
"Sibutu Passage",
"Pacific Ocean"},
2076 {
"Solomon Sea",
"Pacific Ocean"},
2077 {
"South China Sea",
"Pacific Ocean"},
2078 {
"Sulu Sea",
"Pacific Ocean"},
2079 {
"Tasman Sea",
"Pacific Ocean"},
2080 {
"Thracian Sea",
"Mediterranean Sea"},
2081 {
"Timor Sea",
"Indian Ocean"},
2082 {
"Tyrrhenian Sea",
"Mediterranean Sea"},
2083 {
"Wandel Sea",
"Arctic Ocean"},
2084 {
"White Sea",
"Arctic Ocean"},
2085 {
"Yellow Sea",
"Pacific Ocean"}
2094 if( new_water_pair_iter != sc_WaterPairMap.end() ) {
2095 return new_water_pair_iter->second;
2104 string countryname = input_countryname;
2110 static std::mutex m;
2112 std::lock_guard
g(m);
2123 bool format_correct, lat_in_range, lon_in_range, precision_correct;
2124 double lat_value = 0.0, lon_value = 0.0;
2126 lat_in_range, lon_in_range,
2127 lat_value, lon_value);
2128 if (!format_correct) {
2132 lat_lon = lat_lon.substr(0, pos);
2134 lat_in_range, lon_in_range,
2135 lat_value, lon_value);
2140 if (!format_correct || !lat_in_range || !lon_in_range) {
2150 countryname = countryname.substr(0, pos);
2154 countryname = countryname.substr(0, pos);
2159 countryname =
"Svalbard";
2162 string country = countryname;
2168 province = country.substr(pos + 1);
2171 country = country.substr(0, pos);
2202 string wguess =
id->GetGuessWater();
2203 string cguess =
id->GetGuessCountry();
2207 if (province.empty() &&
NStr::Equal(cguess, country)) {
2227 double neardist = 0.0;
2231 if (!
flags &&
m_LatLonCountryMap->IsNearLatLon(lat_value, lon_value, 2.0, neardist, country) && neardist < 5.0) {
2232 id->SetGuessCountry (country);
2234 flags =
id->Classify(country, province);
2238 && !
m_LatLonWaterMap->IsNearLatLon(lat_value, lon_value, 20.0, neardist, country)) {
2241 adjusted_flags = adjust_id ==
NULL ? 0 : adjust_id->
Classify(country, province);
2242 if (adjusted_flags) {
2248 flags = adjusted_flags;
2256 adjusted_flags = adjust_id ==
NULL ? 0 : adjust_id->
Classify(country, province);
2257 if (adjusted_flags) {
2263 flags = adjusted_flags;
2271 adjusted_flags = adjust_id ==
NULL ? 0 : adjust_id->
Classify(country, province);
2272 if (adjusted_flags) {
2278 flags = adjusted_flags;
2295 error =
"Latitude and longitude values appear to be exchanged";
2299 if (lat_value < 0.0) {
2300 error =
"Latitude should be set to N (northern hemisphere)";
2302 error =
"Latitude should be set to S (southern hemisphere)";
2307 if (lon_value < 0.0) {
2308 error =
"Longitude should be set to E (eastern hemisphere)";
2310 error =
"Longitude should be set to W (western hemisphere)";
2320 string full_guess =
id->GetFullGuess();
2323 error =
"Lat_lon " + lat_lon +
" is in " +
id->GetFullGuess()
2324 +
" (more specific than " + country +
")";
2329 bool suppress =
false;
2330 string reportregion;
2332 string desphrase =
"designated subregion ";
2333 string subphrase =
"another subregion ";
2334 string phrase = nosubphrase;
2335 bool show_claimed =
false;
2347 reportregion = countryname;
2353 reportregion =
id->GetClosestFull();
2355 reportregion =
id->GetClosestCountry();
2358 show_claimed =
true;
2361 string water =
id->GetGuessWater();
2374 }
else if (!suppress) {
2377 error =
"Lat_lon '" + lat_lon +
"' is closest to " + phrase +
"'" + reportregion +
"' at distance "
2379 +
" km, but in water '" +
id->GetGuessWater()
2380 +
"' - claimed region '" +
id->GetClaimedFull()
2383 error =
"Lat_lon '" + lat_lon +
"' is closest to " + phrase +
"'" + reportregion
2385 +
id->GetGuessWater() +
"'";
2388 }
else if (neardist > 0.0) {
2390 error =
"Lat_lon '" + lat_lon +
"' is in water '" +
id->GetGuessWater() +
"', '"
2394 error =
"Lat_lon '" + lat_lon +
"' is in water '" +
id->GetGuessWater() +
"'";
2397 string full_guess =
id->GetFullGuess();
2406 error =
"Lat_lon '" + lat_lon +
"' maps to '" +
id->GetFullGuess() +
"' instead of '"
2407 + countryname +
"'";
2411 error =
"Lat_lon '" + lat_lon +
"' maps to '" +
id->GetFullGuess() +
"' instead of '"
2412 + country +
"' - claimed region '" +
id->GetClaimedFull()
2420 error =
"Lat_lon '" + lat_lon +
"' maps to '" +
id->GetFullGuess() +
"' instead of '"
2421 + countryname +
"' - claimed region '" +
id->GetClaimedFull()
2430 error =
"Lat_lon '" + lat_lon +
"' is closest to '" +
id->GetClosestCountry() +
"' instead of '"
2431 + countryname +
"'";
2434 error =
"Lat_lon '" + lat_lon +
"' is closest to '" +
id->GetClosestWater() +
"' instead of '"
2435 + countryname +
"'";
2438 error =
"Unable to determine mapping for lat_lon '" + lat_lon +
"' and country '" + countryname +
"'";
2468 "pooled males and females",
2469 "pooled male and female",
2480 if (find(begin, end,
value) != end) {
2498 vector<string> words;
2500 if (words.size() == 0) {
2509 bool is_good =
false;
2511 ITERATE(vector<string>, w, words) {
2515 if (find(begin, end, *w) != end) {
2536 vector<string> words;
2539 if (words.size() == 0) {
2547 vector<string> good_values;
2548 bool pooled =
false;
2550 ITERATE(vector<string>, w, words) {
2557 if (find(begin, end, *w) != end) {
2559 good_values.push_back(
"male");
2561 good_values.push_back(
"female");
2563 good_values.push_back(*w);
2571 if (good_values.size() == 0) {
2576 string fixed = good_values[0];
2577 for (
size_t i = 1;
i < good_values.size();
i++) {
2578 if (good_values.size() > 2) {
2581 if (
i == good_values.size() - 1) {
2584 fixed +=
" " + good_values[
i];
2587 fixed =
"pooled " + fixed;
2602 string::const_iterator it =
value.begin();
2603 if (*it ==
'+' || *it ==
'-') {
2608 bool any_digit =
false;
2609 bool skip_comma =
true;
2610 while (it !=
value.end() && (
isdigit(*it) || *it ==
',')) {
2625 if (it ==
value.end()) {
2640 if (it ==
value.end() || *it !=
' ' || !any_digit) {
2646 while (it !=
value.end()) {
2684 char reformatted[1000];
2686 string rval = reformatted;
2715 rval =
number +
" " +
"m";
2733 }
else if (
value.length() > 240) {
2737 for (
auto it :
value) {
2786 string genus = taxname.substr(0, pos);
2791 string species = taxname.substr(pos + 1);
2795 if (pos != 1 ||
value[0] !=
'p') {
2816 }
else if (
value.length() > 32) {
2825 static string s_ForbiddenPhrases[] = {
2837 for (
auto it : s_ForbiddenPhrases) {
2901 if (s_PlasmidNameExceptions.
find(
value) != end(s_PlasmidNameExceptions)) {
2919 #include "cell_line.inc"
2923 vector<string> tokens;
2925 if (tokens.size() < 4) {
2927 <<
"; disregarding");
2944 size_t count =
sizeof(kCellLine) /
sizeof (*kCellLine);
2945 const char *
const * start = kCellLine;
2960 string cell_line_search = cell_line;
2964 rval =
"The International Cell Line Authentication Committee database indicates that " +
2965 cell_line +
" from " + organism +
" is known to be contaminated by " +
2968 ". Please see http://iclac.org/databases/cross-contaminations/ for more information and references.";
2989 "Antigua and Barbuda",
2994 "Ashmore and Cartier Islands",
3014 "Bosnia and Herzegovina",
3018 "British Virgin Islands",
3028 "Central African Republic",
3033 "Clipperton Island",
3038 "Coral Sea Islands",
3046 "Democratic Republic of the Congo",
3050 "Dominican Republic",
3054 "Equatorial Guinea",
3060 "Falkland Islands (Islas Malvinas)",
3067 "French Southern and Antarctic Lands",
3087 "Heard Island and McDonald Islands",
3109 "Juan de Nova Island",
3112 "Kerguelen Archipelago",
3140 "Mediterranean Sea",
3142 "Micronesia, Federated States of",
3167 "Northern Mariana Islands",
3185 "Republic of the Congo",
3193 "Saint Kitts and Nevis",
3196 "Saint Pierre and Miquelon",
3197 "Saint Vincent and the Grenadines",
3200 "Sao Tome and Principe",
3213 "South Georgia and the South Sandwich Islands",
3220 "State of Palestine",
3236 "Trinidad and Tobago",
3241 "Turks and Caicos Islands",
3246 "United Arab Emirates",
3255 "Wallis and Futuna",
3275 "Netherlands Antilles",
3276 "Serbia and Montenegro",
3279 "The former Yugoslav Republic of Macedonia",
3289 "missing: control sample",
3290 "missing: data agreement established pre-2023",
3291 "missing: endangered species",
3292 "missing: human-identifiable",
3293 "missing: lab stock",
3294 "missing: sample group",
3295 "missing: synthetic construct",
3296 "missing: third party data",
3306 string name = country;
3307 size_t pos = country.find(
':');
3309 if ( pos !=
NPOS ) {
3310 if (pos == country.length() - 1) {
3313 name = country.substr(0, pos);
3331 string name = country;
3332 size_t pos = country.find(
':');
3334 if ( pos !=
NPOS ) {
3335 name = country.substr(0, pos);
3336 if (pos == country.length() - 1) {
3341 is_miscapitalized =
false;
3356 is_miscapitalized =
true;
3362 is_miscapitalized =
true;
3368 is_miscapitalized =
true;
3379 string name = country;
3380 size_t pos = country.find(
':');
3382 if ( pos !=
NPOS ) {
3383 name = country.substr(0, pos);
3393 string name = country;
3394 size_t pos = country.find(
':');
3396 if ( pos !=
NPOS ) {
3397 name = country.substr(0, pos);
3400 is_miscapitalized =
false;
3409 is_miscapitalized =
true;
3421 {
"england",
"United Kingdom: England"},
3422 {
"great britain",
"United Kingdom: Great Britain"},
3423 {
"new jersey, usa",
"USA: New Jersey"}
3430 {
"AFG",
"Afghanistan"},
3432 {
"AIA",
"Anguilla"},
3433 {
"ALA",
"Aland Islands"},
3436 {
"ARE",
"United Arab Emirates"},
3437 {
"ARG",
"Argentina"},
3439 {
"ASM",
"American Samoa"},
3440 {
"ATA",
"Antarctica"},
3441 {
"ATF",
"French Southern Territories"},
3442 {
"ATG",
"Antigua and Barbuda"},
3443 {
"AUS",
"Australia"},
3445 {
"AZE",
"Azerbaijan"},
3446 {
"Antigua & Barbuda",
"Antigua and Barbuda"},
3447 {
"Ashmore & Cartier Islands",
"Ashmore and Cartier Islands"},
3451 {
"BES",
"Bonaire, Sint Eustatius and Saba"},
3452 {
"BFA",
"Burkina Faso"},
3453 {
"BGD",
"Bangladesh"},
3454 {
"BGR",
"Bulgaria"},
3457 {
"BIH",
"Bosnia and Herzegovina"},
3458 {
"BLM",
"Saint Barthelemy"},
3464 {
"BRB",
"Barbados"},
3467 {
"BVT",
"Bouvet Island"},
3468 {
"BWA",
"Botswana"},
3469 {
"Brasil",
"Brazil"},
3470 {
"CAF",
"Central African Republic"},
3472 {
"CCK",
"Cocos Islands"},
3473 {
"CHE",
"Switzerland"},
3476 {
"CIV",
"Cote d'Ivoire"},
3477 {
"CMR",
"Cameroon"},
3478 {
"COD",
"Democratic Republic of the Congo"},
3479 {
"COG",
"Republic of the Congo"},
3480 {
"COK",
"Cook Islands"},
3481 {
"COL",
"Colombia"},
3483 {
"CPV",
"Cape Verde"},
3484 {
"CRI",
"Costa Rica"},
3487 {
"CXR",
"Christmas Island"},
3488 {
"CYM",
"Cayman Islands"},
3491 {
"Cape Verde Islands",
"Cape Verde"},
3493 {
"DJI",
"Djibouti"},
3494 {
"DMA",
"Dominica"},
3496 {
"DOM",
"Dominican Republic"},
3498 {
"Democratic Republic of Congo",
"Democratic Republic of the Congo"},
3502 {
"ESH",
"Western Sahara"},
3505 {
"ETH",
"Ethiopia"},
3508 {
"FLK",
"Falkland Islands (Islas Malvinas)"},
3510 {
"FRO",
"Faroe Islands"},
3511 {
"FSM",
"Micronesia, Federated States of"},
3512 {
"Falkland Islands",
"Falkland Islands (Islas Malvinas)"},
3513 {
"French Southern & Antarctic Lands",
"French Southern and Antarctic Lands"},
3515 {
"GBR",
"United Kingdom"},
3517 {
"GGY",
"Guernsey"},
3519 {
"GIB",
"Gibraltar"},
3521 {
"GLP",
"Guadeloupe"},
3523 {
"GNB",
"Guinea-Bissau"},
3524 {
"GNQ",
"Equatorial Guinea"},
3527 {
"GRL",
"Greenland"},
3528 {
"GTM",
"Guatemala"},
3529 {
"GUF",
"French Guiana"},
3532 {
"HKG",
"Hong Kong"},
3533 {
"HMD",
"Heard Island and McDonald Islands"},
3534 {
"HND",
"Honduras"},
3538 {
"Heard Island & McDonald Islands",
"Heard Island and McDonald Islands"},
3539 {
"IDN",
"Indonesia"},
3540 {
"IMN",
"Isle of Man"},
3542 {
"IOT",
"British Indian Ocean Territory"},
3549 {
"Ivory Coast",
"Cote d'Ivoire"},
3554 {
"KAZ",
"Kazakhstan"},
3556 {
"KGZ",
"Kyrgyzstan"},
3557 {
"KHM",
"Cambodia"},
3558 {
"KIR",
"Kiribati"},
3559 {
"KNA",
"Saint Kitts and Nevis"},
3560 {
"KOR",
"South Korea"},
3562 {
"LAO",
"Lao People's Democratic Republic"},
3565 {
"LBY",
"Libyan Arab Jamahiriya"},
3566 {
"LCA",
"Saint Lucia"},
3567 {
"LIE",
"Liechtenstein"},
3568 {
"LKA",
"Sri Lanka"},
3570 {
"LTU",
"Lithuania"},
3571 {
"LUX",
"Luxembourg"},
3573 {
"La Reunion Island",
"Reunion"},
3574 {
"Luxemburg",
"Luxembourg"},
3576 {
"MAF",
"Saint Martin (French part)"},
3580 {
"MDG",
"Madagascar"},
3581 {
"MDV",
"Maldives"},
3583 {
"MHL",
"Marshall Islands"},
3584 {
"MKD",
"North Macedonia"},
3588 {
"MNE",
"Montenegro"},
3589 {
"MNG",
"Mongolia"},
3590 {
"MNP",
"Northern Mariana Islands"},
3591 {
"MOZ",
"Mozambique"},
3592 {
"MRT",
"Mauritania"},
3593 {
"MSR",
"Montserrat"},
3594 {
"MTQ",
"Martinique"},
3595 {
"MUS",
"Mauritius"},
3597 {
"MYS",
"Malaysia"},
3599 {
"Macedonia",
"North Macedonia"},
3601 {
"NCL",
"New Caledonia"},
3603 {
"NFK",
"Norfolk Island"},
3605 {
"NIC",
"Nicaragua"},
3607 {
"NLD",
"Netherlands"},
3611 {
"NZL",
"New Zealand"},
3612 {
"Netherland",
"Netherlands"},
3613 {
"New Guinea",
"Papua New Guinea"},
3615 {
"P, R, China",
"China"},
3616 {
"P.R. China",
"China"},
3617 {
"P.R.China",
"China"},
3618 {
"PAK",
"Pakistan"},
3620 {
"PCN",
"Pitcairn"},
3622 {
"PHL",
"Philippines"},
3624 {
"PNG",
"Papua New Guinea"},
3626 {
"PRI",
"Puerto Rico"},
3627 {
"PRK",
"North Korea"},
3628 {
"PRT",
"Portugal"},
3629 {
"PRY",
"Paraguay"},
3630 {
"PSE",
"Palestinian Territory"},
3631 {
"PYF",
"French Polynesia"},
3632 {
"People's Republic of China",
"China"},
3633 {
"Pr China",
"China"},
3634 {
"Prchina",
"China"},
3640 {
"Republic of Congo",
"Republic of the Congo"},
3641 {
"SAU",
"Saudi Arabia"},
3644 {
"SGP",
"Singapore"},
3645 {
"SGS",
"South Georgia and the South Sandwich Islands"},
3646 {
"SHN",
"Saint Helena"},
3647 {
"SJM",
"Svalbard and Jan Mayen"},
3648 {
"SLB",
"Solomon Islands"},
3649 {
"SLE",
"Sierra Leone"},
3650 {
"SLV",
"El Salvador"},
3651 {
"SMR",
"San Marino"},
3653 {
"SPM",
"Saint Pierre and Miquelon"},
3655 {
"SSD",
"South Sudan"},
3656 {
"STP",
"Sao Tome and Principe"},
3657 {
"SUR",
"Suriname"},
3658 {
"SVK",
"Slovakia"},
3659 {
"SVN",
"Slovenia"},
3661 {
"SWZ",
"Eswatini"},
3662 {
"SXM",
"Sint Maarten (Dutch part)"},
3663 {
"SYC",
"Seychelles"},
3664 {
"SYR",
"Syrian Arab Republic"},
3665 {
"Saint Kitts & Nevis",
"Saint Kitts and Nevis"},
3666 {
"Saint Pierre & Miquelon",
"Saint Pierre and Miquelon"},
3667 {
"Saint Vincent & Grenadines",
"Saint Vincent and the Grenadines"},
3668 {
"Saint Vincent & the Grenadines",
"Saint Vincent and the Grenadines"},
3669 {
"Saint Vincent and Grenadines",
"Saint Vincent and the Grenadines"},
3670 {
"San Tome and Principe Island",
"Sao Tome and Principe"},
3671 {
"Sao Tome & Principe",
"Sao Tome and Principe"},
3672 {
"South Georgia & South Sandwich Islands",
"South Georgia and the South Sandwich Islands"},
3673 {
"South Georgia & the South Sandwich Islands",
"South Georgia and the South Sandwich Islands"},
3674 {
"St Helena",
"Saint Helena"},
3675 {
"St Lucia",
"Saint Lucia"},
3676 {
"St Pierre and Miquelon",
"Saint Pierre and Miquelon"},
3677 {
"St Vincent and the Grenadines",
"Saint Vincent and the Grenadines"},
3678 {
"St. Helena",
"Saint Helena"},
3679 {
"St. Lucia",
"Saint Lucia"},
3680 {
"St. Pierre and Miquelon",
"Saint Pierre and Miquelon"},
3681 {
"St. Vincent and the Grenadines",
"Saint Vincent and the Grenadines"},
3682 {
"TCA",
"Turks and Caicos Islands"},
3685 {
"THA",
"Thailand"},
3686 {
"TJK",
"Tajikistan"},
3688 {
"TKM",
"Turkmenistan"},
3689 {
"TLS",
"Timor-Leste"},
3691 {
"TTO",
"Trinidad and Tobago"},
3696 {
"TZA",
"Tanzania"},
3697 {
"The Netherlands",
"Netherlands"},
3698 {
"Trinidad & Tobago",
"Trinidad and Tobago"},
3699 {
"Turks & Caicos",
"Turks and Caicos Islands"},
3700 {
"Turks & Caicos Islands",
"Turks and Caicos Islands"},
3701 {
"Turks and Caicos",
"Turks and Caicos Islands"},
3704 {
"UK",
"United Kingdom"},
3706 {
"UMI",
"United States Minor Outlying Islands"},
3708 {
"UZB",
"Uzbekistan"},
3709 {
"United States",
"USA"},
3710 {
"United States of America",
"USA"},
3711 {
"VAT",
"Holy See (Vatican City State)"},
3712 {
"VCT",
"Saint Vincent and the Grenadines"},
3713 {
"VEN",
"Venezuela"},
3714 {
"VGB",
"British Virgin Islands"},
3715 {
"VIR",
"Virgin Islands"},
3716 {
"VNM",
"Viet Nam"},
3718 {
"Vietnam",
"Viet Nam"},
3719 {
"WLF",
"Wallis and Futuna"},
3722 {
"ZAF",
"South Africa"},
3724 {
"ZWE",
"Zimbabwe"},
3725 {
"the Netherlands",
"Netherlands"}
3732 {
"Burma",
"Myanmar"},
3733 {
"Siam",
"Thailand"}
3739 {
"Antigua",
"Antigua and Barbuda: Antigua"},
3740 {
"Ashmore Island",
"Ashmore and Cartier Islands: Ashmore Island"},
3741 {
"Autonomous Region of the Azores",
"Portugal: Azores"},
3742 {
"Azores",
"Portugal: Azores"},
3743 {
"Barbuda",
"Antigua and Barbuda: Barbuda"},
3744 {
"Bassas da India",
"French Southern and Antarctic Lands: Bassas da India"},
3745 {
"Caicos Islands",
"Turks and Caicos Islands: Caicos Islands"},
3746 {
"Canary Islands",
"Spain: Canary Islands"},
3747 {
"Cartier Island",
"Ashmore and Cartier Islands: Cartier Island"},
3748 {
"East Germany",
"Germany: East Germany"},
3749 {
"El Hierro",
"Spain: El Hierro"},
3750 {
"Europa Island",
"French Southern and Antarctic Lands: Europa Island"},
3751 {
"Fuerteventura",
"Spain: Fuerteventura"},
3752 {
"Glorioso Islands",
"French Southern and Antarctic Lands: Glorioso Islands"},
3753 {
"Gran Canaria",
"Spain: Gran Canaria"},
3754 {
"Grenadines",
"Saint Vincent and the Grenadines: Grenadines"},
3755 {
"Heard Island",
"Heard Island and McDonald Islands: Heard Island"},
3756 {
"Ile Amsterdam",
"French Southern and Antarctic Lands: Ile Amsterdam"},
3757 {
"Ile Saint-Paul",
"French Southern and Antarctic Lands: Ile Saint-Paul"},
3758 {
"Iles Crozet",
"French Southern and Antarctic Lands: Iles Crozet"},
3759 {
"Iles Kerguelen",
"French Southern and Antarctic Lands: Iles Kerguelen"},
3760 {
"Juan de Nova Island",
"French Southern and Antarctic Lands: Juan de Nova Island"},
3761 {
"La Gomera",
"Spain: La Gomera"},
3762 {
"La Graciosa",
"Spain: La Graciosa"},
3763 {
"La Palma",
"Spain: La Palma"},
3764 {
"Lanzarote",
"Spain: Lanzarote"},
3765 {
"Madeira",
"Portugal: Madeira"},
3766 {
"McDonald Island",
"Heard Island and McDonald Islands: McDonald Island"},
3767 {
"McDonald Islands",
"Heard Island and McDonald Islands: McDonald Islands"},
3768 {
"Miquelon",
"Saint Pierre and Miquelon: Miquelon"},
3769 {
"Nevis",
"Saint Kitts and Nevis: Nevis"},
3770 {
"Principe",
"Sao Tome and Principe: Principe"},
3771 {
"Saint Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3772 {
"Saint Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3773 {
"Saint Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3774 {
"Sao Tome",
"Sao Tome and Principe: Sao Tome"},
3775 {
"Scotland",
"United Kingdom: Scotland"},
3776 {
"South Sandwich Islands",
"South Georgia and the South Sandwich Islands: South Sandwich Islands"},
3777 {
"St Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3778 {
"St Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3779 {
"St Thomas",
"USA: Saint Thomas"},
3780 {
"St Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3781 {
"St. Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3782 {
"St. Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3783 {
"St. Thomas",
"USA: Saint Thomas"},
3784 {
"St. Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3785 {
"Tenerife",
"Spain: Tenerife"},
3786 {
"Tobago",
"Trinidad and Tobago: Tobago"},
3787 {
"Trinidad",
"Trinidad and Tobago: Trinidad"},
3788 {
"Tromelin Island",
"French Southern and Antarctic Lands: Tromelin Island"},
3789 {
"Turks Islands",
"Turks and Caicos Islands: Turks Islands"},
3790 {
"Wales",
"United Kingdom: Wales"},
3791 {
"West Germany",
"Germany: West Germany"},
3806 "District of Columbia",
3853 vector<string> words;
3855 for(vector<string>::iterator word = words.begin(); word != words.end(); ++word)
3856 if (!word->empty() &&
isalpha(word->at(0)))
3857 word->at(0) = (
unsigned char)
toupper(word->at(0));
3865 if (found != k_whole_country_fixes.end()) {
3866 new_country = found->second;
3871 for (
size_t i = 0;
i < num_states; ++
i) {
3886 string country2(*c);
3890 while (pos2 !=
NPOS)
3892 if (pos2 <= pos1 && pos2+country2.length() >= pos1+country1.length())
3903 int num_matches = 0;
3910 if (!((pos+country.length()<phrase.length() &&
isalpha(phrase[pos+country.length()]))
3911 || (pos > 0 &&
isalpha(phrase[pos-1]))
3918 return (num_matches > 1);
3936 bool any_found =
true;
3937 while (!
val.empty() && any_found) {
3956 size_t len =
val.length();
3961 }
else if (
len > 5) {
3963 bool do_remove =
true;
3964 size_t pos =
val.length() - 2;
3966 while (dist < 4 && do_remove) {
3985 vector<string> tokens;
3988 vector<string>::iterator it = tokens.begin();
3989 while (it != tokens.end()) {
3991 if (pos !=
NPOS && pos > 3 && (*it).length() - pos > 4) {
3992 string first = (*it).substr(0, pos);
3993 string remainder = (*it).substr(pos + 1);
3995 size_t len_to_space =
first.length();
3996 while (space_pos !=
NPOS) {
3998 len_to_space =
first.length();
4001 if (len_to_space > 4) {
4002 (*it) = (*it).substr(0, pos);
4003 it = tokens.insert(it, remainder);
4018 size_t tlen =
test.length();
4019 size_t wlen = word.
length();
4022 while (pos !=
NPOS) {
4023 size_t p = start + pos;
4024 if ( (p == 0 || !
isalpha((
unsigned char)
test[p - 1])) &&
4025 (p + wlen >= tlen || !
isalpha((
unsigned char)
test[p + wlen])) ) {
4048 const vector<string>& countries,
4049 string& valid_country,
4050 string& orig_valid_country,
4051 bool& too_many_countries,
4054 for (
auto country : countries) {
4055 if (!country.empty() && !too_many_countries)
4057 string check = country;
4061 bool check_has_bad_cap =
false;
4064 if (valid_country.empty())
4066 valid_country =
check;
4067 orig_valid_country =
check;
4068 bad_cap = check_has_bad_cap;
4072 too_many_countries =
true;
4078 if (found != fix_map.
end())
4080 if (valid_country.empty())
4082 valid_country = found->second;
4083 orig_valid_country =
check;
4087 too_many_countries =
true;
4099 if (
val.length() == 0)
return false;
4101 char *
str =
new char[
sizeof(char) * (
val.length() + 1)];
4102 strcpy(
str,
val.c_str());
4113 while (ch !=
'\0') {
4146 {
"Acadia Parish",
"Acadia Parish" },
4147 {
"AcadiaParish",
"Acadia Parish" },
4148 {
"Allen Parish",
"Allen Parish" },
4149 {
"AllenParish",
"Allen Parish" },
4150 {
"Ascension Parish",
"Ascension Parish" },
4151 {
"AscensionParish",
"Ascension Parish" },
4152 {
"Assumption Parish",
"Assumption Parish" },
4153 {
"AssumptionParish",
"Assumption Parish" },
4154 {
"Avoyelles Parish",
"Avoyelles Parish" },
4155 {
"AvoyellesParish",
"Avoyelles Parish" },
4156 {
"Beauregard Parish",
"Beauregard Parish" },
4157 {
"BeauregardParish",
"Beauregard Parish" },
4158 {
"Bienville Parish",
"Bienville Parish" },
4159 {
"BienvilleParish",
"Bienville Parish" },
4160 {
"Bossier Parish",
"Bossier Parish" },
4161 {
"BossierParish",
"Bossier Parish" },
4162 {
"Caddo Parish",
"Caddo Parish" },
4163 {
"CaddoParish",
"Caddo Parish" },
4164 {
"Calcasieu Parish",
"Calcasieu Parish" },
4165 {
"CalcasieuParish",
"Calcasieu Parish" },
4166 {
"Caldwell Parish",
"Caldwell Parish" },
4167 {
"CaldwellParish",
"Caldwell Parish" },
4168 {
"Cameron Parish",
"Cameron Parish" },
4169 {
"CameronParish",
"Cameron Parish" },
4170 {
"Catahoula Parish",
"Catahoula Parish" },
4171 {
"CatahoulaParish",
"Catahoula Parish" },
4172 {
"Claiborne Parish",
"Claiborne Parish" },
4173 {
"ClaiborneParish",
"Claiborne Parish" },
4174 {
"Concordia Parish",
"Concordia Parish" },
4175 {
"ConcordiaParish",
"Concordia Parish" },
4176 {
"DeSoto Parish",
"DeSoto Parish" },
4177 {
"DeSotoParish",
"DeSoto Parish" },
4178 {
"East Baton Rouge Parish",
"East Baton Rouge Parish" },
4179 {
"East Carroll Parish",
"East Carroll Parish" },
4180 {
"East Feliciana Parish",
"East Feliciana Parish" },
4181 {
"EastBatonRougeParish",
"East Baton Rouge Parish" },
4182 {
"EastCarrollParish",
"East Carroll Parish" },
4183 {
"EastFelicianaParish",
"East Feliciana Parish" },
4184 {
"Evangeline Parish",
"Evangeline Parish" },
4185 {
"EvangelineParish",
"Evangeline Parish" },
4186 {
"Franklin Parish",
"Franklin Parish" },
4187 {
"FranklinParish",
"Franklin Parish" },
4188 {
"Grant Parish",
"Grant Parish" },
4189 {
"GrantParish",
"Grant Parish" },
4190 {
"Iberia Parish",
"Iberia Parish" },
4191 {
"IberiaParish",
"Iberia Parish" },
4192 {
"Iberville Parish",
"Iberville Parish" },
4193 {
"IbervilleParish",
"Iberville Parish" },
4194 {
"Jackson Parish",
"Jackson Parish" },
4195 {
"JacksonParish",
"Jackson Parish" },
4196 {
"Jefferson Davis Parish",
"Jefferson Davis Parish" },
4197 {
"Jefferson Parish",
"Jefferson Parish" },
4198 {
"JeffersonDavisParish",
"Jefferson Davis Parish" },
4199 {
"JeffersonParish",
"Jefferson Parish" },
4200 {
"Lafayette Parish",
"Lafayette Parish" },
4201 {
"LafayetteParish",
"Lafayette Parish" },
4202 {
"Lafourche Parish",
"Lafourche Parish" },
4203 {
"LafourcheParish",
"Lafourche Parish" },
4204 {
"LaSalle Parish",
"LaSalle Parish" },
4205 {
"LaSalleParish",
"LaSalle Parish" },
4206 {
"Lincoln Parish",
"Lincoln Parish" },
4207 {
"LincolnParish",
"Lincoln Parish" },
4208 {
"Livingston Parish",
"Livingston Parish" },
4209 {
"LivingstonParish",
"Livingston Parish" },
4210 {
"Madison Parish",
"Madison Parish" },
4211 {
"MadisonParish",
"Madison Parish" },
4212 {
"Morehouse Parish",
"Morehouse Parish" },
4213 {
"MorehouseParish",
"Morehouse Parish" },
4214 {
"Natchitoches Parish",
"Natchitoches Parish" },
4215 {
"NatchitochesParish",
"Natchitoches Parish" },
4216 {
"Orleans Parish",
"Orleans Parish" },
4217 {
"OrleansParish",
"Orleans Parish" },
4218 {
"Ouachita Parish",
"Ouachita Parish" },
4219 {
"OuachitaParish",
"Ouachita Parish" },
4220 {
"Plaquemines Parish",
"Plaquemines Parish" },
4221 {
"PlaqueminesParish",
"Plaquemines Parish" },
4222 {
"Pointe Coupee Parish",
"Pointe Coupee Parish" },
4223 {
"PointeCoupeeParish",
"Pointe Coupee Parish" },
4224 {
"Rapides Parish",
"Rapides Parish" },
4225 {
"RapidesParish",
"Rapides Parish" },
4226 {
"Red River Parish",
"Red River Parish" },
4227 {
"RedRiverParish",
"Red River Parish" },
4228 {
"Richland Parish",
"Richland Parish" },
4229 {
"RichlandParish",
"Richland Parish" },
4230 {
"Sabine Parish",
"Sabine Parish" },
4231 {
"SabineParish",
"Sabine Parish" },
4232 {
"St. Bernard Parish",
"St. Bernard Parish" },
4233 {
"St. Charles Parish",
"St. Charles Parish" },
4234 {
"St. Helena Parish",
"St. Helena Parish" },
4235 {
"St. James Parish",
"St. James Parish" },
4236 {
"St. John the Baptist Parish",
"St. John the Baptist Parish" },
4237 {
"St. Landry Parish",
"St. Landry Parish" },
4238 {
"St. Martin Parish",
"St. Martin Parish" },
4239 {
"St. Mary Parish",
"St. Mary Parish" },
4240 {
"St. Tammany Parish",
"St. Tammany Parish" },
4241 {
"St.BernardParish",
"St. Bernard Parish" },
4242 {
"St.CharlesParish",
"St. Charles Parish" },
4243 {
"St.HelenaParish",
"St. Helena Parish" },
4244 {
"St.JamesParish",
"St. James Parish" },
4245 {
"St.JohntheBaptistParish",
"St. John the Baptist Parish" },
4246 {
"St.LandryParish",
"St. Landry Parish" },
4247 {
"St.MartinParish",
"St. Martin Parish" },
4248 {
"St.MaryParish",
"St. Mary Parish" },
4249 {
"St.TammanyParish",
"St. Tammany Parish" },
4250 {
"Tangipahoa Parish",
"Tangipahoa Parish" },
4251 {
"TangipahoaParish",
"Tangipahoa Parish" },
4252 {
"Tensas Parish",
"Tensas Parish" },
4253 {
"TensasParish",
"Tensas Parish" },
4254 {
"Terrebonne Parish",
"Terrebonne Parish" },
4255 {
"TerrebonneParish",
"Terrebonne Parish" },
4256 {
"Union Parish",
"Union Parish" },
4257 {
"UnionParish",
"Union Parish" },
4258 {
"Vermilion Parish",
"Vermilion Parish" },
4259 {
"VermilionParish",
"Vermilion Parish" },
4260 {
"Vernon Parish",
"Vernon Parish" },
4261 {
"VernonParish",
"Vernon Parish" },
4262 {
"Washington Parish",
"Washington Parish" },
4263 {
"WashingtonParish",
"Washington Parish" },
4264 {
"Webster Parish",
"Webster Parish" },
4265 {
"WebsterParish",
"Webster Parish" },
4266 {
"West Baton Rouge Parish",
"West Baton Rouge Parish" },
4267 {
"West Carroll Parish",
"West Carroll Parish" },
4268 {
"West Feliciana Parish",
"West Feliciana Parish" },
4269 {
"WestBatonRougeParish",
"West Baton Rouge Parish" },
4270 {
"WestCarrollParish",
"West Carroll Parish" },
4271 {
"WestFelicianaParish",
"West Feliciana Parish" },
4272 {
"Winn Parish",
"Winn Parish" },
4273 {
"WinnParish",
"Winn Parish" }
4281 if ( parish.empty() ) {
4286 if ( parish_find_iter != parishAbbrevMap.end() ) {
4288 parish = parish_find_iter->second;
4298 {
"AL",
"Alabama" },
4299 {
"Alabama",
"Alabama" },
4300 {
"Alaska",
"Alaska" },
4301 {
"American Samoa",
"American Samoa" },
4302 {
"AR",
"Arkansas" },
4303 {
"Arizona",
"Arizona" },
4304 {
"Arkansas",
"Arkansas" },
4305 {
"AS",
"American Samoa" },
4306 {
"AZ",
"Arizona" },
4307 {
"CA",
"California" },
4308 {
"California",
"California" },
4309 {
"CO",
"Colorado" },
4310 {
"Colorado",
"Colorado" },
4311 {
"Connecticut",
"Connecticut" },
4312 {
"CT",
"Connecticut" },
4313 {
"DC",
"District of Columbia" },
4314 {
"DE",
"Delaware" },
4315 {
"Delaware",
"Delaware" },
4316 {
"District of Columbia",
"District of Columbia" },
4317 {
"FL",
"Florida" },
4318 {
"Florida",
"Florida" },
4319 {
"GA",
"Georgia" },
4320 {
"Georgia",
"Georgia" },
4323 {
"Hawaii",
"Hawaii" },
4327 {
"Idaho",
"Idaho" },
4328 {
"IL",
"Illinois" },
4329 {
"Illinois",
"Illinois" },
4330 {
"IN",
"Indiana" },
4331 {
"Indiana",
"Indiana" },
4333 {
"Kansas",
"Kansas" },
4334 {
"Kentucky",
"Kentucky" },
4336 {
"KY",
"Kentucky" },
4337 {
"LA",
"Louisiana" },
4338 {
"Louisiana",
"Louisiana" },
4339 {
"MA",
"Massachusetts" },
4340 {
"Maine",
"Maine" },
4341 {
"Maryland",
"Maryland" },
4342 {
"Massachusetts",
"Massachusetts" },
4343 {
"MD",
"Maryland" },
4345 {
"MI",
"Michigan" },
4346 {
"Michigan",
"Michigan" },
4347 {
"Minnesota",
"Minnesota" },
4348 {
"Mississippi",
"Mississippi" },
4349 {
"Missouri",
"Missouri" },
4350 {
"MN",
"Minnesota" },
4351 {
"MO",
"Missouri" },
4352 {
"Montana",
"Montana" },
4353 {
"MS",
"Mississippi" },
4354 {
"MT",
"Montana" },
4355 {
"NC",
"North Carolina" },
4356 {
"ND",
"North Dakota" },
4357 {
"NE",
"Nebraska" },
4358 {
"Nebraska",
"Nebraska" },
4359 {
"Nevada",
"Nevada" },
4360 {
"New Hampshire",
"New Hampshire" },
4361 {
"New Jersey",
"New Jersey" },
4362 {
"New Mexico",
"New Mexico" },
4363 {
"New York",
"New York" },
4364 {
"NH",
"New Hampshire" },
4365 {
"NJ",
"New Jersey" },
4366 {
"NM",
"New Mexico" },
4367 {
"North Carolina",
"North Carolina" },
4368 {
"North Dakota",
"North Dakota" },
4370 {
"NY",
"New York" },
4373 {
"OK",
"Oklahoma" },
4374 {
"Oklahoma",
"Oklahoma" },
4376 {
"Oregon",
"Oregon" },
4377 {
"PA",
"Pennsylvania" },
4378 {
"Pennsylvania",
"Pennsylvania" },
4379 {
"PR",
"Puerto Rico" },
4380 {
"Puerto Rico",
"Puerto Rico" },
4381 {
"Rhode Island",
"Rhode Island" },
4382 {
"RI",
"Rhode Island" },
4383 {
"SC",
"South Carolina" },
4384 {
"SD",
"South Dakota" },
4385 {
"South Carolina",
"South Carolina" },
4386 {
"South Dakota",
"South Dakota" },
4387 {
"Tennessee",
"Tennessee" },
4388 {
"Texas",
"Texas" },
4389 {
"TN",
"Tennessee" },
4391 {
"US Virgin Islands",
"US Virgin Islands" },
4394 {
"VA",
"Virginia" },
4395 {
"Vermont",
"Vermont" },
4396 {
"VI",
"US Virgin Islands" },
4397 {
"Virgin Islands",
"US Virgin Islands" },
4398 {
"Virginia",
"Virginia" },
4399 {
"VT",
"Vermont" },
4400 {
"WA",
"Washington" },
4401 {
"Washington",
"Washington" },
4402 {
"West Virginia",
"West Virginia" },
4403 {
"WI",
"Wisconsin" },
4404 {
"Wisconsin",
"Wisconsin" },
4405 {
"WV",
"West Virginia" },
4406 {
"WY",
"Wyoming" },
4407 {
"Wyoming",
"Wyoming" }
4415 if (
state.empty() ) {
4419 string original =
state;
4420 string working =
state;
4437 if ( state_find_iter != stateAbbrevMap.end() ) {
4439 state = state_find_iter->second;
4452 if ( country.empty() ) {
4457 string original = country;
4458 string working = country;
4462 working = working.substr ( 1, working.length() - 2 );
4488 vector<string> components;
4492 if ( components.size() < 1 ) {
4497 for (
int j = 0; j < components.size(); j++ ) {
4516 for (
int j = 0; j < components.size(); j++ ) {
4517 bool modified =
false;
4518 if (
s_IsState ( components[j], modified )) {
4543 res.append (
"USA: ");
4548 res.append ( components[
match] );
4552 for (
int j = 0; j < components.size(); j++ ) {
4553 if ( j ==
match)
continue;
4555 res.append ( components[j] );
4563 }
else if ( num_states > 1 ) {
4579 if ( ! exception_file.empty()) {
4582 for (
const auto &
row : my_stream ) {
4583 TFieldNo number_of_fields =
row. GetNumberOfFields();
4584 if ( number_of_fields != 2 )
continue;
4585 string fr =
row[0].Get<
string>();
4586 string to =
row[1].Get<
string>();
4587 exceptions [fr] = to;
4598 for (
const auto & itm : exceptions ) {
4599 string fr = itm.first;
4600 string to = itm.second;
4607 if ( ! f1.empty() && ! f2.empty()) {
4608 fr = f1 +
": " + f2;
4619 if ( ! exception_file.empty()) {
4630 string working = country;
4636 if ( ! corrected.empty()) {
4676 string micronesia =
"Micronesia, Federated States of";
4700 if (!usa1.empty() && !usa2.empty()) {
4704 input =
"USA: " + usa2;
4708 auto old_name_fix = k_old_country_name_fixes.find(
input.c_str());
4709 if (old_name_fix != k_old_country_name_fixes.end()) {
4710 input = old_name_fix->second;
4714 if (us_territories) {
4733 if (!new_country.empty())
4736 bool too_many_countries =
false;
4737 bool bad_cap =
false;
4739 string valid_country;
4740 string orig_valid_country;
4742 x_FindCountryName(k_country_name_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);
4743 if (valid_country.empty()) {
4744 x_FindCountryName(k_subregion_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);
4747 if (!valid_country.empty() && !too_many_countries)
4750 if (!valid_country.empty() && too_many_countries && valid_country ==
input)
4755 new_country = str1+
": "+str2;
4759 else if(!valid_country.empty() && !too_many_countries)
4764 string before =
input.substr(0,pos);
4769 string after =
input.substr(pos+orig_valid_country.length());
4773 else new_country = valid_country;
4774 if (!before.empty() || !after.empty()) {
4776 new_country +=
": ";
4778 new_country +=
", ";
4781 if (!before.empty())
4782 new_country += before;
4783 if (!before.empty() && !after.empty() && !
NStr::Equal(after,
")"))
4784 new_country +=
", ";
4786 new_country += after;
4799 for (
size_t i = 0;
i < country.length();
i++) {
4800 if (country[
i] ==
':') {
4815 string new_country = country;
4817 if (country_end_pos !=
NPOS)
4820 while (country[pos] ==
',' || country[pos] ==
':' ||
isspace((
unsigned char)country[pos]))
4824 string after = country.substr(pos);
4825 if (after.empty()) {
4826 if (pos > country_end_pos) {
4827 new_country = country.substr(0, country_end_pos);
4831 if (capitalize_after_colon)
4833 new_country = country.substr(0,country_end_pos);
4834 new_country +=
": " + after;
4846 {
"adult",
"adult" },
4848 {
"juvenile",
"juvenile" },
4849 {
"larva",
"larva" }
4860 if (it != sc_DevStagePairs.end()) {
4868 {
"hemocyte",
"hemocyte" },
4869 {
"hepatocyte",
"hepatocyte" },
4870 {
"lymphocyte",
"lymphocyte" },
4871 {
"neuroblast",
"neuroblast" }
4881 if (it != sc_CellTypePairs.end()) {
4896 vector<CTempString> tokens;
4898 if (tokens.size() > 1) {
4899 qual_map[tokens[0]] = tokens[1];
4905 const char **built_in,
size_t num_built_in,
4910 if (!
file.empty()) {
4917 if (built_in ==
NULL) {
4920 if (getenv(
"NCBI_DEBUG")) {
4921 ERR_POST(
Note <<
"Falling back on built-in data for " + data_name);
4923 for (
size_t i = 0;
i < num_built_in;
i++) {
4924 const char *p = built_in[
i];
4929 if (getenv(
"NCBI_DEBUG")) {
4934 }
while (!lr->
AtEOF());
4938 #include "isolation_sources.inc"
4968 for (
size_t i = 0;
i <
max;
i++) {
4995 for (
size_t i = 0;
i <
max;
i++) {
5018 string new_val =
value;
5066 const string& name =
GetName();
5129 "[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]",
5130 "[BankIt_uncultured16S_wizard]; [universal primers]; [dgge]",
5131 "[BankIt_uncultured16S_wizard]; [universal primers]",
5132 "[BankIt_cultured16S_wizard]",
5133 "[BankIt_organellerRNA_wizard]",
5134 "[BankIt_ITS_wizard]; [rRNAITS_notfound]",
5135 "[BankIt_ITS_wizard]",
5136 "[uncultured (using universal primers)]",
5137 "[uncultured (using universal primers) bacterial source]",
5138 "[cultured bacterial source]",
5139 "[enrichment culture bacterial source]",
5140 "[mixed bacterial source (cultured and uncultured)]",
5141 "[uncultured]; [universal primers]",
5142 "[mixed bacterial source]",
5144 "[cDNA derived from mRNA, purified viral particles]",
5145 "[cDNA derived from mRNA, whole cell/tissue lysate]",
5146 "[cDNA derived from genomic RNA, whole cell/tissue lysate]",
5147 "[cDNA derived from genomic RNA, purified viral particles]",
5148 "[universal primers]",
5149 "[uncultured; wizard]",
5150 "[uncultured; wizard; spans unknown]",
5151 "[cultured; wizard]",
5152 "[cultured; wizard; spans unknown]",
5153 "[intergenic wizard]",
5154 "[intergenic wizard; spans unknown]",
5155 "[Microsatellite wizard]",
5156 "[Microsatellite wizard; multiple repeats]",
5158 "[D-loop wizard; spans unknown]",
5159 "[D-loop wizard; spans known]",
5164 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",
5165 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",
5166 "[BankIt_uncultured16S_wizard]; [species_specific primers]",
5167 "[uncultured (with species-specific primers)]",
5168 "[uncultured]; [amplified with species-specific primers]",
5169 "[uncultured (using species-specific primers) bacterial source]",
5170 "[amplified with species-specific primers]",
5179 if (pos != string::npos) {
5200 size_t remove_len = to_remove.length();
5202 while (pos !=
NPOS) {
5203 size_t extra_len = strspn (
value.c_str() + pos + remove_len,
" ;");
5204 value =
value.substr(0, pos) +
value.substr(pos + remove_len + extra_len);
5216 if (is_species_level) {
5219 value =
"amplified with species-specific primers";
5240 (
const string & country_name,
double y,
double min_x,
double max_x,
double scale)
5241 : m_CountryName(country_name) ,
5256 #define EPSILON 0.001
5314 : m_CountryName(country_name) , m_MinX (min_x), m_MinY (min_y), m_MaxX(max_x), m_MaxY (max_y)
5322 m_Level0 = country_name.substr(0, pos);
5324 m_Level1 = country_name.substr(pos + 1);
5398 && m_MaxX <= other_block->
GetMaxX()
5400 && m_MinY <= other_block->
GetMaxY()) {
5456 m_WaterDistance(-1),
5457 m_ClaimedDistance(-1)
5525 #include "lat_lon_country.inc"
5528 #include "lat_lon_water.inc"
5533 if (getenv(
"NCBI_DEBUG")) {
5534 ERR_POST(
Note <<
"Falling back on built-in data for latlon / water data.");
5539 string current_country;
5541 for (
int i = 0;
i < num;
i++) {
5543 if (line[0] ==
'-') {
5545 }
else if (
isalpha ((
unsigned char)line[0])) {
5546 current_country = line;
5547 }
else if (
isdigit ((
unsigned char)line[0])) {
5550 vector<string> tokens;
5552 if (tokens.size() > 3) {
5554 for (
size_t j = 2; j < tokens.size() - 1; j+=2) {
5571 if (getenv(
"NCBI_DEBUG")) {
5572 ERR_POST(
Note <<
"Reading from " + filename +
" for latlon/water data.");
5579 string current_country;
5584 vector<SIZE_TYPE> tab_positions;
5589 if (line[0] ==
'-') {
5591 }
else if (
isalpha ((
unsigned char)line[0])) {
5592 current_country = line;
5593 }
else if (
isdigit ((
unsigned char)line[0])) {
5606 tab_positions.clear();
5608 while( tab_pos !=
NPOS ) {
5609 tab_positions.push_back(tab_pos);
5610 tab_pos = line.
find(
'\t', tab_pos+1);
5613 tab_positions.push_back(line.
length());
5615 const char * line_start = line.
data();
5616 if( tab_positions.size() >= 4 ) {
5617 CTempString y_str( line_start + tab_positions[0]+1, tab_positions[1] - tab_positions[0] - 1 );
5621 for (
size_t j = 1; j < tab_positions.size() - 2; j+=2) {
5622 const SIZE_TYPE pos1 = tab_positions[j];
5623 const SIZE_TYPE pos2 = tab_positions[j+1];
5624 const SIZE_TYPE pos3 = tab_positions[j+2];
5625 CTempString first_num( line_start + pos1 + 1, pos2 - pos1 - 1 );
5626 CTempString second_num( line_start + pos2 + 1, pos3 - pos2 - 1 );
5631 }
while ( !lr->
AtEOF() );
5662 }
else if (
cmp < 0) {
5680 }
else if (
line1->GetMinX() >
line2->GetMinX()) {
5682 }
else if (
line1->GetMaxX() <
line2->GetMaxX()) {
5684 }
else if (
line1->GetMaxX() >
line2->GetMaxX()) {
5702 const char* env_val = getenv(
"NCBI_LAT_LON_DATA_PATH");
5705 data_path = (
string) env_val;
5707 data_path = data_path +
"/";
5713 if (data_path.empty() || !
x_InitFromFile(data_path +
"lat_lon_water.txt")) {
5719 if (data_path.empty() || !
x_InitFromFile(data_path +
"lat_lon_country.txt")) {
5731 TCountryToLinesMap countryToLinesMap;
5733 countryToLinesMap[(*line_it)->GetCountry()].push_back(*line_it);
5743 country_lines_it->second;
5748 back_inserter(new_country_line_list));
5812 }
else if (
cmp > 0) {
5913 const string& country,
5914 const string& province)
const
5943 #define EARTH_RADIUS 6371.0
5944 #define CONST_PI 3.14159265359
5951 return (degrees * (
CONST_PI / 180.0));
5962 double lat1, lon1, lat2, lon2;
5963 double dLat, dLon,
a, c;
5973 a = sin (dLat / 2) * sin (dLat / 2) +
5974 cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2);
5975 c = 2 * atan2 (sqrt (
a), sqrt (1 -
a));
5986 double lat1, lon1, lat2, lon2;
5987 double dLat, dLon,
a, c;
5997 a = sin (dLat / 2) * sin (dLat / 2) +
5998 cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2);
5999 c = 2 * atan2 (sqrt (
a), sqrt (1 -
a));
6015 int min_y = y - maxDelta;
6016 int max_y = y + maxDelta;
6017 int min_x = x - maxDelta;
6018 int max_x = x + maxDelta;
6023 double closest = 0.0;
6039 if (rval ==
NULL || closest > dist
6057 double lat,
double lon,
6058 double range,
double &distance)
const
6064 int min_y = y - maxDelta;
6065 int max_y = y + maxDelta;
6066 int min_x = x - maxDelta;
6067 int max_x = x + maxDelta;
6073 double closest = 0.0;
6074 int smallest_area = -1;
6092 smallest_area = ext->
GetArea();
6094 }
else if (closest == dist) {
6101 && (ext->
GetArea() < smallest_area
6104 smallest_area = ext->
GetArea();
6119 const string& country,
6120 const string& province)
const
6124 double closest = -1.0;
6126 int min_y = y - maxDelta;
6127 int max_y = y + maxDelta;
6128 int min_x = x - maxDelta;
6129 int max_x = x + maxDelta;
6152 if (closest < 0.0 || closest > dist) {
6168 const string& country2)
const
6191 }
else if (scale > 19.5 && scale < 20.5) {
6193 }
else if (scale > 99.5 && scale < 100.5) {
6197 return (
int) (distance + 0.5);
static void s_ProcessCellLineLine(const CTempString &line)
SStaticPair< const char *, const char * > TParishMapEntry
static const char *const s_Null_Countries[]
static string s_InsertSpacesBetweenTokens(const string &old_str)
double ErrorDistance(double latA, double lonA, double scale)
static TCellLineContaminationMap s_CellLineContaminationMap
void s_AddOneDataFile(const string &file_name, const string &data_name, const char **built_in, size_t num_built_in, TQualFixMap &qual_map)
static string s_NormalizeTokens(vector< string > &tokens, vector< double > &numbers, vector< string > &anum, vector< int > &precision, vector< string > &lat_long, vector< string > &nsew)
map< string, string, PNocase > TQualFixMap
DEFINE_STATIC_ARRAY_MAP(TWaterPairMap, sc_WaterPairMap, k_water_pair_map)
static void s_InitializeCellLineContaminationMap(void)
static const size_t k_NumLatLonCountryText
CStaticArrayMap< const char *, const char *, PNocase_CStr > TWaterPairMap
const char * sm_ValidSexQualifierTokens[]
map< string, TSpeciesContaminant > TCellLineContaminationMap
CCountries::EStateCleanup s_DoUSAStateCleanup(string &country)
SStaticPair< const char *, const char * > TStateMapEntry
static const TCStrSet s_Former_CountriesSet(s_Former_Countries, sizeof(s_Former_Countries), __FILE__, __LINE__)
static CCountries::TUsaExceptionMap exception_map
static double DegreesToRadians(double degrees)
static const char * s_ReplaceableCultureNotes[]
static TQualFixMap s_IsolationSourceMap
static bool s_CellLineContaminationMapInitialized
static void s_InitializeQualMaps(void)
static const TCStrSet s_Null_CountriesSet(s_Null_Countries, sizeof(s_Null_Countries), __FILE__, __LINE__)
bool s_IsState(string &state, bool &modified)
static bool s_FailsGenusOrSpeciesTest(const string &value, const string &taxname)
string s_ShortenLatLon(string &subname)
bool s_IsParish(string &parish)
static const TWaterPairElem k_water_pair_map[]
static const SStaticPair< const char *, const char * > s_map_subregion_fixes[]
CStaticPairArrayMap< const char *, const char *, PCase_CStr > TCStringPairsMap
static bool s_init_UseGeoLocNameForCountry(void)
static void s_ProcessQualMapLine(const CTempString &line, TQualFixMap &qual_map)
CStaticArraySet< const char *, PCase_CStr > TCStrSet
static const SStaticPair< const char *, const char * > s_map_old_country_name_fixes[]
static const TStaticQualFixPair kCellTypePairs[]
static const TStaticQualFixPair kDevStagePairs[]
static string s_RemoveSpacesWithinNumbers(const string &old_str)
static bool s_QualFixupMapsInitialized
static const char * s_RemovableCultureNotes[]
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TParishMap
static string x_FindSurroundingOcean(string &water)
SStaticPair< const char *, const char * > TWaterPairElem
DEFINE_STATIC_FAST_MUTEX(s_CellLineContaminationMutex)
static bool exceptions_initialized
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TStaticQualFixMap
static void s_GetLatLong(const string &new_str, vector< double > &numbers, vector< int > &precision)
bool s_SuppressCountryFix(const string &test)
static bool s_IsNumber(const string &token, double *result=NULL)
static const TParishMapEntry parish_abbrev_array[]
bool s_ChooseMonthAndDay(const string &token1, const string &token2, bool month_first, string &month, int &day, bool &month_ambiguous)
static const TCStrSet s_CountriesSet(s_Countries, sizeof(s_Countries), __FILE__, __LINE__)
const char * sm_ValidSexQualifierPhrases[]
static const char *const s_Countries[]
map< string, TContaminatingCellLine > TSpeciesContaminant
CRowReader< CRowReaderStream_NCBI_TSV > TNCBITSVStream
static const SStaticPair< const char *, const char * > s_map_country_name_fixes[]
bool s_IsValidSexQualifierPhrase(const string &value)
bool s_CompressRunsOfSpaces(string &val)
static const SStaticPair< const char *, const char * > s_map_whole_country_fixes[]
static void s_ReorderNorthSouthEastWest(vector< double > &numbers, vector< int > &precision, const vector< string > &lat_long, vector< string > &nsew)
static const char * s_USAStates[]
SStaticPair< const char *, const char * > TStaticQualFixPair
pair< string, string > TContaminatingCellLine
MAKE_CONST_SET(s_Null_CollectionDatesSet, ct::tagStrCase, { "missing", "missing: control sample", "missing: data agreement established pre-2023", "missing: endangered species", "missing: human-identifiable", "missing: lab stock", "missing: sample group", "missing: synthetic construct", "missing: third party data", "not applicable", "not collected", "not provided", "restricted access", }) string CSubSource
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TStateMap
void s_CollectNumberAndUnits(const string &value, string &number, string &units)
bool s_ContainsWholeWord(const CTempString test, const CTempString word, NStr::ECase case_sense)
static double DistanceOnGlobe(double latA, double lonA, double latB, double lonB)
static const char *const s_Former_Countries[]
static const TStateMapEntry state_abbrev_array[]
static const size_t k_NumLatLonWaterText
static vector< string > x_Tokenize(const string &val)
static string NewFixCountry(const string &input, bool us_territories=false)
static bool WasValid(const string &country)
static string USAStateCleanup(const string &country)
static string WholeCountryFix(string country)
static void x_RemoveDelimitersFromEnds(string &val, bool except_paren=false)
static bool IsValid(const string &country)
static bool ContainsMultipleCountryNames(const string &phrase)
static bool IsSubstringOfStringInList(const string &phrase, const string &country1, size_t pos1)
static void x_FindCountryName(const TCStringPairsMap &fix_map, const vector< string > &countries, string &valid_country, string &orig_valid_country, bool &too_many_countries, bool &bad_cap)
static void ReadUSAExceptionMap(TUsaExceptionMap &exceptions, const string &filepath)
static bool ChangeExtraColonsToCommas(string &country)
static string CapitalizeFirstLetterOfEveryWord(const string &phrase)
static string CountryFixupItem(const string &input, bool capitalize_after_colon)
static string GetCorrectedCountryCapitalization(const string &country)
static void LoadUSAExceptionMap(const TUsaExceptionMap &exceptions)
void AddLine(const CCountryLine *line)
bool DoesOverlap(const CCountryExtreme *other_block) const
string GetLevel0(void) const
CCountryExtreme(const string &country_name, int min_x, int min_y, int max_x, int max_y)
string GetCountry(void) const
bool PreferTo(const CCountryExtreme *other_block, const string country, const string province, const bool prefer_new) const
string GetLevel1(void) const
static int ConvertLat(double y, double scale)
int x_ConvertLat(double y)
CCountryLine(const string &country_name, double y, double min_x, double max_x, double scale)
int x_ConvertLon(double x)
static int ConvertLon(double x, double scale)
ECompare Compare(const CDate &date) const
@ eCompare_before
*this comes first.
@ eCompare_after
*this comes second.
int GetClaimedDistance(void) const
string GetClosestProvince(void) const
void SetFullGuess(string guess)
string GetClaimedFull(void) const
string GetClosestWater(void) const
void SetGuessProvince(string guess)
CLatLonCountryId(float lat, float lon)
int TClassificationFlags
Bitwise OR of "EClassificationFlags".
string GetGuessCountry(void) const
string GetGuessWater(void) const
CLatLonCountryId::TClassificationFlags Classify(string country, string province)
string GetClosestFull(void) const
int GetLandDistance(void) const
string GetClosestCountry(void) const
string GetGuessProvince(void) const
void SetGuessCountry(string guess)
const CCountryExtreme * x_FindCountryExtreme(const string &country) const
CLatLonCountryMap(bool is_water)
size_t x_GetLatStartIndex(int y) const
static bool s_CompareTwoLinesByLatLonThenCountry(const CCountryLine *line1, const CCountryLine *line2)
static bool s_CompareTwoLinesByCountry(const CCountryLine *line1, const CCountryLine *line2)
TCountryLineList m_CountryLineList
static int AdjustAndRoundDistance(double distance, double scale)
static bool s_CompareTwoLinesByLatLonOnly(const CCountryLine *line1, const CCountryLine *line2)
const CCountryExtreme * IsNearLatLon(double lat, double lon, double range, double &distance, const string &country, const string &province=kEmptyStr) const
bool DoCountryBoxesOverlap(const string &country1, const string &country2) const
const CCountryExtreme * FindClosestToLatLon(double lat, double lon, double range, double &distance)
int TLatLonAdjustFlags
Bitwise OR of "ELatLonAdjustFlags".
const CCountryExtreme * GuessRegionForLatLon(double lat, double lon, const string &country=kEmptyStr, const string &province=kEmptyStr) const
bool IsCountryInLatLon(const string &country, double lat, double lon) const
bool x_InitFromFile(const string &filename)
bool HaveLatLonForRegion(const string &country) const
TCountryExtremeList m_CountryExtremes
TCountryLineList m_LatLonSortedList
bool IsClosestToLatLon(const string &country, double lat, double lon, double range, double &distance) const
void x_InitFromDefaultList(const char *const *list, int num)
vector< CCountryLine * > TCountryLineList
static CNcbiApplication * Instance(void)
Singleton method.
static string FixHostCapitalization(const string &value)
Callback style template to iterate over a row stream.
Root class for all serialization exceptions.
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
TBase::const_iterator const_iterator
const_iterator find(const key_type &key) const
Return a const_iterator pointing to the specified element, or to the end if the element is not found.
const_iterator end() const
Return the end of the controlled sequence.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
TBase::const_iterator const_iterator
static bool IsISOFormatDate(const string &orig_date)
static string GetCollectionDateProblem(const string &date_string)
static bool NCBI_UseGeoLocNameForCountry(void)
static string FixTissueTypeCapitalization(const string &value)
static string FixLatLonPrecision(const string &orig)
static string x_RemoveIsoTime(const string &orig_date)
static string x_ParseDateRangeWithDelimiter(const string &orig_date, CTempString delim)
static string FixSexQualifierValue(const string &value)
static bool IsISOFormatTime(const string &orig_time, int &hour, int &min, int &sec, bool require_time_zone=true)
@ eDateFormatFlag_bad_format
@ eDateFormatFlag_in_future
@ eDateFormatFlag_out_of_order
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
static bool x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(const string &value, const string &taxname)
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
static string FixLatLonFormat(string orig_lat_lon, bool guess=false)
static unique_ptr< CLatLonCountryMap > m_LatLonWaterMap
static bool IsPlasmidNameValid(const string &value, const string &taxname)
static bool x_IsFixableIsoDate(const string &orig_date)
static CRef< CDate > GetDateFromISODate(const string &orig_date)
static string FixIsolationSourceCapitalization(const string &value)
static bool HasCultureNotes(const string &value)
static bool IsValidSexQualifierValue(const string &value)
static string FixCellTypeCapitalization(const string &value)
static vector< string > x_GetDateTokens(const string &orig_date)
void GetLabel(string *str) const
static bool IsMultipleValuesAllowed(TSubtype)
@ eLatLonCountryErr_Value
@ eLatLonCountryErr_State
@ eLatLonCountryErr_Water
@ eLatLonCountryErr_Country
static CLatLonCountryId * x_CalculateLatLonId(float lat_value, float lon_value, string country, string province)
static bool IsISOFormatDateOnly(const string &date)
static bool IsDayValueOkForMonth(int day, int month, int year)
Determine whether day number could occur in month.
static bool IsAltitudeValid(const string &value)
static string ValidateLatLonCountry(const string &countryname, string &lat_lon, bool check_state, ELatLonCountryErr &errcode)
static string FixDateFormat(const string &orig_date)
Attempt to fix the format of the date. Returns a blank if the format of the date cannot be determined.
static string CheckCellLine(const string &cell_line, const string &organism)
static string MakeLatLon(double lat_value, double lon_value, int lat_precision=2, int lon_precision=2)
static bool IsCollectionDateAfterTime(const string &collection_date, time_t t, bool &bad_format)
static size_t CheckDateFormat(const string &date_string)
static string x_FormatWithPrecision(double val, int precision)
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
static int x_GetPrecision(const string &num_str)
static bool NeedsNoText(const TSubtype &subtype)
static bool IsEndogenousVirusNameValid(const string &value)
static bool IsChromosomeNameValid(const string &value, const string &taxname)
static bool x_GenericRepliconNameValid(const string &value)
static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)
static CRef< CDate > DateFromCollectionDate(const string &str) THROWS((CException))
static bool IsSegmentValid(const string &value)
static string FixDevStageCapitalization(const string &value)
static unique_ptr< CLatLonCountryMap > m_LatLonCountryMap
static bool IsLinkageGroupNameValid(const string &value, const string &taxname)
static string FixAltitude(const string &value)
static bool IsDiscouraged(const TSubtype subtype)
static void RemoveCultureNotes(string &value, bool is_species_level=true)
static string FixLabHostCapitalization(const string &value)
static void IsCorrectDateFormat(const string &date_string, bool &bad_format, bool &in_future)
static void DetectDateFormat(const string &orig_date, bool &ambiguous, bool &day_first)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
const_iterator find(const key_type &key) const
static void check_state(const char name[], prfunc print, int erc)
#define test(a, b, c, d, e)
static char line1[1024 *16]
static char line2[1024 *16]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static SQLCHAR output[256]
static const char * str(char *buf, int n)
const CNcbiEnvironment & GetEnvironment(void) const
Get the application's cached environment.
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define NCBI_CATCH(message)
Catch CExceptions as well. This macro is deprecated - use *_X or *_XX variant instead of it.
void Warning(CExceptionArgs_Base &args)
#define ENUM_METHOD_NAME(EnumName)
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
virtual string GetString(const string &section, const string &name, const string &default_value, TFlags flags=0) const
Get the parameter string value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Uint4 TUnicodeSymbol
Unicode character.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static EEncoding GuessEncoding(const CTempString &src)
Guess the encoding of the C/C++ string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static string ParseEscapes(const CTempString str, EEscSeqRange mode=eEscSeqRange_Standard, char user_char='?')
Parse C-style escape sequences in the specified string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
const char * data(void) const
Return a pointer to the array represented.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static void TrimSuffixInPlace(string &str, const CTempString suffix, ECase use_case=eCase)
Trim suffix from a string (in-place)
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
size_type length(void) const
Return the length of the represented array.
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
static TUnicodeSymbol Decode(const char *&src)
Convert sequence of UTF8 code units into Unicode code point.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static void TrimPrefixInPlace(string &str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string (in-place)
ECase
Which type of string comparison.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ eReverseSearch
Search in a backward direction.
@ eTrunc_Both
Truncate spaces at both begin and end of string.
@ eTrunc_Begin
Truncate leading spaces only.
@ eNocase
Case insensitive compare.
int DaysInMonth(void) const
Get number of days in the month.
time_t GetTimeT(void) const
Get time in time_t format.
static int MonthNameToNum(const string &month)
Get numerical value of the month by name.
static string MonthNumToName(int month, ENameFormat format=eFull)
Get name of the month by numerical value.
@ eAbbr
Use abbreviated name.
const TAttrib & GetAttrib(void) const
Get the Attrib member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
void ResetName(void)
Reset Name data member.
TName & SetName(void)
Assign a value to Name data member.
const TName & GetName(void) const
Get the Name member data.
bool IsSetAttrib(void) const
attribution/source of this name. Check if a value has been assigned to Attrib data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_insertion_seq_name
@ eSubtype_transposon_name
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_lat_lon
+/- decimal degrees
@ eSubtype_rev_primer_name
@ eSubtype_collected_by
name of person who collected the sample
@ eSubtype_fwd_primer_name
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_isolation_source
@ eSubtype_environmental_sample
@ eSubtype_identified_by
name of person who identified the sample
@ eSubtype_whole_replicon
void SetYear(TYear value)
Assign a value to Year data member.
void SetMonth(TMonth value)
Assign a value to Month data member.
TStd & SetStd(void)
Select the variant.
void SetDay(TDay value)
Assign a value to Day data member.
unsigned int
A callback function used to compare two keys in a database.
where both of them are integers Note
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
range(_Ty, _Ty) -> range< _Ty >
std::integral_constant< ncbi::NStr::ECase, ncbi::NStr::eCase > tagStrCase
constexpr auto sort(_Init &&init)
const GenericPointer< typename T::ValueType > T2 value
Defines: CTimeFormat - storage class for time format.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
static const char * suffix[]
Uint4 TFieldNo
Field number (zero based)
Generic utility macros and templates for exploring NCBI objects.
#define BEGIN_COMMA_END(container)
#define row(bind, expected)
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
static const char * type_name(CS_INT value)