78 replace(type_name.begin(), type_name.end(),
'_',
'-');
80 type_name =
"unknown";
99 replace(name.begin(), name.end(),
'_',
'-');
100 replace(name.begin(), name.end(),
' ',
'-');
109 if (name ==
"insertion-seq") {
111 }
else if (name ==
"plasmid") {
113 }
else if (name ==
"transposon") {
115 }
else if (name ==
"sub-clone") {
129 replace(name.begin(), name.end(),
'_',
'-');
130 replace(name.begin(), name.end(),
' ',
'-');
140 if (name ==
"insertion-seq" ||
142 name ==
"transposon" ||
143 name ==
"sub-clone") {
234 if (month < 1 || month > 12 || day < 1) {
240 }
else if (year > 3000) {
242 }
else if (year < 1538) {
245 CTime month_o(year, month, 1);
257 "collection-date string is blank");
261 if (IsISOFormatDate(
str)) {
262 return GetDateFromISODate(
str);
275 month =
str.substr(0, pos);
276 year =
str.substr(pos + 1);
279 "collection-date string is improperly formatted");
282 day =
str.substr(0, pos);
283 month =
str.substr(pos + 1, pos2 - pos - 1);
284 year =
str.substr(pos2 + 1);
287 "collection-date string is improperly formatted");
298 "collection-date string has invalid month");
308 "collection-date string has invalid day value");
310 }
catch (
const exception& ) {
313 "collection-date string is improperly formatted");
319 "collection-date string is improperly formatted");
325 }
catch (
const exception& ) {
328 "collection-date string is improperly formatted");
338 if (year_val < 1000) {
340 "collection-date year is out of range");
343 if (year_val >= 2100) {
345 "collection-date year is out of range");
348 if (day_val > 0 && month_val > 0 && !IsDayValueOkForMonth(day_val, month_val, year_val)) {
350 "collection-date day is greater than monthly maximum");
383 bool in_future =
false;
384 vector<string> pieces;
386 if (pieces.size() > 2) {
389 ITERATE(vector<string>, it, pieces) {
425 vector<string> pieces;
427 if (pieces.size() > 2) {
430 }
else if (pieces.size() == 2) {
431 bool first_bad =
false;
432 bool first_future =
false;
433 bool second_bad =
false;
434 bool second_future =
false;
437 bad_format = first_bad || second_bad;
439 in_future = first_future || second_future;
452 size_t pos2 =
NStr::Find(date_string,
"-", pos + 1);
453 if (pos2 !=
NPOS && pos != 2) {
474 vector<string> pieces;
476 if (pieces.size() > 2) {
478 }
else if (pieces.size() == 2) {
503 size_t pos2 =
NStr::Find(date_string,
"-", pos + 1);
504 if (pos2 !=
NPOS && pos != 2) {
530 "missing: control sample",
531 "missing: data agreement established pre-2023",
532 "missing: endangered species",
533 "missing: human-identifiable",
534 "missing: lab stock",
535 "missing: sample group",
536 "missing: synthetic construct",
537 "missing: third party data",
547 if (s_Null_CollectionDatesSet.find(date_string.c_str()) != s_Null_CollectionDatesSet.end()) {
550 size_t rval = CheckDateFormat(date_string);
551 if (rval & eDateFormatFlag_bad_format) {
552 problem =
"Collection_date format is not in DD-Mmm-YYYY format";
553 }
else if (rval & eDateFormatFlag_in_future) {
554 problem =
"Collection_date is in the future";
555 }
else if (rval & eDateFormatFlag_out_of_order) {
556 problem =
"Collection_dates are out of order";
569 if (second_pos !=
NPOS) {
572 bool month_ambig =
false;
573 string first_date =
FixDateFormat(orig_date.substr(0, pos),
true, month_ambig);
577 string second_date =
FixDateFormat(orig_date.substr(pos + delim.
length()),
true, month_ambig);
581 string fix = first_date +
"/" + second_date;
588 bool month_ambiguous =
false;
590 string fix =
FixDateFormat(orig_date,
true, month_ambiguous);
591 if (month_ambiguous) {
594 static const char* delimiters[] = {
"/",
" to ",
" and ",
"-",
"_"};
618 if (require_time_zone) {
621 suffix = orig_time.length();
624 if (orig_time.substr(
suffix).length() != 6 ||
627 orig_time[
suffix + 3] !=
':' ||
644 if (!
isdigit((
unsigned char)orig_time[0]) || !
isdigit((
unsigned char)orig_time[1])) {
652 if (hour < 0 || hour > 23) {
660 if (!
isdigit((
unsigned char)orig_time[3]) || !
isdigit((
unsigned char)orig_time[4])) {
665 if (min < 0 || min > 59) {
674 if (!
isdigit((
unsigned char)orig_time[6]) || !
isdigit((
unsigned char)orig_time[7])) {
682 }
else if (sec > 59) {
707 if (cpy.length() != 10 && cpy.length() != 7) {
712 string::const_iterator it = cpy.begin();
713 while (it != cpy.end() && rval) {
714 if (pos == 4 || pos == 7) {
728 if (month < 1 || month > 12) {
731 if (cpy.length() == 10) {
747 string cpy = orig_date;
751 if (time_pos ==
NPOS) {
772 string cpy = orig_date;
775 if (time_pos !=
NPOS) {
776 cpy = cpy.substr(0, time_pos);
784 string cpy = orig_date;
787 if (time_pos ==
NPOS) {
800 string cpy = orig_date;
807 if (cpy.length() > 7) {
820 vector<string> tokens;
821 string token_delimiters =
" ,-/=_.";
823 string cpy = orig_date;
827 bool is_chars =
false;
829 if (token_delimiters.find(*s) !=
NPOS) {
831 tokens.push_back(curr_token);
835 }
else if (is_chars && !
isalpha((
unsigned char)(*s))) {
838 tokens.push_back(curr_token);
844 tokens.push_back(curr_token);
855 tokens.push_back(curr_token);
859 if (tokens.size() > 3) {
860 vector<string>::iterator p = tokens.begin();
861 bool prev_is_number =
isdigit((
unsigned char)(*p)[0]);
862 vector<string>::iterator s = p;
864 while (s != tokens.end()) {
865 if (prev_is_number &&
872 prev_is_number =
false;
876 prev_is_number =
isdigit((
unsigned char)(*p)[0]);
885 bool s_ChooseMonthAndDay(
const string& token1,
const string& token2,
bool month_first,
string& month,
int& day,
bool& month_ambiguous)
890 if (val1 > 12 && val2 > 12) {
893 }
else if (val1 < 13 && val2 < 13) {
900 month_ambiguous =
true;
909 }
else if (val1 < 13) {
925 string orig_date =
test;
934 string reformatted_date;
936 int year = 0, day = 0;
938 size_t num_original_tokens = 0;
940 month_ambiguous =
false;
943 num_original_tokens = tokens.size();
944 if (tokens.size() < 1 || tokens.size() > 3) {
950 vector<string>::iterator it = tokens.begin();
951 while (it != tokens.end()) {
963 }
else if (one_token.length() > 0
964 &&
isdigit((
unsigned char)one_token[0])
973 }
else if (
isalpha((
unsigned char)one_token[0])) {
978 if (one_token.length() > 3) {
979 one_token = one_token.substr(0, 3);
992 if (this_val <
min) {
994 }
else if (this_val >
max) {
1008 it = tokens.erase(it);
1014 if (tokens.size() == 0) {
1016 }
else if (tokens.size() > 2) {
1023 if (!
s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
1027 month_ambiguous =
true;
1032 }
else if (tokens.size() == 1) {
1039 if (
val > 0 &&
val < 13) {
1054 if (tokens.size() == 2) {
1067 if (val1 < 10 && !zero_pad_1 && (val2 > 10 || zero_pad_2)) {
1073 }
else if (val2 < 10 && !zero_pad_2 && (val1 > 10 || zero_pad_1)) {
1093 if (!
s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
1110 if (year > 0 && year < 100 && num_original_tokens > 1) {
1115 bool format_bad =
false;
1116 bool in_future =
false;
1124 if (year >= 1000 && year < 2100) {
1127 reformatted_date = month +
"-" + reformatted_date;
1130 if (day_str.length() < 2) {
1131 day_str =
"0" + day_str;
1133 reformatted_date = day_str +
"-" + reformatted_date;
1138 return reformatted_date;
1147 if (tokens.size() != 3) {
1156 ITERATE(vector<string>, it, tokens) {
1165 vector<int> positions;
1166 positions.push_back(0);
1167 positions.push_back(0);
1168 positions.push_back(0);
1171 ITERATE(vector<int>, it, nums) {
1173 if (positions[
eYear] > 0) {
1178 positions[
eYear] = token_pos;
1179 }
else if (*it > 12) {
1180 if (positions[
eDay] > 0) {
1185 positions[
eDay] = token_pos;
1186 }
else if (positions[
eMonth] > 0) {
1191 positions[
eMonth] = token_pos;
1204 bool& lat_in_range,
bool& lon_in_range,
1205 double& lat_value,
double& lon_value)
1207 format_correct =
false;
1208 lat_in_range =
false;
1209 lon_in_range =
false;
1210 precision_correct =
false;
1220 }
else if (sscanf (lat_lon.c_str(),
"%lf %c %lf %c%n", &ns, &lat, &ew, &lon, &processed) != 4
1221 ||
size_t(processed) != lat_lon.length()) {
1223 }
else if ((lat !=
'N' && lat !=
'S') || (lon !=
'E' && lon !=
'W')) {
1230 lat_value = 0.0 - ns;
1235 lon_value = 0.0 - ew;
1239 vector<string> pieces;
1241 if (pieces.size() > 3) {
1245 char reformatted[1000];
1246 sprintf (reformatted,
"%.*lf %c %.*lf %c", precision_lat, ns, lat,
1247 precision_lon, ew, lon);
1249 size_t len = strlen (reformatted);
1251 && (
len == lat_lon.length()
1252 || (
len < lat_lon.length()
1253 && lat_lon[
len] ==
';'))) {
1254 format_correct =
true;
1255 if (ns <= 90 && ns >= 0) {
1256 lat_in_range =
true;
1258 if (ew <= 180 && ew >= 0) {
1259 lon_in_range =
true;
1261 if (precision_lat < 3 && precision_lon < 3) {
1262 precision_correct =
true;
1272 bool format_correct =
false;
1273 bool precision_correct =
false;
1274 bool lat_in_range =
false;
1275 bool lon_in_range =
false;
1276 double lat_value = 0.0;
1277 double lon_value = 0.0;
1279 lat_in_range, lon_in_range,
1280 lat_value, lon_value);
1281 if (!format_correct || !lat_in_range || !lon_in_range || precision_correct) {
1284 vector<string> pieces;
1286 if (pieces.size() > 3) {
1289 if (precision_lat > 4) {
1292 if (precision_lon > 4) {
1296 char reformatted[1000];
1297 sprintf(reformatted,
"%.*lf %c %.*lf %c", precision_lat,
fabs(lat_value), pieces[1].c_str()[0],
1298 precision_lon,
fabs(lon_value), pieces[3].c_str()[0]);
1299 string new_val = reformatted;
1319 for (string::const_iterator
i = old_str.begin();
i != old_str.end(); ++
i)
1324 char c =
static_cast<char>(sym);
1325 if (!
isalpha(c) && !
isdigit(c) && c !=
'.' && c !=
'-' && c !=
'+')
1329 else if (!new_str.empty() &&
1336 if (!
isalpha(c) && !
isdigit(c) && c !=
'.' && c !=
'-' && c !=
'+')
1352 bool is_number =
true;
1353 for (string::const_iterator
i = old_str.begin();
i != old_str.end(); ++
i)
1358 char c =
static_cast<char>(sym);
1359 size_t j = new_str.size();
1360 if (j >= 4 && new_str[j-1] ==
' ' && new_str[j-2] ==
'.' && new_str[j-3] ==
' ' &&
isdigit(new_str[j-4]) &&
isdigit(c))
1368 if (!
isdigit(c) && c !=
'+' && c !=
'-' && c !=
'.' && !
isspace(c)) {
1399 static string s_NormalizeTokens(vector<string> &tokens, vector<double> &numbers, vector<string> &anum, vector<int> &
precision, vector<string> &lat_long, vector<string> &nsew)
1401 vector<string> pattern;
1402 for (
size_t i = 0;
i < tokens.size();
i++)
1404 string &token = tokens[
i];
1409 numbers.push_back(num);
1410 anum.push_back(token);
1411 pattern.push_back(
"1");
1416 =
static_cast<int>(token.length() - token.find(
'.') - 1);
1424 double num0, num1, num2;
1427 numbers.push_back(num0);
1428 anum.push_back(
tmp[0]);
1429 pattern.push_back(
"1");
1431 numbers.push_back(num1);
1432 anum.push_back(
tmp[1]);
1433 pattern.push_back(
"1");
1435 numbers.push_back(num2);
1436 anum.push_back(
tmp[2]);
1437 pattern.push_back(
"1");
1451 pattern.push_back(
"degrees");
1456 pattern.push_back(
"\'");
1461 pattern.push_back(
"\"");
1463 else if (token ==
"," || token ==
":" || token ==
"_" || token ==
"&" || token ==
"." || token ==
";" || token ==
"#" ||
NStr::EqualNocase(token,
"and"))
1468 pattern.push_back(
"lat");
1469 lat_long.push_back(
"lat");
1474 pattern.push_back(
"lat");
1475 lat_long.push_back(
"long");
1479 pattern.push_back(
"N");
1480 nsew.push_back(
"N");
1484 pattern.push_back(
"N");
1485 nsew.push_back(
"S");
1489 pattern.push_back(
"N");
1490 nsew.push_back(
"E");
1494 pattern.push_back(
"N");
1495 nsew.push_back(
"W");
1497 else if (token ==
"NW")
1499 nsew.push_back(
"N");
1500 nsew.push_back(
"W");
1502 else if (token ==
"NE")
1504 nsew.push_back(
"N");
1505 nsew.push_back(
"E");
1507 else if (token ==
"SW")
1509 nsew.push_back(
"S");
1510 nsew.push_back(
"W");
1512 else if (token ==
"SE")
1514 nsew.push_back(
"S");
1515 nsew.push_back(
"E");
1530 if (numbers.size() != 2)
1535 if (lat_long.size() == 2)
1537 if (lat_long.front() ==
"long")
1539 swap(numbers[0], numbers[1]);
1541 if (nsew.size() == 2) {
1542 swap(nsew[0], nsew[1]);
1546 else if (!lat_long.empty())
1551 if (nsew.size() == 2)
1553 if ((nsew[0] ==
"E" || nsew[0] ==
"W") &&
1554 (nsew[1] ==
"N" || nsew[1] ==
"S"))
1556 swap(numbers[0], numbers[1]);
1558 swap(nsew[0], nsew[1]);
1562 numbers[0] =
fabs(numbers[0]);
1564 else if (nsew[0] ==
"S")
1566 if (numbers[0] != 0)
1567 numbers[0] = -
fabs(numbers[0]);
1576 numbers[1] =
fabs(numbers[1]);
1578 else if (nsew[1] ==
"W")
1580 if (numbers[1] != 0)
1581 numbers[1] = -
fabs(numbers[1]);
1590 else if (!nsew.empty())
1595 if (lat_long.empty() && nsew.empty() &&
fabs(numbers[0]) > 90 &&
fabs(numbers[1]) < 90)
1597 swap(numbers[0], numbers[1]);
1600 if (
fabs(numbers[0]) > 90 ||
fabs(numbers[1]) > 180)
1609 vector<string> tokens;
1611 vector<string> lat_long;
1612 vector<string> nsew;
1613 vector<string> anum;
1615 if (pattern.empty())
1620 vector<double> degrees(2, 0);
1621 vector<int> prec(2, 0);
1624 if ( pattern ==
"1 1" ||
1625 pattern ==
"1 N 1 N" ||
1626 pattern ==
"N 1 N 1" ||
1627 pattern ==
"1 degrees N 1 degrees N" ||
1628 pattern ==
"lat 1 lat 1" ||
1629 pattern ==
"1 N lat 1 N lat" ||
1630 pattern ==
"1 degrees N lat 1 degrees N lat")
1632 degrees[0] = numbers[0];
1633 degrees[1] = numbers[1];
1637 else if ((pattern ==
"1 1 \" 1 1 '" ||
1638 pattern ==
"1 degrees 1 \" N 1 degrees 1 ' N")
1639 && numbers[1] < 60 && numbers[3] < 60
1640 && numbers[1] >= 0 && numbers[3] >= 0)
1642 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1643 sign2 = anum[2][0] ==
'-' ? -1 : 1;
1644 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 3600);
1645 degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60);
1649 else if ( (pattern ==
"1 1 ' 1" ||
1650 pattern ==
"1 degrees 1 ' N 1 degrees N")
1654 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1655 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1656 degrees[1] = numbers[2];
1660 else if (pattern ==
"1 1 ' 1 \" 1"
1661 && numbers[1] < 60 && numbers[2] < 60
1662 && numbers[1] >= 0 && numbers[2] >= 0)
1664 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1665 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1666 degrees[1] = numbers[3];
1670 else if ((pattern ==
"1 1 ' 1 \" 1 1 '" ||
1671 pattern ==
"1 1 1 N 1 1 N" ||
1672 pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 ' N")
1673 && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
1674 && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
1676 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1677 sign2 = anum[3][0] ==
'-' ? -1 : 1;
1678 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1679 degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 60);
1683 else if (( pattern ==
"1 1 ' 1 \" 1 1 ' 1 \"" ||
1684 pattern ==
"1 1 ' 1 \" N 1 1 ' 1 \" N" ||
1685 pattern ==
"1 degrees 1 ' 1 \" 1 degrees 1 ' 1 \"" ||
1686 pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \" N" ||
1687 pattern ==
"N 1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \"" ||
1688 pattern ==
"1 degrees 1 ' 1 N 1 degrees 1 ' 1 N" ||
1689 pattern ==
"1 degrees 1 1 N 1 degrees 1 1 N" ||
1690 pattern ==
"1 1 1 N 1 1 1 N")
1691 && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 && numbers[5] < 60
1692 && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0 && numbers[5] >= 0)
1694 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1695 sign2 = anum[3][0] ==
'-' ? -1 : 1;
1696 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1697 degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 60 + numbers[5] / 3600);
1701 else if (( pattern ==
"1 1 ' 1 1 '" ||
1702 pattern ==
"1 1 N 1 1 N" ||
1703 pattern ==
"1 1 ' N 1 1 ' N" ||
1704 pattern ==
"1 degrees 1 ' N 1 degrees 1 ' N" ||
1705 pattern ==
"lat 1 degrees 1 ' N lat 1 degrees 1 ' N" ||
1706 pattern ==
"1 degrees 1 N 1 degrees 1 N" ||
1707 pattern ==
"1 degrees 1 N 1 degrees 1 ' N" ||
1708 pattern ==
"1 degrees 1 ' N 1 degrees 1 N" ||
1709 pattern ==
"N 1 degrees 1 ' N 1 degrees 1" ||
1710 pattern ==
"N 1 degrees 1 ' N 1 degrees 1 '" ||
1711 pattern ==
"N 1 degrees 1 ' N 1 1 '")
1712 && numbers[1] < 60 && numbers[3] < 60
1713 && numbers[1] >= 0 && numbers[3] >= 0)
1715 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1716 sign2 = anum[2][0] ==
'-' ? -1 : 1;
1717 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1718 degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60);
1722 else if ((pattern ==
"1 N 1 1 N" ||
1723 pattern ==
"1 degrees N 1 degrees 1 ' N")
1727 sign2 = anum[1][0] ==
'-' ? -1 : 1;
1728 degrees[0] = numbers[0];
1729 degrees[1] = sign2*(
fabs(numbers[1]) + numbers[2] / 60);
1733 else if ((pattern ==
"1 degrees 1 ' 1 degrees 1 ' 1 \"" ||
1734 pattern ==
"N 1 1 N 1 1 1")
1735 && numbers[1] < 60 && numbers[3] < 60 && numbers[4] < 60
1736 && numbers[1] >= 0 && numbers[3] >= 0 && numbers[4] >= 0)
1738 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1739 sign2 = anum[2][0] ==
'-' ? -1 : 1;
1740 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60);
1741 degrees[1] = sign2*(
fabs(numbers[2]) + numbers[3] / 60 + numbers[4] / 3600);
1745 else if (pattern ==
"1 degrees 1 degrees 1 ' 1 \""
1746 && numbers[2] < 60 && numbers[3] < 60
1747 && numbers[2] >= 0 && numbers[3] >= 0)
1749 sign2 = anum[1][0] ==
'-' ? -1 : 1;
1750 degrees[0] = numbers[0];
1751 degrees[1] = sign2*(
fabs(numbers[1]) + numbers[2] / 60 + numbers[3] / 3600);
1755 else if (pattern ==
"1 degrees 1 ' 1 \" N 1 degrees 1 \" N"
1756 && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
1757 && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
1759 sign1 = anum[0][0] ==
'-' ? -1 : 1;
1760 sign2 = anum[3][0] ==
'-' ? -1 : 1;
1761 degrees[0] = sign1*(
fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
1762 degrees[1] = sign2*(
fabs(numbers[3]) + numbers[4] / 3600);
1771 swap(degrees, numbers);
1779 string north_or_south;
1781 string east_or_west;
1787 if (ch < '0' || ch >
'9') {
1793 lat_lon_stream >> lat;
1794 lat_lon_stream >> north_or_south;
1795 lat_lon_stream >> lon;
1796 lat_lon_stream >> east_or_west;
1797 if( lat_lon_stream.bad() ) {
1801 if( north_or_south !=
"N" && north_or_south !=
"S" ) {
1805 if( east_or_west !=
"E" && east_or_west !=
"W" ) {
1811 size_t len = lat.length();
1812 if (pos + 9 <
len) {
1819 size_t len = lon.length();
1820 if (pos + 9 <
len) {
1825 return lat +
" " + north_or_south +
" " + lon +
" " + east_or_west;
1842 vector<double> numbers;
1846 if (!numbers.empty())
1859 if (lat_value < 0) {
1861 lat_value = -lat_value;
1864 if (lon_value < 0) {
1866 lon_value = -lon_value;
1873 string res = lat +
" " + ns +
" " + lon +
" " + ew;
1882 bool goodmatch =
false;
1888 id->SetGuessCountry(guess->
GetLevel0());
1889 id->SetGuessProvince(guess->
GetLevel1());
1896 guess =
m_LatLonWaterMap->GuessRegionForLatLon(lat_value, lon_value, country);
1906 double landdistance = 0.0;
1907 guess =
m_LatLonCountryMap->FindClosestToLatLon (lat_value, lon_value, 5.0, landdistance);
1910 id->SetClosestCountry(guess->
GetLevel0());
1911 id->SetClosestProvince(guess->
GetLevel1());
1920 double landdistance = 0.0;
1921 guess =
m_LatLonCountryMap->FindClosestToLatLon (lat_value, lon_value, 5.0, landdistance);
1924 id->SetClosestCountry(guess->
GetLevel0());
1925 id->SetClosestProvince(guess->
GetLevel1());
1933 double waterdistance = 0.0;
1934 guess =
m_LatLonWaterMap->FindClosestToLatLon (lat_value, lon_value, 5.0, waterdistance);
1936 id->SetClosestWater(guess->
GetLevel0());
1937 id->SetWaterDistance(
m_LatLonWaterMap->AdjustAndRoundDistance (waterdistance));
1947 double distance = 0.0;
1948 guess =
m_LatLonCountryMap->IsNearLatLon (lat_value, lon_value, 5.0, distance, country, province);
1952 id->SetGuessCountry(country);
1953 id->SetGuessProvince(province);
1960 guess =
m_LatLonWaterMap->IsNearLatLon (lat_value, lon_value, 5.0, distance, country, province);
1963 id->SetClaimedDistance(
m_LatLonWaterMap->AdjustAndRoundDistance (distance));
1975 {
"Adriatic Sea",
"Mediterranean Sea"},
1976 {
"Aegean Sea",
"Mediterranean Sea"},
1977 {
"Alboran Sea",
"Mediterranean Sea"},
1978 {
"Andaman Sea",
"Indian Ocean"},
1979 {
"Arabian Sea",
"Indian Ocean"},
1980 {
"Argentine Sea",
"Atlantic Ocean"},
1981 {
"Ariake Sea",
"Pacific Ocean"},
1982 {
"Baffin Bay",
"Atlantic Ocean"},
1983 {
"Balearic Sea",
"Mediterranean Sea"},
1984 {
"Baltic Sea",
"Atlantic Ocean"},
1985 {
"Barents Sea",
"Arctic Ocean"},
1986 {
"Bay of Bengal",
"Indian Ocean"},
1987 {
"Beaufort Sea",
"Arctic Ocean"},
1988 {
"Bering Sea",
"Pacific Ocean"},
1989 {
"Bismarck Sea",
"Pacific Ocean"},
1990 {
"Black Sea",
"Mediterranean Sea"},
1991 {
"Bohai Sea",
"Pacific Ocean"},
1992 {
"Caribbean Sea",
"Atlantic Ocean"},
1993 {
"Celebes Sea",
"Pacific Ocean"},
1994 {
"Champlain Sea",
"Atlantic Ocean"},
1995 {
"Chilean Sea",
"Pacific Ocean"},
1996 {
"China Seas",
"Pacific Ocean"},
1997 {
"Chukchi Sea",
"Arctic Ocean"},
1998 {
"Coral Sea",
"Pacific Ocean"},
1999 {
"Davis Strait",
"Atlantic Ocean"},
2000 {
"East China Sea",
"Pacific Ocean"},
2001 {
"East Siberian Sea",
"Arctic Ocean"},
2002 {
"English Channel",
"Atlantic Ocean"},
2003 {
"Erythraean Sea",
"Indian Ocean"},
2004 {
"Golfo de California",
"Pacific Ocean"},
2005 {
"Greenland Sea",
"Arctic Ocean"},
2006 {
"Gulf of Mexico",
"Atlantic Ocean"},
2007 {
"Gulf of Thailand",
"Pacific Ocean"},
2008 {
"Gulf of Tonkin",
"Pacific Ocean"},
2009 {
"Hudson Bay",
"Arctic Ocean"},
2010 {
"Ionian Sea",
"Mediterranean Sea"},
2011 {
"Irish Sea",
"Atlantic Ocean"},
2012 {
"Irminger Sea",
"Atlantic Ocean"},
2013 {
"James Bay",
"Atlantic Ocean"},
2014 {
"Java Sea",
"Indian Ocean"},
2015 {
"Kara Sea",
"Arctic Ocean"},
2016 {
"Koro Sea",
"Pacific Ocean"},
2017 {
"Labrador Sea",
"Atlantic Ocean"},
2018 {
"Laccadive Sea",
"Indian Ocean"},
2019 {
"Laptev Sea",
"Arctic Ocean"},
2020 {
"Ligurian Sea",
"Mediterranean Sea"},
2021 {
"Lincoln Sea",
"Arctic Ocean"},
2022 {
"Myrtoan Sea",
"Mediterranean Sea"},
2023 {
"North Sea",
"Atlantic Ocean"},
2024 {
"Norwegian Sea",
"Atlantic Ocean"},
2025 {
"Pechora Sea",
"Arctic Ocean"},
2026 {
"Persian Gulf",
"Indian Ocean"},
2027 {
"Philippine Sea",
"Pacific Ocean"},
2028 {
"Red Sea",
"Indian Ocean"},
2029 {
"Salish Sea",
"Pacific Ocean"},
2030 {
"Sargasso Sea",
"Atlantic Ocean"},
2031 {
"Scotia Sea",
"Southern Ocean"},
2032 {
"Sea of Azov",
"Black Sea"},
2033 {
"Sea of Chiloe",
"Pacific Ocean"},
2034 {
"Sea of Crete",
"Mediterranean Sea"},
2035 {
"Sea of Japan",
"Pacific Ocean"},
2036 {
"Sea of Okhotsk",
"Pacific Ocean"},
2037 {
"Sea of the Hebrides",
"Atlantic Ocean"},
2038 {
"Sea of Zanj",
"Indian Ocean"},
2039 {
"Seas of Greenland",
"Atlantic Ocean"},
2040 {
"Sethusamudram",
"Indian Ocean"},
2041 {
"Sibutu Passage",
"Pacific Ocean"},
2042 {
"Solomon Sea",
"Pacific Ocean"},
2043 {
"South China Sea",
"Pacific Ocean"},
2044 {
"Sulu Sea",
"Pacific Ocean"},
2045 {
"Tasman Sea",
"Pacific Ocean"},
2046 {
"Thracian Sea",
"Mediterranean Sea"},
2047 {
"Timor Sea",
"Indian Ocean"},
2048 {
"Tyrrhenian Sea",
"Mediterranean Sea"},
2049 {
"Wandel Sea",
"Arctic Ocean"},
2050 {
"White Sea",
"Arctic Ocean"},
2051 {
"Yellow Sea",
"Pacific Ocean"}
2060 if( new_water_pair_iter != sc_WaterPairMap.end() ) {
2061 return new_water_pair_iter->second;
2070 string countryname = input_countryname;
2076 static std::mutex m;
2078 std::lock_guard
g(m);
2089 bool format_correct, lat_in_range, lon_in_range, precision_correct;
2090 double lat_value = 0.0, lon_value = 0.0;
2092 lat_in_range, lon_in_range,
2093 lat_value, lon_value);
2094 if (!format_correct) {
2098 lat_lon = lat_lon.substr(0, pos);
2100 lat_in_range, lon_in_range,
2101 lat_value, lon_value);
2106 if (!format_correct || !lat_in_range || !lon_in_range) {
2116 countryname = countryname.substr(0, pos);
2120 countryname = countryname.substr(0, pos);
2125 countryname =
"Svalbard";
2128 string country = countryname;
2134 province = country.substr(pos + 1);
2137 country = country.substr(0, pos);
2168 string wguess =
id->GetGuessWater();
2169 string cguess =
id->GetGuessCountry();
2173 if (province.empty() &&
NStr::Equal(cguess, country)) {
2193 double neardist = 0.0;
2197 if (!
flags &&
m_LatLonCountryMap->IsNearLatLon(lat_value, lon_value, 2.0, neardist, country) && neardist < 5.0) {
2198 id->SetGuessCountry (country);
2200 flags =
id->Classify(country, province);
2204 && !
m_LatLonWaterMap->IsNearLatLon(lat_value, lon_value, 20.0, neardist, country)) {
2207 adjusted_flags = adjust_id ==
NULL ? 0 : adjust_id->
Classify(country, province);
2208 if (adjusted_flags) {
2214 flags = adjusted_flags;
2222 adjusted_flags = adjust_id ==
NULL ? 0 : adjust_id->
Classify(country, province);
2223 if (adjusted_flags) {
2229 flags = adjusted_flags;
2237 adjusted_flags = adjust_id ==
NULL ? 0 : adjust_id->
Classify(country, province);
2238 if (adjusted_flags) {
2244 flags = adjusted_flags;
2261 error =
"Latitude and longitude values appear to be exchanged";
2265 if (lat_value < 0.0) {
2266 error =
"Latitude should be set to N (northern hemisphere)";
2268 error =
"Latitude should be set to S (southern hemisphere)";
2273 if (lon_value < 0.0) {
2274 error =
"Longitude should be set to E (eastern hemisphere)";
2276 error =
"Longitude should be set to W (western hemisphere)";
2286 string full_guess =
id->GetFullGuess();
2289 error =
"Lat_lon " + lat_lon +
" is in " +
id->GetFullGuess()
2290 +
" (more specific than " + country +
")";
2295 bool suppress =
false;
2296 string reportregion;
2298 string desphrase =
"designated subregion ";
2299 string subphrase =
"another subregion ";
2300 string phrase = nosubphrase;
2301 bool show_claimed =
false;
2313 reportregion = countryname;
2319 reportregion =
id->GetClosestFull();
2321 reportregion =
id->GetClosestCountry();
2324 show_claimed =
true;
2327 string water =
id->GetGuessWater();
2340 }
else if (!suppress) {
2343 error =
"Lat_lon '" + lat_lon +
"' is closest to " + phrase +
"'" + reportregion +
"' at distance "
2345 +
" km, but in water '" +
id->GetGuessWater()
2346 +
"' - claimed region '" +
id->GetClaimedFull()
2349 error =
"Lat_lon '" + lat_lon +
"' is closest to " + phrase +
"'" + reportregion
2351 +
id->GetGuessWater() +
"'";
2354 }
else if (neardist > 0.0) {
2356 error =
"Lat_lon '" + lat_lon +
"' is in water '" +
id->GetGuessWater() +
"', '"
2360 error =
"Lat_lon '" + lat_lon +
"' is in water '" +
id->GetGuessWater() +
"'";
2363 string full_guess =
id->GetFullGuess();
2372 error =
"Lat_lon '" + lat_lon +
"' maps to '" +
id->GetFullGuess() +
"' instead of '"
2373 + countryname +
"'";
2377 error =
"Lat_lon '" + lat_lon +
"' maps to '" +
id->GetFullGuess() +
"' instead of '"
2378 + country +
"' - claimed region '" +
id->GetClaimedFull()
2386 error =
"Lat_lon '" + lat_lon +
"' maps to '" +
id->GetFullGuess() +
"' instead of '"
2387 + countryname +
"' - claimed region '" +
id->GetClaimedFull()
2396 error =
"Lat_lon '" + lat_lon +
"' is closest to '" +
id->GetClosestCountry() +
"' instead of '"
2397 + countryname +
"'";
2400 error =
"Lat_lon '" + lat_lon +
"' is closest to '" +
id->GetClosestWater() +
"' instead of '"
2401 + countryname +
"'";
2404 error =
"Unable to determine mapping for lat_lon '" + lat_lon +
"' and country '" + countryname +
"'";
2434 "pooled males and females",
2435 "pooled male and female",
2446 if (find(begin, end,
value) != end) {
2464 vector<string> words;
2466 if (words.size() == 0) {
2475 bool is_good =
false;
2477 ITERATE(vector<string>, w, words) {
2481 if (find(begin, end, *w) != end) {
2502 vector<string> words;
2505 if (words.size() == 0) {
2513 vector<string> good_values;
2514 bool pooled =
false;
2516 ITERATE(vector<string>, w, words) {
2523 if (find(begin, end, *w) != end) {
2525 good_values.push_back(
"male");
2527 good_values.push_back(
"female");
2529 good_values.push_back(*w);
2537 if (good_values.size() == 0) {
2542 string fixed = good_values[0];
2543 for (
size_t i = 1;
i < good_values.size();
i++) {
2544 if (good_values.size() > 2) {
2547 if (
i == good_values.size() - 1) {
2550 fixed +=
" " + good_values[
i];
2553 fixed =
"pooled " + fixed;
2568 string::const_iterator it =
value.begin();
2569 if (*it ==
'+' || *it ==
'-') {
2574 bool any_digit =
false;
2575 bool skip_comma =
true;
2576 while (it !=
value.end() && (
isdigit(*it) || *it ==
',')) {
2591 if (it ==
value.end()) {
2606 if (it ==
value.end() || *it !=
' ' || !any_digit) {
2612 while (it !=
value.end()) {
2650 char reformatted[1000];
2652 string rval = reformatted;
2681 rval =
number +
" " +
"m";
2699 }
else if (
value.length() > 240) {
2703 for (
auto it :
value) {
2752 string genus = taxname.substr(0, pos);
2757 string species = taxname.substr(pos + 1);
2761 if (pos != 1 ||
value[0] !=
'p') {
2782 }
else if (
value.length() > 32) {
2791 static string s_ForbiddenPhrases[] = {
2803 for (
auto it : s_ForbiddenPhrases) {
2867 if (s_PlasmidNameExceptions.
find(
value) != end(s_PlasmidNameExceptions)) {
2885 #include "cell_line.inc"
2889 vector<string> tokens;
2891 if (tokens.size() < 4) {
2893 <<
"; disregarding");
2910 size_t count =
sizeof(kCellLine) /
sizeof (*kCellLine);
2911 const char *
const * start = kCellLine;
2926 string cell_line_search = cell_line;
2930 rval =
"The International Cell Line Authentication Committee database indicates that " +
2931 cell_line +
" from " + organism +
" is known to be contaminated by " +
2934 ". Please see http://iclac.org/databases/cross-contaminations/ for more information and references.";
2955 "Antigua and Barbuda",
2960 "Ashmore and Cartier Islands",
2980 "Bosnia and Herzegovina",
2984 "British Virgin Islands",
2994 "Central African Republic",
2999 "Clipperton Island",
3004 "Coral Sea Islands",
3012 "Democratic Republic of the Congo",
3016 "Dominican Republic",
3020 "Equatorial Guinea",
3026 "Falkland Islands (Islas Malvinas)",
3033 "French Southern and Antarctic Lands",
3053 "Heard Island and McDonald Islands",
3075 "Juan de Nova Island",
3078 "Kerguelen Archipelago",
3106 "Mediterranean Sea",
3108 "Micronesia, Federated States of",
3133 "Northern Mariana Islands",
3151 "Republic of the Congo",
3159 "Saint Kitts and Nevis",
3162 "Saint Pierre and Miquelon",
3163 "Saint Vincent and the Grenadines",
3166 "Sao Tome and Principe",
3179 "South Georgia and the South Sandwich Islands",
3186 "State of Palestine",
3202 "Trinidad and Tobago",
3207 "Turks and Caicos Islands",
3212 "United Arab Emirates",
3221 "Wallis and Futuna",
3240 "Netherlands Antilles",
3241 "Serbia and Montenegro",
3244 "The former Yugoslav Republic of Macedonia",
3254 "missing: control sample",
3255 "missing: data agreement established pre-2023",
3256 "missing: endangered species",
3257 "missing: human-identifiable",
3258 "missing: lab stock",
3259 "missing: sample group",
3260 "missing: synthetic construct",
3261 "missing: third party data",
3271 string name = country;
3272 size_t pos = country.find(
':');
3274 if ( pos !=
NPOS ) {
3275 if (pos == country.length() - 1) {
3278 name = country.substr(0, pos);
3296 string name = country;
3297 size_t pos = country.find(
':');
3299 if ( pos !=
NPOS ) {
3300 name = country.substr(0, pos);
3301 if (pos == country.length() - 1) {
3306 is_miscapitalized =
false;
3321 is_miscapitalized =
true;
3327 is_miscapitalized =
true;
3333 is_miscapitalized =
true;
3344 string name = country;
3345 size_t pos = country.find(
':');
3347 if ( pos !=
NPOS ) {
3348 name = country.substr(0, pos);
3358 string name = country;
3359 size_t pos = country.find(
':');
3361 if ( pos !=
NPOS ) {
3362 name = country.substr(0, pos);
3365 is_miscapitalized =
false;
3374 is_miscapitalized =
true;
3386 {
"england",
"United Kingdom: England"},
3387 {
"great britain",
"United Kingdom: Great Britain"},
3388 {
"new jersey, usa",
"USA: New Jersey"}
3395 {
"AFG",
"Afghanistan"},
3397 {
"AIA",
"Anguilla"},
3398 {
"ALA",
"Aland Islands"},
3401 {
"ARE",
"United Arab Emirates"},
3402 {
"ARG",
"Argentina"},
3404 {
"ASM",
"American Samoa"},
3405 {
"ATA",
"Antarctica"},
3406 {
"ATF",
"French Southern Territories"},
3407 {
"ATG",
"Antigua and Barbuda"},
3408 {
"AUS",
"Australia"},
3410 {
"AZE",
"Azerbaijan"},
3411 {
"Antigua & Barbuda",
"Antigua and Barbuda"},
3412 {
"Ashmore & Cartier Islands",
"Ashmore and Cartier Islands"},
3416 {
"BES",
"Bonaire, Sint Eustatius and Saba"},
3417 {
"BFA",
"Burkina Faso"},
3418 {
"BGD",
"Bangladesh"},
3419 {
"BGR",
"Bulgaria"},
3422 {
"BIH",
"Bosnia and Herzegovina"},
3423 {
"BLM",
"Saint Barthelemy"},
3429 {
"BRB",
"Barbados"},
3432 {
"BVT",
"Bouvet Island"},
3433 {
"BWA",
"Botswana"},
3434 {
"Brasil",
"Brazil"},
3435 {
"CAF",
"Central African Republic"},
3437 {
"CCK",
"Cocos Islands"},
3438 {
"CHE",
"Switzerland"},
3441 {
"CIV",
"Cote d'Ivoire"},
3442 {
"CMR",
"Cameroon"},
3443 {
"COD",
"Democratic Republic of the Congo"},
3444 {
"COG",
"Republic of the Congo"},
3445 {
"COK",
"Cook Islands"},
3446 {
"COL",
"Colombia"},
3448 {
"CPV",
"Cape Verde"},
3449 {
"CRI",
"Costa Rica"},
3452 {
"CXR",
"Christmas Island"},
3453 {
"CYM",
"Cayman Islands"},
3455 {
"CZE",
"Czech Republic"},
3456 {
"Cape Verde Islands",
"Cape Verde"},
3458 {
"DJI",
"Djibouti"},
3459 {
"DMA",
"Dominica"},
3461 {
"DOM",
"Dominican Republic"},
3463 {
"Democratic Republic of Congo",
"Democratic Republic of the Congo"},
3467 {
"ESH",
"Western Sahara"},
3470 {
"ETH",
"Ethiopia"},
3473 {
"FLK",
"Falkland Islands (Islas Malvinas)"},
3475 {
"FRO",
"Faroe Islands"},
3476 {
"FSM",
"Micronesia, Federated States of"},
3477 {
"Falkland Islands",
"Falkland Islands (Islas Malvinas)"},
3478 {
"French Southern & Antarctic Lands",
"French Southern and Antarctic Lands"},
3480 {
"GBR",
"United Kingdom"},
3482 {
"GGY",
"Guernsey"},
3484 {
"GIB",
"Gibraltar"},
3486 {
"GLP",
"Guadeloupe"},
3488 {
"GNB",
"Guinea-Bissau"},
3489 {
"GNQ",
"Equatorial Guinea"},
3492 {
"GRL",
"Greenland"},
3493 {
"GTM",
"Guatemala"},
3494 {
"GUF",
"French Guiana"},
3497 {
"HKG",
"Hong Kong"},
3498 {
"HMD",
"Heard Island and McDonald Islands"},
3499 {
"HND",
"Honduras"},
3503 {
"Heard Island & McDonald Islands",
"Heard Island and McDonald Islands"},
3504 {
"IDN",
"Indonesia"},
3505 {
"IMN",
"Isle of Man"},
3507 {
"IOT",
"British Indian Ocean Territory"},
3514 {
"Ivory Coast",
"Cote d'Ivoire"},
3519 {
"KAZ",
"Kazakhstan"},
3521 {
"KGZ",
"Kyrgyzstan"},
3522 {
"KHM",
"Cambodia"},
3523 {
"KIR",
"Kiribati"},
3524 {
"KNA",
"Saint Kitts and Nevis"},
3525 {
"KOR",
"South Korea"},
3527 {
"LAO",
"Lao People's Democratic Republic"},
3530 {
"LBY",
"Libyan Arab Jamahiriya"},
3531 {
"LCA",
"Saint Lucia"},
3532 {
"LIE",
"Liechtenstein"},
3533 {
"LKA",
"Sri Lanka"},
3535 {
"LTU",
"Lithuania"},
3536 {
"LUX",
"Luxembourg"},
3538 {
"La Reunion Island",
"Reunion"},
3539 {
"Luxemburg",
"Luxembourg"},
3541 {
"MAF",
"Saint Martin (French part)"},
3545 {
"MDG",
"Madagascar"},
3546 {
"MDV",
"Maldives"},
3548 {
"MHL",
"Marshall Islands"},
3549 {
"MKD",
"North Macedonia"},
3553 {
"MNE",
"Montenegro"},
3554 {
"MNG",
"Mongolia"},
3555 {
"MNP",
"Northern Mariana Islands"},
3556 {
"MOZ",
"Mozambique"},
3557 {
"MRT",
"Mauritania"},
3558 {
"MSR",
"Montserrat"},
3559 {
"MTQ",
"Martinique"},
3560 {
"MUS",
"Mauritius"},
3562 {
"MYS",
"Malaysia"},
3564 {
"Macedonia",
"North Macedonia"},
3566 {
"NCL",
"New Caledonia"},
3568 {
"NFK",
"Norfolk Island"},
3570 {
"NIC",
"Nicaragua"},
3572 {
"NLD",
"Netherlands"},
3576 {
"NZL",
"New Zealand"},
3577 {
"Netherland",
"Netherlands"},
3578 {
"New Guinea",
"Papua New Guinea"},
3580 {
"P, R, China",
"China"},
3581 {
"P.R. China",
"China"},
3582 {
"P.R.China",
"China"},
3583 {
"PAK",
"Pakistan"},
3585 {
"PCN",
"Pitcairn"},
3587 {
"PHL",
"Philippines"},
3589 {
"PNG",
"Papua New Guinea"},
3591 {
"PRI",
"Puerto Rico"},
3592 {
"PRK",
"North Korea"},
3593 {
"PRT",
"Portugal"},
3594 {
"PRY",
"Paraguay"},
3595 {
"PSE",
"Palestinian Territory"},
3596 {
"PYF",
"French Polynesia"},
3597 {
"People's Republic of China",
"China"},
3598 {
"Pr China",
"China"},
3599 {
"Prchina",
"China"},
3605 {
"Republic of Congo",
"Republic of the Congo"},
3606 {
"SAU",
"Saudi Arabia"},
3609 {
"SGP",
"Singapore"},
3610 {
"SGS",
"South Georgia and the South Sandwich Islands"},
3611 {
"SHN",
"Saint Helena"},
3612 {
"SJM",
"Svalbard and Jan Mayen"},
3613 {
"SLB",
"Solomon Islands"},
3614 {
"SLE",
"Sierra Leone"},
3615 {
"SLV",
"El Salvador"},
3616 {
"SMR",
"San Marino"},
3618 {
"SPM",
"Saint Pierre and Miquelon"},
3620 {
"SSD",
"South Sudan"},
3621 {
"STP",
"Sao Tome and Principe"},
3622 {
"SUR",
"Suriname"},
3623 {
"SVK",
"Slovakia"},
3624 {
"SVN",
"Slovenia"},
3626 {
"SWZ",
"Eswatini"},
3627 {
"SXM",
"Sint Maarten (Dutch part)"},
3628 {
"SYC",
"Seychelles"},
3629 {
"SYR",
"Syrian Arab Republic"},
3630 {
"Saint Kitts & Nevis",
"Saint Kitts and Nevis"},
3631 {
"Saint Pierre & Miquelon",
"Saint Pierre and Miquelon"},
3632 {
"Saint Vincent & Grenadines",
"Saint Vincent and the Grenadines"},
3633 {
"Saint Vincent & the Grenadines",
"Saint Vincent and the Grenadines"},
3634 {
"Saint Vincent and Grenadines",
"Saint Vincent and the Grenadines"},
3635 {
"San Tome and Principe Island",
"Sao Tome and Principe"},
3636 {
"Sao Tome & Principe",
"Sao Tome and Principe"},
3637 {
"South Georgia & South Sandwich Islands",
"South Georgia and the South Sandwich Islands"},
3638 {
"South Georgia & the South Sandwich Islands",
"South Georgia and the South Sandwich Islands"},
3639 {
"St Helena",
"Saint Helena"},
3640 {
"St Lucia",
"Saint Lucia"},
3641 {
"St Pierre and Miquelon",
"Saint Pierre and Miquelon"},
3642 {
"St Vincent and the Grenadines",
"Saint Vincent and the Grenadines"},
3643 {
"St. Helena",
"Saint Helena"},
3644 {
"St. Lucia",
"Saint Lucia"},
3645 {
"St. Pierre and Miquelon",
"Saint Pierre and Miquelon"},
3646 {
"St. Vincent and the Grenadines",
"Saint Vincent and the Grenadines"},
3647 {
"TCA",
"Turks and Caicos Islands"},
3650 {
"THA",
"Thailand"},
3651 {
"TJK",
"Tajikistan"},
3653 {
"TKM",
"Turkmenistan"},
3654 {
"TLS",
"Timor-Leste"},
3656 {
"TTO",
"Trinidad and Tobago"},
3661 {
"TZA",
"Tanzania"},
3662 {
"The Netherlands",
"Netherlands"},
3663 {
"Trinidad & Tobago",
"Trinidad and Tobago"},
3664 {
"Turks & Caicos",
"Turks and Caicos Islands"},
3665 {
"Turks & Caicos Islands",
"Turks and Caicos Islands"},
3666 {
"Turks and Caicos",
"Turks and Caicos Islands"},
3669 {
"UK",
"United Kingdom"},
3671 {
"UMI",
"United States Minor Outlying Islands"},
3673 {
"UZB",
"Uzbekistan"},
3674 {
"United States",
"USA"},
3675 {
"United States of America",
"USA"},
3676 {
"VAT",
"Holy See (Vatican City State)"},
3677 {
"VCT",
"Saint Vincent and the Grenadines"},
3678 {
"VEN",
"Venezuela"},
3679 {
"VGB",
"British Virgin Islands"},
3680 {
"VIR",
"Virgin Islands"},
3681 {
"VNM",
"Viet Nam"},
3683 {
"Vietnam",
"Viet Nam"},
3684 {
"WLF",
"Wallis and Futuna"},
3687 {
"ZAF",
"South Africa"},
3689 {
"ZWE",
"Zimbabwe"},
3690 {
"the Netherlands",
"Netherlands"}
3697 {
"Burma",
"Myanmar"},
3698 {
"Siam",
"Thailand"}
3704 {
"Antigua",
"Antigua and Barbuda: Antigua"},
3705 {
"Ashmore Island",
"Ashmore and Cartier Islands: Ashmore Island"},
3706 {
"Autonomous Region of the Azores",
"Portugal: Azores"},
3707 {
"Azores",
"Portugal: Azores"},
3708 {
"Barbuda",
"Antigua and Barbuda: Barbuda"},
3709 {
"Bassas da India",
"French Southern and Antarctic Lands: Bassas da India"},
3710 {
"Caicos Islands",
"Turks and Caicos Islands: Caicos Islands"},
3711 {
"Canary Islands",
"Spain: Canary Islands"},
3712 {
"Cartier Island",
"Ashmore and Cartier Islands: Cartier Island"},
3713 {
"East Germany",
"Germany: East Germany"},
3714 {
"El Hierro",
"Spain: El Hierro"},
3715 {
"Europa Island",
"French Southern and Antarctic Lands: Europa Island"},
3716 {
"Fuerteventura",
"Spain: Fuerteventura"},
3717 {
"Glorioso Islands",
"French Southern and Antarctic Lands: Glorioso Islands"},
3718 {
"Gran Canaria",
"Spain: Gran Canaria"},
3719 {
"Grenadines",
"Saint Vincent and the Grenadines: Grenadines"},
3720 {
"Heard Island",
"Heard Island and McDonald Islands: Heard Island"},
3721 {
"Ile Amsterdam",
"French Southern and Antarctic Lands: Ile Amsterdam"},
3722 {
"Ile Saint-Paul",
"French Southern and Antarctic Lands: Ile Saint-Paul"},
3723 {
"Iles Crozet",
"French Southern and Antarctic Lands: Iles Crozet"},
3724 {
"Iles Kerguelen",
"French Southern and Antarctic Lands: Iles Kerguelen"},
3725 {
"Juan de Nova Island",
"French Southern and Antarctic Lands: Juan de Nova Island"},
3726 {
"La Gomera",
"Spain: La Gomera"},
3727 {
"La Graciosa",
"Spain: La Graciosa"},
3728 {
"La Palma",
"Spain: La Palma"},
3729 {
"Lanzarote",
"Spain: Lanzarote"},
3730 {
"Madeira",
"Portugal: Madeira"},
3731 {
"McDonald Island",
"Heard Island and McDonald Islands: McDonald Island"},
3732 {
"McDonald Islands",
"Heard Island and McDonald Islands: McDonald Islands"},
3733 {
"Miquelon",
"Saint Pierre and Miquelon: Miquelon"},
3734 {
"Nevis",
"Saint Kitts and Nevis: Nevis"},
3735 {
"Principe",
"Sao Tome and Principe: Principe"},
3736 {
"Saint Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3737 {
"Saint Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3738 {
"Saint Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3739 {
"Sao Tome",
"Sao Tome and Principe: Sao Tome"},
3740 {
"Scotland",
"United Kingdom: Scotland"},
3741 {
"South Sandwich Islands",
"South Georgia and the South Sandwich Islands: South Sandwich Islands"},
3742 {
"St Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3743 {
"St Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3744 {
"St Thomas",
"USA: Saint Thomas"},
3745 {
"St Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3746 {
"St. Kitts",
"Saint Kitts and Nevis: Saint Kitts"},
3747 {
"St. Pierre",
"Saint Pierre and Miquelon: Saint Pierre"},
3748 {
"St. Thomas",
"USA: Saint Thomas"},
3749 {
"St. Vincent",
"Saint Vincent and the Grenadines: Saint Vincent"},
3750 {
"Tenerife",
"Spain: Tenerife"},
3751 {
"Tobago",
"Trinidad and Tobago: Tobago"},
3752 {
"Trinidad",
"Trinidad and Tobago: Trinidad"},
3753 {
"Tromelin Island",
"French Southern and Antarctic Lands: Tromelin Island"},
3754 {
"Turks Islands",
"Turks and Caicos Islands: Turks Islands"},
3755 {
"Wales",
"United Kingdom: Wales"},
3756 {
"West Germany",
"Germany: West Germany"},
3771 "District of Columbia",
3818 vector<string> words;
3820 for(vector<string>::iterator word = words.begin(); word != words.end(); ++word)
3821 if (!word->empty() &&
isalpha(word->at(0)))
3822 word->at(0) = (
unsigned char)
toupper(word->at(0));
3830 if (found != k_whole_country_fixes.end()) {
3831 new_country = found->second;
3836 for (
size_t i = 0;
i < num_states; ++
i) {
3851 string country2(*c);
3855 while (pos2 !=
NPOS)
3857 if (pos2 <= pos1 && pos2+country2.length() >= pos1+country1.length())
3868 int num_matches = 0;
3875 if (!((pos+country.length()<phrase.length() &&
isalpha(phrase[pos+country.length()]))
3876 || (pos > 0 &&
isalpha(phrase[pos-1]))
3883 return (num_matches > 1);
3901 bool any_found =
true;
3902 while (!
val.empty() && any_found) {
3921 size_t len =
val.length();
3926 }
else if (
len > 5) {
3928 bool do_remove =
true;
3929 size_t pos =
val.length() - 2;
3931 while (dist < 4 && do_remove) {
3950 vector<string> tokens;
3953 vector<string>::iterator it = tokens.begin();
3954 while (it != tokens.end()) {
3956 if (pos !=
NPOS && pos > 3 && (*it).length() - pos > 4) {
3957 string first = (*it).substr(0, pos);
3958 string remainder = (*it).substr(pos + 1);
3960 size_t len_to_space =
first.length();
3961 while (space_pos !=
NPOS) {
3963 len_to_space =
first.length();
3966 if (len_to_space > 4) {
3967 (*it) = (*it).substr(0, pos);
3968 it = tokens.insert(it, remainder);
3983 size_t tlen =
test.length();
3984 size_t wlen = word.
length();
3987 while (pos !=
NPOS) {
3988 size_t p = start + pos;
3989 if ( (p == 0 || !
isalpha((
unsigned char)
test[p - 1])) &&
3990 (p + wlen >= tlen || !
isalpha((
unsigned char)
test[p + wlen])) ) {
4013 const vector<string>& countries,
4014 string& valid_country,
4015 string& orig_valid_country,
4016 bool& too_many_countries,
4019 for (
auto country : countries) {
4020 if (!country.empty() && !too_many_countries)
4022 string check = country;
4026 bool check_has_bad_cap =
false;
4029 if (valid_country.empty())
4031 valid_country =
check;
4032 orig_valid_country =
check;
4033 bad_cap = check_has_bad_cap;
4037 too_many_countries =
true;
4043 if (found != fix_map.
end())
4045 if (valid_country.empty())
4047 valid_country = found->second;
4048 orig_valid_country =
check;
4052 too_many_countries =
true;
4064 if (
val.length() == 0)
return false;
4066 char *
str =
new char[
sizeof(char) * (
val.length() + 1)];
4067 strcpy(
str,
val.c_str());
4078 while (ch !=
'\0') {
4111 {
"Acadia Parish",
"Acadia Parish" },
4112 {
"AcadiaParish",
"Acadia Parish" },
4113 {
"Allen Parish",
"Allen Parish" },
4114 {
"AllenParish",
"Allen Parish" },
4115 {
"Ascension Parish",
"Ascension Parish" },
4116 {
"AscensionParish",
"Ascension Parish" },
4117 {
"Assumption Parish",
"Assumption Parish" },
4118 {
"AssumptionParish",
"Assumption Parish" },
4119 {
"Avoyelles Parish",
"Avoyelles Parish" },
4120 {
"AvoyellesParish",
"Avoyelles Parish" },
4121 {
"Beauregard Parish",
"Beauregard Parish" },
4122 {
"BeauregardParish",
"Beauregard Parish" },
4123 {
"Bienville Parish",
"Bienville Parish" },
4124 {
"BienvilleParish",
"Bienville Parish" },
4125 {
"Bossier Parish",
"Bossier Parish" },
4126 {
"BossierParish",
"Bossier Parish" },
4127 {
"Caddo Parish",
"Caddo Parish" },
4128 {
"CaddoParish",
"Caddo Parish" },
4129 {
"Calcasieu Parish",
"Calcasieu Parish" },
4130 {
"CalcasieuParish",
"Calcasieu Parish" },
4131 {
"Caldwell Parish",
"Caldwell Parish" },
4132 {
"CaldwellParish",
"Caldwell Parish" },
4133 {
"Cameron Parish",
"Cameron Parish" },
4134 {
"CameronParish",
"Cameron Parish" },
4135 {
"Catahoula Parish",
"Catahoula Parish" },
4136 {
"CatahoulaParish",
"Catahoula Parish" },
4137 {
"Claiborne Parish",
"Claiborne Parish" },
4138 {
"ClaiborneParish",
"Claiborne Parish" },
4139 {
"Concordia Parish",
"Concordia Parish" },
4140 {
"ConcordiaParish",
"Concordia Parish" },
4141 {
"DeSoto Parish",
"DeSoto Parish" },
4142 {
"DeSotoParish",
"DeSoto Parish" },
4143 {
"East Baton Rouge Parish",
"East Baton Rouge Parish" },
4144 {
"East Carroll Parish",
"East Carroll Parish" },
4145 {
"East Feliciana Parish",
"East Feliciana Parish" },
4146 {
"EastBatonRougeParish",
"East Baton Rouge Parish" },
4147 {
"EastCarrollParish",
"East Carroll Parish" },
4148 {
"EastFelicianaParish",
"East Feliciana Parish" },
4149 {
"Evangeline Parish",
"Evangeline Parish" },
4150 {
"EvangelineParish",
"Evangeline Parish" },
4151 {
"Franklin Parish",
"Franklin Parish" },
4152 {
"FranklinParish",
"Franklin Parish" },
4153 {
"Grant Parish",
"Grant Parish" },
4154 {
"GrantParish",
"Grant Parish" },
4155 {
"Iberia Parish",
"Iberia Parish" },
4156 {
"IberiaParish",
"Iberia Parish" },
4157 {
"Iberville Parish",
"Iberville Parish" },
4158 {
"IbervilleParish",
"Iberville Parish" },
4159 {
"Jackson Parish",
"Jackson Parish" },
4160 {
"JacksonParish",
"Jackson Parish" },
4161 {
"Jefferson Davis Parish",
"Jefferson Davis Parish" },
4162 {
"Jefferson Parish",
"Jefferson Parish" },
4163 {
"JeffersonDavisParish",
"Jefferson Davis Parish" },
4164 {
"JeffersonParish",
"Jefferson Parish" },
4165 {
"Lafayette Parish",
"Lafayette Parish" },
4166 {
"LafayetteParish",
"Lafayette Parish" },
4167 {
"Lafourche Parish",
"Lafourche Parish" },
4168 {
"LafourcheParish",
"Lafourche Parish" },
4169 {
"LaSalle Parish",
"LaSalle Parish" },
4170 {
"LaSalleParish",
"LaSalle Parish" },
4171 {
"Lincoln Parish",
"Lincoln Parish" },
4172 {
"LincolnParish",
"Lincoln Parish" },
4173 {
"Livingston Parish",
"Livingston Parish" },
4174 {
"LivingstonParish",
"Livingston Parish" },
4175 {
"Madison Parish",
"Madison Parish" },
4176 {
"MadisonParish",
"Madison Parish" },
4177 {
"Morehouse Parish",
"Morehouse Parish" },
4178 {
"MorehouseParish",
"Morehouse Parish" },
4179 {
"Natchitoches Parish",
"Natchitoches Parish" },
4180 {
"NatchitochesParish",
"Natchitoches Parish" },
4181 {
"Orleans Parish",
"Orleans Parish" },
4182 {
"OrleansParish",
"Orleans Parish" },
4183 {
"Ouachita Parish",
"Ouachita Parish" },
4184 {
"OuachitaParish",
"Ouachita Parish" },
4185 {
"Plaquemines Parish",
"Plaquemines Parish" },
4186 {
"PlaqueminesParish",
"Plaquemines Parish" },
4187 {
"Pointe Coupee Parish",
"Pointe Coupee Parish" },
4188 {
"PointeCoupeeParish",
"Pointe Coupee Parish" },
4189 {
"Rapides Parish",
"Rapides Parish" },
4190 {
"RapidesParish",
"Rapides Parish" },
4191 {
"Red River Parish",
"Red River Parish" },
4192 {
"RedRiverParish",
"Red River Parish" },
4193 {
"Richland Parish",
"Richland Parish" },
4194 {
"RichlandParish",
"Richland Parish" },
4195 {
"Sabine Parish",
"Sabine Parish" },
4196 {
"SabineParish",
"Sabine Parish" },
4197 {
"St. Bernard Parish",
"St. Bernard Parish" },
4198 {
"St. Charles Parish",
"St. Charles Parish" },
4199 {
"St. Helena Parish",
"St. Helena Parish" },
4200 {
"St. James Parish",
"St. James Parish" },
4201 {
"St. John the Baptist Parish",
"St. John the Baptist Parish" },
4202 {
"St. Landry Parish",
"St. Landry Parish" },
4203 {
"St. Martin Parish",
"St. Martin Parish" },
4204 {
"St. Mary Parish",
"St. Mary Parish" },
4205 {
"St. Tammany Parish",
"St. Tammany Parish" },
4206 {
"St.BernardParish",
"St. Bernard Parish" },
4207 {
"St.CharlesParish",
"St. Charles Parish" },
4208 {
"St.HelenaParish",
"St. Helena Parish" },
4209 {
"St.JamesParish",
"St. James Parish" },
4210 {
"St.JohntheBaptistParish",
"St. John the Baptist Parish" },
4211 {
"St.LandryParish",
"St. Landry Parish" },
4212 {
"St.MartinParish",
"St. Martin Parish" },
4213 {
"St.MaryParish",
"St. Mary Parish" },
4214 {
"St.TammanyParish",
"St. Tammany Parish" },
4215 {
"Tangipahoa Parish",
"Tangipahoa Parish" },
4216 {
"TangipahoaParish",
"Tangipahoa Parish" },
4217 {
"Tensas Parish",
"Tensas Parish" },
4218 {
"TensasParish",
"Tensas Parish" },
4219 {
"Terrebonne Parish",
"Terrebonne Parish" },
4220 {
"TerrebonneParish",
"Terrebonne Parish" },
4221 {
"Union Parish",
"Union Parish" },
4222 {
"UnionParish",
"Union Parish" },
4223 {
"Vermilion Parish",
"Vermilion Parish" },
4224 {
"VermilionParish",
"Vermilion Parish" },
4225 {
"Vernon Parish",
"Vernon Parish" },
4226 {
"VernonParish",
"Vernon Parish" },
4227 {
"Washington Parish",
"Washington Parish" },
4228 {
"WashingtonParish",
"Washington Parish" },
4229 {
"Webster Parish",
"Webster Parish" },
4230 {
"WebsterParish",
"Webster Parish" },
4231 {
"West Baton Rouge Parish",
"West Baton Rouge Parish" },
4232 {
"West Carroll Parish",
"West Carroll Parish" },
4233 {
"West Feliciana Parish",
"West Feliciana Parish" },
4234 {
"WestBatonRougeParish",
"West Baton Rouge Parish" },
4235 {
"WestCarrollParish",
"West Carroll Parish" },
4236 {
"WestFelicianaParish",
"West Feliciana Parish" },
4237 {
"Winn Parish",
"Winn Parish" },
4238 {
"WinnParish",
"Winn Parish" }
4246 if ( parish.empty() ) {
4251 if ( parish_find_iter != parishAbbrevMap.end() ) {
4253 parish = parish_find_iter->second;
4263 {
"AL",
"Alabama" },
4264 {
"Alabama",
"Alabama" },
4265 {
"Alaska",
"Alaska" },
4266 {
"American Samoa",
"American Samoa" },
4267 {
"AR",
"Arkansas" },
4268 {
"Arizona",
"Arizona" },
4269 {
"Arkansas",
"Arkansas" },
4270 {
"AS",
"American Samoa" },
4271 {
"AZ",
"Arizona" },
4272 {
"CA",
"California" },
4273 {
"California",
"California" },
4274 {
"CO",
"Colorado" },
4275 {
"Colorado",
"Colorado" },
4276 {
"Connecticut",
"Connecticut" },
4277 {
"CT",
"Connecticut" },
4278 {
"DC",
"District of Columbia" },
4279 {
"DE",
"Delaware" },
4280 {
"Delaware",
"Delaware" },
4281 {
"District of Columbia",
"District of Columbia" },
4282 {
"FL",
"Florida" },
4283 {
"Florida",
"Florida" },
4284 {
"GA",
"Georgia" },
4285 {
"Georgia",
"Georgia" },
4288 {
"Hawaii",
"Hawaii" },
4292 {
"Idaho",
"Idaho" },
4293 {
"IL",
"Illinois" },
4294 {
"Illinois",
"Illinois" },
4295 {
"IN",
"Indiana" },
4296 {
"Indiana",
"Indiana" },
4298 {
"Kansas",
"Kansas" },
4299 {
"Kentucky",
"Kentucky" },
4301 {
"KY",
"Kentucky" },
4302 {
"LA",
"Louisiana" },
4303 {
"Louisiana",
"Louisiana" },
4304 {
"MA",
"Massachusetts" },
4305 {
"Maine",
"Maine" },
4306 {
"Maryland",
"Maryland" },
4307 {
"Massachusetts",
"Massachusetts" },
4308 {
"MD",
"Maryland" },
4310 {
"MI",
"Michigan" },
4311 {
"Michigan",
"Michigan" },
4312 {
"Minnesota",
"Minnesota" },
4313 {
"Mississippi",
"Mississippi" },
4314 {
"Missouri",
"Missouri" },
4315 {
"MN",
"Minnesota" },
4316 {
"MO",
"Missouri" },
4317 {
"Montana",
"Montana" },
4318 {
"MS",
"Mississippi" },
4319 {
"MT",
"Montana" },
4320 {
"NC",
"North Carolina" },
4321 {
"ND",
"North Dakota" },
4322 {
"NE",
"Nebraska" },
4323 {
"Nebraska",
"Nebraska" },
4324 {
"Nevada",
"Nevada" },
4325 {
"New Hampshire",
"New Hampshire" },
4326 {
"New Jersey",
"New Jersey" },
4327 {
"New Mexico",
"New Mexico" },
4328 {
"New York",
"New York" },
4329 {
"NH",
"New Hampshire" },
4330 {
"NJ",
"New Jersey" },
4331 {
"NM",
"New Mexico" },
4332 {
"North Carolina",
"North Carolina" },
4333 {
"North Dakota",
"North Dakota" },
4335 {
"NY",
"New York" },
4338 {
"OK",
"Oklahoma" },
4339 {
"Oklahoma",
"Oklahoma" },
4341 {
"Oregon",
"Oregon" },
4342 {
"PA",
"Pennsylvania" },
4343 {
"Pennsylvania",
"Pennsylvania" },
4344 {
"PR",
"Puerto Rico" },
4345 {
"Puerto Rico",
"Puerto Rico" },
4346 {
"Rhode Island",
"Rhode Island" },
4347 {
"RI",
"Rhode Island" },
4348 {
"SC",
"South Carolina" },
4349 {
"SD",
"South Dakota" },
4350 {
"South Carolina",
"South Carolina" },
4351 {
"South Dakota",
"South Dakota" },
4352 {
"Tennessee",
"Tennessee" },
4353 {
"Texas",
"Texas" },
4354 {
"TN",
"Tennessee" },
4356 {
"US Virgin Islands",
"US Virgin Islands" },
4359 {
"VA",
"Virginia" },
4360 {
"Vermont",
"Vermont" },
4361 {
"VI",
"US Virgin Islands" },
4362 {
"Virgin Islands",
"US Virgin Islands" },
4363 {
"Virginia",
"Virginia" },
4364 {
"VT",
"Vermont" },
4365 {
"WA",
"Washington" },
4366 {
"Washington",
"Washington" },
4367 {
"West Virginia",
"West Virginia" },
4368 {
"WI",
"Wisconsin" },
4369 {
"Wisconsin",
"Wisconsin" },
4370 {
"WV",
"West Virginia" },
4371 {
"WY",
"Wyoming" },
4372 {
"Wyoming",
"Wyoming" }
4380 if (
state.empty() ) {
4384 string original =
state;
4385 string working =
state;
4402 if ( state_find_iter != stateAbbrevMap.end() ) {
4404 state = state_find_iter->second;
4417 if ( country.empty() ) {
4422 string original = country;
4423 string working = country;
4427 working = working.substr ( 1, working.length() - 2 );
4453 vector<string> components;
4457 if ( components.size() < 1 ) {
4462 for (
size_t j = 0; j < components.size(); j++ ) {
4481 for (
int j = 0; j < components.size(); j++ ) {
4482 bool modified =
false;
4483 if (
s_IsState ( components[j], modified )) {
4508 res.append (
"USA: ");
4513 res.append ( components[
match] );
4517 for (
size_t j = 0; j < components.size(); j++ ) {
4518 if ( j ==
match)
continue;
4520 res.append ( components[j] );
4528 }
else if ( num_states > 1 ) {
4544 if ( ! exception_file.empty()) {
4547 for (
const auto & row : my_stream ) {
4548 TFieldNo number_of_fields = row. GetNumberOfFields();
4549 if ( number_of_fields != 2 )
continue;
4550 string fr = row[0].Get<
string>();
4551 string to = row[1].Get<
string>();
4552 exceptions [fr] = to;
4563 for (
const auto & itm : exceptions ) {
4564 string fr = itm.first;
4565 string to = itm.second;
4572 if ( ! f1.empty() && ! f2.empty()) {
4573 fr = f1 +
": " + f2;
4584 if ( ! exception_file.empty()) {
4595 string working = country;
4601 if ( ! corrected.empty()) {
4653 if (!usa1.empty() && !usa2.empty()) {
4657 input =
"USA: " + usa2;
4661 auto old_name_fix = k_old_country_name_fixes.find(
input.c_str());
4662 if (old_name_fix != k_old_country_name_fixes.end()) {
4663 input = old_name_fix->second;
4667 if (us_territories) {
4686 if (!new_country.empty())
4689 bool too_many_countries =
false;
4690 bool bad_cap =
false;
4692 string valid_country;
4693 string orig_valid_country;
4695 x_FindCountryName(k_country_name_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);
4696 if (valid_country.empty()) {
4697 x_FindCountryName(k_subregion_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);
4700 if (!valid_country.empty() && !too_many_countries)
4703 if (!valid_country.empty() && too_many_countries && valid_country ==
input)
4708 new_country = str1+
": "+str2;
4712 else if(!valid_country.empty() && !too_many_countries)
4717 string before =
input.substr(0,pos);
4722 string after =
input.substr(pos+orig_valid_country.length());
4726 else new_country = valid_country;
4727 if (!before.empty() || !after.empty()) {
4729 new_country +=
": ";
4731 new_country +=
", ";
4734 if (!before.empty())
4735 new_country += before;
4736 if (!before.empty() && !after.empty() && !
NStr::Equal(after,
")"))
4737 new_country +=
", ";
4739 new_country += after;
4752 for (
size_t i = 0;
i < country.length();
i++) {
4753 if (country[
i] ==
':') {
4768 string new_country = country;
4770 if (country_end_pos !=
NPOS)
4773 while (country[pos] ==
',' || country[pos] ==
':' ||
isspace((
unsigned char)country[pos]))
4777 string after = country.substr(pos);
4778 if (after.empty()) {
4779 if (pos > country_end_pos) {
4780 new_country = country.substr(0, country_end_pos);
4784 if (capitalize_after_colon)
4786 new_country = country.substr(0,country_end_pos);
4787 new_country +=
": " + after;
4799 {
"adult",
"adult" },
4801 {
"juvenile",
"juvenile" },
4802 {
"larva",
"larva" }
4813 if (it != sc_DevStagePairs.end()) {
4821 {
"hemocyte",
"hemocyte" },
4822 {
"hepatocyte",
"hepatocyte" },
4823 {
"lymphocyte",
"lymphocyte" },
4824 {
"neuroblast",
"neuroblast" }
4834 if (it != sc_CellTypePairs.end()) {
4849 vector<CTempString> tokens;
4851 if (tokens.size() > 1) {
4852 qual_map[tokens[0]] = tokens[1];
4858 const char **built_in,
size_t num_built_in,
4863 if (!
file.empty()) {
4870 if (built_in ==
NULL) {
4873 if (getenv(
"NCBI_DEBUG")) {
4874 ERR_POST(
Note <<
"Falling back on built-in data for " + data_name);
4876 for (
size_t i = 0;
i < num_built_in;
i++) {
4877 const char *p = built_in[
i];
4882 if (getenv(
"NCBI_DEBUG")) {
4887 }
while (!lr->
AtEOF());
4891 #include "isolation_sources.inc"
4921 for (
size_t i = 0;
i <
max;
i++) {
4948 for (
size_t i = 0;
i <
max;
i++) {
4971 string new_val =
value;
5019 const string& name =
GetName();
5082 "[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]",
5083 "[BankIt_uncultured16S_wizard]; [universal primers]; [dgge]",
5084 "[BankIt_uncultured16S_wizard]; [universal primers]",
5085 "[BankIt_cultured16S_wizard]",
5086 "[BankIt_organellerRNA_wizard]",
5087 "[BankIt_ITS_wizard]; [rRNAITS_notfound]",
5088 "[BankIt_ITS_wizard]",
5089 "[uncultured (using universal primers)]",
5090 "[uncultured (using universal primers) bacterial source]",
5091 "[cultured bacterial source]",
5092 "[enrichment culture bacterial source]",
5093 "[mixed bacterial source (cultured and uncultured)]",
5094 "[uncultured]; [universal primers]",
5095 "[mixed bacterial source]",
5097 "[cDNA derived from mRNA, purified viral particles]",
5098 "[cDNA derived from mRNA, whole cell/tissue lysate]",
5099 "[cDNA derived from genomic RNA, whole cell/tissue lysate]",
5100 "[cDNA derived from genomic RNA, purified viral particles]",
5101 "[universal primers]",
5102 "[uncultured; wizard]",
5103 "[uncultured; wizard; spans unknown]",
5104 "[cultured; wizard]",
5105 "[cultured; wizard; spans unknown]",
5106 "[intergenic wizard]",
5107 "[intergenic wizard; spans unknown]",
5108 "[Microsatellite wizard]",
5109 "[Microsatellite wizard; multiple repeats]",
5111 "[D-loop wizard; spans unknown]",
5112 "[D-loop wizard; spans known]",
5117 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",
5118 "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",
5119 "[BankIt_uncultured16S_wizard]; [species_specific primers]",
5120 "[uncultured (with species-specific primers)]",
5121 "[uncultured]; [amplified with species-specific primers]",
5122 "[uncultured (using species-specific primers) bacterial source]",
5123 "[amplified with species-specific primers]",
5132 if (pos != string::npos) {
5153 size_t remove_len = to_remove.length();
5155 while (pos !=
NPOS) {
5156 size_t extra_len = strspn (
value.c_str() + pos + remove_len,
" ;");
5157 value =
value.substr(0, pos) +
value.substr(pos + remove_len + extra_len);
5169 if (is_species_level) {
5172 value =
"amplified with species-specific primers";
5193 (
const string & country_name,
double y,
double min_x,
double max_x,
double scale)
5194 : m_CountryName(country_name) ,
5209 #define EPSILON 0.001
5267 : m_CountryName(country_name) , m_MinX (min_x), m_MinY (min_y), m_MaxX(max_x), m_MaxY (max_y)
5275 m_Level0 = country_name.substr(0, pos);
5277 m_Level1 = country_name.substr(pos + 1);
5351 && m_MaxX <= other_block->
GetMaxX()
5353 && m_MinY <= other_block->
GetMaxY()) {
5409 m_WaterDistance(-1),
5410 m_ClaimedDistance(-1)
5478 #include "lat_lon_country.inc"
5481 #include "lat_lon_water.inc"
5486 if (getenv(
"NCBI_DEBUG")) {
5487 ERR_POST(
Note <<
"Falling back on built-in data for latlon / water data.");
5492 string current_country;
5494 for (
int i = 0;
i < num;
i++) {
5496 if (line[0] ==
'-') {
5498 }
else if (
isalpha ((
unsigned char)line[0])) {
5499 current_country = line;
5500 }
else if (
isdigit ((
unsigned char)line[0])) {
5503 vector<string> tokens;
5505 if (tokens.size() > 3) {
5507 for (
size_t j = 2; j < tokens.size() - 1; j+=2) {