CPP_DOC/doxyhtml/SubSource_8cpp_source.html

 /* $Id: SubSource.cpp 102247 2024-04-10 22:44:36Z kans $

  * ===========================================================================

  *

  *                            PUBLIC DOMAIN NOTICE

  *               National Center for Biotechnology Information

  *

  *  This software/database is a "United States Government Work" under the

  *  terms of the United States Copyright Act.  It was written as part of

  *  the author's official duties as a United States Government employee and

  *  thus cannot be copyrighted.  This software/database is freely available

  *  to the public for use. The National Library of Medicine and the U.S.

  *  Government have not placed any restriction on its use or reproduction.

  *

  *  Although all reasonable efforts have been taken to ensure the accuracy

  *  and reliability of the software and data, the NLM and the U.S.

  *  Government do not and cannot warrant the performance or results that

  *  may be obtained by using this software or data. The NLM and the U.S.

  *  Government disclaim all warranties, express or implied, including

  *  warranties of performance, merchantability or fitness for any particular

  *  purpose.

  *

  *  Please cite the author in any work or product based on this material.

  *

  * ===========================================================================

  *

  * Author:  .......

  *

  * File Description:

  *   .......

  *

  * Remark:

  *   This code was originally generated by application DATATOOL

  *   using the following specifications:

  *   'seqfeat.asn'.

  */


 // standard includes

 #include <ncbi_pch.hpp>

 #include <serial/enumvalues.hpp>


 // generated includes

 #include <objects/seqfeat/SubSource.hpp>


 #include <math.h>

 #include <objects/misc/sequence_util_macros.hpp>

 #include <corelib/ncbitime.hpp>


 #include <util/row_reader_ncbi_tsv.hpp>

 #include <mutex>

 #include <util/compile_time.hpp>


 // generated classes


 BEGIN_NCBI_SCOPE


 BEGIN_objects_SCOPE // namespace ncbi::objects::


 // Out-of-class definitions of CSubSource's static lat-lon map holders.
 // Both unique_ptrs are default-constructed here and therefore start out
 // null; the code that fills them is not visible in this part of the file.
 unique_ptr<CLatLonCountryMap> CSubSource::m_LatLonCountryMap;

 unique_ptr<CLatLonCountryMap> CSubSource::m_LatLonWaterMap;


 // destructor

 // The destructor body is empty; no explicit cleanup is performed here.
 CSubSource::~CSubSource(void)

 {

 }


 static bool s_init_UseGeoLocNameForCountry(void)

 {

     if (! CNcbiApplication::Instance()) {

         return false;

     }


     const CNcbiEnvironment& env = CNcbiApplication::Instance()->GetEnvironment();

     string fromEnv = env.Get("NCBI_GEO_LOC_NAME_FOR_COUNTRY");

     NStr::ToLower(fromEnv);

     if (fromEnv == "true") {

         return true;

     } else if (fromEnv == "false") {

         return false;

     }


     const CNcbiRegistry& reg = CNcbiApplication::Instance()->GetConfig();

     string fromConfig = reg.GetString("OrgSubSource", "UseGeoLocNameForCountry", "off");

     NStr::ToLower(fromConfig);

     if (fromConfig == "1" || fromConfig == "on" || fromConfig == "true" || fromConfig == "yes") {

         return true;

     }


     return false;

 }


 // Report whether geo_loc_name should be used for country.
 // The answer comes from s_init_UseGeoLocNameForCountry() and is kept in a
 // function-local static, so the initializer runs on the first call only.
 bool CSubSource::NCBI_UseGeoLocNameForCountry(void)
 {
     static bool s_cached_setting = s_init_UseGeoLocNameForCountry();

     return s_cached_setting;
 }


 // Append a label of the form "/<subtype>=<name>[ (<attrib>)]" to *str.
 //
 // The subtype part is "other" for eSubtype_other; otherwise it is the
 // default-vocabulary subtype name with '_' replaced by '-', or "unknown"
 // when looking the name up raises CSerialException.
 void CSubSource::GetLabel(string* str) const
 {
     str->push_back('/');

     string qual_name;
     if (GetSubtype() == eSubtype_other) {
         qual_name = "other";
     } else {
         try {
             // eVocabulary_insdc has some special cases not (historically)
             // used here.
             qual_name = GetSubtypeName(GetSubtype());
             replace(qual_name.begin(), qual_name.end(), '_', '-');
         } catch (const CSerialException&) {
             qual_name = "unknown";
         }
     }

     str->append(qual_name);
     str->push_back('=');
     str->append(GetName());

     if (!IsSetAttrib()) {
         return;
     }
     str->append(" (");
     str->append(GetAttrib());
     str->append(")");
 }


 // Translate a qualifier name into its subtype value.
 //
 // The name is trimmed, lower-cased, and has '_' and ' ' turned into '-'
 // before matching.  The various "note" spellings map to eSubtype_other.
 // With eVocabulary_insdc a few INSDC-only spellings are recognised as well.
 // Every other name is handed to the ESubtype enum lookup (FindValue).
 CSubSource::TSubtype CSubSource::GetSubtypeValue(const string& str,
                                                  EVocabulary vocabulary)
 {
     string name = NStr::TruncateSpaces(str);
     NStr::ToLower(name);
     replace(name.begin(), name.end(), '_', '-');
     replace(name.begin(), name.end(), ' ', '-');

     static const char* const kNoteSpellings[] = {
         "note", "subsource-note", "subsrc-note", "note-subsource"
     };
     for (const char* spelling : kNoteSpellings) {
         if (NStr::EqualNocase(name, spelling)) {
             return eSubtype_other;
         }
     }

     if (vocabulary == eVocabulary_insdc) {
         // consider a table if more special cases arise.
         if (name == "insertion-seq") {
             return eSubtype_insertion_seq_name;
         }
         if (name == "plasmid") {
             return eSubtype_plasmid_name;
         }
         if (name == "transposon") {
             return eSubtype_transposon_name;
         }
         if (name == "sub-clone") {
             return eSubtype_subclone;
         }
     }

     return ENUM_METHOD_NAME(ESubtype)()->FindValue(name);
 }


 // Tell whether a qualifier name is one GetSubtypeValue-style matching
 // would recognise.
 //
 // The name is normalised the same way (trim, lower-case, '_' and ' ' to
 // '-').  The "note" spellings are always valid, the INSDC-only spellings
 // are valid with eVocabulary_insdc, and anything else is checked against
 // the ESubtype enum names (IsValidName).
 bool CSubSource::IsValidSubtypeName(const string& str,
                                     EVocabulary vocabulary)
 {
     string name = NStr::TruncateSpaces(str);
     NStr::ToLower(name);
     replace(name.begin(), name.end(), '_', '-');
     replace(name.begin(), name.end(), ' ', '-');

     static const char* const kNoteSpellings[] = {
         "note", "subsource-note", "subsrc-note", "note-subsource"
     };
     for (const char* spelling : kNoteSpellings) {
         if (NStr::EqualNocase(name, spelling)) {
             return true;
         }
     }

     if (vocabulary == eVocabulary_insdc) {
         // consider a table if more special cases arise.
         static const char* const kInsdcSpellings[] = {
             "insertion-seq", "plasmid", "transposon", "sub-clone"
         };
         for (const char* spelling : kInsdcSpellings) {
             if (name == spelling) {
                 return true;
             }
         }
     }

     return ENUM_METHOD_NAME(ESubtype)()->IsValidName(name);
 }


 // Return the qualifier name for a subtype value.
 //
 // eSubtype_other is always "note".  With eVocabulary_insdc four subtypes
 // have fixed INSDC spellings and every other name comes from the ESubtype
 // enum with '-' replaced by '_'.  For any other vocabulary the ESubtype
 // enum name is returned unchanged.
 string CSubSource::GetSubtypeName(CSubSource::TSubtype stype,
                                   EVocabulary vocabulary)
 {
     if (stype == CSubSource::eSubtype_other) {
         return "note";
     }

     if (vocabulary != eVocabulary_insdc) {
         return ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true);
     }

     // INSDC spellings that differ from the enum names.
     if (stype == eSubtype_subclone) {
         return "sub_clone";
     }
     if (stype == eSubtype_plasmid_name) {
         return "plasmid";
     }
     if (stype == eSubtype_transposon_name) {
         return "transposon";
     }
     if (stype == eSubtype_insertion_seq_name) {
         return "insertion_seq";
     }

     return NStr::Replace(ENUM_METHOD_NAME(ESubtype)()->FindName(stype, true),
                          "-", "_");
 }


 // Return false for the subtypes listed below and true for every other
 // subtype value.
 bool CSubSource::IsMultipleValuesAllowed(TSubtype subtype)
 {
     switch (subtype) {
     case eSubtype_chromosome:
     case eSubtype_sex:
     case eSubtype_germline:
     case eSubtype_rearranged:
     case eSubtype_plasmid_name:
     case eSubtype_segment:
     case eSubtype_country:
     case eSubtype_transgenic:
     case eSubtype_environmental_sample:
     case eSubtype_lat_lon:
     case eSubtype_collection_date:
     case eSubtype_collected_by:
     case eSubtype_identified_by:
     case eSubtype_fwd_primer_seq:
     case eSubtype_rev_primer_seq:
     case eSubtype_fwd_primer_name:
     case eSubtype_rev_primer_name:
     case eSubtype_metagenomic:
     case eSubtype_altitude:
     case eSubtype_clone:
         return false;
     default:
         return true;
     }
 }


 // Return true for the five subtypes listed below, false for all others.
 bool CSubSource::NeedsNoText(const TSubtype& subtype)
 {
     switch (subtype) {
     case eSubtype_germline:
     case eSubtype_rearranged:
     case eSubtype_transgenic:
     case eSubtype_environmental_sample:
     case eSubtype_metagenomic:
         return true;
     default:
         return false;
     }
 }


 // Return true for the subtypes on the "discouraged" list below.
 // The metagenomic subsrc qualifier was taken off this list: GB-3384.
 bool CSubSource::IsDiscouraged(const TSubtype subtype)
 {
     return subtype == eSubtype_frequency
         || subtype == eSubtype_insertion_seq_name
         || subtype == eSubtype_phenotype
         || subtype == eSubtype_plastid_name
         || subtype == eSubtype_transposon_name
         || subtype == eSubtype_fwd_primer_seq
         || subtype == eSubtype_rev_primer_seq
         || subtype == eSubtype_fwd_primer_name
         || subtype == eSubtype_rev_primer_name
         || subtype == eSubtype_whole_replicon;
 }


 bool CSubSource::IsDayValueOkForMonth(int day, int month, int year)

 {

     if (month < 1 || month > 12 || day < 1) {

         return false;

     }

     bool rval = true;

     if (year < 100) {

         year += 2000;

     } else if (year > 3000) {

         return false;

     } else if (year < 1538) {

         return false;

     }

     CTime month_o(year, month, 1);

     if (day > month_o.DaysInMonth()) {

         rval = false;

     }

     return rval;

 }


 // Parse a single collection-date value into a CDate.
 //
 // Accepted forms, as implemented below:
 //   - anything IsISOFormatDate() accepts (handed to GetDateFromISODate)
 //   - "YYYY"
 //   - "Mmm-YYYY"      (month resolved with CTime::MonthNameToNum)
 //   - "DD-Mmm-YYYY"
 // For the non-ISO forms the year must satisfy 1000 <= year < 2100, and a
 // day, when present together with a month, must exist in that month.
 // Dates in the future are not rejected here.
 //
 // @param test  the text to parse; surrounding whitespace is ignored
 // @return      the parsed date (year always set, month/day when present)
 // @throws CException (eUnknown) with a descriptive message when the text is
 //         blank, malformed, or out of range
 CRef<CDate> CSubSource::DateFromCollectionDate (const string& test) THROWS((CException))
 {
     if (NStr::IsBlank(test)) {
         NCBI_THROW (CException, eUnknown,
                         "collection-date string is blank");
     }
     string str = NStr::TruncateSpaces(test);

     if (IsISOFormatDate(str)) {
         return GetDateFromISODate(str);
     }

     // Split on at most two dashes: [DD-][Mmm-]YYYY
     string year;
     string month;
     string day;

     size_t pos = NStr::Find(str, "-");
     if (pos == NPOS) {
         year = str;
     } else {
         size_t pos2 = NStr::Find(str, "-", pos + 1);
         if (pos2 == NPOS) {
             month = str.substr(0, pos);
             year = str.substr(pos + 1);
             if (NStr::IsBlank(month)) {
                 NCBI_THROW (CException, eUnknown,
                                 "collection-date string is improperly formatted");
             }
         } else {
             day = str.substr(0, pos);
             month = str.substr(pos + 1, pos2 - pos - 1);
             year = str.substr(pos2 + 1);
             if (NStr::IsBlank(month) || NStr::IsBlank(day)) {
                 NCBI_THROW (CException, eUnknown,
                                 "collection-date string is improperly formatted");
             }
         }
     }

     int month_val = 0;
     if (!NStr::IsBlank(month)) {
         try {
             month_val = CTime::MonthNameToNum(month);
         } catch (const CTimeException&) {
             NCBI_THROW (CException, eUnknown,
                             "collection-date string has invalid month");
         }
     }

     int day_val = 0;
     if (!NStr::IsBlank(day)) {
         try {
             day_val = NStr::StringToInt (day);
         } catch ( const exception& ) {
             // threw exception while converting to int
             NCBI_THROW (CException, eUnknown,
                             "collection-date string is improperly formatted");
         }
         // This check must stay outside the try block above: a CException
         // thrown inside it would be caught by the std::exception handler
         // and its message replaced by the generic "improperly formatted".
         if (day_val < 1) {
             NCBI_THROW (CException, eUnknown,
                             "collection-date string has invalid day value");
         }
     }

     if (NStr::IsBlank(year)) {
         NCBI_THROW (CException, eUnknown,
                         "collection-date string is improperly formatted");
     }

     int year_val = 0;
     try {
         year_val = NStr::StringToInt (year);
     } catch ( const exception& ) {
         // threw exception while converting to int
         NCBI_THROW (CException, eUnknown,
                         "collection-date string is improperly formatted");
     }

     if (year_val < 1000 || year_val >= 2100) {
         NCBI_THROW (CException, eUnknown,
                         "collection-date year is out of range");
     }

     if (day_val > 0 && month_val > 0 && !IsDayValueOkForMonth(day_val, month_val, year_val)) {
         NCBI_THROW (CException, eUnknown,
                         "collection-date day is greater than monthly maximum");
     }

     CRef<CDate> date(new CDate);

     date->SetStd().SetYear (year_val);
     if (month_val > 0) {
         date->SetStd().SetMonth (month_val);
     }
     if (day_val > 0) {
         date->SetStd().SetDay (day_val);
     }

     return date;
 }


 bool CSubSource::IsCollectionDateAfterTime(const string& collection_date, time_t t, bool& bad_format)

 {

     bad_format = false;

     bool in_future = false;

     vector<string> pieces;

     NStr::Split(collection_date, "/", pieces);

     if (pieces.size() > 2) {

         bad_format = true;

     } else {

         ITERATE(vector<string>, it, pieces) {

             CRef<CDate> coll_date = DateFromCollectionDate (*it);

             if (!coll_date) {

                 bad_format = true;

             } else if (IsCollectionDateAfterTime(*coll_date, t)) {

                 in_future = true;

             }

         }

     }

     return in_future;

 }


 bool CSubSource::IsCollectionDateAfterTime(const CDate& collection_date, time_t t)

 {

     CDate now(t);

     if (collection_date.Compare(now) == CDate::eCompare_after) {

         return true;

     } else {

         return false;

     }

 }


 // CTime flavour: converts ctime with GetTimeT() and defers to the time_t
 // overload.
 bool CSubSource::IsCollectionDateAfterTime(const CDate& collection_date, CTime& ctime)
 {
     const time_t as_time_t = ctime.GetTimeT();
     return IsCollectionDateAfterTime(collection_date, as_time_t);
 }


 void CSubSource::IsCorrectDateFormat(const string& date_string, bool& bad_format, bool& in_future)

 {

     bad_format = false;

     in_future = false;


     vector<string> pieces;

     NStr::Split(date_string, "/", pieces);

     if (pieces.size() > 2) {

         bad_format = true;

         return;

     } else if (pieces.size() == 2) {

         bool first_bad = false;

         bool first_future = false;

         bool second_bad = false;

         bool second_future = false;

         IsCorrectDateFormat(pieces[0], first_bad, first_future);

         IsCorrectDateFormat(pieces[1], second_bad, second_future);

         bad_format = first_bad || second_bad;

         if (!bad_format) {

             in_future = first_future || second_future;

         }

         return;

     }


     try {

         CRef<CDate> coll_date = CSubSource::DateFromCollectionDate (date_string);


         if (!IsISOFormatDate(date_string)) {

             // if there are two dashes, then the first token needs to be the day, and the

             // day has to have two numbers, a leading zero if the day is less than 10

             size_t pos = NStr::Find(date_string, "-");

             if (pos != NPOS) {

                 size_t pos2 = NStr::Find(date_string, "-", pos + 1);

                 if (pos2 != NPOS  &&  pos != 2) {

                     bad_format = true;

                 }

             }

         }


         if (!bad_format) {

             time_t t;


             time(&t);


             in_future = IsCollectionDateAfterTime(*coll_date, t);

         }

     } catch (const CException& ) {

         bad_format = true;

     }

 }


 size_t CSubSource::CheckDateFormat(const string& date_string)

 {

     size_t rval = eDateFormatFlag_ok;

     vector<string> pieces;

     NStr::Split(date_string, "/", pieces);

     if (pieces.size() > 2) {

         rval |= eDateFormatFlag_bad_format;

     } else if (pieces.size() == 2) {

         rval |= CheckDateFormat(pieces[0]);

         rval |= CheckDateFormat(pieces[1]);

         if (rval == eDateFormatFlag_ok) {

             try {

                 CRef<CDate> d1 = CSubSource::DateFromCollectionDate(pieces[0]);

                 CRef<CDate> d2 = CSubSource::DateFromCollectionDate(pieces[1]);

                 if (d2->Compare(*d1) == CDate::eCompare_before) {

                     rval |= eDateFormatFlag_out_of_order;

                 }

             } catch (const CException&) {

                 rval |= eDateFormatFlag_bad_format;

             }

         }

         return rval;

     }


     try {

         CRef<CDate> coll_date = CSubSource::DateFromCollectionDate(date_string);


         if (!IsISOFormatDate(date_string)) {

             // if there are two dashes, then the first token needs to be the day, and the

             // day has to have two numbers, a leading zero if the day is less than 10

             size_t pos = NStr::Find(date_string, "-");

             if (pos != NPOS) {

                 size_t pos2 = NStr::Find(date_string, "-", pos + 1);

                 if (pos2 != NPOS  &&  pos != 2) {

                     rval |= eDateFormatFlag_bad_format;

                 }

             }

         }


         if (rval == eDateFormatFlag_ok) {

             time_t t;


             time(&t);

             if (IsCollectionDateAfterTime(*coll_date, t)) {

                 rval |= eDateFormatFlag_in_future;

             }

         }

     } catch (const CException&) {

         rval |= eDateFormatFlag_bad_format;

     }

     return rval;

 }


 // Set-of-C-strings alias; it is not referenced in the visible part of
 // this file.
 typedef CStaticArraySet<const char*, PCase_CStr> TCStrSet;


 // "Null term" placeholder values for collection_date.  A value found in
 // this set is exempt from date checking: GetCollectionDateProblem()
 // reports no problem for it.  The order of the entries is not important.
 // NOTE(review): ct::tagStrCase presumably makes the lookup case-sensitive;
 // confirm against util/compile_time.hpp.

 MAKE_CONST_SET(s_Null_CollectionDatesSet, ct::tagStrCase,

 {

     "missing",

     "missing: control sample",

     "missing: data agreement established pre-2023",

     "missing: endangered species",

     "missing: human-identifiable",

     "missing: lab stock",

     "missing: sample group",

     "missing: synthetic construct",

     "missing: third party data",

     "not applicable",

     "not collected",

     "not provided",

     "restricted access",

 })


 // Describe what is wrong with a collection_date value.
 //
 // Returns an empty string when the value is one of the entries in
 // s_Null_CollectionDatesSet or when CheckDateFormat() raises no flag.
 // Otherwise returns one message, picked in this order of precedence:
 // bad format, in the future, out of order.
 string CSubSource::GetCollectionDateProblem (const string& date_string)
 {
     const bool is_null_term =
         s_Null_CollectionDatesSet.find(date_string.c_str()) != s_Null_CollectionDatesSet.end();
     if (is_null_term) {
         return string();
     }

     const size_t flags = CheckDateFormat(date_string);

     if (flags & eDateFormatFlag_bad_format) {
         return "Collection_date format is not in DD-Mmm-YYYY format";
     }
     if (flags & eDateFormatFlag_in_future) {
         return "Collection_date is in the future";
     }
     if (flags & eDateFormatFlag_out_of_order) {
         return "Collection_dates are out of order";
     }
     return string();
 }


 // Try to read orig_date as "<date><delim><date>" and return it as
 // "<fixed date>/<fixed date>".
 //
 // Returns kEmptyStr when the delimiter (matched case-insensitively) is
 // absent, when it is found again after its first position, or when either
 // side cannot be fixed by FixDateFormat() or has an ambiguous month.
 string CSubSource::x_ParseDateRangeWithDelimiter(const string& orig_date, CTempString delim)
 {
     const size_t delim_pos = NStr::Find(orig_date, delim, NStr::eNocase);
     if (delim_pos == NPOS) {
         return kEmptyStr;
     }

     // A second delimiter means this is not a simple two-date range.
     if (NStr::Find(orig_date.substr(delim_pos + 1), delim, NStr::eNocase) != NPOS) {
         return kEmptyStr;
     }

     bool ambiguous = false;

     const string range_start = FixDateFormat(orig_date.substr(0, delim_pos), true, ambiguous);
     if (ambiguous || NStr::IsBlank(range_start)) {
         return kEmptyStr;
     }

     const string range_end = FixDateFormat(orig_date.substr(delim_pos + delim.length()), true, ambiguous);
     if (ambiguous || NStr::IsBlank(range_end)) {
         return kEmptyStr;
     }

     return range_start + "/" + range_end;
 }


 // Convenience form of FixDateFormat: month-first interpretation, and an
 // ambiguous month counts as failure (empty result).
 //
 // When the text cannot be fixed as a single date it is retried as a range,
 // trying each delimiter in turn: "/", " to ", " and ", "-", "_".  The first
 // delimiter that yields a non-blank result wins.
 string CSubSource::FixDateFormat (const string& orig_date)
 {
     bool ambiguous_month = false;
     string result = FixDateFormat(orig_date, true, ambiguous_month);

     if (ambiguous_month) {
         result.clear();
         return result;
     }

     if (NStr::IsBlank(result)) {
         static const char* const kRangeDelimiters[] = {"/", " to ", " and ", "-", "_"};
         for (const char* delimiter : kRangeDelimiters) {
             result = x_ParseDateRangeWithDelimiter(orig_date, delimiter);
             if (!NStr::IsBlank(result)) {
                 break;
             }
         }
     }

     return result;
 }


 // Validate an ISO-format time-of-day string and extract its fields.
 //
 // ISO Format for time is one of these:
 //   HH:MM:SS
 //   HH:MM
 //   HH
 // Followed by either Z or +hh:mm to indicate an offset from Zulu.
 // The ':' separators are required, 'Z' must be the last character, and a
 // "+hh:mm" suffix must be exactly six characters long.
 //
 // @param orig_time          text to check (no surrounding whitespace)
 // @param hour, min, sec     receive the parsed fields when true is
 //                          returned; the "+hh:mm" offset is subtracted
 //                          from hour and min without normalisation, so
 //                          either may come back negative.  Their values
 //                          are not meaningful when false is returned.
 // @param require_time_zone  when true, a time without 'Z' or "+hh:mm" is
 //                          rejected
 // @return true when orig_time matches one of the layouts above with
 //         hour 0..23, minute 0..59 and second 0..59
 bool CSubSource::IsISOFormatTime(const string& orig_time, int& hour, int& min, int& sec, bool require_time_zone)
 {
     // Reads the two characters starting at 'at' as a decimal number.
     // Fails when they are missing or are not both digits.
     auto two_digits = [&orig_time](size_t at, int& value) -> bool {
         if (at + 1 >= orig_time.length()) {
             return false;
         }
         const unsigned char tens = static_cast<unsigned char>(orig_time[at]);
         const unsigned char ones = static_cast<unsigned char>(orig_time[at + 1]);
         if (!isdigit(tens) || !isdigit(ones)) {
             return false;
         }
         value = (tens - '0') * 10 + (ones - '0');
         return true;
     };

     int offset_hour = 0;
     int offset_min = 0;

     // 'suffix' ends up as the length of the HH[:MM[:SS]] part.
     size_t suffix = NStr::Find(orig_time, "Z");
     if (suffix != NPOS) {
         // nothing may follow the Zulu marker
         if (suffix + 1 != orig_time.length()) {
             return false;
         }
     } else {
         suffix = NStr::Find(orig_time, "+");
         if (suffix == NPOS) {
             if (require_time_zone) {
                 return false;
             }
             suffix = orig_time.length();
         } else if (orig_time.length() - suffix != 6 ||
                    orig_time[suffix + 3] != ':' ||
                    !two_digits(suffix + 1, offset_hour) ||
                    !two_digits(suffix + 4, offset_min)) {
             return false;
         }
     }

     if (suffix != 2 && suffix != 5 && suffix != 8) {
         return false;
     }

     int parsed_hour = 0;
     if (!two_digits(0, parsed_hour)) {
         return false;
     }

     hour = 0;
     min = 0;
     sec = 0;

     if (parsed_hour > 23) {
         return false;
     }
     hour = parsed_hour - offset_hour;

     if (suffix > 2) {
         int parsed_min = 0;
         if (orig_time[2] != ':' || !two_digits(3, parsed_min) || parsed_min > 59) {
             return false;
         }
         min = parsed_min - offset_min;
     }

     if (suffix == 8) {
         int parsed_sec = 0;
         if (orig_time[5] != ':' || !two_digits(6, parsed_sec) || parsed_sec > 59) {
             return false;
         }
         sec = parsed_sec;
     }

     return true;
 }


 // ISO Format for date is exactly 10 characters long OR exactly 7 characters long.

 // For ten characters:

 // First four characters must be digits, represent year.

 // Fifth character must be dash.

 // Sixth and seventh characters must be digits, represent month, use zero padding.

 // Eighth character must be dash.

 // Ninth and tenth characters must be digits, represent day, use zero padding.

 // For 7 characters:

 // First four characters must be digits, represent year.

 // Fifth character must be dash.

 // Sixth and seventh characters must be digits, represent month, use zero padding.

 bool CSubSource::IsISOFormatDateOnly (const string& cpy)

 {

     if (cpy.length() != 10 && cpy.length() != 7) {

         return false;

     }

     bool rval = true;

     size_t pos = 0;

     string::const_iterator it = cpy.begin();

     while (it != cpy.end() && rval) {

         if (pos == 4 || pos == 7) {

             if (*it != '-') {

                 rval = false;

             }

         } else if (!isdigit(*it)) {

             rval = false;

         }

         ++it;

         ++pos;

     }

     if (rval) {

         try {

             int year = NStr::StringToInt(cpy.substr(0, 4));

             int month = NStr::StringToInt(cpy.substr(5, 2));

             if (month < 1 || month > 12) {

                 rval = false;

             }

             if (cpy.length() == 10) { // has day

                 int day = NStr::StringToInt(cpy.substr(8, 2));

                 if (!IsDayValueOkForMonth(day, month, year)) {

                     rval = false;

                 }

             }

         } catch (...) {

             rval = false;

         }

     }

     return rval;

 }


 bool CSubSource::x_IsFixableIsoDate(const string& orig_date)

 {

     string cpy = orig_date;

     NStr::TruncateSpacesInPlace(cpy);

     size_t time_pos = NStr::Find(cpy, "T");

     bool rval = false;

     if (time_pos == NPOS) {

         rval = false;

     } else {

         if (!IsISOFormatDateOnly(cpy.substr(0, time_pos))) {

             rval = false;

         } else {

             int h, m, s;

             if (IsISOFormatTime(cpy.substr(time_pos + 1), h, m, s, true)) {

                 // already fine, not fixable

                 rval = false;

             } else {

                 rval = IsISOFormatTime(cpy.substr(time_pos + 1), h, m, s, false);

             }

         }

     }

     return rval;

 }


 // Return orig_date, trimmed of surrounding whitespace, with everything
 // from the first 'T' onwards cut off.  Text without a 'T' is returned
 // trimmed but otherwise unchanged.
 string CSubSource::x_RemoveIsoTime(const string& orig_date)
 {
     string trimmed = orig_date;
     NStr::TruncateSpacesInPlace(trimmed);

     const size_t t_pos = NStr::Find(trimmed, "T");
     if (t_pos == NPOS) {
         return trimmed;
     }
     return trimmed.substr(0, t_pos);
 }


 bool CSubSource::IsISOFormatDate(const string& orig_date)

 {

     string cpy = orig_date;

     NStr::TruncateSpacesInPlace(cpy);

     size_t time_pos = NStr::Find(cpy, "T");

     if (time_pos == NPOS) {

         return IsISOFormatDateOnly(cpy);

     } else {

         int h, m, s;

         return (IsISOFormatDateOnly(cpy.substr(0, time_pos)) &&

             IsISOFormatTime(cpy.substr(time_pos + 1), h, m, s));

     }


 }


 // Build a CDate from text laid out as YYYY-MM[-DD...].
 //
 // Characters 0-3 are read as the year and 5-6 as the month; when the
 // trimmed text is longer than seven characters, characters 8-9 are read as
 // the day.  No range checking is done here.  Any exception raised while
 // extracting or converting the fields yields a null CRef.
 CRef<CDate> CSubSource::GetDateFromISODate(const string& orig_date)
 {
     try {
         string iso_text = orig_date;
         NStr::TruncateSpacesInPlace(iso_text);

         CRef<CDate> result(new CDate());

         const int year_val = NStr::StringToInt(iso_text.substr(0, 4));
         const int month_val = NStr::StringToInt(iso_text.substr(5, 2));
         result->SetStd().SetYear (year_val);
         result->SetStd().SetMonth (month_val);

         const bool has_day = iso_text.length() > 7;
         if (has_day) {
             const int day_val = NStr::StringToInt(iso_text.substr(8, 2));
             result->SetStd().SetDay (day_val);
         }
         return result;
     } catch (...) {
         return CRef<CDate>();
     }
 }


 // Break a free-form date string into tokens.
 //
 // The text is trimmed, then split on any of the characters " ,-/=_." and
 // additionally wherever a run of letters meets a non-letter (so "12Jan"
 // becomes "12", "Jan").  Blank tokens are dropped.  When more than three
 // tokens result, an ordinal suffix ("st", "nd", "rd", "th", any case) that
 // directly follows a token starting with a digit is glued back onto it.
 //
 // @param orig_date  the text to tokenize
 // @return the tokens, in their original order
 vector<string> CSubSource::x_GetDateTokens(const string& orig_date)
 {
     vector<string> tokens;
     const string token_delimiters = " ,-/=_.";

     string cpy = orig_date;
     NStr::TruncateSpacesInPlace (cpy);

     string curr_token;
     bool is_chars = false;   // true while curr_token consists of letters
     for (const char raw : cpy) {
         // isalpha() is only defined for unsigned char values (and EOF), so
         // classify through an unsigned char in every branch.
         const bool is_letter = isalpha(static_cast<unsigned char>(raw)) != 0;

         if (token_delimiters.find(raw) != NPOS) {
             if (!NStr::IsBlank(curr_token)) {
                 tokens.push_back(curr_token);
             }
             curr_token.clear();
             is_chars = false;
         } else if (is_chars && !is_letter) {
             // previous token was all letters, do not add non-letter characters
             if (!NStr::IsBlank(curr_token)) {
                 tokens.push_back(curr_token);
             }
             curr_token.assign(1, raw);
             is_chars = false;
         } else if (!NStr::IsBlank(curr_token) && !is_chars && is_letter) {
             // previous token had no letters
             tokens.push_back(curr_token);
             curr_token.assign(1, raw);
             is_chars = true;
         } else {
             curr_token += raw;
             if (is_letter) {
                 is_chars = true;
             }
         }
     }
     if (!NStr::IsBlank(curr_token)) {
         tokens.push_back(curr_token);
     }

     // reattach 'st', 'nd', 'rd', and 'th' to numbers if present
     if (tokens.size() > 3) {
         vector<string>::iterator p = tokens.begin();
         bool prev_is_number = isdigit((unsigned char)(*p)[0]);
         vector<string>::iterator s = p;
         ++s;
         while (s != tokens.end()) {
             if (prev_is_number &&
                 (NStr::EqualNocase(*s, "st") ||
                  NStr::EqualNocase(*s, "nd") ||
                  NStr::EqualNocase(*s, "rd") ||
                  NStr::EqualNocase(*s, "th"))) {
                 *p += *s;
                 // erase() leaves p (which precedes s) valid and returns the
                 // element that now follows the merged token
                 s = tokens.erase(s);
                 prev_is_number = false;
             } else {
                 ++p;
                 ++s;
                 prev_is_number = isdigit((unsigned char)(*p)[0]);
             }
         }
     }

     return tokens;
 }


 // Decide which of two numeric tokens is the month and which is the day.
 //
 // A value below 13 can be a month.  When only one token qualifies it is the
 // month.  When both qualify and differ, month_ambiguous is set to true and
 // month_first picks the order; when both are equal there is no ambiguity.
 // On success 'month' receives the abbreviated month name and 'day' the
 // other value.
 //
 // Returns false when both values are above 12 or when any conversion
 // (token to int, month number to name) throws.
 bool s_ChooseMonthAndDay(const string& token1, const string& token2, bool month_first, string& month, int& day, bool& month_ambiguous)
 {
     try {
         const int first = NStr::StringToInt (token1);
         const int second = NStr::StringToInt (token2);

         const bool first_fits = first < 13;
         const bool second_fits = second < 13;
         if (!first_fits && !second_fits) {
             // both numbers too big for month
             return false;
         }

         int month_num = 0;
         int day_num = 0;
         if (first_fits && second_fits) {
             // both numbers could be month; equal values need no flag
             if (first != second) {
                 month_ambiguous = true;
             }
             const bool first_is_month = (first == second) || month_first;
             month_num = first_is_month ? first : second;
             day_num = first_is_month ? second : first;
         } else if (first_fits) {
             month_num = first;
             day_num = second;
         } else {
             month_num = second;
             day_num = first;
         }

         month = CTime::MonthNumToName(month_num, CTime::eAbbr);
         day = day_num;
         return true;
     } catch ( ... ) {
         return false;
     }
 }


 // Attempts to rewrite a free-form date as "DD-Mon-YYYY", "Mon-YYYY" or "YYYY".
 //
 // test             date text; leading/trailing spaces are ignored
 // month_first      forwarded to s_ChooseMonthAndDay: when two numeric tokens
 //                  could both be a month, selects whether the first one is
 //                  taken as the month
 // month_ambiguous  [out] true when month/day order had to be guessed, or when
 //                  three purely numeric tokens were interpreted; assigned on
 //                  every path, including the early ISO-date returns
 //
 // Returns the input unchanged when IsISOFormatDate accepts it, the result of
 // x_RemoveIsoTime when x_IsFixableIsoDate accepts it, the reformatted date when
 // parsing succeeds, and an empty string otherwise (no tokens, more than three
 // tokens, unparsable tokens, invalid day for the month, or a resulting year
 // outside [1000, 2100)).
 string CSubSource::FixDateFormat (const string& test, bool month_first, bool& month_ambiguous)
 {
     // Set the out-parameter before any return so callers never observe an
     // unassigned value on the ISO-date shortcuts below.
     month_ambiguous = false;

     string orig_date = test;
     NStr::TruncateSpacesInPlace(orig_date);

     if (IsISOFormatDate(orig_date)) {
         return orig_date;
     } else if (x_IsFixableIsoDate(orig_date)) {
         return x_RemoveIsoTime(orig_date);
     }

     string reformatted_date;
     string month;
     int year = 0, day = 0;
     //string token_delimiters = " ,-/=_.";
     size_t num_original_tokens = 0;

     vector<string> tokens = x_GetDateTokens(orig_date);

     num_original_tokens = tokens.size();
     if (tokens.size() < 1 || tokens.size() > 3) {
         // no tokens or too many tokens
         return kEmptyStr;
     }

     // First pass: pull out every token whose meaning is unambiguous
     // (ordinal day, month name, number too large to be a day).  Tokens that
     // are consumed are erased; numbers in 1..31 stay for the second pass.
     string one_token;
     vector<string>::iterator it = tokens.begin();
     while (it != tokens.end()) {
         one_token = *it;
         bool found = false;
         if (NStr::EqualNocase(one_token, "1st") || NStr::EqualNocase(one_token, "first")) {
             day = 1;
             found = true;
         } else if (NStr::EqualNocase(one_token, "2nd") || NStr::EqualNocase(one_token, "second")) {
             day = 2;
             found = true;
         } else if (NStr::EqualNocase(one_token, "3rd") || NStr::EqualNocase (one_token, "third")) {
             day = 3;
             found = true;
         } else if (one_token.length() > 0
                    && isdigit((unsigned char)one_token[0])
                    && NStr::EndsWith(one_token, "th")) {
             // "<number>th": the part before the suffix is the day
             try {
                 day = NStr::StringToInt (one_token.substr(0, one_token.length() - 2));
                 found = true;
             } catch ( ... ) {
                 // threw exception while converting to int
                 return kEmptyStr;
             }
         } else if (isalpha((unsigned char)one_token[0])) {
             if (!NStr::IsBlank(month)) {
                 // already have month, error
                 return kEmptyStr;
             }
             // only the first three letters are used to look up the month
             if (one_token.length() > 3) {
                 one_token = one_token.substr(0, 3);
             }
             try {
                 int month_num = CTime::MonthNameToNum(one_token);
                 found = true;
                 month = CTime::MonthNumToName(month_num, CTime::eAbbr);
             } catch (const CTimeException&) {
                 // not a month name: the token is left in place, unconsumed
             }
         } else {
             try {
                 int this_val = NStr::StringToInt (one_token);
                 int min = 1;
                 int max = 31;
                 if (this_val < min) {
                     return kEmptyStr;
                 } else if (this_val > max) {
                     // too large for a day or month, so it must be the year
                     if (year > 0) {
                         // already have year, error
                         return kEmptyStr;
                     }
                     year = this_val;
                     found = true;
                 }
             } catch ( ... ) {
                 // threw exception while converting to int
                 return kEmptyStr;
             }
         }
         if (found) {
             it = tokens.erase(it);
         } else {
             it++;
         }
     }

     // Second pass: interpret whatever tokens are left.
     if (tokens.size() == 0) {
         // good - all tokens assigned to values
     } else if (tokens.size() > 2) {
         // three numbers: treat last one as year
         try {
             year = NStr::StringToInt(tokens[2]);
             if (year < 100) {
                 year += 2000;
             }
             if (!s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
                 return kEmptyStr;
             }
             // mark month as ambiguous, since we are guessing about year
             month_ambiguous = true;
         } catch ( ... ) {
             // threw exception while converting to int
             return kEmptyStr;
         }
     } else if (tokens.size() == 1) {
         // one number left: it fills the first missing slot among
         // year, month, day (in that order)
         try {
             int val = NStr::StringToInt (tokens[0]);
             if (year == 0) {
                 year = val;
             } else {
                 if (NStr::IsBlank (month)) {
                     if (val > 0 && val < 13) {
                         month = CTime::MonthNumToName(val, CTime::eAbbr);
                     } else {
                         // month number out of range
                         return kEmptyStr;
                     }
                 } else {
                     day = val;
                 }
             }
         } catch ( ... ) {
             // threw exception while converting to int
             return kEmptyStr;
         }
     } else if (!NStr::IsBlank (month)) {
         if (tokens.size() == 2) {
             // we have a month and two other numbers (we hope)
             int val1 = 0;
             int val2 = 0;
             try {
                 val1 = NStr::StringToInt (tokens[0]);
                 val2 = NStr::StringToInt (tokens[1]);
             } catch (CException& /*e*/) {
                 // not actually numbers
                 return kEmptyStr;
             }
             bool zero_pad_1 = NStr::StartsWith(tokens[0], "0");
             bool zero_pad_2 = NStr::StartsWith(tokens[1], "0");
             if (val1 < 10 && !zero_pad_1 && (val2 > 10 || zero_pad_2)) {
                 // first token is below 10 and not zero-padded while the
                 // second is above 10 or zero-padded: the first is the day
                 // and the second (+2000) is the year
                 day = val1;
                 year = val2 + 2000;
             } else if (val2 < 10 && !zero_pad_2 && (val1 > 10 || zero_pad_1)) {
                 // mirror case: the second token is the day and the
                 // first (+2000) is the year
                 day = val2;
                 year = val1 + 2000;
             } else {
                 // no padding hint: take the first token as the day if it is
                 // a valid day for this month, otherwise the second
                 int month_num = CTime::MonthNameToNum(month);
                 if (IsDayValueOkForMonth(val1, month_num, val2 + 2000)) {
                     day = val1;
                     year = val2 + 2000;
                 } else {
                     day = val2;
                     year = val1 + 2000;
                 }
             }
         } else {
             return kEmptyStr;
         }
     } else {
         // two numbers and no month name: decide which is month and which is day
         if (!s_ChooseMonthAndDay(tokens[0], tokens[1], month_first, month, day, month_ambiguous)) {
             return kEmptyStr;
         }
     }

     // make sure day is valid
     if (day > 0 && !NStr::IsBlank(month) && year > -1) {
         try {
             int month_num = CTime::MonthNameToNum(month);
             if (!IsDayValueOkForMonth(day, month_num, year)) {
                 return kEmptyStr;
             }
         } catch (const CTimeException&) {
             return kEmptyStr;
         }
     }

     if (year > 0 && year < 100 && num_original_tokens > 1) {
         // try to guess year from two-digit year provided,
         // only if it could not possibly be a day of the month
         // and if there were at least two tokens provided
         string year_date = NStr::NumericToString(year + 2000);
         bool format_bad = false;
         bool in_future = false;
         IsCorrectDateFormat(year_date, format_bad, in_future);
         // a 20xx year that would lie in the future is read as 19xx instead
         if (in_future) {
             year += 1900;
         } else {
             year += 2000;
         }
     }

     // Only years in [1000, 2100) produce output; anything else yields "".
     if (year >= 1000 && year < 2100) {
         reformatted_date = NStr::NumericToString (year);
         if (!NStr::IsBlank (month)) {
             reformatted_date = month + "-" + reformatted_date;
             if (day > 0) {
                 // day is written with two digits
                 string day_str = NStr::NumericToString (day);
                 if (day_str.length() < 2) {
                     day_str = "0" + day_str;
                 }
                 reformatted_date = day_str + "-" + reformatted_date;
             }
         }
     }

     return reformatted_date;
 }


 void CSubSource::DetectDateFormat(const string& orig_date, bool& ambiguous, bool &day_first)

 {

     ambiguous = false;

     day_first = false;

     vector<string> tokens = x_GetDateTokens(orig_date);

     if (tokens.size() != 3) {

         // can't do detection if there are more or less than three tokens

         ambiguous = true;

         return;

     }

     vector<int> nums;


     // detection is only valid if all tokens are numbers and at least one is known to be the year

     try {

         ITERATE(vector<string>, it, tokens) {

             nums.push_back(NStr::StringToInt (*it));

         }

     } catch ( ... ) {

         // threw exception while converting to int

         ambiguous = true;

         return;

     }

     enum EPos { eDay = 0, eMonth = 1, eYear = 2 };

     vector<int> positions;

     positions.push_back(0);

     positions.push_back(0);

     positions.push_back(0);


     int token_pos = 1;

     ITERATE(vector<int>, it, nums) {

         if (*it > 31) {

             if (positions[eYear] > 0) {

                 // already found a year

                 ambiguous = true;

                 return;

             }

             positions[eYear] = token_pos;

         } else if (*it > 12) {

             if (positions[eDay] > 0) {

                 // already found a day

                 ambiguous = true;

                 return;

             }

             positions[eDay] = token_pos;

         } else if (positions[eMonth] > 0) {

             // already found a month

             ambiguous = true;

             return;

         } else {

             positions[eMonth] = token_pos;

         }

         token_pos++;

     }

     if (positions[eDay] < positions[eMonth]) {

         day_first = true;

     } else {

         day_first = false;

     }

 }


 // Checks whether lat_lon is written as "<number> <N|S> <number> <E|W>".
 //
 // lat_lon            text to examine
 // format_correct     [out] true when re-printing the parsed values with the
 //                    input's own decimal precision reproduces the input
 // precision_correct  [out] true when format_correct and both values have
 //                    fewer than three decimal places (per x_GetPrecision)
 // lat_in_range       [out] true when format_correct and 0 <= latitude <= 90
 // lon_in_range       [out] true when format_correct and 0 <= longitude <= 180
 // lat_value          [out] signed latitude (negative for S); 0.0 if unparsed
 // lon_value          [out] signed longitude (negative for W); 0.0 if unparsed
 //
 // lat_value/lon_value are filled as soon as the sscanf pattern and the
 // hemisphere letters match, even if format_correct ends up false.
 void CSubSource::IsCorrectLatLonFormat (string lat_lon, bool& format_correct, bool& precision_correct,
                                      bool& lat_in_range, bool& lon_in_range,
                                      double& lat_value, double& lon_value)
 {
     format_correct = false;
     lat_in_range = false;
     lon_in_range = false;
     precision_correct = false;
     double ns = 0.0, ew = 0.0;
     char lon = '\0', lat = '\0';
     int processed = 0;

     lat_value = 0.0;
     lon_value = 0.0;

     if (NStr::IsBlank(lat_lon)) {
         return;
     } else if (sscanf (lat_lon.c_str(), "%lf %c %lf %c%n", &ns, &lat, &ew, &lon, &processed) != 4
                || size_t(processed) != lat_lon.length()) {
         // pattern did not match, or trailing text follows the E/W letter
         return;
     } else if ((lat != 'N' && lat != 'S') || (lon != 'E' && lon != 'W')) {
         return;
     } else {
         // init values found
         if (lat == 'N') {
             lat_value = ns;
         } else {
             lat_value = 0.0 - ns;
         }
         if (lon == 'E') {
             lon_value = ew;
         } else {
             lon_value = 0.0 - ew;
         }

         // make sure format is correct
         vector<string> pieces;
         NStr::Split(lat_lon, " ", pieces);
         if (pieces.size() > 3) {
             int precision_lat = x_GetPrecision(pieces[0]);
             int precision_lon = x_GetPrecision(pieces[2]);

             // The precisions and magnitudes come from the input text, so the
             // formatted length is unbounded.  Measure it first and size the
             // buffer accordingly instead of writing into a fixed array.
             const int needed = snprintf (nullptr, 0, "%.*lf %c %.*lf %c", precision_lat, ns, lat,
                                                                            precision_lon, ew, lon);
             if (needed < 0) {
                 // formatting failed; leave every flag false
                 return;
             }
             vector<char> reformatted(static_cast<size_t>(needed) + 1);
             snprintf (reformatted.data(), reformatted.size(), "%.*lf %c %.*lf %c", precision_lat, ns, lat,
                                                                                     precision_lon, ew, lon);

             size_t len = static_cast<size_t>(needed);
             if (NStr::StartsWith(lat_lon, reformatted.data())
                 && (len == lat_lon.length()
                   || (len < lat_lon.length()
                       && lat_lon[len] == ';'))) {
                 format_correct = true;
                 if (ns <= 90 && ns >= 0) {
                     lat_in_range = true;
                 }
                 if (ew <= 180 && ew >= 0) {
                     lon_in_range = true;
                 }
                 if (precision_lat < 3 && precision_lon < 3) {
                     precision_correct = true;
                 }
             }
         }
     }
 }


 // Re-prints a well-formed lat-lon value with at most four decimal places
 // per coordinate.
 //
 // orig  lat-lon text in "<number> <N|S> <number> <E|W>" form
 //
 // Returns orig unchanged when IsCorrectLatLonFormat reports a bad format,
 // an out-of-range coordinate, or an already acceptable precision.  Otherwise
 // returns the value re-printed with each precision capped at 4.  An empty
 // string is returned if splitting orig on spaces gives fewer than four pieces.
 string CSubSource::FixLatLonPrecision(const string& orig)
 {
     bool format_correct = false;
     bool precision_correct = false;
     bool lat_in_range = false;
     bool lon_in_range = false;
     double lat_value = 0.0;
     double lon_value = 0.0;
     IsCorrectLatLonFormat(orig, format_correct, precision_correct,
                           lat_in_range, lon_in_range,
                           lat_value, lon_value);
     if (!format_correct || !lat_in_range || !lon_in_range || precision_correct) {
         return orig;
     }
     vector<string> pieces;
     NStr::Split(orig, " ", pieces);
     if (pieces.size() > 3) {
         int precision_lat = x_GetPrecision(pieces[0]);
         int precision_lon = x_GetPrecision(pieces[2]);
         if (precision_lat > 4) {
             precision_lat = 4;
         }
         if (precision_lon > 4) {
             precision_lon = 4;
         }

         // Values are range-checked and precision is capped, so the text is
         // short; snprintf still bounds the write to the buffer size.
         char reformatted[1000];
         snprintf(reformatted, sizeof(reformatted), "%.*lf %c %.*lf %c",
             precision_lat, fabs(lat_value), pieces[1].c_str()[0],
             precision_lon, fabs(lon_value), pieces[3].c_str()[0]);
         return reformatted;
     }
     return kEmptyStr;
 }


 /*

 1. String should be converted to UTF8 string, this will get rid of \xC0 and similar substrings

 2. Every codepoint (note that this is not regular ascii "char") that is not a digit or a decimal point or a letter should be prepended with a space.

    Transitions from alpha to digit/point and from digit/point to alpha should also be prepended with a space.

 3. NStr::Split is called with space as a separator and Tokenize flag - need to check if Split works with UTF8 strings properly.

 4. After this we should have a vector of tokens, some of which are numbers and others are "modifiers" such as ', '', degrees, N, S, E, W, etc.

 5. A pattern string is created where each number is replaced with "1" and modifiers are normalized to "lat", or "N"; the actual numerical values are kept in a separate vector

 6. Based on the pattern, the vector of numbers is parsed into degrees, minutes, or seconds.

 7. NSEW and "latitude/longitude" are applied to degrees in the order of appearance; if none are present, another heuristic is used to determine which is latitude and which is longitude

 */


 // Returns a copy of old_str with spaces inserted so that tokens are
 // separated:
 //   - every code point >= 0x80 is replaced by a single space;
 //   - an ASCII character that is not a letter, digit, '.', '-' or '+' is
 //     surrounded by spaces;
 //   - a space is inserted wherever the text switches between a letter and a
 //     non-letter.
 static string s_InsertSpacesBetweenTokens(const string &old_str)
 {
     string spaced;
     string::const_iterator cursor = old_str.begin();
     while (cursor != old_str.end())
     {
         // Decode receives the iterator itself; the increment below then
         // steps to the next character.
         const TUnicodeSymbol code_point = CUtf8::Decode(cursor);
         ++cursor;

         if (!(code_point < 0x80))
         {
             spaced += ' ';
             continue;
         }

         const char ch = static_cast<char>(code_point);
         const bool is_letter = isalpha(ch) != 0;
         const bool stands_alone =
             !is_letter && !isdigit(ch) && ch != '.' && ch != '-' && ch != '+';

         if (stands_alone)
         {
             spaced += ' ';
             spaced += ch;
             spaced += ' ';
             continue;
         }

         if (!spaced.empty())
         {
             const bool prev_is_letter = isalpha(spaced.back()) != 0;
             if (prev_is_letter != is_letter)
             {
                 spaced += ' ';
             }
         }
         spaced += ch;
     }
     return spaced;
 }


 // Returns a copy of old_str in which a decimal point written with spaces
 // around it ("12 . 5") is joined back into one number ("12.5").  Code points
 // >= 0x80 are replaced by a space.  If the whole input consists only of
 // digits, '+', '-', '.' and whitespace, a space is additionally inserted
 // before every '+' and '-'.
 static string s_RemoveSpacesWithinNumbers(const string &old_str)
 {
     string joined;
     bool only_numeric = true;

     for (string::const_iterator cursor = old_str.begin(); cursor != old_str.end(); ++cursor)
     {
         const TUnicodeSymbol code_point = CUtf8::Decode(cursor);
         if (!(code_point < 0x80))
         {
             joined += ' ';
             only_numeric = false;
             continue;
         }

         const char ch = static_cast<char>(code_point);
         const size_t len = joined.size();

         // output so far ends in "<digit> . " and a digit follows:
         // collapse " . " into "."
         const bool split_decimal = len >= 4
             && joined[len - 1] == ' '
             && joined[len - 2] == '.'
             && joined[len - 3] == ' '
             && isdigit(joined[len - 4])
             && isdigit(ch);
         if (split_decimal)
         {
             joined.erase(len - 3);
             joined += '.';
         }

         joined += ch;

         const bool numeric_char =
             isdigit(ch) || ch == '+' || ch == '-' || ch == '.' || isspace(ch);
         if (!numeric_char)
         {
             only_numeric = false;
         }
     }

     if (only_numeric)
     {
         NStr::ReplaceInPlace(joined, "+", " +");
         NStr::ReplaceInPlace(joined, "-", " -");
     }
     return joined;
 }


 // Tells whether token converts to a double with NStr::StringToDouble.
 //
 // token   text to convert
 // result  optional; receives the converted value when the function
 //         returns true and is left untouched otherwise
 //
 // The conversion is run in no-throw mode; a zero result together with a
 // non-zero errno is treated as failure.
 static bool s_IsNumber(const string &token, double *result = NULL)
 {
     const double value = NStr::StringToDouble(token, NStr::fConvErr_NoThrow);
     const bool conversion_failed = (value == 0) && (errno != 0);
     if (conversion_failed)
     {
         return false;
     }
     if (result != NULL)
     {
         *result = value;
     }
     return true;
 }


 // Classifies each token of a lat-lon string and builds a space-joined
 // pattern describing the sequence.
 //
 // tokens     [in/out] tokens to classify; degree/minute/second words are
 //            overwritten in place with "degrees", "'" and "\""
 // numbers    [out] value of every numeric token, in order of appearance
 // anum       [out] original text of every numeric token (parallel to numbers)
 // precision  [out] count of digits after the decimal point of every numeric
 //            token (parallel to numbers)
 // lat_long   [out] "lat" / "long" for each latitude/longitude word seen
 // nsew       [out] "N", "S", "E" or "W" for each compass word seen
 //
 // Pattern elements: "1" for a number, "degrees", "'", "\"", "lat" (for both
 // latitude and longitude words) and "N" (for any single compass direction).
 // Separator tokens and the combined directions NW/NE/SW/SE add nothing to
 // the pattern.  On an unrecognized token, numbers is cleared and an empty
 // string is returned.
 static string s_NormalizeTokens(vector<string> &tokens, vector<double> &numbers, vector<string> &anum, vector<int> &precision, vector<string> &lat_long,  vector<string> &nsew)
 {
     vector<string> pattern;

     // true when text equals one of words, ignoring case
     const auto matches_nocase = [](const string &text, std::initializer_list<const char*> words) -> bool
     {
         for (const char *word : words)
         {
             if (NStr::EqualNocase(text, word))
             {
                 return true;
             }
         }
         return false;
     };

     // records one numeric token in all parallel outputs
     const auto add_number = [&](double value, const string &text, int digits)
     {
         numbers.push_back(value);
         anum.push_back(text);
         pattern.push_back("1");
         precision.push_back(digits);
     };

     for (size_t idx = 0; idx < tokens.size(); ++idx)
     {
         string &word = tokens[idx];

         // plain number
         double value;
         if (s_IsNumber(word, &value))
         {
             int digits = 0;
             if (NStr::Find(word, ".") != NPOS && !NStr::EndsWith(word, "."))
             {
                 digits = static_cast<int>(word.length() - word.find('.') - 1);
             }
             add_number(value, word, digits);
             continue;
         }

         // three numbers joined by dots, e.g. "12.34.56"
         {
             vector<string> parts;
             NStr::Split(word, ".", parts);
             double first, second, third;
             if (parts.size() == 3 && s_IsNumber(parts[0], &first) && s_IsNumber(parts[1], &second) && s_IsNumber(parts[2], &third))
             {
                 add_number(first, parts[0], 0);
                 add_number(second, parts[1], 0);
                 add_number(third, parts[2], 0);
                 continue;
             }
         }

         // number ' number ' : the second apostrophe is read as seconds
         if (word == "\'" && idx >= 3 && s_IsNumber(tokens[idx - 1]) && tokens[idx - 2] == "\'" && s_IsNumber(tokens[idx - 3]))
         {
             word = "\"";
         }

         if (matches_nocase(word, {"degrees", "deg", "deg.", "degree"}))
         {
             word = "degrees";
             pattern.push_back("degrees");
             continue;
         }
         if (word == "\'" || matches_nocase(word, {"min", "min.", "minute", "minutes"}))
         {
             word = "\'";
             pattern.push_back("\'");
             continue;
         }
         if (word == "\"" || matches_nocase(word, {"sec", "sec.", "second", "seconds"}))
         {
             word = "\"";
             pattern.push_back("\"");
             continue;
         }
         if (word == "," || word == ":" || word == "_" || word == "&" || word == "." || word == ";" || word == "#" || NStr::EqualNocase(word, "and"))
         {
             // separators carry no information
             continue;
         }
         if (matches_nocase(word, {"lattitude", "latitude", "lat", "lat."}))
         {
             pattern.push_back("lat");
             lat_long.push_back("lat");
             continue;
         }
         if (matches_nocase(word, {"longitude", "lo", "lon", "long", "lo.", "lon.", "long."}))
         {
             // longitude words share the "lat" pattern element
             pattern.push_back("lat");
             lat_long.push_back("long");
             continue;
         }

         // single compass direction: always "N" in the pattern, the actual
         // letter goes to nsew
         const char *direction = NULL;
         if (word == "N" || NStr::EqualNocase(word, "north"))
         {
             direction = "N";
         }
         else if (word == "S" || NStr::EqualNocase(word, "south"))
         {
             direction = "S";
         }
         else if (word == "E" || NStr::EqualNocase(word, "east"))
         {
             direction = "E";
         }
         else if (word == "W" || NStr::EqualNocase(word, "west") || word == "Wdeg")
         {
             direction = "W";
         }
         if (direction != NULL)
         {
             pattern.push_back("N");
             nsew.push_back(direction);
             continue;
         }

         // combined direction: two nsew entries, nothing in the pattern
         if (word == "NW" || word == "NE" || word == "SW" || word == "SE")
         {
             nsew.push_back(word.substr(0, 1));
             nsew.push_back(word.substr(1, 1));
             continue;
         }

         // unrecognized token
         numbers.clear();
         return kEmptyStr;
     }

     return NStr::Join(pattern, " ");
 }


 // Puts a parsed coordinate pair into (latitude, longitude) order, applies
 // hemisphere signs and validates the ranges.
 //
 // numbers    [in/out] the two coordinate values; cleared when the pair
 //            cannot be interpreted
 // precision  [in/out] decimal precision of each value; swapped together
 //            with numbers
 // lat_long   "lat"/"long" labels in order of appearance; must be empty or
 //            hold exactly two entries
 // nsew       [in/out] "N"/"S"/"E"/"W" letters in order of appearance; must be
 //            empty or hold exactly two entries (reordered alongside numbers)
 //
 // On success numbers holds {latitude, longitude} with |latitude| <= 90 and
 // |longitude| <= 180.  On any failure numbers is empty.
 static void s_ReorderNorthSouthEastWest(vector<double> &numbers, vector<int> &precision, const vector<string> &lat_long, vector<string> &nsew)
 {
     // precision is indexed in parallel with numbers below, so it needs at
     // least two entries as well
     if (numbers.size() != 2 || precision.size() < 2)
     {
         numbers.clear();
         return;
     }

     // explicit labels: "long ... lat ..." means the pair is reversed
     if (lat_long.size() == 2)
     {
         if (lat_long.front() == "long")
         {
             swap(numbers[0], numbers[1]);
             swap(precision[0], precision[1]);
             if (nsew.size() == 2) {
                 swap(nsew[0], nsew[1]);
             }
         }
     }
     else if (!lat_long.empty())
     {
         numbers.clear();
         return;
     }

     if (nsew.size() == 2)
     {
         // E/W given before N/S: reverse the pair
         if ((nsew[0] == "E" || nsew[0] == "W") &&
             (nsew[1] == "N" || nsew[1] == "S"))
         {
             swap(numbers[0], numbers[1]);
             swap(precision[0], precision[1]);
             swap(nsew[0], nsew[1]);
         }

         if (nsew[0] == "N")
         {
             numbers[0] = fabs(numbers[0]);
         }
         else if (nsew[0] == "S")
         {
             // leave an exact zero alone so it does not become -0
             if (numbers[0] != 0)
                 numbers[0] = -fabs(numbers[0]);
         }
         else
         {
             numbers.clear();
             return;
         }

         if (nsew[1] == "E")
         {
             numbers[1] = fabs(numbers[1]);
         }
         else if (nsew[1] == "W")
         {
             if (numbers[1] != 0)
                 numbers[1] = -fabs(numbers[1]);
         }
         else
         {
             numbers.clear();
             return;
         }
     }
     else if (!nsew.empty())
     {
         numbers.clear();
         return;
     }

     // No labels at all: if the first value cannot be a latitude but the
     // second can, assume the pair was given as longitude, latitude.  The
     // bound is inclusive because a latitude of exactly 90 passes the range
     // check below.
     if (lat_long.empty() && nsew.empty() && fabs(numbers[0]) > 90 && fabs(numbers[1]) <= 90)
     {
         swap(numbers[0], numbers[1]);
         swap(precision[0], precision[1]);
     }

     if (fabs(numbers[0]) > 90 || fabs(numbers[1]) > 180)
     {
         numbers.clear();
         return;
     }
 }


 // Parses a space-separated lat-lon string into two decimal-degree values.
 //
 // new_str    text already split into space-separated tokens
 // numbers    [out] {first coordinate, second coordinate} in decimal degrees
 //            after s_ReorderNorthSouthEastWest has ordered/signed them;
 //            empty when the text cannot be interpreted
 // precision  [out] number of decimal places to print for each coordinate
 //
 // The tokens are reduced to a pattern by s_NormalizeTokens ("1" = number,
 // "N" = any compass letter, "lat" = latitude/longitude word).  Each branch
 // below handles a family of patterns that share the same layout of numeric
 // values, so the numbers[] indices in a branch follow from the count and
 // order of "1" entries in its patterns.  Minutes are divided by 60 and
 // seconds by 3600; a minutes value adds 2 and a seconds value adds 4 to the
 // reported precision.  Minutes/seconds must lie in [0, 60).
 static void s_GetLatLong(const string &new_str, vector<double> &numbers, vector<int> &precision)
 {
     vector<string> tokens;
     NStr::Split(new_str, " ", tokens, NStr::fSplit_Tokenize);
     vector<string> lat_long;
     vector<string> nsew;
     vector<string> anum;
     string pattern = s_NormalizeTokens(tokens, numbers, anum, precision, lat_long, nsew);
     if (pattern.empty())
     {
         numbers.clear();
         return;
     }
     // results are built in local vectors and swapped into the outputs at the end
     vector<double> degrees(2, 0);
     vector<int> prec(2, 0);
     // The sign is taken from the text of the degrees token (anum) rather than
     // from its value, so a leading '-' is honored even when degrees is zero.
     int sign1 = 1;
     int sign2 = 1;
     // degrees only for both coordinates
     if ( pattern == "1 1" ||
      pattern == "1 N 1 N" ||
          pattern == "N 1 N 1" ||
      pattern == "1 degrees N 1 degrees N" ||
      pattern == "lat 1 lat 1" ||
          pattern == "1 N lat 1 N lat" ||
          pattern == "1 degrees N lat 1 degrees N lat")
     {
         degrees[0] = numbers[0];
         degrees[1] = numbers[1];
         prec[0] = precision[0];
         prec[1] = precision[1];
     }
     // first: degrees + seconds; second: degrees + minutes
     else if ((pattern == "1 1 \" 1 1 '" ||
           pattern == "1 degrees 1 \" N 1 degrees 1 ' N")
          && numbers[1] < 60 && numbers[3] < 60
              && numbers[1] >= 0 && numbers[3] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         sign2 = anum[2][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 3600);
         degrees[1] = sign2*(fabs(numbers[2]) + numbers[3] / 60);
         prec[0] = max(precision[0], precision[1] + 4);
         prec[1] = max(precision[2], precision[3] + 2);
     }
     // first: degrees + minutes; second: degrees only
     else if ( (pattern == "1 1 ' 1" ||
                pattern == "1 degrees 1 ' N 1 degrees N")
               && numbers[1] < 60
               && numbers[1] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60);
         degrees[1] = numbers[2];
         prec[0] = max(precision[0], precision[1] + 2);
         prec[1] = precision[2];
         }
     // first: degrees + minutes + seconds; second: degrees only
     else if (pattern == "1 1 ' 1 \" 1"
          && numbers[1] < 60 && numbers[2] < 60
              && numbers[1] >= 0 && numbers[2] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
         degrees[1] = numbers[3];
         prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4);
         prec[1] = precision[3];
     }
     // first: degrees + minutes + seconds; second: degrees + minutes
     else if ((pattern == "1 1 ' 1 \" 1 1 '" ||
           pattern == "1 1 1 N 1 1 N" ||
           pattern == "1 degrees 1 ' 1 \" N 1 degrees 1 ' N")
          && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
              && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         sign2 = anum[3][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
         degrees[1] = sign2*(fabs(numbers[3]) + numbers[4] / 60);
         prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4);
         prec[1] = max(precision[3], precision[4] + 2);
     }
     // degrees + minutes + seconds for both coordinates
     else if (( pattern == "1 1 ' 1 \" 1 1 ' 1 \"" ||
            pattern == "1 1 ' 1 \" N 1 1 ' 1 \" N" ||
            pattern == "1 degrees 1 ' 1 \" 1 degrees 1 ' 1 \"" ||
            pattern == "1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \" N" ||
            pattern == "N 1 degrees 1 ' 1 \" N 1 degrees 1 ' 1 \"" ||
            pattern == "1 degrees 1 ' 1 N 1 degrees 1 ' 1 N" ||
            pattern == "1 degrees 1 1 N 1 degrees 1 1 N" ||
            pattern == "1 1 1 N 1 1 1 N")
              && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60 && numbers[5] < 60
              && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0 && numbers[5] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         sign2 = anum[3][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
         degrees[1] = sign2*(fabs(numbers[3]) + numbers[4] / 60 + numbers[5] / 3600);
         prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4);
         prec[1] = max(max(precision[3], precision[4] + 2), precision[5] + 4);
     }
     // degrees + minutes for both coordinates
     else if (( pattern == "1 1 ' 1 1 '" ||
            pattern == "1 1 N 1 1 N" ||
                pattern == "1 1 ' N 1 1 ' N" ||
            pattern == "1 degrees 1 ' N 1 degrees 1 ' N" ||
                pattern == "lat 1 degrees 1 ' N lat 1 degrees 1 ' N" ||
            pattern == "1 degrees 1 N 1 degrees 1 N" ||
            pattern == "1 degrees 1 N 1 degrees 1 ' N" ||
                pattern == "1 degrees 1 ' N 1 degrees 1 N" ||
                pattern == "N 1 degrees 1 ' N 1 degrees 1" ||
                pattern == "N 1 degrees 1 ' N 1 degrees 1 '" ||
                pattern == "N 1 degrees 1 ' N 1 1 '")
          && numbers[1] < 60  && numbers[3] < 60
              && numbers[1] >= 0  && numbers[3] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         sign2 = anum[2][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60);
         degrees[1] = sign2*(fabs(numbers[2]) + numbers[3] / 60);
         prec[0] = max(precision[0], precision[1] + 2);
         prec[1] = max(precision[2], precision[3] + 2);
     }
     // first: degrees only; second: degrees + minutes
     else if ((pattern == "1 N 1 1 N" ||
               pattern == "1 degrees N 1 degrees 1 ' N")
          &&  numbers[2] < 60
              &&  numbers[2] >= 0)
     {
         sign2 = anum[1][0] == '-' ? -1 : 1;
         degrees[0] = numbers[0];
         degrees[1] = sign2*(fabs(numbers[1]) + numbers[2] / 60);
         prec[0] = precision[0];
         prec[1] = max(precision[1], precision[2] + 2);
     }
     // first: degrees + minutes; second: degrees + minutes + seconds
     else if ((pattern == "1 degrees 1 ' 1 degrees 1 ' 1 \"" ||
               pattern == "N 1 1 N 1 1 1")
          && numbers[1] < 60 && numbers[3] < 60 && numbers[4] < 60
              && numbers[1] >= 0 && numbers[3] >= 0 && numbers[4] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         sign2 = anum[2][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60);
         degrees[1] = sign2*(fabs(numbers[2]) + numbers[3] / 60 + numbers[4] / 3600);
         prec[0] = max(precision[0], precision[1] + 2);
         prec[1] = max(max(precision[2], precision[3] + 2), precision[4] + 4);
     }
     // first: degrees only; second: degrees + minutes + seconds
     else if (pattern == "1 degrees 1 degrees 1 ' 1 \""
          && numbers[2] < 60 && numbers[3] < 60
              && numbers[2] >= 0 && numbers[3] >= 0)
     {
         sign2 = anum[1][0] == '-' ? -1 : 1;
         degrees[0] = numbers[0];
         degrees[1] = sign2*(fabs(numbers[1]) + numbers[2] / 60 + numbers[3] / 3600);
         prec[0] = precision[0];
         prec[1] = max(max(precision[1], precision[2] + 2), precision[3] + 4);
     }
     // first: degrees + minutes + seconds; second: degrees + seconds
     else if (pattern == "1 degrees 1 ' 1 \" N 1 degrees 1 \" N"
          && numbers[1] < 60 && numbers[2] < 60 && numbers[4] < 60
              && numbers[1] >= 0 && numbers[2] >= 0 && numbers[4] >= 0)
     {
         sign1 = anum[0][0] == '-' ? -1 : 1;
         sign2 = anum[3][0] == '-' ? -1 : 1;
         degrees[0] = sign1*(fabs(numbers[0]) + numbers[1] / 60 + numbers[2] / 3600);
         degrees[1] = sign2*(fabs(numbers[3]) + numbers[4] / 3600);
         prec[0] = max(max(precision[0], precision[1] + 2), precision[2] + 4);
         prec[1] = max(precision[3], precision[4] + 4);
     }
     // unknown pattern, or a minutes/seconds value out of range:
     // empty vectors make the reorder step below report failure
     else
     {
         degrees.clear();
         prec.clear();
     }
     // hand the results to the caller's vectors, then order/sign/validate them
     swap(degrees, numbers);
     swap(prec, precision);
     s_ReorderNorthSouthEastWest(numbers, precision, lat_long, nsew);
 }


 // Limits both coordinates of a "<lat> <N|S> <lon> <E|W>" string to at most
 // eight digits after the decimal point.
 //
 // subname  lat-lon text; not modified
 //
 // Returns subname unchanged when it is empty, does not start with a digit,
 // cannot be read as four whitespace-separated fields, or has hemisphere
 // fields other than N/S and E/W.  Otherwise returns the four fields joined
 // by single spaces, with over-long fractions cut off (not rounded).
 string s_ShortenLatLon( string &subname ) {
     if (subname.empty()) {
        return subname;
     }
     const char first = subname[0];
     if (first < '0' || first > '9') {
         return subname;
     }

     string lat;
     string north_or_south;
     string lon;
     string east_or_west;

     // extract the pieces
     CNcbiIstrstream lat_lon_stream( subname );
     lat_lon_stream >> lat;
     lat_lon_stream >> north_or_south;
     lat_lon_stream >> lon;
     lat_lon_stream >> east_or_west;
     if( lat_lon_stream.bad() ) {
         return subname;
     }

     if( north_or_south != "N" && north_or_south != "S" ) {
         return subname;
     }

     if( east_or_west != "E" && east_or_west != "W" ) {
         return subname;
     }

     // Cut a value after the eighth fractional digit.  The decimal point must
     // actually be present: a "not found" position is the largest size_t, and
     // adding 9 to it would wrap around and truncate point-less values.
     const auto clip_fraction = [](string &value) {
         const size_t dot = value.find('.');
         if (dot != string::npos && dot + 9 < value.length()) {
             value.erase(dot + 9);
         }
     };
     clip_fraction(lat);
     clip_fraction(lon);

     return lat + " " + north_or_south + " " + lon + " " + east_or_west;
 }


 // Tries to convert a free-form latitude/longitude string into the form
 // produced by MakeLatLon, shortened by s_ShortenLatLon.
 //
 // orig_lat_lon  text to convert
 // guess         not used in this function
 //
 // Returns the normalized string, or an empty string when s_GetLatLong cannot
 // extract a coordinate pair.
 string CSubSource::FixLatLonFormat (string orig_lat_lon, bool guess)
 {
     // NOTE(review): the result of ParseEscapes is not stored here; confirm
     // whether the parsed text was meant to replace orig_lat_lon.
     NStr::ParseEscapes(orig_lat_lon);

     CStringUTF8 utf8_text = CUtf8::AsUTF8(orig_lat_lon, CUtf8::GuessEncoding(orig_lat_lon));

     // strip enclosing double quotes
     if (NStr::StartsWith(utf8_text, "\""))
     {
         NStr::TrimPrefixInPlace(utf8_text, "\"");
         NStr::TrimSuffixInPlace(utf8_text, "\"");
     }
     // two apostrophes stand for a seconds mark
     NStr::ReplaceInPlace(utf8_text, "\'\'", "\"");

     string rejoined = s_RemoveSpacesWithinNumbers(utf8_text);
     string tokenized = s_InsertSpacesBetweenTokens(rejoined);
     // NOTE(review): the result of Sanitize is likewise not stored; confirm intent.
     NStr::Sanitize(tokenized);

     vector<double> coords;
     vector<int> digits;
     s_GetLatLong(tokenized, coords, digits);

     string formatted;
     if (!coords.empty())
     {
         formatted = MakeLatLon(coords[0], coords[1], digits[0], digits[1]);
     }
     return s_ShortenLatLon(formatted);
 }


 // Formats a signed latitude/longitude pair as "<lat> <N|S> <lon> <E|W>".
 //
 // Negative latitudes are written as positive values followed by 'S',
 // negative longitudes as positive values followed by 'W'.  Each number is
 // rendered by NStr::DoubleToString with the requested precision, and a
 // trailing "." is removed.
 //
 // @param lat_value      latitude, negative meaning south.
 // @param lon_value      longitude, negative meaning west.
 // @param lat_precision  precision passed to DoubleToString for latitude.
 // @param lon_precision  precision passed to DoubleToString for longitude.
 // @return               the formatted lat_lon text.
 string CSubSource::MakeLatLon(double lat_value, double lon_value, int lat_precision, int lon_precision )
 {
     const bool is_south = lat_value < 0;
     const bool is_west  = lon_value < 0;

     string lat_text = NStr::DoubleToString(is_south ? -lat_value : lat_value, lat_precision);
     string lon_text = NStr::DoubleToString(is_west ? -lon_value : lon_value, lon_precision);

     NStr::TrimSuffixInPlace(lat_text, ".");
     NStr::TrimSuffixInPlace(lon_text, ".");

     const char lat_hemisphere = is_south ? 'S' : 'N';
     const char lon_hemisphere = is_west ? 'W' : 'E';

     return lat_text + " " + lat_hemisphere + " " + lon_text + " " + lon_hemisphere;
 }


 // Builds a CLatLonCountryId describing where a coordinate falls relative to
 // the claimed country/province.
 //
 // Lookup order:
 //   1. region of the country map containing the point;
 //   2. otherwise, region of the water map containing the point, plus the
 //      closest land region;
 //   3. otherwise, the closest land region and the closest water region.
 // If none of those agrees with the claim, the distance from the point to the
 // claimed region is looked up (country map first, water map only when no
 // province was given).
 //
 // @param lat_value  latitude of the point.
 // @param lon_value  longitude of the point.
 // @param country    claimed country (or water body) name.
 // @param province   claimed province, may be blank.
 // @return           a newly allocated object; the caller deletes it.
 CLatLonCountryId *CSubSource::x_CalculateLatLonId(float lat_value, float lon_value, string country, string province)
 {
     CLatLonCountryId *result = new CLatLonCountryId(lat_value, lon_value);
     bool matches_claim = false;

     // Records the closest land region found by FindClosestToLatLon and
     // notes whether it agrees with the claimed country and province.
     auto note_closest_land = [&]() {
         double land_distance = 0.0;
         const CCountryExtreme * nearest_land =
             m_LatLonCountryMap->FindClosestToLatLon (lat_value, lon_value, 5.0, land_distance);
         if (!nearest_land) {
             return;
         }
         result->SetClosestFull(nearest_land->GetCountry());
         result->SetClosestCountry(nearest_land->GetLevel0());
         result->SetClosestProvince(nearest_land->GetLevel1());
         result->SetLandDistance(m_LatLonCountryMap->AdjustAndRoundDistance (land_distance));
         if (NStr::EqualNocase(country, result->GetClosestCountry())
             && (NStr::IsBlank(province) || NStr::EqualNocase(province, nearest_land->GetLevel1()))) {
             matches_claim = true;
         }
     };

     // lookup region by coordinates, or find nearest region and calculate distance
     const CCountryExtreme * land_region =
         m_LatLonCountryMap->GuessRegionForLatLon(lat_value, lon_value, country, province);
     if (land_region) {
         result->SetFullGuess(land_region->GetCountry());
         result->SetGuessCountry(land_region->GetLevel0());
         result->SetGuessProvince(land_region->GetLevel1());
         if (NStr::EqualNocase(country, result->GetGuessCountry())
             && (NStr::IsBlank(province) || NStr::EqualNocase(province, result->GetGuessProvince()))) {
             matches_claim = true;
         }
     } else {
         // not inside a country, check water
         const CCountryExtreme * water_region =
             m_LatLonWaterMap->GuessRegionForLatLon(lat_value, lon_value, country);
         if (water_region) {
             // found inside water
             result->SetGuessWater(water_region->GetCountry());
             if (NStr::EqualNocase(country, result->GetGuessWater())) {
                 matches_claim = true;
             }

             // also see if close to land for coastal warning (if country is land)
             // or proximity message (if country is water)
             note_closest_land();
         } else {
             // may be coastal inlet, area of data insufficiency
             note_closest_land();

             double water_distance = 0.0;
             const CCountryExtreme * nearest_water =
                 m_LatLonWaterMap->FindClosestToLatLon (lat_value, lon_value, 5.0, water_distance);
             if (nearest_water) {
                 result->SetClosestWater(nearest_water->GetLevel0());
                 result->SetWaterDistance(m_LatLonWaterMap->AdjustAndRoundDistance (water_distance));
                 if (NStr::EqualNocase(country, result->GetClosestWater())) {
                     matches_claim = true;
                 }
             }
         }
     }

     if (matches_claim) {
         return result;
     }

     // guess is not the provided country or province: calculate distance to claimed country
     double claimed_distance = 0.0;
     const CCountryExtreme * claimed =
         m_LatLonCountryMap->IsNearLatLon (lat_value, lon_value, 5.0, claimed_distance, country, province);
     if (claimed) {
         if (claimed_distance < ErrorDistance(lat_value, lon_value, m_LatLonCountryMap->GetScale())) {
             // close enough
             result->SetGuessCountry(country);
             result->SetGuessProvince(province);
             result->SetFullGuess(claimed->GetCountry());
         } else {
             result->SetClaimedFull(claimed->GetCountry());
             result->SetClaimedDistance(m_LatLonCountryMap->AdjustAndRoundDistance (claimed_distance));
         }
     } else if (NStr::IsBlank(province)) {
         claimed = m_LatLonWaterMap->IsNearLatLon (lat_value, lon_value, 5.0, claimed_distance, country, province);
         if (claimed) {
             result->SetClaimedFull(claimed->GetCountry());
             result->SetClaimedDistance(m_LatLonWaterMap->AdjustAndRoundDistance (claimed_distance));
         }
     }

     return result;
 }


 // Lookup table pairing a named sea, gulf, bay, strait or passage (first
 // column) with the larger body of water it is treated as part of (second
 // column).  It backs sc_WaterPairMap, which x_FindSurroundingOcean() searches.
 // NOTE(review): CStaticArrayMap presumably requires the rows to stay sorted
 // by key under the PNocase_CStr ordering -- confirm before adding a row out
 // of alphabetical order.
 typedef SStaticPair<const char*, const char*>  TWaterPairElem;

 static const TWaterPairElem k_water_pair_map[] = {

     {"Adriatic Sea",         "Mediterranean Sea"},

     {"Aegean Sea",           "Mediterranean Sea"},

     {"Alboran Sea",          "Mediterranean Sea"},

     {"Andaman Sea",          "Indian Ocean"},

     {"Arabian Sea",          "Indian Ocean"},

     {"Argentine Sea",        "Atlantic Ocean"},

     {"Ariake Sea",           "Pacific Ocean"},

     {"Baffin Bay",           "Atlantic Ocean"},

     {"Balearic Sea",         "Mediterranean Sea"},

     {"Baltic Sea",           "Atlantic Ocean"},

     {"Barents Sea",          "Arctic Ocean"},

     {"Bay of Bengal",        "Indian Ocean"},

     {"Beaufort Sea",         "Arctic Ocean"},

     {"Bering Sea",           "Pacific Ocean"},

     {"Bismarck Sea",         "Pacific Ocean"},

     {"Black Sea",            "Mediterranean Sea"},

     {"Bohai Sea",            "Pacific Ocean"},

     {"Caribbean Sea",        "Atlantic Ocean"},

     {"Celebes Sea",          "Pacific Ocean"},

     {"Champlain Sea",        "Atlantic Ocean"},

     {"Chilean Sea",          "Pacific Ocean"},

     {"China Seas",           "Pacific Ocean"},

     {"Chukchi Sea",          "Arctic Ocean"},

     {"Coral Sea",            "Pacific Ocean"},

     {"Davis Strait",         "Atlantic Ocean"},

     {"East China Sea",       "Pacific Ocean"},

     {"East Siberian Sea",    "Arctic Ocean"},

     {"English Channel",      "Atlantic Ocean"},

     {"Erythraean Sea",       "Indian Ocean"},

     {"Golfo de California",  "Pacific Ocean"},

     {"Greenland Sea",        "Arctic Ocean"},

     {"Gulf of Mexico",       "Atlantic Ocean"},

     {"Gulf of Thailand",     "Pacific Ocean"},

     {"Gulf of Tonkin",       "Pacific Ocean"},

     {"Hudson Bay",           "Arctic Ocean"},

     {"Ionian Sea",           "Mediterranean Sea"},

     {"Irish Sea",            "Atlantic Ocean"},

     {"Irminger Sea",         "Atlantic Ocean"},

     {"James Bay",            "Atlantic Ocean"},

     {"Java Sea",             "Indian Ocean"},

     {"Kara Sea",             "Arctic Ocean"},

     {"Koro Sea",             "Pacific Ocean"},

     {"Labrador Sea",         "Atlantic Ocean"},

     {"Laccadive Sea",        "Indian Ocean"},

     {"Laptev Sea",           "Arctic Ocean"},

     {"Ligurian Sea",         "Mediterranean Sea"},

     {"Lincoln Sea",          "Arctic Ocean"},

     {"Myrtoan Sea",          "Mediterranean Sea"},

     {"North Sea",            "Atlantic Ocean"},

     {"Norwegian Sea",        "Atlantic Ocean"},

     {"Pechora Sea",          "Arctic Ocean"},

     {"Persian Gulf",         "Indian Ocean"},

     {"Philippine Sea",       "Pacific Ocean"},

     {"Red Sea",              "Indian Ocean"},

     {"Salish Sea",           "Pacific Ocean"},

     {"Sargasso Sea",         "Atlantic Ocean"},

     {"Scotia Sea",           "Southern Ocean"},

     {"Sea of Azov",          "Black Sea"},

     {"Sea of Chiloe",        "Pacific Ocean"},

     {"Sea of Crete",         "Mediterranean Sea"},

     {"Sea of Japan",         "Pacific Ocean"},

     {"Sea of Okhotsk",       "Pacific Ocean"},

     {"Sea of the Hebrides",  "Atlantic Ocean"},

     {"Sea of Zanj",          "Indian Ocean"},

     {"Seas of Greenland",    "Atlantic Ocean"},

     {"Sethusamudram",        "Indian Ocean"},

     {"Sibutu Passage",       "Pacific Ocean"},

     {"Solomon Sea",          "Pacific Ocean"},

     {"South China Sea",      "Pacific Ocean"},

     {"Sulu Sea",             "Pacific Ocean"},

     {"Tasman Sea",           "Pacific Ocean"},

     {"Thracian Sea",         "Mediterranean Sea"},

     {"Timor Sea",            "Indian Ocean"},

     {"Tyrrhenian Sea",       "Mediterranean Sea"},

     {"Wandel Sea",           "Arctic Ocean"},

     {"White Sea",            "Arctic Ocean"},

     {"Yellow Sea",           "Pacific Ocean"}

 };

 // Static map view over k_water_pair_map, keyed by C string with the
 // PNocase_CStr comparator.
 typedef CStaticArrayMap<const char*, const char*, PNocase_CStr> TWaterPairMap;

 DEFINE_STATIC_ARRAY_MAP(TWaterPairMap, sc_WaterPairMap, k_water_pair_map);


 // Looks up the larger body of water that a named sea belongs to.
 //
 // @param water  name of the sea to look up in sc_WaterPairMap.
 // @return       the paired name from the table, or kEmptyStr when the name
 //               is not listed.
 static string x_FindSurroundingOcean (string& water)
 {
     const TWaterPairMap::const_iterator entry = sc_WaterPairMap.find(water.c_str());
     if( entry == sc_WaterPairMap.end() ) {
         return kEmptyStr;
     }
     return entry->second;
 }


 // Checks whether a lat_lon value is consistent with a country value.
 //
 // @param input_countryname  country text; anything after the first ';' or
 //                           ',' is ignored, and "Country: Province" is
 //                           split at the first ':'.
 // @param lat_lon            in/out.  If the format check fails and the text
 //                           contains a comma, it is cut at the last comma
 //                           and re-checked.  When the coordinates look
 //                           swapped or have a wrong hemisphere, it is
 //                           replaced by the suggested corrected value.
 // @param check_state        when true, province-level mismatches are
 //                           reported as well.
 // @param errcode            out.  Category of the reported problem, or
 //                           eLatLonCountryErr_None.
 // @return  a description of the mismatch, or an empty string when nothing
 //          is reported (including blank input, a lat_lon that is badly
 //          formatted or out of range, and an unrecognized country).
 string CSubSource::ValidateLatLonCountry (const string& input_countryname, string& lat_lon, bool check_state, ELatLonCountryErr& errcode)
 {
     errcode = eLatLonCountryErr_None;
     string countryname = input_countryname;
     if (NStr::IsBlank(countryname) || NStr::IsBlank(lat_lon)) {
         return kEmptyStr;
     }

     {
         // create the country and water lookup maps on first use, under a lock
         static std::mutex m;

         std::lock_guard g(m);

         if ( m_LatLonCountryMap.get() == 0 ) {
             m_LatLonCountryMap.reset (new CLatLonCountryMap(false));
         }
         if ( m_LatLonWaterMap.get() == 0 ) {
             m_LatLonWaterMap.reset (new CLatLonCountryMap(true));
         }
     }

     // only do these checks if the latlon format is good
     bool format_correct, lat_in_range, lon_in_range, precision_correct;
     double lat_value = 0.0, lon_value = 0.0;
     CSubSource::IsCorrectLatLonFormat (lat_lon, format_correct, precision_correct,
                                lat_in_range, lon_in_range,
                                lat_value, lon_value);
     if (!format_correct) {
         // may have comma and then altitude, so just get lat_lon component
         size_t pos = NStr::Find(lat_lon, ",", NStr::eNocase, NStr::eReverseSearch);
         if (pos != NPOS) {
             lat_lon = lat_lon.substr(0, pos);
             CSubSource::IsCorrectLatLonFormat (lat_lon, format_correct, precision_correct,
                                        lat_in_range, lon_in_range,
                                        lat_value, lon_value);
         }
     }

     // reality checks
     if (!format_correct || !lat_in_range || !lon_in_range) {
         // incorrect lat_lon format should be reported elsewhere
         // incorrect latitude range should be reported elsewhere
         // incorrect longitude range should be reported elsewhere
         return kEmptyStr;
     }

     // get rid of comments after semicolon or comma in country name
     size_t pos = NStr::Find(countryname, ";");
     if (pos != NPOS) {
         countryname = countryname.substr(0, pos);
     }
     pos = NStr::Find(countryname, ",");
     if (pos != NPOS) {
         countryname = countryname.substr(0, pos);
     }

     // adjust for special cases
     if (NStr::StartsWith(countryname, "Norway: Svalbard")) {
         countryname = "Svalbard";
     }

     string country = countryname;
     string province;
     pos = NStr::Find(country, ":");
     if (pos != NPOS) {
         // is the full string in the list?
         if (m_LatLonCountryMap->HaveLatLonForRegion(countryname)) {
             province = country.substr(pos + 1);
             NStr::TruncateSpacesInPlace(province, NStr::eTrunc_Both);
         }
         country = country.substr(0, pos);
         NStr::TruncateSpacesInPlace(country, NStr::eTrunc_Both);
     }
     if (NStr::IsBlank(country)) {
         return kEmptyStr;
     }

     // known exceptions - don't even bother calculating any further
     if (NStr::EqualNocase (country, "Antarctica") && lat_value < -60.0) {
         return kEmptyStr;
     }

     if (! NStr::IsBlank(province)) {
         // do not attempt quick exit
     } else if (m_LatLonCountryMap->HaveLatLonForRegion(country)) {
         if (m_LatLonCountryMap->IsCountryInLatLon(country, lat_value, lon_value)) {
             return kEmptyStr;
         }
     } else if (m_LatLonWaterMap->HaveLatLonForRegion(country)) {
         if (m_LatLonWaterMap->IsCountryInLatLon(country, lat_value, lon_value)) {
             return kEmptyStr;
         }
     } else if (NStr::EqualNocase (country, "State of Palestine")) {
         // not in either map, but handled explicitly further down
     } else {
         // report unrecognized country
         return kEmptyStr;
     }

     // x_CalculateLatLonId always hands back a freshly allocated object, which
     // this function owns and must delete on every path below.
     CLatLonCountryId *id = x_CalculateLatLonId(lat_value, lon_value, country, province);
     CLatLonCountryId::TClassificationFlags flags = id->Classify(country, province);

     string wguess = id->GetGuessWater();
     string cguess = id->GetGuessCountry();

     // special case where subsection of country has been identified but is not in coordinates of country
     // VR-840
     if (province.empty() && NStr::Equal(cguess, country)) {
         delete id;
         return kEmptyStr;
     }

     if (NStr::EqualNocase (country, "State of Palestine") &&
         (NStr::EqualNocase (cguess, "Gaza Strip") ||
          NStr::EqualNocase (cguess, "West Bank"))) {
         delete id;
         return kEmptyStr;
     }

     if (NStr::IsBlank (cguess) && (! NStr::IsBlank (wguess))) {
         string parent = x_FindSurroundingOcean (wguess);
         if ((! NStr::IsBlank (parent)) && NStr::EqualNocase (country, parent)) {
             delete id;
             return kEmptyStr;
         }
     }

     double neardist = 0.0;
     CLatLonCountryMap::TLatLonAdjustFlags adjustment = CLatLonCountryMap::fNone;

     if (!flags && m_LatLonCountryMap->IsNearLatLon(lat_value, lon_value, 2.0, neardist, country) && neardist < 5.0) {
         id->SetGuessCountry (country);
         id->SetGuessProvince (kEmptyStr);
         flags = id->Classify(country, province);
     }

     // Evaluates one candidate correction of the coordinates.
     // Returns false when the candidate classifies to nothing, so the next
     // correction should be tried; returns true when the search must stop.
     // On a stop, the candidate replaces id only if it lands on a country and
     // not in water.  A rejected candidate is always freed here; previously it
     // was leaked when it classified to something but was not adopted.
     auto try_adjustment = [&](double adj_lat, double adj_lon,
                               CLatLonCountryMap::TLatLonAdjustFlags kind) -> bool {
         CLatLonCountryId *adjust_id = x_CalculateLatLonId(adj_lat, adj_lon, country, province);
         CLatLonCountryId::TClassificationFlags adjusted_flags = adjust_id->Classify(country, province);
         if (!adjusted_flags) {
             delete adjust_id;
             return false;
         }
         string awguess = adjust_id->GetGuessWater();
         string acguess = adjust_id->GetGuessCountry();
         if (NStr::IsBlank (awguess) && (! NStr::IsBlank (acguess))) {
             delete id;
             id = adjust_id;
             flags = adjusted_flags;
             adjustment = kind;
         } else {
             delete adjust_id;
         }
         return true;
     };

     if (!flags && !m_LatLonCountryMap->IsNearLatLon(lat_value, lon_value, 20.0, neardist, country)
         && !m_LatLonWaterMap->IsNearLatLon(lat_value, lon_value, 20.0, neardist, country)) {
         /* do not flip from water */
         // try, in order: swapped values, negated latitude, negated longitude
         if (!try_adjustment(lon_value, lat_value, CLatLonCountryMap::fFlip)
             && !try_adjustment(-lat_value, lon_value, CLatLonCountryMap::fNegateLat)) {
             try_adjustment(lat_value, -lon_value, CLatLonCountryMap::fNegateLon);
         }
     }

     string error;

     if (adjustment != CLatLonCountryMap::fNone) {
         if (adjustment == CLatLonCountryMap::fFlip) {
             errcode = eLatLonCountryErr_Value;
             error = "Latitude and longitude values appear to be exchanged";
             lat_lon = MakeLatLon(lon_value, lat_value);
         } else if (adjustment == CLatLonCountryMap::fNegateLat) {
             errcode = eLatLonCountryErr_Value;
             if (lat_value < 0.0) {
                 error = "Latitude should be set to N (northern hemisphere)";
             } else {
                 error = "Latitude should be set to S (southern hemisphere)";
             }
             lat_lon = MakeLatLon(-lat_value, lon_value);
         } else if (adjustment == CLatLonCountryMap::fNegateLon) {
             errcode = eLatLonCountryErr_Value;
             if (lon_value < 0.0) {
                 error = "Longitude should be set to E (eastern hemisphere)";
             } else {
                 error = "Longitude should be set to W (western hemisphere)";
             }
             lat_lon = MakeLatLon(lat_value, -lon_value);
         }
     } else if ((flags & CLatLonCountryId::fCountryMatch) && (flags & CLatLonCountryId::fProvinceMatch)) {
         // success!  nothing to report
     } else if (flags & CLatLonCountryId::fWaterMatch) {
         // success!  nothing to report
     } else if ((flags & CLatLonCountryId::fCountryMatch) && NStr::IsBlank(province)) {
         if (check_state) {
             string full_guess = id->GetFullGuess();
             if (!NStr::Equal(full_guess, country)) {
                 errcode = eLatLonCountryErr_State;
                 error = "Lat_lon " + lat_lon + " is in " + id->GetFullGuess()
                     + " (more specific than " + country + ")";
             }
         }
     } else if (!NStr::IsBlank(id->GetGuessWater())) {
         if (flags & (CLatLonCountryId::fCountryClosest | CLatLonCountryId::fProvinceClosest)) {
             bool suppress = false;
             string reportregion;
             string nosubphrase;
             string desphrase = "designated subregion ";
             string subphrase = "another subregion ";
             string phrase = nosubphrase;
             bool show_claimed = false;

             if (id->GetLandDistance() < 100) {
                 // for now, will not report
                 // this is a policy decision
                 suppress = true;
             } else if (NStr::Find(countryname, "Island") != NPOS) {
                 suppress = true;
             }

             if (flags & CLatLonCountryId::fProvinceClosest) {
                 reportregion = countryname;
                 phrase = desphrase;
             } else {
                 // wasn't closest province, so must be closest country
                 if (!NStr::IsBlank(province) && check_state) {
                     phrase = subphrase;
                     reportregion = id->GetClosestFull();
                 } else {
                     reportregion = id->GetClosestCountry();
                 }
                 if (!NStr::IsBlank(id->GetClaimedFull())) {
                     show_claimed = true;
                 }
             }
             string water = id->GetGuessWater();
             if (NStr::EqualNocase (water, "Red Sea") &&
                (NStr::EqualNocase (reportregion, "Egypt") ||
                 NStr::EqualNocase (reportregion, "Saudi Arabia") ||
                 NStr::EqualNocase (reportregion, "Sudan") ||
                 NStr::EqualNocase (reportregion, "Eritrea") ||
                 // was spelled "Dijibouti", which could not match the country name
                 NStr::EqualNocase (reportregion, "Djibouti") ||
                 NStr::EqualNocase (reportregion, "Yemen") ||
                 NStr::EqualNocase (reportregion, "Israel") ||
                 NStr::EqualNocase (reportregion, "Jordan"))) {
                 // Red Sea next to a bordering country: nothing to report
             } else if (NStr::EqualNocase (water, "Gulf of Mexico") &&
                (NStr::EqualNocase (reportregion, "USA") ||
                 NStr::EqualNocase (reportregion, "Mexico"))) {
                 // Gulf of Mexico next to a bordering country: nothing to report
             } else if (!suppress) {
                 errcode = eLatLonCountryErr_Water;
                 if (show_claimed) {
                     error = "Lat_lon '" + lat_lon + "' is closest to " + phrase + "'" + reportregion + "' at distance "
                             + NStr::IntToString(id->GetLandDistance())
                             + " km, but in water '" + id->GetGuessWater()
                             + "' - claimed region '" + id->GetClaimedFull()
                             + "' is at distance " + NStr::IntToString(id->GetClaimedDistance()) + " km";
                 } else {
                     error = "Lat_lon '" + lat_lon + "' is closest to " + phrase + "'" + reportregion
                             + "' at distance " + NStr::IntToString(id->GetLandDistance()) + " km, but in water '"
                             + id->GetGuessWater() + "'";
                 }
             }
         } else if (neardist > 0.0) {
             errcode = eLatLonCountryErr_Water;
             error = "Lat_lon '" + lat_lon + "' is in water '" + id->GetGuessWater() + "', '"
                         + countryname + "' is " + NStr::IntToString(m_LatLonCountryMap->AdjustAndRoundDistance(neardist)) + " km away";
         } else {
             errcode = eLatLonCountryErr_Water;
             error = "Lat_lon '" + lat_lon + "' is in water '" + id->GetGuessWater() + "'";
         }
     } else if (!NStr::IsBlank(id->GetGuessCountry())) {
         string full_guess = id->GetFullGuess();
         if (NStr::EqualNocase (country, "China") && NStr::EqualNocase (full_guess, "Hong Kong")) {
             // skip
         } else if (NStr::IsBlank(id->GetClaimedFull())) {
             if (NStr::Equal(id->GetGuessCountry(), country) && !NStr::Equal(id->GetGuessProvince(), province)) {
                 errcode = eLatLonCountryErr_State;
             } else {
                 errcode = eLatLonCountryErr_Country;
             }
             error = "Lat_lon '" + lat_lon + "' maps to '" + id->GetFullGuess() + "' instead of '"
                         + countryname + "'";
         } else {
             if (NStr::IsBlank(province)) {
                 errcode = eLatLonCountryErr_Country;
                 error = "Lat_lon '" + lat_lon + "' maps to '" + id->GetFullGuess() + "' instead of '"
                             + country + "' - claimed region '" + id->GetClaimedFull()
                             + "' is at distance " + NStr::IntToString(id->GetClaimedDistance()) + " km";
             } else {
                 errcode = eLatLonCountryErr_Country;
                 if (NStr::EqualNocase(id->GetGuessCountry(), country)) {
                     errcode = eLatLonCountryErr_State;
                 }
                 if (errcode == eLatLonCountryErr_Country || check_state) {
                     error = "Lat_lon '" + lat_lon + "' maps to '" + id->GetFullGuess() + "' instead of '"
                                 + countryname + "' - claimed region '" + id->GetClaimedFull()
                                 + "' is at distance " + NStr::IntToString(id->GetClaimedDistance()) + " km";
                 } else {
                     // province-level mismatch, but state checking is off
                     errcode = eLatLonCountryErr_None;
                 }
             }
         }
     } else if (!NStr::IsBlank(id->GetClosestCountry())) {
         errcode = eLatLonCountryErr_Country;
         error = "Lat_lon '" + lat_lon + "' is closest to '" + id->GetClosestCountry() + "' instead of '"
                     + countryname + "'";
     } else if (!NStr::IsBlank(id->GetClosestWater())) {
         errcode = eLatLonCountryErr_Water;
         error = "Lat_lon '" + lat_lon + "' is closest to '" + id->GetClosestWater() + "' instead of '"
                     + countryname + "'";
     } else {
         errcode = eLatLonCountryErr_Country;
         error = "Unable to determine mapping for lat_lon '" + lat_lon + "' and country '" + countryname + "'";
     }

     delete id;
     return error;
 }


 // Individual words accepted inside a sex qualifier value.
 // CSubSource::IsValidSexQualifierValue() and
 // CSubSource::FixSexQualifierValue() search this list linearly, after
 // lower-casing their input, so every entry is lower case.
 // FixSexQualifierValue() expands "m" and "f" to "male" and "female".
 const char* sm_ValidSexQualifierTokens[] = {

   "asexual",

   "bisexual",

   "diecious",

   "dioecious",

   "f",

   "female",

   "gelding",

   "hermaphrodite",

   "intersex",

   "m",

   "male",

   "mixed",

   "monecious",

   "monoecious",

   "neuter",

   "unisexual",

 };


 // Complete phrases that are accepted as a sex qualifier value as-is.
 const char* sm_ValidSexQualifierPhrases[] = {
   "pooled males and females",
   "pooled male and female",
 };

 // Tells whether value is exactly one of the entries of
 // sm_ValidSexQualifierPhrases.  The comparison is case-sensitive; callers
 // lower-case the text beforehand.
 //
 // @param value  candidate phrase.
 // @return       true when the phrase is listed, false otherwise.
 bool s_IsValidSexQualifierPhrase(const string& value)
 {
     const char* const* first = sm_ValidSexQualifierPhrases;
     const char* const* last =
         first + sizeof(sm_ValidSexQualifierPhrases) / sizeof(sm_ValidSexQualifierPhrases[0]);

     return find(first, last, value) != last;
 }


 bool CSubSource::IsValidSexQualifierValue (const string& value)


 {

     string str = value;

     NStr::ToLower(str);


     if (s_IsValidSexQualifierPhrase(str)) {

         return true;

     }


     vector<string> words;

     NStr::Split(str, " ,/", words);

     if (words.size() == 0) {

         return false;

     }


     size_t max = sizeof(sm_ValidSexQualifierTokens) / sizeof(const char*);


     const char* *begin = sm_ValidSexQualifierTokens;

     const char* *end = &(sm_ValidSexQualifierTokens[max]);


     bool is_good = false;


     ITERATE(vector<string>, w, words) {

         if (NStr::Equal(*w, "and")) {

             // ok, skip it

         } else {

             if (find(begin, end, *w) != end) {

                 is_good = true;

             } else {

                 is_good = false;

                 break;

             }

         }

     }

     return is_good;

 }


 // Produces a normalized sex qualifier value, when that is possible.
 //
 // The value is lower-cased.  An accepted whole phrase is returned as-is.
 // Otherwise the text is split on space, comma and slash; "and" is dropped,
 // "pooled" / "(pooled)" set a flag, "m" and "f" become "male" and "female",
 // and any piece not listed in sm_ValidSexQualifierTokens aborts the fix.
 // The surviving words are joined as "a", "a and b" or "a, b, and c", with
 // "pooled " in front when the flag was set.
 //
 // @param value  qualifier text to normalize.
 // @return       the normalized text, or kEmptyStr when it cannot be fixed.
 string CSubSource::FixSexQualifierValue (const string& value)
 {
     string lowered = value;
     NStr::ToLower(lowered);

     if (s_IsValidSexQualifierPhrase(lowered)) {
         return lowered;
     }

     vector<string> pieces;
     NStr::Split(lowered, " ,/", pieces);
     if (pieces.empty()) {
         return kEmptyStr;
     }

     const char* const* first = sm_ValidSexQualifierTokens;
     const char* const* last = first + ArraySize(sm_ValidSexQualifierTokens);

     vector<string> accepted;
     bool pooled = false;

     for (const string& piece : pieces) {
         if (NStr::Equal(piece, "and")) {
             // connective, skip it
             continue;
         }
         if (NStr::EqualNocase(piece, "(pooled)") || NStr::EqualNocase(piece, "pooled")) {
             pooled = true;
             continue;
         }
         if (find(first, last, piece) == last) {
             // if any bad values, can't autofix
             return kEmptyStr;
         }
         if (NStr::Equal(piece, "m")) {
             accepted.push_back("male");
         } else if (NStr::Equal(piece, "f")) {
             accepted.push_back("female");
         } else {
             accepted.push_back(piece);
         }
     }

     if (accepted.empty()) {
         // no good tokens, can't autofix
         return kEmptyStr;
     }

     const size_t count = accepted.size();
     string fixed = accepted[0];
     for (size_t i = 1; i < count; ++i) {
         if (count > 2) {
             // three or more items: comma after every item but the last
             fixed += ",";
         }
         if (i + 1 == count) {
             fixed += " and";
         }
         fixed += " " + accepted[i];
     }

     if (pooled) {
         fixed = "pooled " + fixed;
     }
     return fixed;
 }


 void s_CollectNumberAndUnits(const string& value, string& number, string& units)

 {

     number.clear();

     units.clear();


     if (NStr::IsBlank(value)) {

         return;

     }


     string::const_iterator it = value.begin();

     if (*it == '+' || *it == '-') {

         number += *it;

         it++;

     }


     bool any_digit = false;

     bool skip_comma = true;

     while (it != value.end() && (isdigit(*it) || *it == ',')) {

         if (*it == ',') {

             if (skip_comma) {

                 // only skip the first comma

                 skip_comma = false;

             } else {

                 break;

             }

         } else {

             any_digit = true;

             number += *it;

         }

         it++;

     }


     if (it == value.end()) {

         number.clear();

         return;

     }


     if (*it == '.') {

         number += *it;

         it++;

         while (it != value.end() && isdigit(*it)) {

             any_digit = true;

             number += *it;

             it++;

         }

     }


     if (it == value.end() || *it != ' ' || !any_digit) {

         number.clear();

         return;

     }


     it++;

     while (it != value.end()) {

         units += *it;

         it++;

     }

 }


 // Tells whether an altitude value is a number followed by the unit "m".
 //
 // @param value  altitude text to check.
 // @return       true when s_CollectNumberAndUnits() finds a number and the
 //               units are exactly "m" (case-sensitive).
 bool CSubSource::IsAltitudeValid (const string& value)
 {
     if (NStr::IsBlank(value)) {
         return false;
     }

     string number;
     string units;
     s_CollectNumberAndUnits(value, number, units);

     return !NStr::IsBlank(number) && NStr::EqualCase(units, "m");
 }


 // Counts the characters that follow the first '.' in num_str.
 //
 // @param num_str  textual number.
 // @return         number of characters after the first '.', or 0 when
 //                 there is no '.'.
 int CSubSource::x_GetPrecision(const string& num_str)
 {
     const size_t dot = NStr::Find(num_str, ".");
     if (dot == NPOS) {
         return 0;
     }
     return int(num_str.length() - dot - 1);
 }


 // Formats val in fixed notation ("%.*lf") with the given number of decimals.
 //
 // The output is measured before it is written, so neither a very large
 // magnitude nor a very large precision can overrun a fixed-size buffer.
 //
 // @param val        value to format.
 // @param precision  number of digits after the decimal point.
 // @return           the formatted text, or kEmptyStr if formatting fails.
 string CSubSource::x_FormatWithPrecision(double val, int precision)
 {
     // first pass only reports how many characters are needed
     const int needed = snprintf(nullptr, 0, "%.*lf", precision, val);
     if (needed < 0) {
         return kEmptyStr;
     }

     // one extra byte for the terminating NUL written by snprintf
     string rval(static_cast<size_t>(needed) + 1, '\0');
     snprintf(&rval[0], rval.size(), "%.*lf", precision, val);
     rval.resize(static_cast<size_t>(needed));
     return rval;
 }


 // Normalizes an altitude value to "<number> m".
 //
 // Values given in feet ("ft.", "ft", "feet", "foot") are multiplied by
 // 0.3048 and formatted with as many decimals as the original number had.
 // Values given in metres ("m.", "meters", "meter", "m") keep their number.
 //
 // @param value  altitude text to normalize.
 // @return       the normalized text, or kEmptyStr when the value is blank,
 //               has no usable number, or uses any other unit.
 string CSubSource::FixAltitude (const string& value)
 {
     if (NStr::IsBlank(value)) {
         return kEmptyStr;
     }

     string number;
     string units;
     s_CollectNumberAndUnits(value, number, units);
     if (NStr::IsBlank(number)) {
         return kEmptyStr;
     }

     const bool given_in_feet = NStr::Equal(units, "ft.")
                             || NStr::Equal(units, "ft")
                             || NStr::Equal(units, "feet")
                             || NStr::Equal(units, "foot");
     if (given_in_feet) {
         const int precision = x_GetPrecision(number);
         const double meters = NStr::StringToDouble(number) * 0.3048;
         number = x_FormatWithPrecision(meters, precision);
         units = "m";
     }

     const bool given_in_meters = NStr::Equal(units, "m.")
                               || NStr::Equal(units, "meters")
                               || NStr::Equal(units, "meter")
                               || NStr::Equal(units, "m");
     if (!given_in_meters) {
         return kEmptyStr;
     }
     return number + " " + "m";
 }


 // From VR-793:

 // A.    For segment, endogenous_virus_name:

 //   1.  Must begin with a letter or number

 //   2.  Spaces and other printable characters are permitted

 //   3.  Must not be empty, must not be longer than 240 characters


 bool CSubSource::x_GenericRepliconNameValid(const string& value)

 {

     if (NStr::IsBlank(value)) {

         return false;

     } else if (!isalnum(value.c_str()[0])) {

         return false;

     } else if (value.length() > 240) {

         return false;

     }


     for (auto it : value) {

         if (!isprint(it)) {

             return false;

         }

     }


     return true;

 }


 // Validates a segment value (VR-793, section A) by delegating to
 // x_GenericRepliconNameValid().
 bool CSubSource::IsSegmentValid(const string& value)

 {

     return x_GenericRepliconNameValid(value);

 }


 // Validates an endogenous_virus_name value (VR-793, section A) by delegating
 // to x_GenericRepliconNameValid().
 bool CSubSource::IsEndogenousVirusNameValid(const string& value)

 {

     return x_GenericRepliconNameValid(value);

 }


 // From VR-793:

 // B.    For chromosome, linkage_group and plasmid_name values:

 //   4.  Must begin with a letter or number

 //   5.  Must not be empty, must not be longer than 32 characters

 //   6.  Must not contain <tab>

 //   7.  Spaces and other printable characters are permitted

 //   8.  Must not contain the word "plasmid" (ignoring case)

 //   9.  Must not contain the word "chromosome" (ignoring case)

 //   10. Must not contain the phrase "linkage group" (ignoring case)

 //   11. Must not contain the series of letters "chr" (ignoring case)

 //   12. Must not contain the taxname (ignoring case)

 //   14. Must not contain the genus (ignoring case)

 //   15. Must not contain the species (ignoring case)

 //       except allow the species to match the value after an initial 'p' (e.g., JX416328)

 //   16. Must not contain the series of letters "chrm" (ignoring case)

 //   17. Must not contain the series of letters "chrom" (ignoring case)

 //   18. Must not contain the phrase "linkage-group" (ignoring case)

 // Checks rules B.14 and B.15 (see RW-1436): does `value` contain the genus or
 // the species taken from `taxname`?
 //
 // The genus is the part of `taxname` before its first space; the "species" is
 // everything after that space. Both searches ignore case.
 //
 // Returns true when the value fails the test, i.e. it contains the genus, or
 // it contains the species anywhere other than immediately after a leading 'p'.
 // Returns false when `taxname` is blank, starts with "Plasmid " or
 // "IncQ plasmid" (case-insensitive), or contains no space.
 static bool s_FailsGenusOrSpeciesTest(const string& value, const string& taxname)
 {
     const bool taxname_exempt =
         NStr::IsBlank(taxname) ||
         NStr::StartsWith(taxname, "Plasmid ", NStr::eNocase) ||
         NStr::StartsWith(taxname, "IncQ plasmid", NStr::eNocase);
     if (taxname_exempt) {
         return false;
     }

     const size_t space_at = NStr::Find(taxname, " ");
     if (space_at == NPOS) {
         return false;
     }

     // B.14
     const string genus = taxname.substr(0, space_at);
     if (NStr::FindNoCase(value, genus) != NPOS) {
         return true;
     }

     // B.15
     const string species = taxname.substr(space_at + 1);
     const size_t hit = NStr::FindNoCase(value, species);
     if (hit == NPOS) {
         return false;
     }

     // exception: the species directly after an initial 'p' is tolerated
     const bool follows_initial_p = (hit == 1 && value[0] == 'p');
     return !follows_initial_p;
 }


 bool CSubSource::x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(const string& value, const string& taxname)

 {

     if (NStr::FindNoCase(taxname, "Borrelia") != NPOS || NStr::FindNoCase(taxname, "Borreliella") != NPOS) {

         if (NStr::StartsWith(value, "cp") || NStr::StartsWith(value, "lp")) {

             return true;

         }

     }

     if (!x_GenericRepliconNameValid(value)) {

         // checks for isalnum start, blankness and unprintable characters

         // B.4, B.5, B.7

         return false;

     } else if (value.length() > 32) {

         // B.5

         return false;

     }


     if (s_FailsGenusOrSpeciesTest(value, taxname)) {

         return false;

     }


     static string s_ForbiddenPhrases[] = {

         "\t",  // B.6.

         "plasmid", // B.8

         "chromosome", // B.9

         "linkage group", // B.10

         "chr", // B.11

         "linkage_group", // B.15

         "chrm", // B.16

         "chrom", // B.17

         "linkage-group" // B.18

     };


     for (auto it : s_ForbiddenPhrases) {

         if (NStr::FindNoCase(value, it) != NPOS) {

             return false;

         }

     }

     return true;

 }


 // Validates a chromosome name.
 //
 // A blank value, or one that starts with "LG" (case-insensitive), is rejected
 // immediately; everything else is judged by
 // x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules().
 bool CSubSource::IsChromosomeNameValid(const string& value, const string& taxname)
 {
     const bool rejected_up_front =
         NStr::IsBlank(value) || NStr::StartsWith(value, "LG", NStr::eNocase);
     if (rejected_up_front) {
         return false;
     }
     return x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(value, taxname);
 }


 // Validates a linkage-group name: it must be non-blank and satisfy
 // x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules().
 bool CSubSource::IsLinkageGroupNameValid(const string& value, const string& taxname)
 {
     return !NStr::IsBlank(value)
         && x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(value, taxname);
 }


 // VR-793

 // C.    For plasmid_name values:

 //   19. Exception- megaplasmid is legal

 bool CSubSource::IsPlasmidNameValid(const string& value, const string& taxname)

 {

     if (NStr::IsBlank(value)) {

         return false;

     }

     if (NStr::Equal(value, "megaplasmid")) {

         return true;

     }

     if (NStr::StartsWith(value, "megaplasmid ") && value.length() > 12 && NStr::Find(value.substr(12), " ") == NPOS) {

         return true;

     }

     if (NStr::Equal(value, "F") || NStr::Equal(value, "F factor") || NStr::Equal(value, "F plasmid")) {

         return true;

     }


     if (NStr::FindNoCase(value,"plasmid") != NPOS) {

         static const set<string, PNocase_Conditional> s_PlasmidNameExceptions =

         { // This list comes from RW-1436/RW-1430

             "Plasmid F",

             "Plasmid R",

             "Plasmid pIP630",

             "Plasmid pNG2",

             "Plasmid pGT633",

             "Plasmid pE5",

             "Plasmid pIP1527",

             "Plasmid pAM77",

             "Plasmid pAZ1",

             "Plasmid RP4"

         };


         if (s_PlasmidNameExceptions.find(value) != end(s_PlasmidNameExceptions)) {

             return true;

         }

         return false;

     }


     return x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(value, taxname);

 }


 // Cell-line contamination lookup table.
 //
 // TContaminatingCellLine    : (contaminating cell line, species it comes from),
 //                             as used in the message built by CheckCellLine().
 // TSpeciesContaminant       : organism name -> TContaminatingCellLine.
 // TCellLineContaminationMap : upper-cased cell line name -> TSpeciesContaminant.
 typedef pair<string, string> TContaminatingCellLine;

 typedef map<string, TContaminatingCellLine> TSpeciesContaminant;

 typedef map<string, TSpeciesContaminant> TCellLineContaminationMap;


 // Filled once by s_InitializeCellLineContaminationMap(), which takes
 // s_CellLineContaminationMutex and sets the Initialized flag when done.
 static TCellLineContaminationMap s_CellLineContaminationMap;

 static bool s_CellLineContaminationMapInitialized = false;

 DEFINE_STATIC_FAST_MUTEX(s_CellLineContaminationMutex);


 // presumably supplies the kCellLine table read during initialization -- confirm
 #include "cell_line.inc"


 static void s_ProcessCellLineLine(const CTempString& line)

 {

     vector<string> tokens;

     NStr::Split(line, "\t", tokens);

     if (tokens.size() < 4) {

         ERR_POST_X(1, Warning << "Not enough columns in cell_line entry " << line

                    << "; disregarding");

     } else {

         NStr::ToUpper(tokens[0]);

         (s_CellLineContaminationMap[tokens[0]])[tokens[1]] = TContaminatingCellLine(tokens[2], tokens[3]);

     }

 }


 // Loads every entry of kCellLine into s_CellLineContaminationMap on the first
 // call; later calls return immediately. The whole function runs while holding
 // s_CellLineContaminationMutex.
 static void s_InitializeCellLineContaminationMap(void)
 {
     CFastMutexGuard GUARD(s_CellLineContaminationMutex);
     if (s_CellLineContaminationMapInitialized) {
         return;
     }

     // read table
     const size_t num_entries = sizeof(kCellLine) / sizeof (*kCellLine);
     for (size_t idx = 0; idx < num_entries; ++idx) {
         s_ProcessCellLineLine(kCellLine[idx]);
     }

     s_CellLineContaminationMapInitialized = true;
 }


 // Looks up a cell line / organism pair in the contamination table.
 //
 // cell_line - cell line name; matched after conversion to upper case
 // organism  - organism name; matched exactly
 //
 // Returns a warning message naming the contaminating cell line and its
 // species when the pair has a non-blank entry, otherwise an empty string.
 string CSubSource::CheckCellLine(const string& cell_line, const string& organism)
 {
     s_InitializeCellLineContaminationMap();

     string cell_line_search = cell_line;
     NStr::ToUpper(cell_line_search);

     // Use find() rather than operator[]: operator[] would insert an empty
     // entry into the shared static map for every unknown name, growing it on
     // each miss and modifying it outside the initialization mutex.
     const TCellLineContaminationMap::const_iterator by_cell_line =
         s_CellLineContaminationMap.find(cell_line_search);
     if (by_cell_line == s_CellLineContaminationMap.end()) {
         return kEmptyStr;
     }

     const TSpeciesContaminant& by_organism = by_cell_line->second;
     const TSpeciesContaminant::const_iterator hit = by_organism.find(organism);
     if (hit == by_organism.end() || NStr::IsBlank(hit->second.first)) {
         return kEmptyStr;
     }

     const TContaminatingCellLine& contaminant = hit->second;
     return "The International Cell Line Authentication Committee database indicates that " +
            cell_line + " from " + organism + " is known to be contaminated by " +
            contaminant.first +
            " from " + contaminant.second +
            ". Please see http://iclac.org/databases/cross-contaminations/ for more information and references.";
 }


 // =============================================================================

 //                                 Country Names

 // =============================================================================


 // Currently legal country names.
 // Entries must be in alphabetical order (case sensitive), which is why "USA"
 // precedes "Uganda". The array is wrapped by s_CountriesSet (defined right
 // after it), which CCountries::IsValid() searches.
 static const char* const s_Countries[] = {

     "Afghanistan",

     "Albania",

     "Algeria",

     "American Samoa",

     "Andorra",

     "Angola",

     "Anguilla",

     "Antarctica",

     "Antigua and Barbuda",

     "Arctic Ocean",

     "Argentina",

     "Armenia",

     "Aruba",

     "Ashmore and Cartier Islands",

     "Atlantic Ocean",

     "Australia",

     "Austria",

     "Azerbaijan",

     "Bahamas",

     "Bahrain",

     "Baker Island",

     "Baltic Sea",

     "Bangladesh",

     "Barbados",

     "Bassas da India",

     "Belarus",

     "Belgium",

     "Belize",

     "Benin",

     "Bermuda",

     "Bhutan",

     "Bolivia",

     "Borneo",

     "Bosnia and Herzegovina",

     "Botswana",

     "Bouvet Island",

     "Brazil",

     "British Virgin Islands",

     "Brunei",

     "Bulgaria",

     "Burkina Faso",

     "Burundi",

     "Cambodia",

     "Cameroon",

     "Canada",

     "Cape Verde",

     "Cayman Islands",

     "Central African Republic",

     "Chad",

     "Chile",

     "China",

     "Christmas Island",

     "Clipperton Island",

     "Cocos Islands",

     "Colombia",

     "Comoros",

     "Cook Islands",

     "Coral Sea Islands",

     "Costa Rica",

     "Cote d'Ivoire",

     "Croatia",

     "Cuba",

     "Curacao",

     "Cyprus",

     "Czechia",

     "Democratic Republic of the Congo",

     "Denmark",

     "Djibouti",

     "Dominica",

     "Dominican Republic",

     "Ecuador",

     "Egypt",

     "El Salvador",

     "Equatorial Guinea",

     "Eritrea",

     "Estonia",

     "Eswatini",

     "Ethiopia",

     "Europa Island",

     "Falkland Islands (Islas Malvinas)",

     "Faroe Islands",

     "Fiji",

     "Finland",

     "France",

     "French Guiana",

     "French Polynesia",

     "French Southern and Antarctic Lands",

     "Gabon",

     "Gambia",

     "Gaza Strip",

     "Georgia",

     "Germany",

     "Ghana",

     "Gibraltar",

     "Glorioso Islands",

     "Greece",

     "Greenland",

     "Grenada",

     "Guadeloupe",

     "Guam",

     "Guatemala",

     "Guernsey",

     "Guinea",

     "Guinea-Bissau",

     "Guyana",

     "Haiti",

     "Heard Island and McDonald Islands",

     "Honduras",

     "Hong Kong",

     "Howland Island",

     "Hungary",

     "Iceland",

     "India",

     "Indian Ocean",

     "Indonesia",

     "Iran",

     "Iraq",

     "Ireland",

     "Isle of Man",

     "Israel",

     "Italy",

     "Jamaica",

     "Jan Mayen",

     "Japan",

     "Jarvis Island",

     "Jersey",

     "Johnston Atoll",

     "Jordan",

     "Juan de Nova Island",

     "Kazakhstan",

     "Kenya",

     "Kerguelen Archipelago",

     "Kingman Reef",

     "Kiribati",

     "Kosovo",

     "Kuwait",

     "Kyrgyzstan",

     "Laos",

     "Latvia",

     "Lebanon",

     "Lesotho",

     "Liberia",

     "Libya",

     "Liechtenstein",

     "Line Islands",

     "Lithuania",

     "Luxembourg",

     "Macau",

     "Madagascar",

     "Malawi",

     "Malaysia",

     "Maldives",

     "Mali",

     "Malta",

     "Marshall Islands",

     "Martinique",

     "Mauritania",

     "Mauritius",

     "Mayotte",

     "Mediterranean Sea",

     "Mexico",

     "Micronesia, Federated States of",

     "Midway Islands",

     "Moldova",

     "Monaco",

     "Mongolia",

     "Montenegro",

     "Montserrat",

     "Morocco",

     "Mozambique",

     "Myanmar",

     "Namibia",

     "Nauru",

     "Navassa Island",

     "Nepal",

     "Netherlands",

     "New Caledonia",

     "New Zealand",

     "Nicaragua",

     "Niger",

     "Nigeria",

     "Niue",

     "Norfolk Island",

     "North Korea",

     "North Macedonia",

     "North Sea",

     "Northern Mariana Islands",

     "Norway",

     "Oman",

     "Pacific Ocean",

     "Pakistan",

     "Palau",

     "Palmyra Atoll",

     "Panama",

     "Papua New Guinea",

     "Paracel Islands",

     "Paraguay",

     "Peru",

     "Philippines",

     "Pitcairn Islands",

     "Poland",

     "Portugal",

     "Puerto Rico",

     "Qatar",

     "Republic of the Congo",

     "Reunion",

     "Romania",

     "Ross Sea",

     "Russia",

     "Rwanda",

     "Saint Barthelemy",

     "Saint Helena",

     "Saint Kitts and Nevis",

     "Saint Lucia",

     "Saint Martin",

     "Saint Pierre and Miquelon",

     "Saint Vincent and the Grenadines",

     "Samoa",

     "San Marino",

     "Sao Tome and Principe",

     "Saudi Arabia",

     "Senegal",

     "Serbia",

     "Seychelles",

     "Sierra Leone",

     "Singapore",

     "Sint Maarten",

     "Slovakia",

     "Slovenia",

     "Solomon Islands",

     "Somalia",

     "South Africa",

     "South Georgia and the South Sandwich Islands",

     "South Korea",

     "South Sudan",

     "Southern Ocean",

     "Spain",

     "Spratly Islands",

     "Sri Lanka",

     "State of Palestine",

     "Sudan",

     "Suriname",

     "Svalbard",

     "Sweden",

     "Switzerland",

     "Syria",

     "Taiwan",

     "Tajikistan",

     "Tanzania",

     "Tasman Sea",

     "Thailand",

     "Timor-Leste",

     "Togo",

     "Tokelau",

     "Tonga",

     "Trinidad and Tobago",

     "Tromelin Island",

     "Tunisia",

     "Turkey",

     "Turkmenistan",

     "Turks and Caicos Islands",

     "Tuvalu",

     "USA",

     "Uganda",

     "Ukraine",

     "United Arab Emirates",

     "United Kingdom",

     "Uruguay",

     "Uzbekistan",

     "Vanuatu",

     "Venezuela",

     "Viet Nam",

     "Virgin Islands",

     "Wake Island",

     "Wallis and Futuna",

     "West Bank",

     "Western Sahara",

     "Yemen",

     "Zambia",

     "Zimbabwe"

 };

 static const TCStrSet s_CountriesSet(s_Countries, sizeof(s_Countries), __FILE__, __LINE__);


 // Formerly legal country names; must be in alphabetical order (case sensitive).
 // Wrapped by s_Former_CountriesSet (defined right after the array), which is
 // searched by CCountries::IsValid() and CCountries::WasValid().
 static const char* const s_Former_Countries[] = {

     "Belgian Congo",

     "British Guiana",

     "Burma",

     "Czech Republic",

     "Czechoslovakia",

     "East Timor",

     "Korea",

     "Macedonia",

     "Micronesia",

     "Netherlands Antilles",

     "Serbia and Montenegro",

     "Siam",

     "Swaziland",

     "The former Yugoslav Republic of Macedonia",

     "USSR",

     "Yugoslavia",

     "Zaire"

 };

 static const TCStrSet s_Former_CountriesSet(s_Former_Countries, sizeof(s_Former_Countries), __FILE__, __LINE__);


 // Null-term exemption values accepted in place of a country name; must be in
 // alphabetical order (case sensitive). Wrapped by s_Null_CountriesSet
 // (defined right after the array), which CCountries::IsValid() searches.
 // NOTE(review): IsValid() looks up only the text before the first ':', so the
 // "missing: ..." entries are never matched as whole strings there -- confirm
 // whether other callers rely on them.
 static const char* const s_Null_Countries[] = {

     "missing",

     "missing: control sample",

     "missing: data agreement established pre-2023",

     "missing: endangered species",

     "missing: human-identifiable",

     "missing: lab stock",

     "missing: sample group",

     "missing: synthetic construct",

     "missing: third party data",

     "not applicable",

     "not collected",

     "not provided",

     "restricted access"

 };

 static const TCStrSet s_Null_CountriesSet(s_Null_Countries, sizeof(s_Null_Countries), __FILE__, __LINE__);


 // Reports whether `country` names a recognized country.
 //
 // Only the text before the first ':' (the whole string when there is none) is
 // looked up, case-sensitively, in the current, former and null-term sets.
 // A string whose first ':' is its last character is rejected.
 bool CCountries::IsValid(const string& country)
 {
     const size_t colon = country.find(':');
     if (colon != NPOS && colon + 1 == country.length()) {
         // nothing follows the colon
         return false;
     }

     const string name = (colon == NPOS) ? country : country.substr(0, colon);
     const char* const key = name.c_str();

     return s_CountriesSet.find(key) != s_CountriesSet.end()
         || s_Former_CountriesSet.find(key) != s_Former_CountriesSet.end()
         || s_Null_CountriesSet.find(key) != s_Null_CountriesSet.end();
 }


 bool CCountries::IsValid(const string& country, bool& is_miscapitalized)

 {

     string name = country;

     size_t pos = country.find(':');


     if ( pos != NPOS ) {

         name = country.substr(0, pos);

         if (pos == country.length() - 1) {

             return false;

         }

     }


     is_miscapitalized = false;

     // try current countries

     // fast check for properly capitalized

     if ( s_CountriesSet.find(name.c_str()) != s_CountriesSet.end() ) {

         return true;

     }

     if ( s_Former_CountriesSet.find(name.c_str()) != s_Former_CountriesSet.end() ) {

         return true;

     }

     if ( s_Null_CountriesSet.find(name.c_str()) != s_Null_CountriesSet.end() ) {

         return true;

     }

     // slow check for miscapitalized

     ITERATE ( TCStrSet, it, s_CountriesSet ) {

         if ( NStr::EqualNocase(name, *it) ) {

             is_miscapitalized = true;

             return true;

         }

     }

     ITERATE ( TCStrSet, it, s_Former_CountriesSet ) {

         if ( NStr::EqualNocase(name, *it) ) {

             is_miscapitalized = true;

             return true;

         }

     }

     ITERATE ( TCStrSet, it, s_Null_CountriesSet ) {

         if ( NStr::EqualNocase(name, *it) ) {

             is_miscapitalized = true;

             return true;

         }

     }


     return false;

 }


 // Reports whether `country` is a formerly valid country name.
 //
 // Only the text before the first ':' (the whole string when there is none) is
 // looked up, case-sensitively, in s_Former_CountriesSet.
 bool CCountries::WasValid(const string& country)
 {
     const size_t colon = country.find(':');
     const string name = (colon == NPOS) ? country : country.substr(0, colon);

     // try formerly-valid countries
     const bool found =
         s_Former_CountriesSet.find(name.c_str()) != s_Former_CountriesSet.end();
     return found;
 }


 bool CCountries::WasValid(const string& country, bool& is_miscapitalized)

 {

     string name = country;

     size_t pos = country.find(':');


     if ( pos != NPOS ) {

         name = country.substr(0, pos);

     }


     is_miscapitalized = false;

     // try formerly-valid countries

     // fast check for properly capitalized

     if ( s_Former_CountriesSet.find(name.c_str()) != s_Former_CountriesSet.end() ) {

         return true;

     }

     // slow check for miscapitalized

     ITERATE ( TCStrSet, it, s_Former_CountriesSet ) {

         if ( NStr::EqualNocase(name, *it) ) {

             is_miscapitalized = true;

             return true;

         }

     }

     return false;

 }


 /////////////////////////////////////////////////////////////////////////////

 ////// Country Capitalization Fix ///////////////////////////////////////////


 // Whole-string replacements: a complete input string (key, lower case) is
 // mapped to its "Country: region" form. CCountries::WholeCountryFix() looks
 // keys up after lower-casing its argument.
 // TCStringPairsMap compares keys with PCase_CStr (case-sensitive).
 static const SStaticPair<const char*, const char*> s_map_whole_country_fixes[] =

 {

   {"england", "United Kingdom: England"},

   {"great britain", "United Kingdom: Great Britain"},

   {"new jersey, usa", "USA: New Jersey"}

 };

 typedef CStaticPairArrayMap<const char*, const char*, PCase_CStr> TCStringPairsMap;

 DEFINE_STATIC_ARRAY_MAP(TCStringPairsMap, k_whole_country_fixes, s_map_whole_country_fixes);


 // Country-name corrections: three-letter codes and alternative spellings
 // (key) mapped to a replacement country name (value). Keys are listed in
 // case-sensitive ascending order, so upper-case codes such as "AZE" come
 // before mixed-case names such as "Antigua & Barbuda".
 // NOTE(review): several replacement values (e.g. "Macao", "Libyan Arab
 // Jamahiriya", "Syrian Arab Republic", "Lao People's Democratic Republic")
 // do not appear in s_Countries -- confirm that this is intended.
 static const SStaticPair<const char*, const char*> s_map_country_name_fixes[] = {

 {"ABW", "Aruba"},

 {"AFG", "Afghanistan"},

 {"AGO", "Angola"},

 {"AIA", "Anguilla"},

 {"ALA", "Aland Islands"},

 {"ALB", "Albania"},

 {"AND", "Andorra"},

 {"ARE", "United Arab Emirates"},

 {"ARG", "Argentina"},

 {"ARM", "Armenia"},

 {"ASM", "American Samoa"},

 {"ATA", "Antarctica"},

 {"ATF", "French Southern Territories"},

 {"ATG", "Antigua and Barbuda"},

 {"AUS", "Australia"},

 {"AUT", "Austria"},

 {"AZE", "Azerbaijan"},

 {"Antigua & Barbuda", "Antigua and Barbuda"},

 {"Ashmore & Cartier Islands", "Ashmore and Cartier Islands"},

 {"BDI", "Burundi"},

 {"BEL", "Belgium"},

 {"BEN", "Benin"},

 {"BES", "Bonaire, Sint Eustatius and Saba"},

 {"BFA", "Burkina Faso"},

 {"BGD", "Bangladesh"},

 {"BGR", "Bulgaria"},

 {"BHR", "Bahrain"},

 {"BHS", "Bahamas"},

 {"BIH", "Bosnia and Herzegovina"},

 {"BLM", "Saint Barthelemy"},

 {"BLR", "Belarus"},

 {"BLZ", "Belize"},

 {"BMU", "Bermuda"},

 {"BOL", "Bolivia"},

 {"BRA", "Brazil"},

 {"BRB", "Barbados"},

 {"BRN", "Brunei"},

 {"BTN", "Bhutan"},

 {"BVT", "Bouvet Island"},

 {"BWA", "Botswana"},

 {"Brasil", "Brazil"},

 {"CAF", "Central African Republic"},

 {"CAN", "Canada"},

 {"CCK", "Cocos Islands"},

 {"CHE", "Switzerland"},

 {"CHL", "Chile"},

 {"CHN", "China"},

 {"CIV", "Cote d'Ivoire"},

 {"CMR", "Cameroon"},

 {"COD", "Democratic Republic of the Congo"},

 {"COG", "Republic of the Congo"},

 {"COK", "Cook Islands"},

 {"COL", "Colombia"},

 {"COM", "Comoros"},

 {"CPV", "Cape Verde"},

 {"CRI", "Costa Rica"},

 {"CUB", "Cuba"},

 {"CUW", "Curacao"},

 {"CXR", "Christmas Island"},

 {"CYM", "Cayman Islands"},

 {"CYP", "Cyprus"},

 {"CZE", "Czechia"},

 {"Cape Verde Islands", "Cape Verde"},

 {"DEU", "Germany"},

 {"DJI", "Djibouti"},

 {"DMA", "Dominica"},

 {"DNK", "Denmark"},

 {"DOM", "Dominican Republic"},

 {"DZA", "Algeria"},

 {"Democratic Republic of Congo", "Democratic Republic of the Congo"},

 {"ECU", "Ecuador"},

 {"EGY", "Egypt"},

 {"ERI", "Eritrea"},

 {"ESH", "Western Sahara"},

 {"ESP", "Spain"},

 {"EST", "Estonia"},

 {"ETH", "Ethiopia"},

 {"FIN", "Finland"},

 {"FJI", "Fiji"},

 {"FLK", "Falkland Islands (Islas Malvinas)"},

 {"FRA", "France"},

 {"FRO", "Faroe Islands"},

 {"FSM", "Micronesia, Federated States of"},

 {"Falkland Islands", "Falkland Islands (Islas Malvinas)"},

 {"French Southern & Antarctic Lands", "French Southern and Antarctic Lands"},

 {"GAB", "Gabon"},

 {"GBR", "United Kingdom"},

 {"GEO", "Georgia"},

 {"GGY", "Guernsey"},

 {"GHA", "Ghana"},

 {"GIB", "Gibraltar"},

 {"GIN", "Guinea"},

 {"GLP", "Guadeloupe"},

 {"GMB", "Gambia"},

 {"GNB", "Guinea-Bissau"},

 {"GNQ", "Equatorial Guinea"},

 {"GRC", "Greece"},

 {"GRD", "Grenada"},

 {"GRL", "Greenland"},

 {"GTM", "Guatemala"},

 {"GUF", "French Guiana"},

 {"GUM", "Guam"},

 {"GUY", "Guyana"},

 {"HKG", "Hong Kong"},

 {"HMD", "Heard Island and McDonald Islands"},

 {"HND", "Honduras"},

 {"HRV", "Croatia"},

 {"HTI", "Haiti"},

 {"HUN", "Hungary"},

 {"Heard Island & McDonald Islands", "Heard Island and McDonald Islands"},

 {"IDN", "Indonesia"},

 {"IMN", "Isle of Man"},

 {"IND", "India"},

 {"IOT", "British Indian Ocean Territory"},

 {"IRL", "Ireland"},

 {"IRN", "Iran"},

 {"IRQ", "Iraq"},

 {"ISL", "Iceland"},

 {"ISR", "Israel"},

 {"ITA", "Italy"},

 {"Ivory Coast", "Cote d'Ivoire"},

 {"JAM", "Jamaica"},

 {"JEY", "Jersey"},

 {"JOR", "Jordan"},

 {"JPN", "Japan"},

 {"KAZ", "Kazakhstan"},

 {"KEN", "Kenya"},

 {"KGZ", "Kyrgyzstan"},

 {"KHM", "Cambodia"},

 {"KIR", "Kiribati"},

 {"KNA", "Saint Kitts and Nevis"},

 {"KOR", "South Korea"},

 {"KWT", "Kuwait"},

 {"LAO", "Lao People's Democratic Republic"},

 {"LBN", "Lebanon"},

 {"LBR", "Liberia"},

 {"LBY", "Libyan Arab Jamahiriya"},

 {"LCA", "Saint Lucia"},

 {"LIE", "Liechtenstein"},

 {"LKA", "Sri Lanka"},

 {"LSO", "Lesotho"},

 {"LTU", "Lithuania"},

 {"LUX", "Luxembourg"},

 {"LVA", "Latvia"},

 {"La Reunion Island", "Reunion"},

 {"Luxemburg", "Luxembourg"},

 {"MAC", "Macao"},

 {"MAF", "Saint Martin (French part)"},

 {"MAR", "Morocco"},

 {"MCO", "Monaco"},

 {"MDA", "Moldova"},

 {"MDG", "Madagascar"},

 {"MDV", "Maldives"},

 {"MEX", "Mexico"},

 {"MHL", "Marshall Islands"},

 {"MKD", "North Macedonia"},

 {"MLI", "Mali"},

 {"MLT", "Malta"},

 {"MMR", "Myanmar"},

 {"MNE", "Montenegro"},

 {"MNG", "Mongolia"},

 {"MNP", "Northern Mariana Islands"},

 {"MOZ", "Mozambique"},

 {"MRT", "Mauritania"},

 {"MSR", "Montserrat"},

 {"MTQ", "Martinique"},

 {"MUS", "Mauritius"},

 {"MWI", "Malawi"},

 {"MYS", "Malaysia"},

 {"MYT", "Mayotte"},

 {"Macedonia", "North Macedonia"},

 {"NAM", "Namibia"},

 {"NCL", "New Caledonia"},

 {"NER", "Niger"},

 {"NFK", "Norfolk Island"},

 {"NGA", "Nigeria"},

 {"NIC", "Nicaragua"},

 {"NIU", "Niue"},

 {"NLD", "Netherlands"},

 {"NOR", "Norway"},

 {"NPL", "Nepal"},

 {"NRU", "Nauru"},

 {"NZL", "New Zealand"},

 {"Netherland", "Netherlands"},

 {"New Guinea", "Papua New Guinea"},

 {"OMN", "Oman"},

 {"P, R, China", "China"},

 {"P.R. China", "China"},

 {"P.R.China", "China"},

 {"PAK", "Pakistan"},

 {"PAN", "Panama"},

 {"PCN", "Pitcairn"},

 {"PER", "Peru"},

 {"PHL", "Philippines"},

 {"PLW", "Palau"},

 {"PNG", "Papua New Guinea"},

 {"POL", "Poland"},

 {"PRI", "Puerto Rico"},

 {"PRK", "North Korea"},

 {"PRT", "Portugal"},

 {"PRY", "Paraguay"},

 {"PSE", "Palestinian Territory"},

 {"PYF", "French Polynesia"},

 {"People's Republic of China", "China"},

 {"Pr China", "China"},

 {"Prchina", "China"},

 {"QAT", "Qatar"},

 {"REU", "Reunion"},

 {"ROU", "Romania"},

 {"RUS", "Russia"},

 {"RWA", "Rwanda"},

 {"Republic of Congo", "Republic of the Congo"},

 {"SAU", "Saudi Arabia"},

 {"SDN", "Sudan"},

 {"SEN", "Senegal"},

 {"SGP", "Singapore"},

 {"SGS", "South Georgia and the South Sandwich Islands"},

 {"SHN", "Saint Helena"},

 {"SJM", "Svalbard and Jan Mayen"},

 {"SLB", "Solomon Islands"},

 {"SLE", "Sierra Leone"},

 {"SLV", "El Salvador"},

 {"SMR", "San Marino"},

 {"SOM", "Somalia"},

 {"SPM", "Saint Pierre and Miquelon"},

 {"SRB", "Serbia"},

 {"SSD", "South Sudan"},

 {"STP", "Sao Tome and Principe"},

 {"SUR", "Suriname"},

 {"SVK", "Slovakia"},

 {"SVN", "Slovenia"},

 {"SWE", "Sweden"},

 {"SWZ", "Eswatini"},

 {"SXM", "Sint Maarten (Dutch part)"},

 {"SYC", "Seychelles"},

 {"SYR", "Syrian Arab Republic"},

 {"Saint Kitts & Nevis", "Saint Kitts and Nevis"},

 {"Saint Pierre & Miquelon", "Saint Pierre and Miquelon"},

 {"Saint Vincent & Grenadines", "Saint Vincent and the Grenadines"},

 {"Saint Vincent & the Grenadines", "Saint Vincent and the Grenadines"},

 {"Saint Vincent and Grenadines", "Saint Vincent and the Grenadines"},

 {"San Tome and Principe Island", "Sao Tome and Principe"},

 {"Sao Tome & Principe", "Sao Tome and Principe"},

 {"South Georgia & South Sandwich Islands", "South Georgia and the South Sandwich Islands"},

 {"South Georgia & the South Sandwich Islands", "South Georgia and the South Sandwich Islands"},

 {"St Helena", "Saint Helena"},

 {"St Lucia", "Saint Lucia"},

 {"St Pierre and Miquelon", "Saint Pierre and Miquelon"},

 {"St Vincent and the Grenadines", "Saint Vincent and the Grenadines"},

 {"St. Helena", "Saint Helena"},

 {"St. Lucia", "Saint Lucia"},

 {"St. Pierre and Miquelon", "Saint Pierre and Miquelon"},

 {"St. Vincent and the Grenadines", "Saint Vincent and the Grenadines"},

 {"TCA", "Turks and Caicos Islands"},

 {"TCD", "Chad"},

 {"TGO", "Togo"},

 {"THA", "Thailand"},

 {"TJK", "Tajikistan"},

 {"TKL", "Tokelau"},

 {"TKM", "Turkmenistan"},

 {"TLS", "Timor-Leste"},

 {"TON", "Tonga"},

 {"TTO", "Trinidad and Tobago"},

 {"TUN", "Tunisia"},

 {"TUR", "Turkey"},

 {"TUV", "Tuvalu"},

 {"TWN", "Taiwan"},

 {"TZA", "Tanzania"},

 {"The Netherlands", "Netherlands"},

 {"Trinidad & Tobago", "Trinidad and Tobago"},

 {"Turks & Caicos", "Turks and Caicos Islands"},

 {"Turks & Caicos Islands", "Turks and Caicos Islands"},

 {"Turks and Caicos", "Turks and Caicos Islands"},

 {"U.S.A.", "USA"},

 {"UGA", "Uganda"},

 {"UK", "United Kingdom"},

 {"UKR", "Ukraine"},

 {"UMI", "United States Minor Outlying Islands"},

 {"URY", "Uruguay"},

 {"UZB", "Uzbekistan"},

 {"United States", "USA"},

 {"United States of America", "USA"},

 {"VAT", "Holy See (Vatican City State)"},

 {"VCT", "Saint Vincent and the Grenadines"},

 {"VEN", "Venezuela"},

 {"VGB", "British Virgin Islands"},

 {"VIR", "Virgin Islands"},

 {"VNM", "Viet Nam"},

 {"VUT", "Vanuatu"},

 {"Vietnam", "Viet Nam"},

 {"WLF", "Wallis and Futuna"},

 {"WSM", "Samoa"},

 {"YEM", "Yemen"},

 {"ZAF", "South Africa"},

 {"ZMB", "Zambia"},

 {"ZWE", "Zimbabwe"},

 {"the Netherlands", "Netherlands"}

 };


 DEFINE_STATIC_ARRAY_MAP(TCStringPairsMap,k_country_name_fixes, s_map_country_name_fixes);


 // for GP-24841
 // Former country names (key) mapped to the name that replaces them (value).
 // Both keys also appear in s_Former_Countries; both values in s_Countries.
 static const SStaticPair<const char*, const char*> s_map_old_country_name_fixes[] = {

 {"Burma", "Myanmar"},

 {"Siam", "Thailand"}

 };

 DEFINE_STATIC_ARRAY_MAP(TCStringPairsMap,k_old_country_name_fixes, s_map_old_country_name_fixes);


 // for GB-7408

 static const SStaticPair<const char*, const char*> s_map_subregion_fixes[] = {

 {"Antigua", "Antigua and Barbuda: Antigua"},

 {"Ashmore Island", "Ashmore and Cartier Islands: Ashmore Island"},

 {"Autonomous Region of the Azores", "Portugal: Azores"},

 {"Azores", "Portugal: Azores"},

 {"Barbuda", "Antigua and Barbuda: Barbuda"},

 {"Bassas da India", "French Southern and Antarctic Lands: Bassas da India"},

 {"Caicos Islands", "Turks and Caicos Islands: Caicos Islands"},

 {"Canary Islands", "Spain: Canary Islands"},

 {"Cartier Island", "Ashmore and Cartier Islands: Cartier Island"},

 {"East Germany", "Germany: East Germany"},

 {"El Hierro", "Spain: El Hierro"},

 {"Europa Island", "French Southern and Antarctic Lands: Europa Island"},

 {"Fuerteventura", "Spain: Fuerteventura"},

 {"Glorioso Islands", "French Southern and Antarctic Lands: Glorioso Islands"},

 {"Gran Canaria", "Spain: Gran Canaria"},

 {"Grenadines", "Saint Vincent and the Grenadines: Grenadines"},

 {"Heard Island", "Heard Island and McDonald Islands: Heard Island"},

 {"Ile Amsterdam", "French Southern and Antarctic Lands: Ile Amsterdam"},

 {"Ile Saint-Paul", "French Southern and Antarctic Lands: Ile Saint-Paul"},

 {"Iles Crozet", "French Southern and Antarctic Lands: Iles Crozet"},

 {"Iles Kerguelen", "French Southern and Antarctic Lands: Iles Kerguelen"},

 {"Juan de Nova Island", "French Southern and Antarctic Lands: Juan de Nova Island"},

 {"La Gomera", "Spain: La Gomera"},

 {"La Graciosa", "Spain: La Graciosa"},

 {"La Palma", "Spain: La Palma"},

 {"Lanzarote", "Spain: Lanzarote"},

 {"Madeira", "Portugal: Madeira"},

 {"McDonald Island", "Heard Island and McDonald Islands: McDonald Island"},

 {"McDonald Islands", "Heard Island and McDonald Islands: McDonald Islands"},

 {"Miquelon", "Saint Pierre and Miquelon: Miquelon"},

 {"Nevis", "Saint Kitts and Nevis: Nevis"},

 {"Principe", "Sao Tome and Principe: Principe"},

 {"Saint Kitts", "Saint Kitts and Nevis: Saint Kitts"},

 {"Saint Pierre", "Saint Pierre and Miquelon: Saint Pierre"},

 {"Saint Vincent", "Saint Vincent and the Grenadines: Saint Vincent"},

 {"Sao Tome", "Sao Tome and Principe: Sao Tome"},

 {"Scotland", "United Kingdom: Scotland"},

 {"South Sandwich Islands", "South Georgia and the South Sandwich Islands: South Sandwich Islands"},

 {"St Kitts", "Saint Kitts and Nevis: Saint Kitts"},

 {"St Pierre", "Saint Pierre and Miquelon: Saint Pierre"},

 {"St Thomas", "USA: Saint Thomas"},

 {"St Vincent", "Saint Vincent and the Grenadines: Saint Vincent"},

 {"St. Kitts", "Saint Kitts and Nevis: Saint Kitts"},

 {"St. Pierre", "Saint Pierre and Miquelon: Saint Pierre"},

 {"St. Thomas", "USA: Saint Thomas"},

 {"St. Vincent", "Saint Vincent and the Grenadines: Saint Vincent"},

 {"Tenerife", "Spain: Tenerife"},

 {"Tobago", "Trinidad and Tobago: Tobago"},

 {"Trinidad", "Trinidad and Tobago: Trinidad"},

 {"Tromelin Island", "French Southern and Antarctic Lands: Tromelin Island"},

 {"Turks Islands", "Turks and Caicos Islands: Turks Islands"},

 {"Wales", "United Kingdom: Wales"},

 {"West Germany", "Germany: West Germany"},


 };

 // Define k_subregion_fixes, a TCStringPairsMap built over the
 // s_map_subregion_fixes table that ends just above.
 DEFINE_STATIC_ARRAY_MAP(TCStringPairsMap,k_subregion_fixes, s_map_subregion_fixes);


 // The fifty US states plus the District of Columbia, listed alphabetically.
 // CCountries::WholeCountryFix() walks this list with a case-insensitive
 // comparison to recognise a bare state name.
 static const char* s_USAStates[] = {
     "Alabama",        "Alaska",         "Arizona",        "Arkansas",
     "California",     "Colorado",       "Connecticut",    "Delaware",
     "District of Columbia",
     "Florida",        "Georgia",        "Hawaii",         "Idaho",
     "Illinois",       "Indiana",        "Iowa",           "Kansas",
     "Kentucky",       "Louisiana",      "Maine",          "Maryland",
     "Massachusetts",  "Michigan",       "Minnesota",      "Mississippi",
     "Missouri",       "Montana",        "Nebraska",       "Nevada",
     "New Hampshire",  "New Jersey",     "New Mexico",     "New York",
     "North Carolina", "North Dakota",   "Ohio",           "Oklahoma",
     "Oregon",         "Pennsylvania",   "Rhode Island",   "South Carolina",
     "South Dakota",   "Tennessee",      "Texas",          "Utah",
     "Vermont",        "Virginia",       "Washington",     "West Virginia",
     "Wisconsin",      "Wyoming"
 };


 /// Upper-case the first character of each word in @a phrase.
 ///
 /// The phrase is split with NStr::Split on space, tab, CR and LF; every
 /// non-empty piece whose first character is alphabetic gets that character
 /// upper-cased, and the pieces are joined back with a single space.
 ///
 /// @param phrase  Text to capitalize; not modified.
 /// @return        The re-joined, capitalized phrase.
 string CCountries::CapitalizeFirstLetterOfEveryWord (const string &phrase)
 {
     vector<string> words;
     NStr::Split(phrase, " \t\r\n", words);

     for (string& word : words) {
         if (word.empty()) {
             continue;
         }
         // The <cctype> classifiers are only defined for values representable
         // as unsigned char (or EOF), so convert the argument before the call.
         // A plain char >= 0x80 would otherwise be passed as a negative int.
         const unsigned char first = static_cast<unsigned char>(word[0]);
         if (isalpha(first)) {
             word[0] = static_cast<char>(toupper(first));
         }
     }

     return NStr::Join(words," ");
 }


 /// Try to repair a country value that is wrong as a whole.
 ///
 /// First the lower-cased input is looked up in k_whole_country_fixes; on a
 /// hit the mapped replacement is returned.  Otherwise the input is compared
 /// case-insensitively with every entry of s_USAStates, and a match yields
 /// "USA: <state>" using the table's spelling.
 ///
 /// @param country  Value to repair (taken by value; lower-cased locally).
 /// @return         The replacement, or an empty string if nothing matched.
 string CCountries::WholeCountryFix(string country)
 {
     string fixed;

     // NStr::ToLower works on the local copy, so the lookup key is lower case.
     const TCStringPairsMap::const_iterator hit =
         k_whole_country_fixes.find(NStr::ToLower(country).c_str());
     if (hit != k_whole_country_fixes.end()) {
         fixed = hit->second;
         return fixed;
     }

     // Not in the fix table: maybe it is just the name of a US state.
     for (const char* state_name : s_USAStates) {
         if (NStr::EqualNocase(state_name, country)) {
             fixed = "USA: " + CTempString(state_name);
             break;
         }
     }

     return fixed;
 }


 bool CCountries::IsSubstringOfStringInList(const string& phrase, const string& country1, size_t pos1)

 {

     bool r = false;

     ITERATE ( TCStrSet, c, s_CountriesSet )

     {

         string country2(*c);

         if (country2.length() > country1.length() && NStr::FindNoCase(country2,country1) != NPOS)

         {

             SIZE_TYPE pos2 = NStr::FindNoCase(phrase,country2);

             while (pos2 != NPOS)

             {

                 if (pos2 <= pos1 && pos2+country2.length() >= pos1+country1.length())

                     r = true;

                 pos2 = NStr::FindNoCase(phrase,country2,pos2+country2.length());

             }

         }

     }

     return r;

 }


 /// Report whether @a phrase mentions more than one country name.
 ///
 /// Each entry of s_CountriesSet is searched for case-insensitively.  An
 /// occurrence counts only when it is not glued to a letter on either side
 /// and is not merely a piece of a longer listed name (see
 /// IsSubstringOfStringInList).  Several occurrences of the same name count
 /// separately.
 ///
 /// @param phrase  Text to analyse.
 /// @return        true once two qualifying occurrences have been seen.
 bool CCountries::ContainsMultipleCountryNames (const string &phrase)
 {
     int num_matches = 0;

     ITERATE ( TCStrSet, c, s_CountriesSet )
     {
         const string country(*c);

         for (size_t pos = NStr::FindNoCase(phrase, country);
              pos != NPOS;
              pos = NStr::FindNoCase(phrase, country, pos + country.length())) {

             const size_t after = pos + country.length();

             // isalpha() is undefined for negative arguments, so the bytes are
             // converted to unsigned char first (matters for text >= 0x80).
             const bool letter_after =
                 after < phrase.length()
                 && isalpha(static_cast<unsigned char>(phrase[after]));
             const bool letter_before =
                 pos > 0
                 && isalpha(static_cast<unsigned char>(phrase[pos - 1]));

             if (letter_after || letter_before
                 || IsSubstringOfStringInList(phrase, country, pos)) {
                 continue;
             }

             // The result is only "more than one", so stop at the second hit.
             if (++num_matches > 1) {
                 return true;
             }
         }
     }

     return false;
 }


 /// Return @a country spelled with the capitalization used in s_CountriesSet.
 ///
 /// The whole set is scanned; every entry equal to @a country ignoring case
 /// overwrites the result, so the last such entry wins.  When nothing
 /// matches, the input is returned unchanged.
 string CCountries::GetCorrectedCountryCapitalization(const string& country)
 {
     string corrected(country);

     ITERATE ( TCStrSet, entry, s_CountriesSet ) {
         if ( !NStr::EqualNocase(country, *entry) ) {
             continue;
         }
         corrected = *entry;
     }

     return corrected;
 }


 /// Strip delimiter clutter from both ends of @a val, in place.
 ///
 /// After trimming spaces, one rule is applied per pass until none applies
 /// or the string becomes empty.  Rules, in priority order:
 ///   1. a leading ',', ':' or '.' (or ')' unless @a except_paren) is removed;
 ///   2. a trailing ',' or ':' (or '(' unless @a except_paren) is removed;
 ///   3. a trailing "the" preceded by a non-letter is removed together with
 ///      that preceding character;
 ///   4. a trailing '.' is removed when the character before it is white
 ///      space, or when the string is longer than five characters and none
 ///      of the four characters before the '.' is white space or punctuation.
 /// Spaces are re-trimmed after rules 1, 2 and the white-space case of 4.
 ///
 /// @param val           String to clean up (modified).
 /// @param except_paren  When true, parentheses at the ends are left alone.
 void CCountries::x_RemoveDelimitersFromEnds(string& val, bool except_paren)
 {
     NStr::TruncateSpacesInPlace(val);

     for (;;) {
         if (val.empty()) {
             return;
         }

         const size_t len  = val.length();
         const char   head = val[0];
         const char   tail = val[len - 1];

         // Rule 1: delimiter at the front.
         if (head == ',' || head == ':' || head == '.'
             || (!except_paren && head == ')')) {
             val.erase(0, 1);
             NStr::TruncateSpacesInPlace(val);
             continue;
         }

         // Rule 2: delimiter at the back.
         if (tail == ',' || tail == ':'
             || (!except_paren && tail == '(')) {
             val.erase(len - 1);
             NStr::TruncateSpacesInPlace(val);
             continue;
         }

         // Rule 3: dangling article, dropped with the character before it.
         if (len > 3 && NStr::EndsWith(val, "the")
             && !isalpha((unsigned char)val[len - 4])) {
             val.erase(len - 4);
             continue;
         }

         // Rule 4 only concerns a trailing period; anything else is final.
         if (tail != '.') {
             return;
         }

         if (len > 1 && isspace((unsigned char)val[len - 2])) {
             val.erase(len - 1);
             NStr::TruncateSpacesInPlace(val);
             continue;
         }

         if (len <= 5) {
             return;
         }

         // Keep the period when a space or punctuation mark sits within the
         // four characters before it.
         bool word_is_clean = true;
         for (size_t back = 2; back <= 5 && word_is_clean; ++back) {
             const unsigned char ch = (unsigned char)val[len - back];
             if (isspace(ch) || ispunct(ch)) {
                 word_is_clean = false;
             }
         }
         if (!word_is_clean) {
             return;
         }
         val.erase(len - 1);
     }
 }


 /// Break @a val into candidate country tokens.
 ///
 /// The value is first split on ',', ':', '(' and ')'.  A token is then split
 /// once more at its first period when
 ///   - the period is at offset 4 or later,
 ///   - more than four characters remain from the period to the end, and
 ///   - the last space-delimited word before the period is longer than four
 ///     characters.
 /// In that case the token keeps the text before the period and the text
 /// after the period is inserted *in front of* it; the inserted piece is then
 /// examined in turn.
 ///
 /// @param val  Text to tokenize.
 /// @return     The resulting tokens.
 vector<string> CCountries::x_Tokenize(const string& val)
 {
     vector<string> tokens;
     NStr::Split(val, ",:()", tokens);

     size_t idx = 0;
     while (idx < tokens.size()) {
         // Work on a copy: the vector is modified further down.
         const string current = tokens[idx];
         const size_t dot = NStr::Find(current, ".");

         const bool splittable =
             dot != NPOS  &&  dot > 3  &&  current.length() - dot > 4;
         if (!splittable) {
             ++idx;
             continue;
         }

         // Length of the word that immediately precedes the period.
         const string before_dot = current.substr(0, dot);
         const size_t last_space = before_dot.rfind(' ');
         const size_t last_word_len = (last_space == string::npos)
             ? before_dot.length()
             : before_dot.length() - (last_space + 1);
         if (last_word_len <= 4) {
             ++idx;
             continue;
         }

         tokens[idx] = before_dot;
         tokens.insert(tokens.begin() + idx, current.substr(dot + 1));
         // idx now designates the inserted remainder, which is checked next.
     }

     return tokens;
 }


 bool s_ContainsWholeWord(const CTempString test, const CTempString word, NStr::ECase case_sense)

 {

     size_t start = 0;

     size_t tlen = test.length();

     size_t wlen = word.length();


     size_t pos = NStr::Find(test, word, case_sense);

     while (pos != NPOS) {

         size_t p = start + pos;

         if ( (p == 0           || !isalpha((unsigned char)test[p - 1]))  &&

              (p + wlen >= tlen || !isalpha((unsigned char)test[p + wlen])) ) {

             return true;

         }

         start = p + 1;

         pos = NStr::Find(CTempString(test, start, tlen - start), word, case_sense);

     }

     return false;

 }


 /// Tell whether country fixing should be skipped for @a test.
 ///
 /// @return true when @a test contains "Sea" or "USSR" as a whole word,
 ///         ignoring case (see s_ContainsWholeWord).
 bool s_SuppressCountryFix(const string& test)
 {
     return s_ContainsWholeWord(test, "Sea", NStr::eNocase)
         || s_ContainsWholeWord(test, "USSR", NStr::eNocase);
 }


 void CCountries::x_FindCountryName

 (const TCStringPairsMap& fix_map,

  const vector<string>& countries,

  string& valid_country,

  string& orig_valid_country,

  bool& too_many_countries,

  bool& bad_cap)

 {

     for (auto country : countries) {

         if (!country.empty() && !too_many_countries)

         {

             string check = country;

             NStr::TruncateSpacesInPlace(check);

             x_RemoveDelimitersFromEnds(check);


             bool check_has_bad_cap = false;

             if (IsValid(check,check_has_bad_cap))

             {

                 if (valid_country.empty())

                 {

                     valid_country = check;

                     orig_valid_country = check;

                     bad_cap = check_has_bad_cap;

                 }

                 else

                 {

                     too_many_countries = true;

                 }

             }

             else // see if this is a fixable country

             {

                 TCStringPairsMap::const_iterator found = fix_map.find(check.c_str());

                 if (found != fix_map.end())

                 {

                     if (valid_country.empty())

                     {

                         valid_country = found->second;

                         orig_valid_country = check;

                     }

                     else

                     {

                         too_many_countries = true;

                     }

                 }

             }

         }

     }

 }


 // start of RW-1278


 /// Collapse every run of consecutive space characters (' ') in @a val into
 /// a single space, in place.  Other white space (tabs, newlines) is left
 /// untouched, and leading/trailing runs are reduced to one space rather
 /// than removed.
 ///
 /// The string is compacted inside its own buffer, so no temporary C buffer
 /// is allocated and embedded NUL characters no longer truncate the value.
 ///
 /// @param val  String to compress (modified).
 /// @return     true if at least one character was removed, false otherwise
 ///             (including for an empty string).
 bool s_CompressRunsOfSpaces(string& val)
 {
     string::size_type out = 0;     // next write position
     bool prev_was_space = false;   // was the last kept character a space?

     for (string::size_type in = 0; in < val.length(); ++in) {
         const char ch = val[in];
         const bool is_space = (ch == ' ');
         if (is_space && prev_was_space) {
             continue;              // drop the 2nd, 3rd, ... space of a run
         }
         val[out++] = ch;
         prev_was_space = is_space;
     }

     // Characters are only ever dropped, so "changed" means "got shorter".
     if (out == val.length()) {
         return false;
     }
     val.resize(out);
     return true;
 }


 // Louisiana parish names.  Each parish appears twice as a key: once spelled
 // normally and once as a run-on string without spaces (for example
 // "EastBatonRougeParish"); both map to the normally spelled name.
 // Lookups go through parishAbbrevMap, whose comparator (PNocase_CStr)
 // ignores case.
 // NOTE(review): the keys are listed in case-insensitive order; the static
 // array map presumably relies on that ordering - confirm before inserting
 // entries.
 typedef SStaticPair<const char*, const char*> TParishMapEntry;

 static const TParishMapEntry parish_abbrev_array[] = {
     { "Acadia Parish", "Acadia Parish" },
     { "AcadiaParish", "Acadia Parish" },
     { "Allen Parish", "Allen Parish" },
     { "AllenParish", "Allen Parish" },
     { "Ascension Parish", "Ascension Parish" },
     { "AscensionParish", "Ascension Parish" },
     { "Assumption Parish", "Assumption Parish" },
     { "AssumptionParish", "Assumption Parish" },
     { "Avoyelles Parish", "Avoyelles Parish" },
     { "AvoyellesParish", "Avoyelles Parish" },
     { "Beauregard Parish", "Beauregard Parish" },
     { "BeauregardParish", "Beauregard Parish" },
     { "Bienville Parish", "Bienville Parish" },
     { "BienvilleParish", "Bienville Parish" },
     { "Bossier Parish", "Bossier Parish" },
     { "BossierParish", "Bossier Parish" },
     { "Caddo Parish", "Caddo Parish" },
     { "CaddoParish", "Caddo Parish" },
     { "Calcasieu Parish", "Calcasieu Parish" },
     { "CalcasieuParish", "Calcasieu Parish" },
     { "Caldwell Parish", "Caldwell Parish" },
     { "CaldwellParish", "Caldwell Parish" },
     { "Cameron Parish", "Cameron Parish" },
     { "CameronParish", "Cameron Parish" },
     { "Catahoula Parish", "Catahoula Parish" },
     { "CatahoulaParish", "Catahoula Parish" },
     { "Claiborne Parish", "Claiborne Parish" },
     { "ClaiborneParish", "Claiborne Parish" },
     { "Concordia Parish", "Concordia Parish" },
     { "ConcordiaParish", "Concordia Parish" },
     { "DeSoto Parish", "DeSoto Parish" },
     { "DeSotoParish", "DeSoto Parish" },
     { "East Baton Rouge Parish", "East Baton Rouge Parish" },
     { "East Carroll Parish", "East Carroll Parish" },
     { "East Feliciana Parish", "East Feliciana Parish" },
     { "EastBatonRougeParish", "East Baton Rouge Parish" },
     { "EastCarrollParish", "East Carroll Parish" },
     { "EastFelicianaParish", "East Feliciana Parish" },
     { "Evangeline Parish", "Evangeline Parish" },
     { "EvangelineParish", "Evangeline Parish" },
     { "Franklin Parish", "Franklin Parish" },
     { "FranklinParish", "Franklin Parish" },
     { "Grant Parish", "Grant Parish" },
     { "GrantParish", "Grant Parish" },
     { "Iberia Parish", "Iberia Parish" },
     { "IberiaParish", "Iberia Parish" },
     { "Iberville Parish", "Iberville Parish" },
     { "IbervilleParish", "Iberville Parish" },
     { "Jackson Parish", "Jackson Parish" },
     { "JacksonParish", "Jackson Parish" },
     { "Jefferson Davis Parish", "Jefferson Davis Parish" },
     { "Jefferson Parish", "Jefferson Parish" },
     { "JeffersonDavisParish", "Jefferson Davis Parish" },
     { "JeffersonParish", "Jefferson Parish" },
     { "Lafayette Parish", "Lafayette Parish" },
     { "LafayetteParish", "Lafayette Parish" },
     { "Lafourche Parish", "Lafourche Parish" },
     { "LafourcheParish", "Lafourche Parish" },
     { "LaSalle Parish", "LaSalle Parish" },
     { "LaSalleParish", "LaSalle Parish" },
     { "Lincoln Parish", "Lincoln Parish" },
     { "LincolnParish", "Lincoln Parish" },
     { "Livingston Parish", "Livingston Parish" },
     { "LivingstonParish", "Livingston Parish" },
     { "Madison Parish", "Madison Parish" },
     { "MadisonParish", "Madison Parish" },
     { "Morehouse Parish", "Morehouse Parish" },
     { "MorehouseParish", "Morehouse Parish" },
     { "Natchitoches Parish", "Natchitoches Parish" },
     { "NatchitochesParish", "Natchitoches Parish" },
     { "Orleans Parish", "Orleans Parish" },
     { "OrleansParish", "Orleans Parish" },
     { "Ouachita Parish", "Ouachita Parish" },
     { "OuachitaParish", "Ouachita Parish" },
     { "Plaquemines Parish", "Plaquemines Parish" },
     { "PlaqueminesParish", "Plaquemines Parish" },
     { "Pointe Coupee Parish", "Pointe Coupee Parish" },
     { "PointeCoupeeParish", "Pointe Coupee Parish" },
     { "Rapides Parish", "Rapides Parish" },
     { "RapidesParish", "Rapides Parish" },
     { "Red River Parish", "Red River Parish" },
     { "RedRiverParish", "Red River Parish" },
     { "Richland Parish", "Richland Parish" },
     { "RichlandParish", "Richland Parish" },
     { "Sabine Parish", "Sabine Parish" },
     { "SabineParish", "Sabine Parish" },
     { "St. Bernard Parish", "St. Bernard Parish" },
     { "St. Charles Parish", "St. Charles Parish" },
     { "St. Helena Parish", "St. Helena Parish" },
     { "St. James Parish", "St. James Parish" },
     { "St. John the Baptist Parish", "St. John the Baptist Parish" },
     { "St. Landry Parish", "St. Landry Parish" },
     { "St. Martin Parish", "St. Martin Parish" },
     { "St. Mary Parish", "St. Mary Parish" },
     { "St. Tammany Parish", "St. Tammany Parish" },
     { "St.BernardParish", "St. Bernard Parish" },
     { "St.CharlesParish", "St. Charles Parish" },
     { "St.HelenaParish", "St. Helena Parish" },
     { "St.JamesParish", "St. James Parish" },
     { "St.JohntheBaptistParish", "St. John the Baptist Parish" },
     { "St.LandryParish", "St. Landry Parish" },
     { "St.MartinParish", "St. Martin Parish" },
     { "St.MaryParish", "St. Mary Parish" },
     { "St.TammanyParish", "St. Tammany Parish" },
     { "Tangipahoa Parish", "Tangipahoa Parish" },
     { "TangipahoaParish", "Tangipahoa Parish" },
     { "Tensas Parish", "Tensas Parish" },
     { "TensasParish", "Tensas Parish" },
     { "Terrebonne Parish", "Terrebonne Parish" },
     { "TerrebonneParish", "Terrebonne Parish" },
     { "Union Parish", "Union Parish" },
     { "UnionParish", "Union Parish" },
     { "Vermilion Parish", "Vermilion Parish" },
     { "VermilionParish", "Vermilion Parish" },
     { "Vernon Parish", "Vernon Parish" },
     { "VernonParish", "Vernon Parish" },
     { "Washington Parish", "Washington Parish" },
     { "WashingtonParish", "Washington Parish" },
     { "Webster Parish", "Webster Parish" },
     { "WebsterParish", "Webster Parish" },
     { "West Baton Rouge Parish", "West Baton Rouge Parish" },
     { "West Carroll Parish", "West Carroll Parish" },
     { "West Feliciana Parish", "West Feliciana Parish" },
     { "WestBatonRougeParish", "West Baton Rouge Parish" },
     { "WestCarrollParish", "West Carroll Parish" },
     { "WestFelicianaParish", "West Feliciana Parish" },
     { "Winn Parish", "Winn Parish" },
     { "WinnParish", "Winn Parish" }
 };

 // Case-insensitive lookup map over parish_abbrev_array.
 typedef CStaticPairArrayMap<const char *, const char *, PNocase_CStr> TParishMap;
 DEFINE_STATIC_ARRAY_MAP(TParishMap, parishAbbrevMap, parish_abbrev_array);


 /// Recognise a Louisiana parish name and normalise its spelling.
 ///
 /// @param parish  Candidate name; on a hit in parishAbbrevMap it is replaced
 ///                by the mapped full parish name.
 /// @return        true if the name was found, false otherwise (an empty
 ///                string is never looked up).
 bool s_IsParish ( string& parish ) {

     if ( !parish.empty() ) {
         const TParishMap::const_iterator hit = parishAbbrevMap.find(parish.c_str());
         if ( hit != parishAbbrevMap.end() ) {
             // Substitute the properly spaced parish name.
             parish = hit->second;
             return true;
         }
     }

     return false;
 }


 // US state and territory names.  Keys are both the full names and the
 // two-letter abbreviations (plus "Virgin Islands"); every key maps to the
 // full name.  Lookups go through stateAbbrevMap, whose comparator
 // (PNocase_CStr) ignores case.
 // NOTE(review): the keys are listed in case-insensitive order; the static
 // array map presumably relies on that ordering - confirm before inserting
 // entries.
 typedef SStaticPair<const char*, const char*> TStateMapEntry;

 static const TStateMapEntry state_abbrev_array[] = {
     { "AK", "Alaska" },
     { "AL", "Alabama" },
     { "Alabama", "Alabama" },
     { "Alaska", "Alaska" },
     { "American Samoa", "American Samoa" },
     { "AR", "Arkansas" },
     { "Arizona", "Arizona" },
     { "Arkansas", "Arkansas" },
     { "AS", "American Samoa" },
     { "AZ", "Arizona" },
     { "CA", "California" },
     { "California", "California" },
     { "CO", "Colorado" },
     { "Colorado", "Colorado" },
     { "Connecticut", "Connecticut" },
     { "CT", "Connecticut" },
     { "DC", "District of Columbia" },
     { "DE", "Delaware" },
     { "Delaware", "Delaware" },
     { "District of Columbia", "District of Columbia" },
     { "FL", "Florida" },
     { "Florida", "Florida" },
     { "GA", "Georgia" },
     { "Georgia", "Georgia" },
     { "GU", "Guam" },
     { "Guam", "Guam" },
     { "Hawaii", "Hawaii" },
     { "HI", "Hawaii" },
     { "IA", "Iowa" },
     { "ID", "Idaho" },
     { "Idaho", "Idaho" },
     { "IL", "Illinois" },
     { "Illinois", "Illinois" },
     { "IN", "Indiana" },
     { "Indiana", "Indiana" },
     { "Iowa", "Iowa" },
     { "Kansas", "Kansas" },
     { "Kentucky", "Kentucky" },
     { "KS", "Kansas" },
     { "KY", "Kentucky" },
     { "LA", "Louisiana" },
     { "Louisiana", "Louisiana" },
     { "MA", "Massachusetts" },
     { "Maine", "Maine" },
     { "Maryland", "Maryland" },
     { "Massachusetts", "Massachusetts" },
     { "MD", "Maryland" },
     { "ME", "Maine" },
     { "MI", "Michigan" },
     { "Michigan", "Michigan" },
     { "Minnesota", "Minnesota" },
     { "Mississippi", "Mississippi" },
     { "Missouri", "Missouri" },
     { "MN", "Minnesota" },
     { "MO", "Missouri" },
     { "Montana", "Montana" },
     { "MS", "Mississippi" },
     { "MT", "Montana" },
     { "NC", "North Carolina" },
     { "ND", "North Dakota" },
     { "NE", "Nebraska" },
     { "Nebraska", "Nebraska" },
     { "Nevada", "Nevada" },
     { "New Hampshire", "New Hampshire" },
     { "New Jersey", "New Jersey" },
     { "New Mexico", "New Mexico" },
     { "New York", "New York" },
     { "NH", "New Hampshire" },
     { "NJ", "New Jersey" },
     { "NM", "New Mexico" },
     { "North Carolina", "North Carolina" },
     { "North Dakota", "North Dakota" },
     { "NV", "Nevada" },
     { "NY", "New York" },
     { "OH", "Ohio" },
     { "Ohio", "Ohio" },
     { "OK", "Oklahoma" },
     { "Oklahoma", "Oklahoma" },
     { "OR", "Oregon" },
     { "Oregon", "Oregon" },
     { "PA", "Pennsylvania" },
     { "Pennsylvania", "Pennsylvania" },
     { "PR", "Puerto Rico" },
     { "Puerto Rico", "Puerto Rico" },
     { "Rhode Island", "Rhode Island" },
     { "RI", "Rhode Island" },
     { "SC", "South Carolina" },
     { "SD", "South Dakota" },
     { "South Carolina", "South Carolina" },
     { "South Dakota", "South Dakota" },
     { "Tennessee", "Tennessee" },
     { "Texas", "Texas" },
     { "TN", "Tennessee" },
     { "TX", "Texas" },
     { "US Virgin Islands", "US Virgin Islands" },
     { "UT", "Utah" },
     { "Utah", "Utah" },
     { "VA", "Virginia" },
     { "Vermont", "Vermont" },
     { "VI", "US Virgin Islands" },
     { "Virgin Islands", "US Virgin Islands" },
     { "Virginia", "Virginia" },
     { "VT", "Vermont" },
     { "WA", "Washington" },
     { "Washington", "Washington" },
     { "West Virginia", "West Virginia" },
     { "WI", "Wisconsin" },
     { "Wisconsin", "Wisconsin" },
     { "WV", "West Virginia" },
     { "WY", "Wyoming" },
     { "Wyoming", "Wyoming" }
 };

 // Case-insensitive lookup map over state_abbrev_array.
 typedef CStaticPairArrayMap<const char *, const char *, PNocase_CStr> TStateMap;
 DEFINE_STATIC_ARRAY_MAP(TStateMap, stateAbbrevMap, state_abbrev_array);


 /// Recognise a US state or territory and replace it by its full name.
 ///
 /// Before the lookup in stateAbbrevMap a working copy has, in this order,
 /// a leading "State of ", a leading "Commonwealth of " and a trailing
 /// " State" removed (all case-insensitive), and is then trimmed.
 ///
 /// @param state     Candidate text; overwritten with the full name on a hit.
 /// @param modified  Set to true when the full name differs from the text
 ///                  passed in (abbreviation expanded, capitalization
 ///                  changed, or prefix/suffix removed).  Never reset to
 ///                  false here.
 /// @return          true if the text was recognised, false otherwise.
 bool s_IsState ( string& state, bool& modified ) {

     if ( state.empty() ) {
         return false;
     }

     string candidate = state;

     if ( NStr::StartsWith ( candidate, "State of ", NStr::eNocase )) {
         NStr::TrimPrefixInPlace ( candidate, "State of ", NStr::eNocase );
     }
     if ( NStr::StartsWith ( candidate, "Commonwealth of ", NStr::eNocase )) {
         NStr::TrimPrefixInPlace ( candidate, "Commonwealth of ", NStr::eNocase );
     }
     if ( NStr::EndsWith ( candidate, " State", NStr::eNocase )) {
         NStr::TrimSuffixInPlace ( candidate, " State", NStr::eNocase );
     }
     NStr::TruncateSpacesInPlace ( candidate );

     const TStateMap::const_iterator hit = stateAbbrevMap.find(candidate.c_str());
     if ( hit == stateAbbrevMap.end() ) {
         return false;
     }

     // Compare against the caller's text before overwriting it.
     const string full_name = hit->second;
     if ( ! NStr::Equal ( state, full_name )) {
         modified = true;
     }
     state = full_name;
     return true;
 }


 /// Normalise a country value of the form "USA: state, county, ...".
 ///
 /// Steps:
 ///   1. strip one pair of flanking double quotes and surrounding spaces;
 ///   2. split at the first ':' and require the left part to be "USA" or
 ///      "US" (ignoring case); if it is not, retry once on the result of
 ///      CCountries::NewFixCountry(working, true);
 ///   3. split the right part at commas, trim each component, collapse runs
 ///      of spaces and normalise Louisiana parish names;
 ///   4. normalise every component recognised by s_IsState and move the
 ///      first such component to the front;
 ///   5. rebuild the value as "USA: " followed by the components joined
 ///      with ", ".
 ///
 /// @param country  Value to clean.  Left untouched for e_NoResult and
 ///                 e_NotUSA; otherwise overwritten with the rebuilt value
 ///                 (plain "USA" when there are no components).
 /// @return  e_NoResult   - input empty;
 ///          e_NotUSA     - the country part is not USA/US;
 ///          e_Missing    - no component is a state;
 ///          e_Ambiguous  - more than one component is a state;
 ///          e_Corrected  - exactly one state and the rebuilt value differs
 ///                         from the input;
 ///          e_Valid      - otherwise (including the "USA" only case).
 CCountries::EStateCleanup s_DoUSAStateCleanup ( string& country ) {

     if ( country.empty() ) {
         return CCountries::e_NoResult;
     }

     // keep the caller's text for the final "was anything changed" check
     const string original = country;
     string working = country;

     // remove flanking quotation marks - if CCountries::NewFixCountry not called
     if ( NStr::StartsWith ( working, "\"" ) && NStr::EndsWith ( working, "\"" )) {
         working = working.substr ( 1, working.length() - 2 );
     }

     // remove flanking spaces
     NStr::TruncateSpacesInPlace ( working );

     // separate strings before and after colon
     string frst, scnd;
     NStr::SplitInTwo ( working, ":", frst, scnd );
     NStr::TruncateSpacesInPlace ( frst );
     NStr::TruncateSpacesInPlace ( scnd );

     // confirm that country is USA
     if ( ! NStr::EqualNocase ( frst, "USA") && ! NStr::EqualNocase ( frst, "US")) {
         // if not, first try rescuing US territory
         working = CCountries::NewFixCountry(working, true);
         NStr::SplitInTwo ( working, ":", frst, scnd );
         NStr::TruncateSpacesInPlace ( frst );
         NStr::TruncateSpacesInPlace ( scnd );
         if ( ! NStr::EqualNocase ( frst, "USA") && ! NStr::EqualNocase ( frst, "US")) {
             return CCountries::e_NotUSA;
         }
     }

     // split state/county/city clauses at commas
     vector<string> components;
     NStr::Split(scnd, ",", components);

     // check for only country
     if ( components.empty() ) {
         country = "USA";
         return CCountries::e_Valid;
     }

     for ( string& component : components ) {
         // remove flanking spaces around components
         NStr::TruncateSpacesInPlace ( component );
         s_CompressRunsOfSpaces ( component );
         // clean up runon strings like EastBatonRougeParish
         if ( NStr::EndsWith ( component, "Parish", NStr::eNocase )) {
             s_IsParish( component );
         }
     }

     // Locate the state components.  Indices are size_t to match
     // vector::size(); "no state found" is encoded as match == count.
     const size_t count = components.size();
     size_t num_states = 0;
     size_t match = count;

     for ( size_t j = 0; j < count; ++j ) {
         bool modified = false;   // required by s_IsState, not used here
         if ( s_IsState ( components[j], modified )) {
             if ( match == count ) {
                 // record position of first s_IsState match
                 match = j;
             }
             // count successful matches
             ++num_states;
         }
     }
     const bool have_state = ( match < count );

     // generate result
     string res ( "USA: " );
     string pfx = "";

     if ( have_state ) {
         // move first state matched to first position
         res.append ( components[match] );
         pfx = ", ";
     }

     for ( size_t j = 0; j < count; ++j ) {
         if ( j == match ) continue;
         res.append ( pfx );
         res.append ( components[j] );
         pfx = ", ";
     }

     country = res;

     if ( ! have_state ) {
         return CCountries::e_Missing;
     }
     if ( num_states > 1 ) {
         return CCountries::e_Ambiguous;
     }
     if ( ! NStr::Equal ( original, res )) {
         return CCountries::e_Corrected;
     }

     return CCountries::e_Valid;
 }


 // Row reader specialization used by CCountries::ReadUSAExceptionMap to
 // iterate over the rows of the USA exception file.
 typedef CRowReader<CRowReaderStream_NCBI_TSV> TNCBITSVStream;


 // File-scope exception table consulted by CCountries::USAStateCleanup.
 // CCountries::LoadUSAExceptionMap clears and refills it.
 static CCountries::TUsaExceptionMap exception_map;

 // Becomes true once CCountries::LoadUSAExceptionMap has filled exception_map;
 // USAStateCleanup skips the exception lookup while this is false.
 static bool exceptions_initialized = false;


 // Read a tab-separated exception file into `exceptions`.
 //
 // Each row with exactly two fields contributes one entry mapping the first
 // field to the second (an existing key is overwritten); rows with any other
 // field count are ignored.  An empty file name leaves `exceptions` untouched.
 void CCountries::ReadUSAExceptionMap (CCountries::TUsaExceptionMap& exceptions, const string& exception_file ) {

     if ( exception_file.empty()) {
         return;
     }

     TNCBITSVStream tsv_rows (exception_file);
     for ( const auto & record : tsv_rows ) {
         if ( record.GetNumberOfFields() != 2 ) {
             continue;
         }
         string key = record[0].Get<string>();
         string replacement = record[1].Get<string>();
         exceptions [key] = replacement;
     }
 }


 void CCountries::LoadUSAExceptionMap (const TUsaExceptionMap& exceptions) {


     // clear previous map

     exception_map.clear();


     // initialize internal exception map

     for ( const auto & itm : exceptions ) {

         string fr = itm.first;

         string to = itm.second;


         // ensure colon is followed by space to match initial correction

         string f1, f2;

         NStr::SplitInTwo ( fr, ":", f1, f2 );

         NStr::TruncateSpacesInPlace ( f1 );

         NStr::TruncateSpacesInPlace ( f2 );

         if ( ! f1.empty() && ! f2.empty()) {

             fr = f1 + ": " + f2;

         }


         exception_map [fr] = to;

     }


     exceptions_initialized = true;

 }


 void CCountries::LoadUSAExceptionMap (const string& exception_file ) {


     if ( ! exception_file.empty()) {


         TUsaExceptionMap exceptions;

         ReadUSAExceptionMap ( exceptions, exception_file );

         LoadUSAExceptionMap ( exceptions );

     }

 }


 // Clean up a "USA: ..." country value and report what kind of change was made.
 //
 // The value is first passed through s_DoUSAStateCleanup.  If an exception
 // table has been loaded and holds a non-empty replacement for the cleaned
 // value, that replacement is returned instead.
 //
 // @param country  value to clean up (not modified)
 // @param type     receives the classification of the result; set to e_NotUSA
 //                 whenever the returned string does not start with "USA"
 // @return         the exception-table replacement when one applies, otherwise
 //                 the algorithmically cleaned value
 string CCountries::USAStateCleanup ( const string& country, CCountries::EStateCleanup& type ) {

     // call algorithmic mapping function
     string working = country;
     type = s_DoUSAStateCleanup ( working );

     // apply exceptions from preloaded data file
     if ( exceptions_initialized ) {
         // Use find() rather than operator[]: operator[] would insert an empty
         // entry for every value that is not in the table, growing the shared
         // map on each miss and turning a read-only lookup into a write.
         const auto found = exception_map.find ( working );
         if ( found != exception_map.end() && ! found->second.empty()) {
             const string& corrected = found->second;
             // presence in map here will disambiguate otherwise ambiguous name pair,
             // thus self-entries need to be added to the ambiguous state exception list
             if ( ! NStr::StartsWith ( corrected, "USA" )) {
                 type = e_NotUSA;
             } else if ( NStr::Equal ( corrected, working ) && NStr::Equal ( corrected, country )) {
                 type = e_Valid;
             } else {
                 type = e_Corrected;
             }
             return corrected;
         }
     }

     if ( ! NStr::StartsWith ( working, "USA" )) {
         type = e_NotUSA;
     }
     return working;
 }


 // Overload for callers that only want the cleaned string; the
 // classification produced by the two-argument form is discarded.
 string CCountries::USAStateCleanup ( const string& country ) {

     CCountries::EStateCleanup ignored_type = e_NoResult;
     return USAStateCleanup ( country, ignored_type );
 }


 // end of RW-1278


 // Try to turn a free-text country value into its corrected form.
 //
 // The stages below run in order and several of them return early:
 //   1. values flagged by s_SuppressCountryFix are returned as-is when valid,
 //      otherwise as an empty string;
 //   2. Micronesia special cases;
 //   3. surrounding double quotes, flanking spaces and a trailing colon are
 //      stripped from a working copy;
 //   4. "U.S.A.", "United States" and "United States of America" before a
 //      colon are rewritten to "USA: ";
 //   5. lookup in k_old_country_name_fixes;
 //   6. when us_territories is set, selected territory names are prefixed
 //      with "USA: " and run through USAStateCleanup;
 //   7. already valid values only get extra colons changed to commas;
 //   8. WholeCountryFix;
 //   9. token search in k_country_name_fixes, then k_subregion_fixes, with
 //      the remaining text re-attached after the recognized name.
 //
 // @param test            value to fix (not modified)
 // @param us_territories  enables stage 6
 // @return                the corrected value, or an empty string when none
 //                        of the stages produced one
 string CCountries::NewFixCountry (const string& test, bool us_territories)

 {

     // change requested for JIRA:SQD-1410

     if (s_SuppressCountryFix(test)) {

         if (IsValid(test)) {

             return test;

         } else {

             return kEmptyStr;

         }

     }


     // JIRA:RW-2243 Micronesia is the only entry with a comma, special case test here
     // (only a capitalization mismatch returns here; an exact match falls through)

     string micronesia = "Micronesia, Federated States of";

     if (NStr::EqualNocase(test, micronesia)) {

         if (! NStr::EqualCase(test, micronesia)) {

             return micronesia;

         }

     }

     // JIRA:RW-2243 also special case to convert old Micronesia name to new name

     if (NStr::EqualNocase(test, "Micronesia")) {

         return micronesia;

     }


     // work on a copy: drop enclosing double quotes, then flanking spaces
     string input = test;

     if (NStr::StartsWith(input, "\"") && NStr::EndsWith(input, "\"")) {

         input = input.substr(1, input.length() - 2);

     }

     NStr::TruncateSpacesInPlace(input);


     // drop a trailing colon together with any spaces before it
     if (NStr::EndsWith(input, ":")) {

         input = input.substr(0, input.length() - 1);

         NStr::TruncateSpacesInPlace(input);

     }


     // normalize the spelled-out United States forms in front of the first colon
     string usa1,usa2;

     NStr::SplitInTwo(input, ":", usa1, usa2);

     if (!usa1.empty() && !usa2.empty()) {

         NStr::TruncateSpacesInPlace(usa1);

         NStr::TruncateSpacesInPlace(usa2);

         if (NStr::EqualNocase(usa1, "U.S.A.") || NStr::EqualNocase(usa1, "United States") || NStr::EqualNocase(usa1, "United States of America")) {

             input = "USA: " + usa2;

         }

     }


     // a hit in the old-name table is returned directly
     auto old_name_fix = k_old_country_name_fixes.find(input.c_str());

     if (old_name_fix != k_old_country_name_fixes.end()) {

         input = old_name_fix->second;

         return input;

     }


     // optionally fold selected territory names under "USA: "
     if (us_territories) {

         if ( NStr::StartsWith( input, "Puerto Rico", NStr::eNocase) || NStr::StartsWith( input, "Guam", NStr::eNocase) || NStr::StartsWith( input, "American Samoa", NStr::eNocase) ) {

             input = "USA: " + input;

             CCountries::ChangeExtraColonsToCommas(input);

             input = CCountries::USAStateCleanup(input);

             return input;

         } else if ( NStr::StartsWith( input, "Virgin Islands", NStr::eNocase) ) {

             // Virgin Islands additionally gets a "US " prefix
             input = "USA: US " + input;

             CCountries::ChangeExtraColonsToCommas(input);

             input = CCountries::USAStateCleanup(input);

             return input;

         }

     }


     // an already valid value only needs its extra colons converted
     if (IsValid(input)) {

         CCountries::ChangeExtraColonsToCommas(input);

         return input;

     }

     string new_country = WholeCountryFix(input);

     if (!new_country.empty())

         return new_country;


     // from here on new_country is empty unless one of the branches below sets it
     bool too_many_countries = false;

     bool bad_cap = false;

     vector<string> countries = x_Tokenize(input);

     string valid_country;

     string orig_valid_country;


     // search the country-name table first, then the subregion table
     x_FindCountryName(k_country_name_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);

     if (valid_country.empty()) {

         x_FindCountryName(k_subregion_fixes, countries, valid_country, orig_valid_country, too_many_countries, bad_cap);

     }


     if (!valid_country.empty() && !too_many_countries)

         too_many_countries = ContainsMultipleCountryNames (input);


     if (!valid_country.empty() && too_many_countries && valid_country == input)

     {

         // only repair a missing space after the first colon
         string str1,str2;

         NStr::SplitInTwo(valid_country,":",str1,str2);

         if (!str1.empty() && !str2.empty() && !NStr::StartsWith(str2," "))

             new_country = str1+": "+str2;


         CCountries::ChangeExtraColonsToCommas(new_country);

     }

     else if(!valid_country.empty() && !too_many_countries)

     {

         // find valid_country in input
         // NOTE(review): pos is used without an NPOS check; presumably
         // orig_valid_country always occurs in input - confirm against x_FindCountryName

         size_t pos = NStr::Find(input,orig_valid_country);

         // save preceding string without trailing spaces or delimiters ":,"

         string before = input.substr(0,pos);


         x_RemoveDelimitersFromEnds(before);

         NStr::TruncateSpacesInPlace(before);

         // save trailing string without initial spaces or delimiters

         string after = input.substr(pos+orig_valid_country.length());

         x_RemoveDelimitersFromEnds(after, true);

         NStr::TruncateSpacesInPlace(after);

         if (bad_cap) new_country = GetCorrectedCountryCapitalization(valid_country);

         else new_country = valid_country;

         // separator after the name: ": " if the name has no colon yet, otherwise ", "
         if (!before.empty() || !after.empty()) {

             if (NStr::Find(valid_country, ":") == NPOS) {

                 new_country += ": ";

             } else {

                 new_country += ", ";

             }

         }

         if (!before.empty())

             new_country += before;

         // no comma is inserted in front of a lone closing parenthesis
         if (!before.empty() && !after.empty() && !NStr::Equal(after, ")"))

             new_country += ", ";

         if (!after.empty())

             new_country += after;

         CCountries::ChangeExtraColonsToCommas(new_country);

     }


     return new_country;

 }


 // Keep the first colon of `country` and turn every later colon into a comma
 // (requested in SQD-4516).
 //
 // @param country  string edited in place
 // @return         true if at least one colon was replaced
 bool CCountries::ChangeExtraColonsToCommas(string& country)
 {
     bool first_colon_seen = false;
     bool changed = false;
     for (char& ch : country) {
         if (ch != ':') {
             continue;
         }
         if (first_colon_seen) {
             ch = ',';
             changed = true;
         } else {
             first_colon_seen = true;
         }
     }
     return changed;
 }


 // Run NewFixCountry on `input` and tidy the text after the first colon.
 //
 // When the fixed value contains a colon, any run of commas, colons and
 // whitespace starting at that colon is collapsed: the result is the country
 // name alone when nothing follows, otherwise "<name>: <rest>".
 //
 // @param input                   value to fix
 // @param capitalize_after_colon  pass the text after the colon through
 //                                CapitalizeFirstLetterOfEveryWord
 // @return                        the tidied value
 string CCountries::CountryFixupItem(const string &input, bool capitalize_after_colon)
 {
     const string fixed = NewFixCountry (input);

     const SIZE_TYPE colon_pos = NStr::Find(fixed, ":");
     if (colon_pos == NPOS) {
         return fixed;
     }

     // step over the colon and every delimiter or space that follows it
     SIZE_TYPE rest_pos = colon_pos;
     for (;;) {
         const char ch = fixed[rest_pos];
         if (ch != ','  &&  ch != ':'  &&  !isspace((unsigned char)ch)) {
             break;
         }
         ++rest_pos;
     }

     const string name_part = fixed.substr(0, colon_pos);
     string remainder = fixed.substr(rest_pos);

     if (remainder.empty()) {
         return (rest_pos > colon_pos) ? name_part : fixed;
     }

     NStr::TruncateSpacesInPlace(remainder, NStr::eTrunc_Begin);
     if (capitalize_after_colon) {
         remainder = CapitalizeFirstLetterOfEveryWord (remainder);
     }
     return name_part + ": " + remainder;
 }


 // SubSource Qual Fixups

 // Pair and map types for the case-insensitive (PNocase_CStr) lookup tables
 // that map a qualifier value to its canonical capitalization.
 typedef SStaticPair<const char*, const char*> TStaticQualFixPair;

 typedef CStaticPairArrayMap<const char*, const char*, PNocase_CStr> TStaticQualFixMap;


 // Canonical spellings of development-stage values; key and value are the
 // same text, so a case-insensitive hit yields the lower-case form.
 // NOTE(review): entries are listed alphabetically; presumably the static
 // array map requires sorted keys - confirm before adding entries.
 static const TStaticQualFixPair kDevStagePairs[] = {

     { "adult", "adult" },

     { "egg", "egg" },

     { "juvenile", "juvenile" },

     { "larva", "larva" }

 };


 // Lookup map over kDevStagePairs, used by CSubSource::FixDevStageCapitalization.
 DEFINE_STATIC_ARRAY_MAP(TStaticQualFixMap, sc_DevStagePairs, kDevStagePairs);


 // Return the canonical spelling of a development-stage value when it is
 // found in sc_DevStagePairs; otherwise return `value` unchanged.
 string CSubSource::FixDevStageCapitalization(const string& value)
 {
     const auto canonical = sc_DevStagePairs.find(value.c_str());
     if (canonical == sc_DevStagePairs.end()) {
         return value;
     }
     return canonical->second;
 }


 // Canonical spellings of cell-type values; key and value are the same text,
 // so a hit in the lookup map yields the lower-case form.
 // NOTE(review): entries are listed alphabetically; presumably the static
 // array map requires sorted keys - confirm before adding entries.
 static const TStaticQualFixPair kCellTypePairs[] = {

     { "hemocyte", "hemocyte" },

     { "hepatocyte", "hepatocyte" },

     { "lymphocyte", "lymphocyte" },

     { "neuroblast", "neuroblast" }

 };


 // Lookup map over kCellTypePairs, used by CSubSource::FixCellTypeCapitalization.
 DEFINE_STATIC_ARRAY_MAP(TStaticQualFixMap, sc_CellTypePairs, kCellTypePairs);


 // Return the canonical spelling of a cell-type value when it is found in
 // sc_CellTypePairs; otherwise return `value` unchanged.
 string CSubSource::FixCellTypeCapitalization(const string& value)
 {
     TStaticQualFixMap::const_iterator match = sc_CellTypePairs.find(value.c_str());
     return (match != sc_CellTypePairs.end()) ? string(match->second) : value;
 }


 // Mutex held by s_InitializeQualMaps while it checks and fills the map below.
 DEFINE_STATIC_FAST_MUTEX(s_QualFixMutex);

 // Map from a qualifier value to its replacement, compared with PNocase.
 typedef map<string, string, PNocase> TQualFixMap;


 // Isolation-source fixes, filled by s_InitializeQualMaps.
 static TQualFixMap s_IsolationSourceMap;

 // True once s_InitializeQualMaps has filled s_IsolationSourceMap.
 static bool s_QualFixupMapsInitialized = false;


 static void s_ProcessQualMapLine(const CTempString& line, TQualFixMap& qual_map)

 {

     vector<CTempString> tokens;

     NStr::Split(line, "\t", tokens);

     if (tokens.size() > 1) {

         qual_map[tokens[0]] = tokens[1];

     }

 }


 void s_AddOneDataFile(const string& file_name, const string& data_name,

                       const char **built_in, size_t num_built_in,

                       TQualFixMap& qual_map)

 {

     string file = g_FindDataFile(file_name);

     CRef<ILineReader> lr;

     if (!file.empty()) {

         try {

             lr = ILineReader::New(file);

         } NCBI_CATCH("s_InitializeQualMaps")

     }


     if (lr.Empty()) {

         if (built_in == NULL) {

             ERR_POST(Note << "No data for " + data_name);

         } else {

             if (getenv("NCBI_DEBUG")) {

                 ERR_POST(Note << "Falling back on built-in data for " + data_name);

             }

             for (size_t i = 0; i < num_built_in; i++) {

                 const char *p = built_in[i];

                 s_ProcessQualMapLine(p, qual_map);

             }

         }

     } else {

         if (getenv("NCBI_DEBUG")) {

             ERR_POST(Note << "Reading from " + file + " for " + data_name);

         }

         do {

             s_ProcessQualMapLine(*++*lr, qual_map);

         } while (!lr->AtEOF());

     }

 }


 #include "isolation_sources.inc"


 // Fill s_IsolationSourceMap on first use.  The whole check-and-load runs
 // under s_QualFixMutex; later calls return after seeing the initialized flag.
 static void s_InitializeQualMaps(void)
 {
     CFastMutexGuard GUARD(s_QualFixMutex);
     if (!s_QualFixupMapsInitialized) {
         // isolation sources: data file first, built-in list as fallback
         const size_t num_built_in = sizeof(k_isolation_sources) / sizeof(char *);
         s_AddOneDataFile("isolation_sources.txt", "isolation sources", (const char **)k_isolation_sources, num_built_in, s_IsolationSourceMap);
         s_QualFixupMapsInitialized = true;
     }
 }


 // Fix the capitalization of an isolation-source value.
 //
 // A hit in s_IsolationSourceMap is returned directly.  Otherwise the value
 // is replaced by the first sex-qualifier token it matches case-insensitively
 // (if any) and then passed through the host, dev-stage and cell-type fixers
 // in that order.
 string CSubSource::FixIsolationSourceCapitalization(const string& value)
 {
     s_InitializeQualMaps();

     const auto known = s_IsolationSourceMap.find(value);
     if (known != s_IsolationSourceMap.end()) {
         return known->second;
     }

     string result = value;

     // locate the first matching sex-qualifier token, if there is one
     const size_t num_tokens = sizeof(sm_ValidSexQualifierTokens) / sizeof(const char*);
     size_t idx = 0;
     while (idx < num_tokens && !NStr::EqualNocase(result, sm_ValidSexQualifierTokens[idx])) {
         ++idx;
     }
     if (idx < num_tokens) {
         result = sm_ValidSexQualifierTokens[idx];
     }

     return FixCellTypeCapitalization(
                FixDevStageCapitalization(
                    COrgMod::FixHostCapitalization(result)));
 }


 // Fix the capitalization of a tissue-type value.
 //
 // Uses the same sources as the isolation-source fixer: a hit in
 // s_IsolationSourceMap wins; otherwise the sex-qualifier tokens are tried,
 // followed by the host, dev-stage and cell-type fixers.
 string CSubSource::FixTissueTypeCapitalization(const string& value)
 {
     s_InitializeQualMaps();

     TQualFixMap::iterator entry = s_IsolationSourceMap.find(value);
     if (entry == s_IsolationSourceMap.end()) {
         string fixed = value;

         const size_t token_count = sizeof(sm_ValidSexQualifierTokens) / sizeof(const char*);
         for (size_t n = 0; n != token_count; ++n) {
             const auto& token = sm_ValidSexQualifierTokens[n];
             if (NStr::EqualNocase(fixed, token)) {
                 // only the first matching token is applied
                 fixed = token;
                 break;
             }
         }

         fixed = COrgMod::FixHostCapitalization(fixed);
         fixed = FixDevStageCapitalization(fixed);
         fixed = FixCellTypeCapitalization(fixed);
         return fixed;
     }

     return entry->second;
 }


 // Lab-host values are fixed exactly like host values: forward to
 // COrgMod::FixHostCapitalization.
 string CSubSource::FixLabHostCapitalization(const string& value)
 {
     string fixed_host = COrgMod::FixHostCapitalization(value);
     return fixed_host;
 }


 // Dispatch to the capitalization fixer that belongs to `subtype`.
 //
 // For the sex subtype a blank fix result falls back on the original value.
 // Subtypes without a fixer return `value` unchanged.
 string CSubSource::FixCapitalization(TSubtype subtype, const string& value)
 {
     switch (subtype) {
         case CSubSource::eSubtype_sex: {
             const string fixed_sex = FixSexQualifierValue(value);
             return NStr::IsBlank(fixed_sex) ? value : fixed_sex;
         }
         case CSubSource::eSubtype_isolation_source:
             return FixIsolationSourceCapitalization(value);
         case CSubSource::eSubtype_lab_host:
             return FixLabHostCapitalization(value);
         case CSubSource::eSubtype_tissue_type:
             return FixTissueTypeCapitalization(value);
         case CSubSource::eSubtype_dev_stage:
             return FixDevStageCapitalization(value);
         case CSubSource::eSubtype_cell_type:
             return FixCellTypeCapitalization(value);
         default:
             break;
     }
     return value;
 }


 void CSubSource::FixCapitalization()

 {

     if (!IsSetSubtype() || !IsSetName()) {

         return;

     }


     TSubtype subtype = GetSubtype();


     if (subtype == CSubSource::eSubtype_sex) {

         string upr = GetName();

         string lwr = upr;

         NStr::ToLower(lwr);

         if (! NStr::Equal(upr, lwr)) {

             SetName(lwr);

         }

     }


     const string& name = GetName();


     string new_val = FixCapitalization(subtype, name);


     if (!NStr::IsBlank(new_val)) {

         SetName(new_val);

     }


 }


 // Dispatch to the automatic fixer that belongs to `subtype`.
 //
 // @return the fixer's result, or an empty string for subtypes that have no
 //         automatic fix
 string CSubSource::AutoFix(TSubtype subtype, const string& value)
 {
     switch (subtype) {
         case CSubSource::eSubtype_country:
             return CCountries::NewFixCountry(value);
         case CSubSource::eSubtype_collection_date:
             return FixDateFormat(value);
         case CSubSource::eSubtype_lat_lon:
             return FixLatLonFormat(value);
         case CSubSource::eSubtype_sex:
             return FixSexQualifierValue(value);
         case CSubSource::eSubtype_altitude:
             return FixAltitude(value);
         default:
             break;
     }
     return string();
 }


 void CSubSource::AutoFix()

 {

     if (!IsSetSubtype() || !IsSetName()) {

         return;

     }


     TSubtype subtype = GetSubtype();

     string new_val = AutoFix(subtype, GetName());


     if (!NStr::IsBlank(new_val)) {

         SetName(new_val);

     } else if (subtype == CSubSource::eSubtype_sex) {

         string upr = GetName();

         string lwr = upr;

         NStr::ToLower(lwr);

         if (! NStr::Equal(upr, lwr)) {

             SetName(lwr);

         }

     }

 }


 // NOTE (applies to this array and to s_ReplaceableCultureNotes): if string A
 // is a prefix of string B, string B must be placed BEFORE string A, i.e. the
 // longer string comes first.
 //
 // NULL-terminated list of culture notes.  CSubSource::HasCultureNotes looks
 // for any entry as a case-insensitive substring, and
 // CSubSource::RemoveCultureNotes deletes every occurrence, walking the
 // entries in the order given here.

 static const char * s_RemovableCultureNotes[] = {

     "[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]",

     "[BankIt_uncultured16S_wizard]; [universal primers]; [dgge]",

     "[BankIt_uncultured16S_wizard]; [universal primers]",

     "[BankIt_cultured16S_wizard]",

     "[BankIt_organellerRNA_wizard]",

     "[BankIt_ITS_wizard]; [rRNAITS_notfound]",

     "[BankIt_ITS_wizard]",

     "[uncultured (using universal primers)]",

     "[uncultured (using universal primers) bacterial source]",

     "[cultured bacterial source]",

     "[enrichment culture bacterial source]",

     "[mixed bacterial source (cultured and uncultured)]",

     "[uncultured]; [universal primers]",

     "[mixed bacterial source]",

     "[virus wizard]",

     "[cDNA derived from mRNA, purified viral particles]",

     "[cDNA derived from mRNA, whole cell/tissue lysate]",

     "[cDNA derived from genomic RNA, whole cell/tissue lysate]",

     "[cDNA derived from genomic RNA, purified viral particles]",

     "[universal primers]",

     "[uncultured; wizard]",

     "[uncultured; wizard; spans unknown]",

     "[cultured; wizard]",

     "[cultured; wizard; spans unknown]",

     "[intergenic wizard]",

     "[intergenic wizard; spans unknown]",

     "[Microsatellite wizard]",

     "[Microsatellite wizard; multiple repeats]",

     "[D-loop wizard]",

     "[D-loop wizard; spans unknown]",

     "[D-loop wizard; spans known]",

     NULL

 };


 // NULL-terminated list of culture notes that are matched against the whole
 // value (case-insensitive equality).  For species-level entries
 // CSubSource::RemoveCultureNotes replaces a matching value with
 // "amplified with species-specific primers".
 static const char * s_ReplaceableCultureNotes[] = {

  "[BankIt_uncultured16S_wizard]; [species_specific primers]; [tgge]",

  "[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]",

  "[BankIt_uncultured16S_wizard]; [species_specific primers]",

  "[uncultured (with species-specific primers)]",

  "[uncultured]; [amplified with species-specific primers]",

  "[uncultured (using species-specific primers) bacterial source]",

  "[amplified with species-specific primers]",

  NULL

 };


 // Report whether `value` carries a culture note: either any removable note
 // occurs in it (case-insensitive substring) or the whole value equals one of
 // the replaceable notes (case-insensitive).
 bool CSubSource::HasCultureNotes(const string& value)
 {
     for (const char* const* note = s_RemovableCultureNotes; *note != NULL; ++note) {
         if (NStr::FindNoCase(value, *note) != string::npos) {
             return true;
         }
     }
     for (const char* const* note = s_ReplaceableCultureNotes; *note != NULL; ++note) {
         if (NStr::EqualNocase(value, *note)) {
             return true;
         }
     }
     return false;
 }


 // Strip culture notes from `value` in place.
 //
 // Every occurrence of each removable note is deleted together with the
 // spaces and semicolons that directly follow it; leading and trailing
 // spaces/semicolons are then trimmed.  For species-level entries, a value
 // that equals one of the replaceable notes becomes
 // "amplified with species-specific primers".  Blank input is left alone.
 void CSubSource::RemoveCultureNotes (string& value, bool is_species_level)
 {
     if (NStr::IsBlank(value)) {
         return;
     }

     for (const char* const* note = s_RemovableCultureNotes; *note != NULL; ++note) {
         const string note_text = *note;
         for (size_t hit = NStr::FindNoCase(value, note_text);
              hit != NPOS;
              hit = NStr::FindNoCase(value, note_text)) {
             // also swallow the separator characters right after the note
             const size_t after_note = hit + note_text.length();
             const size_t separators = strspn (value.c_str() + after_note, " ;");
             value.erase(hit, note_text.length() + separators);
         }
     }

     // remove leading/trailing spaces and semicolons
     value.erase(0, value.find_first_not_of(" ;"));
     const size_t last_kept = value.find_last_not_of(" ;");
     value.erase(last_kept == string::npos ? 0 : last_kept + 1);

     if (!is_species_level) {
         return;
     }
     for (const char* const* note = s_ReplaceableCultureNotes; *note != NULL; ++note) {
         if (NStr::EqualNocase(value, *note)) {
             value = "amplified with species-specific primers";
             break;
         }
     }
 }


 // Strip culture notes from this qualifier's name; the name is reset when
 // nothing but blanks is left.  Does nothing when no name is set.
 void CSubSource::RemoveCultureNotes (bool is_species_level)
 {
     if (!IsSetName()) {
         return;
     }
     RemoveCultureNotes(SetName(), is_species_level);
     if (NStr::IsBlank(GetName())) {
         ResetName();
     }
 }


 // CCountryLine

 // Build one horizontal line of a country outline.
 //
 // @param country_name  name of the country the line belongs to
 // @param y             latitude of the line
 // @param min_x         longitude where the line starts
 // @param max_x         longitude where the line ends
 // @param scale         factor used to convert the coordinates to integers
 CCountryLine::CCountryLine
 (const string & country_name, double y, double min_x, double max_x, double scale)
 : m_CountryName(country_name) ,
   m_Scale (scale)
 {
     // the conversions read m_Scale, which the initializer list has set
     m_MinX = x_ConvertLon(min_x);
     m_MaxX = x_ConvertLon(max_x);
     m_Y = x_ConvertLat(y);
 }


 // Destructor; the body is intentionally empty.
 CCountryLine::~CCountryLine ()
 {
 }


 // Small offset added to the magnitude before truncating to int.
 #define EPSILON 0.001

 // Convert a latitude to a scaled integer.
 //
 // The latitude is clamped to [-90, 90], multiplied by `scale`, and truncated
 // toward zero after EPSILON has been added to its magnitude.
 int CCountryLine::ConvertLat (double y, double scale)
 {
     if (y < -90.0) {
         y = -90.0;
     } else if (y > 90.0) {
         y = 90.0;
     }

     if (y > 0) {
         return (int) (y * scale + EPSILON);
     }
     // zero and negative values: work on the magnitude, then restore the sign
     return (int) (-(-y * scale + EPSILON));
 }


 // Convert a latitude using this line's own scale.
 int CCountryLine::x_ConvertLat (double y)
 {
     const int scaled_lat = ConvertLat(y, m_Scale);
     return scaled_lat;
 }


 // Convert a longitude to a scaled integer.
 //
 // The longitude is clamped to [-180, 180], multiplied by `scale`, and
 // truncated toward zero after EPSILON has been added to its magnitude.
 int CCountryLine::ConvertLon (double x, double scale)
 {
     x = (x < -180.0) ? -180.0 : x;
     x = (x > 180.0) ? 180.0 : x;

     // zero and negative values are converted on their magnitude, sign restored
     return (x > 0) ? (int) (x * scale + EPSILON)
                    : (int) (-(-x * scale + EPSILON));
 }


 // Convert a longitude using this line's own scale.
 int CCountryLine::x_ConvertLon (double x)
 {
     const int scaled_lon = ConvertLon(x, m_Scale);
     return scaled_lon;
 }


 // Build a bounding block for a country.
 //
 // The area is the inclusive width times the inclusive height of the box.
 // The name is split at the first colon into level 0 (before) and level 1
 // (after), both trimmed; without a colon the whole name is level 0 and
 // level 1 is empty.
 CCountryExtreme::CCountryExtreme (const string & country_name, int min_x, int min_y, int max_x, int max_y)
 : m_CountryName(country_name) , m_MinX (min_x), m_MinY (min_y), m_MaxX(max_x), m_MaxY (max_y)
 {
     const auto height = 1 + m_MaxY - m_MinY;
     const auto width = 1 + m_MaxX - m_MinX;
     m_Area = height * width;

     const size_t colon = NStr::Find(country_name, ":");
     if (colon != NPOS) {
         m_Level0 = country_name.substr(0, colon);
         m_Level1 = country_name.substr(colon + 1);
         NStr::TruncateSpacesInPlace(m_Level0);
         NStr::TruncateSpacesInPlace(m_Level1);
     } else {
         m_Level0 = country_name;
         m_Level1.clear();
     }
 }


 // Destructor; the body is intentionally empty.
 CCountryExtreme::~CCountryExtreme ()
 {
 }


 // Lower the minimum X to `min_x` if that extends the box.
 // @return true if the stored minimum changed
 bool CCountryExtreme::SetMinX(int min_x)
 {
     const bool extends = min_x < m_MinX;
     if (extends) {
         m_MinX = min_x;
     }
     return extends;
 }


 // Raise the maximum X to `max_x` if that extends the box.
 // @return true if the stored maximum changed
 bool CCountryExtreme::SetMaxX(int max_x)
 {
     const bool extends = max_x > m_MaxX;
     if (extends) {
         m_MaxX = max_x;
     }
     return extends;
 }


 // Lower the minimum Y to `min_y` if that extends the box.
 // @return true if the stored minimum changed
 bool CCountryExtreme::SetMinY(int min_y)
 {
     const bool extends = min_y < m_MinY;
     if (extends) {
         m_MinY = min_y;
     }
     return extends;
 }


 // Raise the maximum Y to `max_y` if that extends the box.
 // @return true if the stored maximum changed
 bool CCountryExtreme::SetMaxY(int max_y)
 {
     const bool extends = max_y > m_MaxY;
     if (extends) {
         m_MaxY = max_y;
     }
     return extends;
 }


 // Grow the bounding box so that it covers `line` and add the line's
 // inclusive length to the area.  A NULL line is ignored.
 void CCountryExtreme::AddLine(const CCountryLine *line)
 {
     if (!line) {
         return;
     }

     const auto line_min_x = line->GetMinX();
     const auto line_max_x = line->GetMaxX();
     const auto line_y = line->GetY();

     SetMinX(line_min_x);
     SetMaxX(line_max_x);
     SetMinY(line_y);
     SetMaxY(line_y);

     m_Area += 1 + line_max_x - line_min_x;
 }


 // Report whether this block and `other_block` overlap.
 //
 // Two cases count as overlap, both requiring the Y ranges to intersect:
 // this block's maximum X lies within the other block's X range, or the
 // other block's maximum X lies within this block's X range.
 // A NULL `other_block` never overlaps.
 bool CCountryExtreme::DoesOverlap(const CCountryExtreme* other_block) const
 {
     if (other_block == NULL) {
         return false;
     }

     const bool my_max_x_inside_other =
            m_MaxX >= other_block->GetMinX()
         && m_MaxX <= other_block->GetMaxX()
         && m_MaxY >= other_block->GetMinY()
         && m_MinY <= other_block->GetMaxY();
     if (my_max_x_inside_other) {
         return true;
     }

     return other_block->GetMaxX() >= m_MinX
         && other_block->GetMaxX() <= m_MaxX
         && other_block->GetMaxY() >= m_MinY
         && other_block->GetMinY() <= m_MaxY;
 }


 // Decide whether this block should be preferred over `other_block`.
 //
 // @param other_block  current best block; NULL means this block always wins
 // @param country      preferred country; blank means no preference
 // @param province     preferred province; blank means no preference
 // @param prefer_new   answer to give when the names do not settle the matter
 // @return             true if this block should replace `other_block`
 bool CCountryExtreme::PreferTo(const CCountryExtreme* other_block, const string country, const string province, const bool prefer_new) const
 {
     if (other_block == NULL) {
         return true;
     }

     // without a preferred country the two blocks are equal
     if (NStr::IsBlank(country)) {
         return prefer_new;
     }

     const bool other_in_country = NStr::EqualNocase(country, other_block->GetLevel0());

     if (!NStr::EqualNocase(country, m_Level0)) {
         // this block is outside the preferred country: keep the current best
         // if it is inside, otherwise the names do not decide
         return other_in_country ? false : prefer_new;
     }

     // this block is inside the preferred country
     if (!other_in_country) {
         return true;
     }

     // both are inside the country: a province match on this side only wins
     const bool this_in_province =
         !NStr::IsBlank(province) && NStr::EqualNocase(province, m_Level1);
     if (this_in_province && !NStr::EqualNocase(province, other_block->GetLevel1())) {
         return true;
     }

     // both match the province, neither does, or there is no preferred province
     return prefer_new;
 }


 // Store the coordinate pair and start with all three distances
 // (land, water, claimed) at -1.
 // NOTE(review): -1 presumably means "not computed yet" - confirm against the
 // code that fills these fields.
 CLatLonCountryId::CLatLonCountryId(float lat, float lon)

     : m_Lat(lat),

       m_Lon(lon),

       m_LandDistance(-1),

       m_WaterDistance(-1),

       m_ClaimedDistance(-1)

 {}


 // Compare the indicated country and province with the guessed and closest
 // regions stored in this object and return the matching classification flags.
 //
 // Side effect: when there is neither a guessed country nor a guessed body of
 // water and `country` equals the closest country, the closest values are
 // promoted to guesses (country, province and full guess).
 //
 // @param country   indicated country (or body of water)
 // @param province  indicated province, may be blank
 // @return          combination of the f*Match / f*Closest flags
 CLatLonCountryId::TClassificationFlags CLatLonCountryId::Classify(string country, string province)
 {
     CLatLonCountryId::TClassificationFlags flags = 0;

     // 1. compare against the guessed country, falling back on the closest one
     if (!NStr::IsBlank(GetGuessCountry())) {
         if (NStr::EqualNocase(country, GetGuessCountry())) {
             flags |= CLatLonCountryId::fCountryMatch;
             // two empty provinces still count as a match
             if (NStr::EqualNocase(province, GetGuessProvince())) {
                 flags |= CLatLonCountryId::fProvinceMatch;
             }
         }

         if ((flags & CLatLonCountryId::fCountryMatch) == 0) {
             // no country match: is the indicated country the closest one?
             if (NStr::EqualNocase(country, GetClosestCountry())) {
                 flags |= CLatLonCountryId::fCountryClosest;
                 if (NStr::EqualNocase(province, GetClosestProvince())) {
                     flags |= CLatLonCountryId::fProvinceClosest;
                 }
             }
         } else if ((flags & CLatLonCountryId::fProvinceMatch) == 0
                    && !NStr::IsBlank(province)
                    && NStr::EqualNocase (province, GetClosestProvince())) {
             // country matched, province did not, but it is the closest province
             flags |= CLatLonCountryId::fProvinceClosest;
         }
     }

     // 2. was a non-approved body of water indicated correctly?
     if (!NStr::IsBlank(GetGuessWater())) {
         if (NStr::EqualNocase(country, GetGuessWater())) {
             flags |= CLatLonCountryId::fWaterMatch;
         } else if (NStr::EqualNocase(country, GetClosestWater())) {
             flags |= CLatLonCountryId::fWaterClosest;
         }
     }

     // 3. indicated country equals the closest country
     const bool is_closest_country =
         !NStr::IsBlank(GetClosestCountry()) && NStr::EqualNocase(country, GetClosestCountry());
     if (!is_closest_country) {
         return flags;
     }

     const bool nothing_guessed =
         NStr::IsBlank(GetGuessCountry()) && NStr::IsBlank(GetGuessWater());
     if (nothing_guessed) {
         // promote the closest region to the guess
         flags |= CLatLonCountryId::fCountryMatch;
         SetGuessCountry(GetClosestCountry());
         SetFullGuess(GetClosestCountry());
         if (!NStr::IsBlank(GetClosestProvince()) && NStr::EqualNocase(province, GetClosestProvince())) {
             flags |= CLatLonCountryId::fProvinceMatch;
             SetGuessProvince(GetClosestProvince());
             SetFullGuess(GetClosestFull());
         }
     } else {
         flags |= CLatLonCountryId::fCountryClosest;
         if (!NStr::IsBlank(GetClosestProvince()) && NStr::EqualNocase(province, GetClosestProvince())) {
             flags |= CLatLonCountryId::fProvinceClosest;
         }
     }
     return flags;
 }


 // Destructor; the body is intentionally empty.
 CLatLonCountryId::~CLatLonCountryId()
 {
 }


 // Built-in country outline data.
 // NOTE(review): s_DefaultLatLonCountryText is presumably defined by this
 // include - confirm.
 #include "lat_lon_country.inc"

 // Number of entries in s_DefaultLatLonCountryText.
 static const size_t k_NumLatLonCountryText = ArraySize(s_DefaultLatLonCountryText);


 // Built-in water outline data.
 // NOTE(review): s_DefaultLatLonWaterText is presumably defined by this
 // include - confirm.
 #include "lat_lon_water.inc"

 // Number of entries in s_DefaultLatLonWaterText.
 static const size_t k_NumLatLonWaterText = ArraySize(s_DefaultLatLonWaterText);


 void CLatLonCountryMap::x_InitFromDefaultList(const char * const *list, int num)

 {

     if (getenv("NCBI_DEBUG")) {

         ERR_POST(Note << "Falling back on built-in data for latlon / water data.");

     }

       // initialize list of country lines

     m_CountryLineList.clear();

     m_Scale = 20.0;

     string current_country;


     for (int i = 0; i < num; i++) {

         CTempString line = list[i];

         if (line[0] == '-') {

             // skip comment

         } else if (isalpha ((unsigned char)line[0])) {

             current_country = line;

         } else if (isdigit ((unsigned char)line[0])) {

             m_Scale = NStr::StringToDouble(line);

         } else {

             vector<string> tokens;

              NStr::Split(line, "\t", tokens);

             if (tokens.size() > 3) {

                 double x = NStr::StringToDouble(tokens[1]);

                 for (size_t j = 2; j < tokens.size() - 1; j+=2) {

                     m_CountryLineList.push_back(new CCountryLine(current_country, x, NStr::StringToDouble(tokens[j]), NStr::StringToDouble(tokens[j + 1]), m_Scale));

                 }

             }

         }

     }

 }


 bool CLatLonCountryMap::x_InitFromFile(const string& filename)

 {

     string fname = g_FindDataFile (filename);

     if (NStr::IsBlank (fname)) {

         return false;

     }

     if (getenv("NCBI_DEBUG")) {

         ERR_POST(Note << "Reading from " + filename + " for latlon/water data.");

     }

     CRef<ILineReader> lr = ILineReader::New (fname);

     if (lr.Empty()) {

         return false;

     } else {

         m_Scale = 20.0;

         string current_country;


         // make sure to clear before using.  in this outer

         // scope in the interest of speed (avoid repeated

         // construction/destruction)

         vector<SIZE_TYPE> tab_positions;


         do {

             // const string& line = *++*lr;

             CTempString line = *++*lr;

             if (line[0] == '-') {

                 // skip comment

             } else if (isalpha ((unsigned char)line[0])) {

                 current_country = line;

             } else if (isdigit ((unsigned char)line[0])) {

                 m_Scale = NStr::StringToDouble(line);

             } else {

                 // NStr::Tokenize would be much simpler, but

                 // it's just too slow in this case, especially

                 // in debug mode.


                 // for the future, if we need even more speed,

                 // it should be possible to eliminate the tab_positions

                 // vector and collect tab positions on the fly without

                 // any heap-allocated memory


                 // find position of all tabs on this line

                 tab_positions.clear();

                 SIZE_TYPE tab_pos = line.find('\t');

                 while( tab_pos != NPOS ) {

                     tab_positions.push_back(tab_pos);

                     tab_pos = line.find('\t', tab_pos+1);

                 }

                 // an imaginary sentinel tab

                 tab_positions.push_back(line.length());


                 const char * line_start = line.data();

                 if( tab_positions.size() >= 4 ) {

                     CTempString y_str( line_start + tab_positions[0]+1, tab_positions[1] - tab_positions[0] - 1 );

                     double y = NStr::StringToDouble( y_str );


                     // convert into line list

                     for (size_t j = 1; j < tab_positions.size() - 2; j+=2) {

                         const SIZE_TYPE pos1 = tab_positions[j];

                         const SIZE_TYPE pos2 = tab_positions[j+1];

                         const SIZE_TYPE pos3 = tab_positions[j+2];

                         CTempString first_num( line_start + pos1 + 1, pos2 - pos1 - 1 );

                         CTempString second_num( line_start + pos2 + 1, pos3 - pos2 - 1 );

                         m_CountryLineList.push_back(new CCountryLine(current_country, y, NStr::StringToDouble(first_num), NStr::StringToDouble(second_num), m_Scale));

                     }

                 }

             }

         } while ( !lr->AtEOF() );


         return true;

     }

 }


 // Strict-weak-ordering predicate for CCountryLine pointers:
 // orders by GetY() first, then by GetMinX().
 bool
 CLatLonCountryMap::s_CompareTwoLinesByLatLonOnly(
     const CCountryLine* line1,
     const CCountryLine* line2)
 {
     if (line1->GetY() < line2->GetY()) {
         return true;
     }
     if (line1->GetY() > line2->GetY()) {
         return false;
     }
     // same Y: the smaller MinX sorts first
     return line1->GetMinX() < line2->GetMinX();
 }


 // Ordering predicate: case-insensitive country name first; lines of the
 // same country are ordered by s_CompareTwoLinesByLatLonOnly().
 bool CLatLonCountryMap::
         s_CompareTwoLinesByCountry(const CCountryLine* line1,
                                     const CCountryLine* line2)
 {
     const int name_order = NStr::CompareNocase(line1->GetCountry(), line2->GetCountry());
     if (name_order != 0) {
         return name_order < 0;
     }
     return s_CompareTwoLinesByLatLonOnly(line1, line2);
 }


 // Ordering predicate used for m_LatLonSortedList.
 // Keys, in order: GetY(), GetMinX(), GetMaxX(), then the country name
 // compared case-insensitively.
 bool CLatLonCountryMap::
         s_CompareTwoLinesByLatLonThenCountry(const CCountryLine* line1,
                                     const CCountryLine* line2)
 {
     if (line1->GetY() < line2->GetY()) {
         return true;
     }
     if (line1->GetY() > line2->GetY()) {
         return false;
     }

     if (line1->GetMinX() < line2->GetMinX()) {
         return true;
     }
     if (line1->GetMinX() > line2->GetMinX()) {
         return false;
     }

     if (line1->GetMaxX() < line2->GetMaxX()) {
         return true;
     }
     if (line1->GetMaxX() > line2->GetMaxX()) {
         return false;
     }

     // identical coordinates: fall back to the name
     return NStr::CompareNocase(line1->GetCountry(), line2->GetCountry()) < 0;
 }


 // Build the lookup tables for either the water data set (is_water == true)
 // or the country data set.
 //
 // Data source, in order of preference:
 //   1. the data file as resolved by x_InitFromFile(),
 //   2. the same file name prefixed with the directory named by the
 //      NCBI_LAT_LON_DATA_PATH environment variable (if set),
 //   3. the built-in default list.
 //
 // Afterwards m_CountryLineList is ordered by country (case-insensitive)
 // and then by lat/lon, m_CountryExtremes holds one entry per run of lines
 // with the same country name, and m_LatLonSortedList holds the same line
 // pointers ordered by s_CompareTwoLinesByLatLonThenCountry.
 CLatLonCountryMap::CLatLonCountryMap (bool is_water)
 {
     m_CountryLineList.clear();

     // optional directory prefix, normalized to end with '/'
     string data_path;
     if (const char* env_val = getenv("NCBI_LAT_LON_DATA_PATH")) {
         data_path = env_val;
         if (! NStr::EndsWith(data_path, "/")) {
             data_path += "/";
         }
     }

     const string file_name = is_water ? "lat_lon_water.txt" : "lat_lon_country.txt";

     bool loaded = x_InitFromFile(file_name);
     if (!loaded && !data_path.empty()) {
         loaded = x_InitFromFile(data_path + file_name);
     }
     if (!loaded) {
         if (is_water) {
             x_InitFromDefaultList(s_DefaultLatLonWaterText, k_NumLatLonWaterText);
         } else {
             x_InitFromDefaultList(s_DefaultLatLonCountryText, k_NumLatLonCountryText);
         }
     }

     // There are few distinct country names compared with the number of
     // lines, so instead of one big sort the lines are bucketed by country
     // (case-insensitive key) and each bucket is sorted on its own.
     typedef map<CTempString, TCountryLineList, PNocase> TLinesByCountry;
     TLinesByCountry lines_by_country;
     for (const auto& line : m_CountryLineList) {
         lines_by_country[line->GetCountry()].push_back(line);
     }

     // Concatenate the buckets in map order; the map already supplies the
     // ordering by country, so each bucket only needs the lat/lon ordering.
     TCountryLineList regrouped;
     for (auto& bucket : lines_by_country) {
         TCountryLineList & bucket_lines = bucket.second;
         stable_sort(bucket_lines.begin(), bucket_lines.end(),
                     s_CompareTwoLinesByLatLonOnly);
         regrouped.insert(regrouped.end(),
                          bucket_lines.begin(), bucket_lines.end());
     }
     m_CountryLineList.swap(regrouped);

     // Build the per-country extremes and the lat/lon index in one pass.
     m_CountryExtremes.clear();
     m_LatLonSortedList.clear();

     for (const auto& line : m_CountryLineList) {
         const bool same_country_as_previous =
             !m_CountryExtremes.empty()
             && NStr::Equal(line->GetCountry(), m_CountryExtremes.back()->GetCountry());

         if (same_country_as_previous) {
             m_CountryExtremes.back()->AddLine(line);
         } else {
             // first line of a new country seeds its bounding values
             m_CountryExtremes.push_back(new CCountryExtreme(line->GetCountry(),
                                                 line->GetMinX(),
                                                 line->GetY(),
                                                 line->GetMaxX(),
                                                 line->GetY()));
         }
         m_LatLonSortedList.push_back(line);
         line->SetBlock(m_CountryExtremes.back());
     }

     sort (m_LatLonSortedList.begin(), m_LatLonSortedList.end(), s_CompareTwoLinesByLatLonThenCountry);
 }


 // Release the heap objects owned by this map.
 //
 // m_CountryLineList and m_CountryExtremes own their elements.
 // m_LatLonSortedList holds the same pointers as m_CountryLineList, so its
 // elements must not be deleted a second time.
 CLatLonCountryMap::~CLatLonCountryMap (void)
 {
     for (auto* line : m_CountryLineList) {
         delete line;
     }
     m_CountryLineList.clear();

     for (auto* extreme : m_CountryExtremes) {
         delete extreme;
     }
     m_CountryExtremes.clear();

     // aliases of the lines deleted above: only drop the pointers
     m_LatLonSortedList.clear();
 }


 // Report whether the point (lat, lon) falls on one of the stored lines of
 // the given country.
 //
 // country  name to look for; compared case-insensitively
 // lat/lon  coordinates, converted to grid units with m_Scale
 //
 // Returns true if a line of that country has GetY() equal to the converted
 // latitude and spans the converted longitude; false otherwise, including
 // when no data is loaded.
 bool CLatLonCountryMap::IsCountryInLatLon(const string& country, double lat,
                                           double lon) const
 {
     // Without this guard size() - 1 below would wrap around and the
     // search would index far outside the (empty) list.
     if (m_CountryLineList.empty()) {
         return false;
     }

     const int x = CCountryLine::ConvertLon(lon, m_Scale);
     const int y = CCountryLine::ConvertLat(lat, m_Scale);

     size_t L = 0;
     size_t R = m_CountryLineList.size() - 1;

     // m_CountryLineList is ordered by case-insensitive country name (see
     // the constructor), so the search must use the same case-insensitive
     // comparison; a case-sensitive one can walk away from the right run.
     while (L < R) {
         size_t mid = (L + R) / 2;
         const int cmp = NStr::CompareNocase(m_CountryLineList[mid]->GetCountry(), country);
         if (cmp < 0) {
             L = mid + 1;
         } else if (cmp > 0) {
             R = mid;
         } else {
             // inside the country's run: back up while the previous line
             // is still this country and not below the wanted latitude
             while (mid > 0
                    && NStr::EqualNocase(m_CountryLineList[mid - 1]->GetCountry(), country)
                    && m_CountryLineList[mid - 1]->GetY() >= y) {
                 mid--;
             }
             L = mid;
             R = mid;
         }
     }

     // skip lines of this country that lie below the wanted latitude
     while (R < m_CountryLineList.size()
            && NStr::EqualNocase(country, m_CountryLineList[R]->GetCountry())
            && m_CountryLineList[R]->GetY() < y) {
         R++;
     }

     // at the wanted latitude, skip lines that end before x
     while (R < m_CountryLineList.size()
            && NStr::EqualNocase(country, m_CountryLineList[R]->GetCountry())
            && m_CountryLineList[R]->GetY() == y
            && m_CountryLineList[R]->GetMaxX() < x) {
         R++;
     }

     return R < m_CountryLineList.size()
            && NStr::EqualNocase(country, m_CountryLineList[R]->GetCountry())
            && m_CountryLineList[R]->GetY() == y
            && m_CountryLineList[R]->GetMinX() <= x
            && m_CountryLineList[R]->GetMaxX() >= x;
 }


 // Look up the CCountryExtreme whose name equals 'country'
 // (case-insensitive) by binary search over m_CountryExtremes.
 //
 // Returns NULL when 'country' is blank, when no extremes are loaded, or
 // when no entry has that name.
 const CCountryExtreme *
 CLatLonCountryMap::x_FindCountryExtreme(const string& country) const
 {
     if (NStr::IsBlank (country)) return NULL;

     // size() - 1 below would wrap around on an empty list and the final
     // element access would be out of bounds
     if (m_CountryExtremes.empty()) return NULL;

     size_t L = 0;
     size_t R = m_CountryExtremes.size() - 1;

     // lower-bound search: R ends on the first entry not less than 'country'
     // (or on the last entry if all are less)
     while (L < R) {
         const size_t mid = (L + R) / 2;
         if (NStr::CompareNocase(m_CountryExtremes[mid]->GetCountry(), country) < 0) {
             L = mid + 1;
         } else {
             R = mid;
         }
     }

     if (!NStr::EqualNocase(m_CountryExtremes[R]->GetCountry(), country)) {
         return NULL;
     }
     return m_CountryExtremes[R];
 }


 // True when x_FindCountryExtreme() finds an entry for 'region'.
 bool CLatLonCountryMap::HaveLatLonForRegion(const string& region) const
 {
     return x_FindCountryExtreme(region) != NULL;
 }


 // Binary search in m_LatLonSortedList for the first line whose GetY()
 // equals y.
 //
 // Returns the index of the first line with that Y when one exists;
 // otherwise the index where the search converged.  Callers re-check
 // GetY() and the list bounds themselves.  Returns 0 for an empty list,
 // which is not a valid index and makes the callers' "R < size()" loops
 // terminate immediately.
 size_t CLatLonCountryMap::x_GetLatStartIndex (int y) const
 {
     // size() - 1 below would wrap around on an empty list and the search
     // would read far out of bounds
     if (m_LatLonSortedList.empty()) {
         return 0;
     }

     size_t L = 0;
     size_t R = m_LatLonSortedList.size() - 1;

     while (L < R) {
         size_t mid = (L + R) / 2;
         if (m_LatLonSortedList[mid]->GetY() < y) {
             L = mid + 1;
         } else if (m_LatLonSortedList[mid]->GetY() > y) {
             R = mid;
         } else {
             // hit: back up to the first line of this latitude
             while (mid > 0 && m_LatLonSortedList[mid - 1]->GetY() == y) {
                 mid--;
             }
             L = mid;
             R = mid;
         }
     }
     return R;
 }


 // Pick the best region whose stored lines contain the point (lat, lon).
 //
 // All lines at the converted latitude that span the converted longitude
 // are candidates.  The first candidate is taken as is; each further
 // candidate replaces the current best unless best->PreferTo() says the
 // current one should be kept ('country' and 'province' are passed through
 // to PreferTo() together with whether the current best has the smaller or
 // equal area).
 //
 // Returns NULL when no line contains the point.
 const CCountryExtreme *
 CLatLonCountryMap::GuessRegionForLatLon(double lat, double lon,
                                         const string& country,
                                         const string& province) const
 {
     const int x = CCountryLine::ConvertLon(lon, m_Scale);
     // latitude goes through ConvertLat, as in IsCountryInLatLon and
     // IsNearLatLon (it was previously run through the longitude converter)
     const int y = CCountryLine::ConvertLat(lat, m_Scale);

     const CCountryExtreme *best = NULL;

     for (size_t R = x_GetLatStartIndex(y);
          R < m_LatLonSortedList.size() && m_LatLonSortedList[R]->GetY() == y;
          R++) {
         if (m_LatLonSortedList[R]->GetMinX() > x
             || m_LatLonSortedList[R]->GetMaxX() < x) {
             // line does not span x
             continue;
         }
         const CCountryExtreme *other = m_LatLonSortedList[R]->GetBlock();
         if (best == NULL) {
             best = other;
         } else if (!best->PreferTo(other, country, province, best->GetArea() <= other->GetArea())) {
             best = other;
         }
     }
     return best;
 }


 //Distance on a spherical surface calculation adapted from
 //http://www.linuxjournal.com/magazine/
 //work-shell-calculating-distance-between-two-latitudelongitude-points

 #define EARTH_RADIUS 6371.0 /* average radius of non-spherical earth in kilometers */
 #define CONST_PI 3.14159265359

 // Convert an angle from degrees to radians.
 static double DegreesToRadians (
   double degrees
 )
 {
   return (degrees * (CONST_PI / 180.0));
 }

 // Haversine distance between two points given in degrees.
 //
 // latA/lonA  first point
 // latB/lonB  second point
 //
 // Returns the distance along the sphere in the unit of EARTH_RADIUS
 // (kilometers).
 static double DistanceOnGlobe (
   double latA,
   double lonA,
   double latB,
   double lonB
 )
 {
   const double lat1 = DegreesToRadians (latA);
   const double lon1 = DegreesToRadians (lonA);
   const double lat2 = DegreesToRadians (latB);
   const double lon2 = DegreesToRadians (lonB);

   const double dLat = lat2 - lat1;
   const double dLon = lon2 - lon1;

   double a = sin (dLat / 2) * sin (dLat / 2) +
              cos (lat1) * cos (lat2) * sin (dLon / 2) * sin (dLon / 2);

   // Mathematically a lies in [0, 1]; rounding can push it marginally
   // outside (e.g. for near-antipodal points), which would make
   // sqrt (1 - a) a NaN.  Clamp so the result stays finite.
   if (a < 0.0) {
     a = 0.0;
   } else if (a > 1.0) {
     a = 1.0;
   }

   const double c = 2 * atan2 (sqrt (a), sqrt (1 - a));

   return (double) (EARTH_RADIUS * c);
 }

 // Distance between (latA, lonA) and the point one grid step further in
 // both latitude and longitude, where one grid step is 1/scale degrees.
 //
 // Uses DistanceOnGlobe() rather than repeating the haversine formula.
 double ErrorDistance (
   double latA,
   double lonA,
   double scale)
 {
   return DistanceOnGlobe (latA, lonA,
                           latA + (1.0 / scale),
                           lonA + (1.0 / scale));
 }


 // Find the region closest to (lat, lon) among the lines within 'range'
 // degrees of the point.
 //
 // lat/lon   query point
 // range     search radius in degrees; converted to grid units via m_Scale
 // distance  out: distance (as computed by DistanceOnGlobe) to the
 //           returned region, or 0.0 when nothing was found
 //
 // On equal distance the region with the smaller area wins; on equal area
 // a region with a non-blank Level1 replaces one with a blank Level1.
 //
 // Returns NULL when no line lies within the search window.
 const CCountryExtreme * CLatLonCountryMap::FindClosestToLatLon(double lat,
                                                                double lon,
                                                                double range,
                                                                double &distance)
 {
     const int x = CCountryLine::ConvertLon(lon, m_Scale);
     // latitude goes through ConvertLat, as in IsCountryInLatLon and
     // IsNearLatLon (it was previously run through the longitude converter)
     const int y = CCountryLine::ConvertLat(lat, m_Scale);

     // search window in grid units
     const int maxDelta = (int) (range * m_Scale + EPSILON);
     const int min_y = y - maxDelta;
     const int max_y = y + maxDelta;
     const int min_x = x - maxDelta;
     const int max_x = x + maxDelta;

     double closest = 0.0;
     CCountryExtreme *rval = NULL;

     // binary search to lowest lat, then scan upwards through the window
     for (size_t R = x_GetLatStartIndex(min_y);
          R < m_LatLonSortedList.size() && m_LatLonSortedList[R]->GetY() <= max_y;
          R++) {
         if (m_LatLonSortedList[R]->GetMaxX() < min_x || m_LatLonSortedList[R]->GetMinX() > max_x) {
             // out of range, don't bother calculating distance
             continue;
         }

         // longitude of the point on this line nearest to the query point
         double end;
         if (x < m_LatLonSortedList[R]->GetMinX()) {
             end = m_LatLonSortedList[R]->GetMinLon();
         } else if (x > m_LatLonSortedList[R]->GetMaxX()) {
             end = m_LatLonSortedList[R]->GetMaxLon();
         } else {
             end = lon;
         }

         const double dist = DistanceOnGlobe (lat, lon, m_LatLonSortedList[R]->GetLat(), end);
         if (rval == NULL || closest > dist
             || (closest == dist
                 && (rval->GetArea() > m_LatLonSortedList[R]->GetBlock()->GetArea()
                     || (rval->GetArea() == m_LatLonSortedList[R]->GetBlock()->GetArea()
                         && NStr::IsBlank(rval->GetLevel1())
                         && !NStr::IsBlank(m_LatLonSortedList[R]->GetBlock()->GetLevel1()))))) {
             rval = m_LatLonSortedList[R]->GetBlock();
             closest = dist;
         }
     }

     distance = closest;
     return rval;
 }


 // Determine whether 'comp_country' is the country closest to (lat, lon)
 // among the lines within 'range' degrees of the point.
 //
 // comp_country  country to test (compared case-sensitively)
 // lat/lon       query point
 // range         search radius in degrees; converted via m_Scale
 // distance      out: distance (as computed by DistanceOnGlobe) to the
 //               closest country found, or 0.0 when nothing was found
 //
 // On equal distance the input country is preferred, otherwise the country
 // with the smaller area.
 bool CLatLonCountryMap::IsClosestToLatLon(const string& comp_country,
                                           double lat, double lon,
                                           double range, double &distance) const
 {
     const int x = CCountryLine::ConvertLon(lon, m_Scale);
     // latitude goes through ConvertLat, as in IsCountryInLatLon and
     // IsNearLatLon (it was previously run through the longitude converter)
     const int y = CCountryLine::ConvertLat(lat, m_Scale);

     // search window in grid units
     const int maxDelta = (int) (range * m_Scale + EPSILON);
     const int min_y = y - maxDelta;
     const int max_y = y + maxDelta;
     const int min_x = x - maxDelta;
     const int max_x = x + maxDelta;

     string country;
     double closest = 0.0;
     int smallest_area = -1;

     // binary search to lowest lat, then scan upwards through the window
     for (size_t R = x_GetLatStartIndex(min_y);
          R < m_LatLonSortedList.size() && m_LatLonSortedList[R]->GetY() <= max_y;
          R++) {
         if (m_LatLonSortedList[R]->GetMaxX() < min_x || m_LatLonSortedList[R]->GetMinX() > max_x) {
             // out of range, don't bother calculating distance
             continue;
         }

         // Longitude of the point on this line nearest to the query point.
         // When x lies inside the line's span the nearest point shares the
         // query longitude (same rule as FindClosestToLatLon/IsNearLatLon);
         // measuring to the max-lon end instead would overstate the distance.
         double end;
         if (x < m_LatLonSortedList[R]->GetMinX()) {
             end = m_LatLonSortedList[R]->GetMinLon();
         } else if (x > m_LatLonSortedList[R]->GetMaxX()) {
             end = m_LatLonSortedList[R]->GetMaxLon();
         } else {
             end = lon;
         }

         const double dist = DistanceOnGlobe (lat, lon, m_LatLonSortedList[R]->GetLat(), end);
         if (NStr::IsBlank (country) || closest > dist) {
             country = m_LatLonSortedList[R]->GetCountry();
             closest = dist;
             const CCountryExtreme * ext = x_FindCountryExtreme(country);
             if (ext) {
                 smallest_area = ext->GetArea();
             }
         } else if (closest == dist) {
             // if the distances are the same, prefer the input country, otherwise prefer the smaller region
             if (NStr::Equal(country, comp_country)) {
                 // keep country we're searching for
             } else if (!NStr::Equal(m_LatLonSortedList[R]->GetCountry(), country)) {
                 const CCountryExtreme * ext = x_FindCountryExtreme(m_LatLonSortedList[R]->GetCountry());
                 if (ext
                     && (ext->GetArea() < smallest_area
                         || NStr::Equal(m_LatLonSortedList[R]->GetCountry(), comp_country))) {
                     country = m_LatLonSortedList[R]->GetCountry();
                     smallest_area = ext->GetArea();
                 }
             }
         }
     }

     distance = closest;
     return NStr::Equal(country, comp_country);
 }


 // Find the closest block of the given country (and province, when one is
 // supplied) within 'range' degrees of (lat, lon).
 //
 // lat/lon   query point
 // range     search radius in degrees; converted to grid units via m_Scale
 // distance  out: distance (as computed by DistanceOnGlobe) to the
 //           returned block, or -1.0 when nothing matched
 // country   must equal the block's Level0 (case-insensitive)
 // province  when not blank, must equal the block's Level1
 //           (case-insensitive)
 //
 // Returns NULL when no matching line lies within the search window.
 const CCountryExtreme * CLatLonCountryMap::IsNearLatLon(double lat, double lon,
                                                         double range,
                                                         double &distance,
                                                         const string& country,
                                                         const string& province) const
 {
     const int x = CCountryLine::ConvertLon(lon, m_Scale);
     const int y = CCountryLine::ConvertLat(lat, m_Scale);

     // search window in grid units
     const int maxDelta = (int) (range * m_Scale + EPSILON);
     const int min_y = y - maxDelta;
     const int max_y = y + maxDelta;
     const int min_x = x - maxDelta;
     const int max_x = x + maxDelta;

     double best_dist = -1.0;
     CCountryExtreme *best_block = NULL;

     // binary search to lowest lat, then scan upwards through the window
     for (size_t idx = x_GetLatStartIndex(min_y);
          idx < m_LatLonSortedList.size() && m_LatLonSortedList[idx]->GetY() <= max_y;
          idx++) {
         if (m_LatLonSortedList[idx]->GetMaxX() < min_x || m_LatLonSortedList[idx]->GetMinX() > max_x) {
             // outside the longitude window
             continue;
         }
         if (!NStr::EqualNocase(m_LatLonSortedList[idx]->GetBlock()->GetLevel0(), country)) {
             // different country
             continue;
         }
         if (!NStr::IsBlank(province) && !NStr::EqualNocase(m_LatLonSortedList[idx]->GetBlock()->GetLevel1(), province)) {
             // different province
             continue;
         }

         // longitude of the point on this line nearest to the query point
         double end;
         if (x < m_LatLonSortedList[idx]->GetMinX()) {
             end = m_LatLonSortedList[idx]->GetMinLon();
         } else if (x > m_LatLonSortedList[idx]->GetMaxX()) {
             end = m_LatLonSortedList[idx]->GetMaxLon();
         } else {
             end = lon;
         }

         const double dist = DistanceOnGlobe (lat, lon, m_LatLonSortedList[idx]->GetLat(), end);
         if (best_dist < 0.0 || best_dist > dist) {
             best_dist = dist;
             best_block = m_LatLonSortedList[idx]->GetBlock();
         }
     }

     distance = best_dist;
     return best_block;
 }


 bool CLatLonCountryMap::DoCountryBoxesOverlap(const string& country1,

                                               const string& country2) const

 {

     if (NStr::IsBlank (country1) || NStr::IsBlank(country2)) return false;


     const CCountryExtreme *ext1 = x_FindCountryExtreme (country1);

     if (!ext1) {

         return false;

     }

     const CCountryExtreme *ext2 = x_FindCountryExtreme (country2);

     if (!ext2) {

         return false;

     }


     return ext1->DoesOverlap(ext2);

 }


 // Add a scale-dependent allowance to 'distance' and round to the nearest
 // integer (by adding 0.5 and truncating).
 //
 // Allowance: 111.19 for scale < 1.1, 5.56 for scale in (19.5, 20.5),
 // 1.11 for scale in (99.5, 100.5), nothing for any other scale.
 int CLatLonCountryMap::AdjustAndRoundDistance (double distance, double scale)
 {
   double adjusted = distance;

   if (scale < 1.1) {
     adjusted += 111.19;
   } else if (19.5 < scale && scale < 20.5) {
     adjusted += 5.56;
   } else if (99.5 < scale && scale < 100.5) {
     adjusted += 1.11;
   }

   return (int) (adjusted + 0.5);
 }


 // Convenience overload: adjust and round 'distance' using this map's own
 // m_Scale.
 int CLatLonCountryMap::AdjustAndRoundDistance (double distance) const
 {
   const int rounded = AdjustAndRoundDistance (distance, m_Scale);
   return rounded;
 }


 END_objects_SCOPE // namespace ncbi::objects::


 END_NCBI_SCOPE


 /* Original file checksum: lines: 65, chars: 1891, CRC32: 7724f0c5 */

EPSILON
#define EPSILON
Definition: SubSource.cpp:5256

CONST_PI
#define CONST_PI
Definition: SubSource.cpp:5944

s_ProcessCellLineLine
static void s_ProcessCellLineLine(const CTempString &line)
Definition: SubSource.cpp:2921

TParishMapEntry
SStaticPair< const char *, const char * > TParishMapEntry
Definition: SubSource.cpp:4144

s_Null_Countries
static const char *const s_Null_Countries[]
Definition: SubSource.cpp:3287

s_InsertSpacesBetweenTokens
static string s_InsertSpacesBetweenTokens(const string &old_str)
Definition: SubSource.cpp:1350

ErrorDistance
double ErrorDistance(double latA, double lonA, double scale)
Definition: SubSource.cpp:5981

s_CellLineContaminationMap
static TCellLineContaminationMap s_CellLineContaminationMap
Definition: SubSource.cpp:2915

s_AddOneDataFile
void s_AddOneDataFile(const string &file_name, const string &data_name, const char **built_in, size_t num_built_in, TQualFixMap &qual_map)
Definition: SubSource.cpp:4904

s_NormalizeTokens
static string s_NormalizeTokens(vector< string > &tokens, vector< double > &numbers, vector< string > &anum, vector< int > &precision, vector< string > &lat_long, vector< string > &nsew)
Definition: SubSource.cpp:1433

TQualFixMap
map< string, string, PNocase > TQualFixMap
Definition: SubSource.cpp:4889

DEFINE_STATIC_ARRAY_MAP
DEFINE_STATIC_ARRAY_MAP(TWaterPairMap, sc_WaterPairMap, k_water_pair_map)

s_InitializeCellLineContaminationMap
static void s_InitializeCellLineContaminationMap(void)
Definition: SubSource.cpp:2935

k_NumLatLonCountryText
static const size_t k_NumLatLonCountryText
Definition: SubSource.cpp:5526

TWaterPairMap
CStaticArrayMap< const char *, const char *, PNocase_CStr > TWaterPairMap
Definition: SubSource.cpp:2087

sm_ValidSexQualifierTokens
const char * sm_ValidSexQualifierTokens[]
Definition: SubSource.cpp:2447

TCellLineContaminationMap
map< string, TSpeciesContaminant > TCellLineContaminationMap
Definition: SubSource.cpp:2913

s_DoUSAStateCleanup
CCountries::EStateCleanup s_DoUSAStateCleanup(string &country)
Definition: SubSource.cpp:4450

TStateMapEntry
SStaticPair< const char *, const char * > TStateMapEntry
Definition: SubSource.cpp:4295

s_Former_CountriesSet
static const TCStrSet s_Former_CountriesSet(s_Former_Countries, sizeof(s_Former_Countries), __FILE__, __LINE__)

exception_map
static CCountries::TUsaExceptionMap exception_map
Definition: SubSource.cpp:4574

DegreesToRadians
static double DegreesToRadians(double degrees)
Definition: SubSource.cpp:5946

s_ReplaceableCultureNotes
static const char * s_ReplaceableCultureNotes[]
Definition: SubSource.cpp:5163

s_IsolationSourceMap
static TQualFixMap s_IsolationSourceMap
Definition: SubSource.cpp:4891

s_CellLineContaminationMapInitialized
static bool s_CellLineContaminationMapInitialized
Definition: SubSource.cpp:2916

s_InitializeQualMaps
static void s_InitializeQualMaps(void)
Definition: SubSource.cpp:4940

s_Null_CountriesSet
static const TCStrSet s_Null_CountriesSet(s_Null_Countries, sizeof(s_Null_Countries), __FILE__, __LINE__)

s_IsState
bool s_IsState(string &state, bool &modified)
Definition: SubSource.cpp:4413

s_FailsGenusOrSpeciesTest
static bool s_FailsGenusOrSpeciesTest(const string &value, const string &taxname)
Definition: SubSource.cpp:2776

s_ShortenLatLon
string s_ShortenLatLon(string &subname)
Definition: SubSource.cpp:1811

s_IsParish
bool s_IsParish(string &parish)
Definition: SubSource.cpp:4279

k_water_pair_map
static const TWaterPairElem k_water_pair_map[]
Definition: SubSource.cpp:2008

s_map_subregion_fixes
static const SStaticPair< const char *, const char * > s_map_subregion_fixes[]
Definition: SubSource.cpp:3738

TCStringPairsMap
CStaticPairArrayMap< const char *, const char *, PCase_CStr > TCStringPairsMap
Definition: SubSource.cpp:3425

s_init_UseGeoLocNameForCountry
static bool s_init_UseGeoLocNameForCountry(void)
Definition: SubSource.cpp:68

s_ProcessQualMapLine
static void s_ProcessQualMapLine(const CTempString &line, TQualFixMap &qual_map)
Definition: SubSource.cpp:4894

TCStrSet
CStaticArraySet< const char *, PCase_CStr > TCStrSet
Definition: SubSource.cpp:558

s_map_old_country_name_fixes
static const SStaticPair< const char *, const char * > s_map_old_country_name_fixes[]
Definition: SubSource.cpp:3731

kCellTypePairs
static const TStaticQualFixPair kCellTypePairs[]
Definition: SubSource.cpp:4867

kDevStagePairs
static const TStaticQualFixPair kDevStagePairs[]
Definition: SubSource.cpp:4845

s_RemoveSpacesWithinNumbers
static string s_RemoveSpacesWithinNumbers(const string &old_str)
Definition: SubSource.cpp:1383

s_QualFixupMapsInitialized
static bool s_QualFixupMapsInitialized
Definition: SubSource.cpp:4892

s_RemovableCultureNotes
static const char * s_RemovableCultureNotes[]
Definition: SubSource.cpp:5128

TParishMap
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TParishMap
Definition: SubSource.cpp:4276

x_FindSurroundingOcean
static string x_FindSurroundingOcean(string &water)
Definition: SubSource.cpp:2090

TWaterPairElem
SStaticPair< const char *, const char * > TWaterPairElem
Definition: SubSource.cpp:2007

EARTH_RADIUS
#define EARTH_RADIUS
Definition: SubSource.cpp:5943

DEFINE_STATIC_FAST_MUTEX
DEFINE_STATIC_FAST_MUTEX(s_CellLineContaminationMutex)

exceptions_initialized
static bool exceptions_initialized
Definition: SubSource.cpp:4575

TStaticQualFixMap
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TStaticQualFixMap
Definition: SubSource.cpp:4843

s_GetLatLong
static void s_GetLatLong(const string &new_str, vector< double > &numbers, vector< int > &precision)
Definition: SubSource.cpp:1641

s_SuppressCountryFix
bool s_SuppressCountryFix(const string &test)
Definition: SubSource.cpp:4035

s_IsNumber
static bool s_IsNumber(const string &token, double *result=NULL)
Definition: SubSource.cpp:1420

parish_abbrev_array
static const TParishMapEntry parish_abbrev_array[]
Definition: SubSource.cpp:4145

s_ChooseMonthAndDay
bool s_ChooseMonthAndDay(const string &token1, const string &token2, bool month_first, string &month, int &day, bool &month_ambiguous)
Definition: SubSource.cpp:919

s_CountriesSet
static const TCStrSet s_CountriesSet(s_Countries, sizeof(s_Countries), __FILE__, __LINE__)

sm_ValidSexQualifierPhrases
const char * sm_ValidSexQualifierPhrases[]
Definition: SubSource.cpp:2467

s_Countries
static const char *const s_Countries[]
Definition: SubSource.cpp:2980

TSpeciesContaminant
map< string, TContaminatingCellLine > TSpeciesContaminant
Definition: SubSource.cpp:2912

TNCBITSVStream
CRowReader< CRowReaderStream_NCBI_TSV > TNCBITSVStream
Definition: SubSource.cpp:4572

s_map_country_name_fixes
static const SStaticPair< const char *, const char * > s_map_country_name_fixes[]
Definition: SubSource.cpp:3428

s_IsValidSexQualifierPhrase
bool s_IsValidSexQualifierPhrase(const string &value)
Definition: SubSource.cpp:2473

s_CompressRunsOfSpaces
bool s_CompressRunsOfSpaces(string &val)
Definition: SubSource.cpp:4097

s_map_whole_country_fixes
static const SStaticPair< const char *, const char * > s_map_whole_country_fixes[]
Definition: SubSource.cpp:3419

s_ReorderNorthSouthEastWest
static void s_ReorderNorthSouthEastWest(vector< double > &numbers, vector< int > &precision, const vector< string > &lat_long, vector< string > &nsew)
Definition: SubSource.cpp:1562

s_USAStates
static const char * s_USAStates[]
Definition: SubSource.cpp:3797

TStaticQualFixPair
SStaticPair< const char *, const char * > TStaticQualFixPair
Definition: SubSource.cpp:4842

TContaminatingCellLine
pair< string, string > TContaminatingCellLine
Definition: SubSource.cpp:2911

MAKE_CONST_SET
MAKE_CONST_SET(s_Null_CollectionDatesSet, ct::tagStrCase, { "missing", "missing: control sample", "missing: data agreement established pre-2023", "missing: endangered species", "missing: human-identifiable", "missing: lab stock", "missing: sample group", "missing: synthetic construct", "missing: third party data", "not applicable", "not collected", "not provided", "restricted access", }) string CSubSource
Definition: SubSource.cpp:561

TStateMap
CStaticPairArrayMap< const char *, const char *, PNocase_CStr > TStateMap
Definition: SubSource.cpp:4410

s_CollectNumberAndUnits
void s_CollectNumberAndUnits(const string &value, string &number, string &units)
Definition: SubSource.cpp:2593

s_ContainsWholeWord
bool s_ContainsWholeWord(const CTempString test, const CTempString word, NStr::ECase case_sense)
Definition: SubSource.cpp:4015

DistanceOnGlobe
static double DistanceOnGlobe(double latA, double lonA, double latB, double lonB)
Definition: SubSource.cpp:5954

s_Former_Countries
static const char *const s_Former_Countries[]
Definition: SubSource.cpp:3265

state_abbrev_array
static const TStateMapEntry state_abbrev_array[]
Definition: SubSource.cpp:4296

k_NumLatLonWaterText
static const size_t k_NumLatLonWaterText
Definition: SubSource.cpp:5529

SubSource.hpp

CCountries::x_Tokenize
static vector< string > x_Tokenize(const string &val)
Definition: SubSource.cpp:3983

CCountries::NewFixCountry
static string NewFixCountry(const string &input, bool us_territories=false)
Definition: SubSource.cpp:4664

CCountries::WasValid
static bool WasValid(const string &country)
Definition: SubSource.cpp:3377

CCountries::USAStateCleanup
static string USAStateCleanup(const string &country)
Definition: SubSource.cpp:4656

CCountries::WholeCountryFix
static string WholeCountryFix(string country)
Definition: SubSource.cpp:3861

CCountries::x_RemoveDelimitersFromEnds
static void x_RemoveDelimitersFromEnds(string &val, bool except_paren=false)
Definition: SubSource.cpp:3933

CCountries::IsValid
static bool IsValid(const string &country)
Definition: SubSource.cpp:3304

CCountries::ContainsMultipleCountryNames
static bool ContainsMultipleCountryNames(const string &phrase)
Definition: SubSource.cpp:3901

CCountries::EStateCleanup
EStateCleanup
Definition: SubSource.hpp:286

CCountries::e_Missing
@ e_Missing
Definition: SubSource.hpp:291

CCountries::e_NotUSA
@ e_NotUSA
Definition: SubSource.hpp:292

CCountries::e_NoResult
@ e_NoResult
Definition: SubSource.hpp:287

CCountries::e_Ambiguous
@ e_Ambiguous
Definition: SubSource.hpp:290

CCountries::e_Valid
@ e_Valid
Definition: SubSource.hpp:288

CCountries::e_Corrected
@ e_Corrected
Definition: SubSource.hpp:289

CCountries::IsSubstringOfStringInList
static bool IsSubstringOfStringInList(const string &phrase, const string &country1, size_t pos1)
Definition: SubSource.cpp:3881

CCountries::x_FindCountryName
static void x_FindCountryName(const TCStringPairsMap &fix_map, const vector< string > &countries, string &valid_country, string &orig_valid_country, bool &too_many_countries, bool &bad_cap)
Definition: SubSource.cpp:4047

CCountries::ReadUSAExceptionMap
static void ReadUSAExceptionMap(TUsaExceptionMap &exceptions, const string &filepath)
Definition: SubSource.cpp:4577

CCountries::ChangeExtraColonsToCommas
static bool ChangeExtraColonsToCommas(string &country)
Definition: SubSource.cpp:4794

CCountries::CapitalizeFirstLetterOfEveryWord
static string CapitalizeFirstLetterOfEveryWord(const string &phrase)
Definition: SubSource.cpp:3851

CCountries::CountryFixupItem
static string CountryFixupItem(const string &input, bool capitalize_after_colon)
Definition: SubSource.cpp:4812

CCountries::GetCorrectedCountryCapitalization
static string GetCorrectedCountryCapitalization(const string &country)
Definition: SubSource.cpp:3921

CCountries::LoadUSAExceptionMap
static void LoadUSAExceptionMap(const TUsaExceptionMap &exceptions)
Definition: SubSource.cpp:4592

CCountryExtreme
Definition: SubSource.hpp:330

CCountryExtreme::AddLine
void AddLine(const CCountryLine *line)
Definition: SubSource.cpp:5381

CCountryExtreme::SetMinX
bool SetMinX(int min_x)
Definition: SubSource.cpp:5337

CCountryExtreme::SetMaxY
bool SetMaxY(int max_y)
Definition: SubSource.cpp:5370

CCountryExtreme::m_Level0
string m_Level0
Definition: SubSource.hpp:353

CCountryExtreme::DoesOverlap
bool DoesOverlap(const CCountryExtreme *other_block) const
Definition: SubSource.cpp:5393

CCountryExtreme::m_Level1
string m_Level1
Definition: SubSource.hpp:354

CCountryExtreme::GetMinX
int GetMinX(void) const
Definition: SubSource.hpp:338

CCountryExtreme::GetLevel0
string GetLevel0(void) const
Definition: SubSource.hpp:336

CCountryExtreme::CCountryExtreme
CCountryExtreme(const string &country_name, int min_x, int min_y, int max_x, int max_y)
Definition: SubSource.cpp:5313

CCountryExtreme::GetCountry
string GetCountry(void) const
Definition: SubSource.hpp:335

CCountryExtreme::m_Area
int m_Area
Definition: SubSource.hpp:359

CCountryExtreme::m_MinY
int m_MinY
Definition: SubSource.hpp:356

CCountryExtreme::m_MinX
int m_MinX
Definition: SubSource.hpp:355

CCountryExtreme::m_MaxX
int m_MaxX
Definition: SubSource.hpp:357

CCountryExtreme::GetArea
int GetArea(void) const
Definition: SubSource.hpp:342

CCountryExtreme::GetMaxX
int GetMaxX(void) const
Definition: SubSource.hpp:340

CCountryExtreme::GetMaxY
int GetMaxY(void) const
Definition: SubSource.hpp:341

CCountryExtreme::GetMinY
int GetMinY(void) const
Definition: SubSource.hpp:339

CCountryExtreme::SetMaxX
bool SetMaxX(int max_x)
Definition: SubSource.cpp:5348

CCountryExtreme::~CCountryExtreme
~CCountryExtreme(void)
Definition: SubSource.cpp:5331

CCountryExtreme::SetMinY
bool SetMinY(int min_y)
Definition: SubSource.cpp:5359

CCountryExtreme::PreferTo
bool PreferTo(const CCountryExtreme *other_block, const string country, const string province, const bool prefer_new) const
Definition: SubSource.cpp:5413

CCountryExtreme::m_MaxY
int m_MaxY
Definition: SubSource.hpp:358

CCountryExtreme::GetLevel1
string GetLevel1(void) const
Definition: SubSource.hpp:337

CCountryLine
Definition: SubSource.hpp:364

CCountryLine::m_MaxX
int m_MaxX
Definition: SubSource.hpp:391

CCountryLine::~CCountryLine
~CCountryLine(void)
Definition: SubSource.cpp:5251

CCountryLine::ConvertLat
static int ConvertLat(double y, double scale)
Definition: SubSource.cpp:5258

CCountryLine::GetMaxX
int GetMaxX(void) const
Definition: SubSource.hpp:375

CCountryLine::m_Y
int m_Y
Definition: SubSource.hpp:389

CCountryLine::GetMinX
int GetMinX(void) const
Definition: SubSource.hpp:374

CCountryLine::GetY
int GetY(void) const
Definition: SubSource.hpp:373

CCountryLine::x_ConvertLat
int x_ConvertLat(double y)
Definition: SubSource.cpp:5280

CCountryLine::m_Scale
double m_Scale
Definition: SubSource.hpp:392

CCountryLine::CCountryLine
CCountryLine(const string &country_name, double y, double min_x, double max_x, double scale)
Definition: SubSource.cpp:5240

CCountryLine::x_ConvertLon
int x_ConvertLon(double x)
Definition: SubSource.cpp:5307

CCountryLine::ConvertLon
static int ConvertLon(double x, double scale)
Definition: SubSource.cpp:5285

CCountryLine::m_MinX
int m_MinX
Definition: SubSource.hpp:390

CDate
Definition: Date.hpp:53

CDate::Compare
ECompare Compare(const CDate &date) const
Definition: Date.cpp:83

CDate::eCompare_before
@ eCompare_before
*this comes first.
Definition: Date.hpp:74

CDate::eCompare_after
@ eCompare_after
*this comes second.
Definition: Date.hpp:76

CException
Definition: ncbiexpt.hpp:877

CGuard
Definition: guard.hpp:118

CLatLonCountryId
Definition: SubSource.hpp:397

CLatLonCountryId::GetClaimedDistance
int GetClaimedDistance(void) const
Definition: SubSource.hpp:429

CLatLonCountryId::GetClosestProvince
string GetClosestProvince(void) const
Definition: SubSource.hpp:418

CLatLonCountryId::SetFullGuess
void SetFullGuess(string guess)
Definition: SubSource.hpp:407

CLatLonCountryId::~CLatLonCountryId
~CLatLonCountryId(void)
Definition: SubSource.cpp:5520

CLatLonCountryId::GetClaimedFull
string GetClaimedFull(void) const
Definition: SubSource.hpp:422

CLatLonCountryId::GetClosestWater
string GetClosestWater(void) const
Definition: SubSource.hpp:420

CLatLonCountryId::SetGuessProvince
void SetGuessProvince(string guess)
Definition: SubSource.hpp:411

CLatLonCountryId::fProvinceClosest
@ fProvinceClosest
Definition: SubSource.hpp:439

CLatLonCountryId::fWaterMatch
@ fWaterMatch
Definition: SubSource.hpp:436

CLatLonCountryId::fCountryMatch
@ fCountryMatch
Definition: SubSource.hpp:434

CLatLonCountryId::fWaterClosest
@ fWaterClosest
Definition: SubSource.hpp:440

CLatLonCountryId::fCountryClosest
@ fCountryClosest
Definition: SubSource.hpp:438

CLatLonCountryId::fProvinceMatch
@ fProvinceMatch
Definition: SubSource.hpp:435

CLatLonCountryId::CLatLonCountryId
CLatLonCountryId(float lat, float lon)
Definition: SubSource.cpp:5452

CLatLonCountryId::TClassificationFlags
int TClassificationFlags
Bitwise OR of "EClassificationFlags".
Definition: SubSource.hpp:442

CLatLonCountryId::GetGuessCountry
string GetGuessCountry(void) const
Definition: SubSource.hpp:408

CLatLonCountryId::GetGuessWater
string GetGuessWater(void) const
Definition: SubSource.hpp:412

CLatLonCountryId::Classify
CLatLonCountryId::TClassificationFlags Classify(string country, string province)
Definition: SubSource.cpp:5461

CLatLonCountryId::GetClosestFull
string GetClosestFull(void) const
Definition: SubSource.hpp:414

CLatLonCountryId::GetLandDistance
int GetLandDistance(void) const
Definition: SubSource.hpp:425

CLatLonCountryId::GetClosestCountry
string GetClosestCountry(void) const
Definition: SubSource.hpp:416

CLatLonCountryId::GetGuessProvince
string GetGuessProvince(void) const
Definition: SubSource.hpp:410

CLatLonCountryId::SetGuessCountry
void SetGuessCountry(string guess)
Definition: SubSource.hpp:409

CLatLonCountryMap
Definition: SubSource.hpp:465

CLatLonCountryMap::x_FindCountryExtreme
const CCountryExtreme * x_FindCountryExtreme(const string &country) const
Definition: SubSource.cpp:5850

CLatLonCountryMap::CLatLonCountryMap
CLatLonCountryMap(bool is_water)
Definition: SubSource.cpp:5697

CLatLonCountryMap::~CLatLonCountryMap
~CLatLonCountryMap(void)
Definition: SubSource.cpp:5777

CLatLonCountryMap::x_GetLatStartIndex
size_t x_GetLatStartIndex(int y) const
Definition: SubSource.cpp:5885

CLatLonCountryMap::s_CompareTwoLinesByLatLonThenCountry
static bool s_CompareTwoLinesByLatLonThenCountry(const CCountryLine *line1, const CCountryLine *line2)
Definition: SubSource.cpp:5671

CLatLonCountryMap::s_CompareTwoLinesByCountry
static bool s_CompareTwoLinesByCountry(const CCountryLine *line1, const CCountryLine *line2)
Definition: SubSource.cpp:5656

CLatLonCountryMap::m_CountryLineList
TCountryLineList m_CountryLineList
Definition: SubSource.hpp:513

CLatLonCountryMap::AdjustAndRoundDistance
static int AdjustAndRoundDistance(double distance, double scale)
Definition: SubSource.cpp:6186

CLatLonCountryMap::s_CompareTwoLinesByLatLonOnly
static bool s_CompareTwoLinesByLatLonOnly(const CCountryLine *line1, const CCountryLine *line2)
Definition: SubSource.cpp:5638

CLatLonCountryMap::IsNearLatLon
const CCountryExtreme * IsNearLatLon(double lat, double lon, double range, double &distance, const string &country, const string &province=kEmptyStr) const
Definition: SubSource.cpp:6116

CLatLonCountryMap::fFlip
@ fFlip
Definition: SubSource.hpp:489

CLatLonCountryMap::fNone
@ fNone
Definition: SubSource.hpp:488

CLatLonCountryMap::fNegateLat
@ fNegateLat
Definition: SubSource.hpp:490

CLatLonCountryMap::fNegateLon
@ fNegateLon
Definition: SubSource.hpp:491

CLatLonCountryMap::DoCountryBoxesOverlap
bool DoCountryBoxesOverlap(const string &country1, const string &country2) const
Definition: SubSource.cpp:6167

CLatLonCountryMap::FindClosestToLatLon
const CCountryExtreme * FindClosestToLatLon(double lat, double lon, double range, double &distance)
Definition: SubSource.cpp:6006

CLatLonCountryMap::TLatLonAdjustFlags
int TLatLonAdjustFlags
Bitwise OR of "ELatLonAdjustFlags".
Definition: SubSource.hpp:493

CLatLonCountryMap::GuessRegionForLatLon
const CCountryExtreme * GuessRegionForLatLon(double lat, double lon, const string &country=kEmptyStr, const string &province=kEmptyStr) const
Definition: SubSource.cpp:5912

CLatLonCountryMap::IsCountryInLatLon
bool IsCountryInLatLon(const string &country, double lat, double lon) const
Definition: SubSource.cpp:5795

CLatLonCountryMap::m_Scale
double m_Scale
Definition: SubSource.hpp:515

CLatLonCountryMap::x_InitFromFile
bool x_InitFromFile(const string &filename)
Definition: SubSource.cpp:5565

CLatLonCountryMap::HaveLatLonForRegion
bool HaveLatLonForRegion(const string &country) const
Definition: SubSource.cpp:5875

CLatLonCountryMap::m_CountryExtremes
TCountryExtremeList m_CountryExtremes
Definition: SubSource.hpp:519

CLatLonCountryMap::m_LatLonSortedList
TCountryLineList m_LatLonSortedList
Definition: SubSource.hpp:514

CLatLonCountryMap::IsClosestToLatLon
bool IsClosestToLatLon(const string &country, double lat, double lon, double range, double &distance) const
Definition: SubSource.cpp:6056

CLatLonCountryMap::x_InitFromDefaultList
void x_InitFromDefaultList(const char *const *list, int num)
Definition: SubSource.cpp:5531

CLatLonCountryMap::TCountryLineList
vector< CCountryLine * > TCountryLineList
Definition: SubSource.hpp:510

CNcbiApplication::Instance
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264

CNcbiEnvironment
CNcbiEnvironment –.
Definition: ncbienv.hpp:110

CNcbiIstrstream
Definition: ncbistre.hpp:171

CNcbiRegistry
CNcbiRegistry –.
Definition: ncbireg.hpp:913

COrgMod::FixHostCapitalization
static string FixHostCapitalization(const string &value)
Definition: OrgMod.cpp:965

CRef< CDate >

CRowReader
Callback style template to iterate over a row stream.
Definition: row_reader.hpp:358

CSerialException
Root class for all serialization exceptions.
Definition: exception.hpp:50

CStaticArrayMap
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175

CStaticArrayMap::const_iterator
TBase::const_iterator const_iterator
Definition: static_map.hpp:179

CStaticArraySearchBase::find
const_iterator find(const key_type &key) const
Return a const_iterator pointing to the specified element, or to the end if the element is not found.
Definition: static_set.hpp:680

CStaticArraySearchBase::end
const_iterator end() const
Return the end of the controlled sequence.
Definition: static_set.hpp:647

CStaticArraySet
Definition: static_set.hpp:824

CStaticPairArrayMap
class CStaticPairArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:105

CStaticPairArrayMap::const_iterator
TBase::const_iterator const_iterator
Definition: static_map.hpp:109

CSubSource::IsISOFormatDate
static bool IsISOFormatDate(const string &orig_date)
Definition: SubSource.cpp:816

CSubSource::GetCollectionDateProblem
static string GetCollectionDateProblem(const string &date_string)

CSubSource::NCBI_UseGeoLocNameForCountry
static bool NCBI_UseGeoLocNameForCountry(void)
Definition: SubSource.cpp:94

CSubSource::FixTissueTypeCapitalization
static string FixTissueTypeCapitalization(const string &value)
Definition: SubSource.cpp:4983

CSubSource::FixLatLonPrecision
static string FixLatLonPrecision(const string &orig)
Definition: SubSource.cpp:1304

CSubSource::x_RemoveIsoTime
static string x_RemoveIsoTime(const string &orig_date)
Definition: SubSource.cpp:804

CSubSource::x_ParseDateRangeWithDelimiter
static string x_ParseDateRangeWithDelimiter(const string &orig_date, CTempString delim)
Definition: SubSource.cpp:596

CSubSource::FixSexQualifierValue
static string FixSexQualifierValue(const string &value)
Definition: SubSource.cpp:2527

CSubSource::IsISOFormatTime
static bool IsISOFormatTime(const string &orig_time, int &hour, int &min, int &sec, bool require_time_zone=true)
Definition: SubSource.cpp:644

CSubSource::eDateFormatFlag_bad_format
@ eDateFormatFlag_bad_format
Definition: SubSource.hpp:112

CSubSource::eDateFormatFlag_in_future
@ eDateFormatFlag_in_future
Definition: SubSource.hpp:113

CSubSource::eDateFormatFlag_ok
@ eDateFormatFlag_ok
Definition: SubSource.hpp:111

CSubSource::eDateFormatFlag_out_of_order
@ eDateFormatFlag_out_of_order
Definition: SubSource.hpp:114

CSubSource::GetSubtypeValue
static TSubtype GetSubtypeValue(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:128

CSubSource::x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules
static bool x_MeetsCommonChromosomeLinkageGroupPlasmidNameRules(const string &value, const string &taxname)
Definition: SubSource.cpp:2805

CSubSource::IsValidSubtypeName
static bool IsValidSubtypeName(const string &str, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:157

CSubSource::FixLatLonFormat
static string FixLatLonFormat(string orig_lat_lon, bool guess=false)
Definition: SubSource.cpp:1862

CSubSource::m_LatLonWaterMap
static unique_ptr< CLatLonCountryMap > m_LatLonWaterMap
Definition: SubSource.hpp:248

CSubSource::IsPlasmidNameValid
static bool IsPlasmidNameValid(const string &value, const string &taxname)
Definition: SubSource.cpp:2871

CSubSource::~CSubSource
~CSubSource(void)
Definition: SubSource.cpp:63

CSubSource::x_IsFixableIsoDate
static bool x_IsFixableIsoDate(const string &orig_date)
Definition: SubSource.cpp:779

CSubSource::GetDateFromISODate
static CRef< CDate > GetDateFromISODate(const string &orig_date)
Definition: SubSource.cpp:831

CSubSource::FixIsolationSourceCapitalization
static string FixIsolationSourceCapitalization(const string &value)
Definition: SubSource.cpp:4956

CSubSource::HasCultureNotes
static bool HasCultureNotes(const string &value)
Definition: SubSource.cpp:5175

CSubSource::IsValidSexQualifierValue
static bool IsValidSexQualifierValue(const string &value)
Definition: SubSource.cpp:2488

CSubSource::FixCellTypeCapitalization
static string FixCellTypeCapitalization(const string &value)
Definition: SubSource.cpp:4876

CSubSource::x_GetDateTokens
static vector< string > x_GetDateTokens(const string &orig_date)
Definition: SubSource.cpp:852

CSubSource::GetLabel
void GetLabel(string *str) const
Definition: SubSource.cpp:101

CSubSource::IsMultipleValuesAllowed
static bool IsMultipleValuesAllowed(TSubtype)
Definition: SubSource.cpp:208

CSubSource::ELatLonCountryErr
ELatLonCountryErr
Definition: SubSource.hpp:189

CSubSource::eLatLonCountryErr_None
@ eLatLonCountryErr_None
Definition: SubSource.hpp:190

CSubSource::eLatLonCountryErr_Value
@ eLatLonCountryErr_Value
Definition: SubSource.hpp:194

CSubSource::eLatLonCountryErr_State
@ eLatLonCountryErr_State
Definition: SubSource.hpp:192

CSubSource::eLatLonCountryErr_Water
@ eLatLonCountryErr_Water
Definition: SubSource.hpp:193

CSubSource::eLatLonCountryErr_Country
@ eLatLonCountryErr_Country
Definition: SubSource.hpp:191

CSubSource::x_CalculateLatLonId
static CLatLonCountryId * x_CalculateLatLonId(float lat_value, float lon_value, string country, string province)
Definition: SubSource.cpp:1912

CSubSource::IsISOFormatDateOnly
static bool IsISOFormatDateOnly(const string &date)
Definition: SubSource.cpp:739

CSubSource::IsDayValueOkForMonth
static bool IsDayValueOkForMonth(int day, int month, int year)
Determine whether day number could occur in month.
Definition: SubSource.cpp:266

CSubSource::IsAltitudeValid
static bool IsAltitudeValid(const string &value)
Definition: SubSource.cpp:2653

CSubSource::ValidateLatLonCountry
static string ValidateLatLonCountry(const string &countryname, string &lat_lon, bool check_state, ELatLonCountryErr &errcode)
Definition: SubSource.cpp:2101

CSubSource::FixDateFormat
static string FixDateFormat(const string &orig_date)
Attempt to fix the format of the date. Returns a blank if the format of the date cannot be determined.
Definition: SubSource.cpp:620

CSubSource::AutoFix
void AutoFix()
Definition: SubSource.cpp:5103

CSubSource::CheckCellLine
static string CheckCellLine(const string &cell_line, const string &organism)
Definition: SubSource.cpp:2955

CSubSource::MakeLatLon
static string MakeLatLon(double lat_value, double lon_value, int lat_precision=2, int lon_precision=2)
Definition: SubSource.cpp:1890

CSubSource::FixCapitalization
void FixCapitalization()
Definition: SubSource.cpp:5049

CSubSource::EVocabulary
EVocabulary
Definition: SubSource.hpp:81

CSubSource::eVocabulary_insdc
@ eVocabulary_insdc
Definition: SubSource.hpp:83

CSubSource::IsCollectionDateAfterTime
static bool IsCollectionDateAfterTime(const string &collection_date, time_t t, bool &bad_format)
Definition: SubSource.cpp:414

CSubSource::CheckDateFormat
static size_t CheckDateFormat(const string &date_string)
Definition: SubSource.cpp:505

CSubSource::x_FormatWithPrecision
static string x_FormatWithPrecision(double val, int precision)
Definition: SubSource.cpp:2682

CSubSource::GetSubtypeName
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
Definition: SubSource.cpp:185

CSubSource::x_GetPrecision
static int x_GetPrecision(const string &num_str)
Definition: SubSource.cpp:2671

CSubSource::NeedsNoText
static bool NeedsNoText(const TSubtype &subtype)
Definition: SubSource.cpp:233

CSubSource::IsEndogenousVirusNameValid
static bool IsEndogenousVirusNameValid(const string &value)
Definition: SubSource.cpp:2753

CSubSource::IsChromosomeNameValid
static bool IsChromosomeNameValid(const string &value, const string &taxname)
Definition: SubSource.cpp:2846

CSubSource::x_GenericRepliconNameValid
static bool x_GenericRepliconNameValid(const string &value)
Definition: SubSource.cpp:2727

CSubSource::IsCorrectLatLonFormat
static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)
Definition: SubSource.cpp:1237

CSubSource::DateFromCollectionDate
static CRef< CDate > DateFromCollectionDate(const string &str) THROWS((CException))
Definition: SubSource.cpp:287

CSubSource::IsSegmentValid
static bool IsSegmentValid(const string &value)
Definition: SubSource.cpp:2747

CSubSource::FixDevStageCapitalization
static string FixDevStageCapitalization(const string &value)
Definition: SubSource.cpp:4855

CSubSource::m_LatLonCountryMap
static unique_ptr< CLatLonCountryMap > m_LatLonCountryMap
Definition: SubSource.hpp:247

CSubSource::IsLinkageGroupNameValid
static bool IsLinkageGroupNameValid(const string &value, const string &taxname)
Definition: SubSource.cpp:2859

CSubSource::FixAltitude
static string FixAltitude(const string &value)
Definition: SubSource.cpp:2690

CSubSource::IsDiscouraged
static bool IsDiscouraged(const TSubtype subtype)
Definition: SubSource.cpp:247

CSubSource::RemoveCultureNotes
static void RemoveCultureNotes(string &value, bool is_species_level=true)
Definition: SubSource.cpp:5192

CSubSource::FixLabHostCapitalization
static string FixLabHostCapitalization(const string &value)
Definition: SubSource.cpp:5010

CSubSource::IsCorrectDateFormat
static void IsCorrectDateFormat(const string &date_string, bool &bad_format, bool &in_future)
Definition: SubSource.cpp:454

CSubSource::DetectDateFormat
static void DetectDateFormat(const string &orig_date, bool &ambiguous, bool &day_first)
Definition: SubSource.cpp:1176

CTempString
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65

CTimeException
CTimeException –.
Definition: ncbitime.hpp:2076

CTime
CTime –.
Definition: ncbitime.hpp:296

map_checker< std::map< string, string, PNocase > >::iterator
container_type::iterator iterator
Definition: map.hpp:54

map_checker::end
const_iterator end() const
Definition: map.hpp:152

map_checker::clear
void clear()
Definition: map.hpp:169

map_checker::find
const_iterator find(const key_type &key) const
Definition: map.hpp:153

map
Definition: map.hpp:338

set< string, PNocase_Conditional >

set::find
const_iterator find(const key_type &key) const
Definition: set.hpp:137

state
Definition: sls_alp.hpp:60

compile_time.hpp

flags
static uch flags
Definition: ct_nlmzip_trees.cpp:342

eMonth
@ eMonth
Definition: cuCdUpdateParameters.hpp:61

file_name
const char * file_name[]
Definition: dbapi_bcp_util.hpp:35

enumvalues.hpp

check_state
static void check_state(const char name[], prfunc print, int erc)
Definition: done_handling.c:80

test
#define test(a, b, c, d, e)
Definition: numeric.c:170

line1
static char line1[1024 *16]
Definition: t0016.c:98

line2
static char line2[1024 *16]
Definition: t0016.c:99

first
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46

check
#define check(s)
Definition: describecol2.c:21

precision
static char precision
Definition: genparams.c:28

output
static SQLCHAR output[256]
Definition: print.c:5

str
static const char * str(char *buf, int n)
Definition: stats.c:84

env
static HENV env
Definition: transaction2.c:38

tmp
static char tmp[3200]
Definition: utf8.c:42

CNcbiApplicationAPI::GetEnvironment
const CNcbiEnvironment & GetEnvironment(void) const
Get the application's cached environment.
Definition: ncbiapp_api.hpp:760

CNcbiApplicationAPI::GetConfig
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
Definition: ncbiapp_api.hpp:770

ArraySize
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532

ITERATE
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815

NON_CONST_ITERATE
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822

swap
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508

string
string
Definition: cgiapp.hpp:687

NULL
#define NULL
Definition: ncbistd.hpp:225

ERR_POST_X
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550

ERR_POST
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186

NCBI_THROW
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704

NCBI_CATCH
#define NCBI_CATCH(message)
Catch CExceptions as well. This macro is deprecated - use the *_X or *_XX variant instead.
Definition: ncbiexpt.hpp:580

Warning
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191

THROWS
#define THROWS(x)
Definition: ncbiexpt.hpp:75

eUnknown
@ eUnknown
Definition: app_popup.hpp:72

ENUM_METHOD_NAME
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994

ILineReader::New
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
Definition: line_reader.cpp:49

ILineReader::AtEOF
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.

CRef::Empty
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719

IRegistry::GetString
virtual string GetString(const string &section, const string &name, const string &default_value, TFlags flags=0) const
Get the parameter string value.
Definition: ncbireg.cpp:321

END_NCBI_SCOPE
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103

BEGIN_NCBI_SCOPE
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100

CStringUTF8
std::string CStringUTF8
Definition: ncbistl.hpp:254

SIZE_TYPE
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132

NStr::DoubleToString
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5187

kEmptyStr
#define kEmptyStr
Definition: ncbistr.hpp:123

NStr::CompareNocase
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219

NStr::StringToInt
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630

NStr::Split
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461

NStr::FindNoCase
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993

NStr::EndsWith
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430

NStr::IsBlank
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106

NStr::StringToDouble
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387

NPOS
#define NPOS
Definition: ncbistr.hpp:133

TUnicodeSymbol
Uint4 TUnicodeSymbol
Unicode character.
Definition: ncbistr.hpp:141

NStr::TruncateSpacesInPlace
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201

NStr::IntToString
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084

CUtf8::GuessEncoding
static EEncoding GuessEncoding(const CTempString &src)
Guess the encoding of the C/C++ string.
Definition: ncbistr.cpp:6691

NStr::Find
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891

NStr::Join
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697

NStr::ParseEscapes
static string ParseEscapes(const CTempString str, EEscSeqRange mode=eEscSeqRange_Standard, char user_char='?')
Parse C-style escape sequences in the specified string.
Definition: ncbistr.cpp:4793

NStr::EqualCase
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5325

CTempString::data
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313

NStr::Replace
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314

NStr::Compare
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297

CUtf8::AsUTF8
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
Definition: ncbistr.hpp:3889

NStr::StartsWith
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412

NStr::TrimSuffixInPlace
static void TrimSuffixInPlace(string &str, const CTempString suffix, ECase use_case=eCase)
Trim suffix from a string (in-place)
Definition: ncbistr.cpp:3278

NStr::SplitInTwo
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3554

CTempString::length
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320

NStr::Sanitize
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
Definition: ncbistr.hpp:2876

CUtf8::Decode
static TUnicodeSymbol Decode(const char *&src)
Convert sequence of UTF8 code units into Unicode code point.
Definition: ncbistr.hpp:5662

NStr::EqualNocase
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353

NStr::TrimPrefixInPlace
static void TrimPrefixInPlace(string &str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string (in-place)
Definition: ncbistr.cpp:3242

NStr::ECase
ECase
Which type of string comparison.
Definition: ncbistr.hpp:1204

NStr::NumericToString
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673

CTempString::find
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
Definition: tempstr.hpp:655

NStr::Equal
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384

NStr::ReplaceInPlace
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405

NStr::ToUpper
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424

NStr::TruncateSpaces
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186

NStr::ToLower
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405

NStr::fConvErr_NoThrow
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285

NStr::fSplit_Tokenize
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508

NStr::eReverseSearch
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947

NStr::eTrunc_Both
@ eTrunc_Both
Truncate spaces at both begin and end of string.
Definition: ncbistr.hpp:2242

NStr::eTrunc_Begin
@ eTrunc_Begin
Truncate leading spaces only.
Definition: ncbistr.hpp:2240

NStr::eNocase
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206

CTime::DaysInMonth
int DaysInMonth(void) const
Get number of days in the month.
Definition: ncbitime.cpp:1198

CTime::GetTimeT
time_t GetTimeT(void) const
Get time in time_t format.
Definition: ncbitime.cpp:1395

CTime::MonthNameToNum
static int MonthNameToNum(const string &month)
Get numerical value of the month by name.
Definition: ncbitime.cpp:1211

CTime::MonthNumToName
static string MonthNumToName(int month, ENameFormat format=eFull)
Get name of the month by numerical value.
Definition: ncbitime.cpp:1229

CTime::eAbbr
@ eAbbr
Use abbreviated name.
Definition: ncbitime.hpp:319

CSubSource_Base::TSubtype
int TSubtype
Definition: SubSource_.hpp:135

CSubSource_Base::GetAttrib
const TAttrib & GetAttrib(void) const
Get the Attrib member data.
Definition: SubSource_.hpp:397

CSubSource_Base::GetSubtype
TSubtype GetSubtype(void) const
Get the Subtype member data.
Definition: SubSource_.hpp:310

CSubSource_Base::ESubtype
ESubtype
Definition: SubSource_.hpp:84

CSubSource_Base::IsSetSubtype
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: SubSource_.hpp:291

CSubSource_Base::ResetName
void ResetName(void)
Reset Name data member.
Definition: SubSource_.cpp:101

CSubSource_Base::SetName
TName & SetName(void)
Assign a value to Name data member.
Definition: SubSource_.hpp:373

CSubSource_Base::GetName
const TName & GetName(void) const
Get the Name member data.
Definition: SubSource_.hpp:350

CSubSource_Base::IsSetAttrib
bool IsSetAttrib(void) const
Attribution/source of this name. Check if a value has been assigned to Attrib data member.
Definition: SubSource_.hpp:385

CSubSource_Base::IsSetName
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
Definition: SubSource_.hpp:338

CSubSource_Base::eSubtype_transgenic
@ eSubtype_transgenic
Definition: SubSource_.hpp:110

CSubSource_Base::eSubtype_altitude
@ eSubtype_altitude
Definition: SubSource_.hpp:127

CSubSource_Base::eSubtype_dev_stage
@ eSubtype_dev_stage
Definition: SubSource_.hpp:96

CSubSource_Base::eSubtype_collection_date
@ eSubtype_collection_date
DD-MMM-YYYY format.
Definition: SubSource_.hpp:114

CSubSource_Base::eSubtype_cell_type
@ eSubtype_cell_type
Definition: SubSource_.hpp:93

CSubSource_Base::eSubtype_insertion_seq_name
@ eSubtype_insertion_seq_name
Definition: SubSource_.hpp:105

CSubSource_Base::eSubtype_transposon_name
@ eSubtype_transposon_name
Definition: SubSource_.hpp:104

CSubSource_Base::eSubtype_fwd_primer_seq
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
Definition: SubSource_.hpp:117

CSubSource_Base::eSubtype_subclone
@ eSubtype_subclone
Definition: SubSource_.hpp:88

CSubSource_Base::eSubtype_phenotype
@ eSubtype_phenotype
Definition: SubSource_.hpp:126

CSubSource_Base::eSubtype_country
@ eSubtype_country
Definition: SubSource_.hpp:107

CSubSource_Base::eSubtype_frequency
@ eSubtype_frequency
Definition: SubSource_.hpp:97

CSubSource_Base::eSubtype_rearranged
@ eSubtype_rearranged
Definition: SubSource_.hpp:99

CSubSource_Base::eSubtype_plastid_name
@ eSubtype_plastid_name
Definition: SubSource_.hpp:106

CSubSource_Base::eSubtype_clone
@ eSubtype_clone
Definition: SubSource_.hpp:87

CSubSource_Base::eSubtype_other
@ eSubtype_other
Definition: SubSource_.hpp:128

CSubSource_Base::eSubtype_lab_host
@ eSubtype_lab_host
Definition: SubSource_.hpp:100

CSubSource_Base::eSubtype_lat_lon
@ eSubtype_lat_lon
+/- decimal degrees
Definition: SubSource_.hpp:113

CSubSource_Base::eSubtype_sex
@ eSubtype_sex
Definition: SubSource_.hpp:91

CSubSource_Base::eSubtype_rev_primer_name
@ eSubtype_rev_primer_name
Definition: SubSource_.hpp:120

CSubSource_Base::eSubtype_metagenomic
@ eSubtype_metagenomic
Definition: SubSource_.hpp:121

CSubSource_Base::eSubtype_collected_by
@ eSubtype_collected_by
name of person who collected the sample
Definition: SubSource_.hpp:115

CSubSource_Base::eSubtype_segment
@ eSubtype_segment
Definition: SubSource_.hpp:108

CSubSource_Base::eSubtype_fwd_primer_name
@ eSubtype_fwd_primer_name
Definition: SubSource_.hpp:119

CSubSource_Base::eSubtype_rev_primer_seq
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
Definition: SubSource_.hpp:118

CSubSource_Base::eSubtype_chromosome
@ eSubtype_chromosome
Definition: SubSource_.hpp:85

CSubSource_Base::eSubtype_plasmid_name
@ eSubtype_plasmid_name
Definition: SubSource_.hpp:103

CSubSource_Base::eSubtype_isolation_source
@ eSubtype_isolation_source
Definition: SubSource_.hpp:112

CSubSource_Base::eSubtype_tissue_type
@ eSubtype_tissue_type
Definition: SubSource_.hpp:94

CSubSource_Base::eSubtype_environmental_sample
@ eSubtype_environmental_sample
Definition: SubSource_.hpp:111

CSubSource_Base::eSubtype_identified_by
@ eSubtype_identified_by
name of person who identified the sample
Definition: SubSource_.hpp:116

CSubSource_Base::eSubtype_whole_replicon
@ eSubtype_whole_replicon
Definition: SubSource_.hpp:125

CSubSource_Base::eSubtype_germline
@ eSubtype_germline
Definition: SubSource_.hpp:98

CDate_std_Base::SetYear
void SetYear(TYear value)
Assign a value to Year data member.
Definition: Date_std_.hpp:435

CDate_std_Base::SetMonth
void SetMonth(TMonth value)
Assign a value to Month data member.
Definition: Date_std_.hpp:482

CDate_Base::SetStd
TStd & SetStd(void)
Select the variant.
Definition: Date_.cpp:115

CDate_std_Base::SetDay
void SetDay(TDay value)
Assign a value to Day data member.
Definition: Date_std_.hpp:529

int
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210

Note
where both of them are integers Note
Definition: introspection_html.hpp:354

orig
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
Definition: introspection_html.hpp:118

file
FILE * file
Definition: lex.newick.cpp:1337

input
static int input()
Definition: lex.newick.cpp:1147

i
int i
Definition: lex.newick.cpp:1456

len
int len
Definition: lex.newick.cpp:1450

subname
static char * subname
Definition: mdb_load.c:26

compile_time_bits::range
range(_Ty, _Ty) -> range< _Ty >

compile_time_bits::tagStrCase
std::integral_constant< ncbi::NStr::ECase, ncbi::NStr::eCase > tagStrCase
Definition: ct_string_cxx14.hpp:43

ct::sort
constexpr auto sort(_Init &&init)
Definition: compile_time.hpp:179

rapidjson::value
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227

fabs
#define fabs(v)
Definition: ncbi_dispd.c:46

a
unsigned int a
Definition: ncbi_localip.c:102

t
EIPRangeType t
Definition: ncbi_localip.c:101

ncbi_pch.hpp

isalpha
int isalpha(Uchar c)
Definition: ncbictype.hpp:61

isspace
int isspace(Uchar c)
Definition: ncbictype.hpp:69

isalnum
int isalnum(Uchar c)
Definition: ncbictype.hpp:62

isdigit
int isdigit(Uchar c)
Definition: ncbictype.hpp:64

toupper
int toupper(Uchar c)
Definition: ncbictype.hpp:73

isprint
int isprint(Uchar c)
Definition: ncbictype.hpp:67

ispunct
int ispunct(Uchar c)
Definition: ncbictype.hpp:68

eYear
@ eYear
Definition: ncbitime.cpp:2753

eDay
@ eDay
Definition: ncbitime.cpp:2755

ncbitime.hpp
Defines: CTimeFormat - storage class for time format.

max
T max(T x_, T y_)
Definition: njn_function.hpp:105

min
T min(T x_, T y_)
Definition: njn_function.hpp:107

r
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

copy
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613

match
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513

number
static BOOL number
Definition: pcregrep.c:193

suffix
static const char * suffix[]
Definition: pcregrep.c:408

TFieldNo
Uint4 TFieldNo
Field number (zero based)
Definition: row_reader.hpp:53

row_reader_ncbi_tsv.hpp

sequence_util_macros.hpp
Generic utility macros and templates for exploring NCBI objects.

BEGIN_COMMA_END
#define BEGIN_COMMA_END(container)
Definition: sequence_util_macros.hpp:278

R
#define R(t)

row
#define row(bind, expected)
Definition: string_bind.c:73

R
Definition: netservice_api.cpp:185

SStaticPair
Template structure SStaticPair is a simplified replacement for STL pair<>. Main reason for introducing this...
Definition: static_set.hpp:60

cmp
Definition: document.cpp:114

error
Definition: sls_alp_data.hpp:88

type
Definition: type.c:6

eFiles::val
@ val

g
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44

result
else result
Definition: token2.c:20

g_FindDataFile
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
Definition: util_misc.cpp:139

type_name
static const char * type_name(CS_INT value)
Definition: will_convert.c:122