73 if (name_std.IsSetInitials() && !
NStr::IsBlank(name_std.GetInitials())) {
74 author += name_std.GetInitials();
76 if (name_std.IsSetLast() && !
NStr::IsBlank(name_std.GetLast())) {
77 author += name_std.GetLast();
155 switch (pub.
Which()) {
209 for (
const auto& it : pubdesc.
GetPub().
Get()) {
221 for (
const auto& desc :
context.GetSeqdesc()) {
226 if (!title.empty()) {
238 if (m_Objs.empty()) {
241 for (
auto& it : m_Objs[
kEmptyStr].GetMap()) {
242 if (it.second->GetMap().size() > 1) {
243 string top =
"[n] articles have title [(]'" + it.first +
"'[)] but do not have the same author list";
244 for (
auto& aa : it.second->GetMap()) {
245 string label =
"[n] article[s] [has] title [(]'" + it.first +
"'[)] and author list [(]'" + aa.first +
"'";
246 for (
auto& obj: aa.second->GetObjects()) {
254 m_ReportItems = m_Objs.GetMap().cbegin()->second->Export(*this)->GetSubitems();
266 bool is_unpublished =
false;
268 is_unpublished =
true;
270 return is_unpublished;
276 bool is_unpublished =
false;
280 return is_unpublished;
286 bool is_unpublished =
false;
290 return is_unpublished;
296 bool is_unpublished =
false;
297 if (
proc.IsSetBook()) {
300 return is_unpublished;
306 bool is_unpublished =
false;
310 return is_unpublished;
316 bool is_unpublished =
false;
318 switch (pub.
Which()) {
321 is_unpublished =
true;
345 is_unpublished =
true;
354 return is_unpublished;
363 for (
const auto& it : pubdesc.
GetPub().
Get()) {
381 m_Objs[
"[n] unpublished pub[s] [has] no title"].Add(*
context.PubdescObjRef(*pubdesc)).Fatal();
396 }
else if (affil.
IsStd()) {
414 for (
const auto& it : pubdesc.
GetPub().
Get()) {
416 if (!it->GetSub().IsSetAuthors() ||
417 !it->GetSub().GetAuthors().IsSetAffil() ||
430 for (
auto& pubdesc :
context.GetPubdescs()) {
432 m_Objs[
"[n] citsub[s] [is] missing affiliation"].Add(*
context.PubdescObjRef(*pubdesc)).Fatal();
440 #define ADD_TO_AFFIL_SUMMARY(Fieldname) \
441 if(affil.IsSet##Fieldname() && !NStr::IsBlank(affil.Get##Fieldname())) { \
442 if (!NStr::IsBlank(rval)) { \
445 rval += affil.Get##Fieldname(); \
472 }
else if (affil.
IsStr()) {
486 for (
auto& authors :
context.GetAuthors()) {
488 if (pub && !pub->
IsSub()) {
492 if (authors->IsSetAffil()) {
493 const CAffil& affil = authors->GetAffil();
495 m_Objs[
"Affil"][affil.
GetStr()].Add(*repobj);
499 m_Objs[
"Country"][
kEmptyStr].Add(*repobj);
500 m_Objs[
"Street"][
kEmptyStr].Add(*repobj);
501 m_Objs[
"Postal_code"][
kEmptyStr].Add(*repobj);
503 else if (affil.
IsStd()) {
527 if (m_Objs.empty()) {
531 for (
const auto& it : m_Objs[
kSummaries].GetMap()) {
532 string two =
NStr::IsBlank(it.first) ?
"[*0*][n] Cit-sub[s] [has] no affiliation" :
"[*1*][n] CitSub[s] [has] affiliation " + it.first;
533 for (
auto& robj : m_Objs[
kSummaries][it.first].GetObjects()) {
538 #define REPORT_CITSUBAFFIL_CONFLICT(order, field, alias) \
539 if (m_Objs[#field].GetMap().size() > 1) {\
540 for (const auto& it : m_Objs[#field].GetMap()) {\
541 string two = "[n] affiliation[s] [has] "#alias" value '" + it.first + "'";\
542 for (auto& robj : m_Objs[#field][it.first].GetObjects()) {\
543 out[kCitSubSummary]["[*"#order"*]Affiliations have different values for "#alias][two].Ext().Add(*robj, false);\
554 m_ReportItems =
out.Export(*this)->GetSubitems();
566 m_Objs[ss.str()].Add(*
context.SubmitBlockObjRef());
573 if (m_Objs.GetMap().size() > 1) {
577 for (
auto& it : m_Objs.GetMap()) {
578 CReportNode& node = outout[
"[*" + to_string(
count++) +
"*][n] record[s] [has] identical submit-blocks"];
579 for (
auto& obj : it.second->GetObjects()) {
583 m_ReportItems =
out.Export(*this)->GetSubitems();
592 static const string msg =
"[n] publication[s]/submitter block[s] [has] consortium";
593 for (
auto& authors :
context.GetAuthors()) {
594 if (authors->IsSetNames() && authors->GetNames().IsStd()) {
596 for (
auto& auth :
names) {
597 if (auth->IsSetName() && auth->GetName().IsConsortium()) {
598 m_Objs[
msg].Add(*
context.AuthorsObjRef(*authors,
true));
605 m_Objs[
msg].Add(*
context.SubmitBlockObjRef(
true));
614 CAuth_list::C_Names::TStd::iterator it =
names.begin();
615 while (it !=
names.end()) {
616 if ((*it)->CanGetName() && (*it)->GetName().IsConsortium()) {
636 cout <<
"CONSORTIUM AUTOFIX: on seq_feat is not implemented\n";
640 for (
auto pub :
data) {
641 if (pub->IsSetAuthors()) {
664 for (
auto& authors :
context.GetAuthors()) {
665 if (authors->IsSetNames() && authors->GetNames().IsStd()) {
671 for (
auto& auth :
names) {
672 if (!auth->IsSetName() || (auth->GetName().IsName() &&
673 (!auth->GetName().GetName().CanGetFirst() || !auth->GetName().GetName().CanGetLast() || auth->GetName().GetName().GetFirst().empty() || auth->GetName().GetName().GetLast().empty()))) {
697 else if (pub.
IsSub()) {
708 size_t delimiter_pos = street.size() - tail.size() - 1;
709 if (
ispunct(street[delimiter_pos]) ||
isspace(street[delimiter_pos])) {
710 string university_of(
"University of");
711 university_of += street[delimiter_pos] + tail;
725 const string& street =
data.GetStreet();
726 if (
data.IsSetCountry()) {
729 if (!ret &&
data.IsSetPostal_code()) {
732 if (!ret &&
data.IsSetSub()) {
735 if (!ret &&
data.IsSetCity()) {
745 for (
auto& pubdesc :
context.GetPubdescs()) {
746 if (pubdesc->IsSetPub()) {
748 for (
auto& it : pubdesc->GetPub().Get()) {
757 m_Objs[
"[n] Cit-sub pubs have duplicate affil text"].Add(*
context.PubdescObjRef(*pubdesc,
true));
769 size_t off = street.size() - tail.size();
770 static const string kChina =
"China";
771 static const string kChinaPR =
"P.R. China";
773 off = street.size() - kChinaPR.size();
775 string new_street = street.substr(0, off);
788 string& street =
data.SetStreet();
789 if (
data.IsSetCountry()) {
792 if (!ret &&
data.IsSetPostal_code()) {
795 if (!ret &&
data.IsSetSub()) {
798 if (!ret &&
data.IsSetCity()) {
814 cout <<
"CITSUB_AFFIL_DUP_TEXT AUTOFIX on seq_feat -- coming soon!\n";
819 for (
auto pub :
data) {
846 {
"AR",
"Arkansas" },
848 {
"CA",
"California" },
852 {
"CO",
"Colorado" },
855 {
"CT",
"Connecticut" },
857 {
"DE",
"Delaware" },
865 {
"IL",
"Illinois" },
873 {
"KY",
"Kentucky" },
876 {
"LA",
"Louisiana" },
878 {
"MD",
"Maryland" },
879 {
"MA",
"Massachusetts" },
881 {
"MI",
"Michigan" },
883 {
"MN",
"Minnesota" },
885 {
"MS",
"Mississippi" },
887 {
"MO",
"Missouri" },
890 {
"NE",
"Nebraska" },
895 {
"NH",
"New Hampshire" },
896 {
"NJ",
"New Jersey" },
897 {
"NM",
"New Mexico" },
898 {
"NY",
"New York" },
899 {
"NC",
"North Carolina" },
901 {
"ND",
"North Dakota" },
904 {
"OK",
"Oklahoma" },
909 {
"PA",
"Pennsylvania" },
912 {
"PR",
"Puerto Rico" },
913 {
"RI",
"Rhode Island" },
914 {
"SC",
"South Carolina" },
916 {
"SD",
"South Dakota" },
918 {
"TN",
"Tennessee" },
924 {
"VA",
"Virginia" },
926 {
"WA",
"Washington" },
928 {
"WV",
"West Virginia" },
929 {
"WI",
"Wisconsin" },
951 for (
auto& pubdesc :
context.GetPubdescs()) {
952 if (pubdesc->IsSetPub()) {
955 for (
auto& it : pubdesc->GetPub().Get()) {
965 if (country ==
"USA") {
972 m_Objs[
"[n] cit-sub[s] [is] missing state abbreviations"].Add(*
context.PubdescObjRef(*pubdesc,
true));
1007 cout <<
"USA_STATE AUTOFIX on seq_feat -- coming soon!\n";
1012 for (
auto& pub :
data) {
1022 if (country ==
"USA") {
1042 static const string kIncorrectCap =
"[n] pub[s] [has] incorrect author capitalization";
1061 if (!name.empty()) {
1062 bool need_cap =
true;
1063 bool need_lower =
false;
1064 bool found_lower =
true;
1065 size_t len = name.size();
1066 for (
size_t i = 0;
i <
len; ++
i) {
1071 string::const_iterator start = name.cbegin() +
i;
1072 string::const_iterator end = start + 1;
1073 while (end != name.cend() && *end !=
' ') {
1076 string short_name(start, end);
1078 if (it == kShortNames.cend()) {
1082 i += it->
size() - 1;
1089 found_lower =
islower(name[
i]) != 0;
1096 if (need_lower && !found_lower) {
1107 if (!initials.empty()) {
1108 for (
auto& cur : initials) {
1121 for (
auto& authors :
context.GetAuthors()) {
1122 if (authors->IsSetNames() && authors->GetNames().IsStd()) {
1123 for (
auto& auth : authors->GetNames().GetStd()) {
1124 if (auth->IsSetName() && auth->GetName().IsName()) {
1125 const CName_std& name = auth->GetName().GetName();
1126 bool correct =
true;
1150 bool to_lower =
false;
1151 for (
auto& cur : name) {
1153 if (to_lower &&
isupper(cur)) {
1165 else if (apostroph && cur ==
'\'') {
1168 else if (cur ==
' ' || cur ==
'-') {
1182 for (
auto auth : auth_list->
SetNames().SetStd()) {
1183 if (auth->GetName().IsName()) {
1184 CName_std& name = auth->SetName().SetName();
1214 cout <<
"CHECK_AUTH_CAPS AUTOFIX on seq_feat -- coming soon!\n";
1218 if (authors->IsSetAuthors()) {
1227 if (subb->
SetCit().IsSetAuthors()) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@Affil.hpp User-defined methods of the data storage class.
@Auth_list.hpp User-defined methods of the data storage class.
@Name_std.hpp User-defined methods of the data storage class.
@Pubdesc.hpp User-defined methods of the data storage class.
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
namespace ncbi::objects::
Base class for all serializable objects.
const string & GetTitle(C_E::E_Choice type=C_E::e_not_set) const
If the internal list contains a title (of the specified type, if given), returns the corresponding st...
const_iterator find(const key_type &key) const
#define DISCREPANCY_AUTOFIX(name)
#define DISCREPANCY_CASE(name, type, group, descr)
#define DISCREPANCY_SUMMARIZE(name)
std::ofstream out("events_result.xml")
main entry point for tests
static const struct name_t names[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
constexpr size_t ArraySize(const Element(&)[Size])
#define MSerial_AsnBinary
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
@ fSS_NoTruncate_Begin
Do not truncate leading spaces.
@ fSS_alnum
Check on isalnum()
@ eNocase
Case insensitive compare.
static const char label[]
bool IsProc(void) const
Check if variant Proc is selected.
const TTitle & GetTitle(void) const
Get the Title member data.
const TAuthors & GetAuthors(void) const
Get the Authors member data.
const TCit & GetCit(void) const
Get the Cit member data.
bool IsSetAffil(void) const
author affiliation Check if a value has been assigned to Affil data member.
bool IsSetAuthors(void) const
Check if a value has been assigned to Authors data member.
bool IsSetAuthors(void) const
authors (ANSI requires) Check if a value has been assigned to Authors data member.
const TJournal & GetJournal(void) const
Get the variant data.
void SetSub(const TSub &value)
Assign a value to Sub data member.
bool IsSetPrepub(void) const
Check if a value has been assigned to Prepub data member.
const TFrom & GetFrom(void) const
Get the From member data.
const TAuthors & GetAuthors(void) const
Get the Authors member data.
const TCit & GetCit(void) const
Get the Cit member data.
bool IsSetTitle(void) const
Title of book Check if a value has been assigned to Title data member.
const TAffil & GetAffil(void) const
Get the Affil member data.
bool IsSetStreet(void) const
street address, not ANSI Check if a value has been assigned to Street data member.
const TStr & GetStr(void) const
Get the variant data.
bool IsSetTitle(void) const
title of paper (ANSI requires) Check if a value has been assigned to Title data member.
const TTitle & GetTitle(void) const
Get the Title member data.
const TAuthors & GetAuthors(void) const
Get the Authors member data.
bool IsSetCity(void) const
Author Affiliation, City Check if a value has been assigned to City data member.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
bool IsSetAuthors(void) const
not necessarily authors of the paper Check if a value has been assigned to Authors data member.
TPrepub GetPrepub(void) const
Get the Prepub member data.
const TStreet & GetStreet(void) const
Get the Street member data.
const TName & GetName(void) const
Get the Name member data.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
const TAffil & GetAffil(void) const
Get the Affil member data.
bool IsSetCit(void) const
anything, not parsable Check if a value has been assigned to Cit data member.
const TSub & GetSub(void) const
Get the Sub member data.
const TProc & GetProc(void) const
Get the variant data.
list< CRef< CAuthor > > TStd
bool IsStr(void) const
Check if variant Str is selected.
const TTitle & GetTitle(void) const
Get the Title member data.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
bool IsSetTitle(void) const
eg.
bool IsSetAuthors(void) const
author/inventor Check if a value has been assigned to Authors data member.
void SetNames(TNames &value)
Assign a value to Names data member.
const TAuthors & GetAuthors(void) const
Get the Authors member data.
bool CanGetNames(void) const
Check if it is safe to call GetNames method.
bool IsSetDiv(void) const
Author Affiliation, Division Check if a value has been assigned to Div data member.
const TCountry & GetCountry(void) const
Get the Country member data.
const TStd & GetStd(void) const
Get the variant data.
const TPostal_code & GetPostal_code(void) const
Get the Postal_code member data.
bool IsSetName(void) const
Author, Primary or Secondary Check if a value has been assigned to Name data member.
const TStr & GetStr(void) const
Get the variant data.
bool IsSetTitle(void) const
Check if a value has been assigned to Title data member.
bool IsBook(void) const
Check if variant Book is selected.
bool IsSetAuthors(void) const
authors Check if a value has been assigned to Authors data member.
bool IsJournal(void) const
Check if variant Journal is selected.
const TNames & GetNames(void) const
Get the Names member data.
bool IsSetCit(void) const
same fields as a book Check if a value has been assigned to Cit data member.
const TStd & GetStd(void) const
Get the variant data.
bool IsStd(void) const
Check if variant Std is selected.
const TAuthors & GetAuthors(void) const
Get the Authors member data.
bool IsSetPostal_code(void) const
Check if a value has been assigned to Postal_code data member.
bool IsSetCountry(void) const
Author Affiliation, Country Check if a value has been assigned to Country data member.
const TImp & GetImp(void) const
Get the Imp member data.
const TMl & GetMl(void) const
Get the variant data.
const TTitle & GetTitle(void) const
Get the Title member data.
const TCity & GetCity(void) const
Get the City member data.
bool IsSetAffil(void) const
Author Affiliation, Name Check if a value has been assigned to Affil data member.
const TDiv & GetDiv(void) const
Get the Div member data.
TStd & SetStd(void)
Select the variant.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetImp(void) const
Check if a value has been assigned to Imp data member.
bool IsSetSub(void) const
Author Affiliation, County Sub Check if a value has been assigned to Sub data member.
const TBook & GetBook(void) const
Get the variant data.
@ e_Ml
MEDLINE, semi-structured.
E_Choice Which(void) const
Which variant is currently selected.
const TStr & GetStr(void) const
Get the variant data.
bool IsConsortium(void) const
Check if variant Consortium is selected.
void SetInitials(const TInitials &value)
Assign a value to Initials data member.
const TInitials & GetInitials(void) const
Get the Initials member data.
void SetLast(const TLast &value)
Assign a value to Last data member.
void SetFirst(const TFirst &value)
Assign a value to First data member.
bool IsSetInitials(void) const
first + middle initials Check if a value has been assigned to Initials data member.
const TMl & GetMl(void) const
Get the variant data.
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
const TConsortium & GetConsortium(void) const
Get the variant data.
const TFirst & GetFirst(void) const
Get the First member data.
const TLast & GetLast(void) const
Get the Last member data.
const TName & GetName(void) const
Get the variant data.
bool IsSetFirst(void) const
Check if a value has been assigned to First data member.
@ e_Ml
MEDLINE name (semi-structured) eg. "Jones RM".
@ e_Consortium
consortium name
bool CanGet(void) const
Check if it is safe to call Get method.
list< CRef< CPub > > Tdata
const TMan & GetMan(void) const
Get the variant data.
const TArticle & GetArticle(void) const
Get the variant data.
const TJournal & GetJournal(void) const
Get the variant data.
const TSub & GetSub(void) const
Get the variant data.
bool IsSet(void) const
Check if a value has been assigned to data member.
const TPatent & GetPatent(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TProc & GetProc(void) const
Get the variant data.
const TEquiv & GetEquiv(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsEquiv(void) const
Check if variant Equiv is selected.
bool IsSub(void) const
Check if variant Sub is selected.
const TGen & GetGen(void) const
Get the variant data.
const TBook & GetBook(void) const
Get the variant data.
@ e_Gen
general or generic unparsed
@ e_Proc
proceedings of a meeting
@ e_Man
manuscript, thesis, or letter
void SetPub(TPub &value)
Assign a value to Pub data member.
bool CanGetPub(void) const
Check if it is safe to call GetPub method.
TPub & SetPub(void)
Select the variant.
const TPub & GetPub(void) const
Get the variant data.
bool IsPub(void) const
Check if variant Pub is selected.
bool IsSetPub(void) const
the citation(s) Check if a value has been assigned to Pub data member.
const TPub & GetPub(void) const
Get the Pub member data.
const TCit & GetCit(void) const
Get the Cit member data.
void SetCit(TCit &value)
Assign a value to Cit data member.
bool CanGetCit(void) const
Check if it is safe to call GetCit method.
const struct ncbi::grid::netcache::search::fields::SIZE size
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
string SummarizeAffiliation(const CAffil::C_Std &affil)
static bool ReplaceStateAbbreviation(CAffil *affil)
static size_t kNumOfAbbreviations
static const char * kTitleAuthorConflict
#define REPORT_CITSUBAFFIL_CONFLICT(order, field, alias)
static bool FixCapitalization(string &name, bool apostroph)
static bool RemoveAffilDup(CCit_sub *cit_sub)
void GetPubTitleAndAuthors(const CPub &pub, string &title, string &authors)
bool IsPubUnpublished(const CImprint &imp)
static bool RemoveAffilStreetEnd(string &street, const string &tail, bool country)
static bool IsCapInitialsCorrect(const string &initials)
static const CCit_sub * GetCitSubFromPub(const CPub &pub)
const string kTitleAuthorConflictEnd
static bool AffilStreetEndsWith(const string &street, const string &tail)
static bool AffilStreetContainsDup(const CAffil &affil)
const string kTitleAuthorConflictStart
bool IsCitSubMissingAffiliation(const CPubdesc &pubdesc)
static const string kIncorrectCap
string GetAuthorString(const CName_std &name_std)
static pair< string, string > us_state_abbreviations[]
static bool IsValidStateAbbreviation(const string &state)
bool HasUnpubWithoutTitle(const CPubdesc &pubdesc)
const string kMissingAuthorsName
bool HasNoAffiliation(const CAffil &affil)
#define ADD_TO_AFFIL_SUMMARY(Fieldname)
const string kCitSubSummary
static int RemoveConsortium(CAuth_list &authors)
static bool IsCapNameCorrect(const string &name)
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static CS_CONTEXT * context