NCBI C++ ToolKit
pub_fix.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: pub_fix.cpp 101400 2023-12-08 19:03:47Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Alexey Dobronadezhdin
27  *
28  * File Description:
29  * Code for fixing up publications.
30  * MedArch lookup and post-processing utilities.
31  * Based on medutil.c written by James Ostell.
32  */
33 
34 #include <ncbi_pch.hpp>
35 
44 #include <objects/biblio/Title.hpp>
47 #include <objects/general/Date.hpp>
51 
52 #include <objects/pub/Pub.hpp>
53 
55 
56 #include "pub_fix_aux.hpp"
57 
58 #include <corelib/ncbi_message.hpp>
62 
63 #include <cmath>
64 
68 
/// Post a message to a listener, if one is supplied.
///
/// @param listener  Pointer-like expression. When it tests false nothing is
///                  posted and `message` is not evaluated.
/// @param severity  Forwarded to the CMessage_Basic constructor.
/// @param code      Forwarded to the CMessage_Basic constructor.
/// @param subcode   Forwarded to the CMessage_Basic constructor.
/// @param message   Stream-insertion expression, e.g. `"pmid=" << id`.
///
/// `listener` is parenthesised before `->` so that compound arguments
/// (conditional expressions, casts, ...) expand correctly. It is still
/// evaluated twice, so avoid arguments with side effects.
#define ERR_POST_TO_LISTENER(listener, severity, code, subcode, message) \
do { \
    if (listener) { \
        ostringstream ostr; \
        ostr << message; \
        string text = ostr.str(); \
        CMessage_Basic msg(text, severity, code, subcode); \
        (listener)->PostMessage(msg); \
    } \
} while (false)
79 
80 namespace fix_pub
81 {
83 {
84  string m_error_str;
86 };
87 
89 {
90  // I'm using it in blob_maint application. The string REFERENCE is not informative, changing to FixPub.
91  { err_Reference,{ "FixPub",
92  {
93  { err_Reference_MuidNotFound, "MuidNotFound" },
94  { err_Reference_SuccessfulMuidLookup, "SuccessfulMuidLookup" },
95  { err_Reference_OldInPress, "OldInPress" },
96  { err_Reference_No_reference, "No_reference" },
97  { err_Reference_Multiple_ref, "Multiple_ref" },
98  { err_Reference_Multiple_muid, "Multiple_muid" },
99  { err_Reference_MedlineMatchIgnored, "MedlineMatchIgnored" },
100  { err_Reference_MuidMissmatch, "MuidMissmatch" },
101  { err_Reference_NoConsortAuthors, "NoConsortAuthors" },
102  { err_Reference_DiffConsortAuthors, "DiffConsortAuthors" },
103  { err_Reference_PmidMissmatch, "PmidMissmatch" },
104  { err_Reference_Multiple_pmid, "Multiple_pmid" },
105  { err_Reference_FailedToGetPub, "FailedToGetPub" },
106  { err_Reference_MedArchMatchIgnored, "MedArchMatchIgnored" },
107  { err_Reference_SuccessfulPmidLookup, "SuccessfulPmidLookup" },
108  { err_Reference_PmidNotFound, "PmidNotFound" },
109  { err_Reference_NoPmidJournalNotInPubMed, "NoPmidJournalNotInPubMed" },
110  { err_Reference_PmidNotFoundInPress, "PmidNotFoundInPress" },
111  { err_Reference_NoPmidJournalNotInPubMedInPress, "NoPmidJournalNotInPubMedInPress" }
112  }
113  } },
114  { err_Print,{ "PRINT",
115  {
116  { err_Print_Failed, "Failed" }
117  }
118  } },
119  { err_AuthList,{ "AuthList",
120  {
121  { err_AuthList_SignificantDrop, "SignificantDrop" },
122  { err_AuthList_PreserveGB, "PreserveGB" },
123  { err_AuthList_LowMatch, "LowMatch" }
124  }
125  } }
126 };
127 }
128 
129 string CPubFix::GetErrorId(int err_code, int err_sub_code)
130 {
131  string ret;
132 
133  const auto& err_category = fix_pub::ERROR_CODE_STR.find(err_code);
134  if (err_category != fix_pub::ERROR_CODE_STR.end()) {
135 
136  const auto& error_sub_code_str = err_category->second.m_sub_errors.find(err_sub_code);
137  if (error_sub_code_str != err_category->second.m_sub_errors.end()) {
138  ret = err_category->second.m_error_str;
139  ret += '.';
140  ret += error_sub_code_str->second;
141  }
142  }
143 
144  return ret;
145 }
146 
147 
148 namespace fix_pub
149 {
150 // MedlineToISO(tmp)
151 // converts a MEDLINE citation to ISO/GenBank style
152 
153 void MedlineToISO(CCit_art& cit_art)
154 {
155  if (cit_art.IsSetAuthors()) {
156  cit_art.SetAuthors().ConvertMlToStd(true);
157  }
158 
159  if (!cit_art.IsSetFrom() || !cit_art.GetFrom().IsJournal())
160  return;
161 
162  // from a journal - get iso_jta
163  CCit_jour& journal = cit_art.SetFrom().SetJournal();
164  if (journal.IsSetImp()) {
165  // remove Eng language
166  if (journal.GetImp().IsSetLanguage() && journal.GetImp().GetLanguage() == "Eng")
167  journal.SetImp().ResetLanguage();
168  }
169 }
170 
171 // SplitMedlineEntry(mep)
172 // splits a medline entry into 2 pubs (1 muid, 1 Cit-art)
173 // converts Cit-art to ISO/GenBank style
174 // deletes original medline entry
176 {
177  if (medlines.size() != 1) {
178  return;
179  }
180 
181  CPub& pub = *medlines.front();
182  CMedline_entry& medline = pub.SetMedline();
183  if (!medline.IsSetCit() && medline.IsSetPmid() && medline.GetPmid() < ZERO_ENTREZ_ID) {
184  return;
185  }
186 
187  CRef<CPub> pmid;
188  if (medline.GetPmid() > ZERO_ENTREZ_ID) {
189  pmid.Reset(new CPub);
190  pmid->SetPmid(medline.GetPmid());
191  }
192 
193  CRef<CPub> cit_art;
194  if (medline.IsSetCit()) {
195  cit_art.Reset(new CPub);
196  cit_art->SetArticle(medline.SetCit());
197  MedlineToISO(cit_art->SetArticle());
198  }
199 
200  medlines.clear();
201 
202  if (pmid.NotEmpty())
203  medlines.push_back(pmid);
204 
205  if (cit_art.NotEmpty())
206  medlines.push_back(cit_art);
207 }
208 
209 
210 bool IsInpress(const CCit_art& cit_art)
211 {
212  if (!cit_art.IsSetFrom())
213  return false;
214 
215  bool ret = false;
216  if (cit_art.GetFrom().IsJournal()) {
217  const CCit_jour& journal = cit_art.GetFrom().GetJournal();
218  ret = journal.IsSetImp() && journal.GetImp().IsSetPrepub() && journal.GetImp().GetPrepub() == CImprint::ePrepub_in_press;
219  }
220  else if (cit_art.GetFrom().IsBook()) {
221  const CCit_book& book = cit_art.GetFrom().GetBook();
222  ret = book.IsSetImp() && book.GetImp().IsSetPrepub() && book.GetImp().GetPrepub() == CImprint::ePrepub_in_press;
223  }
224  else if (cit_art.GetFrom().IsProc() && cit_art.GetFrom().GetProc().IsSetBook()) {
225  const CCit_book& book = cit_art.GetFrom().GetProc().GetBook();
226  ret = book.IsSetImp() && book.GetImp().IsSetPrepub() && book.GetImp().GetPrepub() == CImprint::ePrepub_in_press;
227  }
228  return ret;
229 }
230 
231 
// Checks whether `str` has the textual shape of an ISSN:
//   nnnn-nnnn or nnnn-nnnX, where n is '0'-'9' (e.g. 0123-5566).
// The string must be exactly 9 characters long, with '-' at index 4 and
// nowhere else; an upper-case 'X' is accepted only as the last character.
// No checksum validation is performed.
//
// Characters are passed to isdigit() as unsigned char: calling it with a
// negative plain char (any non-ASCII byte) is undefined behaviour.
bool MULooksLikeISSN(const string& str)
{
    static const size_t ISSN_SIZE = 9;
    static const size_t ISSN_DASH_POS = 4;
    static const size_t ISSN_X_POS = 8;

    // Empty and all-blank strings are rejected by these two tests as well.
    if (str.size() != ISSN_SIZE || str[ISSN_DASH_POS] != '-') {
        return false;
    }

    for (size_t i = 0; i < ISSN_SIZE; ++i) {
        if (i == ISSN_DASH_POS) {
            continue; // already verified above
        }

        const char ch = str[i];
        const bool is_digit = isdigit(static_cast<unsigned char>(ch)) != 0;
        const bool is_check_x = (ch == 'X' && i == ISSN_X_POS);
        if (!is_digit && !is_check_x) {
            return false;
        }
    }

    return true;
}
253 
254 /*
255 bool MUIsJournalIndexed(const string& journal)
256 {
257  if (journal.empty()) {
258  return false;
259  }
260 
261  string title(journal);
262  NStr::ReplaceInPlace(title, "(", " ");
263  NStr::ReplaceInPlace(title, ")", " ");
264  NStr::ReplaceInPlace(title, ".", " ");
265 
266  title = NStr::Sanitize(title);
267 
268  CEutilsClient eutils;
269 
270  static const int MAX_ITEMS = 200;
271  eutils.SetMaxReturn(MAX_ITEMS);
272 
273  vector<string> ids;
274 
275  static const string EUTILS_DATABASE("nlmcatalog");
276 
277  try {
278  if (MULooksLikeISSN(title)) {
279  eutils.Search(EUTILS_DATABASE, title + "[issn]", ids);
280  }
281 
282  if (ids.empty()) {
283  eutils.Search(EUTILS_DATABASE, title + "[multi] AND ncbijournals[sb]", ids);
284  }
285 
286  if (ids.empty()) {
287  eutils.Search(EUTILS_DATABASE, title + "[jo]", ids);
288  }
289  }
290  catch (CException&) {
291  return false;
292  }
293 
294  if (ids.size() != 1) {
295  return false;
296  }
297 
298 
299  // getting the indexing status of the journal found
300  static const string SUMMARY_VERSION("2.0");
301  xml::document doc;
302  eutils.Summary(EUTILS_DATABASE, ids, doc, SUMMARY_VERSION);
303 
304  const xml::node& root_node = doc.get_root_node();
305  xml::node_set nodes(root_node.run_xpath_query("//DocumentSummarySet/DocumentSummary/CurrentIndexingStatus/text()"));
306 
307  string status;
308  if (nodes.size() == 1) {
309  status = nodes.begin()->get_content();
310  }
311 
312  return status == "Y";
313 }
314 */
315 
317  const string& term,
318  list<string>& ids) {
319  // error handling is modeled on that of CEUtilsClient::x_Search()
320  req.SetArgument("term", term);
321  for (int retry=0; retry<10; ++retry) {
322  try {
323  auto& istr = dynamic_cast<CConn_HttpStream&>(req.GetStream());
324  auto pRes = Ref(new esearch::CESearchResult());
325  istr >> MSerial_Xml >> *pRes;
326 
327  if (istr.GetStatusCode() == 200) {
328  if (pRes->IsSetData()) {
329  if (pRes->GetData().IsInfo() &&
330  pRes->GetData().GetInfo().IsSetContent() &&
331  pRes->GetData().GetInfo().GetContent().IsSetIdList()) {
332 
333  const auto& idList = pRes->GetData().GetInfo().GetContent().GetIdList();
334  if (idList.IsSetId()) {
335  ids = idList.GetId();
336  }
337  req.Disconnect();
338  return;
339  }
340  else
341  if (pRes->GetData().IsERROR()) {
343  pRes->GetData().GetERROR());
344  }
345  } // pRest->IsSetData()
346  } // istr.GetStatusCode() == 200
347  }
348  catch(CException& e) {
349  ERR_POST(Warning << "failed on attempt " << retry + 1
350  << ": " << e);
351  }
352  req.Disconnect();
353 
354  int sleepSeconds = int(sqrt(retry));
355  if (sleepSeconds) {
356  SleepSec(sleepSeconds);
357  }
358  } // retry
359 
361  "failed to execute query: " + term);
362 }
363 
364 
366  const string& id) {
367 
368  // error handling is modeled on that of CEUtilsClient::x_Summary()
369  CESummary_Request request("nlmcatalog", pContext);
370  request.GetId().AddId(id);
371  request.SetArgument("version", "2.0");
372  string xmlOutput;
373  bool success=false;
374  for (int retry=0; retry<10; ++retry) {
375  try {
376  auto& istr = dynamic_cast<CConn_HttpStream&>(request.GetStream());
377  NcbiStreamToString(&xmlOutput, istr);
378  if (istr.GetStatusCode() == 200) {
379  success = true;
380  break;
381  }
382  }
383  catch (...) {
384  }
385  request.Disconnect();
386 
387  int sleepSeconds = int(sqrt(retry));
388  if (sleepSeconds) {
389  SleepSec(sleepSeconds);
390  }
391  }
392 
393  if (!success) {
395  "failed to execute esummary request: " + request.GetQueryString());
396  }
397 
398  static const string indexingElement { "<CurrentIndexingStatus>Y</CurrentIndexingStatus>" };
399  auto firstPos = NStr::Find(xmlOutput, indexingElement, NStr::eNocase);
400  if (firstPos == NPOS) {
401  return false;
402  }
403  auto lastPos = NStr::Find(xmlOutput, indexingElement, NStr::eNocase, NStr::eReverseSearch);
404 
405  return firstPos == lastPos;
406 }
407 
408 
409 
410 bool MUIsJournalIndexed(const string& journal)
411 {
412  if (journal.empty()) {
413  return false;
414  }
415 
416  string title(journal);
417  NStr::ReplaceInPlace(title, "(", " ");
418  NStr::ReplaceInPlace(title, ")", " ");
419  NStr::ReplaceInPlace(title, ".", " ");
420 
421  title = NStr::Sanitize(title);
422 
423  list<string> ids;
424  auto pContext = Ref(new CEUtils_ConnContext());
425  CESearch_Request req("nlmcatalog", pContext);
426  req.SetRetMax(2);
427  req.SetUseHistory(false);
428  try {
429  if (MULooksLikeISSN(title)) {
430  s_GetESearchIds(req, title + "[issn]", ids);
431  }
432 
433  if (ids.empty()) {
434  s_GetESearchIds(req, title + "[multi] AND ncbijournals[sb]", ids);
435  }
436 
437  if (ids.empty()) {
438  s_GetESearchIds(req, title + "[jo]", ids);
439  }
440  }
441  catch (CException&) {
442  return false;
443  }
444 
445  for (const string& id : ids) {
446  if (s_IsIndexed(pContext, id)) {
447  return true;
448  }
449  }
450 
451  return false;
452 }
453 
454 
455 
456 void PrintPub(const CCit_art& cit_art, bool found, bool auth, long muid, IMessageListener* err_log)
457 {
458  string first_name,
459  last_name;
460 
461  if (cit_art.IsSetAuthors() && cit_art.GetAuthors().IsSetNames()) {
462 
463  if (cit_art.GetAuthors().GetNames().IsStd()) {
464 
465  const CAuthor& first_author = *cit_art.GetAuthors().GetNames().GetStd().front();
466 
467  if (first_author.IsSetName()) {
468  if (first_author.GetName().IsName()) {
469  const CName_std& namestd = first_author.GetName().GetName();
470  if (namestd.IsSetLast()) {
471  last_name = namestd.GetLast();
472  }
473  if (namestd.IsSetInitials()) {
474  first_name = namestd.GetInitials();
475  }
476  }
477  else if (first_author.GetName().IsConsortium()) {
478  last_name = first_author.GetName().GetConsortium();
479  }
480  }
481  }
482  else {
483  last_name = cit_art.GetAuthors().GetNames().GetStr().front();
484  }
485  }
486  else {
487  ERR_POST_TO_LISTENER(err_log, eDiag_Warning, err_Print, err_Print_Failed, "Authors NULL");
488  }
489 
490  const CImprint* imprint = nullptr;
491  const CTitle* title = nullptr;
492 
493  if (cit_art.IsSetFrom()) {
494  if (cit_art.GetFrom().IsJournal()) {
495  const CCit_jour& journal = cit_art.GetFrom().GetJournal();
496 
497  if (journal.IsSetTitle()) {
498  title = &journal.GetTitle();
499  }
500 
501  if (journal.IsSetImp()) {
502  imprint = &journal.GetImp();
503  }
504  }
505  else if (cit_art.GetFrom().IsBook()) {
506  const CCit_book& book = cit_art.GetFrom().GetBook();
507 
508  if (book.IsSetTitle()) {
509  title = &book.GetTitle();
510  }
511 
512  if (book.IsSetImp()) {
513  imprint = &book.GetImp();
514  }
515  }
516  }
517 
518  static const string UNKNOWN_JOURNAL("journal unknown");
519  string title_str(UNKNOWN_JOURNAL);
520 
521  if (title && title->IsSet() && !title->Get().empty()) {
522 
523  const CTitle::C_E& first_title = *title->Get().front();
524  const string& str = title->GetTitle(first_title);
525 
526  if (!str.empty())
527  title_str = str;
528  }
529 
530 
531  static const string NO_PAGE("no page number");
532  static const string NO_VOL("no volume number");
533 
534  string vol(NO_VOL),
535  page(NO_PAGE);
536 
537  int year = 0;
538  bool in_press = false;
539 
540  if (imprint) {
541 
542  if (imprint->IsSetVolume()) {
543  vol = imprint->GetVolume();
544  }
545 
546  if (imprint->IsSetPages()) {
547  page = imprint->GetPages();
548  }
549 
550  if (imprint->IsSetDate() && imprint->GetDate().IsStd() && imprint->GetDate().GetStd().IsSetYear()) {
551  year = imprint->GetDate().GetStd().GetYear();
552  }
553 
554  in_press = imprint->IsSetPrepub() && imprint->GetPrepub() == CImprint::ePrepub_in_press;
555  }
556 
557  ostringstream ostr;
558  ostr << last_name << " " << first_name << "|" << title_str << "|(" << year << ")|" << vol << "|" << page;
559  const string tail = ostr.str();
560 
561  if (auth) {
563  "Too many author name differences: " << muid << "|" << tail);
564  return;
565  }
566 
567  if (in_press) {
568 
569  int cur_year = CDate_std(CTime(CTime::eCurrent)).GetYear();
570  static const int YEAR_MAX_DIFF = 2;
571 
572  if (year && cur_year - year > YEAR_MAX_DIFF) {
574  "encountered in-press article more than " << YEAR_MAX_DIFF << " years old: " << tail);
575  }
576  }
577 
578  if (found) {
580  muid << "|" << tail);
581  }
582  else if (MUIsJournalIndexed(title_str)) {
583  if (muid) {
585  ">>" << muid << "<<|" << tail);
586  }
587  else {
589  tail);
590  }
591  }
592  else {
593  if (muid) {
595  ">>" << muid << "<<|" << tail);
596  }
597  else {
599  tail);
600  }
601  }
602 }
603 
604 
605 bool IsFromBook(const CCit_art& art)
606 {
607  return art.IsSetFrom() && art.GetFrom().IsBook();
608 }
609 
610 static void MoveAuthors(CCit_art& to, CCit_art& from)
611 {
612  to.SetAuthors(from.SetAuthors());
613  from.ResetAuthors();
614 }
615 
616 static const size_t MAX_MATCH_COEFF = 3;
617 
618 bool TenAuthorsCompare(CCit_art& cit_old, CCit_art& cit_new)
619 {
620  _ASSERT(cit_old.IsSetAuthors() && cit_new.IsSetAuthors() &&
621  cit_old.GetAuthors().IsSetNames() && cit_new.GetAuthors().IsSetNames() && "Both arguments should have valid author's names at this point");
622 
623  const CAuth_list::C_Names& old_names = cit_old.GetAuthors().GetNames();
624  const CAuth_list::C_Names& new_names = cit_new.GetAuthors().GetNames();
625 
626  auto StrNotEmpty = [](const string& str) -> bool { return !str.empty(); };
627  size_t new_num_of_authors = count_if(new_names.GetStr().begin(), new_names.GetStr().end(), StrNotEmpty),
628  num_of_authors = count_if(old_names.GetStr().begin(), old_names.GetStr().end(), StrNotEmpty);
629 
630  size_t match = 0;
631  for (const string& name : old_names.GetStr()) {
632 
633  if (!name.empty()) {
634  if (NStr::FindNoCase(new_names.GetStr(), name)) {
635  ++match;
636  }
637  }
638  }
639 
640  size_t min_num_of_authors = min(num_of_authors, new_num_of_authors);
641 
642  if (min_num_of_authors > MAX_MATCH_COEFF * match) {
643  return false;
644  }
645 
646  static const size_t MAX_AUTHORS = 10;
647  if (min_num_of_authors > MAX_AUTHORS) {
648  MoveAuthors(cit_new, cit_old);
649  }
650 
651  return true;
652 }
653 
655 {
656  size_t num_of_names = 0;
657 
658  for (const auto& name : names)
659  {
660  const CAuthor& auth = *name;
661  if (auth.IsSetName() && auth.GetName().IsName()) {
662  ++num_of_names;
663  }
664  else if (auth.IsSetName() && auth.GetName().IsConsortium()) {
665 
666  const string& cur_consortium = auth.GetName().GetConsortium();
667  extracted.push_back(cur_consortium);
668  }
669  }
670 
671  extracted.sort([](const string& a, const string& b) { return NStr::CompareNocase(a, b) == -1; });
672 
673  return num_of_names;
674 }
675 
676 static bool s_GetConsortia(const list<CRef<CAuthor>>& authors, list<string>& consortia) {
677 
678  if (!consortia.empty()) {
679  consortia.clear();
680  }
681 
682  for (const auto& pAuthor : authors) {
683  if (pAuthor &&
684  pAuthor->IsSetName() &&
685  pAuthor->GetName().IsConsortium()) {
686  consortia.push_back(pAuthor->GetName().GetConsortium());
687  }
688  }
689 
690  if (!consortia.empty()) {
691  consortia.sort(PNocase());
692  return true;
693  }
694 
695  return false;
696 }
697 
698 
700 {
703 }
704 
705 static bool s_ConsortiaMatch(const list<string>& x, const list<string>& y)
706 {
707  if (x.size() != y.size()) {
708  return false;
709  }
710 
711 
712  auto yit = begin(y);
713  for (auto xit = begin(x); xit != end(x); ++xit, ++yit) {
714  auto xval = NStr::TruncateSpaces_Unsafe(*xit);
715  auto yval = NStr::TruncateSpaces_Unsafe(*yit);
716 
717  if (!NStr::EqualNocase(xval, yval)) {
718  const auto xval_starts_with_the = NStr::StartsWith(xval, "The", NStr::eNocase);
719  const auto yval_starts_with_the = NStr::StartsWith(yval, "The", NStr::eNocase);
720  if (xval_starts_with_the && !yval_starts_with_the) {
721  s_TrimPrefixThe(xval);
722  if (!NStr::EqualNocase(xval, yval)) {
723  return false;
724  }
725  }
726  else if(yval_starts_with_the && !xval_starts_with_the) {
727  s_TrimPrefixThe(yval);
728  if (!NStr::EqualNocase(xval, yval)) {
729  return false;
730  }
731  }
732  else {
733  return false;
734  }
735  }
736  }
737 
738  return true;
739 }
740 
741 static void s_ProcessConsortia(const CCit_art& old_cit, CCit_art& new_cit, IMessageListener* pListener)
742 {
743  if (!old_cit.IsSetAuthors() ||
744  !old_cit.GetAuthors().IsSetNames() ||
745  !old_cit.GetAuthors().GetNames().IsStd()) {
746  return;
747  }
748 
749  if (!new_cit.IsSetAuthors() ||
750  !new_cit.GetAuthors().IsSetNames() ||
751  !new_cit.GetAuthors().GetNames().IsStd()) {
752  return;
753  }
754 
755  list<string> old_consortia;
756  if (!s_GetConsortia(old_cit.GetAuthors().GetNames().GetStd(), old_consortia)) {
757  return;
758  }
759 
760  list<string> new_consortia;
761  if (!s_GetConsortia(new_cit.GetAuthors().GetNames().GetStd(), new_consortia)) {
762  auto old_consortia_string = NStr::Join(old_consortia, ";");
764  "Publication as returned by MedArch lacks consortium authors of the original publication : \""
765  << old_consortia_string << "\".");
766 
767  auto& std_list = new_cit.SetAuthors().SetNames().SetStd();
768  transform(begin(old_consortia),
769  end(old_consortia),
770  back_inserter(std_list),
771  [](const string& consortium) {
772  auto pAuthor = Ref(new CAuthor());
773  pAuthor->SetName().SetConsortium(consortium);
774  return pAuthor; });
775  }
776  else if (!s_ConsortiaMatch(old_consortia, new_consortia)) {
777  auto old_consortia_string = NStr::Join(old_consortia, ";");
778  auto new_consortia_string = NStr::Join(new_consortia, ";");
780  "Consortium author names differ. Original is \""
781  << old_consortia_string
782  << "\". MedArch's is \""
783  << new_consortia_string << "\".");
784 
785  }
786 }
787 
788 
789 void GetFirstTenNames(const CAuth_list::C_Names::TStd& names, list<CTempString>& res)
790 {
791  static const size_t MAX_EXTRACTED = 10;
792  size_t extracted = 0;
793 
794  for (const auto& name : names) {
795  if (name->IsSetName() && name->GetName().IsName() && name->GetName().GetName().IsSetLast()) {
796  res.push_back(name->GetName().GetName().GetLast());
797  ++extracted;
798 
799  if (extracted == MAX_EXTRACTED) {
800  break;
801  }
802  }
803  }
804 }
805 
806 
807 bool TenAuthorsProcess(CCit_art& cit, CCit_art& new_cit, IMessageListener* err_log)
808 {
809  if (!new_cit.IsSetAuthors() || !new_cit.GetAuthors().IsSetNames()) {
810  if (cit.IsSetAuthors()) {
811  MoveAuthors(new_cit, cit);
812  }
813  return true;
814  }
815 
816  if (!cit.IsSetAuthors() || !cit.GetAuthors().IsSetNames() ||
817  cit.GetAuthors().GetNames().Which() != new_cit.GetAuthors().GetNames().Which()) {
818  return true;
819  }
820 
821  if (!cit.GetAuthors().GetNames().IsStd()) {
822  return TenAuthorsCompare(cit, new_cit);
823  }
824 
825  CAuth_list::C_Names::TStr old_consortiums;
826  size_t num_names = ExtractConsortiums(cit.GetAuthors().GetNames().GetStd(), old_consortiums);
827 
828  CAuth_list::C_Names::TStr new_consortiums;
829  size_t new_num_names = ExtractConsortiums(new_cit.GetAuthors().GetNames().GetStd(), new_consortiums);
830 
831  if (!old_consortiums.empty()) {
832 
833  string old_cons_list = NStr::Join(old_consortiums, ";");
834  if (new_consortiums.empty()) {
835 
837  "Publication as returned by MedArch lacks consortium authors of the original publication : \"" << old_cons_list << "\".");
838 
839  for_each(old_consortiums.begin(), old_consortiums.end(),
840  [&new_cit](const string& consortium) {
841 
842  CRef<CAuthor> auth(new CAuthor);
843  auth->SetName().SetConsortium(consortium);
844 
845  new_cit.SetAuthors().SetNames().SetStd().push_front(auth);
846  });
847  }
848  else {
849 
850  string new_cons_list = NStr::Join(new_consortiums, ";");
851  if (!NStr::EqualNocase(old_cons_list, new_cons_list)) {
853  "Consortium author names differ. Original is \"" << old_cons_list << "\". MedArch's is \"" << new_cons_list << "\".");
854  }
855  }
856 
857  if (num_names == 0) {
858  return true;
859  }
860  }
861 
862  list<CTempString> new_author_names;
863  GetFirstTenNames(new_cit.GetAuthors().GetNames().GetStd(), new_author_names);
864  size_t match = 0;
865 
866  for (const auto& name: cit.GetAuthors().GetNames().GetStd())
867  {
868  const CAuthor& auth = *name;
869  if (auth.IsSetName() && auth.GetName().IsName() && auth.GetName().GetName().IsSetLast()) {
870 
871  const string& last_name = auth.GetName().GetName().GetLast();
872  if (find_if(new_author_names.begin(), new_author_names.end(),
873  [&last_name](const CTempString& cur_name)
874  {
875  return NStr::EqualNocase(last_name, cur_name);
876  }) != new_author_names.end()) {
877 
878  ++match;
879  }
880  }
881  }
882 
883  size_t min_num_names = min(num_names, new_author_names.size());
884  if (min_num_names > MAX_MATCH_COEFF * match) {
885  return false;
886  }
887 
888  bool replace_authors = new_num_names == 0;
889  if (!replace_authors && new_num_names < num_names) {
890  // Check the last author from PubMed. If it is "et al" - leave the old authors list
891  const CAuthor& last_author = *new_cit.GetAuthors().GetNames().GetStd().back();
892  if (last_author.IsSetName() && last_author.GetName().IsName()) {
893 
894  const CName_std& name = last_author.GetName().GetName();
895  string last_name = name.IsSetLast() ? name.GetLast() : "",
896  initials = name.IsSetInitials() ? name.GetInitials() : "";
897 
898  replace_authors = NStr::EqualNocase(last_name, "et") &&
899  NStr::EqualNocase(initials, "al");
900  }
901 
902  // If the last author does not contain "et al", look at the amount of authors
903  // This is done according to the next document:
904  // ~cavanaug/WORK/MedArch/doc.medarch.4genbank.txt
905  //
906  // If the MedArchCitArt has zero Name-std Author.name ...
907  //
908  // Or if the InputCitArt has more than 10 Name - std Author.name while
909  // the MedArchCitArt has less than 12 ...
910  //
911  // Or if the InputCitArt has more than 25 Name - std Author.name while
912  // the MedArchCitArt has less than 27 ...
913  //
914  // Then free the Auth - list of the MedArchCitArt and replace it with
915  // the Auth - list of the InputCitArt, and **null out** the Auth - list
916  // of the MedArchCitArt .
917  if (!replace_authors)
918  {
919  static const int MIN_FIRST_AUTHORS_THRESHOLD_1995 = 10;
920  static const int MAX_FIRST_AUTHORS_THRESHOLD_1995 = 12;
921 
922  static const int MIN_SECOND_AUTHORS_THRESHOLD_1999 = 25;
923  static const int MAX_SECOND_AUTHORS_THRESHOLD_1999 = 27;
924 
925  replace_authors = (new_num_names < MAX_FIRST_AUTHORS_THRESHOLD_1995 && num_names > MIN_FIRST_AUTHORS_THRESHOLD_1995) ||
926  (new_num_names < MAX_SECOND_AUTHORS_THRESHOLD_1999 && num_names > MIN_SECOND_AUTHORS_THRESHOLD_1999);
927  }
928  }
929 
930  if (replace_authors) {
931  MoveAuthors(new_cit, cit);
932  }
933 
934  return true;
935 }
936 
937 
938 void MergeNonPubmedPubIds(const CCit_art& cit_old, CCit_art& cit_new)
939 {
940  if (!cit_old.IsSetIds()) {
941  return;
942  }
943 
944  const CArticleIdSet& old_ids = cit_old.GetIds();
945 
946  for (const auto& cur_id : old_ids.Get()) {
947 
948  if (!cur_id->IsDoi() && !cur_id->IsOther()) {
949  continue;
950  }
951 
952  bool found = false;
953  if (cit_new.IsSetIds()) {
954 
955  auto& new_ids = cit_new.GetIds().Get();
956  found = find_if(new_ids.begin(), new_ids.end(),
957  [&cur_id](const CRef<CArticleId>& new_id)
958  {
959  if (cur_id->Which() != new_id->Which()) {
960  return false;
961  }
962 
963  if (new_id->IsDoi()) {
964  return true;
965  }
966 
967  bool res = cur_id->GetOther().IsSetDb() == new_id->GetOther().IsSetDb();
968  if (res && cur_id->GetOther().IsSetDb()) {
969  res = cur_id->GetOther().GetDb() == new_id->GetOther().GetDb();
970  }
971  return res;
972  }) != new_ids.end();
973  }
974 
975  if (!found) {
976  cit_new.SetIds().Set().push_front(cur_id);
977  }
978  }
979 }
980 
981 
982 bool NeedToPropagateInJournal(const CCit_art& cit_art)
983 {
984  if (!cit_art.IsSetFrom() || !cit_art.GetFrom().IsJournal() ||
985  !cit_art.GetFrom().GetJournal().IsSetTitle() || !cit_art.GetFrom().GetJournal().GetTitle().IsSet() ||
986  cit_art.GetFrom().GetJournal().GetTitle().Get().empty()) {
987  return true;
988  }
989 
990  const CCit_jour& journal = cit_art.GetFrom().GetJournal();
991  if (!journal.IsSetImp()) {
992  return true;
993  }
994 
995  if (!journal.GetImp().IsSetVolume() || !journal.GetImp().IsSetPages() || !journal.GetImp().IsSetDate()) {
996  return true;
997  }
998 
999  return false;
1000 }
1001 
1002 
1003 void PropagateInPress(bool inpress, CCit_art& cit_art)
1004 {
1005  if (!inpress)
1006  return;
1007 
1008  if (!cit_art.IsSetFrom() || !NeedToPropagateInJournal(cit_art)) {
1009  return;
1010  }
1011 
1012  CImprint* imprint = nullptr;
1013 
1014  switch (cit_art.GetFrom().Which()) {
1015 
1017  if (cit_art.GetFrom().GetJournal().IsSetImp()) {
1018  imprint = &cit_art.SetFrom().SetJournal().SetImp();
1019  }
1020  break;
1021 
1023  if (cit_art.GetFrom().GetBook().IsSetImp()) {
1024  imprint = &cit_art.SetFrom().SetBook().SetImp();
1025  }
1026  break;
1027 
1029  if (cit_art.GetFrom().GetProc().IsSetBook() && cit_art.GetFrom().GetProc().GetBook().IsSetImp()) {
1030  imprint = &cit_art.SetFrom().SetProc().SetBook().SetImp();
1031  }
1032  break;
1033 
1034  default:; // do nothing
1035  }
1036 
1037  if (imprint) {
1039  }
1040 }
1041 
1042 }
1043 
1044 using namespace fix_pub;
1045 
1047 {
1048  CPub_equiv::Tdata muids,
1049  pmids,
1050  medlines,
1051  others,
1052  cit_arts;
1053 
1054  if (pub_equiv.IsSet()) {
1055  for (auto& pub : pub_equiv.Set())
1056  {
1057  if (pub->IsMuid()) {
1058  muids.push_back(pub);
1059  }
1060  else if (pub->IsPmid()) {
1061  pmids.push_back(pub);
1062  }
1063  else if (pub->IsArticle()) {
1064  if (IsFromBook(pub->GetArticle())) {
1065  others.push_back(pub);
1066  }
1067  else {
1068  cit_arts.push_back(pub);
1069  }
1070  }
1071  else if (pub->IsMedline()) {
1072  medlines.push_back(pub);
1073  }
1074  else {
1075  others.push_back(pub);
1076  }
1077  }
1078  }
1079 
1080  auto& pub_list = pub_equiv.Set();
1081  pub_list.clear();
1082 
1083  if ((!muids.empty() || !pmids.empty()) && !m_always_lookup) {
1084  // pmid or muid is present
1085  pub_list.splice(pub_list.end(), cit_arts);
1086  pub_list.splice(pub_list.end(), muids);
1087  pub_list.splice(pub_list.end(), pmids);
1088  pub_list.splice(pub_list.end(), medlines);
1089  pub_list.splice(pub_list.end(), others);
1090  return;
1091  }
1092 
1093  pub_list.splice(pub_list.end(), others);
1094 
1095  if (!medlines.empty())
1096  {
1097  if (medlines.size() > 1) {
1098  ERR_POST_TO_LISTENER(m_err_log, eDiag_Warning, err_Reference, err_Reference_Multiple_ref, "More than one Medline entry in Pub-equiv");
1099  medlines.resize(1);
1100  }
1101 
1102  SplitMedlineEntry(medlines);
1103  pub_list.splice(pub_list.end(), medlines);
1104  }
1105 
1106  TEntrezId oldpmid = ZERO_ENTREZ_ID;
1107  if (!pmids.empty()) {
1108 
1109  oldpmid = pmids.front()->GetPmid();
1110 
1111  // check if more than one
1112  for (const auto& pub : pmids) {
1113  if (pub->GetPmid() != oldpmid) {
1115  "Two different pmids in Pub-equiv [" << oldpmid << "] [" << pub->GetPmid() << "]");
1116  }
1117  }
1118  pmids.resize(1);
1119  }
1120 
1121  TEntrezId oldmuid = ZERO_ENTREZ_ID;
1122  if (!muids.empty()) {
1123 
1124  oldmuid = muids.front()->GetMuid();
1125 
1126  // check if more than one
1127  for (const auto& pub : muids) {
1128  if (pub->GetMuid() != oldmuid) {
1130  "Two different muids in Pub-equiv [" << oldmuid << "] [" << pub->GetMuid() << "]");
1131  }
1132  }
1133  muids.resize(1);
1134  }
1135 
1136  if (!cit_arts.empty()) {
1137  if (cit_arts.size() > 1) {
1138  // ditch extras
1139  ERR_POST_TO_LISTENER(m_err_log, eDiag_Warning, err_Reference, err_Reference_Multiple_ref, "More than one Cit-art in Pub-equiv");
1140  cit_arts.resize(1);
1141  }
1142 
1143  CCit_art* cit_art = &cit_arts.front()->SetArticle();
1144  bool inpress = IsInpress(*cit_art);
1145 
1146  CRef<CPub> new_pub(new CPub);
1147  new_pub->SetArticle(*cit_art);
1148 
1149  TEntrezId pmid = ZERO_ENTREZ_ID;
1150  try {
1151  if (m_upd) {
1152  pmid = m_upd->CitMatch(*new_pub);
1153  }
1154  } catch (...) {
1155  // pmid == 0
1156  }
1157 
1158  if ( pmid != ZERO_ENTREZ_ID ) {
1159 
1160  PrintPub(*cit_art, true, false, ENTREZ_ID_TO(long, pmid), m_err_log);
1161 
1162  if (oldpmid > ZERO_ENTREZ_ID && oldpmid != pmid) {
1163  // already had a pmid
1165  "OldPMID=" << oldpmid << " doesn't match lookup (" << pmid << "). Keeping lookup.");
1166  }
1167 
1168  bool set_pmid = true;
1169  if (m_replace_cit) {
1170 
1171  CRef<CCit_art> new_cit_art = FetchPubPmId(pmid, m_upd);
1172 
1173  if (new_cit_art.NotEmpty()) {
1174 
1175  bool new_cit_is_valid(false);
1177  CAuthListValidator::EOutcome outcome = m_authlist_validator.validate(*cit_art, *new_cit_art);
1178  switch (outcome) {
1180  new_cit_is_valid = true;
1181  s_ProcessConsortia(*cit_art, *new_cit_art, m_err_log);
1182  break;
1184  MoveAuthors(*new_cit_art, *cit_art);
1185  new_cit_is_valid = true;
1186  break;
1188  new_cit_is_valid = false;
1189  break;
1190  default:
1191  throw logic_error("Invalid outcome returned by CAuthListValidator::validate(): " + std::to_string(outcome));
1192  }
1193  }
1194  else {
1195  new_cit_is_valid = TenAuthorsProcess(*cit_art, *new_cit_art, m_err_log);
1196  }
1197 
1198  if (new_cit_is_valid) {
1199  if (pmids.empty()) {
1200  CRef<CPub> pmid_pub(new CPub);
1201  pmids.push_back(pmid_pub);
1202  }
1203 
1204  pmids.front()->SetPmid().Set(pmid);
1205  pub_list.splice(pub_list.end(), pmids);
1206 
1207  CRef<CPub> cit_pub(new CPub);
1208  cit_pub->SetArticle(*new_cit_art);
1209  pub_list.push_back(cit_pub);
1210 
1211  if (m_merge_ids) {
1212  MergeNonPubmedPubIds(*cit_art, cit_pub->SetArticle());
1213  }
1214 
1215  cit_arts.clear();
1216  cit_arts.push_back(cit_pub);
1217  cit_art = new_cit_art;
1218  }
1219  else {
1220  pmids.clear();
1221 
1222  PrintPub(*cit_art, false, true, ENTREZ_ID_TO(long, pmid), m_err_log);
1223  pub_list.splice(pub_list.end(), cit_arts);
1224  }
1225 
1226  set_pmid = false;
1227  }
1228  else {
1230  "Failed to get pub from MedArch server for pmid = " << pmid << ". Input one is preserved.");
1231  }
1232  }
1233 
1234  if (set_pmid) {
1235  if (pmids.empty()) {
1236  CRef<CPub> pmid_pub(new CPub);
1237  pmids.push_back(pmid_pub);
1238  }
1239 
1240  pmids.front()->SetPmid().Set(pmid);
1241  pub_list.splice(pub_list.end(), pmids);
1242 
1243  MedlineToISO(*cit_art);
1244 
1245  pub_list.splice(pub_list.end(), cit_arts);
1246  }
1247 
1248  PropagateInPress(inpress, *cit_art);
1249  return;
1250  }
1251 
1252  PrintPub(*cit_art, false, false, ENTREZ_ID_TO(long, oldpmid), m_err_log);
1253  PropagateInPress(inpress, *cit_art);
1254  pub_list.splice(pub_list.end(), cit_arts);
1255 
1256  return;
1257  }
1258 
1259  if (oldpmid != ZERO_ENTREZ_ID) {
1260  // have a pmid but no cit-art
1261 
1262  CRef<CCit_art> new_cit_art = FetchPubPmId(oldpmid, m_upd);
1263 
1264  if (new_cit_art.NotEmpty()) {
1265 
1266  pub_list.splice(pub_list.end(), pmids);
1267 
1268  if (m_replace_cit) {
1269  MedlineToISO(*new_cit_art);
1270  CRef<CPub> cit_pub(new CPub);
1271  cit_pub->SetArticle(*new_cit_art);
1272  pub_list.push_back(cit_pub);
1273  }
1274 
1275  return;
1276  }
1278  "Cant find article for pmid [" << oldpmid << "]");
1279  }
1280 
1281  if (oldpmid > ZERO_ENTREZ_ID) {
1282  pub_list.splice(pub_list.end(), pmids);
1283  }
1284  else if (oldmuid > ZERO_ENTREZ_ID) {
1285  pub_list.splice(pub_list.end(), muids);
1286  }
1287 }
1288 
1289 
1290 // Tries to make any Pub into muid / cit - art
1292 {
1293  switch (pub.Which()) {
1294 
1295  case CPub::e_Medline:
1296  {
1297  CRef<CPub_equiv> pub_equiv(new CPub_equiv);
1298  pub_equiv->Set().push_back(CRef<CPub>(new CPub));
1299  pub_equiv->Set().front()->Assign(pub);
1300 
1301  SplitMedlineEntry(pub_equiv->Set());
1302  pub.SetEquiv().Assign(*pub_equiv);
1303  }
1304  break;
1305 
1306  case CPub::e_Article:
1307  {
1308  CCit_art& cit_art = pub.SetArticle();
1309  if (cit_art.IsSetFrom() && cit_art.GetFrom().IsBook()) {
1310  return;
1311  }
1312 
1313  TEntrezId pmid = ZERO_ENTREZ_ID;
1314  try {
1315  if (m_upd) {
1316  pmid = m_upd->CitMatch(pub);
1317  }
1318  } catch (...) {
1319  // pmid == 0;
1320  }
1321 
1322  if (pmid > ZERO_ENTREZ_ID) {
1323  PrintPub(cit_art, true, false, ENTREZ_ID_TO(long, pmid), m_err_log);
1324  if (m_replace_cit) {
1325  CRef<CCit_art> new_cit_art = FetchPubPmId(pmid, m_upd);
1326 
1327  if (new_cit_art.NotEmpty()) {
1328  if (TenAuthorsProcess(cit_art, *new_cit_art, m_err_log)) {
1329 
1330  if (m_merge_ids) {
1331  MergeNonPubmedPubIds(*new_cit_art, cit_art);
1332  }
1333 
1334  CRef<CPub> new_pub(new CPub);
1335  new_pub->SetArticle(*new_cit_art);
1336  pub.SetEquiv().Set().push_back(new_pub);
1337 
1338  new_pub.Reset(new CPub);
1339  new_pub->SetPmid().Set(pmid);
1340  pub.SetEquiv().Set().push_back(new_pub);
1341  }
1342  else {
1343  PrintPub(cit_art, false, true, ENTREZ_ID_TO(long, pmid), m_err_log);
1344  MedlineToISO(cit_art);
1345  }
1346  }
1347  }
1348  else {
1349  PrintPub(cit_art, false, false, ENTREZ_ID_TO(long, pmid), m_err_log);
1350  MedlineToISO(cit_art);
1351  }
1352  }
1353  }
1354  break;
1355 
1356  case CPub::e_Equiv:
1357  FixPubEquiv(pub.SetEquiv());
1358  break;
1359 
1360  default:; // do nothing
1361  }
1362 }
1363 
1365 {
1366  CRef<CCit_art> cit_art;
1367  if (!upd || pmid < ZERO_ENTREZ_ID) {
1368  return cit_art;
1369  }
1370 
1371  CRef<CPub> pub;
1372  try {
1373  pub = upd->GetPub(pmid);
1374  } catch (...) {
1375  return cit_art;
1376  }
1377 
1378  if (pub.NotEmpty() && pub->IsArticle()) {
1379  cit_art.Reset(new CCit_art);
1380  cit_art->Assign(pub->GetArticle());
1381 
1382  MedlineToISO(*cit_art);
1383  }
1384 
1385  return cit_art;
1386 }
1387 
1388 
// Class-wide (static) switches of CAuthListValidator.
// 'enabled' is the built-in default; Configure() may replace it with the
// value of the "enabled" registry key.
1389 bool CAuthListValidator::enabled = true; // Verified in ID-6550, so set to use it by default
1390  // Setting it to false would lead to a few bugs
// Set to true by Configure(); the constructor calls Configure() only while
// this flag is still false.
1391 bool CAuthListValidator::configured = false;
1394 void CAuthListValidator::Configure(const CNcbiRegistry& cfg, const string& section)
1395 {
1396  enabled = cfg.GetBool(section, "enabled", enabled);
1397  cfg_matched_to_min = cfg.GetDouble(section, "matched_to_min", cfg_matched_to_min);
1398  cfg_removed_to_gb = cfg.GetDouble(section, "removed_to_gb", cfg_removed_to_gb);
1399  configured = true;
1400 }
1401 
1403  : outcome(eNotSet), pub_year(0), reported_limit("not initialized"), m_err_log(err_log)
1404 {
1405  if (! configured) {
1406  Configure(CNcbiApplication::Instance()->GetConfig(), "auth_list_validator");
1407  }
1408 }
1409 
// NOTE(review): several source lines of this function are absent from this
// listing (the line numbering jumps, e.g. 1418 -> 1421 and 1426 -> 1432).
// The comments below describe only the statements that are visible.
1411 {
     // Start from a clean state for this pair of citations.
1412  outcome = eNotSet;
1413  pub_year = 0;
     // Publication year comes from the journal imprint date of the PubMed
     // citation.
     // NOTE(review): no IsJournal()/IsStd() checks are visible before this
     // accessor chain - confirm that callers only pass journal articles
     // with a structured date.
1414  pub_year = pm_art.GetFrom().GetJournal().GetImp().GetDate().GetStd().GetYear();
     // Years outside 1900..3000 are rejected with a logic_error.
1415  if (pub_year < 1900 || pub_year > 3000) {
1416  throw logic_error("Publication from PubMed has invalid year: " + std::to_string(pub_year));
1417  }
1418  if (gb_art.IsSetAuthors()) {
1421  }
1422  if (pm_art.IsSetAuthors()) {
1425  }
1426  matched.clear();
1432  "Too many authors removed (" << cnt_removed << ") compared to total Genbank authors (" << cnt_gb << ")");
1433  }
     // Limits used below, by publication year: after 1999 "Unlimited",
     // 1996-1999 "25 authors", before 1996 "10 authors".
1434  // determine outcome according to ID-6514 (see fix_pub.hpp)
1435  if (pub_year > 1999) {
1436  reported_limit = "Unlimited";
1438  }
1439  else if (pub_year > 1995) {
1440  reported_limit = "25 authors";
1441  if (cnt_gb > 25) {
1443  "Preserving original " << cnt_gb << " GB authors, ignoring " << cnt_pm << " PubMed authors "
1444  << "(PubMed limit was " << reported_limit << " in pub.year " << pub_year << ")");
1446  }
1447  else {
1449  }
1450  }
1451  else { // pub_year < 1996
1452  reported_limit = "10 authors";
1453  if (cnt_gb > 10) {
1455  "Preserving original " << cnt_gb << " GB authors, ignoring " << cnt_pm << " PubMed authors "
1456  << "(PubMed limit was " << reported_limit << " in pub.year " << pub_year << ")");
1458  }
1459  else {
1461  }
1462  }
1463  // check minimum required # of matching authors
1466  "Only " << cnt_matched << " authors matched between " << cnt_gb << " Genbank and "
1467  << cnt_pm << " PubMed. Match/Min ratio " << fixed << setprecision(2) << actual_matched_to_min
1468  << " is below threshold " << fixed << setprecision(2) << cfg_matched_to_min);
1470  }
     // The decision is stored in the 'outcome' member and also returned.
1471  return outcome;
1472 }
1473 
1475 {
1476  out << "\n--- Debug Dump of CAuthListValidator object ---\n";
1477  out << "pub_year: " << pub_year << "\n";
1478  out << "PubMed Auth-list limit in " << pub_year << ": " << reported_limit << "\n";
1479  out << "Configured ratio 'matched' to 'min(gb,pm)': " << cfg_matched_to_min
1480  << "; actual: " << actual_matched_to_min << "\n";
1481  out << "Configured ratio 'removed' to 'gb': " << cfg_removed_to_gb
1482  << "; actual: " << actual_removed_to_gb << "\n";
1483  out << "GB author list type: " << gb_type << "; # of entries: " << cnt_gb << "\n";
1484  out << "PM author list type: " << pm_type << "; # of entries: " << cnt_pm << "\n";
1485  dumplist("Matched", matched, out);
1486  dumplist("Added", added, out);
1487  dumplist("Removed", removed, out);
1488  const char* outcome_names[] = {"NotSet", "Failed_validation", "Accept_pubmed", "Keep_genbank"};
1489  out << "Outcome reported: " << outcome_names[outcome] << "(" << outcome << ")\n";
1490  out << "--- End of Debug Dump of CAuthListValidator object ---\n\n";
1491 }
1492 
1493 void CAuthListValidator::dumplist(const char* hdr, const list<string>& lst, CNcbiOstream& out) const
1494 {
1495  out << lst.size() << " " << hdr << " authors:\n";
1496  for (const string& a : lst)
1497  out << " " << a << "\n";
1498 }
1499 
// Moves every name that occurs in both 'removed' and 'added' into 'matched'
// and then refreshes the counters.
// NOTE(review): source lines 1517-1518 are absent from this listing; they
// presumably set cnt_gb and cnt_pm, which are read on line 1519 - confirm
// against the full file.
1501 {
1502  auto gbit = removed.begin();
1503  while (gbit != removed.end()) {
     // Remember the successor first: 'gbit' may be erased below.
1504  list<string>::iterator gbnext(gbit);
1505  ++gbnext;
     // Look for the same name among the 'added' entries.
1506  list<string>::iterator pmit = std::find(added.begin(), added.end(), *gbit);
1507  if (pmit != added.end()) {
     // Present in both lists: record it as matched and take it out of
     // 'removed' and 'added'.
1508  matched.push_back(*gbit);
1509  removed.erase(gbit++);
1510  added.erase(pmit);
1511  }
1512  gbit = gbnext;
1513  }
     // Counters reflect the list sizes after the matching pass.
1514  cnt_matched = matched.size();
1515  cnt_removed = removed.size();
1516  cnt_added = added.size();
1519  cnt_min = min(cnt_gb, cnt_pm);
1520 }
1521 
1522 
1523 void CAuthListValidator::get_lastnames(const CAuth_list& authors, list<string>& lastnames, string& auth_string)
1524 {
1525  lastnames.clear();
1526  switch (authors.GetNames().Which()) {
1528  get_lastnames(authors.GetNames().GetStd(), lastnames);
1529  break;
1531  {{
1532  CRef< CAuth_list > authlist_std;
1533  authlist_std->Assign(authors);
1534  authlist_std->ConvertMlToStandard();
1535  get_lastnames(authlist_std->GetNames().GetStd(), lastnames);
1536  }}
1537  break;
1539  get_lastnames(authors.GetNames().GetStr(), lastnames);
1540  break;
1541  default:
1542  throw logic_error("Unexpected CAuth_list::C_Name choice: " + CAuth_list::C_Names::SelectionName(authors.GetNames().Which()));
1543  }
1544  auth_string = NStr::Join(lastnames, "; ");
1545 }
1546 
1547 void CAuthListValidator::get_lastnames(const CAuth_list::C_Names::TStd& authors, list<string>& lastnames)
1548 {
1549  for (const auto& name : authors) {
1550  if (name->IsSetName() && name->GetName().IsName() && name->GetName().GetName().IsSetLast()) {
1551  string lname(name->GetName().GetName().GetLast());
1552  lastnames.push_back(NStr::ToLower(lname));
1553  }
1554  }
1555 }
1556 
1557 void CAuthListValidator::get_lastnames(const CAuth_list::C_Names::TStr& authors, list<string>& lastnames)
1558 {
1559  const char* alpha = "abcdefghijklmnopqrstuvwxyz";
1560  for (string auth : authors) {
1561  size_t eow = NStr::ToLower(auth).find_first_not_of(alpha);
1562  lastnames.push_back(auth.substr(0, eow));
1563  }
1564 }
1565 
1566 END_SCOPE(edit)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void transform(Container &c, UnaryFunction *op)
Definition: chainer.hpp:86
CArticleIdSet –.
IMessageListener * m_err_log
Definition: pub_fix.hpp:128
void DebugDump(CNcbiOstream &out) const
Definition: pub_fix.cpp:1474
list< string > added
Definition: pub_fix.hpp:112
static bool enabled
Definition: pub_fix.hpp:94
static double cfg_matched_to_min
Definition: pub_fix.hpp:130
double actual_matched_to_min
Definition: pub_fix.hpp:119
static void get_lastnames(const CAuth_list &authors, list< string > &lastnames, string &auth_string)
Definition: pub_fix.cpp:1523
EOutcome validate(const CCit_art &gb_art, const CCit_art &pm_art)
Definition: pub_fix.cpp:1410
string gb_auth_string
Definition: pub_fix.hpp:115
string reported_limit
Definition: pub_fix.hpp:118
void compare_lastnames()
Definition: pub_fix.cpp:1500
CAuthListValidator(IMessageListener *err_log)
Definition: pub_fix.cpp:1402
static void Configure(const CNcbiRegistry &cfg, const string &section)
Definition: pub_fix.cpp:1394
static double cfg_removed_to_gb
Definition: pub_fix.hpp:131
string pm_auth_string
Definition: pub_fix.hpp:116
double actual_removed_to_gb
Definition: pub_fix.hpp:120
list< string > removed
Definition: pub_fix.hpp:111
void dumplist(const char *hdr, const list< string > &lst, CNcbiOstream &out) const
Definition: pub_fix.cpp:1493
list< string > matched
Definition: pub_fix.hpp:110
EOutcome outcome
Definition: pub_fix.hpp:102
static bool configured
Definition: pub_fix.hpp:129
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
void ConvertMlToStandard(bool normalize_suffix=false)
Definition: Auth_list.cpp:187
CAuthor –.
Definition: Author.hpp:59
This stream exchanges data with an HTTP server located at the URL: http[s]://host[:port]/path[?...
CESearch_Request.
Definition: esearch.hpp:59
CESummary_Request.
Definition: esummary.hpp:58
CEUtils_ConnContext.
Definition: eutils.hpp:65
CImprint –.
Definition: Imprint.hpp:66
@Name_std.hpp User-defined methods of the data storage class.
Definition: Name_std.hpp:56
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:244
CNcbiRegistry –.
Definition: ncbireg.hpp:913
static CRef< CCit_art > FetchPubPmId(TEntrezId pmid, IPubmedUpdater *)
Definition: pub_fix.cpp:1364
void FixPubEquiv(CPub_equiv &pub_equiv)
Definition: pub_fix.cpp:1046
void FixPub(CPub &pub)
Definition: pub_fix.cpp:1291
static string GetErrorId(int code, int subcode)
Definition: pub_fix.cpp:129
Definition: Pub.hpp:56
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CTime –.
Definition: ncbitime.hpp:296
C_E –.
Definition: Title_.hpp:96
Definition: Title.hpp:51
const string & GetTitle(C_E::E_Choice type=C_E::e_not_set) const
If the internal list contains a title (of the specified type, if given), returns the corresponding st...
Definition: Title.cpp:56
IMessageListener::
virtual CRef< CPub > GetPub(TEntrezId pmid, EPubmedError *=nullptr)=0
static const struct name_t names[]
User-defined methods of the data storage class.
std::ofstream out("events_result.xml")
main entry point for tests
CRef< CCit_art > FetchPubPmId(TEntrezId pmid)
Definition: ftamed.cpp:92
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ENTREZ_ID_TO(T, entrez_id)
Definition: ncbimisc.hpp:1097
#define ZERO_ENTREZ_ID
Definition: ncbimisc.hpp:1102
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
@ eDiag_Info
Informational message.
Definition: ncbidiag.hpp:651
@ eDiag_Error
Error message.
Definition: ncbidiag.hpp:653
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
virtual string GetQueryString(void) const
Get CGI script query string.
Definition: esummary.cpp:55
const CEUtils_IdGroup & GetId(void) const
Group of ids required if history is not used.
Definition: esummary.hpp:71
CNcbiIostream & GetStream(void)
Get input stream for reading plain data.
Definition: eutils.cpp:247
void SetArgument(const string &name, const string &value)
Add/change custom argument. Remove the argument if the value is empty.
Definition: eutils.cpp:179
void AddId(const string &id)
Add a single id to the list.
Definition: eutils.hpp:235
void SetUseHistory(bool value)
Definition: esearch.hpp:78
void SetRetMax(int retmax)
Definition: esearch.hpp:112
void Disconnect(void)
Close connection, destroy the stream.
Definition: eutils.hpp:140
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
@ eUnknown
Definition: app_popup.hpp:72
TPrim & Set(void)
Definition: serialbase.hpp:351
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_Xml
Definition: serialbase.hpp:698
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
virtual bool GetBool(const string &section, const string &name, bool default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get boolean value of specified parameter name.
Definition: ncbireg.cpp:391
virtual double GetDouble(const string &section, const string &name, double default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get double value of specified parameter name.
Definition: ncbireg.cpp:420
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
size_t NcbiStreamToString(string *s, CNcbiIstream &is, size_t pos=0)
Input the entire contents of an istream into a string (NULL causes drain).
Definition: ncbistre.cpp:296
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3187
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2989
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3197
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
PNocase_Generic< string > PNocase
Definition: ncbistr.hpp:4907
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
Definition: ncbistr.hpp:2876
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
static void TrimPrefixInPlace(string &str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string (in-place)
Definition: ncbistr.cpp:3238
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eTrunc_Begin
Truncate leading spaces only.
Definition: ncbistr.hpp:2240
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300
bool IsProc(void) const
Check if variant Proc is selected.
Definition: Cit_art_.hpp:507
const TTitle & GetTitle(void) const
Get the Title member data.
Definition: Cit_book_.hpp:296
bool IsSetVolume(void) const
Check if a value has been assigned to Volume data member.
Definition: Imprint_.hpp:746
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
Definition: Auth_list_.cpp:101
bool IsDoi(void) const
Check if variant Doi is selected.
Definition: ArticleId_.hpp:492
void SetIds(TIds &value)
Assign a value to Ids data member.
Definition: Cit_art_.cpp:258
const TBook & GetBook(void) const
Get the Book member data.
Definition: Cit_proc_.hpp:214
bool IsSetAuthors(void) const
authors (ANSI requires) Check if a value has been assigned to Authors data member.
Definition: Cit_art_.hpp:534
const TJournal & GetJournal(void) const
Get the variant data.
Definition: Cit_art_.cpp:111
bool IsSetTitle(void) const
title of journal Check if a value has been assigned to Title data member.
Definition: Cit_jour_.hpp:201
const TVolume & GetVolume(void) const
Get the Volume member data.
Definition: Imprint_.hpp:758
const TPages & GetPages(void) const
Get the Pages member data.
Definition: Imprint_.hpp:852
bool IsSetPrepub(void) const
Check if a value has been assigned to Prepub data member.
Definition: Imprint_.hpp:1080
const TFrom & GetFrom(void) const
Get the From member data.
Definition: Cit_art_.hpp:567
bool IsSetTitle(void) const
Title of book Check if a value has been assigned to Title data member.
Definition: Cit_book_.hpp:284
void SetFrom(TFrom &value)
Assign a value to From data member.
Definition: Cit_art_.cpp:248
const TOther & GetOther(void) const
Get the variant data.
Definition: ArticleId_.cpp:187
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
Definition: Cit_art_.hpp:555
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_art_.cpp:227
bool IsSetImp(void) const
Check if a value has been assigned to Imp data member.
Definition: Cit_jour_.hpp:231
TPrepub GetPrepub(void) const
Get the Prepub member data.
Definition: Imprint_.hpp:1099
const TName & GetName(void) const
Get the Name member data.
Definition: Author_.hpp:352
const TProc & GetProc(void) const
Get the variant data.
Definition: Cit_art_.cpp:155
E_Choice Which(void) const
Which variant is currently selected.
Definition: Cit_art_.hpp:466
list< CRef< CAuthor > > TStd
Definition: Auth_list_.hpp:170
const Tdata & Get(void) const
Get the member data.
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
Definition: Auth_list_.hpp:464
void ResetAuthors(void)
Reset Authors data member.
Definition: Cit_art_.cpp:222
bool IsSetDate(void) const
date of publication Check if a value has been assigned to Date data member.
Definition: Imprint_.hpp:716
bool IsSetName(void) const
Author, Primary or Secondary Check if a value has been assigned to Name data member.
Definition: Author_.hpp:340
const TStr & GetStr(void) const
Get the variant data.
Definition: Auth_list_.hpp:450
bool IsSetIds(void) const
lots of ids Check if a value has been assigned to Ids data member.
Definition: Cit_art_.hpp:585
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Title_.hpp:769
bool IsBook(void) const
Check if variant Book is selected.
Definition: Cit_art_.hpp:501
const TImp & GetImp(void) const
Get the Imp member data.
Definition: Cit_jour_.hpp:243
bool IsSetBook(void) const
citation to meeting Check if a value has been assigned to Book data member.
Definition: Cit_proc_.hpp:202
bool IsJournal(void) const
Check if variant Journal is selected.
Definition: Cit_art_.hpp:495
const TNames & GetNames(void) const
Get the Names member data.
Definition: Auth_list_.hpp:478
const TStd & GetStd(void) const
Get the variant data.
Definition: Auth_list_.hpp:410
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Cit_art_.hpp:597
const TDate & GetDate(void) const
Get the Date member data.
Definition: Imprint_.hpp:728
void SetPrepub(TPrepub value)
Assign a value to Prepub data member.
Definition: Imprint_.hpp:1108
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_art_.hpp:546
const TTitle & GetTitle(void) const
Get the Title member data.
Definition: Cit_jour_.hpp:213
const TImp & GetImp(void) const
Get the Imp member data.
Definition: Cit_book_.hpp:377
bool IsSetPages(void) const
Check if a value has been assigned to Pages data member.
Definition: Imprint_.hpp:840
const Tdata & Get(void) const
Get the member data.
Definition: Title_.hpp:781
E_Choice Which(void) const
Which variant is currently selected.
Definition: Auth_list_.hpp:375
bool IsSetImp(void) const
Check if a value has been assigned to Imp data member.
Definition: Cit_book_.hpp:365
bool IsStd(void) const
Check if variant Std is selected.
Definition: Auth_list_.hpp:404
const TBook & GetBook(void) const
Get the variant data.
Definition: Cit_art_.cpp:133
@ ePrepub_in_press
accepted, not published
Definition: Imprint_.hpp:96
@ e_Ml
MEDLINE, semi-structured.
Definition: Auth_list_.hpp:114
@ e_Std
full citations
Definition: Auth_list_.hpp:113
bool IsConsortium(void) const
Check if variant Consortium is selected.
Definition: Person_id_.hpp:405
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
bool IsSetYear(void) const
full year (including 1900) Check if a value has been assigned to Year data member.
Definition: Date_std_.hpp:407
bool IsStd(void) const
Check if variant Std is selected.
Definition: Date_.hpp:320
const TInitials & GetInitials(void) const
Get the Initials member data.
Definition: Name_std_.hpp:610
bool IsName(void) const
Check if variant Name is selected.
Definition: Person_id_.hpp:359
bool IsSetInitials(void) const
first + middle initials Check if a value has been assigned to Initials data member.
Definition: Name_std_.hpp:598
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
Definition: Name_std_.hpp:410
const TConsortium & GetConsortium(void) const
Get the variant data.
Definition: Person_id_.hpp:411
TYear GetYear(void) const
Get the Year member data.
Definition: Date_std_.hpp:426
const TLast & GetLast(void) const
Get the Last member data.
Definition: Name_std_.hpp:422
const TName & GetName(void) const
Get the variant data.
Definition: Person_id_.cpp:137
const TStd & GetStd(void) const
Get the variant data.
Definition: Date_.cpp:109
void SetCit(TCit &value)
Assign a value to Cit data member.
bool IsSetCit(void) const
article citation Check if a value has been assigned to Cit data member.
bool IsSetPmid(void) const
MEDLINE records may include the PubMedId Check if a value has been assigned to Pmid data member.
const TPmid & GetPmid(void) const
Get the Pmid member data.
TPmid & SetPmid(void)
Select the variant.
Definition: Pub_.hpp:690
list< CRef< CPub > > Tdata
Definition: Pub_equiv_.hpp:90
Tdata & Set(void)
Assign a value to data member.
Definition: Pub_equiv_.hpp:171
const TArticle & GetArticle(void) const
Get the variant data.
Definition: Pub_.cpp:233
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Pub_equiv_.hpp:153
TEquiv & SetEquiv(void)
Select the variant.
Definition: Pub_.cpp:393
E_Choice Which(void) const
Which variant is currently selected.
Definition: Pub_.hpp:555
TMedline & SetMedline(void)
Select the variant.
Definition: Pub_.cpp:217
bool IsArticle(void) const
Check if variant Article is selected.
Definition: Pub_.hpp:629
TArticle & SetArticle(void)
Select the variant.
Definition: Pub_.cpp:239
@ e_Article
Definition: Pub_.hpp:106
@ e_Medline
Definition: Pub_.hpp:104
@ e_Equiv
to cite a variety of ways
Definition: Pub_.hpp:113
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
if(yy_accept[yy_current_state])
Definition: fix_pub.hpp:45
bool TenAuthorsProcess(CCit_art &cit, CCit_art &new_cit, IMessageListener *err_log)
Definition: pub_fix.cpp:807
bool IsFromBook(const CCit_art &art)
Definition: pub_fix.cpp:605
static void s_TrimPrefixThe(CTempString &str)
Definition: pub_fix.cpp:699
@ err_Reference
Definition: pub_fix_aux.hpp:60
@ err_AuthList
Definition: pub_fix_aux.hpp:62
static bool s_GetConsortia(const list< CRef< CAuthor >> &authors, list< string > &consortia)
Definition: pub_fix.cpp:676
void MergeNonPubmedPubIds(const CCit_art &cit_old, CCit_art &cit_new)
Definition: pub_fix.cpp:938
static void s_GetESearchIds(CESearch_Request &req, const string &term, list< string > &ids)
Definition: pub_fix.cpp:316
bool IsInpress(const CCit_art &cit_art)
Definition: pub_fix.cpp:210
bool TenAuthorsCompare(CCit_art &cit_old, CCit_art &cit_new)
Definition: pub_fix.cpp:618
bool MUIsJournalIndexed(const string &journal)
Definition: pub_fix.cpp:410
static const size_t MAX_MATCH_COEFF
Definition: pub_fix.cpp:616
@ err_Print_Failed
Definition: pub_fix_aux.hpp:91
static bool s_ConsortiaMatch(const list< string > &x, const list< string > &y)
Definition: pub_fix.cpp:705
bool MULooksLikeISSN(const string &str)
Definition: pub_fix.cpp:232
void GetFirstTenNames(const CAuth_list::C_Names::TStd &names, list< CTempString > &res)
Definition: pub_fix.cpp:789
bool NeedToPropagateInJournal(const CCit_art &cit_art)
Definition: pub_fix.cpp:982
@ err_Reference_DiffConsortAuthors
Definition: pub_fix_aux.hpp:76
@ err_Reference_No_reference
Definition: pub_fix_aux.hpp:70
@ err_Reference_MedlineMatchIgnored
Definition: pub_fix_aux.hpp:73
@ err_Reference_PmidNotFound
Definition: pub_fix_aux.hpp:82
@ err_Reference_Multiple_pmid
Definition: pub_fix_aux.hpp:78
@ err_Reference_NoConsortAuthors
Definition: pub_fix_aux.hpp:75
@ err_Reference_SuccessfulMuidLookup
Definition: pub_fix_aux.hpp:68
@ err_Reference_SuccessfulPmidLookup
Definition: pub_fix_aux.hpp:81
@ err_Reference_Multiple_ref
Definition: pub_fix_aux.hpp:71
@ err_Reference_NoPmidJournalNotInPubMed
Definition: pub_fix_aux.hpp:83
@ err_Reference_Multiple_muid
Definition: pub_fix_aux.hpp:72
@ err_Reference_NoPmidJournalNotInPubMedInPress
Definition: pub_fix_aux.hpp:85
@ err_Reference_PmidNotFoundInPress
Definition: pub_fix_aux.hpp:84
@ err_Reference_MuidNotFound
Definition: pub_fix_aux.hpp:67
@ err_Reference_MedArchMatchIgnored
Definition: pub_fix_aux.hpp:80
@ err_Reference_MuidMissmatch
Definition: pub_fix_aux.hpp:74
@ err_Reference_OldInPress
Definition: pub_fix_aux.hpp:69
@ err_Reference_PmidMissmatch
Definition: pub_fix_aux.hpp:77
@ err_Reference_FailedToGetPub
Definition: pub_fix_aux.hpp:79
void MedlineToISO(CCit_art &cit_art)
Definition: pub_fix.cpp:153
static map< int, SErrorSubcodes > ERROR_CODE_STR
Definition: pub_fix.cpp:88
static bool s_IsIndexed(CRef< CEUtils_ConnContext > pContext, const string &id)
Definition: pub_fix.cpp:365
static void s_ProcessConsortia(const CCit_art &old_cit, CCit_art &new_cit, IMessageListener *pListener)
Definition: pub_fix.cpp:741
void PropagateInPress(bool inpress, CCit_art &cit_art)
Definition: pub_fix.cpp:1003
@ err_AuthList_PreserveGB
Definition: pub_fix_aux.hpp:97
@ err_AuthList_LowMatch
Definition: pub_fix_aux.hpp:98
@ err_AuthList_SignificantDrop
Definition: pub_fix_aux.hpp:96
void SplitMedlineEntry(CPub_equiv::Tdata &medlines)
Definition: pub_fix.cpp:175
void PrintPub(const CCit_art &cit_art, bool found, bool auth, long muid, IMessageListener *err_log)
Definition: pub_fix.cpp:456
static void MoveAuthors(CCit_art &to, CCit_art &from)
Definition: pub_fix.cpp:610
size_t ExtractConsortiums(const CAuth_list::C_Names::TStd &names, CAuth_list::C_Names::TStr &extracted)
Definition: pub_fix.cpp:654
unsigned int a
Definition: ncbi_localip.c:102
IMessage/IMessageListener interfaces and basic implementations.
void SleepSec(unsigned long sec, EInterruptOnSignal onsignal=eRestartOnSignal)
Sleep.
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T min(T x_, T y_)
User-defined methods of the data storage class.
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
#define ERR_POST_TO_LISTENER(listener, severity, code, subcode, message)
Definition: pub_fix.cpp:69
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
Definition: ref.cpp:1468
@ eNotSet
Definition: splign_app.cpp:550
static const char * str(char *buf, int n)
Definition: stats.c:84
map< int, string > m_sub_errors
Definition: pub_fix.cpp:85
#define _ASSERT
Modified on Sat Dec 09 04:49:52 2023 by modify_doxy.py rev. 669887