NCBI C++ ToolKit
pub_fix.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef _PUB_FIX_HPP_
2 #define _PUB_FIX_HPP_
3 
4 /* $Id: pub_fix.hpp 101431 2023-12-13 09:20:40Z stakhovv $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Alexey Dobronadezhdin
30  *
31  * File Description:
32  * Code for fixing up publications.
33  *
34  * ===========================================================================
35  */
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiobj.hpp>
38 
40 
42 
43 class IMessageListener;
44 
46 
47 class CPub;
48 class CPub_equiv;
49 class CCit_art;
50 
52 
53 /*-------------------------------------------------------------------------------
54 https://jira.ncbi.nlm.nih.gov/browse/ID-6514?focusedCommentId=6241819&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-6241819
55 As requested by Mark Cavanaugh:
56 So here's how I imagine things working Leonid:
57 
58 1) PubMed Cit-art pub has a year value > 1999
59 
60 Accept the Auth-list of the PubMed article, as-is
61 
62 Consider generating a warning if the PubMed article author count is significantly less than the original author count.
63 
64 "Significant" ? Hmmmmm..... Let's try: Auth-Count-Diff >= 1/3 * Orig-Auth-Count
65 
66 2) PubMed Cit-art pub has a year value ranging from 1996 to 1999
67 
68 If the original author count is > 25, preserve the Auth-list of the original article, discarding PubMed's author list
69 
70 Log the author name counts: Original vs PubMed
71 Log the author lists: Original vs PubMed
72 
73 3) PubMed Cit-art pub has a year value < 1996
74 
75 If the original author count is > 10, preserve the Auth-list of the original article, discarding PubMed's author list
76 
77 Log the author name counts: Original vs PubMed
78 Log the author lists: Original vs PubMed
79 
80 We may have to tweak things a bit further, but this is a good start.
81 -------------------------------------------------------------------------------*/
82 
// Body of the author-list validator class.
// NOTE(review): the class head (doxygen line 83) is missing from this extract;
// the type is presumably CAuthListValidator, the type named by
// CPubFix::GetValidator() -- confirm against the real header.
84 {
85 public:
// Result code returned by validate().
// NOTE(review): the enumerators at doxygen lines 88-89 are missing from this extract.
86  enum EOutcome {
87  eNotSet = 0,
90  eKeep_genbank
91  };
// Static configuration entry point taking a registry and a section name.
// Presumably fills the static cfg_* thresholds declared below -- TODO confirm in the .cpp.
92  static void Configure(const CNcbiRegistry& cfg, const string& section);
93  // If true, FixPubEquiv() will use this class to validate authors list
94  static bool enabled;
// NOTE(review): doxygen line 95 is missing here; CPubFix constructs its validator
// member from an IMessageListener*, so a constructor is presumably declared there.
// Compares two versions of an article and returns an EOutcome.
// Presumably gb_art is the GenBank (original) citation and pm_art the PubMed one -- TODO confirm.
96  EOutcome validate(const CCit_art& gb_art, const CCit_art& pm_art);
// Writes the validator's state to the given output stream.
97  void DebugDump(CNcbiOstream& out) const;
98  // utility method
// Takes an author list; `lastnames` and `auth_string` are non-const references,
// so they are presumably output parameters -- verify against the implementation.
99  static void get_lastnames(const CAuth_list& authors, list<string>& lastnames, string& auth_string);
100 
101  // public vars
// NOTE(review): doxygen lines 102 and 106 are missing from this extract.
103  int pub_year;
// Presumably the author counts of the GenBank and PubMed lists -- TODO confirm.
104  int cnt_gb;
105  int cnt_pm;
107  int cnt_added; // new from pubmed list
108  int cnt_removed; // not matched in genbank list
109  int cnt_min; // minimum # in GB/PM list, use as a base for ratio
110  list<string> matched;
111  list<string> removed;
112  list<string> added;
113  string gb_type;
114  string pm_type;
// NOTE(review): doxygen lines 115-116 and 118-120 (further members) are missing from this extract.
117  // for DebugDump()
121 
122 private:
123  void compare_lastnames();
// Helper taking a header string, a list of strings and an output stream
// (presumably used by DebugDump() to print one list -- TODO confirm).
124  void dumplist(const char* hdr, const list<string>& lst, CNcbiOstream& out) const;
// Overloads of get_lastnames() for the two author-name container types
// CAuth_list::C_Names::TStd and CAuth_list::C_Names::TStr.
125  static void get_lastnames(const CAuth_list::C_Names::TStd& authors, list<string>& lastnames);
126  static void get_lastnames(const CAuth_list::C_Names::TStr& authors, list<string>& lastnames);
127  // vars
// NOTE(review): doxygen line 128 is missing from this extract.
129  static bool configured;
// Static thresholds; by their names presumably a matched/min ratio and a
// removed/GenBank ratio -- TODO confirm how validate() uses them.
130  static double cfg_matched_to_min;
131  static double cfg_removed_to_gb;
132 };
133 
// Body of class CPubFix (the name is taken from the constructor below).
// NOTE(review): the class head (doxygen line 134) is missing from this extract.
135 {
136 public:
137 
// Stores each argument in the member of the matching name; err_log is also
// passed on to construct the embedded author-list validator.
// The two pointers are stored as given; ownership is not visible here.
138  CPubFix(bool always_lookup, bool replace_cit, bool merge_ids, IMessageListener* err_log, CEUtilsUpdater* upd) :
139  m_always_lookup(always_lookup),
140  m_replace_cit(replace_cit),
141  m_merge_ids(merge_ids),
142  m_err_log(err_log),
143  m_authlist_validator(err_log),
144  m_upd(upd)
145  {
146  }
147 
// Fix-up entry points; both take their argument by non-const reference,
// so they presumably modify it in place -- see the .cpp for details.
148  void FixPub(CPub& pub);
149  void FixPubEquiv(CPub_equiv& pub_equiv);
// Read-only access to the embedded author-list validator.
150  const CAuthListValidator& GetValidator() const { return m_authlist_validator; };
151 
// NOTE(review): doxygen line 152 is missing from this extract.
// Returns a string for a (code, subcode) pair; the exact format is defined in the .cpp.
153  static string GetErrorId(int code, int subcode);
154 
155 private:
// NOTE(review): the private member declarations (doxygen lines 156-158 and 160-162)
// are missing from this extract; the constructor's initializer list names them:
// m_always_lookup, m_replace_cit, m_merge_ids, m_err_log, m_authlist_validator, m_upd.
159 
163 };
164 
168 
169 #endif // _PUB_FIX_HPP_
IMessageListener * m_err_log
Definition: pub_fix.hpp:128
list< string > added
Definition: pub_fix.hpp:112
static bool enabled
Definition: pub_fix.hpp:94
static double cfg_matched_to_min
Definition: pub_fix.hpp:130
double actual_matched_to_min
Definition: pub_fix.hpp:119
string gb_auth_string
Definition: pub_fix.hpp:115
string reported_limit
Definition: pub_fix.hpp:118
static double cfg_removed_to_gb
Definition: pub_fix.hpp:131
string pm_auth_string
Definition: pub_fix.hpp:116
double actual_removed_to_gb
Definition: pub_fix.hpp:120
list< string > removed
Definition: pub_fix.hpp:111
list< string > matched
Definition: pub_fix.hpp:110
EOutcome outcome
Definition: pub_fix.hpp:102
static bool configured
Definition: pub_fix.hpp:129
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CNcbiRegistry –.
Definition: ncbireg.hpp:913
bool m_always_lookup
Definition: pub_fix.hpp:156
CPubFix(bool always_lookup, bool replace_cit, bool merge_ids, IMessageListener *err_log, CEUtilsUpdater *upd)
Definition: pub_fix.hpp:138
CEUtilsUpdater * m_upd
Definition: pub_fix.hpp:162
const CAuthListValidator & GetValidator() const
Definition: pub_fix.hpp:150
bool m_merge_ids
Definition: pub_fix.hpp:158
bool m_replace_cit
Definition: pub_fix.hpp:157
IMessageListener * m_err_log
Definition: pub_fix.hpp:160
CAuthListValidator m_authlist_validator
Definition: pub_fix.hpp:161
Definition: Pub.hpp:56
IMessageListener::
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
CRef< CCit_art > FetchPubPmId(TEntrezId pmid)
Definition: ftamed.cpp:92
static const char * validate(DSNINFO *di)
Go looking for trouble.
Definition: winsetup.c:179
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NCBI_XOBJEDIT_EXPORT
Definition: ncbi_export.h:1291
list< CRef< CAuthor > > TStd
Definition: Auth_list_.hpp:170
Definition: fix_pub.hpp:45
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
@ eNotSet
Definition: splign_app.cpp:550
Definition: inftrees.h:24
Modified on Sun May 05 05:15:26 2024 by modify_doxy.py rev. 669887