NCBI C++ ToolKit
hydra_client.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jonathan Kans, Aaron Ucko
27 *
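28 * File Description:
29 * Implementation of CHydraSearch, an API for citation lookup that queries the Hydra search CGI over HTTP and parses its XML reply.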
30 * ===========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 
36 
37 #include <corelib/ncbi_system.hpp>
40 
41 #include <cmath>
42 
44 
46 {
47 public:
48  CHydraServer(vector<int>& uids);
49  bool RunHydraSearch(const string& query,
50  CHydraSearch::ESearch search,
52 
53 protected:
54  bool error(const string& message);
55  bool warning(const string& message);
56  bool start_element(const string& name, const attrs_type& attrs);
57  bool end_element(const string& name);
58  bool text(const string& contents);
59 
60 protected:
61  string m_Score;
62  vector<int>& m_Uids;
64 };
65 
66 CHydraServer::CHydraServer(vector<int>& uids)
67  : m_Uids(uids)
68 {
69 }
70 
// Maps a CHydraSearch::ESearch value to the search-type string that
// RunHydraSearch() substitutes for "@SEARCHTYPE@" in the CGI arguments.
// Any value without a dedicated branch reaches `default`, which returns
// "pubmed_search_citation_top_20.1".
//
// NOTE(review): the `case` label in front of each of the first five returns
// is absent from this listing (the embedded numbering skips 74, 77, 80, 83
// and 86), so which enumerator selects which string cannot be confirmed from
// here -- restore the labels from the upstream file before building.
71 static string GetSearchType(CHydraSearch::ESearch search)
72 {
73  switch(search) {
75  return "pubmed_search_citation_top_20.1";
76 
78  return "pubmed_search_citation.1";
79 
81  return "citation";
82 
84  return "pmc_citation.1";
85 
87  return "pmc_citation_top_6.1";
88 
89  default:
90  return "pubmed_search_citation_top_20.1";
91  }
92 }
93 
95  CHydraSearch::ESearch search,
97 {
98  string hostname = "www.ncbi.nlm.nih.gov";
99  string path = "/projects/hydra/hydra_search.cgi";
100  string args = "search=@SEARCHTYPE@&query=";
101 
102  m_Cutoff = cutoff;
103 
104  string searchtype = GetSearchType(search);
105  NStr::ReplaceInPlace(args, "@SEARCHTYPE@", searchtype);
106 
107  string params = args + NStr::URLEncode(query);
108 
109  for (int attempt = 1; attempt <= 5; attempt++) {
110  try {
111  CConn_HttpStream istr(hostname, path, params);
112 
113  xml::error_messages msgs;
114  parse_stream(istr, &msgs);
115 
116  if (msgs.has_errors() || msgs.has_fatal_errors()) {
117  ERR_POST(Warning << "error parsing xml: " << msgs.print());
118  return false;
119  } else {
120  return m_Uids.size() > 0;
121  }
122  }
123  catch (CException& e) {
124  ERR_POST(Warning << "failed on attempt " << attempt
125  << ": " << e);
126  }
127 
128  int sleep_secs = ::sqrt((double)attempt);
129  if (sleep_secs) {
130  SleepSec(sleep_secs);
131  }
132  }
133  return false;
134 }
135 
136 bool CHydraServer::error(const string& message)
137 {
138  ERR_POST(Error << "parse error: " << message);
139  return false;
140 }
141 
142 bool CHydraServer::warning(const string& message)
143 {
144  ERR_POST(Warning << "parse warning: " << message);
145  return false;
146 }
147 
148 bool CHydraServer::start_element(const string& name, const attrs_type& attrs)
149 {
150  m_Score.clear();
151  if (NStr::EqualNocase(name, "Id")) {
152  attrs_type::const_iterator it = attrs.find("score");
153  if (it != attrs.end()) {
154  m_Score = it->second;
155  }
156  }
157 
158  return true;
159 }
160 
161 bool CHydraServer::end_element(const string& name)
162 {
163  m_Score.clear();
164  return true;
165 }
166 
167 bool CHydraServer::text(const string& contents)
168 {
169  if ( m_Score.empty() ) return true;
170  if ( contents.find_first_not_of(" \n\r\t") == NPOS ) return true;
171 
172  if (m_Score == "1") {
173  // score 100 always gets reported
174  m_Uids.push_back(NStr::StringToInt(contents));
175  return true;
176  }
177 
178  // convert m_Score string to double and then make comparisons to cutoff
179  double score = NStr::StringToDouble(m_Score);
180 
181  if ((m_Cutoff == CHydraSearch::EScoreCutoff::eCutoff_VeryHigh) && score >= 0.99) {
182  // score 0.99 or higher
183  m_Uids.push_back(NStr::StringToInt(contents));
184  }
185  else if ((m_Cutoff == CHydraSearch::EScoreCutoff::eCutoff_High) && score >= 0.95) {
186  // score 0.95 or higher
187  m_Uids.push_back(NStr::StringToInt(contents));
188  }
189  else if ((m_Cutoff == CHydraSearch::EScoreCutoff::eCutoff_Medium) && score >= 0.90) {
190  // score 0.90 or higher
191  m_Uids.push_back(NStr::StringToInt(contents));
192  }
193  else if ((m_Cutoff == CHydraSearch::EScoreCutoff::eCutoff_Low) && score >= 0.80) {
194  // score 0.80 or higher
195  m_Uids.push_back(NStr::StringToInt(contents));
196  }
197 
198  return true;
199 }
200 
201 bool CHydraSearch::DoHydraSearch(const string& query, vector<int>& uids,
202  ESearch search,
203  EScoreCutoff cutoff)
204 {
205  uids.clear();
206  CHydraServer hydra(uids);
207  return hydra.RunHydraSearch(query, search, cutoff);
208 }
209 
This stream exchanges data with an HTTP server located at the URL: http[s]://host[:port]/path[?...
bool DoHydraSearch(const string &query, vector< int > &uids, ESearch search=ESearch::ePUBMED_TOP_20, EScoreCutoff cutoff=EScoreCutoff::eCutoff_Low)
bool warning(const string &message)
bool start_element(const string &name, const attrs_type &attrs)
bool end_element(const string &name)
CHydraServer(vector< int > &uids)
bool text(const string &contents)
CHydraSearch::EScoreCutoff m_Cutoff
bool RunHydraSearch(const string &query, CHydraSearch::ESearch search, CHydraSearch::EScoreCutoff cutoff)
bool error(const string &message)
vector< int > & m_Uids
The xml::error_messages class is used to store all the error message which are collected while parsin...
Definition: errors.hpp:137
bool has_fatal_errors(void) const
Check if there are fatal errors in the error messages.
Definition: errors.cpp:126
bool has_errors(void) const
Check if there are errors in the error messages.
Definition: errors.cpp:122
std::string print(void) const
Convert error messages into a single printable string.
Definition: errors.cpp:130
The xml::event_parser is used to parse an XML document by calling member functions when certain thing...
std::map< std::string, std::string > attrs_type
a type for holding XML node attributes
bool parse_stream(std::istream &stream, error_messages *messages, warnings_as_errors_type how=type_warnings_not_errors)
Parse what ever data that can be read from the given stream.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387
#define NPOS
Definition: ncbistr.hpp:133
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
static string URLEncode(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
URL-encode string.
Definition: ncbistr.cpp:6062
USING_NCBI_SCOPE
static string GetSearchType(CHydraSearch::ESearch search)
API (CHydraSearch) for citation lookup.
void SleepSec(unsigned long sec, EInterruptOnSignal onsignal=eRestartOnSignal)
Sleep.
static string query
Modified on Wed May 08 12:07:59 2024 by modify_doxy.py rev. 669887