NCBI C++ ToolKit
doi_lookup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: doi_lookup.cpp 47411 2023-03-20 19:18:06Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 
35 #include <sstream>
37 
39 #include <objects/biblio/Title.hpp>
41 #include <objects/general/Date.hpp>
43 #include <objects/pub/Pub.hpp>
45 #include <objects/pub/Pub_set.hpp>
51 #include <objects/biblio/Affil.hpp>
52 #include <objects/biblio/DOI.hpp>
55 
60 
61 #include <html/html.hpp>
62 #include <chrono>
63 
66 
67 
69 {
70  string r = "#";
71 
72  if (ch == 198 || ch == 196)
73  {
74  r = "Ae";
75  }
76  else if (ch == 230 || ch == 228)
77  {
78  r = "ae";
79  }
80  else if (ch == 197)
81  {
82  r = "Aa";
83  }
84  else if (ch == 229)
85  {
86  r = "aa";
87  }
88  else if (ch == 220)
89  {
90  r = "Ue";
91  }
92  else if (ch == 252)
93  {
94  r = "ue";
95  }
96  else if (ch == 214)
97  {
98  r = "Oe";
99  }
100  else if (ch == 246)
101  {
102  r = "oe";
103  }
104  else if (ch == 223)
105  {
106  r = "ss";
107  }
108  else if (ch == 199)
109  {
110  r = "C";
111  }
112  else if (ch >= 200 && ch <= 203)
113  {
114  r = "E";
115  }
116  else if (ch >= 204 && ch <= 207)
117  {
118  r = "I";
119  }
120  else if (ch == 209)
121  {
122  r = "N";
123  }
124  else if ((ch >= 210 && ch <= 214) || ch == 216)
125  {
126  r = "O";
127  }
128  else if (ch >= 217 && ch <= 220)
129  {
130  r = "U";
131  }
132  else if (ch == 221)
133  {
134  r = "Y";
135  }
136  else if (ch >= 224 && ch <= 229)
137  {
138  r = "a";
139  }
140  else if (ch == 231)
141  {
142  r = "c";
143  }
144  else if (ch >= 232 && ch <= 235)
145  {
146  r = "e";
147  }
148  else if (ch >= 236 && ch <= 239)
149  {
150  r = "i";
151  }
152  else if (ch == 241)
153  {
154  r = "n";
155  }
156  else if ((ch >= 242 && ch <= 246) || ch == 248)
157  {
158  r = "o";
159  }
160  else if (ch >= 249 && ch <= 252)
161  {
162  r = "u";
163  }
164  else if (ch == 253 || ch == 255)
165  {
166  r = "y";
167  }
168 
169 // https://www.fileformat.info/info/unicode/block/latin_extended_a/list.htm
170  switch (ch)
171  {
172  case 0x100: r = "A"; break; //LATIN CAPITAL LETTER A WITH MACRON
173  case 0x101: r = "a"; break; //LATIN SMALL LETTER A WITH MACRON
174  case 0x102: r = "A"; break; //LATIN CAPITAL LETTER A WITH BREVE
175  case 0x103: r = "a"; break; //LATIN SMALL LETTER A WITH BREVE
176  case 0x104: r = "A"; break; //LATIN CAPITAL LETTER A WITH OGONEK
177  case 0x105: r = "a"; break; //LATIN SMALL LETTER A WITH OGONEK
178  case 0x106: r = "C"; break; //LATIN CAPITAL LETTER C WITH ACUTE
179  case 0x107: r = "c"; break; //LATIN SMALL LETTER C WITH ACUTE
180  case 0x108: r = "C"; break; //LATIN CAPITAL LETTER C WITH CIRCUMFLEX
181  case 0x109: r = "c"; break; //LATIN SMALL LETTER C WITH CIRCUMFLEX
182  case 0x10A: r = "C"; break; //LATIN CAPITAL LETTER C WITH DOT ABOVE
183  case 0x10B: r = "c"; break; //LATIN SMALL LETTER C WITH DOT ABOVE
184  case 0x10C: r = "C"; break; //LATIN CAPITAL LETTER C WITH CARON
185  case 0x10D: r = "c"; break; //LATIN SMALL LETTER C WITH CARON
186  case 0x10E: r = "D"; break; //LATIN CAPITAL LETTER D WITH CARON
187  case 0x10F: r = "d"; break; //LATIN SMALL LETTER D WITH CARON
188  case 0x110: r = "D"; break; //LATIN CAPITAL LETTER D WITH STROKE
189  case 0x111: r = "d"; break; //LATIN SMALL LETTER D WITH STROKE
190  case 0x112: r = "E"; break; //LATIN CAPITAL LETTER E WITH MACRON
191  case 0x113: r = "e"; break; //LATIN SMALL LETTER E WITH MACRON
192  case 0x114: r = "E"; break; //LATIN CAPITAL LETTER E WITH BREVE
193  case 0x115: r = "e"; break; //LATIN SMALL LETTER E WITH BREVE
194  case 0x116: r = "E"; break; //LATIN CAPITAL LETTER E WITH DOT ABOVE
195  case 0x117: r = "e"; break; //LATIN SMALL LETTER E WITH DOT ABOVE
196  case 0x118: r = "E"; break; //LATIN CAPITAL LETTER E WITH OGONEK
197  case 0x119: r = "e"; break; //LATIN SMALL LETTER E WITH OGONEK
198  case 0x11A: r = "E"; break; //LATIN CAPITAL LETTER E WITH CARON
199  case 0x11B: r = "e"; break; //LATIN SMALL LETTER E WITH CARON
200  case 0x11C: r = "G"; break; //LATIN CAPITAL LETTER G WITH CIRCUMFLEX
201  case 0x11D: r = "g"; break; //LATIN SMALL LETTER G WITH CIRCUMFLEX
202  case 0x11E: r = "G"; break; //LATIN CAPITAL LETTER G WITH BREVE
203  case 0x11F: r = "g"; break; //LATIN SMALL LETTER G WITH BREVE
204  case 0x120: r = "G"; break; //LATIN CAPITAL LETTER G WITH DOT ABOVE
205  case 0x121: r = "g"; break; //LATIN SMALL LETTER G WITH DOT ABOVE
206  case 0x122: r = "G"; break; //LATIN CAPITAL LETTER G WITH CEDILLA
207  case 0x123: r = "g"; break; //LATIN SMALL LETTER G WITH CEDILLA
208  case 0x124: r = "H"; break; //LATIN CAPITAL LETTER H WITH CIRCUMFLEX
209  case 0x125: r = "h"; break; //LATIN SMALL LETTER H WITH CIRCUMFLEX
210  case 0x126: r = "H"; break; //LATIN CAPITAL LETTER H WITH STROKE
211  case 0x127: r = "h"; break; //LATIN SMALL LETTER H WITH STROKE
212  case 0x128: r = "I"; break; //LATIN CAPITAL LETTER I WITH TILDE
213  case 0x129: r = "i"; break; //LATIN SMALL LETTER I WITH TILDE
214  case 0x12A: r = "I"; break; //LATIN CAPITAL LETTER I WITH MACRON
215  case 0x12B: r = "i"; break; //LATIN SMALL LETTER I WITH MACRON
216  case 0x12C: r = "I"; break; //LATIN CAPITAL LETTER I WITH BREVE
217  case 0x12D: r = "i"; break; //LATIN SMALL LETTER I WITH BREVE
218  case 0x12E: r = "I"; break; //LATIN CAPITAL LETTER I WITH OGONEK
219  case 0x12F: r = "i"; break; //LATIN SMALL LETTER I WITH OGONEK
220  case 0x130: r = "I"; break; //LATIN CAPITAL LETTER I WITH DOT ABOVE
221  case 0x131: r = "i"; break; //LATIN SMALL LETTER DOTLESS I
222  case 0x132: r = "IJ"; break; //LATIN CAPITAL LIGATURE IJ
223  case 0x133: r = "ij"; break; //LATIN SMALL LIGATURE IJ
224  case 0x134: r = "J"; break; //LATIN CAPITAL LETTER J WITH CIRCUMFLEX
225  case 0x135: r = "j"; break; //LATIN SMALL LETTER J WITH CIRCUMFLEX
226  case 0x136: r = "K"; break; //LATIN CAPITAL LETTER K WITH CEDILLA
227  case 0x137: r = "k"; break; //LATIN SMALL LETTER K WITH CEDILLA
228  case 0x138: r = "k"; break; //LATIN SMALL LETTER KRA
229  case 0x139: r = "L"; break; //LATIN CAPITAL LETTER L WITH ACUTE
230  case 0x13A: r = "l"; break; //LATIN SMALL LETTER L WITH ACUTE
231  case 0x13B: r = "L"; break; //LATIN CAPITAL LETTER L WITH CEDILLA
232  case 0x13C: r = "l"; break; //LATIN SMALL LETTER L WITH CEDILLA
233  case 0x13D: r = "L"; break; //LATIN CAPITAL LETTER L WITH CARON
234  case 0x13E: r = "l"; break; //LATIN SMALL LETTER L WITH CARON
235  case 0x13F: r = "L"; break; //LATIN CAPITAL LETTER L WITH MIDDLE DOT
236  case 0x140: r = "l"; break; //LATIN SMALL LETTER L WITH MIDDLE DOT
237  case 0x141: r = "L"; break; //LATIN CAPITAL LETTER L WITH STROKE
238  case 0x142: r = "l"; break; //LATIN SMALL LETTER L WITH STROKE
239  case 0x143: r = "N"; break; //LATIN CAPITAL LETTER N WITH ACUTE
240  case 0x144: r = "n"; break; //LATIN SMALL LETTER N WITH ACUTE
241  case 0x145: r = "N"; break; //LATIN CAPITAL LETTER N WITH CEDILLA
242  case 0x146: r = "n"; break; //LATIN SMALL LETTER N WITH CEDILLA
243  case 0x147: r = "N"; break; //LATIN CAPITAL LETTER N WITH CARON
244  case 0x148: r = "n"; break; //LATIN SMALL LETTER N WITH CARON
245  case 0x149: r = "n"; break; //LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
246  case 0x14A: r = "N"; break; //LATIN CAPITAL LETTER ENG
247  case 0x14B: r = "n"; break; //LATIN SMALL LETTER ENG
248  case 0x14C: r = "O"; break; //LATIN CAPITAL LETTER O WITH MACRON
249  case 0x14D: r = "o"; break; //LATIN SMALL LETTER O WITH MACRON
250  case 0x14E: r = "O"; break; //LATIN CAPITAL LETTER O WITH BREVE
251  case 0x14F: r = "o"; break; //LATIN SMALL LETTER O WITH BREVE
252  case 0x150: r = "O"; break; //LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
253  case 0x151: r = "o"; break; //LATIN SMALL LETTER O WITH DOUBLE ACUTE
254  case 0x152: r = "OE"; break; //LATIN CAPITAL LIGATURE OE
255  case 0x153: r = "oe"; break; //LATIN SMALL LIGATURE OE
256  case 0x154: r = "R"; break; //LATIN CAPITAL LETTER R WITH ACUTE
257  case 0x155: r = "r"; break; //LATIN SMALL LETTER R WITH ACUTE
258  case 0x156: r = "R"; break; //LATIN CAPITAL LETTER R WITH CEDILLA
259  case 0x157: r = "r"; break; //LATIN SMALL LETTER R WITH CEDILLA
260  case 0x158: r = "R"; break; //LATIN CAPITAL LETTER R WITH CARON
261  case 0x159: r = "r"; break; //LATIN SMALL LETTER R WITH CARON
262  case 0x15A: r = "S"; break; //LATIN CAPITAL LETTER S WITH ACUTE
263  case 0x15B: r = "s"; break; //LATIN SMALL LETTER S WITH ACUTE
264  case 0x15C: r = "S"; break; //LATIN CAPITAL LETTER S WITH CIRCUMFLEX
265  case 0x15D: r = "s"; break; //LATIN SMALL LETTER S WITH CIRCUMFLEX
266  case 0x15E: r = "S"; break; //LATIN CAPITAL LETTER S WITH CEDILLA
267  case 0x15F: r = "s"; break; //LATIN SMALL LETTER S WITH CEDILLA
268  case 0x160: r = "S"; break; //LATIN CAPITAL LETTER S WITH CARON
269  case 0x161: r = "s"; break; //LATIN SMALL LETTER S WITH CARON
270  case 0x162: r = "T"; break; //LATIN CAPITAL LETTER T WITH CEDILLA
271  case 0x163: r = "t"; break; //LATIN SMALL LETTER T WITH CEDILLA
272  case 0x164: r = "T"; break; //LATIN CAPITAL LETTER T WITH CARON
273  case 0x165: r = "t"; break; //LATIN SMALL LETTER T WITH CARON
274  case 0x166: r = "T"; break; //LATIN CAPITAL LETTER T WITH STROKE
275  case 0x167: r = "t"; break; //LATIN SMALL LETTER T WITH STROKE
276  case 0x168: r = "U"; break; //LATIN CAPITAL LETTER U WITH TILDE
277  case 0x169: r = "u"; break; //LATIN SMALL LETTER U WITH TILDE
278  case 0x16A: r = "U"; break; //LATIN CAPITAL LETTER U WITH MACRON
279  case 0x16B: r = "u"; break; //LATIN SMALL LETTER U WITH MACRON
280  case 0x16C: r = "U"; break; //LATIN CAPITAL LETTER U WITH BREVE
281  case 0x16D: r = "u"; break; //LATIN SMALL LETTER U WITH BREVE
282  case 0x16E: r = "U"; break; //LATIN CAPITAL LETTER U WITH RING ABOVE
283  case 0x16F: r = "u"; break; //LATIN SMALL LETTER U WITH RING ABOVE
284  case 0x170: r = "U"; break; //LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
285  case 0x171: r = "u"; break; //LATIN SMALL LETTER U WITH DOUBLE ACUTE
286  case 0x172: r = "U"; break; //LATIN CAPITAL LETTER U WITH OGONEK
287  case 0x173: r = "u"; break; //LATIN SMALL LETTER U WITH OGONEK
288  case 0x174: r = "W"; break; //LATIN CAPITAL LETTER W WITH CIRCUMFLEX
289  case 0x175: r = "w"; break; //LATIN SMALL LETTER W WITH CIRCUMFLEX
290  case 0x176: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
291  case 0x177: r = "y"; break; //LATIN SMALL LETTER Y WITH CIRCUMFLEX
292  case 0x178: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH DIAERESIS
293  case 0x179: r = "Z"; break; //LATIN CAPITAL LETTER Z WITH ACUTE
294  case 0x17A: r = "z"; break; //LATIN SMALL LETTER Z WITH ACUTE
295  case 0x17B: r = "Z"; break; //LATIN CAPITAL LETTER Z WITH DOT ABOVE
296  case 0x17C: r = "z"; break; //LATIN SMALL LETTER Z WITH DOT ABOVE
297  case 0x17D: r = "Z"; break; //LATIN CAPITAL LETTER Z WITH CARON
298  case 0x17E: r = "z"; break; //LATIN SMALL LETTER Z WITH CARON
299  case 0x17F: r = "s"; break; //LATIN SMALL LETTER LONG S
300 
301 // https://www.fileformat.info/info/unicode/block/latin_extended_additional/list.htm
302  case 0x1E00: r = "A"; break; //LATIN CAPITAL LETTER A WITH RING BELOW
303  case 0x1E01: r = "a"; break; //LATIN SMALL LETTER A WITH RING BELOW
304  case 0x1E02: r = "B"; break; //LATIN CAPITAL LETTER B WITH DOT ABOVE
305  case 0x1E03: r = "b"; break; //LATIN SMALL LETTER B WITH DOT ABOVE
306  case 0x1E04: r = "B"; break; //LATIN CAPITAL LETTER B WITH DOT BELOW
307  case 0x1E05: r = "b"; break; //LATIN SMALL LETTER B WITH DOT BELOW
308  case 0x1E06: r = "B"; break; //LATIN CAPITAL LETTER B WITH LINE BELOW
309  case 0x1E07: r = "b"; break; //LATIN SMALL LETTER B WITH LINE BELOW
310  case 0x1E08: r = "C"; break; //LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
311  case 0x1E09: r = "c"; break; //LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
312  case 0x1E0A: r = "D"; break; //LATIN CAPITAL LETTER D WITH DOT ABOVE
313  case 0x1E0B: r = "d"; break; //LATIN SMALL LETTER D WITH DOT ABOVE
314  case 0x1E0C: r = "D"; break; //LATIN CAPITAL LETTER D WITH DOT BELOW
315  case 0x1E0D: r = "d"; break; //LATIN SMALL LETTER D WITH DOT BELOW
316  case 0x1E0E: r = "D"; break; //LATIN CAPITAL LETTER D WITH LINE BELOW
317  case 0x1E0F: r = "d"; break; //LATIN SMALL LETTER D WITH LINE BELOW
318  case 0x1E10: r = "D"; break; //LATIN CAPITAL LETTER D WITH CEDILLA
319  case 0x1E11: r = "d"; break; //LATIN SMALL LETTER D WITH CEDILLA
320  case 0x1E12: r = "D"; break; //LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
321  case 0x1E13: r = "d"; break; //LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
322  case 0x1E14: r = "E"; break; //LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
323  case 0x1E15: r = "e"; break; //LATIN SMALL LETTER E WITH MACRON AND GRAVE
324  case 0x1E16: r = "E"; break; //LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
325  case 0x1E17: r = "e"; break; //LATIN SMALL LETTER E WITH MACRON AND ACUTE
326  case 0x1E18: r = "E"; break; //LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
327  case 0x1E19: r = "e"; break; //LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
328  case 0x1E1A: r = "E"; break; //LATIN CAPITAL LETTER E WITH TILDE BELOW
329  case 0x1E1B: r = "e"; break; //LATIN SMALL LETTER E WITH TILDE BELOW
330  case 0x1E1C: r = "E"; break; //LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
331  case 0x1E1D: r = "e"; break; //LATIN SMALL LETTER E WITH CEDILLA AND BREVE
332  case 0x1E1E: r = "F"; break; //LATIN CAPITAL LETTER F WITH DOT ABOVE
333  case 0x1E1F: r = "f"; break; //LATIN SMALL LETTER F WITH DOT ABOVE
334  case 0x1E20: r = "G"; break; //LATIN CAPITAL LETTER G WITH MACRON
335  case 0x1E21: r = "g"; break; //LATIN SMALL LETTER G WITH MACRON
336  case 0x1E22: r = "H"; break; //LATIN CAPITAL LETTER H WITH DOT ABOVE
337  case 0x1E23: r = "h"; break; //LATIN SMALL LETTER H WITH DOT ABOVE
338  case 0x1E24: r = "H"; break; //LATIN CAPITAL LETTER H WITH DOT BELOW
339  case 0x1E25: r = "h"; break; //LATIN SMALL LETTER H WITH DOT BELOW
340  case 0x1E26: r = "H"; break; //LATIN CAPITAL LETTER H WITH DIAERESIS
341  case 0x1E27: r = "h"; break; //LATIN SMALL LETTER H WITH DIAERESIS
342  case 0x1E28: r = "H"; break; //LATIN CAPITAL LETTER H WITH CEDILLA
343  case 0x1E29: r = "h"; break; //LATIN SMALL LETTER H WITH CEDILLA
344  case 0x1E2A: r = "H"; break; //LATIN CAPITAL LETTER H WITH BREVE BELOW
345  case 0x1E2B: r = "h"; break; //LATIN SMALL LETTER H WITH BREVE BELOW
346  case 0x1E2C: r = "I"; break; //LATIN CAPITAL LETTER I WITH TILDE BELOW
347  case 0x1E2D: r = "i"; break; //LATIN SMALL LETTER I WITH TILDE BELOW
348  case 0x1E2E: r = "I"; break; //LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
349  case 0x1E2F: r = "i"; break; //LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
350  case 0x1E30: r = "K"; break; //LATIN CAPITAL LETTER K WITH ACUTE
351  case 0x1E31: r = "k"; break; //LATIN SMALL LETTER K WITH ACUTE
352  case 0x1E32: r = "K"; break; //LATIN CAPITAL LETTER K WITH DOT BELOW
353  case 0x1E33: r = "k"; break; //LATIN SMALL LETTER K WITH DOT BELOW
354  case 0x1E34: r = "K"; break; //LATIN CAPITAL LETTER K WITH LINE BELOW
355  case 0x1E35: r = "k"; break; //LATIN SMALL LETTER K WITH LINE BELOW
356  case 0x1E36: r = "L"; break; //LATIN CAPITAL LETTER L WITH DOT BELOW
357  case 0x1E37: r = "l"; break; //LATIN SMALL LETTER L WITH DOT BELOW
358  case 0x1E38: r = "L"; break; //LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
359  case 0x1E39: r = "l"; break; //LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
360  case 0x1E3A: r = "L"; break; //LATIN CAPITAL LETTER L WITH LINE BELOW
361  case 0x1E3B: r = "l"; break; //LATIN SMALL LETTER L WITH LINE BELOW
362  case 0x1E3C: r = "L"; break; //LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
363  case 0x1E3D: r = "l"; break; //LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
364  case 0x1E3E: r = "M"; break; //LATIN CAPITAL LETTER M WITH ACUTE
365  case 0x1E3F: r = "m"; break; //LATIN SMALL LETTER M WITH ACUTE
366  case 0x1E40: r = "M"; break; //LATIN CAPITAL LETTER M WITH DOT ABOVE
367  case 0x1E41: r = "m"; break; //LATIN SMALL LETTER M WITH DOT ABOVE
368  case 0x1E42: r = "M"; break; //LATIN CAPITAL LETTER M WITH DOT BELOW
369  case 0x1E43: r = "m"; break; //LATIN SMALL LETTER M WITH DOT BELOW
370  case 0x1E44: r = "N"; break; //LATIN CAPITAL LETTER N WITH DOT ABOVE
371  case 0x1E45: r = "n"; break; //LATIN SMALL LETTER N WITH DOT ABOVE
372  case 0x1E46: r = "N"; break; //LATIN CAPITAL LETTER N WITH DOT BELOW
373  case 0x1E47: r = "n"; break; //LATIN SMALL LETTER N WITH DOT BELOW
374  case 0x1E48: r = "N"; break; //LATIN CAPITAL LETTER N WITH LINE BELOW
375  case 0x1E49: r = "n"; break; //LATIN SMALL LETTER N WITH LINE BELOW
376  case 0x1E4A: r = "N"; break; //LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
377  case 0x1E4B: r = "n"; break; //LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
378  case 0x1E4C: r = "O"; break; //LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
379  case 0x1E4D: r = "o"; break; //LATIN SMALL LETTER O WITH TILDE AND ACUTE
380  case 0x1E4E: r = "O"; break; //LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
381  case 0x1E4F: r = "o"; break; //LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
382  case 0x1E50: r = "O"; break; //LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
383  case 0x1E51: r = "o"; break; //LATIN SMALL LETTER O WITH MACRON AND GRAVE
384  case 0x1E52: r = "O"; break; //LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
385  case 0x1E53: r = "o"; break; //LATIN SMALL LETTER O WITH MACRON AND ACUTE
386  case 0x1E54: r = "P"; break; //LATIN CAPITAL LETTER P WITH ACUTE
387  case 0x1E55: r = "p"; break; //LATIN SMALL LETTER P WITH ACUTE
388  case 0x1E56: r = "P"; break; //LATIN CAPITAL LETTER P WITH DOT ABOVE
389  case 0x1E57: r = "p"; break; //LATIN SMALL LETTER P WITH DOT ABOVE
390  case 0x1E58: r = "P"; break; //LATIN CAPITAL LETTER R WITH DOT ABOVE
391  case 0x1E59: r = "p"; break; //LATIN SMALL LETTER R WITH DOT ABOVE
392  case 0x1E5A: r = "R"; break; //LATIN CAPITAL LETTER R WITH DOT BELOW
393  case 0x1E5B: r = "r"; break; //LATIN SMALL LETTER R WITH DOT BELOW
394  case 0x1E5C: r = "R"; break; //LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
395  case 0x1E5D: r = "r"; break; //LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
396  case 0x1E5E: r = "R"; break; //LATIN CAPITAL LETTER R WITH LINE BELOW
397  case 0x1E5F: r = "r"; break; //LATIN SMALL LETTER R WITH LINE BELOW
398  case 0x1E60: r = "S"; break; //LATIN CAPITAL LETTER S WITH DOT ABOVE
399  case 0x1E61: r = "s"; break; //LATIN SMALL LETTER S WITH DOT ABOVE
400  case 0x1E62: r = "S"; break; //LATIN CAPITAL LETTER S WITH DOT BELOW
401  case 0x1E63: r = "s"; break; //LATIN SMALL LETTER S WITH DOT BELOW
402  case 0x1E64: r = "S"; break; //LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
403  case 0x1E65: r = "s"; break; //LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
404  case 0x1E66: r = "S"; break; //LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
405  case 0x1E67: r = "s"; break; //LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
406  case 0x1E68: r = "S"; break; //LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
407  case 0x1E69: r = "s"; break; //LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
408  case 0x1E6A: r = "T"; break; //LATIN CAPITAL LETTER T WITH DOT ABOVE
409  case 0x1E6B: r = "t"; break; //LATIN SMALL LETTER T WITH DOT ABOVE
410  case 0x1E6C: r = "T"; break; //LATIN CAPITAL LETTER T WITH DOT BELOW
411  case 0x1E6D: r = "t"; break; //LATIN SMALL LETTER T WITH DOT BELOW
412  case 0x1E6E: r = "T"; break; //LATIN CAPITAL LETTER T WITH LINE BELOW
413  case 0x1E6F: r = "t"; break; //LATIN SMALL LETTER T WITH LINE BELOW
414  case 0x1E70: r = "T"; break; //LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
415  case 0x1E71: r = "t"; break; //LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
416  case 0x1E72: r = "U"; break; //LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
417  case 0x1E73: r = "u"; break; //LATIN SMALL LETTER U WITH DIAERESIS BELOW
418  case 0x1E74: r = "U"; break; //LATIN CAPITAL LETTER U WITH TILDE BELOW
419  case 0x1E75: r = "u"; break; //LATIN SMALL LETTER U WITH TILDE BELOW
420  case 0x1E76: r = "U"; break; //LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
421  case 0x1E77: r = "u"; break; //LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
422  case 0x1E78: r = "U"; break; //LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
423  case 0x1E79: r = "u"; break; //LATIN SMALL LETTER U WITH TILDE AND ACUTE
424  case 0x1E7A: r = "U"; break; //LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
425  case 0x1E7B: r = "u"; break; //LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
426  case 0x1E7C: r = "V"; break; //LATIN CAPITAL LETTER V WITH TILDE
427  case 0x1E7D: r = "v"; break; //LATIN SMALL LETTER V WITH TILDE
428  case 0x1E7E: r = "V"; break; //LATIN CAPITAL LETTER V WITH DOT BELOW
429  case 0x1E7F: r = "v"; break; //LATIN SMALL LETTER V WITH DOT BELOW
430  case 0x1E80: r = "V"; break; //LATIN CAPITAL LETTER W WITH GRAVE
431  case 0x1E81: r = "w"; break; //LATIN SMALL LETTER W WITH GRAVE
432  case 0x1E82: r = "W"; break; //LATIN CAPITAL LETTER W WITH ACUTE
433  case 0x1E83: r = "w"; break; //LATIN SMALL LETTER W WITH ACUTE
434  case 0x1E84: r = "W"; break; //LATIN CAPITAL LETTER W WITH DIAERESIS
435  case 0x1E85: r = "w"; break; //LATIN SMALL LETTER W WITH DIAERESIS
436  case 0x1E86: r = "W"; break; //LATIN CAPITAL LETTER W WITH DOT ABOVE
437  case 0x1E87: r = "w"; break; //LATIN SMALL LETTER W WITH DOT ABOVE
438  case 0x1E88: r = "W"; break; //LATIN CAPITAL LETTER W WITH DOT BELOW
439  case 0x1E89: r = "w"; break; //LATIN SMALL LETTER W WITH DOT BELOW
440  case 0x1E8A: r = "W"; break; //LATIN CAPITAL LETTER X WITH DOT ABOVE
441  case 0x1E8B: r = "x"; break; //LATIN SMALL LETTER X WITH DOT ABOVE
442  case 0x1E8C: r = "X"; break; //LATIN CAPITAL LETTER X WITH DIAERESIS
443  case 0x1E8D: r = "x"; break; //LATIN SMALL LETTER X WITH DIAERESIS
444  case 0x1E8E: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH DOT ABOVE
445  case 0x1E8F: r = "y"; break; //LATIN SMALL LETTER Y WITH DOT ABOVE
446  case 0x1E90: r = "Z"; break; //LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
447  case 0x1E91: r = "z"; break; //LATIN SMALL LETTER Z WITH CIRCUMFLEX
448  case 0x1E92: r = "Z"; break; //LATIN CAPITAL LETTER Z WITH DOT BELOW
449  case 0x1E93: r = "z"; break; //LATIN SMALL LETTER Z WITH DOT BELOW
450  case 0x1E94: r = "Z"; break; //LATIN CAPITAL LETTER Z WITH LINE BELOW
451  case 0x1E95: r = "z"; break; //LATIN SMALL LETTER Z WITH LINE BELOW
452  case 0x1E96: r = "h"; break; //LATIN SMALL LETTER H WITH LINE BELOW
453  case 0x1E97: r = "t"; break; //LATIN SMALL LETTER T WITH DIAERESIS
454  case 0x1E98: r = "w"; break; //LATIN SMALL LETTER W WITH RING ABOVE
455  case 0x1E99: r = "y"; break; //LATIN SMALL LETTER Y WITH RING ABOVE
456  case 0x1E9A: r = "a"; break; //LATIN SMALL LETTER A WITH RIGHT HALF RING
457  case 0x1E9B: r = "s"; break; //LATIN SMALL LETTER LONG S WITH DOT ABOVE
458  case 0x1E9C: r = "s"; break; //LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE
459  case 0x1E9D: r = "s"; break; //LATIN SMALL LETTER LONG S WITH HIGH STROKE
460  case 0x1E9E: r = "S"; break; //LATIN CAPITAL LETTER SHARP S
461  case 0x1E9F: r = "d"; break; //LATIN SMALL LETTER DELTA
462  case 0x1EA0: r = "A"; break; //LATIN CAPITAL LETTER A WITH DOT BELOW
463  case 0x1EA1: r = "a"; break; //LATIN SMALL LETTER A WITH DOT BELOW
464  case 0x1EA2: r = "A"; break; //LATIN CAPITAL LETTER A WITH HOOK ABOVE
465  case 0x1EA3: r = "a"; break; //LATIN SMALL LETTER A WITH HOOK ABOVE
466  case 0x1EA4: r = "A"; break; //LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
467  case 0x1EA5: r = "a"; break; //LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
468  case 0x1EA6: r = "A"; break; //LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
469  case 0x1EA7: r = "a"; break; //LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
470  case 0x1EA8: r = "A"; break; //LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
471  case 0x1EA9: r = "a"; break; //LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
472  case 0x1EAA: r = "A"; break; //LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
473  case 0x1EAB: r = "a"; break; //LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
474  case 0x1EAC: r = "A"; break; //LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
475  case 0x1EAD: r = "a"; break; //LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
476  case 0x1EAE: r = "A"; break; //LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
477  case 0x1EAF: r = "a"; break; //LATIN SMALL LETTER A WITH BREVE AND ACUTE
478  case 0x1EB0: r = "A"; break; //LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
479  case 0x1EB1: r = "a"; break; //LATIN SMALL LETTER A WITH BREVE AND GRAVE
480  case 0x1EB2: r = "A"; break; //LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
481  case 0x1EB3: r = "a"; break; //LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
482  case 0x1EB4: r = "A"; break; //LATIN CAPITAL LETTER A WITH BREVE AND TILDE
483  case 0x1EB5: r = "a"; break; //LATIN SMALL LETTER A WITH BREVE AND TILDE
484  case 0x1EB6: r = "A"; break; //LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
485  case 0x1EB7: r = "a"; break; //LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
486  case 0x1EB8: r = "E"; break; //LATIN CAPITAL LETTER E WITH DOT BELOW
487  case 0x1EB9: r = "e"; break; //LATIN SMALL LETTER E WITH DOT BELOW
488  case 0x1EBA: r = "E"; break; //LATIN CAPITAL LETTER E WITH HOOK ABOVE
489  case 0x1EBB: r = "e"; break; //LATIN SMALL LETTER E WITH HOOK ABOVE
490  case 0x1EBC: r = "E"; break; //LATIN CAPITAL LETTER E WITH TILDE
491  case 0x1EBD: r = "e"; break; //LATIN SMALL LETTER E WITH TILDE
492  case 0x1EBE: r = "E"; break; //LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
493  case 0x1EBF: r = "e"; break; //LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
494  case 0x1EC0: r = "E"; break; //LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
495  case 0x1EC1: r = "e"; break; //LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
496  case 0x1EC2: r = "E"; break; //LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
497  case 0x1EC3: r = "e"; break; //LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
498  case 0x1EC4: r = "E"; break; //LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
499  case 0x1EC5: r = "e"; break; //LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
500  case 0x1EC6: r = "E"; break; //LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
501  case 0x1EC7: r = "e"; break; //LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
502  case 0x1EC8: r = "I"; break; //LATIN CAPITAL LETTER I WITH HOOK ABOVE
503  case 0x1EC9: r = "i"; break; //LATIN SMALL LETTER I WITH HOOK ABOVE
504  case 0x1ECA: r = "I"; break; //LATIN CAPITAL LETTER I WITH DOT BELOW
505  case 0x1ECB: r = "i"; break; //LATIN SMALL LETTER I WITH DOT BELOW
506  case 0x1ECC: r = "O"; break; //LATIN CAPITAL LETTER O WITH DOT BELOW
507  case 0x1ECD: r = "o"; break; //LATIN SMALL LETTER O WITH DOT BELOW
508  case 0x1ECE: r = "O"; break; //LATIN CAPITAL LETTER O WITH HOOK ABOVE
509  case 0x1ECF: r = "o"; break; //LATIN SMALL LETTER O WITH HOOK ABOVE
510  case 0x1ED0: r = "O"; break; //LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
511  case 0x1ED1: r = "o"; break; //LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
512  case 0x1ED2: r = "O"; break; //LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
513  case 0x1ED3: r = "o"; break; //LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
514  case 0x1ED4: r = "O"; break; //LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
515  case 0x1ED5: r = "o"; break; //LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
516  case 0x1ED6: r = "O"; break; //LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
517  case 0x1ED7: r = "o"; break; //LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
518  case 0x1ED8: r = "O"; break; //LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
519  case 0x1ED9: r = "o"; break; //LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
520  case 0x1EDA: r = "O"; break; //LATIN CAPITAL LETTER O WITH HORN AND ACUTE
521  case 0x1EDB: r = "o"; break; //LATIN SMALL LETTER O WITH HORN AND ACUTE
522  case 0x1EDC: r = "O"; break; //LATIN CAPITAL LETTER O WITH HORN AND GRAVE
523  case 0x1EDD: r = "o"; break; //LATIN SMALL LETTER O WITH HORN AND GRAVE
524  case 0x1EDE: r = "O"; break; //LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
525  case 0x1EDF: r = "o"; break; //LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
526  case 0x1EE0: r = "O"; break; //LATIN CAPITAL LETTER O WITH HORN AND TILDE
527  case 0x1EE1: r = "o"; break; //LATIN SMALL LETTER O WITH HORN AND TILDE
528  case 0x1EE2: r = "O"; break; //LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
529  case 0x1EE3: r = "o"; break; //LATIN SMALL LETTER O WITH HORN AND DOT BELOW
530  case 0x1EE4: r = "U"; break; //LATIN CAPITAL LETTER U WITH DOT BELOW
531  case 0x1EE5: r = "u"; break; //LATIN SMALL LETTER U WITH DOT BELOW
532  case 0x1EE6: r = "U"; break; //LATIN CAPITAL LETTER U WITH HOOK ABOVE
533  case 0x1EE7: r = "u"; break; //LATIN SMALL LETTER U WITH HOOK ABOVE
534  case 0x1EE8: r = "U"; break; //LATIN CAPITAL LETTER U WITH HORN AND ACUTE
535  case 0x1EE9: r = "u"; break; //LATIN SMALL LETTER U WITH HORN AND ACUTE
536  case 0x1EEA: r = "U"; break; //LATIN CAPITAL LETTER U WITH HORN AND GRAVE
537  case 0x1EEB: r = "u"; break; //LATIN SMALL LETTER U WITH HORN AND GRAVE
538  case 0x1EEC: r = "U"; break; //LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
539  case 0x1EED: r = "u"; break; //LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
540  case 0x1EEE: r = "U"; break; //LATIN CAPITAL LETTER U WITH HORN AND TILDE
541  case 0x1EEF: r = "u"; break; //LATIN SMALL LETTER U WITH HORN AND TILDE
542  case 0x1EF0: r = "U"; break; //LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
543  case 0x1EF1: r = "u"; break; //LATIN SMALL LETTER U WITH HORN AND DOT BELOW
544  case 0x1EF2: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH GRAVE
545  case 0x1EF3: r = "y"; break; //LATIN SMALL LETTER Y WITH GRAVE
546  case 0x1EF4: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH DOT BELOW
547  case 0x1EF5: r = "y"; break; //LATIN SMALL LETTER Y WITH DOT BELOW
548  case 0x1EF6: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH HOOK ABOVE
549  case 0x1EF7: r = "y"; break; //LATIN SMALL LETTER Y WITH HOOK ABOVE
550  case 0x1EF8: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH TILDE
551  case 0x1EF9: r = "y"; break; //LATIN SMALL LETTER Y WITH TILDE
552  case 0x1EFA: r = "LL"; break; //LATIN CAPITAL LETTER MIDDLE-WELSH LL
553  case 0x1EFB: r = "ll"; break; //LATIN SMALL LETTER MIDDLE-WELSH LL
554  case 0x1EFC: r = "V"; break; //LATIN CAPITAL LETTER MIDDLE-WELSH V
555  case 0x1EFD: r = "v"; break; //LATIN SMALL LETTER MIDDLE-WELSH V
556  case 0x1EFE: r = "Y"; break; //LATIN CAPITAL LETTER Y WITH LOOP
557  case 0x1EFF: r = "y"; break; //LATIN SMALL LETTER Y WITH LOOP
558 
559  case 0x2019: r = "'"; break;
560  default : break;
561  }
562 // if (r == "#")
563 // cout << "Unicode: " << ch << endl;
564  return r;
565 }
566 
568 {
569  string new_str;
570  for (string::const_iterator i = input.begin(); i != input.end(); ++i)
571  {
573  if (sym < 0x80)
574  {
575  new_str += static_cast<char>(sym);
576  }
577  else
578  {
579  string replacement = GetSpecialCharacterReplacement(sym);
580  new_str += replacement;
581  }
582  }
583  return new_str;
584 }
585 
586 void FixAuthorCap(string &name, bool bApostrophes) // FixCapitalizationInElement in sqnutil2.c
587 {
588  if (!NStr::IsUpper(name))
589  return;
590 
592  if (bApostrophes)
594 }
595 
596 
598 {
599  string val = "";
600  if (node.size() > 0) {
601  xml::node::iterator it = node.begin();
602  while (it != node.end()) {
603  const char* c = node.get_content();
604  if (c) {
605  val = c;
606  if (!NStr::IsBlank(val)) {
607  return val;
608  }
609  }
610  ++it;
611  }
612  }
613  return val;
614 }
615 
616 
617 
618 string s_GetTitleSuggestion(const string& medline, const string& title, const string& issn)
619 {
620  if (medline.empty()) {
621  return kEmptyStr;
622  }
623  string suggest = medline;
624  if (!NStr::IsBlank(title) || !NStr::IsBlank(issn)) {
625  suggest += "||(";
626  if (!NStr::IsBlank(title)) {
627  suggest += title;
628  if (!NStr::IsBlank(issn)) {
629  suggest += ":";
630  }
631  }
632  if (!NStr::IsBlank(issn)) {
633  suggest += issn;
634  }
635  suggest += ")";
636  }
637  return suggest;
638 }
639 
640 
641 void s_GetTitle(xml::node& node, string& medline, string& title, string &issn, vector<string>& titles)
642 {
643  string name;
644  string val;
645  const char * n = node.get_name();
646  if (n) {
647  name = n;
648  if (NStr::EqualNocase(name, "Title")) {
649  title = s_GetValFromChildren(node);
650  } else if (NStr::EqualNocase(name, "issn")) {
651  issn = s_GetValFromChildren(node);
652  } else if (NStr::StartsWith(name, "medline", NStr::eNocase)) {
653  medline = s_GetValFromChildren(node);
654  } else if (NStr::EqualNocase(name, "DocumentSummary")) {
655  if (!medline.empty()) {
656  titles.push_back(s_GetTitleSuggestion(medline, title, issn));
657  }
658  medline.resize(0);
659  title.resize(0);
660  issn.resize(0);
661  }
662  }
663  if (node.is_text()) {
664 
665  const char * c = node.get_content();
666  if (c) {
667  val = c;
668  }
669 
670  }
671 
672  if (node.size() > 0) {
673  xml::node::iterator it = node.begin();
674  while (it != node.end()) {
675  s_GetTitle(*it, medline, title, issn, titles);
676  ++it;
677  }
678  }
679 
680 }
681 
682 
683 namespace {
684  // Return false when the query cannot be executed, for example due to connection failure.
685  // A 'true' return value does not mean that the search was successful.
686  bool s_GetJournalIds(const string& old_title, const string& collection, vector<string>& uids)
687  {
688  uids.clear();
689  // get ISO JTA titles
690  string db("nlmcatalog");
691 
692  CGuiEutilsClient ecli;
693  ecli.SetMaxReturn(200);
694 
695  bool success = true;
696  try {
697  ecli.Search(db, old_title + collection, uids);
698  }
699  catch (const CException& e) {
700  LOG_POST(Error << "Error: " << e.GetMsg());
701  success = false;
702  }
703  catch (const exception& e) {
704  LOG_POST(Error << "Error: " << e.what());
705  success = false;
706  }
707  return success;
708  }
709 }
710 
711 bool CDoiLookup::LookupIsojta(string old_title, vector<string> &titles)
712 {
713  bool success = true;
714  auto start = chrono::steady_clock::now();
715 
716  vector<string> search_ids;
717  // shortcuts
718  string shortcut_title = CISOJTALookupWithCache::s_GetISOShortcut(old_title);
719  if (!shortcut_title.empty()) {
720  success = s_GetJournalIds(shortcut_title, "[iso]", search_ids);
721  }
722 
724  if (NStr::IsBlank(old_title)) {
725  return true;
726  }
727 
728  if (search_ids.empty()) {
729  auto diff = chrono::steady_clock::now() - start;
730  if (chrono::duration_cast<chrono::seconds>(diff) < chrono::seconds(45) || success) {
731  // try to get IDs from multi
732  success = s_GetJournalIds(old_title, "[issn]", search_ids);
733  }
734  }
735 
736  if (search_ids.empty()) {
737  auto diff = chrono::steady_clock::now() - start;
738  if (chrono::duration_cast<chrono::seconds>(diff) < chrono::seconds(45) || success) {
739  success = s_GetJournalIds(old_title, "[ti] AND ncbijournals[sb]", search_ids);
740  }
741  }
742  if (search_ids.empty()) {
743  auto diff = chrono::steady_clock::now() - start;
744  if (chrono::duration_cast<chrono::seconds>(diff) < chrono::seconds(45) || success) {
745  success = s_GetJournalIds(old_title, "[jour]", search_ids);
746  }
747  }
748  if (search_ids.empty()) {
749  string title_no_punct = NStr::Replace(old_title, ",", " ");
750  NStr::ReplaceInPlace(title_no_punct, "&", " ");
751  NStr::ReplaceInPlace(title_no_punct, ";", " ");
752  NStr::ReplaceInPlace(title_no_punct, ":", " ");
753  vector<string> tokens;
754  NStr::Split(title_no_punct, " ", tokens, NStr::fSplit_Tokenize);
755  string fields = NStr::Join(tokens, "[All Fields] AND ");
756  auto diff = chrono::steady_clock::now() - start;
757  if (chrono::duration_cast<chrono::seconds>(diff) < chrono::seconds(45) || success) {
758  success = s_GetJournalIds(fields, "[All Fields] AND ncbijournals[All Fields]", search_ids);
759  }
760  }
761  if (!search_ids.empty()) {
762  try {
763  string db = "nlmcatalog";
764  xml::document docsums;
765 
766  CGuiEutilsClient ecli;
767  ecli.SetMaxReturn(200);
768 
769  auto diff = chrono::steady_clock::now() - start;
770  if (chrono::duration_cast<chrono::seconds>(diff) < chrono::seconds(45) || success) {
771  ecli.Summary(db, search_ids, docsums, "2.0");
772  }
773 
774  xml::node::iterator node_it = docsums.begin();
775  string medline = "", title = "", issn = "";
776  while (node_it != docsums.end()) {
777  s_GetTitle(*node_it, medline, title, issn, titles);
778  node_it++;
779  }
780  if (!medline.empty()) {
781  titles.push_back(s_GetTitleSuggestion(medline, title, issn));
782  }
783  } catch (CException& e) {
784  LOG_POST(Error << "Error: " << e.GetMsg());
785  success = false;
786  }
787  }
788  return success;
789 }
790 
791 static void ExtractMiddleInitial(string &name, string &initial)
792 {
793  vector<string> parts;
794  NStr::Split(name, " ", parts, NStr::fSplit_Tokenize);
795  if (parts.size() != 2)
796  return;
797  if (parts[1].size() != 2)
798  return;
799  if (!isupper(parts[1][0]) || parts[1][1] != '.')
800  return;
801  name = parts[0];
802  initial = parts[1][0];
803 }
804 
805 // http://jsonviewer.stack.hu/ online json viewer
806 pair<CRef<CPubdesc>, string> CDoiLookup::GetPubFromCrossRef(const string &doi)
807 {
808  if (doi.empty())
809  return make_pair(CRef<CPubdesc>(NULL), "Empty doi request");
810 
811  CHttpSession session;
812  int response_timeout = CGuiRegistry::GetInstance().GetInt("GBENCH.System.SeqConfigTimeout", 40);
813  CHttpResponse response = session.Get(CUrl("https://api.crossref.org/works/" + NStr::URLEncode(doi)), CTimeout(response_timeout), 0);
814  if (response.GetStatusCode() != 200)
815  return make_pair(CRef<CPubdesc>(NULL), "Server api.crossref.org returned error code: " + NStr::IntToString(response.GetStatusCode()) + " \n" + response.GetStatusText());
816 
817  stringstream ss;
818  NcbiStreamCopy(ss, response.ContentStream());
819  string json = ss.str();
820  if (json.empty())
821  return make_pair(CRef<CPubdesc>(NULL), "Empty json object returned from api.crossref.org");
822 
823  CJson_Document doc;
824  doc.ParseString(json);
825 
826 
827  if (!doc.ReadSucceeded() || !doc.IsObject())
828  return make_pair(CRef<CPubdesc>(NULL), "Unable to parse json returned from api.crossref.org");
829  CJson_ConstObject obj = doc.GetObject();
830  if ( !obj.has("status"))
831  return make_pair(CRef<CPubdesc>(NULL), "Record status is invalid");
832 
833  if (obj["status"].GetValue().GetString() != "ok")
834  return make_pair(CRef<CPubdesc>(NULL), "Record status is " + obj["status"].GetValue().GetString());
835 
836  if (!obj.has("message") || !obj["message"].IsObject())
837  return make_pair(CRef<CPubdesc>(NULL), "Empty record returned from api.crossref.org");
838 
839  CJson_ConstObject msg = obj["message"].GetObject();
840 
841  CRef<CPub> pub(new CPub);
842 
843  vector<string> issn;
844  if (msg.has("ISSN") && msg["ISSN"].IsArray()) {
845  for (size_t i = 0; i < msg["ISSN"].GetArray().size(); i++) {
846  if (msg["ISSN"].GetArray()[i].IsValue() && msg["ISSN"].GetArray()[i].GetValue().IsString()) {
847  string value = msg["ISSN"].GetArray()[i].GetValue().GetString();
848  if (!value.empty())
849  issn.push_back(value);
850  }
851  }
852  }
853 
854 
855  string journal_title;
856  const char* kContainerTitle = "container-title";
857  const char* kInst = "institution";
858 
859  if (msg.has(kContainerTitle) && msg[kContainerTitle].IsArray() && !msg[kContainerTitle].GetArray().empty()) {
860  const auto& cont_title = msg[kContainerTitle].GetArray().front();
861  if (cont_title.IsValue() && cont_title.GetValue().IsString()) {
862  journal_title = CHTMLHelper::StripHTML(Transcode(cont_title.GetValue().GetString()));
863  }
864  }
865  else if (msg.has(kInst) && msg[kInst].IsArray() && !msg[kInst].GetArray().empty()) {
866  const auto& msg_array = msg[kInst].GetArray().front();
867  if (msg_array.IsObject() &&
868  msg_array.GetObject().has("name") &&
869  msg_array.GetObject()["name"].IsValue() &&
870  msg_array.GetObject()["name"].GetValue().IsString()) {
871  journal_title = CHTMLHelper::StripHTML(Transcode(msg_array.GetObject()["name"].GetValue().GetString()));
872  }
873  }
874 
875  if (!journal_title.empty())
876  issn.push_back(journal_title);
877 
878  bool is_isojta = false;
879  for (const auto& old_title : issn)
880  {
881  vector<string> titles;
882  bool success = LookupIsojta(old_title, titles);
883  if (!success) {
884  break;
885  }
886 
887  if (titles.size() == 1)
888  {
889  string new_title = titles[0];
890  size_t pos = NStr::Find(new_title, "||");
891  if (pos != string::npos)
892  {
893  new_title = new_title.substr(0, pos);
894  }
895  if (!NStr::IsBlank(new_title))
896  {
897  journal_title = new_title;
898  is_isojta = true;
899  break;
900  }
901  }
902  }
903  CRef< CTitle::C_E > title(new CTitle::C_E);
904  if (is_isojta)
905  title->SetIso_jta(journal_title);
906  else
907  title->SetName(journal_title);
908 
909  auto& journal = pub->SetArticle().SetFrom().SetJournal();
910  journal.SetTitle().Set().push_back(title);
911 
912  auto& imprint = journal.SetImp();
913 
914  if (msg.has("issue") && msg["issue"].IsValue() && msg["issue"].GetValue().IsString()) {
915  imprint.SetIssue(CHTMLHelper::StripHTML(Transcode(msg["issue"].GetValue().GetString())));
916  }
917 
918  if (msg.has("volume") && msg["volume"].IsValue() && msg["volume"].GetValue().IsString()) {
919  imprint.SetVolume(CHTMLHelper::StripHTML(Transcode(msg["volume"].GetValue().GetString())));
920  }
921 
922  if (msg.has("page") && msg["page"].IsValue() && msg["page"].GetValue().IsString()) {
923  imprint.SetPages(CHTMLHelper::StripHTML(Transcode(msg["page"].GetValue().GetString())));
924  imprint.SetPubstatus(4);
925  }
926 
927  if (msg.has("title") && msg["title"].IsArray()
928  && !msg["title"].GetArray().empty()
929  && msg["title"].GetArray().front().IsValue()
930  && msg["title"].GetArray().front().GetValue().IsString()) {
931  CRef< CTitle::C_E > title(new CTitle::C_E);
932  title->SetName(CHTMLHelper::StripHTML(Transcode(msg["title"].GetArray().front().GetValue().GetString())));
933  pub->SetArticle().SetTitle().Set().push_back(title);
934  }
935 
936  if (msg.has("author") && msg["author"].IsArray())
937  {
938  //bool first_affil = true;
939  for (size_t i = 0; i < msg["author"].GetArray().size(); i++) {
940  if (!msg["author"].GetArray()[i].IsObject())
941  continue;
942  CJson_ConstObject author = msg["author"].GetArray()[i].GetObject();
943  CRef< CAuthor > auth(new CAuthor);
944  if (author.has("given") && author["given"].IsValue() && author["given"].GetValue().IsString()) {
945  string name = CHTMLHelper::StripHTML(Transcode(author["given"].GetValue().GetString()));
946  FixAuthorCap(name, false);
947  string initial;
948  ExtractMiddleInitial(name, initial);
949  auth->SetName().SetName().SetFirst(name);
950  if (!initial.empty())
951  auth->SetName().SetName().SetInitials(initial);
952  }
953  if (author.has("family") && author["family"].IsValue() && author["family"].GetValue().IsString()) {
954  string name = CHTMLHelper::StripHTML(Transcode(author["family"].GetValue().GetString()));
955  FixAuthorCap(name, true);
956  auth->SetName().SetName().SetLast(name);
957  }
958 
959  if (auth->IsSetName()) {
960  pub->SetArticle().SetAuthors().SetNames().SetStd().push_back(auth);
961  }
962 
963  /* // Do not populate affiliation even when available
964  vector<string> affil;
965  if (author.has("affiliation") && author["affiliation"].IsArray())
966  {
967  for (size_t j = 0; j < author["affiliation"].GetArray().size(); j++)
968  {
969  if (!author["affiliation"].GetArray()[j].IsObject())
970  continue;
971  CJson_ConstObject name = author["affiliation"].GetArray()[j].GetObject();
972  if (name.has("name") && name["name"].IsValue() && name["name"].GetValue().IsString() && !name["name"].GetValue().GetString().empty())
973  affil.push_back(name["name"].GetValue().GetString());
974  }
975  }
976  if (!affil.empty() && first_affil)
977  {
978  pub->SetArticle().SetAuthors().SetAffil().SetStr(NStr::Join(affil, ", "));
979  first_affil = false;
980  }
981  */
982  }
983  }
984 
985  string date;
986  if (msg.has("published-print") && msg["published-print"].IsObject()) {
987  date = "published-print";
988  pub->SetArticle().SetFrom().SetJournal().SetImp().SetPubstatus(4);
989  }
990  else if (msg.has("published-online") && msg["published-online"].IsObject())
991  {
992  date = "published-online";
993  // pub->SetArticle().SetFrom().SetJournal().SetImp().SetPubstatus(3);
994  }
995  else if (msg.has("posted") && msg["posted"].IsObject())
996  {
997  date = "posted";
998  }
999 
1000  if ( !date.empty()
1001  && msg[date].GetObject().has("date-parts")
1002  && msg[date].GetObject()["date-parts"].IsArray()
1003  && !msg[date].GetObject()["date-parts"].GetArray().empty()
1004  && msg[date].GetObject()["date-parts"].GetArray().front().IsArray()
1005  && !msg[date].GetObject()["date-parts"].GetArray().front().GetArray().empty())
1006  {
1007 
1008  if (msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[0].IsValue()
1009  && msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[0].GetValue().IsInt4())
1010  {
1011  int year = msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[0].GetValue().GetInt4();
1012  pub->SetArticle().SetFrom().SetJournal().SetImp().SetDate().SetStd().SetYear(year);
1013  }
1014  if (msg[date].GetObject()["date-parts"].GetArray().front().GetArray().size() > 1
1015  && msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[1].IsValue()
1016  && msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[1].GetValue().IsInt4())
1017  {
1018  int month = msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[1].GetValue().GetInt4();
1019  pub->SetArticle().SetFrom().SetJournal().SetImp().SetDate().SetStd().SetMonth(month);
1020  }
1021  if (msg[date].GetObject()["date-parts"].GetArray().front().GetArray().size() > 2
1022  && msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[2].IsValue()
1023  && msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[2].GetValue().IsInt4())
1024  {
1025  int day = msg[date].GetObject()["date-parts"].GetArray().front().GetArray()[2].GetValue().GetInt4();
1026  pub->SetArticle().SetFrom().SetJournal().SetImp().SetDate().SetStd().SetDay(day);
1027  }
1028  }
1029 
1030  CRef< CArticleId > doi_id(new CArticleId);
1031  doi_id->SetDoi(CDOI(doi));
1032  pub->SetArticle().SetIds().Set().push_back(doi_id);
1033 
1034 
1035  CRef<CPubdesc> pubdesc(new CPubdesc);
1036  pubdesc->SetPub().Set().push_back(pub);
1037  if (!is_isojta) {
1038  return make_pair(pubdesc, "Failed ISOJTA lookup");
1039  }
1040 
1041  return make_pair(pubdesc, "");
1042 }
1043 
1044 
1045 // CDoiLookupWithCache
1046 pair<CRef<objects::CPubdesc>, string> CDoiLookupWithCache::GetPub(const string& doi)
1047 {
1048  m_Requests++;
1049  CRef<CPubdesc> looked_up_pubdesc(nullptr);
1050  string error_msg;
1051  if (auto it = m_DOILookupMap.find(doi); it != m_DOILookupMap.end()) {
1052  looked_up_pubdesc = it->second;
1053  m_CacheHits++;
1054  }
1055  else {
1056  pair<CRef<CPubdesc>, string> new_pubdesc_str = CDoiLookup::GetPubFromCrossRef(doi);
1057  looked_up_pubdesc = new_pubdesc_str.first;
1058  if (looked_up_pubdesc) {
1059  m_DOILookupMap.emplace(doi, looked_up_pubdesc);
1060  }
1061  else {
1062  error_msg = new_pubdesc_str.second;
1063  }
1064  }
1065  return make_pair(looked_up_pubdesc, error_msg);
1066 }
1067 
1068 void CDoiLookupWithCache::ReportStats(std::ostream& ostr)
1069 {
1070  ostr << "CDoiLookupWithCache: " << m_CacheHits << " cache hits out of " << m_Requests << " requests\n";
1071 }
1072 
1075  { "Antimicrobial Agents and Chemotherapy", "Antimicrob Agents Chemother" },
1076  { "Cell", "Cell" },
1077  { "Genes", "Genes Basel" },
1078  { "IJSEM", "Int J Syst Evol Microbiol" },
1079  { "Journal of Clinical Microbiology", "J Clin Microbiol" },
1080  { "journal of microbiology", "J Microbiol" },
1081  { "Journal of Virology", "J Virol" },
1082  { "mitochondrial DNA A", "Mitochondrial DNA A DNA Mapp Seq Anal" },
1083  { "mitochondrial DNA B", "Mitochondrial DNA B Resour" },
1084  { "Nature", "Nature" },
1085  { "Nucleic Acids Research", "Nucleic Acids Res" },
1086  { "Science", "Science" },
1087  { "Virology", "Virology" },
1088 };
1089 
1092 
1093 string CISOJTALookupWithCache::s_GetISOShortcut(const string& old_title)
1094 {
1095  auto it = sc_ISOShortcutMap.find(old_title.c_str());
1096  if (it != sc_ISOShortcutMap.end()) {
1097  return it->second;
1098  }
1099  return kEmptyStr;
1100 }
1101 
1102 
1103 //CISOJTALookupWithCache
1104 void CISOJTALookupWithCache::GetJournalAbbr(const string& old_title, vector<string>& titles)
1105 {
1106  m_Requests++;
1107  titles.clear();
1108  if (auto it = m_ISOJTALookupMap.find(old_title); it != m_ISOJTALookupMap.end()) {
1109  titles = it->second;
1110  m_CacheHits++;
1111  }
1112  else {
1113  s_DoLookup(old_title, titles);
1114  m_ISOJTALookupMap.emplace(old_title, titles);
1115  }
1116 }
1117 
1118 void CISOJTALookupWithCache::s_DoLookup(const string& old_title, vector<string>& titles)
1119 {
1120  string new_title = s_GetISOShortcut(old_title);
1121  if (!new_title.empty()) {
1122  titles.push_back(new_title);
1123  return;
1124  }
1125 
1126  string tmp_title = old_title;
1127  bool success = CDoiLookup::LookupIsojta(tmp_title, titles);
1128  if (!success) {
1129  NCBI_USER_THROW("Failed to execute ISOJTA lookup possibly due to service failure");
1130  }
1131 
1132  for (auto& it : titles) {
1133  if (size_t pos = NStr::Find(it, "||"); pos != NPOS) {
1134  it = it.substr(0, pos);
1135  }
1136  }
1137 
1138  titles.erase(remove_if(titles.begin(), titles.end(), [](const string& elem) { return NStr::IsBlank(elem); }), titles.end());
1139  if (titles.size() > 1 && (old_title.find('.') != NPOS)) {
1140  string nodots_title = NStr::Replace(old_title, ".", "");
1141  if (find(titles.begin(), titles.end(), nodots_title) != titles.end()) {
1142  titles.assign({ nodots_title });
1143  }
1144  }
1145 }
1146 
1147 void CISOJTALookupWithCache::ReportStats(std::ostream& ostr)
1148 {
1149  ostr << "CISOJTALookupWithCache: " << m_CacheHits << " cache hits out of " << m_Requests << " requests\n";
1150 }
1151 
1153 
1154 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
Definition: chainer.hpp:69
CArticleId –.
Definition: ArticleId.hpp:66
CAuthor –.
Definition: Author.hpp:59
CDOI –.
Definition: DOI.hpp:66
pair< CRef< objects::CPubdesc >, string > GetPub(const string &doi)
TDOILookupMap m_DOILookupMap
Definition: doi_lookup.hpp:59
void ReportStats(std::ostream &ostr)
static string GetSpecialCharacterReplacement(TUnicodeSymbol ch)
Definition: doi_lookup.cpp:68
static bool LookupIsojta(string old_title, vector< string > &titles)
Definition: doi_lookup.cpp:711
static pair< CRef< objects::CPubdesc >, string > GetPubFromCrossRef(const string &doi)
Definition: doi_lookup.cpp:806
static string Transcode(const CStringUTF8 &input)
Definition: doi_lookup.cpp:567
void SetMaxReturn(int ret_max)
Uint8 Search(const string &db, const string &term, vector< objects::CSeq_id_Handle > &uids, const string &xml_path=kEmptyStr)
void Summary(const string &db, const vector< objects::CSeq_id_Handle > &uids, xml::document &docsums, const string &version="")
static CGuiRegistry & GetInstance()
access the application-wide singleton
Definition: registry.cpp:400
int GetInt(const string &key, int default_val=0) const
retrieve values by section and key.
Definition: registry.cpp:133
HTTP response.
static string s_GetISOShortcut(const string &old_title)
TISOJTALookupMap m_ISOJTALookupMap
Definition: doi_lookup.hpp:75
void GetJournalAbbr(const string &old_title, vector< string > &titles)
void ReportStats(std::ostream &ostr)
static void s_DoLookup(const string &old_title, vector< string > &titles)
CJson_ConstNode front(void) const
Return a reference to the first element in the array If the array is empty, the result is undefined.
bool empty(void) const
Test if the array is empty.
bool IsObject(void) const
CJson_ConstObject GetObject(void) const
Get JSON object contents of the node.
bool IsArray(void) const
CJson_ConstArray GetArray(void) const
Get JSON array contents of the node.
bool IsValue(void) const
CJson_ConstValue GetValue(void) const
Get JSON value contents of the node.
CJson_Object.
bool has(const CJson_Node::TKeyType &name) const
Test if an element with this name exists in the object.
Int4 GetInt4(void) const
bool IsString(void) const
TStringType GetString(void) const
bool ParseString(const TStringType &v)
Read JSON data from a UTF8 string.
bool ReadSucceeded(void) const
Test if the most recent read was successful.
Definition: Pub.hpp:56
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
CRef –.
Definition: ncbiobj.hpp:618
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
CTimeout – Timeout interval.
Definition: ncbitime.hpp:1693
C_E –.
Definition: Title_.hpp:96
CUrl –.
Definition: ncbi_url.hpp:353
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
The xml::document class is used to hold the XML tree and various bits of information about it.
Definition: document.hpp:80
node::iterator begin(void)
Get an iterator to the first child node of this document.
Definition: document.cpp:695
node::iterator end(void)
Get an iterator that points one past the last child node for this document.
Definition: document.cpp:703
The xml::node::iterator provides a way to access children nodes similar to a standard C++ container.
Definition: node.hpp:704
The xml::node class is used to hold information about one XML node.
Definition: node.hpp:106
bool is_text(void) const
Find out if this node is a text node or something like a text node, CDATA for example.
Definition: node.cpp:1189
const char * get_name(void) const
Get the name of this xml::node.
Definition: node.cpp:769
iterator end(void)
Get an iterator that points one past the last child for this node.
Definition: node.hpp:835
iterator begin(void)
Get an iterator that points to the beginning of this node's children.
Definition: node.cpp:1217
const char * get_content(void) const
Get the content for this text node.
Definition: node.cpp:797
size_type size(void) const
Returns the number of children this node has.
Definition: node.cpp:1199
USING_SCOPE(objects)
CStaticArrayMap< const char *, const char *, PNocase_CStr > TISOShortcutMap
void FixAuthorCap(string &name, bool bApostrophes)
Definition: doi_lookup.cpp:586
DEFINE_STATIC_ARRAY_MAP(TISOShortcutMap, sc_ISOShortcutMap, k_iso_shortcut_pair_map)
static const TISOShortcutPairElem k_iso_shortcut_pair_map[]
string s_GetValFromChildren(xml::node &node)
Definition: doi_lookup.cpp:597
void s_GetTitle(xml::node &node, string &medline, string &title, string &issn, vector< string > &titles)
Definition: doi_lookup.cpp:641
string s_GetTitleSuggestion(const string &medline, const string &title, const string &issn)
Definition: doi_lookup.cpp:618
SStaticPair< const char *, const char * > TISOShortcutPairElem
static void ExtractMiddleInitial(string &name, string &initial)
Definition: doi_lookup.cpp:791
static auto & FixCapitalizationInElement
static auto & CapitalizeAfterApostrophe
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...
Definition: ncbiexpt.hpp:715
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
static string StripHTML(const string &str)
Strip all HTML code from a string.
Definition: htmlhelper.hpp:173
CHttpResponse Get(const CUrl &url, const CTimeout &timeout=CTimeout(CTimeout::eDefault), THttpRetries retries=null)
Shortcut for GET requests.
CNcbiIstream & ContentStream(void) const
Get input stream.
int GetStatusCode(void) const
Get response status code.
const string & GetStatusText(void) const
Get response status text.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
std::string CStringUTF8
Definition: ncbistl.hpp:254
bool NcbiStreamCopy(CNcbiOstream &os, CNcbiIstream &is)
Copy the entire contents of stream "is" to stream "os".
Definition: ncbistre.cpp:211
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
Uint4 TUnicodeSymbol
Unicode character.
Definition: ncbistr.hpp:141
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static TUnicodeSymbol Decode(const char *&src)
Convert sequence of UTF8 code units into Unicode code point.
Definition: ncbistr.hpp:5662
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool IsUpper(const CTempString str)
Checks if all letters in the given string are upper case.
Definition: ncbistr.cpp:445
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
static string URLEncode(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
URL-encode string.
Definition: ncbistr.cpp:6062
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2508
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
void SetIds(TIds &value)
Assign a value to Ids data member.
Definition: Cit_art_.cpp:258
void SetTitle(TTitle &value)
Assign a value to Title data member.
Definition: Cit_art_.cpp:210
void SetName(TName &value)
Assign a value to Name data member.
Definition: Author_.cpp:81
void SetFrom(TFrom &value)
Assign a value to From data member.
Definition: Cit_art_.cpp:248
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_art_.cpp:227
TDoi & SetDoi(void)
Select the variant.
Definition: ArticleId_.hpp:505
bool IsSetName(void) const
Author, Primary or Secondary Check if a value has been assigned to Name data member.
Definition: Author_.hpp:340
TArticle & SetArticle(void)
Select the variant.
Definition: Pub_.cpp:239
HTML classes.
static int input()
int i
yy_size_t n
This file contains the definition of the xml::document class.
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
int isupper(Uchar c)
Definition: ncbictype.hpp:70
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
User-defined methods of the data storage class.
void ConvertToEntrezTerm(string &title)
Definition: utilities.cpp:2220
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
Definition: ref.cpp:1457
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason for introducing this...
Definition: static_set.hpp:60
Modified on Tue May 28 05:50:25 2024 by modify_doxy.py rev. 669887