NCBI C++ ToolKit
ref.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ref.cpp 102351 2024-04-25 17:49:42Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Name: ref.cpp
27  *
28  * Author: Karl Sirotkin, Hsiu-Chuan Chen
29  *
30  * File Description:
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include "ftacpp.hpp"
37 
41 #include <objects/biblio/Affil.hpp>
42 #include <objects/seq/Pubdesc.hpp>
44 #include <objects/pub/Pub.hpp>
61 
62 #include "index.h"
63 #include "genbank.h"
64 #include "embl.h"
65 
67 #include "ftamed.h"
68 
69 #include "ftaerr.hpp"
70 #include "indx_blk.h"
71 #include "utilref.h"
72 #include "asci_blk.h"
73 #include "add.h"
74 #include "utilfun.h"
75 #include "ind.hpp"
76 #include "ref.h"
77 #include "xgbfeat.h"
78 #include "xutils.h"
79 #include "fta_xml.h"
80 
81 #ifdef THIS_FILE
82 # undef THIS_FILE
83 #endif
84 #define THIS_FILE "ref.cpp"
85 
86 #define MAXKW 38
87 
88 
91 
// Boilerplate phrases that get_sub() skips (case-sensitively, together with any
// following spaces and periods) when they appear at the start of the contact
// text of a direct submission. The table is terminated by nullptr.
92 static const char* strip_sub_str[] = {
93  "to the EMBL/GenBank/DDBJ databases",
94  "to the EMBL/DDBJ/GenBank databases",
95  "to the DDBJ/GenBank/EMBL databases",
96  "to the DDBJ/EMBL/GenBank databases",
97  "to the GenBank/DDBJ/EMBL databases",
98  "to the GenBank/EMBL/DDBJ databases",
99  "to the INSDC",
100  nullptr
101 };
102 
// Spelling variants of the "Publication Status" remark, grouped by the two
// categories named in the inline comments (entries 1-6: epublish, entries
// 7-18: aheadofprint). The numbers in the trailing comments are 1-based
// positions in the table; the table is terminated by nullptr.
// NOTE(review): the code that consumes this table is not visible in this part
// of the file; get_art() receives an "er" value that is "based on REMARKs".
103 static const char* ERRemarks[] = {
104  // epublish
105  "Publication Status: Online-Only", /* 1 */
106  "Publication Status : Online-Only", /* 2 */
107  "Publication_Status: Online-Only", /* 3 */
108  "Publication_Status : Online-Only", /* 4 */
109  "Publication-Status: Online-Only", /* 5 */
110  "Publication-Status : Online-Only", /* 6 */
111  // aheadofprint
112  "Publication Status: Available-Online", /* 7 */
113  "Publication Status : Available-Online", /* 8 */
114  "Publication_Status: Available-Online", /* 9 */
115  "Publication_Status : Available-Online", /* 10 */
116  "Publication-Status: Available-Online", /* 11 */
117  "Publication-Status : Available-Online", /* 12 */
118  "Publication Status: Available-Online prior to print", /* 13 */
119  "Publication Status : Available-Online prior to print", /* 14 */
120  "Publication_Status: Available-Online prior to print", /* 15 */
121  "Publication_Status : Available-Online prior to print", /* 16 */
122  "Publication-Status: Available-Online prior to print", /* 17 */
123  "Publication-Status : Available-Online prior to print", /* 18 */
124  nullptr
125 };
126 
/**********************************************************
 *
 *   static void normalize_comment(comment):
 *
 *      Collapses redundant separators in place: after
 *   every "; " any run of additional spaces and
 *   semicolons is removed, so "a; ; ;b" becomes "a; b".
 *
 **********************************************************/
static void normalize_comment(std::string& comment)
{
    std::string::size_type from = 0;

    while (from < comment.size()) {
        const std::string::size_type sep = comment.find("; ", from);
        if (sep == std::string::npos)
            return;

        /* first character after the separator */
        from = sep + 2;

        /* end of the run of spaces/semicolons that follows it */
        std::string::size_type run_end = comment.find_first_not_of(" ;", from);
        if (run_end == std::string::npos)
            run_end = comment.size();

        if (run_end > from)
            comment.erase(from, run_end - from);
    }
}
148 
149 /**********************************************************
150  *
151  * static DatePtr get_lanl_date(s):
152  *
153  * Get year, month, day and return NCBI_DatePtr.
154  * Temporary used for lanl form of date that
155  * is (JUL 21 1993).
156  *
157  * 01-4-94
158  *
159  **********************************************************/
160 static CRef<CDate> get_lanl_date(char* s)
161 {
162  int day = 0;
163  int month = 0;
164  int year;
165  int cal;
166 
167  const char* months[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
168 
169  CRef<CDate> date(new CDate);
170  for (cal = 0; cal < 12; cal++) {
171  if (StringEquNI(s + 1, months[cal], 3)) {
172  month = cal + 1;
173  break;
174  }
175  }
176  day = atoi(s + 5);
177  year = atoi(s + 8);
178  if (year < 1900 || year > 1994) {
179  ErrPostEx(SEV_WARNING, ERR_REFERENCE_IllegalDate, "Illegal year: %d", year);
180  }
181 
182  date->SetStd().SetYear(year);
183  date->SetStd().SetMonth(month);
184  date->SetStd().SetDay(day);
185 
186  if (XDateCheck(date->GetStd()) != 0) {
187  ErrPostEx(SEV_WARNING, ERR_REFERENCE_IllegalDate, "Illegal date: %s", s);
188  date.Reset();
189  }
190 
191  return (date);
192 }
193 
194 /**********************************************************
195  *
196  * static char* clean_up(str):
197  *
198  * Deletes front and tail double or single quotes
199  * if any.
200  *
201  **********************************************************/
202 static string clean_up(const char* str)
203 {
204  if (! str)
205  return {};
206 
207  size_t b = 0;
208  size_t e = StringLen(str);
209 
210  if (b < e && str[e - 1] == ';')
211  --e;
212  while (b < e && (str[b] == '\"' || str[b] == '\''))
213  b++;
214  while (b < e && (str[e - 1] == '\"' || str[e - 1] == '\''))
215  e--;
216 
217  if (b < e)
218  return string(str + b, str + e);
219  return {};
220 }
221 
// Builds a CPub of the "gen" (Cit-gen) variant that carries only a serial
// number and returns it.
// NOTE(review): the statement that defines serial_num (listing line 224,
// presumably derived from "str") is missing from this listing.
222 static CRef<CPub> get_num(char* str)
223 {
225 
226  CRef<CPub> ret(new CPub);
227  ret->SetGen().SetSerial_number(serial_num);
228 
229  return ret;
230 }
231 
233 {
 // Returns a CPub holding a Medline UID, or an empty CRef when "str" is
 // null, the format is not handled, the "MEDLINE;" tag is absent (EMBL), or
 // the parsed number is less than 1.
 // NOTE(review): the function signature (listing line 232), the first "if"
 // of the format chain (line 242) and the statement assigning "i" (line 253)
 // are missing from this listing; "str" and "format" come from that signature.
234  char* p;
235  Int4 i;
236 
237  CRef<CPub> muid;
238 
239  if (! str)
240  return muid;
241 
243  p = str;
244  else if (format == Parser::EFormat::EMBL) {
 // EMBL: the number follows the case-insensitively matched "MEDLINE;" tag
 // (8 characters) and any spaces after it.
245  p = StringIStr(str, "MEDLINE;");
246  if (! p)
247  return muid;
248  for (p += 8; *p == ' ';)
249  p++;
250  } else
251  return muid;
252 
254  if (i < 1)
255  return muid;
256 
257  muid.Reset(new CPub);
258  muid->SetMuid(ENTREZ_ID_FROM(int, i));
259  return muid;
260 }
261 
262 /**********************************************************/
263 static char* get_embl_str_pub_id(char* str, const Char* tag)
264 {
265  const char* p;
266  const char* q;
267 
268  if (! str || ! tag)
269  return nullptr;
270 
271  p = StringIStr(str, tag);
272  if (! p)
273  return nullptr;
274  for (p += StringLen(tag); *p == ' ';)
275  p++;
276 
277  for (q = p; *q != ' ' && *q != '\0';)
278  q++;
279  q--;
280  if (*q != '.')
281  q++;
282  return StringSave(string_view(p, q - p));
283 }
284 
285 /**********************************************************/
287 {
 // Extracts the numeric identifier that follows a "PUBMED;" tag in "str".
 // Returns ZERO_ENTREZ_ID when "str" is null, the tag is absent, or the
 // number parsed by atol() is not positive.
 // NOTE(review): the function signature (listing line 286) is missing from
 // this listing; "str" comes from that signature.
288  char* p;
289  long i;
290 
291  if (! str)
292  return ZERO_ENTREZ_ID;
293 
294  p = StringIStr(str, "PUBMED;");
295  if (! p)
296  return ZERO_ENTREZ_ID;
 // skip the 7-character tag and any spaces after it
297  for (p += 7; *p == ' ';)
298  p++;
299  i = atol(p);
300  if (i <= 0)
301  return ZERO_ENTREZ_ID;
302  return ENTREZ_ID_FROM(long, i);
303 }
304 
305 /**********************************************************
306  *
307  * static char* check_book_tit(title):
308  *
309  * Get volume from book title.
310  *
311  * 12-4-93
312  *
313  **********************************************************/
314 static char* check_book_tit(char* title)
315 {
316  char* p;
317  char* q;
318  char* r;
319 
320  p = StringRStr(title, "Vol");
321  if (! p)
322  return nullptr;
323 
324  if (p[3] == '.')
325  q = p + 4;
326  else if (StringEquN(p + 3, "ume", 3))
327  q = p + 6;
328  else
329  return nullptr;
330 
331  while (*q == ' ' || *q == '\t')
332  q++;
333  for (r = q; *r >= '0' && *r <= '9';)
334  r++;
335 
336  if (r == q || *r != '\0')
337  return nullptr;
338 
339  if (p > title) {
340  p--;
341  if (*p != ' ' && *p != '\t' && *p != ',' && *p != ';' && *p != '.')
342  return nullptr;
343 
344  while (*p == ' ' || *p == '\t' || *p == ',' || *p == ';' || *p == '.') {
345  if (p == title)
346  break;
347  p--;
348  }
349  if (*p != ' ' && *p != '\t' && *p != ',' && *p != ';' && *p != '.')
350  p++;
351  }
352  *p = '\0';
353 
354  return (q);
355 }
356 
357 /**********************************************************
358  *
359  * static CitPatPtr get_pat(pp, bptr, auth, title, eptr):
360  *
361  * Return a CitPat pointer for patent ref in ncbi or
362  * embl or ddbj.
363  * Leading "I" or "AR" for NCBI or "A" for EMBL or
364  * "E" for DDBJ in accession number required
365  *
366  * JOURNAL Patent: US 4446235-A 6 01-MAY-1984;
367  * or
368  * RL Patent number US4446235-A/6, 01-MAY-1984.
369  *
370  * 11-14-93
371  *
372  **********************************************************/
// Parses a patent reference of the form
//   <country> <number>-<doc type> <sequence number> <date>
// out of bptr and returns it as a CCit_pat; an empty CRef is returned (after
// an error message) when the prefix, country, number or date cannot be parsed.
// bptr is modified in place while parsing. For patent entries without a
// patent seq-id yet, the entry's psip is filled in as a side effect.
// NOTE(review): listing lines 434 and 495 (the second halves of two "if"
// conditions) are missing from this listing.
373 static CRef<CCit_pat> get_pat(ParserPtr pp, char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, char* eptr)
374 {
375  IndexblkPtr ibp;
376 
377  CRef<CCit_pat> cit_pat;
378 
379  char* country;
380  char* number;
381  char* type;
382  char* app;
383  char* s;
384  char* p;
385  char* q;
386  char* temp;
387 
388  ErrSev sev;
389  Char ch;
390 
391  ibp = pp->entrylist[pp->curindx];
392 
 // temp is an untouched copy of the reference (cut at the terminator) that is
 // used only in error messages; it is freed on every return path.
393  temp = StringSave(bptr);
394 
 // The reference ends at '.' in EMBL format and at ';' otherwise.
395  ch = (pp->format == Parser::EFormat::EMBL) ? '.' : ';';
396  p = StringChr(temp, ch);
397  if (p)
398  *p = '\0';
399 
 // Cut bptr at the terminator as well; p remembers the position so the
 // character can be restored once parsing has succeeded.
400  p = StringChr(bptr, ch);
401  if (p)
402  *p = '\0';
403 
404  if (ibp->is_pat && ibp->psip.NotEmpty()) {
405  ErrPostStr(SEV_ERROR, ERR_FORMAT_MultiplePatRefs, "Too many patent references for patent sequence; ignoring all but the first.");
406  }
407 
 // USPTO records have no leading keyword; all others must start with
 // "Patent number" (EMBL) or "Patent:" (case-insensitive).
408  if (pp->source == Parser::ESource::USPTO)
409  s = bptr;
410  else {
411  q = (pp->format == Parser::EFormat::EMBL) ? (char*)"Patent number" : (char*)"Patent:";
412  size_t len = StringLen(q);
413  if (! StringEquNI(q, bptr, len)) {
414  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Illegal format: \"%s\"", temp);
415  MemFree(temp);
416  return cit_pat;
417  }
418 
419  for (s = bptr + len; *s == ' ';)
420  s++;
421  }
422 
 // Country: a run of letters and spaces; q tracks its last non-space character.
423  for (country = s, q = s; isalpha((int)*s) || *s == ' '; s++)
424  if (*s != ' ')
425  q = s;
426  if (country == q) {
427  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No Patent Document Country: \"%s\"", temp);
428  MemFree(temp);
429  return cit_pat;
430  }
431  s = q + 1;
432 
433  if (pp->format != Parser::EFormat::EMBL &&
435  *s++ = '\0';
436  while (*s == ' ')
437  s++;
 // Document number: digits, with embedded commas removed in place.
438  for (number = s, q = s; isdigit((int)*s) != 0 || *s == ','; s++)
439  if (*s != ',')
440  *q++ = *s;
441 
442  if (number == s) {
443  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No Patent Document Number: \"%s\"", temp);
444  MemFree(temp);
445  return cit_pat;
446  }
447 
448  if (q != s)
449  *q = '\0';
450 
 // Document type: the text after '-' up to a space, '/' or the end.
451  if (*s == '-') {
452  *s++ = '\0';
453  for (type = s; *s != ' ' && *s != '/' && *s != '\0';)
454  s++;
455  if (type == s)
456  type = nullptr;
457  } else
458  type = nullptr;
459  if (*s != '\0')
460  *s++ = '\0';
461 
 // A missing type or sequence number is an error for patent entries and only
 // a warning otherwise; parsing continues in both cases.
462  if (! type) {
463  sev = (ibp->is_pat ? SEV_ERROR : SEV_WARNING);
464  ErrPostEx(sev, ERR_REFERENCE_Fail_to_parse, "No Patent Document Type: \"%s\"", temp);
465  }
466 
 // Sequence number within the patent: digits followed by one of the
 // accepted delimiters.
467  for (app = s, q = s; *s >= '0' && *s <= '9';)
468  s++;
469  if (*s != '\0' && *s != ',' && *s != '.' && *s != ' ' && *s != ';' &&
470  *s != '\n') {
471  sev = (ibp->is_pat ? SEV_ERROR : SEV_WARNING);
472  ErrPostEx(sev, ERR_REFERENCE_Fail_to_parse, "No number of sequence in patent: \"%s\"", temp);
473  app = nullptr;
474  s = q;
475  } else if (*s != '\0')
476  for (*s++ = '\0'; *s == ' ';)
477  s++;
478 
479  CRef<CDate_std> std_date;
480  if (*s != '\0') {
481  std_date = get_full_date(s, true, pp->source);
482  }
483 
484  if (std_date.Empty()) {
485  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Illegal format: \"%s\"", temp);
486  MemFree(temp);
487  return cit_pat;
488  }
489 
 // Restore the terminator removed from bptr above (success path only).
490  if (p)
491  *p = ch;
492 
493  string msg = NStr::Sanitize(number);
494  if (pp->format == Parser::EFormat::EMBL ||
496  *number = '\0';
497 
498  cit_pat.Reset(new CCit_pat);
499 
500  cit_pat->SetCountry(country);
501  cit_pat->SetNumber(msg);
502 
503  cit_pat->SetDoc_type(type ? type : "");
504  cit_pat->SetDate_issue().SetStd(*std_date);
505  cit_pat->SetTitle(title.Empty() ? "" : title->GetName());
506 
 // Authors are always set: a single empty name is used when none were given.
507  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
508  CAuth_list& pat_auth_list = cit_pat->SetAuthors();
509  pat_auth_list.SetNames().SetStr().push_back("");
510  } else
511  cit_pat->SetAuthors(*auth_list);
512 
513  if (auth_list.NotEmpty()) {
514  CAffil& affil = auth_list->SetAffil();
515 
 // NOTE(review): the fixed skip of 13 characters presumably steps over the
 // date and its terminator to reach the affiliation text - confirm. The
 // affiliation is written to auth_list after it was copied into cit_pat.
516  s += 13;
517  if (s < eptr && *s != '\0')
518  affil.SetStr(s);
519  else
520  affil.SetStr("");
521  }
522 
 // First patent reference of a patent entry: record the patent seq-id.
523  if (ibp->is_pat && ibp->psip.Empty()) {
524  ibp->psip = new CPatent_seq_id;
525  ibp->psip->SetCit().SetCountry(country);
526  ibp->psip->SetCit().SetId().SetNumber(msg);
527  ibp->psip->SetSeqid(app ? atoi(app) : 0);
528  if (type)
529  ibp->psip->SetCit().SetDoc_type(type);
530  }
531 
532  MemFree(temp);
533  return cit_pat;
534 }
535 
536 /**********************************************************/
537 static void fta_get_part_sup(char* parts, CImprint& imp)
538 {
539  char* start;
540  char* end;
541  char* p;
542  char* q;
543  Char ch;
544  Int4 i;
545  Int4 j;
546 
547  if (! parts || *parts == '\0')
548  return;
549 
550  for (p = parts, i = 0, j = 0; *p != '\0'; p++) {
551  if (*p == '(')
552  i++;
553  else if (*p == ')')
554  j++;
555 
556  if (j > i || i - j > 1)
557  break;
558  }
559 
560  if (*p != '\0' || i < 2)
561  return;
562 
563  start = StringChr(parts, '(');
564  end = StringChr(start + 1, ')');
565 
566  for (p = start + 1; *p == ' ';)
567  p++;
568  if (p == end)
569  return;
570 
571  for (q = end - 1; *q == ' ' && q > p;)
572  q--;
573  if (*q != ' ')
574  q++;
575 
576  ch = *q;
577  *q = '\0';
578 
579  imp.SetPart_sup(p);
580  *q = ch;
581 
582  fta_StringCpy(start, end + 1);
583 }
584 
585 /**********************************************************
586  *
587  * static bool get_parts(bptr, eptr, imp):
588  *
589  * Return a PARTS from medart2asn.c.
590  *
591  **********************************************************/
// Splits the text in [bptr, eptr) into volume / issue / supplement fields of
// imp. The text is copied first, so the caller's buffer is not modified.
// Expected shape after fta_get_part_sup() has removed an extra leading group:
//   <volume> [<part-sup>] [ "(" <issue> [<part-supi>] ")" ]
// Returns false when either pointer is null or the parentheses are unbalanced,
// reversed or occur more than once; returns true otherwise.
592 static bool get_parts(char* bptr, char* eptr, CImprint& imp)
593 {
594  char* parts;
595  char* p;
596  char* q;
597  Int4 bad;
598 
599  if (! bptr || ! eptr)
600  return false;
601 
602  parts = StringSave(string_view(bptr, eptr - bptr));
603 
 // tabs are treated as blanks
604  for (p = parts; *p != '\0'; p++)
605  if (*p == '\t')
606  *p = ' ';
607 
608  fta_get_part_sup(parts, imp);
609 
 // At most one "(...)" pair may remain, with '(' before ')'.
610  bad = 0;
611  q = StringChr(parts, '(');
612  p = StringChr(parts, ')');
613 
614  if (p && q) {
615  if (p < q || StringChr(p + 1, ')') || StringChr(q + 1, '('))
616  bad = 1;
617  } else if (p || q)
618  bad = 1;
619 
620  if (bad != 0) {
621  MemFree(parts);
622  return false;
623  }
624 
625  if (q) {
 // Terminate the volume part at '(' and the group at ')', then split the
 // group: first word -> issue, rest (blank-trimmed) -> part-supi with
 // one leading blank.
626  *q++ = '\0';
627  *p = '\0';
628 
629  for (p = q; *p == ' ';)
630  p++;
631  for (q = p; *q != '\0' && *q != ' ';)
632  q++;
633  if (*q != '\0')
634  *q++ = '\0';
635  if (q > p)
636  imp.SetIssue(p);
637  for (p = q; *p == ' ';)
638  p++;
639  for (q = p; *q != '\0';)
640  q++;
641  if (q > p) {
642  for (q--; *q == ' ';)
643  q--;
644  *++q = '\0';
645 
646  string supi(" ");
647  supi += p;
648  imp.SetPart_supi(supi);
649  }
650 
 // An issue that is exactly "PT" (any case) absorbs the part-supi text,
 // i.e. issue becomes "PT <rest>" and part-supi is cleared.
651  const Char* issue_str = imp.IsSetIssue() ? imp.GetIssue().c_str() : nullptr;
652  if (imp.IsSetPart_supi() && issue_str &&
653  (issue_str[0] == 'P' || issue_str[0] == 'p') && (issue_str[1] == 'T' || issue_str[1] == 't') &&
654  issue_str[2] == '\0') {
655  string& issue = imp.SetIssue();
656  issue += imp.GetPart_supi();
657  imp.ResetPart_supi();
658  }
659  }
660 
 // Text in front of the group: first word -> volume, rest (blank-trimmed) ->
 // part-sup. This overwrites any part-sup stored by fta_get_part_sup().
661  for (p = parts; *p == ' ';)
662  p++;
663  for (q = p; *q != '\0' && *q != ' ';)
664  q++;
665  if (*q != '\0')
666  *q++ = '\0';
667  if (q > p)
668  imp.SetVolume(p);
669  for (p = q; *p == ' ';)
670  p++;
671  for (q = p; *q != '\0';)
672  q++;
673  if (q > p) {
674  for (q--; *q == ' ';)
675  q--;
676  *++q = '\0';
677  imp.SetPart_sup(p);
678  }
679 
680  MemFree(parts);
681  return true;
682 }
683 
684 /**********************************************************
685  *
686  * static CitArtPtr get_art(pp, bptr, auth, title, pre,
687  * has_muid, all_zeros, er):
688  *
689  * Return a CitArt pointer for GENBANK or EMBL mode.
690  *
691  **********************************************************/
// Parses a journal citation of the general shape
//   <journal title> <volume>[(issue)] <symbol> <pages> (<year>)
// from bptr and returns it as a CCit_art with a journal imprint. The text is
// copied into a private buffer first, so bptr itself is not modified.
// An empty CRef is returned when the format is not handled, the text does not
// end in a parenthesized year, or one of the checks below fails.
// *all_zeros is set to true when the year starts with '0' and the remaining
// condition on listing line 813 holds.
// NOTE(review): listing lines 806, 813, 843, 856, 891 and 893 are missing
// from this listing, so several statements below are incomplete.
692 static CRef<CCit_art> get_art(ParserPtr pp, char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, CImprint::EPrepub pre, bool has_muid, bool* all_zeros, Int4 er)
693 {
694  char* eptr;
695  char* end_tit;
696  char* s;
697  char* ss;
698  char* end_volume;
699  char* end_pages;
700  char* tit = nullptr;
701  char* volume = nullptr;
702  char* pages = nullptr;
703  char* year;
704  Char symbol;
705 
706  Int4 i;
707  Int4 is_er;
708 
709  *all_zeros = false;
710 
 // is_er is a bit mask: bit 01 - flagged through "er", bit 02 - the text
 // itself starts with "(er)".
711  is_er = 0;
712  if (er > 0)
713  is_er |= 01; /* based on REMARKs */
714  if (StringEquN(bptr, "(er)", 4))
715  is_er |= 02;
716 
717  CRef<CCit_art> cit_art;
718 
 // Delimiter between volume and pages: ',' for GenBank-style text and ':'
 // for EMBL-style text.
719  if (pp->format == Parser::EFormat::GenBank)
720  symbol = ',';
721  else if (pp->format == Parser::EFormat::EMBL)
722  symbol = ':';
723  else if (pp->format == Parser::EFormat::XML) {
724  if (pp->source == Parser::ESource::EMBL)
725  symbol = ':';
726  else
727  symbol = ',';
728  } else
729  return cit_art;
730 
731  end_volume = nullptr;
732 
 // Work on a private copy; strip trailing blanks, tabs and periods.
733  size_t len = StringLen(bptr);
734  unique_ptr<char[]> pBuf(new char[len + 1]);
735  char* buf = pBuf.get();
736  StringCpy(buf, bptr);
737  eptr = buf + len - 1;
738  while (eptr > buf && (*eptr == ' ' || *eptr == '\t' || *eptr == '.'))
739  *eptr-- = '\0';
 // The citation must end with "(year)".
740  if (*eptr != ')') {
741  return cit_art;
742  }
743  for (s = eptr - 1; s > buf && *s != '(';)
744  s--;
745  if (*s != '(') {
746  return cit_art;
747  }
748 
749  year = s + 1;
750  for (s--; s >= buf && isspace((int)*s) != 0;)
751  s--;
752  if (s < buf)
753  s = buf;
754  end_pages = s + 1;
 // ss: first digit that is outside parentheses - the candidate start of the
 // volume. A leading "G3" is skipped so its digit is not taken for a volume.
755  if (buf[0] == 'G' && buf[1] == '3')
756  ss = buf + 2;
757  else
758  ss = buf;
759  for (i = 0; ss <= year; ss++) {
760  if (*ss == '(')
761  i++;
762  else if (*ss == ')')
763  i--;
764  else if (*ss >= '0' && *ss <= '9' && i == 0)
765  break;
766  }
767 
 // s: last occurrence of the delimiter in front of the year.
768  for (s = end_pages; s >= buf && *s != symbol;)
769  s--;
770  if (s < buf)
771  s = buf;
772  if (*s != symbol) {
773  /* try delimiter from other format
774  */
775  if (pp->format == Parser::EFormat::GenBank)
776  symbol = ':';
777  else if (pp->format == Parser::EFormat::EMBL)
778  symbol = ',';
779  else if (pp->format == Parser::EFormat::XML) {
780  if (pp->source == Parser::ESource::EMBL)
781  symbol = ',';
782  else
783  symbol = ':';
784  }
785 
786  for (s = end_pages; s >= buf && *s != symbol;)
787  s--;
788  if (s < buf)
789  s = buf;
790  }
791 
 // With a delimiter and a volume candidate: pages follow the delimiter, the
 // volume lies between ss and the delimiter, the title ends before ss.
792  if (*s == symbol && ss != year) {
793  if (ss > s)
794  ss = s + 1;
795  end_volume = s;
796  for (pages = s + 1; isspace(*pages) != 0;)
797  pages++;
798  end_tit = ss - 1;
799  if (end_volume > ss) {
800  volume = ss;
801  if (*end_tit == '(')
802  volume--;
803  }
804  } else {
805  if (pre != CImprint::ePrepub_submitted)
807 
808  end_tit = end_pages;
809  }
810 
811  if (*year == '0') {
812  if (pages && StringEquN(pages, "0-0", 3) &&
814  *all_zeros = true;
815  return cit_art;
816  }
817 
818  tit = buf;
819  if (*tit == '\0') {
820  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No journal title.");
821  return cit_art;
822  }
823 
824  cit_art.Reset(new CCit_art);
825  CCit_jour& journal = cit_art->SetFrom().SetJournal();
826  CImprint& imp = journal.SetImp();
827 
828  if (pre > 0)
829  imp.SetPrepub(pre);
830 
 // Pages: "0-0" means no pages. valid_pages_range() result 0 - accept the
 // pages; 1 - extend the title up to the end of the pages text; -1 - reject
 // the citation when it is an electronic resource.
831  *end_pages = '\0';
832  if (pages && ! StringEquN(pages, "0-0", 3)) {
833  i = valid_pages_range(pages, tit, is_er, (pre == CImprint::ePrepub_in_press));
834  if (i == 0)
835  imp.SetPages(pages);
836  else if (i == 1)
837  end_tit = end_pages;
838  else if (i == -1 && is_er > 0) {
839  cit_art.Reset();
840  return cit_art;
841  }
842  } else if (pre != CImprint::ePrepub_submitted)
844 
845  if (volume) {
846  if (! get_parts(volume, end_volume, imp)) {
847  cit_art.Reset();
848  return cit_art;
849  }
850 
851  if (pre != CImprint::ePrepub_submitted && ! imp.IsSetVolume()) {
852  if (imp.IsSetPages()) {
853  cit_art.Reset();
854  return cit_art;
855  }
857  }
858  } else if (is_er > 0 && pre != CImprint::ePrepub_in_press) {
859  cit_art.Reset();
860  return cit_art;
861  }
862 
863  CRef<CDate> date;
864  if (*year != '0')
865  date = get_date(year);
866 
 // A date is mandatory; the error message is suppressed for electronic
 // resources.
867  if (date.Empty()) {
868  if (is_er == 0)
869  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No date in journal reference");
870 
871  cit_art.Reset();
872  return cit_art;
873  }
874 
875  *end_tit = '\0';
876 
877  CRef<CTitle::C_E> journal_title(new CTitle::C_E);
878 
 // Trim trailing characters of the title that are neither alphanumeric nor
 // '.' nor ')'.
879  for (char* aux = end_tit - 1; aux > tit && *aux != '.' && *aux != ')' && ! isalnum(*aux); --aux)
880  *aux = 0;
881 
882  journal_title->SetIso_jta(NStr::Sanitize(tit));
883  journal.SetTitle().Set().push_back(journal_title);
884 
885  imp.SetDate(*date);
886  if (pre > 0)
887  imp.SetPrepub(pre);
888 
889  if ((is_er & 01) == 01) {
890  if (er == 1)
892  else
894  }
895 
896  /* check invalid "in-press"
897  */
898  if (pre == CImprint::ePrepub_in_press) {
899  if (has_muid) {
900  ErrPostEx(SEV_WARNING, ERR_REFERENCE_InvalidInPress, "Reference flagged as In-press, but Medline UID exists, In-press ignored: %s", buf);
901  imp.ResetPrepub();
902  }
903 
904  if (imp.IsSetPages() && imp.IsSetVolume() && imp.IsSetDate()) {
905  ErrPostEx(SEV_WARNING, ERR_REFERENCE_InvalidInPress, "Reference flagged as In-press, but citation is complete, In-press ignored: %s", buf);
906  imp.ResetPrepub();
907  }
908  }
909 
910  /* Title and authors are optional for cit_art
911  */
912  if (title)
913  cit_art->SetTitle().Set().push_back(title);
914 
915  if (auth_list.NotEmpty())
916  cit_art->SetAuthors(*auth_list);
917 
918  return cit_art;
919 }
920 
921 /**********************************************************
922  *
923  * static CitGenPtr get_unpub(bptr, eptr, auth, title):
924  *
925  * Return a CitGen pointer.
926  *
927  * 11-14-93
928  *
929  **********************************************************/
930 static CRef<CCit_gen> get_unpub(char* bptr, char* eptr, CRef<CAuth_list>& auth_list, const Char* title)
931 {
932  CRef<CCit_gen> cit_gen(new CCit_gen);
933 
934  char* s;
935  char* str;
936 
937  if (bptr) {
938  for (s = bptr; *s != '\0' && *s != '(';)
939  s++;
940  for (str = s - 1; str > bptr && isspace(*str) != 0;)
941  str--;
942  if (*s == '(')
943  s += 6;
944 
945  if (s < eptr && *s != '\0' && auth_list.NotEmpty())
946  auth_list->SetAffil().SetStr(NStr::Sanitize(s));
947 
948  cit_gen->SetCit(string(bptr, str + 1));
949  }
950 
951  if (auth_list.NotEmpty())
952  cit_gen->SetAuthors(*auth_list);
953 
954  if (title)
955  cit_gen->SetTitle(title);
956 
957  return cit_gen;
958 }
959 
960 /**********************************************************
961  *
962  * static CitArtPtr get_book(bptr, auth, title, pre,
963  * format, p):
964  *
965  * Return a CitArt pointer (!!! that is an article
966  * from book!!).
967  *
968  * 11-14-93
969  *
970  **********************************************************/
// Parses a "(in) <editors> (Eds.); <book title>: <pages>; <publisher> (<year>)"
// style reference and returns it as a CCit_art whose "from" is a book.
// Parsing is done on a private copy of bptr (tbptr), which is freed on every
// return path. An empty CRef is returned when the editor list cannot be
// recognized or when no date is found.
// NOTE(review): the three "case" labels of the switch (listing lines 993, 996
// and 999) are missing from this listing.
971 static CRef<CCit_art> get_book(char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, CImprint::EPrepub pre, Parser::EFormat format, char* jour)
972 {
973  char* s;
974  char* ss;
975  char* tit;
976  char* volume;
977  char* pages;
978  char* press;
979 
980  ERefFormat ref_fmt;
981  bool IS_AUTH = false;
982  char* tbptr;
983  char* p;
984  Char c;
985  Int4 i;
986 
987  tit = nullptr;
988  ref_fmt = GB_REF;
989 
990  tbptr = bptr ? StringSave(bptr) : nullptr;
991 
 // Select the author-parsing flavour that matches the input format.
992  switch (format) {
994  ref_fmt = EMBL_REF;
995  break;
997  ref_fmt = GB_REF;
998  break;
1000  ref_fmt = SP_REF;
1001  break;
1002  default:
1003  break;
1004  }
1005 
1006  CRef<CCit_art> cit_art(new CCit_art);
1007  CCit_book& cit_book = cit_art->SetFrom().SetBook();
1008 
1009  if (pre > 0)
1010  cit_book.SetImp().SetPrepub(pre);
1011 
 // p keeps the start of the copy for use in error messages.
1012  p = tbptr;
1013  CRef<CTitle::C_E> book_title(new CTitle::C_E);
1014 
1015  if (StringEquN("(in)", tbptr, 4)) {
1016  for (s = tbptr + 4; *s == ' ';)
1017  s++;
 // bptr now marks the editor list, which ends at ';', '(' or the end.
1018  for (bptr = s; *s != ';' && *s != '(' && *s != '\0';)
1019  s++;
1020  if (StringEquNI(s, "(Eds.)", 6)) {
1021  tit = s + 6;
1022  IS_AUTH = true;
1023  } else if (StringEquNI(s, "(Ed.)", 5)) {
1024  tit = s + 5;
1025  IS_AUTH = true;
1026  } else if (*s == ';')
1027  tit = s;
1028  if (tit)
1029  while (*tit == ' ' || *tit == ';' || *tit == '\n')
1030  tit++;
1031  *s++ = '\0';
 // Only references with an "(Eds.)"/"(Ed.)" marker and a non-empty editor
 // list are accepted; everything else is reported and rejected.
1032  if (IS_AUTH && *bptr != '\0') {
1033  CRef<CAuth_list> book_auth_list;
1034  get_auth(bptr, ref_fmt, jour, book_auth_list);
1035  if (book_auth_list.NotEmpty())
1036  cit_book.SetAuthors(*book_auth_list);
1037  } else {
1038  ErrPostEx(SEV_ERROR, ERR_REFERENCE_UnusualBookFormat, "Cannot parse unusually formatted book reference (generating Cit-gen instead): %s", p);
1039  if (tbptr)
1040  MemFree(tbptr);
1041 
1042  cit_art.Reset();
1043  return cit_art;
1044  }
1045 
 // The book title ends at the last ':' in front of the last ';' (or in
 // front of the end of the text); c remembers which delimiter was found.
1046  ss = StringRChr(tit, ';');
1047  if (! ss)
1048  for (ss = tit; *ss != '\0';)
1049  ss++;
1050  for (s = ss; *s != ':' && s != tit;)
1051  s--;
1052  if (*s != ':')
1053  s = ss;
1054  c = *s;
1055  if (*s != '\0')
1056  *s++ = '\0';
1057 
1058  book_title->SetName("");
1059  if (*tit != '\0') {
 // check_book_tit() cuts a trailing "Vol. N"/"Volume N" off the title.
1060  volume = check_book_tit(tit);
1061  if (volume)
1062  cit_book.SetImp().SetVolume(volume);
1063 
1064  book_title->SetName(NStr::Sanitize(tit));
1065  }
1066 
1067  if (c == ':') {
 // Pages run up to the next ',' or ';'.
1068  for (pages = s; *s != '\0' && *s != ',' && *s != ';';)
1069  s++;
1070  if (*s != '\0')
1071  *s++ = '\0';
1072 
1073  while (*pages == ' ')
1074  pages++;
1075 
 // "0-0" marks an in-press book; otherwise result 0 of
 // valid_pages_range() accepts the pages and result 1 appends the text
 // to the title.
1076  if (StringEquN(pages, "0-0", 3))
1077  cit_book.SetImp().SetPrepub(CImprint::ePrepub_in_press);
1078  else {
1079  bool is_in_press = cit_book.GetImp().IsSetPrepub() && cit_book.GetImp().GetPrepub() == CImprint::ePrepub_in_press;
1080  i = valid_pages_range(pages, book_title->GetName().c_str(), 0, is_in_press);
1081 
1082  if (i == 0)
1083  cit_book.SetImp().SetPages(NStr::Sanitize(pages));
1084  else if (i == 1) {
1085  string new_title = book_title->GetName();
1086  new_title += ": ";
1087  new_title += pages;
1088  book_title->SetName(new_title);
1089  }
1090  }
1091  }
1092 
 // Publisher: everything up to '('; the date is parsed from what follows.
1093  for (press = s; *s != '(' && *s != '\0';)
1094  s++;
1095  if (*s != '\0')
1096  *s++ = '\0';
1097 
1098  cit_book.SetImp().SetPub().SetStr(NStr::Sanitize(press));
1099 
1100  CRef<CDate> date = get_date(s);
1101  if (date.Empty()) {
1102  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No date in book reference");
1103  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "Book format error (cit-gen created): %s", p);
1104  if (tbptr)
1105  MemFree(tbptr);
1106 
1107  cit_art.Reset();
1108  return cit_art;
1109  }
1110 
1111  cit_book.SetImp().SetDate(*date);
1112  }
1113 
1114  cit_book.SetTitle().Set().push_back(book_title);
1115 
1116  if (title.NotEmpty())
1117  cit_art->SetTitle().Set().push_back(title);
1118 
1119  if (auth_list.NotEmpty())
1120  cit_art->SetAuthors(*auth_list);
1121 
1122  if (tbptr)
1123  MemFree(tbptr);
1124 
1125  return cit_art;
1126 }
1127 
1128 /**********************************************************
1129  *
1130  * static CitLetPtr get_thesis(bptr, auth, title, pre):
1131  *
1132  * Return a CitLet pointer of type "thesis" (it wraps a CitBook).
1133  *
1134  * 11-14-93
1135  *
1136  **********************************************************/
1138 {
 // Builds a CCit_let of type "thesis" around a book citation. The date is
 // read from the text after the first '('; a missing date makes the function
 // post an error and return an empty CRef. The text after the date becomes
 // the publisher string; a missing title is replaced by an empty one with a
 // warning.
 // NOTE(review): the function signature (listing line 1137) is missing from
 // this listing; "bptr", "pre", "title" and "auth_list" come from it.
1139  CRef<CCit_let> cit_let(new CCit_let);
1140 
1141  cit_let->SetType(CCit_let::eType_thesis);
1142 
1143  CCit_book& book = cit_let->SetCit();
1144 
1145  if (pre > 0)
1146  book.SetImp().SetPrepub(pre);
1147 
1148  char* s;
1149  for (s = bptr; *s != '\0' && *s != '(';)
1150  s++;
1151 
1152  if (*s == '(') {
1153  CRef<CDate> date = get_date(s + 1);
1154  if (date.NotEmpty())
1155  book.SetImp().SetDate(*date);
1156 
 // NOTE(review): the fixed skip presumably steps over "(YYYY)" - confirm.
1157  s = s + 6;
1158  }
1159 
1160  if (! book.GetImp().IsSetDate()) {
1161  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse thesis: missing date");
1162 
1163  cit_let.Reset();
1164  return cit_let;
1165  }
1166 
1167  if (*s != '\0')
1168  book.SetImp().SetPub().SetStr(NStr::Sanitize(s));
1169 
1170  if (title.NotEmpty())
1171  book.SetTitle().Set().push_back(title);
1172  else {
1173  ErrPostStr(SEV_WARNING, ERR_REFERENCE_Thesis, "Missing thesis title");
1174 
1175  CRef<CTitle::C_E> empty_title(new CTitle::C_E);
1176  empty_title->SetName("");
1177  book.SetTitle().Set().push_back(empty_title);
1178  }
1179 
1180  if (auth_list.NotEmpty())
1181  book.SetAuthors(*auth_list);
1182  return cit_let;
1183 }
1184 
1185 /**********************************************************
1186  *
1187  * static CitBookPtr get_whole_book(bptr, auth, title,
1188  * pre):
1189  *
1190  * Return a CitBook pointer.
1191  *
1192  * 11-14-93
1193  *
1194  **********************************************************/
1196 {
 // Builds a CCit_book from "<5-char prefix> <title>. <publisher> (<date>)".
 // Returns an empty CRef (after an error message) when there is no '(' to
 // start the date or when no author names are available.
 // NOTE(review): the function signature (listing line 1195) is missing from
 // this listing; "bptr", "pre" and "auth_list" come from it. bptr is modified
 // in place.
1197  CRef<CCit_book> cit_book;
1198 
1199  char* s;
1200 
 // NOTE(review): skips a fixed 5-character prefix, presumably a keyword such
 // as "Book:" - confirm against the caller.
1201  for (bptr += 5; isspace(*bptr) != 0;)
1202  bptr++;
1203 
1204 
1205  for (s = bptr; *s != '\0' && *s != '(';)
1206  s++;
1207 
1208  if (*s != '(') {
1209  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse book: missing date");
1210  return cit_book;
1211  }
1212 
1213  cit_book.Reset(new CCit_book);
1214 
1215  if (pre > 0)
1216  cit_book->SetImp().SetPrepub(pre);
1217 
 // An unparsable date is tolerated here: the imprint is simply left
 // without one.
1218  CRef<CDate> date = get_date(s + 1);
1219  if (date.NotEmpty())
1220  cit_book->SetImp().SetDate(*date);
1221 
 // Cut the text at '('; the title is everything up to the first period and
 // the publisher is what follows that period.
1222  *s = '\0';
1223  for (s = bptr; *s != '\0' && *s != '.';)
1224  s++;
1225 
1226  CRef<CTitle::C_E> book_title(new CTitle::C_E);
1227  book_title->SetName(string(bptr, s));
1228  cit_book->SetTitle().Set().push_back(book_title);
1229 
1230  if (*s == '.') {
1231  for (s++; isspace(*s) != 0;)
1232  s++;
1233 
1234  cit_book->SetImp().SetPub().SetStr(NStr::Sanitize(s));
1235  }
1236 
 // NOTE(review): the message below says "thesis" although this function
 // builds a whole-book citation.
1237  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
1238  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse thesis: missing thesis author");
1239  cit_book.Reset();
1240  return cit_book;
1241  }
1242 
1243  cit_book->SetAuthors(*auth_list);
1244 
1245  return cit_book;
1246 }
1247 
1248 /**********************************************************
1249  *
1250  * static CitSubPtr get_sub(pp, bptr, auth):
1251  *
1252  * Return a CitSub pointer.
1253  *
1254  **********************************************************/
// Parses a direct-submission reference and returns it as a CCit_sub.
// The date is taken from the text starting at the first '('; the text after
// the date, with boilerplate phrases from strip_sub_str removed, is stored as
// the affiliation of auth_list. An empty CRef is returned when there is no
// '(' or no author names; when the date cannot be parsed the CCit_sub created
// before date parsing is returned without date or authors.
// NOTE(review): listing line 1260, which presumably declares and initializes
// "medium", is missing from this listing.
1255 static CRef<CCit_sub> get_sub(ParserPtr pp, char* bptr, CRef<CAuth_list>& auth_list)
1256 {
1257  const char** b;
1258  char* s;
1259 
1261 
1262  CRef<CCit_sub> ret;
1263 
1264  for (s = bptr; *s != '(' && *s != '\0';)
1265  s++;
1266  if (*s == '\0') {
1267  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse submission: missing date");
1268  return ret;
1269  }
1270 
1271  ret.Reset(new CCit_sub);
1272  CRef<CDate> date;
1273 
 // Old-style accessions whose first letter is listed in ParFlat_LANL_AC and
 // whose date does not start with a digit use the LANL date form; everything
 // else uses the regular full date.
 // NOTE(review): the condition tests "pp" for null, but the else branch
 // dereferences pp->source unconditionally.
1274  if (pp && ! pp->entrylist.empty() &&
1275  IsNewAccessFormat(pp->entrylist[pp->curindx]->acnum) == 0 &&
1276  StringChr(ParFlat_LANL_AC, pp->entrylist[pp->curindx]->acnum[0]) &&
1277  isdigit((int)*(s + 1)) == 0) {
1278  date = get_lanl_date(s);
1279  } else {
1280  CRef<CDate_std> std_date = get_full_date(s + 1, true, pp->source);
1281  if (std_date) {
1282  date.Reset(new CDate);
1283  date->SetStd(*std_date);
1284  }
1285  }
1286 
1287  if (date.Empty())
1288  return ret;
1289 
 // NOTE(review): ret was already set to a new CCit_sub above; this second
 // Reset replaces that object with another fresh one.
1290  ret.Reset(new CCit_sub);
1291  ret->SetDate(*date);
1292 
 // NOTE(review): the fixed skip presumably steps over "(DD-MON-YYYY)" -
 // confirm.
1293  s = s + 13;
1294  if (StringStr(s, "E-mail"))
1295  medium = CCit_sub::eMedium_email;
1296 
1297  if (StringEquNI(" on tape", s, 8)) {
1298  medium = CCit_sub::eMedium_tape;
1299  for (s += 8; *s != '\0' && *s != ':';)
1300  s++;
1301  }
1302  if (*s != '\0' && *(s + 1) != '\0') {
1303  while (*s == ' ')
1304  s++;
1305 
1306  if (*s == ':')
1307  s++;
 // Repeatedly strip any leading boilerplate phrase (and the blanks and
 // periods after it) until none of the phrases matches any more.
1308  for (;;) {
1309  for (b = strip_sub_str; *b; b++) {
1310  size_t l_str = StringLen(*b);
1311  if (StringEquN(s, *b, l_str)) {
1312  for (s += l_str; *s == ' ' || *s == '.';)
1313  s++;
1314  break;
1315  }
1316  }
1317  if (! *b)
1318  break;
1319  }
1320 
1321  if (*s != '\0' && auth_list.NotEmpty()) {
1322  auth_list->SetAffil().SetStr(NStr::Sanitize(s));
1323  }
1324  }
1325 
1326  if (*s == '\0') {
1327  ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoContactInfo, "Missing contact info : %s", bptr);
1328  }
1329 
1330  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
1331  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Direct submission: missing author (cit-gen created)");
1332 
1333  ret.Reset();
1334  return ret;
1335  }
1336 
1337  ret->SetAuthors(*auth_list);
1338  ret->SetMedium(medium);
1339 
1340  return ret;
1341 }
1342 
1343 /**********************************************************
1344  *
1345  * static CitSubPtr get_sub_gsdb(bptr, auth, title, pp):
1346  *
1347  * GSDB specific format for CitSub :
1348  * REFERENCE 1 (bases 1 to 378)
1349  * AUTHORS Mundt,M.O.
1350  * TITLE Published by M.O. Mundt, Genomics LS-3,
1351  * Los Alamos National Laboratory,
1352  * Mail Stop M888, Los Alamos, NM, USA, 87545
1353  * JOURNAL Published in GSDB (11-OCT-1996)
1354  *
1355  **********************************************************/
1356 static CRef<CCit_sub> get_sub_gsdb(char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, ParserPtr pp)
1357 {
1358  CRef<CCit_sub> cit_sub;
1359 
1360  char* s;
1361 
1362  for (s = bptr; *s != '(' && *s != '\0';)
1363  s++;
1364  if (*s == '\0') {
1365  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse submission: missing date");
1366  return cit_sub;
1367  }
1368 
1369  CRef<CDate_std> std_date = get_full_date(s + 1, true, pp->source);
1370  if (std_date.Empty())
1371  return cit_sub;
1372 
1373  CRef<CDate> date;
1374  date->SetStd(*std_date);
1375 
1376  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
1377  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Direct submission: missing author (cit-gen created)");
1378  return cit_sub;
1379  }
1380 
1381  cit_sub.Reset(new CCit_sub);
1382  cit_sub->SetAuthors(*auth_list);
1383  cit_sub->SetDate(*date);
1384 
1385  if (title.NotEmpty()) {
1386  const Char* s = title->GetName().c_str();
1387  size_t l_str = StringLen("Published by");
1388  if (StringEquN(s, "Published by", l_str)) {
1389  s += l_str;
1390  while (*s == ' ')
1391  s++;
1392  }
1393 
1394  if (*s != '\0') {
1395  auth_list->SetAffil().SetStr(NStr::Sanitize(s));
1396  } else {
1397  ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoContactInfo, "Missing contact info : %s", bptr);
1398  }
1399  } else {
1400  ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoContactInfo, "Missing contact info : %s", bptr);
1401  }
1402 
1403  return cit_sub;
1404 }
1405 
1406 /**********************************************************/
1407 static CRef<CCit_gen> fta_get_citgen(char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title)
1408 {
1409  CRef<CCit_gen> cit_gen;
1410 
1411  char* p;
1412  char* q;
1413  char* r;
1414  Char ch;
1415  Int2 year;
1416 
1417  if (! bptr || auth_list.Empty() || ! auth_list->IsSetNames() || title.Empty())
1418  return cit_gen;
1419 
1420  year = 0;
1421  p = StringChr(bptr, '(');
1422  if (p) {
1423  for (p++; *p == ' ' || *p == '\t';)
1424  p++;
1425  for (q = p; *p >= '0' && *p <= '9';)
1426  p++;
1427  for (r = p; *p == ' ' || *p == '\t' || *p == ')';)
1428  p++;
1429  if (*p == '\n' || *p == '\0') {
1430  ch = *r;
1431  *r = '\0';
1432  year = atoi(q);
1433  if (year < 1900)
1434  *r = ch;
1435  else {
1436  for (q--; *q == ' ' || *q == '\t' || *q == '(';)
1437  q--;
1438  *++q = '\0';
1439  }
1440  }
1441  }
1442 
1443  cit_gen.Reset(new CCit_gen);
1444 
1445  if (bptr)
1446  cit_gen->SetCit(bptr);
1447 
1448  cit_gen->SetAuthors(*auth_list);
1449  cit_gen->SetTitle(title->GetName());
1450 
1451  if (year >= 1900)
1452  cit_gen->SetDate().SetStd().SetYear(year);
1453 
1454  return cit_gen;
1455 }
1456 
/*
 * journal: turn a JOURNAL string into a CPub.
 *
 *   pp        - parser context (format, source and seqtype are read).
 *   bptr/eptr - start and end of the JOURNAL text. Trailing blanks, tabs and
 *               periods, and a trailing "In press" / "Submitted" marker, are
 *               cut off in place.
 *   auth_list - authors, handed on to the specialised parsers.
 *   title     - article title, may be empty.
 *   has_muid  - handed on to get_art().
 *   cit_art   - article obtained elsewhere; when non-empty it is used instead
 *               of parsing the text with get_art().
 *   er        - handed on to get_art() and used to silence the format warning.
 *
 * The leading keyword of bptr selects the parser: "Unpub"/"Unknown" ->
 * get_unpub, "(in)" -> get_book, "Thesis" -> get_thesis, "Submi" -> get_sub,
 * "Published in GSDB" -> get_sub_gsdb, "Patent" (or USPTO source) -> get_pat,
 * "Book:" -> get_whole_book, "Published Only in Database" and
 * "Online Publication" -> fta_get_citgen. If retval is still
 * ParFlat_MISSING_JOURNAL afterwards the text is treated as a journal
 * article. Returns an empty CRef when the thesis, submission, patent, whole
 * book or cit-gen parser fails.
 *
 * NOTE(review): source lines 1465, 1484, 1488, 1495, 1505 and 1515 are absent
 * from this listing. `retval` is used below without a visible declaration,
 * and the "In press"/"Submitted" branches presumably also assign `pre` -
 * confirm against the repository.
 */
1457 CRef<CPub> journal(ParserPtr pp, char* bptr, char* eptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, bool has_muid, CRef<CCit_art>& cit_art, Int4 er)
1458 {
1459  CImprint::EPrepub pre = static_cast<CImprint::EPrepub>(0);
1460 
1461  char* p;
1462  char* nearend;
1463  char* end;
1464 
1466 
1467  CRef<CPub> ret(new CPub);
/* No JOURNAL text at all: produce an "unpublished" Cit-gen. */
1468  if (! bptr) {
1469  const Char* title_str = title.Empty() ? nullptr : title->GetName().c_str();
1470  ret->SetGen(*get_unpub(bptr, eptr, auth_list, title_str));
1471  return ret;
1472  }
1473 
1474  p = bptr;
1475  size_t my_len = StringLen(p);
1476  if (my_len > 7) {
/* Trim trailing blanks, tabs and periods in place. */
1477  nearend = p + StringLen(p) - 1;
1478  while (*nearend == ' ' || *nearend == '\t' || *nearend == '.')
1479  *nearend-- = '\0';
1480 
/* NOTE(review): the length test above is made before trimming; if trimming
 * removes many characters, nearend - 8 may point in front of bptr. */
1481  nearend -= 8;
1482  end = nearend + 2;
1483  if (StringEquNI("In press", nearend + 1, 8)) {
1485  *(nearend + 1) = '\0';
1486  }
1487  if (StringEquNI("Submitted", nearend, 9)) {
1489  *nearend = '\0';
1490  }
/* Text ending in "(<digit>..." with no marker found yet: check the word that
 * precedes the parenthesis for "In press". */
1491  if (pre == 0 && *end == '(' && isdigit(*(end + 1)) != 0) {
1492  for (nearend = end - 1; nearend > bptr && *nearend != ' ';)
1493  nearend--;
1494  if (StringEquNI("In press", nearend + 1, 8)) {
1496  *(nearend + 1) = '\0';
1497  }
1498  }
1499  }
1500 
/* A leading six-character parenthesised token followed by " In press". */
1501  if (my_len >= 6 && *p == '(') {
1502  p += 6;
1503  if (StringEquN(" In press", p, 9)) {
1504  retval = ParFlat_IN_PRESS;
1506  }
1507  }
1508 
/* Dispatch on the leading keyword of the JOURNAL text. */
1509  p = bptr;
1510  if (StringEquN("Unpub", p, 5) || StringEquN("Unknown", p, 7)) {
1511  retval = ParFlat_UNPUB_JOURNAL;
1512  const Char* title_str = title.Empty() ? nullptr : title->GetName().c_str();
1513  ret->SetGen(*get_unpub(bptr, eptr, auth_list, title_str));
1514  } else if (StringEquN("(in)", p, 4)) {
1516 
1517  CRef<CCit_art> article = get_book(bptr, auth_list, title, pre, pp->format, p);
1518 
/* A book chapter that cannot be parsed degrades to an error Cit-gen. */
1519  if (article.Empty())
1520  ret->SetGen(*get_error(bptr, auth_list, title));
1521  else
1522  ret->SetArticle(*article);
1523 
1524  } else if (StringEquN("Thesis", p, 6)) {
1525  retval = ParFlat_THESIS_CITATION;
1526 
1527  CRef<CCit_let> cit_let = get_thesis(bptr, auth_list, title, pre);
1528  if (cit_let.Empty()) {
1529  ret.Reset();
1530  return ret;
1531  }
1532  ret->SetMan(*cit_let);
1533  } else if (StringEquN("Submi", p, 5)) {
1534  retval = ParFlat_SUBMITTED;
1535 
1536  CRef<CCit_sub> cit_sub = get_sub(pp, bptr, auth_list);
1537  if (cit_sub.Empty()) {
1538  ret.Reset();
1539  return ret;
1540  }
1541 
1542  ret->SetSub(*cit_sub);
1543  } else if (StringEquN("Published in GSDB", p, 17)) {
1544  ErrPostEx(SEV_WARNING, ERR_REFERENCE_GsdbRefDropped, "A published-in-gsdb reference was encountered and has been dropped [%s]", bptr);
1545  retval = ParFlat_SUBMITTED;
1546 
1547  CRef<CCit_sub> cit_sub = get_sub_gsdb(bptr, auth_list, title, pp);
1548  if (cit_sub.Empty()) {
1549  ret.Reset();
1550  return ret;
1551  }
1552 
1553  ret->SetSub(*cit_sub);
1554  } else if (StringEquN("Patent", p, 6) ||
1555  pp->source == Parser::ESource::USPTO) {
1556  retval = ParFlat_PATENT_CITATION;
1557 
/* Patents are parsed only for the sequence types listed here or for USPTO
 * input; for anything else the reference is dropped. */
1558  if (pp->seqtype == CSeq_id::e_Genbank || pp->seqtype == CSeq_id::e_Ddbj ||
1559  pp->seqtype == CSeq_id::e_Embl || pp->seqtype == CSeq_id::e_Other ||
1560  pp->seqtype == CSeq_id::e_Tpe || pp->seqtype == CSeq_id::e_Tpg ||
1561  pp->seqtype == CSeq_id::e_Tpd ||
1562  pp->source == Parser::ESource::USPTO) {
1563  CRef<CCit_pat> cit_pat = get_pat(pp, bptr, auth_list, title, eptr);
1564  if (cit_pat.Empty()) {
1565  ret.Reset();
1566  return ret;
1567  }
1568 
1569  ret->SetPatent(*cit_pat);
1570  } else {
1571  ret.Reset();
1572  return ret;
1573  }
1574  } else if (StringEquN("Book:", p, 5)) {
1575  retval = ParFlat_BOOK_CITATION;
1576 
1577  CRef<CCit_book> book = get_whole_book(bptr, auth_list, title, pre);
1578  if (book.Empty()) {
1579  ret.Reset();
1580  return ret;
1581  }
1582 
1583  ret->SetBook(*book);
1584  } else if (StringEquNI("Published Only in Database", p, 26)) {
1585  retval = ParFlat_GEN_CITATION;
1586  CRef<CCit_gen> cit_gen = fta_get_citgen(bptr, auth_list, title);
1587 
1588  if (cit_gen.Empty()) {
1589  ret.Reset();
1590  return ret;
1591  }
1592 
1593  ret->SetGen(*cit_gen);
1594  } else if (StringEquNI("Online Publication", p, 18)) {
1595  retval = ParFlat_ONLINE_CITATION;
1596 
1597  CRef<CCit_gen> cit_gen = fta_get_citgen(bptr, auth_list, title);
1598 
1599  if (cit_gen.Empty()) {
1600  ret.Reset();
1601  return ret;
1602  }
1603 
1604  ret->SetGen(*cit_gen);
1605  }
1606 
/* No keyword matched: a regular journal article. A supplied cit_art takes
 * precedence over parsing the text; a parse failure yields an error Cit-gen. */
1607  if (retval == ParFlat_MISSING_JOURNAL) {
1608  if (cit_art.NotEmpty())
1609  ret->SetArticle(*cit_art);
1610  else {
1611  bool all_zeros;
1612  CRef<CCit_art> new_art = get_art(pp, bptr, auth_list, title, pre, has_muid, &all_zeros, er);
1613  if (new_art.Empty()) {
1614  if (! all_zeros && ! StringEquN(bptr, "(er)", 4) && er == 0)
1615  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "Journal format error (cit-gen created): %s", bptr);
1616 
1617  ret->SetGen(*get_error(bptr, auth_list, title));
1618  } else
1619  ret->SetArticle(*new_art);
1620  }
1621  }
1622 
1623  return ret;
1624 }
1625 
1626 /**********************************************************/
/*
 * FindBackSemicolon: look backwards from pchCurrent (exclusive) towards
 * pchStart (inclusive), skipping white space.
 *
 * Returns a pointer to the ';' if that is the first non-space character met,
 * otherwise nullptr. Also returns nullptr when either pointer is null or when
 * pchStart is not strictly in front of pchCurrent.
 */
static char* FindBackSemicolon(char* pchStart, char* pchCurrent)
{
    if (! pchStart || ! pchCurrent || pchStart >= pchCurrent)
        return nullptr;

    /* Decrement only while still above pchStart, so the pointer never steps
     * in front of the buffer.
     */
    while (pchCurrent > pchStart) {
        --pchCurrent;

        /* <ctype> classifiers require a value representable as
         * unsigned char; a plain negative char is undefined behaviour.
         */
        if (isspace(static_cast<unsigned char>(*pchCurrent)) != 0)
            continue;

        return (*pchCurrent == ';') ? pchCurrent : nullptr;
    }

    return nullptr;
}
1642 
1643 /**********************************************************/
/*
 * FindSemicolon: skip leading white space in str.
 *
 * Returns a pointer to the first non-space character if it is a ';',
 * otherwise nullptr. A null or empty string also yields nullptr.
 */
static char* FindSemicolon(char* str)
{
    if (! str || *str == '\0')
        return nullptr;

    /* std::isspace requires a value representable as unsigned char; a plain
     * negative char is undefined behaviour.
     */
    while (*str && std::isspace(static_cast<unsigned char>(*str)))
        str++;

    return (*str == ';') ? str : nullptr;
}
1657 
1658 /**********************************************************/
1659 static char* ExtractErratum(char* comm)
1660 {
1661  char* start;
1662  char* pchNumber = nullptr;
1663  char* end;
1664  char* p;
1665 
1666  if (! comm)
1667  return nullptr;
1668 
1669  start = StringStr(comm, "Erratum:");
1670  if (! start)
1671  return (comm);
1672 
1673  end = StringChr(start, ']');
1674  if (! end)
1675  return (comm);
1676 
1677  pchNumber = end + 1;
1678  end = FindSemicolon(pchNumber);
1679  if (end)
1680  pchNumber = end + 1;
1681  p = FindBackSemicolon(comm, start);
1682  if (p)
1683  start = p;
1684  fta_StringCpy(start, pchNumber);
1685 
1686  /* Check if the string after cutting signature is empty. If it's really
1687  * empty we have to ignore the whole string (comment).
1688  * Do you want to have a comment which contains nothing!? Probably no.
1689  */
1690  for (p = comm; *p == ' ' || *p == '\t' || *p == '\n';)
1691  p++;
1692  if (*p == '\0')
1693  *comm = '\0';
1694 
1695  return (comm);
1696 }
1697 
1698 /**********************************************************/
1699 static void XMLGetXrefs(char* entry, XmlIndexPtr xip, TQualVector& quals)
1700 {
1701  XmlIndexPtr xipqual;
1702 
1703  if (! entry || ! xip)
1704  return;
1705 
1706  for (; xip; xip = xip->next) {
1707  if (! xip->subtags)
1708  continue;
1709 
1710  CRef<CGb_qual> qual(new CGb_qual);
1711 
1712  for (xipqual = xip->subtags; xipqual; xipqual = xipqual->next) {
1713  if (xipqual->tag == INSDXREF_DBNAME)
1714  qual->SetQual(*XMLGetTagValue(entry, xipqual));
1715  else if (xipqual->tag == INSDXREF_ID)
1716  qual->SetVal(*XMLGetTagValue(entry, xipqual));
1717  }
1718 
1719  if (qual->IsSetQual() && ! qual->GetQual().empty())
1720  quals.push_back(qual);
1721  }
1722 }
1723 
1724 /**********************************************************/
1725 static void fta_add_article_ids(CPub& pub, const string& doi, const string& agricola)
1726 {
1727  if (doi.empty() && agricola.empty())
1728  return;
1729 
1730  if (pub.IsArticle()) {
1731  CCit_art& cit_art = pub.SetArticle();
1732 
1733  if (! agricola.empty()) {
1734  CRef<CArticleId> id(new CArticleId);
1735  id->SetOther().SetDb("AGRICOLA");
1736  id->SetOther().SetTag().SetStr(agricola);
1737 
1738  cit_art.SetIds().Set().push_front(id);
1739  }
1740 
1741  if (! doi.empty()) {
1742  CRef<CArticleId> id(new CArticleId);
1743  id->SetDoi().Set(doi);
1744 
1745  cit_art.SetIds().Set().push_front(id);
1746  }
1747  }
1748 }
1749 
1750 /**********************************************************/
1751 Int4 fta_remark_is_er(const string& str)
1752 {
1753  const char** b;
1754  Int4 i;
1755 
1756  string s = str;
1757  ShrinkSpaces(s);
1758 
1759  for (i = 1, b = ERRemarks; *b; b++, i++) {
1760  if (StringIStr(s.c_str(), *b)) {
1761  if (i <= 6)
1762  return 1; // epublish
1763  else
1764  return 2; // aheadofprint
1765  }
1766  }
1767 
1768  return 0;
1769 }
1770 
1771 /**********************************************************/
1772 static CRef<CPubdesc> XMLRefs(ParserPtr pp, DataBlkPtr dbp, bool& no_auth, bool& rej)
1773 {
1774  char* p;
1775  char* q;
1776  bool is_online;
1777  TEntrezId pmid;
1778 
1779  XmlIndexPtr xip;
1780 
1781  Int4 er;
1782 
1783  CRef<CPubdesc> desc;
1784 
1785  if (! pp || ! dbp || ! dbp->mOffset || ! dbp->mpData)
1786  return desc;
1787 
1788  desc.Reset(new CPubdesc);
1789 
1790  p = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_REFERENCE));
1791  if (p && isdigit((int)*p) != 0) {
1792  desc->SetPub().Set().push_back(get_num(p));
1793  } else {
1794  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
1795  }
1796 
1797  if (p)
1798  MemFree(p);
1799 
1800  p = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_MEDLINE));
1801  if (p) {
1802  rej = true;
1803  MemFree(p);
1804  desc.Reset();
1805  return desc;
1806  }
1807 
1808  pmid = ZERO_ENTREZ_ID;
1809  p = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_PUBMED));
1810  if (p) {
1812  MemFree(p);
1813  }
1814 
1815  CRef<CAuth_list> auth_list;
1816 
1817  p = StringSave(XMLConcatSubTags(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_AUTHORS, ','));
1818  if (p) {
1819  if (pp->xml_comp) {
1820  q = StringRChr(p, '.');
1821  if (! q || q[1] != '\0') {
1822  string s = p;
1823  s.append(".");
1824  MemFree(p);
1825  p = StringSave(s);
1826  q = nullptr;
1827  }
1828  }
1829  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
1830  q++;
1831  if (*q != '\0') {
1832  q = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_JOURNAL));
1833  char* r = StringChr(p, ',');
1834  if (r && ! StringChr(r + 1, '.'))
1835  *r = '|';
1836  get_auth(p, (pp->source == Parser::ESource::EMBL) ? EMBL_REF : GB_REF, q, auth_list);
1837  MemFree(q);
1838  }
1839  MemFree(p);
1840  }
1841 
1842  p = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_CONSORTIUM));
1843  if (p) {
1844  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
1845  q++;
1846 
1847  if (*q != '\0')
1848  get_auth_consortium(p, auth_list);
1849 
1850  MemFree(p);
1851  }
1852 
1853  if (auth_list.Empty() || ! auth_list->IsSetNames())
1854  no_auth = true;
1855 
1856  p = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_TITLE));
1857 
1858  CRef<CTitle::C_E> title_art(new CTitle::C_E);
1859  if (p) {
1860  if (! StringEquN(p, "Direct Submission", 17) &&
1861  *p != '\0' && *p != ';') {
1862  string title = clean_up(p);
1863  if (! title.empty()) {
1864  title_art->SetName(tata_save(title));
1865  }
1866  }
1867  MemFree(p);
1868  }
1869 
1870  is_online = false;
1871  p = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_JOURNAL));
1872  if (! p) {
1873  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No JOURNAL line, reference dropped");
1874  desc.Reset();
1875  return desc;
1876  }
1877 
1878  if (*p == '\0' || *p == ';') {
1879  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "JOURNAL line is empty, reference dropped");
1880  MemFree(p);
1881  desc.Reset();
1882  return desc;
1883  }
1884 
1885  if (NStr::EqualNocase(p, 0, 18, "Online Publication"))
1886  is_online = true;
1887 
1888  if (char* r = StringSave(XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_REMARK))) {
1889  string comm = NStr::Sanitize(ExtractErratum(r));
1890  MemFree(r);
1891  if (! is_online)
1892  normalize_comment(comm);
1893  desc->SetComment(comm);
1894  }
1895 
1896  er = desc->IsSetComment() ? fta_remark_is_er(desc->GetComment()) : 0;
1897 
1898  CRef<CCit_art> cit_art;
1899  if (pp->medserver == 1 && pmid > ZERO_ENTREZ_ID && (StringEquN(p, "(er)", 4) || er > 0)) {
1900  cit_art = FetchPubPmId(pmid);
1901  if (cit_art.Empty())
1902  pmid = ZERO_ENTREZ_ID;
1903  }
1904 
1905  if (pmid > ZERO_ENTREZ_ID) {
1906  CRef<CPub> pub(new CPub);
1907  pub->SetPmid().Set(pmid);
1908  desc->SetPub().Set().push_back(pub);
1909  }
1910 
1911  CRef<CPub> pub_ref = journal(pp, p, p + StringLen(p), auth_list, title_art, false, cit_art, er);
1912  MemFree(p);
1913 
1914  TQualVector xrefs;
1915  for (xip = static_cast<XmlIndex*>(dbp->mpData); xip; xip = xip->next) {
1916  if (xip->tag == INSDREFERENCE_XREF)
1917  XMLGetXrefs(dbp->mOffset, xip->subtags, xrefs);
1918  }
1919 
1920  string doi;
1921  string agricola;
1922  for (const auto& xref : xrefs) {
1923  if (! xref->IsSetQual())
1924  continue;
1925 
1926  if (NStr::EqualNocase(xref->GetQual(), "ARGICOLA") && agricola.empty())
1927  agricola = xref->GetVal();
1928  else if (NStr::EqualNocase(xref->GetQual(), "DOI") && doi.empty())
1929  doi = xref->GetVal();
1930  }
1931 
1932  fta_add_article_ids(*pub_ref, doi, agricola);
1933 
1934  if (pub_ref.Empty()) {
1935  desc.Reset();
1936  return desc;
1937  }
1938 
1939  if (dbp->mType == ParFlat_REF_NO_TARGET)
1940  desc->SetReftype(CPubdesc::eReftype_no_target);
1941 
1942  desc->SetPub().Set().push_back(pub_ref);
1943 
1944  return desc;
1945 }
1946 
1947 /**********************************************************/
/*
 * gb_refs_common: build a Pubdesc from one GenBank REFERENCE block.
 *
 *   pp       - parser context (medserver is read, and pp is handed to journal()).
 *   dbp      - the REFERENCE block; its text starts at dbp->mOffset + col_data.
 *   col_data - column offset of the data within the block.
 *   bParser  - true when called from the parser; false in the GBDIFF context
 *              (see the comments inside the two branches).
 *   ppInd    - when bParser is false and ppInd is not null, it receives the
 *              address of the function's static subblock table and the
 *              function returns right away.
 *   no_auth  - set to true when no author names could be obtained.
 *
 * Returns an empty CRef when the JOURNAL subblock is missing or empty, or
 * when journal() fails.
 *
 * NOTE(review): source lines 1971, 1976, 1989 and 2000 are absent from this
 * listing. `ind` is read below without a visible statement that fills it,
 * `pub` is tested without a visible declaration, and the PUBMED branch has
 * no visible statement after its `if (p)`.
 */
1948 static
1949 CRef<CPubdesc> gb_refs_common(ParserPtr pp, DataBlkPtr dbp, Uint2 col_data, bool bParser, DataBlkPtr** ppInd, bool& no_auth)
1950 {
/* The table has static storage: it is shared by all calls, and its address
 * is what *ppInd receives below. */
1951  static DataBlkPtr ind[MAXKW + 1];
1952 
1953  bool has_muid;
1954  char* p;
1955  char* q;
1956  char* r;
1957  bool is_online;
1958  TEntrezId pmid;
1959  Int4 er;
1960 
1961  CRef<CPubdesc> desc(new CPubdesc);
1962 
1963  p = dbp->mOffset + col_data;
1964  if (bParser) {
1965  /* This branch works when this function called in context of PARSER
1966  */
1967  if (*p >= '0' && *p <= '9')
1968  desc->SetPub().Set().push_back(get_num(p));
1969  else
1970  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
1972  } else {
1973  /* This branch works when this function is called in context of GBDIFF
1974  */
1975  if (ppInd) {
1977  *ppInd = &ind[0];
1978 
1979  return desc;
1980  }
1981 
1982  if (*p < '0' || *p > '9')
1983  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
1984  }
1985 
1986  has_muid = false;
1987  if (ind[ParFlat_MEDLINE]) {
1988  p = ind[ParFlat_MEDLINE]->mOffset;
/* NOTE(review): when `pub` is non-empty the next lines push get_num(p), not
 * `pub` itself, into the Pubdesc; embl_refs() pushes `pub` in the matching
 * place - confirm which one is intended. */
1990  if (pub.NotEmpty()) {
1991  has_muid = true;
1992  desc->SetPub().Set().push_back(get_num(p));
1993  }
1994  }
1995 
1996  pmid = ZERO_ENTREZ_ID;
1997  if (ind[ParFlat_PUBMED]) {
1998  p = ind[ParFlat_PUBMED]->mOffset;
1999  if (p)
2001  }
2002 
2003  CRef<CAuth_list> auth_list;
2004  if (ind[ParFlat_AUTHORS]) {
2005  p = ind[ParFlat_AUTHORS]->mOffset;
/* Authors are parsed only if something other than blanks, periods and
 * commas is present. */
2006  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2007  q++;
2008 
2009  if (*q != '\0') {
/* q is re-pointed at the JOURNAL text when there is one; otherwise it still
 * points into the author string when handed to get_auth(). */
2010  if (ind[ParFlat_JOURNAL])
2011  q = ind[ParFlat_JOURNAL]->mOffset;
2012 
2013  get_auth(p, GB_REF, q, auth_list);
2014  }
2015  }
2016 
2017  if (ind[ParFlat_CONSRTM]) {
2018  p = ind[ParFlat_CONSRTM]->mOffset;
2019  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2020  q++;
2021 
2022  if (*q != '\0')
2023  get_auth_consortium(p, auth_list);
2024  }
2025 
2026  if (auth_list.Empty() || ! auth_list->IsSetNames())
2027  no_auth = true;
2028 
/* The title is kept unless it starts with "Direct Submission", is empty or
 * starts with ';'. */
2029  CRef<CTitle::C_E> title_art;
2030  if (ind[ParFlat_TITLE]) {
2031  p = ind[ParFlat_TITLE]->mOffset;
2032  if (! StringEquN(p, "Direct Submission", 17) &&
2033  *p != '\0' && *p != ';') {
2034  string title = clean_up(p);
2035  if (! title.empty()) {
2036  title_art.Reset(new CTitle::C_E);
2037  title_art->SetName(NStr::Sanitize(title));
2038  }
2039  }
2040  }
2041 
2042  if (! ind[ParFlat_JOURNAL]) {
2043  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No JOURNAL line, reference dropped");
2044 
2045  desc.Reset();
2046  return desc;
2047  }
2048 
2049  p = ind[ParFlat_JOURNAL]->mOffset;
2050  if (*p == '\0' || *p == ';') {
2051  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "JOURNAL line is empty, reference dropped");
2052 
2053  desc.Reset();
2054  return desc;
2055  }
2056 
2057  is_online = StringEquNI(p, "Online Publication", 18);
2058 
/* REMARK becomes the comment, after ExtractErratum() has edited it in place;
 * online publications skip normalize_comment(). */
2059  if (ind[ParFlat_REMARK]) {
2060  r = ind[ParFlat_REMARK]->mOffset;
2061  string comm = NStr::Sanitize(ExtractErratum(r));
2062  if (! is_online)
2063  normalize_comment(comm);
2064  desc->SetComment(comm);
2065  }
2066 
2067  er = desc->IsSetComment() ? fta_remark_is_er(desc->GetComment()) : 0;
2068 
/* The article is fetched by PubMed id only when pp->medserver is 1, a pmid is
 * present, and either the JOURNAL text starts with "(er)" or the remark
 * classified as electronic; a failed fetch discards the pmid. */
2069  CRef<CCit_art> cit_art;
2070  if (pp->medserver == 1 && pmid > ZERO_ENTREZ_ID && (StringEquN(p, "(er)", 4) || er > 0)) {
2071  cit_art = FetchPubPmId(pmid);
2072  if (! cit_art)
2073  pmid = ZERO_ENTREZ_ID;
2074  }
2075 
2076  if (pmid > ZERO_ENTREZ_ID) {
2077  CRef<CPub> pub(new CPub);
2078  pub->SetPmid().Set(pmid);
2079  desc->SetPub().Set().push_back(pub);
2080  }
2081 
2082  CRef<CPub> pub_ref = journal(pp, p, p + ind[ParFlat_JOURNAL]->len, auth_list, title_art, has_muid, cit_art, er);
2083 
2084  if (pub_ref.Empty()) {
2085  desc.Reset();
2086  return desc;
2087  }
2088 
2089  if (dbp->mType == ParFlat_REF_NO_TARGET)
2090  desc->SetReftype(CPubdesc::eReftype_no_target);
2091 
2092  desc->SetPub().Set().push_back(pub_ref);
2093 
2094  return desc;
2095 }
2096 
2097 /**********************************************************
2098  *
2099  * static PubdescPtr embl_refs(pp, dbp, col_data, no_auth):
2100  *
2101  * Parse EMBL references. Return a Pubdesc pointer.
2102  *
2103  * 11-14-93
2104  *
2105  **********************************************************/
/*
 * embl_refs: build a Pubdesc from one EMBL reference block.
 *
 *   pp       - parser context (medserver is read, and pp is handed to journal()).
 *   dbp      - the reference block; its text starts at dbp->mOffset + col_data.
 *   col_data - column offset of the data within the block.
 *   no_auth  - set to true when no author names could be obtained.
 *
 * The RC, RX, RA, RG, RT and RL entries of the subblock table are read for
 * the comment, the cross-reference ids, the authors, the consortium, the
 * title and the journal text. Returns an empty CRef when the RL subblock is
 * missing or empty, or when journal() fails.
 *
 * NOTE(review): source lines 2128 and 2143 are absent from this listing.
 * `ind` is read below without a visible statement that fills it, and `pub`
 * is tested in the RX branch without a visible declaration.
 */
2106 static CRef<CPubdesc> embl_refs(ParserPtr pp, DataBlkPtr dbp, Uint2 col_data, bool& no_auth)
2107 {
2108  static DataBlkPtr ind[MAXKW + 1];
2109  char* s;
2110 
2111  bool has_muid;
2112  char* p;
2113  char* q;
2114  TEntrezId pmid;
2115 
2116  Int4 er;
2117 
2118  CRef<CPubdesc> desc(new CPubdesc);
2119 
2120  p = dbp->mOffset + col_data;
/* Advance to the first digit of the reference number.
 * NOTE(review): dbp->len is not changed inside the loop, so the second
 * condition does not bound the scan; a block without any digit would be
 * scanned past its terminating '\0'. */
2121  while ((*p < '0' || *p > '9') && dbp->len > 0)
2122  p++;
2123  if (*p >= '0' && *p <= '9')
2124  desc->SetPub().Set().push_back(get_num(p));
2125  else
2126  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
2127 
2129 
2130  has_muid = false;
2131  pmid = ZERO_ENTREZ_ID;
2132 
2133  string doi;
2134  string agricola;
2135 
2136  if (ind[ParFlat_RC])
2137  desc->SetComment(NStr::Sanitize(ind[ParFlat_RC]->mOffset));
2138 
2139  er = desc->IsSetComment() ? fta_remark_is_er(desc->GetComment()) : 0;
2140 
/* RX line: DOI and AGRICOLA ids, the `pub` tested below, and the PubMed id. */
2141  if (ind[ParFlat_RX]) {
2142  p = ind[ParFlat_RX]->mOffset;
2144 
2145  char* id = get_embl_str_pub_id(p, "DOI;");
2146  if (id) {
2147  doi = id;
2148  MemFree(id);
2149  }
2150 
2151  id = get_embl_str_pub_id(p, "AGRICOLA;");
2152  if (id) {
2153  agricola = id;
2154  MemFree(id);
2155  }
2156 
2157  if (pub.NotEmpty()) {
2158  desc->SetPub().Set().push_back(pub);
2159  has_muid = true;
2160  }
2161 
2162  pmid = get_embl_pmid(p);
2163  }
2164 
2165  CRef<CAuth_list> auth_list;
2166  if (ind[ParFlat_RA]) {
2167  p = ind[ParFlat_RA]->mOffset;
/* Drop a terminating ';'.
 * NOTE(review): for an empty string s points one character in front of p. */
2168  s = p + StringLen(p) - 1;
2169  if (*s == ';')
2170  *s = '\0';
2171  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2172  q++;
2173  if (*q != '\0') {
/* q is re-pointed at the RL text when there is one; otherwise it still points
 * into the author string when handed to get_auth(). */
2174  if (ind[ParFlat_RL])
2175  q = ind[ParFlat_RL]->mOffset;
2176 
2177  get_auth(p, EMBL_REF, q, auth_list);
2178  }
2179  }
2180 
2181  if (ind[ParFlat_RG]) {
2182  p = ind[ParFlat_RG]->mOffset;
/* Same trailing ';' handling as for RA above. */
2183  s = p + StringLen(p) - 1;
2184  if (*s == ';')
2185  *s = '\0';
2186 
2187  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2188  q++;
2189 
2190  if (*q != '\0')
2191  get_auth_consortium(p, auth_list);
2192  }
2193 
2194  if (auth_list.Empty() || ! auth_list->IsSetNames())
2195  no_auth = true;
2196 
2197  CRef<CTitle::C_E> title_art;
2198  if (ind[ParFlat_RT]) {
2199  p = ind[ParFlat_RT]->mOffset;
2200  if (*p != '\0' && *p != ';') {
2201  string title = clean_up(p);
2202  if (! title.empty()) {
2203  title_art.Reset(new CTitle::C_E);
2204  title_art->SetName(NStr::Sanitize(title));
2205  }
2206  }
2207  }
2208 
2209  if (! ind[ParFlat_RL]) {
2210  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Illegalreference, "No JOURNAL line, reference dropped.");
2211 
2212  desc.Reset();
2213  return desc;
2214  }
2215 
2216  p = ind[ParFlat_RL]->mOffset;
2217  if (*p == '\0' || *p == ';') {
2218  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Illegalreference, "JOURNAL line is empty, reference dropped.");
2219 
2220  desc.Reset();
2221  return desc;
2222  }
2223 
/* The article is fetched by PubMed id only when pp->medserver is 1, a pmid is
 * present, and either the RL text starts with "(er)" or the comment
 * classified as electronic; a failed fetch discards the pmid. */
2224  CRef<CCit_art> cit_art;
2225  if (pp->medserver == 1 && pmid > ZERO_ENTREZ_ID && (StringEquN(p, "(er)", 4) || er > 0)) {
2226  cit_art = FetchPubPmId(pmid);
2227  if (! cit_art)
2228  pmid = ZERO_ENTREZ_ID;
2229  }
2230 
2231  if (pmid > ZERO_ENTREZ_ID) {
2232  CRef<CPub> pub(new CPub);
2233  pub->SetPmid().Set(pmid);
2234  desc->SetPub().Set().push_back(pub);
2235  }
2236 
2237  CRef<CPub> pub_ref = journal(pp, p, p + ind[ParFlat_RL]->len, auth_list, title_art, has_muid, cit_art, er);
2238 
2239  if (pub_ref.Empty()) {
2240  desc.Reset();
2241  return desc;
2242  }
2243 
/* pub_ref is known to be non-empty here, so it is safe to dereference. */
2244  fta_add_article_ids(*pub_ref, doi, agricola);
2245 
2246  if (dbp->mType == ParFlat_REF_NO_TARGET)
2247  desc->SetReftype(CPubdesc::eReftype_no_target);
2248 
2249  desc->SetPub().Set().push_back(pub_ref);
2250 
2251  return desc;
2252 }
2253 
2254 /**********************************************************/
2255 static void fta_sort_pubs(TPubList& pubs)
2256 {
2257  for (TPubList::iterator pub = pubs.begin(); pub != pubs.end(); ++pub) {
2258  TPubList::iterator next_pub = pub;
2259  for (++next_pub; next_pub != pubs.end(); ++next_pub) {
2260  if ((*next_pub)->Which() > (*pub)->Which())
2261  continue;
2262 
2263  if ((*next_pub)->Which() == (*pub)->Which()) {
2264  if (! (*pub)->IsMuid() || (*pub)->GetMuid() >= (*next_pub)->GetMuid())
2265  continue;
2266  }
2267 
2268  pub->Swap(*next_pub);
2269  }
2270  }
2271 }
2272 
2273 /**********************************************************/
2274 static void fta_check_long_last_name(const CAuth_list& authors, bool soft_report)
2275 {
2276  static const size_t MAX_LAST_NAME_LEN = 30;
2277 
2278  ErrSev sev;
2279 
2280  if (! authors.IsSetNames() || ! authors.GetNames().IsStd())
2281  return;
2282 
2283  for (const auto& author : authors.GetNames().GetStd()) {
2284  if (! author->IsSetName() || ! author->GetName().IsName())
2285  continue;
2286 
2287  const CName_std& name = author->GetName().GetName();
2288 
2289  if (name.IsSetLast() && name.GetLast().size() > MAX_LAST_NAME_LEN) {
2290  /* Downgrade severity of this error to WARNING
2291  * if in HTGS mode. As of 7/31/2002, very long
2292  * consortium names were treated as if
2293  * they were author last names, for HTGS data.
2294  * This can be reverted to ERROR after the
2295  * consortium name slot is available and utilized
2296  * in the ASN.1.
2297  */
2298  sev = (soft_report ? SEV_WARNING : SEV_ERROR);
2299  ErrPostEx(sev, ERR_REFERENCE_LongAuthorName, "Last name of author exceeds 30 characters in length. A format error in the reference data might have caused the author name to be parsed incorrectly. Name is \"%s\".", name.GetLast().c_str());
2300  }
2301  }
2302 }
2303 
2304 /**********************************************************/
2305 static void fta_check_long_name_in_article(const CCit_art& cit_art, bool soft_report)
2306 {
2307  if (cit_art.IsSetAuthors())
2308  fta_check_long_last_name(cit_art.GetAuthors(), soft_report);
2309 
2310  if (cit_art.IsSetFrom()) {
2311  const CCit_book* book = nullptr;
2312  if (cit_art.GetFrom().IsBook())
2313  book = &cit_art.GetFrom().GetBook();
2314  else if (cit_art.GetFrom().IsProc()) {
2315  if (cit_art.GetFrom().GetProc().IsSetBook())
2316  book = &cit_art.GetFrom().GetProc().GetBook();
2317  }
2318 
2319  if (book && book->IsSetAuthors())
2320  fta_check_long_last_name(book->GetAuthors(), soft_report);
2321  }
2322 }
2323 
2324 /**********************************************************/
2325 static void fta_check_long_names(const CPub& pub, bool soft_report)
2326 {
2327  if (pub.IsGen()) /* CitGen */
2328  {
2329  const CCit_gen& cit_gen = pub.GetGen();
2330  if (cit_gen.IsSetAuthors())
2331  fta_check_long_last_name(cit_gen.GetAuthors(), soft_report);
2332  } else if (pub.IsSub()) /* CitSub */
2333  {
2334  if (! soft_report) {
2335  const CCit_sub& cit_sub = pub.GetSub();
2336  if (cit_sub.IsSetAuthors())
2337  fta_check_long_last_name(cit_sub.GetAuthors(), soft_report);
2338  }
2339  } else if (pub.IsMedline()) /* Medline */
2340  {
2341  const CMedline_entry& medline = pub.GetMedline();
2342  if (medline.IsSetCit()) {
2343  fta_check_long_name_in_article(medline.GetCit(), soft_report);
2344  }
2345  } else if (pub.IsArticle()) /* CitArt */
2346  {
2347  fta_check_long_name_in_article(pub.GetArticle(), soft_report);
2348  } else if (pub.IsBook() || pub.IsProc() || pub.IsMan()) /* CitBook or CitProc or
2349  CitLet */
2350  {
2351  const CCit_book* book = nullptr;
2352 
2353  if (pub.IsBook())
2354  book = &pub.GetBook();
2355  else if (pub.IsProc()) {
2356  if (pub.GetProc().IsSetBook())
2357  book = &pub.GetProc().GetBook();
2358  } else {
2359  if (pub.GetMan().IsSetCit())
2360  book = &pub.GetMan().GetCit();
2361  }
2362 
2363  if (book && book->IsSetAuthors())
2364  fta_check_long_last_name(book->GetAuthors(), soft_report);
2365  } else if (pub.IsPatent()) /* CitPat */
2366  {
2367  const CCit_pat& patent = pub.GetPatent();
2368 
2369  if (patent.IsSetAuthors())
2370  fta_check_long_last_name(patent.GetAuthors(), soft_report);
2371 
2372  if (patent.IsSetApplicants())
2373  fta_check_long_last_name(patent.GetApplicants(), soft_report);
2374 
2375  if (patent.IsSetAssignees())
2376  fta_check_long_last_name(patent.GetAssignees(), soft_report);
2377  } else if (pub.IsEquiv()) /* PubEquiv */
2378  {
2379  for (const auto& cur_pub : pub.GetEquiv().Get()) {
2380  fta_check_long_names(*cur_pub, soft_report);
2381  }
2382  }
2383 }
2384 
2385 /**********************************************************/
2386 static void fta_propagate_pmid_muid(CPub_equiv& pub_equiv)
2387 {
2388  TEntrezId pmid = ZERO_ENTREZ_ID;
2389  TEntrezId muid = ZERO_ENTREZ_ID;
2390 
2391  CCit_art* cit_art = nullptr;
2392  for (auto& pub : pub_equiv.Set()) {
2393  if (pub->IsMuid() && muid == ZERO_ENTREZ_ID)
2394  muid = pub->GetMuid();
2395  else if (pub->IsPmid() && pmid == ZERO_ENTREZ_ID)
2396  pmid = pub->GetPmid().Get();
2397  else if (pub->IsArticle() && ! cit_art)
2398  cit_art = &pub->SetArticle();
2399  }
2400 
2401  if (! cit_art || (muid == ZERO_ENTREZ_ID && pmid == ZERO_ENTREZ_ID))
2402  return;
2403 
2404  if (muid != ZERO_ENTREZ_ID) {
2405  CRef<CArticleId> id(new CArticleId);
2406  id->SetMedline().Set(muid);
2407  cit_art->SetIds().Set().push_front(id);
2408  }
2409 
2410  if (pmid != ZERO_ENTREZ_ID) {
2411  CRef<CArticleId> id(new CArticleId);
2412  id->SetPubmed().Set(pmid);
2413  cit_art->SetIds().Set().push_front(id);
2414  }
2415 }
2416 
2417 /**********************************************************
2418  *
2419  * PubdescPtr DescrRefs(pp, dbp, col_data):
2420  *
2421  * Return a Pubdesc pointer.
2422  *
2423  * 4-14-93
2424  *
2425  **********************************************************/
2427 {
/* NOTE(review): the function signature (source line 2426) is absent from this
 * listing; the body uses pp, dbp and col_data and returns a CRef<CPubdesc>.
 *
 * The reference block is handed to the parser that matches pp->format
 * (SPROT, XML, GenBank or EMBL). The resulting comment is passed through
 * ShrinkSpaces(), problems reported by the parser through no_auth / rej are
 * posted, and the publications are sorted, checked for over-long author
 * names and cross-linked with their Medline / PubMed ids.
 */
2428  bool soft_report = false;
2429 
2430  bool rej = false;
2431  bool no_auth = false;
2432 
/* In HTGS mode long author names are reported with the softer severity. */
2433  if (pp->mode == Parser::EMode::HTGS)
2434  soft_report = true;
2435 
2436  CRef<CPubdesc> desc;
2437 
/* For any other format desc stays empty. */
2438  if (pp->format == Parser::EFormat::SPROT)
2439  desc = sp_refs(pp, dbp, col_data);
2440  else if (pp->format == Parser::EFormat::XML)
2441  desc = XMLRefs(pp, dbp, no_auth, rej);
2442  else if (pp->format == Parser::EFormat::GenBank)
2443  desc = gb_refs_common(pp, dbp, col_data, true, nullptr, no_auth);
2444  else if (pp->format == Parser::EFormat::EMBL)
2445  desc = embl_refs(pp, dbp, col_data, no_auth);
2446 
2447  if (desc && desc->IsSetComment()) {
2448  ShrinkSpaces(desc->SetComment());
2449  }
2450 
/* Missing authors are only an error for EMBL-sourced data; for every other
 * source the current entry is marked to be dropped. */
2451  if (no_auth) {
2452  if (pp->source == Parser::ESource::EMBL)
2453  ErrPostEx(SEV_ERROR, ERR_REFERENCE_MissingAuthors, "Reference has no author names.");
2454  else {
2455  ErrPostEx(SEV_REJECT, ERR_REFERENCE_MissingAuthors, "Reference has no author names. Entry dropped.");
2456  pp->entrylist[pp->curindx]->drop = true;
2457  }
2458  }
2459 
/* rej is set only by XMLRefs(), when the reference carries a Medline id. */
2460  if (rej) {
2461  ErrPostEx(SEV_REJECT, ERR_REFERENCE_InvalidMuid, "Use of Medline ID in INSDSeq format is not alowed. Entry dropped.");
2462  pp->entrylist[pp->curindx]->drop = true;
2463  }
2464 
2465  if (desc.NotEmpty() && desc->IsSetPub()) {
2466  fta_sort_pubs(desc->SetPub().Set());
2467 
2468  for (const auto& pub : desc->GetPub().Get()) {
2469  fta_check_long_names(*pub, soft_report);
2470  }
2471 
2472  fta_propagate_pmid_muid(desc->SetPub());
2473  }
2474 
2475  return desc;
2476 }
2477 
User-defined methods of the data storage class.
Data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
char * StringRStr(char *where, const char *what)
Definition: add.cpp:1438
string tata_save(string_view t)
Definition: add.cpp:148
void ShrinkSpaces(char *line)
Definition: asci_blk.cpp:118
@Affil.hpp User-defined methods of the data storage class.
Definition: Affil.hpp:56
CArticleId –.
Definition: ArticleId.hpp:66
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
Definition: Date.hpp:53
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
CImprint –.
Definition: Imprint.hpp:66
@Name_std.hpp User-defined methods of the data storage class.
Definition: Name_std.hpp:56
Definition: Pub.hpp:56
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
C_E –.
Definition: Title_.hpp:96
char * mOffset
Definition: ftablock.h:329
size_t len
Definition: ftablock.h:330
CFlatFileData * mpData
Definition: ftablock.h:328
int mType
Definition: ftablock.h:327
@ ParFlat_RG
Definition: embl.h:66
@ ParFlat_RL
Definition: embl.h:69
@ ParFlat_RT
Definition: embl.h:68
@ ParFlat_RX
Definition: embl.h:65
@ ParFlat_RA
Definition: embl.h:67
@ ParFlat_RC
Definition: embl.h:63
#define ERR_FORMAT_MultiplePatRefs
Definition: flat2err.h:47
#define ERR_REFERENCE_Illegalreference
Definition: flat2err.h:287
#define ERR_REFERENCE_InvalidInPress
Definition: flat2err.h:290
#define ERR_REFERENCE_GsdbRefDropped
Definition: flat2err.h:299
#define ERR_REFERENCE_Fail_to_parse
Definition: flat2err.h:288
#define ERR_REFERENCE_LongAuthorName
Definition: flat2err.h:306
#define ERR_REFERENCE_NoContactInfo
Definition: flat2err.h:286
#define ERR_REFERENCE_MissingAuthors
Definition: flat2err.h:307
#define ERR_REFERENCE_UnusualBookFormat
Definition: flat2err.h:300
#define ERR_REFERENCE_Thesis
Definition: flat2err.h:284
#define ERR_REFERENCE_IllegalDate
Definition: flat2err.h:282
#define ERR_REFERENCE_InvalidMuid
Definition: flat2err.h:309
#define ParFlat_LANL_AC
#define INSDXREF_DBNAME
Definition: fta_xml.h:101
#define INSDREFERENCE_REMARK
Definition: fta_xml.h:98
unique_ptr< string > XMLGetTagValue(const char *entry, const XmlIndex *xip)
Definition: xm_index.cpp:202
#define INSDREFERENCE_PUBMED
Definition: fta_xml.h:97
#define INSDREFERENCE_AUTHORS
Definition: fta_xml.h:92
#define INSDREFERENCE_XREF
Definition: fta_xml.h:99
#define INSDXREF_ID
Definition: fta_xml.h:102
#define INSDREFERENCE_TITLE
Definition: fta_xml.h:94
#define INSDREFERENCE_JOURNAL
Definition: fta_xml.h:95
#define INSDREFERENCE_MEDLINE
Definition: fta_xml.h:96
unique_ptr< string > XMLConcatSubTags(const char *entry, const XmlIndex *xip, Int4 tag, Char sep)
Definition: xm_index.cpp:1546
#define INSDREFERENCE_CONSORTIUM
Definition: fta_xml.h:93
#define INSDREFERENCE_REFERENCE
Definition: fta_xml.h:90
unique_ptr< string > XMLFindTagValue(const char *entry, const XmlIndex *xip, Int4 tag)
Definition: xm_index.cpp:214
std::list< CRef< objects::CPub > > TPubList
Definition: ftablock.h:63
bool StringEquNI(const char *s1, const char *s2, size_t n)
Definition: ftacpp.hpp:131
bool StringEquN(const char *s1, const char *s2, size_t n)
Definition: ftacpp.hpp:121
void StringCpy(char *d, const char *s)
Definition: ftacpp.hpp:89
void MemFree(char *p)
Definition: ftacpp.hpp:55
size_t StringLen(const char *s)
Definition: ftacpp.hpp:60
char * StringRChr(char *s, const char c)
Definition: ftacpp.hpp:93
const char * months[]
Definition: ftaerr.cpp:118
CRef< CCit_art > FetchPubPmId(TEntrezId pmid)
Definition: ftamed.cpp:92
static int type
Definition: getdata.c:31
static const char * str(char *buf, int n)
Definition: stats.c:84
@ ParFlat_AUTHORS
Definition: genbank.h:67
@ ParFlat_JOURNAL
Definition: genbank.h:70
@ ParFlat_CONSRTM
Definition: genbank.h:68
@ ParFlat_REMARK
Definition: genbank.h:74
@ ParFlat_MEDLINE
Definition: genbank.h:73
@ ParFlat_TITLE
Definition: genbank.h:69
@ ParFlat_PUBMED
Definition: genbank.h:75
#define SEV_WARNING
Definition: gicache.c:90
#define SEV_ERROR
Definition: gicache.c:91
#define SEV_REJECT
Definition: gicache.c:92
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ENTREZ_ID_FROM(T, value)
Definition: ncbimisc.hpp:1098
#define ZERO_ENTREZ_ID
Definition: ncbimisc.hpp:1102
string
Definition: cgiapp.hpp:687
#define StringStr
Definition: ncbistr.hpp:322
#define StringSave
Definition: ncbistr.hpp:326
#define ErrPostStr
Definition: ncbierr.hpp:68
#define StringChr
Definition: ncbistr.hpp:317
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
ErrSev
Definition: ncbierr.hpp:63
TPrim & Set(void)
Definition: serialbase.hpp:351
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
char Char
Alias for char.
Definition: ncbitype.h:93
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
Definition: ncbistr.hpp:2876
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
@ fAllowTrailingSymbols
Ignore trailing non-numerics characters.
Definition: ncbistr.hpp:298
bool IsProc(void) const
Check if variant Proc is selected.
Definition: Cit_art_.hpp:507
bool IsSetVolume(void) const
Check if a value has been assigned to Volume data member.
Definition: Imprint_.hpp:746
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_book_.hpp:347
const TPart_supi & GetPart_supi(void) const
Get the Part_supi member data.
Definition: Imprint_.hpp:1139
const TCit & GetCit(void) const
Get the Cit member data.
Definition: Cit_let_.hpp:267
void SetPages(const TPages &value)
Assign a value to Pages data member.
Definition: Imprint_.hpp:861
void SetIds(TIds &value)
Assign a value to Ids data member.
Definition: Cit_art_.cpp:258
const TBook & GetBook(void) const
Get the Book member data.
Definition: Cit_proc_.hpp:214
bool IsSetAuthors(void) const
Check if a value has been assigned to Authors data member.
Definition: Cit_gen_.hpp:623
bool IsSetAuthors(void) const
authors (ANSI requires) Check if a value has been assigned to Authors data member.
Definition: Cit_art_.hpp:534
void SetTitle(TTitle &value)
Assign a value to Title data member.
Definition: Cit_art_.cpp:210
void SetDate(TDate &value)
Assign a value to Date data member.
Definition: Cit_sub_.cpp:101
bool IsSetPrepub(void) const
Check if a value has been assigned to Prepub data member.
Definition: Imprint_.hpp:1080
const TFrom & GetFrom(void) const
Get the From member data.
Definition: Cit_art_.hpp:567
bool IsSetApplicants(void) const
Applicants Check if a value has been assigned to Applicants data member.
Definition: Cit_pat_.hpp:988
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_gen_.hpp:635
bool IsSetAssignees(void) const
Assignees Check if a value has been assigned to Assignees data member.
Definition: Cit_pat_.hpp:1009
void SetIssue(const TIssue &value)
Assign a value to Issue data member.
Definition: Imprint_.hpp:814
void SetTitle(TTitle &value)
Assign a value to Title data member.
Definition: Cit_book_.cpp:62
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_sub_.hpp:357
void SetFrom(TFrom &value)
Assign a value to From data member.
Definition: Cit_art_.cpp:248
const TIssue & GetIssue(void) const
Get the Issue member data.
Definition: Imprint_.hpp:805
void SetAffil(TAffil &value)
Assign a value to Affil data member.
Definition: Auth_list_.cpp:160
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
Definition: Cit_art_.hpp:555
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_art_.cpp:227
void SetSerial_number(TSerial_number value)
Assign a value to Serial_number data member.
Definition: Cit_gen_.hpp:902
bool IsSetAuthors(void) const
not necessarily authors of the paper Check if a value has been assigned to Authors data member.
Definition: Cit_sub_.hpp:345
void ResetPart_supi(void)
Reset Part_supi data member.
Definition: Imprint_.cpp:142
TPrepub GetPrepub(void) const
Get the Prepub member data.
Definition: Imprint_.hpp:1099
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_sub_.cpp:74
void ResetPrepub(void)
Reset Prepub data member.
Definition: Imprint_.hpp:1092
const TProc & GetProc(void) const
Get the variant data.
Definition: Cit_art_.cpp:155
void SetImp(TImp &value)
Assign a value to Imp data member.
Definition: Cit_book_.cpp:107
TStr & SetStr(void)
Select the variant.
Definition: Affil_.hpp:1200
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
Definition: Auth_list_.hpp:464
bool IsSetAuthors(void) const
author/inventor Check if a value has been assigned to Authors data member.
Definition: Cit_pat_.hpp:703
void SetVolume(const TVolume &value)
Assign a value to Volume data member.
Definition: Imprint_.hpp:767
void SetNames(TNames &value)
Assign a value to Names data member.
Definition: Auth_list_.cpp:149
bool IsSetIssue(void) const
Check if a value has been assigned to Issue data member.
Definition: Imprint_.hpp:793
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_book_.cpp:93
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_pat_.hpp:715
void SetDate(TDate &value)
Assign a value to Date data member.
Definition: Imprint_.cpp:73
const TApplicants & GetApplicants(void) const
Get the Applicants member data.
Definition: Cit_pat_.hpp:1000
bool IsSetDate(void) const
date of publication Check if a value has been assigned to Date data member.
Definition: Imprint_.hpp:716
EPrepub
for prepublication citations
Definition: Imprint_.hpp:94
bool IsBook(void) const
Check if variant Book is selected.
Definition: Cit_art_.hpp:501
void SetPubstatus(TPubstatus value)
Assign a value to Pubstatus data member.
Definition: Imprint_.hpp:1223
bool IsSetAuthors(void) const
authors Check if a value has been assigned to Authors data member.
Definition: Cit_book_.hpp:335
void SetMedium(TMedium value)
Assign a value to Medium data member.
Definition: Cit_sub_.hpp:424
bool IsSetBook(void) const
citation to meeting Check if a value has been assigned to Book data member.
Definition: Cit_proc_.hpp:202
const TNames & GetNames(void) const
Get the Names member data.
Definition: Auth_list_.hpp:478
bool IsSetCit(void) const
same fields as a book Check if a value has been assigned to Cit data member.
Definition: Cit_let_.hpp:255
const TStd & GetStd(void) const
Get the variant data.
Definition: Auth_list_.hpp:410
void SetPart_sup(const TPart_sup &value)
Assign a value to Part_sup data member.
Definition: Imprint_.hpp:997
EMedium
medium of submission
Definition: Cit_sub_.hpp:95
void SetPrepub(TPrepub value)
Assign a value to Prepub data member.
Definition: Imprint_.hpp:1108
bool IsSetPart_supi(void) const
part/sup on issue Check if a value has been assigned to Part_supi data member.
Definition: Imprint_.hpp:1127
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_art_.hpp:546
const TAssignees & GetAssignees(void) const
Get the Assignees member data.
Definition: Cit_pat_.hpp:1021
const TImp & GetImp(void) const
Get the Imp member data.
Definition: Cit_book_.hpp:377
bool IsSetPages(void) const
Check if a value has been assigned to Pages data member.
Definition: Imprint_.hpp:840
void SetPart_supi(const TPart_supi &value)
Assign a value to Part_supi data member.
Definition: Imprint_.hpp:1148
bool IsStd(void) const
Check if variant Std is selected.
Definition: Auth_list_.hpp:404
const TBook & GetBook(void) const
Get the variant data.
Definition: Cit_art_.cpp:133
@ ePubStatus_aheadofprint
epublish, but will be followed by print
Definition: PubStatus_.hpp:75
@ ePubStatus_epublish
published electronically by publisher
Definition: PubStatus_.hpp:68
@ ePrepub_in_press
accepted, not published
Definition: Imprint_.hpp:96
@ ePrepub_submitted
submitted, not accepted
Definition: Imprint_.hpp:95
void SetYear(TYear value)
Assign a value to Year data member.
Definition: Date_std_.hpp:435
void SetMonth(TMonth value)
Assign a value to Month data member.
Definition: Date_std_.hpp:482
TStd & SetStd(void)
Select the variant.
Definition: Date_.cpp:115
void SetDay(TDay value)
Assign a value to Day data member.
Definition: Date_std_.hpp:529
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
Definition: Name_std_.hpp:410
const TLast & GetLast(void) const
Get the Last member data.
Definition: Name_std_.hpp:422
const TStd & GetStd(void) const
Get the variant data.
Definition: Date_.cpp:109
bool IsSetCit(void) const
article citation Check if a value has been assigned to Cit data member.
const TCit & GetCit(void) const
Get the Cit member data.
bool IsMedline(void) const
Check if variant Medline is selected.
Definition: Pub_.hpp:596
TPmid & SetPmid(void)
Select the variant.
Definition: Pub_.hpp:690
bool IsBook(void) const
Check if variant Book is selected.
Definition: Pub_.hpp:641
const TMedline & GetMedline(void) const
Get the variant data.
Definition: Pub_.cpp:211
const TMan & GetMan(void) const
Get the variant data.
Definition: Pub_.cpp:365
TMuid & SetMuid(void)
Select the variant.
Definition: Pub_.hpp:615
TBook & SetBook(void)
Select the variant.
Definition: Pub_.cpp:283
Tdata & Set(void)
Assign a value to data member.
Definition: Pub_equiv_.hpp:171
const TArticle & GetArticle(void) const
Get the variant data.
Definition: Pub_.cpp:233
const TSub & GetSub(void) const
Get the variant data.
Definition: Pub_.cpp:189
const TPatent & GetPatent(void) const
Get the variant data.
Definition: Pub_.cpp:321
const Tdata & Get(void) const
Get the member data.
Definition: Pub_equiv_.hpp:165
const TProc & GetProc(void) const
Get the variant data.
Definition: Pub_.cpp:299
const TEquiv & GetEquiv(void) const
Get the variant data.
Definition: Pub_.cpp:387
TMan & SetMan(void)
Select the variant.
Definition: Pub_.cpp:371
bool IsEquiv(void) const
Check if variant Equiv is selected.
Definition: Pub_.hpp:671
bool IsProc(void) const
Check if variant Proc is selected.
Definition: Pub_.hpp:647
TSub & SetSub(void)
Select the variant.
Definition: Pub_.cpp:195
bool IsSub(void) const
Check if variant Sub is selected.
Definition: Pub_.hpp:590
TGen & SetGen(void)
Select the variant.
Definition: Pub_.cpp:173
const TGen & GetGen(void) const
Get the variant data.
Definition: Pub_.cpp:167
TPatent & SetPatent(void)
Select the variant.
Definition: Pub_.cpp:327
bool IsPatent(void) const
Check if variant Patent is selected.
Definition: Pub_.hpp:653
bool IsArticle(void) const
Check if variant Article is selected.
Definition: Pub_.hpp:629
TArticle & SetArticle(void)
Select the variant.
Definition: Pub_.cpp:239
bool IsGen(void) const
Check if variant Gen is selected.
Definition: Pub_.hpp:584
const TBook & GetBook(void) const
Get the variant data.
Definition: Pub_.cpp:277
bool IsMan(void) const
Check if variant Man is selected.
Definition: Pub_.hpp:665
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
Definition: Gb_qual_.hpp:200
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:268
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Gb_qual_.hpp:212
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ eReftype_no_target
nothing specified (EMBL)
Definition: Pubdesc_.hpp:95
void ind_subdbp(DataBlkPtr dbp, DataBlkPtr ind[], int maxkw, Parser::EFormat bank)
Definition: ind.cpp:122
@ ParFlat_REF_NO_TARGET
Definition: index.h:63
Int4 IsNewAccessFormat(const Char *acnum)
Definition: indx_blk.cpp:991
char * buf
int i
yy_size_t n
int len
const char * tag
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int isalnum(Uchar c)
Definition: ncbictype.hpp:62
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
#define nullptr
Definition: ncbimisc.hpp:45
static Format format
Definition: njn_ioutil.cpp:53
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
User-defined methods of the data storage class.
static BOOL number
Definition: pcregrep.c:193
static void XMLGetXrefs(char *entry, XmlIndexPtr xip, TQualVector &quals)
Definition: ref.cpp:1699
static CRef< CPub > get_muid(char *str, Parser::EFormat format)
Definition: ref.cpp:232
USING_SCOPE(objects)
static CRef< CPubdesc > gb_refs_common(ParserPtr pp, DataBlkPtr dbp, Uint2 col_data, bool bParser, DataBlkPtr **ppInd, bool &no_auth)
Definition: ref.cpp:1949
static char * get_embl_str_pub_id(char *str, const Char *tag)
Definition: ref.cpp:263
Int4 fta_remark_is_er(const string &str)
Definition: ref.cpp:1751
static const char * ERRemarks[]
Definition: ref.cpp:103
static CRef< CCit_art > get_book(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre, Parser::EFormat format, char *jour)
Definition: ref.cpp:971
static void fta_check_long_name_in_article(const CCit_art &cit_art, bool soft_report)
Definition: ref.cpp:2305
static CRef< CPub > get_num(char *str)
Definition: ref.cpp:222
#define MAXKW
Definition: ref.cpp:86
static void fta_check_long_last_name(const CAuth_list &authors, bool soft_report)
Definition: ref.cpp:2274
static string clean_up(const char *str)
Definition: ref.cpp:202
static void fta_propagate_pmid_muid(CPub_equiv &pub_equiv)
Definition: ref.cpp:2386
static CRef< CCit_let > get_thesis(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre)
Definition: ref.cpp:1137
static TEntrezId get_embl_pmid(char *str)
Definition: ref.cpp:286
static void fta_sort_pubs(TPubList &pubs)
Definition: ref.cpp:2255
static CRef< CPubdesc > XMLRefs(ParserPtr pp, DataBlkPtr dbp, bool &no_auth, bool &rej)
Definition: ref.cpp:1772
static CRef< CCit_pat > get_pat(ParserPtr pp, char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, char *eptr)
Definition: ref.cpp:373
static const char * strip_sub_str[]
Definition: ref.cpp:92
static CRef< CCit_sub > get_sub_gsdb(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, ParserPtr pp)
Definition: ref.cpp:1356
static void fta_add_article_ids(CPub &pub, const string &doi, const string &agricola)
Definition: ref.cpp:1725
static CRef< CCit_sub > get_sub(ParserPtr pp, char *bptr, CRef< CAuth_list > &auth_list)
Definition: ref.cpp:1255
static CRef< CCit_book > get_whole_book(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre)
Definition: ref.cpp:1195
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
Definition: ref.cpp:1457
static void fta_check_long_names(const CPub &pub, bool soft_report)
Definition: ref.cpp:2325
static void normalize_comment(string &comment)
Definition: ref.cpp:128
static CRef< CCit_gen > fta_get_citgen(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title)
Definition: ref.cpp:1407
static void fta_get_part_sup(char *parts, CImprint &imp)
Definition: ref.cpp:537
static CRef< CCit_art > get_art(ParserPtr pp, char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre, bool has_muid, bool *all_zeros, Int4 er)
Definition: ref.cpp:692
static char * ExtractErratum(char *comm)
Definition: ref.cpp:1659
static char * FindSemicolon(char *str)
Definition: ref.cpp:1644
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlkPtr dbp, Uint2 col_data)
Definition: ref.cpp:2426
static CRef< CPubdesc > embl_refs(ParserPtr pp, DataBlkPtr dbp, Uint2 col_data, bool &no_auth)
Definition: ref.cpp:2106
static char * check_book_tit(char *title)
Definition: ref.cpp:314
static bool get_parts(char *bptr, char *eptr, CImprint &imp)
Definition: ref.cpp:592
static CRef< CDate > get_lanl_date(char *s)
Definition: ref.cpp:160
static CRef< CCit_gen > get_unpub(char *bptr, char *eptr, CRef< CAuth_list > &auth_list, const Char *title)
Definition: ref.cpp:930
static char * FindBackSemicolon(char *pchStart, char *pchCurrent)
Definition: ref.cpp:1627
ERefRetType
Definition: ref.h:37
@ ParFlat_PATENT_CITATION
Definition: ref.h:47
@ ParFlat_ONLINE_CITATION
Definition: ref.h:50
@ ParFlat_MONOGRAPH_NOT_JOURNAL
Definition: ref.h:40
@ ParFlat_THESIS_CITATION
Definition: ref.h:44
@ ParFlat_BOOK_CITATION
Definition: ref.h:48
@ ParFlat_IN_PRESS
Definition: ref.h:46
@ ParFlat_UNPUB_JOURNAL
Definition: ref.h:39
@ ParFlat_GEN_CITATION
Definition: ref.h:49
@ ParFlat_MISSING_JOURNAL
Definition: ref.h:38
@ ParFlat_SUBMITTED
Definition: ref.h:43
CRef< objects::CPubdesc > sp_refs(ParserPtr pp, DataBlkPtr dbp, Uint2 col_data)
Definition: sp_ref.cpp:1279
CRef< objects::CPatent_seq_id > psip
Definition: ftablock.h:190
bool is_pat
Definition: ftablock.h:202
vector< IndexblkPtr > entrylist
XmlIndex * next
Definition: ftablock.h:158
XmlIndex * subtags
Definition: ftablock.h:157
Int4 tag
Definition: ftablock.h:150
Definition: type.c:6
CRef< CDate_std > get_full_date(const char *s, bool is_ref, Parser::ESource source)
Definition: utilfun.cpp:828
void fta_StringCpy(char *dst, const char *src)
Definition: utilfun.cpp:1497
Char * StringIStr(const Char *where, const Char *what)
Definition: utilfun.cpp:591
void get_auth_consortium(char *cons, CRef< CAuth_list > &auths)
Definition: utilref.cpp:292
void get_auth(char *pt, ERefFormat format, char *jour, CRef< CAuth_list > &auths)
Definition: utilref.cpp:253
CRef< CDate > get_date(const Char *year)
Definition: utilref.cpp:503
Int4 valid_pages_range(char *pages, const Char *title, Int4 er, bool inpress)
Definition: utilref.cpp:419
CRef< CCit_gen > get_error(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title)
Definition: utilref.cpp:541
ERefFormat
Definition: utilref.h:37
@ SP_REF
Definition: utilref.h:40
@ EMBL_REF
Definition: utilref.h:39
@ GB_REF
Definition: utilref.h:38
std::vector< CRef< objects::CGb_qual > > TQualVector
Definition: xgbfeat.h:12
int XDateCheck(const CDate_std &date)
Definition: xutils.cpp:113
Modified on Fri May 24 14:53:09 2024 by modify_doxy.py rev. 669887