NCBI C++ ToolKit
ref.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ref.cpp 99558 2023-04-18 14:24:19Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Name: ref.cpp
27  *
28  * Author: Karl Sirotkin, Hsiu-Chuan Chen
29  *
30  * File Description:
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include "ftacpp.hpp"
37 
41 #include <objects/biblio/Affil.hpp>
42 #include <objects/seq/Pubdesc.hpp>
44 #include <objects/pub/Pub.hpp>
61 
62 #include "index.h"
63 #include "genbank.h"
64 #include "embl.h"
65 
67 #include "ftamed.h"
68 
69 #include "ftaerr.hpp"
70 #include "indx_blk.h"
71 #include "utilref.h"
72 #include "asci_blk.h"
73 #include "add.h"
74 #include "utilfun.h"
75 #include "ind.hpp"
76 #include "ref.h"
77 #include "xgbfeat.h"
78 #include "xutils.h"
79 #include "fta_xml.h"
80 
81 #ifdef THIS_FILE
82 # undef THIS_FILE
83 #endif
84 #define THIS_FILE "ref.cpp"
85 
86 #define MAXKW 38
87 
88 
91 
/* Boilerplate phrases that get_sub() skips, repeatedly, at the start of the
 * text following the submission date. The list is nullptr-terminated and is
 * compared case-sensitively (StringEquN) by that loop. */
92 static const char* strip_sub_str[] = {
93  "to the EMBL/GenBank/DDBJ databases",
94  "to the EMBL/DDBJ/GenBank databases",
95  "to the DDBJ/GenBank/EMBL databases",
96  "to the DDBJ/EMBL/GenBank databases",
97  "to the GenBank/DDBJ/EMBL databases",
98  "to the GenBank/EMBL/DDBJ databases",
99  "to the INSDC",
100  nullptr
101 };
102 
/* Accepted spellings of the "Publication Status" remark; nullptr-terminated.
 * The trailing numbers are the 1-based positions of the entries:
 *   1-6   "Online-Only"
 *   7-12  "Available-Online"
 *   13-18 "Available-Online prior to print"
 * Each group lists the space, underscore and hyphen forms of the key, with
 * and without a blank before the colon. */
103 static const char* ERRemarks[] = {
104  "Publication Status: Online-Only", /* 1 */
105  "Publication Status : Online-Only", /* 2 */
106  "Publication_Status: Online-Only", /* 3 */
107  "Publication_Status : Online-Only", /* 4 */
108  "Publication-Status: Online-Only", /* 5 */
109  "Publication-Status : Online-Only", /* 6 */
110  "Publication Status: Available-Online", /* 7 */
111  "Publication Status : Available-Online", /* 8 */
112  "Publication_Status: Available-Online", /* 9 */
113  "Publication_Status : Available-Online", /* 10 */
114  "Publication-Status: Available-Online", /* 11 */
115  "Publication-Status : Available-Online", /* 12 */
116  "Publication Status: Available-Online prior to print", /* 13 */
117  "Publication Status : Available-Online prior to print", /* 14 */
118  "Publication_Status: Available-Online prior to print", /* 15 */
119  "Publication_Status : Available-Online prior to print", /* 16 */
120  "Publication-Status: Available-Online prior to print", /* 17 */
121  "Publication-Status : Available-Online prior to print", /* 18 */
122  nullptr
123 };
124 
/**********************************************************
 *
 *   static void normalize_comment(comment):
 *
 *      After every "; " separator, removes the run of
 *   blanks and semicolons that immediately follows it,
 *   so that "a; ; ;  b" becomes "a; b". The string is
 *   edited in place and its length is updated.
 *
 *      The previous implementation shifted characters
 *   through the pointer returned by c_str() and never
 *   shrank the string, so the result kept an embedded
 *   NUL followed by stale bytes.
 *
 **********************************************************/
static void normalize_comment(std::string& comment)
{
    std::string::size_type pos = 0;

    for (;;) {
        pos = comment.find("; ", pos);
        if (pos == std::string::npos)
            break;

        /* first character after the separator */
        pos += 2;

        /* end of the run of ' ' / ';' that follows the separator */
        std::string::size_type stop = comment.find_first_not_of(" ;", pos);
        if (stop == std::string::npos)
            stop = comment.size();

        comment.erase(pos, stop - pos);
        /* the search resumes at pos, i.e. right after the kept "; " */
    }
}
143 
144 /**********************************************************
145  *
146  * static DatePtr get_lanl_date(s):
147  *
148  * Get year, month, day and return NCBI_DatePtr.
149  * Temporary used for lanl form of date that
150  * is (JUL 21 1993).
151  *
152  * 01-4-94
153  *
154  **********************************************************/
155 static CRef<CDate> get_lanl_date(char* s)
156 {
157  int day = 0;
158  int month = 0;
159  int year;
160  int cal;
161 
162  const char* months[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
163 
164  CRef<CDate> date(new CDate);
165  for (cal = 0; cal < 12; cal++) {
166  if (StringEquNI(s + 1, months[cal], 3)) {
167  month = cal + 1;
168  break;
169  }
170  }
171  day = atoi(s + 5);
172  year = atoi(s + 8);
173  if (year < 1900 || year > 1994) {
174  ErrPostEx(SEV_WARNING, ERR_REFERENCE_IllegalDate, "Illegal year: %d", year);
175  }
176 
177  date->SetStd().SetYear(year);
178  date->SetStd().SetMonth(month);
179  date->SetStd().SetDay(day);
180 
181  if (XDateCheck(date->GetStd()) != 0) {
182  ErrPostEx(SEV_WARNING, ERR_REFERENCE_IllegalDate, "Illegal date: %s", s);
183  date.Reset();
184  }
185 
186  return (date);
187 }
188 
/**********************************************************
 *
 *   static char* clean_up(str):
 *
 *      Drops one trailing ';' from str (in place), then
 *   returns a newly allocated (strdup) copy of str with
 *   all leading and trailing double or single quotes
 *   removed. Returns nullptr when str is null or the
 *   copy cannot be allocated.
 *
 *      An empty str is handled explicitly: the previous
 *   code looked at the character before the buffer.
 *
 **********************************************************/
static char* clean_up(char* str)
{
    if (! str)
        return nullptr;

    /* strip a single trailing semicolon, if there is anything to look at */
    size_t len = strlen(str);
    if (len > 0 && str[len - 1] == ';')
        str[len - 1] = '\0';

    /* skip leading quotes */
    while (*str == '\"' || *str == '\'')
        str++;

    char* newp = strdup(str);
    if (! newp)
        return nullptr;

    /* chop trailing quotes off the copy */
    size_t size = strlen(newp);
    while (size > 0 && (newp[size - 1] == '\"' || newp[size - 1] == '\'')) {
        size--;
        newp[size] = '\0';
    }

    return newp;
}
221 
/* Returns a new CPub whose Gen variant has its serial number set to
 * serial_num.
 * NOTE(review): the statement that declares serial_num (listing line 224,
 * presumably derived from str) is missing from this listing - confirm
 * against the full source. */
222 static CRef<CPub> get_num(char* str)
223 {
225 
226  CRef<CPub> ret(new CPub);
227  ret->SetGen().SetSerial_number(serial_num);
228 
229  return ret;
230 }
231 
/* NOTE(review): the signature (listing line 232) and listing lines 242 and
 * 253 are missing from this listing; the body uses `str` and `format`.
 * What is visible: returns an empty CRef unless a positive number i is
 * obtained, in which case a CPub with Muid set to i is returned. */
233 {
234  char* p;
235  Int4 i;
236 
237  CRef<CPub> muid;
238 
239  if (! str)
240  return muid;
241 
 /* NOTE(review): the condition of this first branch is not visible */
243  p = str;
244  else if (format == Parser::EFormat::EMBL) {
 /* EMBL: the id follows a case-insensitive "MEDLINE;" tag and blanks */
245  p = StringIStr(str, "MEDLINE;");
246  if (! p)
247  return muid;
248  for (p += 8; *p == ' ';)
249  p++;
250  } else
251  return muid;
252 
 /* NOTE(review): the statement that computes i (presumably from p) is
    not visible */
254  if (i < 1)
255  return muid;
256 
257  muid.Reset(new CPub);
258  muid->SetMuid(ENTREZ_ID_FROM(int, i));
259  return muid;
260 }
261 
262 /**********************************************************/
263 static char* get_embl_str_pub_id(char* str, const Char* tag)
264 {
265  char* p;
266  char* q;
267  char* ret;
268  Char ch;
269 
270  if (! str || ! tag)
271  return nullptr;
272 
273  p = StringIStr(str, tag);
274  if (! p)
275  return nullptr;
276  for (p += StringLen(tag); *p == ' ';)
277  p++;
278 
279  ret = nullptr;
280  for (q = p; *q != ' ' && *q != '\0';)
281  q++;
282  q--;
283  if (*q != '.')
284  q++;
285  ch = *q;
286  *q = '\0';
287  ret = StringSave(p);
288  *q = ch;
289  return (ret);
290 }
291 
292 /**********************************************************/
/* NOTE(review): the signature (listing line 293) is missing from this
 * listing; the body uses `str`.
 * Finds the case-insensitive tag "PUBMED;" in str, skips the blanks after
 * it and converts the following digits with atol(). Returns
 * ZERO_ENTREZ_ID when str is null, the tag is absent or the number is not
 * positive; otherwise the number wrapped by ENTREZ_ID_FROM. */
294 {
295  char* p;
296  long i;
297 
298  if (! str)
299  return ZERO_ENTREZ_ID;
300 
301  p = StringIStr(str, "PUBMED;");
302  if (! p)
303  return ZERO_ENTREZ_ID;
 /* 7 == length of "PUBMED;" */
304  for (p += 7; *p == ' ';)
305  p++;
306  i = atol(p);
307  if (i <= 0)
308  return ZERO_ENTREZ_ID;
309  return ENTREZ_ID_FROM(long, i);
310 }
311 
312 /**********************************************************
313  *
314  * static char* check_book_tit(title):
315  *
316  * Get volume from book title.
317  *
318  * 12-4-93
319  *
320  **********************************************************/
321 static char* check_book_tit(char* title)
322 {
323  char* p;
324  char* q;
325  char* r;
326 
327  p = StringRStr(title, "Vol");
328  if (! p)
329  return nullptr;
330 
331  if (p[3] == '.')
332  q = p + 4;
333  else if (StringEquN(p + 3, "ume", 3))
334  q = p + 6;
335  else
336  return nullptr;
337 
338  while (*q == ' ' || *q == '\t')
339  q++;
340  for (r = q; *r >= '0' && *r <= '9';)
341  r++;
342 
343  if (r == q || *r != '\0')
344  return nullptr;
345 
346  if (p > title) {
347  p--;
348  if (*p != ' ' && *p != '\t' && *p != ',' && *p != ';' && *p != '.')
349  return nullptr;
350 
351  while (*p == ' ' || *p == '\t' || *p == ',' || *p == ';' || *p == '.') {
352  if (p == title)
353  break;
354  p--;
355  }
356  if (*p != ' ' && *p != '\t' && *p != ',' && *p != ';' && *p != '.')
357  p++;
358  }
359  *p = '\0';
360 
361  return (q);
362 }
363 
364 /**********************************************************
365  *
366  * static CitPatPtr get_pat(pp, bptr, auth, title, eptr):
367  *
368  * Return a CitPat pointer for patent ref in ncbi or
369  * embl or ddbj.
370  * Leading "I" or "AR" for NCBI or "A" for EMBL or
371  * "E" for DDBJ in accession number required
372  *
373  * JOURNAL Patent: US 4446235-A 6 01-MAY-1984;
374  * or
375  * RL Patent number US4446235-A/6, 01-MAY-1984.
376  *
377  * 11-14-93
378  *
379  **********************************************************/
/* Parses country, document number, document type, sequence number and
 * issue date out of bptr (which is modified in place) and returns a new
 * CCit_pat, or an empty CRef on any parse failure. When the current entry
 * is a patent sequence without a patent seq-id yet, ibp->psip is filled in
 * as well. If auth_list is not empty its affiliation is set from the text
 * after the date (or to "").
 * NOTE(review): listing lines 441 and 502 are missing, so two `if`
 * conditions below are only partially visible. */
380 static CRef<CCit_pat> get_pat(ParserPtr pp, char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, char* eptr)
381 {
382  IndexblkPtr ibp;
383 
384  CRef<CCit_pat> cit_pat;
385 
386  char* country;
387  char* number;
388  char* type;
389  char* app;
390  char* s;
391  char* p;
392  char* q;
393  char* temp;
394 
395  ErrSev sev;
396  Char ch;
397 
398  ibp = pp->entrylist[pp->curindx];
399 
 /* temp is an untouched copy of the line, used only in error messages */
400  temp = StringSave(bptr);
401 
 /* the patent part ends at '.' (EMBL) or ';' (other formats); cut both
    the copy and the original there */
402  ch = (pp->format == Parser::EFormat::EMBL) ? '.' : ';';
403  p = StringChr(temp, ch);
404  if (p)
405  *p = '\0';
406 
407  p = StringChr(bptr, ch);
408  if (p)
409  *p = '\0';
410 
411  if (ibp->is_pat && ibp->psip.NotEmpty()) {
412  ErrPostStr(SEV_ERROR, ERR_FORMAT_MultiplePatRefs, "Too many patent references for patent sequence; ignoring all but the first.");
413  }
414 
415  if (pp->source == Parser::ESource::USPTO)
416  s = bptr;
417  else {
 /* the line must start with the format-specific prefix */
418  q = (pp->format == Parser::EFormat::EMBL) ? (char*)"Patent number" : (char*)"Patent:";
419  size_t len = StringLen(q);
420  if (! StringEquNI(q, bptr, len)) {
421  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Illegal format: \"%s\"", temp);
422  MemFree(temp);
423  return cit_pat;
424  }
425 
426  for (s = bptr + len; *s == ' ';)
427  s++;
428  }
429 
 /* country: run of letters and blanks; q tracks its last letter */
430  for (country = s, q = s; isalpha((int)*s) || *s == ' '; s++)
431  if (*s != ' ')
432  q = s;
433  if (country == q) {
434  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No Patent Document Country: \"%s\"", temp);
435  MemFree(temp);
436  return cit_pat;
437  }
438  s = q + 1;
439 
 /* NOTE(review): second half of this condition (listing line 441) is
    not visible */
440  if (pp->format != Parser::EFormat::EMBL &&
442  *s++ = '\0';
443  while (*s == ' ')
444  s++;
 /* document number: digits, with embedded commas squeezed out in place */
445  for (number = s, q = s; isdigit((int)*s) != 0 || *s == ','; s++)
446  if (*s != ',')
447  *q++ = *s;
448 
449  if (number == s) {
450  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No Patent Document Number: \"%s\"", temp);
451  MemFree(temp);
452  return cit_pat;
453  }
454 
455  if (q != s)
456  *q = '\0';
457 
 /* optional "-<type>" suffix, ended by ' ', '/' or end of string */
458  if (*s == '-') {
459  *s++ = '\0';
460  for (type = s; *s != ' ' && *s != '/' && *s != '\0';)
461  s++;
462  if (type == s)
463  type = nullptr;
464  } else
465  type = nullptr;
466  if (*s != '\0')
467  *s++ = '\0';
468 
469  if (! type) {
470  sev = (ibp->is_pat ? SEV_ERROR : SEV_WARNING);
471  ErrPostEx(sev, ERR_REFERENCE_Fail_to_parse, "No Patent Document Type: \"%s\"", temp);
472  }
473 
 /* sequence number within the patent: a run of digits that must be
    followed by one of the listed delimiters */
474  for (app = s, q = s; *s >= '0' && *s <= '9';)
475  s++;
476  if (*s != '\0' && *s != ',' && *s != '.' && *s != ' ' && *s != ';' &&
477  *s != '\n') {
478  sev = (ibp->is_pat ? SEV_ERROR : SEV_WARNING);
479  ErrPostEx(sev, ERR_REFERENCE_Fail_to_parse, "No number of sequence in patent: \"%s\"", temp);
480  app = nullptr;
481  s = q;
482  } else if (*s != '\0')
483  for (*s++ = '\0'; *s == ' ';)
484  s++;
485 
486  CRef<CDate_std> std_date;
487  if (*s != '\0') {
488  std_date = get_full_date(s, true, pp->source);
489  }
490 
491  if (std_date.Empty()) {
492  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Illegal format: \"%s\"", temp);
493  MemFree(temp);
494  return cit_pat;
495  }
496 
 /* restore the terminator removed from bptr at the top; note that the
    early returns above leave bptr cut */
497  if (p)
498  *p = ch;
499 
500  string msg = NStr::Sanitize(number);
 /* NOTE(review): second half of this condition (listing line 502) is
    not visible */
501  if (pp->format == Parser::EFormat::EMBL ||
503  *number = '\0';
504 
505  cit_pat.Reset(new CCit_pat);
506 
507  cit_pat->SetCountry(country);
508  cit_pat->SetNumber(msg);
509 
510  cit_pat->SetDoc_type(type ? type : "");
511  cit_pat->SetDate_issue().SetStd(*std_date);
512  cit_pat->SetTitle(title.Empty() ? "" : title->GetName());
513 
 /* authors are always set: a single empty name when none were given */
514  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
515  CAuth_list& pat_auth_list = cit_pat->SetAuthors();
516  pat_auth_list.SetNames().SetStr().push_back("");
517  } else
518  cit_pat->SetAuthors(*auth_list);
519 
520  if (auth_list.NotEmpty()) {
521  CAffil& affil = auth_list->SetAffil();
522 
 /* NOTE(review): 13 presumably skips the date text that s points at
    plus its delimiter - confirm; the result is only used if it is
    still below eptr */
523  s += 13;
524  if (s < eptr && *s != '\0')
525  affil.SetStr(s);
526  else
527  affil.SetStr("");
528  }
529 
530  if (ibp->is_pat && ibp->psip.Empty()) {
531  ibp->psip = new CPatent_seq_id;
532  ibp->psip->SetCit().SetCountry(country);
533  ibp->psip->SetCit().SetId().SetNumber(msg);
534  ibp->psip->SetSeqid(app ? atoi(app) : 0);
535  if (type)
536  ibp->psip->SetCit().SetDoc_type(type);
537  }
538 
539  MemFree(temp);
540  return cit_pat;
541 }
542 
543 /**********************************************************/
544 static void fta_get_part_sup(char* parts, CImprint& imp)
545 {
546  char* start;
547  char* end;
548  char* p;
549  char* q;
550  Char ch;
551  Int4 i;
552  Int4 j;
553 
554  if (! parts || *parts == '\0')
555  return;
556 
557  for (p = parts, i = 0, j = 0; *p != '\0'; p++) {
558  if (*p == '(')
559  i++;
560  else if (*p == ')')
561  j++;
562 
563  if (j > i || i - j > 1)
564  break;
565  }
566 
567  if (*p != '\0' || i < 2)
568  return;
569 
570  start = StringChr(parts, '(');
571  end = StringChr(start + 1, ')');
572 
573  for (p = start + 1; *p == ' ';)
574  p++;
575  if (p == end)
576  return;
577 
578  for (q = end - 1; *q == ' ' && q > p;)
579  q--;
580  if (*q != ' ')
581  q++;
582 
583  ch = *q;
584  *q = '\0';
585 
586  imp.SetPart_sup(p);
587  *q = ch;
588 
589  fta_StringCpy(start, end + 1);
590 }
591 
592 /**********************************************************
593  *
594  * static bool get_parts(bptr, eptr, imp):
595  *
596  * Return a PARTS from medart2asn.c.
597  *
598  **********************************************************/
599 static bool get_parts(char* bptr, char* eptr, CImprint& imp)
600 {
601  char* parts;
602  char* p;
603  char* q;
604  Char ch;
605  Int4 bad;
606 
607  if (! bptr || ! eptr)
608  return false;
609 
610  ch = *eptr;
611  *eptr = '\0';
612  parts = StringSave(bptr);
613  *eptr = ch;
614 
615  for (p = parts; *p != '\0'; p++)
616  if (*p == '\t')
617  *p = ' ';
618 
619  fta_get_part_sup(parts, imp);
620 
621  bad = 0;
622  q = StringChr(parts, '(');
623  p = StringChr(parts, ')');
624 
625  if (p && q) {
626  if (p < q || StringChr(p + 1, ')') || StringChr(q + 1, '('))
627  bad = 1;
628  } else if (p || q)
629  bad = 1;
630 
631  if (bad != 0) {
632  MemFree(parts);
633  return false;
634  }
635 
636  if (q) {
637  *q++ = '\0';
638  *p = '\0';
639 
640  for (p = q; *p == ' ';)
641  p++;
642  for (q = p; *q != '\0' && *q != ' ';)
643  q++;
644  if (*q != '\0')
645  *q++ = '\0';
646  if (q > p)
647  imp.SetIssue(p);
648  for (p = q; *p == ' ';)
649  p++;
650  for (q = p; *q != '\0';)
651  q++;
652  if (q > p) {
653  for (q--; *q == ' ';)
654  q--;
655  *++q = '\0';
656 
657  string supi(" ");
658  supi += p;
659  imp.SetPart_supi(supi);
660  }
661 
662  const Char* issue_str = imp.IsSetIssue() ? imp.GetIssue().c_str() : nullptr;
663  if (imp.IsSetPart_supi() && issue_str &&
664  (issue_str[0] == 'P' || issue_str[0] == 'p') && (issue_str[1] == 'T' || issue_str[1] == 't') &&
665  issue_str[2] == '\0') {
666  string& issue = imp.SetIssue();
667  issue += imp.GetPart_supi();
668  imp.ResetPart_supi();
669  }
670  }
671 
672  for (p = parts; *p == ' ';)
673  p++;
674  for (q = p; *q != '\0' && *q != ' ';)
675  q++;
676  if (*q != '\0')
677  *q++ = '\0';
678  if (q > p)
679  imp.SetVolume(p);
680  for (p = q; *p == ' ';)
681  p++;
682  for (q = p; *q != '\0';)
683  q++;
684  if (q > p) {
685  for (q--; *q == ' ';)
686  q--;
687  *++q = '\0';
688  imp.SetPart_sup(p);
689  }
690 
691  MemFree(parts);
692  return true;
693 }
694 
695 /**********************************************************
696  *
697  * static CitArtPtr get_art(pp, bptr, auth, title, pre,
698  * has_muid, all_zeros, er):
699  *
700  * Return a CitArt pointer for GENBANK or EMBL mode.
701  *
702  **********************************************************/
/* Parses a journal line of the shape "<title> <volume><sep><pages> (<year>)"
 * out of a private copy of bptr and returns a CCit_art whose source is a
 * journal, or an empty CRef when the line cannot be parsed.
 *   pp        - format/source choose the volume/pages separator: ','
 *               for GenBank, ':' for EMBL, for XML ':' if the source is
 *               EMBL and ',' otherwise; any other format returns empty
 *   pre       - stored as Prepub on the imprint when > 0
 *   has_muid  - when true an in-press flag is reported and cleared
 *   all_zeros - output: cleared on entry, set on the "year starts with
 *               '0'" path (its exact condition is partly not visible)
 *   er        - > 0 sets bit 01 of is_er; bit 02 is set when bptr starts
 *               with "(er)"
 * NOTE(review): listing lines 817, 824, 854 and 867 are missing, so some
 * statements below are only partially visible. */
703 static CRef<CCit_art> get_art(ParserPtr pp, char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, CImprint::EPrepub pre, bool has_muid, bool* all_zeros, Int4 er)
704 {
705  char* eptr;
706  char* end_tit;
707  char* s;
708  char* ss;
709  char* end_volume;
710  char* end_pages;
711  char* tit = nullptr;
712  char* volume = nullptr;
713  char* pages = nullptr;
714  char* year;
715  Char symbol;
716 
717  Int4 i;
718  Int4 is_er;
719 
720  *all_zeros = false;
721 
722  is_er = 0;
723  if (er > 0)
724  is_er |= 01; /* based on REMARKs */
725  if (StringEquN(bptr, "(er)", 4))
726  is_er |= 02;
727 
728  CRef<CCit_art> cit_art;
729 
730  if (pp->format == Parser::EFormat::GenBank)
731  symbol = ',';
732  else if (pp->format == Parser::EFormat::EMBL)
733  symbol = ':';
734  else if (pp->format == Parser::EFormat::XML) {
735  if (pp->source == Parser::ESource::EMBL)
736  symbol = ':';
737  else
738  symbol = ',';
739  } else
740  return cit_art;
741 
742  end_volume = nullptr;
743 
 /* work on a private copy; trailing blanks, tabs and periods are cut */
744  size_t len = StringLen(bptr);
745  unique_ptr<char[]> pBuf(new char[len + 1]);
746  char* buf = pBuf.get();
747  StringCpy(buf, bptr);
748  eptr = buf + len - 1;
749  while (eptr > buf && (*eptr == ' ' || *eptr == '\t' || *eptr == '.'))
750  *eptr-- = '\0';
 /* the line must end with "(...)"; s is moved back to the '(' */
751  if (*eptr != ')') {
752  return cit_art;
753  }
754  for (s = eptr - 1; s > buf && *s != '(';)
755  s--;
756  if (*s != '(') {
757  return cit_art;
758  }
759 
760  year = s + 1;
 /* end_pages: one past the last non-space character before "(year)" */
761  for (s--; s >= buf && isspace((int)*s) != 0;)
762  s--;
763  if (s < buf)
764  s = buf;
765  end_pages = s + 1;
 /* a leading "G3" is skipped so that its digit is not taken for the
    start of the volume */
766  if (buf[0] == 'G' && buf[1] == '3')
767  ss = buf + 2;
768  else
769  ss = buf;
 /* ss stops at the first digit outside parentheses (i is the depth) */
770  for (i = 0; ss <= year; ss++) {
771  if (*ss == '(')
772  i++;
773  else if (*ss == ')')
774  i--;
775  else if (*ss >= '0' && *ss <= '9' && i == 0)
776  break;
777  }
778 
 /* look backwards from end_pages for the volume/pages separator */
779  for (s = end_pages; s >= buf && *s != symbol;)
780  s--;
781  if (s < buf)
782  s = buf;
783  if (*s != symbol) {
784  /* try delimiter from other format
785  */
786  if (pp->format == Parser::EFormat::GenBank)
787  symbol = ':';
788  else if (pp->format == Parser::EFormat::EMBL)
789  symbol = ',';
790  else if (pp->format == Parser::EFormat::XML) {
791  if (pp->source == Parser::ESource::EMBL)
792  symbol = ',';
793  else
794  symbol = ':';
795  }
796 
797  for (s = end_pages; s >= buf && *s != symbol;)
798  s--;
799  if (s < buf)
800  s = buf;
801  }
802 
803  if (*s == symbol && ss != year) {
804  if (ss > s)
805  ss = s + 1;
806  end_volume = s;
807  for (pages = s + 1; isspace(*pages) != 0;)
808  pages++;
809  end_tit = ss - 1;
810  if (end_volume > ss) {
811  volume = ss;
 /* keep an opening parenthesis that directly precedes the volume */
812  if (*end_tit == '(')
813  volume--;
814  }
815  } else {
 /* NOTE(review): the statement guarded by this `if` (listing line 817)
    is not visible */
816  if (pre != CImprint::ePrepub_submitted)
818 
819  end_tit = end_pages;
820  }
821 
822  if (*year == '0') {
 /* NOTE(review): rest of this condition (listing line 824) is not
    visible */
823  if (pages && StringEquN(pages, "0-0", 3) &&
825  *all_zeros = true;
826  return cit_art;
827  }
828 
829  tit = buf;
830  if (*tit == '\0') {
831  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No journal title.");
832  return cit_art;
833  }
834 
835  cit_art.Reset(new CCit_art);
836  CCit_jour& journal = cit_art->SetFrom().SetJournal();
837  CImprint& imp = journal.SetImp();
838 
839  if (pre > 0)
840  imp.SetPrepub(pre);
841 
842  *end_pages = '\0';
 /* valid_pages_range() result: 0 - store the pages, 1 - treat them as
    part of the title, -1 with an electronic reference - give up */
843  if (pages && ! StringEquN(pages, "0-0", 3)) {
844  i = valid_pages_range(pages, tit, is_er, (pre == CImprint::ePrepub_in_press));
845  if (i == 0)
846  imp.SetPages(pages);
847  else if (i == 1)
848  end_tit = end_pages;
849  else if (i == -1 && is_er > 0) {
850  cit_art.Reset();
851  return cit_art;
852  }
 /* NOTE(review): the statement guarded by this `else if` (listing line
    854) is not visible */
853  } else if (pre != CImprint::ePrepub_submitted)
855 
856  if (volume) {
857  if (! get_parts(volume, end_volume, imp)) {
858  cit_art.Reset();
859  return cit_art;
860  }
861 
862  if (pre != CImprint::ePrepub_submitted && ! imp.IsSetVolume()) {
863  if (imp.IsSetPages()) {
864  cit_art.Reset();
865  return cit_art;
866  }
 /* NOTE(review): listing line 867 is not visible */
868  }
869  } else if (is_er > 0 && pre != CImprint::ePrepub_in_press) {
870  cit_art.Reset();
871  return cit_art;
872  }
873 
874  CRef<CDate> date;
875  if (*year != '0')
876  date = get_date(year);
877 
878  if (date.Empty()) {
879  if (is_er == 0)
880  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No date in journal reference");
881 
882  cit_art.Reset();
883  return cit_art;
884  }
885 
886  *end_tit = '\0';
887 
888  CRef<CTitle::C_E> journal_title(new CTitle::C_E);
889 
 /* trim trailing characters of the title that are neither alphanumeric
    nor '.' nor ')' */
890  for (char* aux = end_tit - 1; aux > tit && *aux != '.' && *aux != ')' && ! isalnum(*aux); --aux)
891  *aux = 0;
892 
893  journal_title->SetIso_jta(NStr::Sanitize(tit));
894  journal.SetTitle().Set().push_back(journal_title);
895 
896  imp.SetDate(*date);
897  if (pre > 0)
898  imp.SetPrepub(pre);
899 
900  if ((is_er & 01) == 01) {
901  if (er == 1)
902  imp.SetPubstatus(3); /* epublish */
903  else
904  imp.SetPubstatus(10); /* aheadofprint */
905  }
906 
907  /* check invalid "in-press"
908  */
909  if (pre == CImprint::ePrepub_in_press) {
910  if (has_muid) {
911  ErrPostEx(SEV_WARNING, ERR_REFERENCE_InvalidInPress, "Reference flagged as In-press, but Medline UID exists, In-press ignored: %s", buf);
912  imp.ResetPrepub();
913  }
914 
915  if (imp.IsSetPages() && imp.IsSetVolume() && imp.IsSetDate()) {
916  ErrPostEx(SEV_WARNING, ERR_REFERENCE_InvalidInPress, "Reference flagged as In-press, but citation is complete, In-press ignored: %s", buf);
917  imp.ResetPrepub();
918  }
919  }
920 
921  /* Title and authors are optional for cit_art
922  */
923  if (title)
924  cit_art->SetTitle().Set().push_back(title);
925 
926  if (auth_list.NotEmpty())
927  cit_art->SetAuthors(*auth_list);
928 
929  return cit_art;
930 }
931 
932 /**********************************************************
933  *
934  * static CitGenPtr get_unpub(bptr, eptr, auth, title):
935  *
936  * Return a CitGen pointer.
937  *
938  * 11-14-93
939  *
940  **********************************************************/
941 static CRef<CCit_gen> get_unpub(char* bptr, char* eptr, CRef<CAuth_list>& auth_list, const Char* title)
942 {
943  CRef<CCit_gen> cit_gen(new CCit_gen);
944 
945  char* s;
946  char* str;
947 
948  if (bptr) {
949  for (s = bptr; *s != '\0' && *s != '(';)
950  s++;
951  for (str = s - 1; str > bptr && isspace(*str) != 0;)
952  str--;
953  if (*s == '(')
954  s += 6;
955 
956  if (s < eptr && *s != '\0' && auth_list.NotEmpty())
957  auth_list->SetAffil().SetStr(NStr::Sanitize(s));
958 
959  cit_gen->SetCit(string(bptr, str + 1));
960  }
961 
962  if (auth_list.NotEmpty())
963  cit_gen->SetAuthors(*auth_list);
964 
965  if (title)
966  cit_gen->SetTitle(title);
967 
968  return cit_gen;
969 }
970 
971 /**********************************************************
972  *
973  * static CitArtPtr get_book(bptr, auth, title, pre,
974  * format, p):
975  *
976  * Return a CitArt pointer (!!! that is an article
977  * from book!!).
978  *
979  * 11-14-93
980  *
981  **********************************************************/
/* Parses, from a private copy of bptr, a reference of the shape
 *   "(in) <editors> (Ed.)|(Eds.); <book title>: <pages>; <publisher> (<year>)"
 * and returns a CCit_art whose source is a book. An empty CRef is returned
 * when the editors part cannot be recognized or no date is found; the
 * messages posted in those cases say that a Cit-gen is generated instead.
 * When the text does not start with "(in)" only the (empty) book title,
 * the article title and the authors are attached.
 * NOTE(review): listing lines 1007 and 1010 (two case labels of the switch
 * below) are missing from this listing. */
982 static CRef<CCit_art> get_book(char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, CImprint::EPrepub pre, Parser::EFormat format, char* jour)
983 {
984  char* s;
985  char* ss;
986  char* tit;
987  char* volume;
988  char* pages;
989  char* press;
990 
991  Uint1 ref_fmt;
992  bool IS_AUTH = false;
993  char* tbptr;
994  char* p;
995  Char c;
996  Int4 i;
997 
998  tit = nullptr;
999  ref_fmt = GB_REF;
1000 
1001  tbptr = bptr ? StringSave(bptr) : nullptr;
1002 
 /* ref_fmt is handed to get_auth() when the editors are parsed */
1003  switch (format) {
1004  case Parser::EFormat::EMBL:
1005  ref_fmt = EMBL_REF;
1006  break;
1008  ref_fmt = GB_REF;
1009  break;
1011  ref_fmt = SP_REF;
1012  break;
1013  default:
1014  break;
1015  }
1016 
1017  CRef<CCit_art> cit_art(new CCit_art);
1018  CCit_book& cit_book = cit_art->SetFrom().SetBook();
1019 
1020  if (pre > 0)
1021  cit_book.SetImp().SetPrepub(pre);
1022 
 /* p keeps the start of the copy for use in error messages */
1023  p = tbptr;
1024  CRef<CTitle::C_E> book_title(new CTitle::C_E);
1025 
1026  if (StringEquN("(in)", tbptr, 4)) {
1027  for (s = tbptr + 4; *s == ' ';)
1028  s++;
 /* from here on bptr points into the copy, at the editors' names */
1029  for (bptr = s; *s != ';' && *s != '(' && *s != '\0';)
1030  s++;
1031  if (StringEquNI(s, "(Eds.)", 6)) {
1032  tit = s + 6;
1033  IS_AUTH = true;
1034  } else if (StringEquNI(s, "(Ed.)", 5)) {
1035  tit = s + 5;
1036  IS_AUTH = true;
1037  } else if (*s == ';')
1038  tit = s;
1039  if (tit)
1040  while (*tit == ' ' || *tit == ';' || *tit == '\n')
1041  tit++;
1042  *s++ = '\0';
1043  if (IS_AUTH && *bptr != '\0') {
1044  CRef<CAuth_list> book_auth_list;
1045  get_auth(bptr, ref_fmt, jour, book_auth_list);
1046  if (book_auth_list.NotEmpty())
1047  cit_book.SetAuthors(*book_auth_list);
1048  } else {
1049  ErrPostEx(SEV_ERROR, ERR_REFERENCE_UnusualBookFormat, "Cannot parse unusually formatted book reference (generating Cit-gen instead): %s", p);
1050  if (tbptr)
1051  MemFree(tbptr);
1052 
1053  cit_art.Reset();
1054  return cit_art;
1055  }
1056 
 /* ss: last ';' of the remaining text (or its end); s: last ':'
    before ss, which separates the book title from the pages */
1057  ss = StringRChr(tit, ';');
1058  if (! ss)
1059  for (ss = tit; *ss != '\0';)
1060  ss++;
1061  for (s = ss; *s != ':' && s != tit;)
1062  s--;
1063  if (*s != ':')
1064  s = ss;
1065  c = *s;
1066  if (*s != '\0')
1067  *s++ = '\0';
1068 
1069  book_title->SetName("");
1070  if (*tit != '\0') {
 /* check_book_tit() cuts a trailing "Vol."/"Volume" number off tit */
1071  volume = check_book_tit(tit);
1072  if (volume)
1073  cit_book.SetImp().SetVolume(volume);
1074 
1075  book_title->SetName(NStr::Sanitize(tit));
1076  }
1077 
1078  if (c == ':') {
1079  for (pages = s; *s != '\0' && *s != ',' && *s != ';';)
1080  s++;
1081  if (*s != '\0')
1082  *s++ = '\0';
1083 
1084  while (*pages == ' ')
1085  pages++;
1086 
 /* pages "0-0" mark the reference as in press */
1087  if (StringEquN(pages, "0-0", 3))
1088  cit_book.SetImp().SetPrepub(CImprint::ePrepub_in_press);
1089  else {
1090  bool is_in_press = cit_book.GetImp().IsSetPrepub() && cit_book.GetImp().GetPrepub() == CImprint::ePrepub_in_press;
1091  i = valid_pages_range(pages, book_title->GetName().c_str(), 0, is_in_press);
1092 
 /* 0 - store the pages; 1 - put the text back into the title */
1093  if (i == 0)
1094  cit_book.SetImp().SetPages(NStr::Sanitize(pages));
1095  else if (i == 1) {
1096  string new_title = book_title->GetName();
1097  new_title += ": ";
1098  new_title += pages;
1099  book_title->SetName(new_title);
1100  }
1101  }
1102  }
1103 
 /* publisher: everything up to the '(' that opens the date */
1104  for (press = s; *s != '(' && *s != '\0';)
1105  s++;
1106  if (*s != '\0')
1107  *s++ = '\0';
1108 
1109  cit_book.SetImp().SetPub().SetStr(NStr::Sanitize(press));
1110 
1111  CRef<CDate> date = get_date(s);
1112  if (date.Empty()) {
1113  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No date in book reference");
1114  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "Book format error (cit-gen created): %s", p);
1115  if (tbptr)
1116  MemFree(tbptr);
1117 
1118  cit_art.Reset();
1119  return cit_art;
1120  }
1121 
1122  cit_book.SetImp().SetDate(*date);
1123  }
1124 
1125  cit_book.SetTitle().Set().push_back(book_title);
1126 
1127  if (title.NotEmpty())
1128  cit_art->SetTitle().Set().push_back(title);
1129 
1130  if (auth_list.NotEmpty())
1131  cit_art->SetAuthors(*auth_list);
1132 
1133  if (tbptr)
1134  MemFree(tbptr);
1135 
1136  return cit_art;
1137 }
1138 
1139 /**********************************************************
1140  *
1141  * static CitBookPtr get_thesis(bptr, auth, title, pre):
1142  *
1143  * Return a Cit-let of type "thesis" (the book data
1144  * is stored in its Cit member).
1145  * 11-14-93
1146  *
1147  **********************************************************/
/* NOTE(review): the signature (listing line 1148) is missing from this
 * listing; the body uses bptr, auth_list, title and pre.
 * The date is read right after the first '(' in bptr; without a date an
 * empty CRef is returned. Whatever follows becomes the publisher string.
 * A missing title is reported and replaced by an empty one. */
1149 {
1150  CRef<CCit_let> cit_let(new CCit_let);
1151 
1152  cit_let->SetType(CCit_let::eType_thesis);
1153 
1154  CCit_book& book = cit_let->SetCit();
1155 
1156  if (pre > 0)
1157  book.SetImp().SetPrepub(pre);
1158 
1159  char* s;
1160  for (s = bptr; *s != '\0' && *s != '(';)
1161  s++;
1162 
1163  if (*s == '(') {
1164  CRef<CDate> date = get_date(s + 1);
1165  if (date.NotEmpty())
1166  book.SetImp().SetDate(*date);
1167 
 /* NOTE(review): 6 presumably skips "(YYYY)" - confirm; there is no
    check that the string is that long */
1168  s = s + 6;
1169  }
1170 
1171  if (! book.GetImp().IsSetDate()) {
1172  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse thesis: missing date");
1173 
1174  cit_let.Reset();
1175  return cit_let;
1176  }
1177 
1178  if (*s != '\0')
1179  book.SetImp().SetPub().SetStr(NStr::Sanitize(s));
1180 
1181  if (title.NotEmpty())
1182  book.SetTitle().Set().push_back(title);
1183  else {
1184  ErrPostStr(SEV_WARNING, ERR_REFERENCE_Thesis, "Missing thesis title");
1185 
1186  CRef<CTitle::C_E> empty_title(new CTitle::C_E);
1187  empty_title->SetName("");
1188  book.SetTitle().Set().push_back(empty_title);
1189  }
1190 
1191  if (auth_list.NotEmpty())
1192  book.SetAuthors(*auth_list);
1193  return cit_let;
1194 }
1195 
1196 /**********************************************************
1197  *
1198  * static CitBookPtr get_whole_book(bptr, auth, title,
1199  * pre):
1200  *
1201  * Return a CitBook pointer.
1202  *
1203  * 11-14-93
1204  *
1205  **********************************************************/
/* NOTE(review): the signature (listing line 1206) is missing from this
 * listing; the body uses bptr, auth_list and pre.
 * Expected shape after a 5-character prefix: "<title>. <publisher> (<date>)".
 * Returns an empty CRef when there is no '(' or no author names. bptr is
 * cut at the '(' in place. */
1207 {
1208  CRef<CCit_book> cit_book;
1209 
1210  char* s;
1211 
 /* NOTE(review): the 5 skipped characters are presumably a fixed
    keyword at the start of the line - confirm against the caller */
1212  for (bptr += 5; isspace(*bptr) != 0;)
1213  bptr++;
1214 
1215 
1216  for (s = bptr; *s != '\0' && *s != '(';)
1217  s++;
1218 
1219  if (*s != '(') {
1220  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse book: missing date");
1221  return cit_book;
1222  }
1223 
1224  cit_book.Reset(new CCit_book);
1225 
1226  if (pre > 0)
1227  cit_book->SetImp().SetPrepub(pre);
1228 
1229  CRef<CDate> date = get_date(s + 1);
1230  if (date.NotEmpty())
1231  cit_book->SetImp().SetDate(*date);
1232 
 /* cut at the '(' and take the text up to the first '.' as the title */
1233  *s = '\0';
1234  for (s = bptr; *s != '\0' && *s != '.';)
1235  s++;
1236 
1237  CRef<CTitle::C_E> book_title(new CTitle::C_E);
1238  book_title->SetName(string(bptr, s));
1239  cit_book->SetTitle().Set().push_back(book_title);
1240 
 /* the rest, after the period and white space, is the publisher */
1241  if (*s == '.') {
1242  for (s++; isspace(*s) != 0;)
1243  s++;
1244 
1245  cit_book->SetImp().SetPub().SetStr(NStr::Sanitize(s));
1246  }
1247 
 /* NOTE(review): the message below says "thesis" although this is the
    whole-book parser */
1248  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
1249  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse thesis: missing thesis author");
1250  cit_book.Reset();
1251  return cit_book;
1252  }
1253 
1254  cit_book->SetAuthors(*auth_list);
1255 
1256  return cit_book;
1257 }
1258 
1259 /**********************************************************
1260  *
1261  * static CitSubPtr get_sub(pp, bptr, auth):
1262  *
1263  * Return a CitSub pointer.
1264  *
1265  **********************************************************/
/* Parses a direct-submission line: the date is taken from the first
 * "(...)" in bptr, the text after it (minus the boilerplate phrases of
 * strip_sub_str) becomes the affiliation of auth_list. Returns an empty
 * CRef when there is no '(' or no author names.
 * NOTE(review): the declaration of `medium` (listing line 1271) is missing
 * from this listing. */
1266 static CRef<CCit_sub> get_sub(ParserPtr pp, char* bptr, CRef<CAuth_list>& auth_list)
1267 {
1268  const char** b;
1269  char* s;
1270 
1272 
1273  CRef<CCit_sub> ret;
1274 
1275  for (s = bptr; *s != '(' && *s != '\0';)
1276  s++;
1277  if (*s == '\0') {
1278  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse submission: missing date");
1279  return ret;
1280  }
1281 
1282  ret.Reset(new CCit_sub);
1283  CRef<CDate> date;
1284 
 /* old-style accessions whose first letter is in ParFlat_LANL_AC, with
    a non-digit right after '(', use the LANL date form */
1285  if (pp && ! pp->entrylist.empty() &&
1286  IsNewAccessFormat(pp->entrylist[pp->curindx]->acnum) == 0 &&
1287  StringChr(ParFlat_LANL_AC, pp->entrylist[pp->curindx]->acnum[0]) &&
1288  isdigit((int)*(s + 1)) == 0) {
1289  date = get_lanl_date(s);
1290  } else {
 /* NOTE(review): pp is null-checked in the condition above but is
    dereferenced unconditionally here */
1291  CRef<CDate_std> std_date = get_full_date(s + 1, true, pp->source);
1292  if (std_date) {
1293  date.Reset(new CDate);
1294  date->SetStd(*std_date);
1295  }
1296  }
1297 
 /* without a date the Cit-sub created above is returned as is, with
    nothing set */
1298  if (date.Empty())
1299  return ret;
1300 
 /* this replaces the object created before the date was parsed */
1301  ret.Reset(new CCit_sub);
1302  ret->SetDate(*date);
1303 
 /* NOTE(review): 13 presumably skips "(DD-MMM-YYYY)" - confirm; there
    is no check that the string is that long */
1304  s = s + 13;
1305  if (StringStr(s, "E-mail"))
1306  medium = CCit_sub::eMedium_email;
1307 
1308  if (StringEquNI(" on tape", s, 8)) {
1309  medium = CCit_sub::eMedium_tape;
1310  for (s += 8; *s != '\0' && *s != ':';)
1311  s++;
1312  }
1313  if (*s != '\0' && *(s + 1) != '\0') {
1314  while (*s == ' ')
1315  s++;
1316 
1317  if (*s == ':')
1318  s++;
 /* keep stripping known boilerplate prefixes (and the blanks/periods
    after them) until none of them matches */
1319  for (;;) {
1320  for (b = strip_sub_str; *b; b++) {
1321  size_t l_str = StringLen(*b);
1322  if (StringEquN(s, *b, l_str)) {
1323  for (s += l_str; *s == ' ' || *s == '.';)
1324  s++;
1325  break;
1326  }
1327  }
1328  if (! *b)
1329  break;
1330  }
1331 
1332  if (*s != '\0' && auth_list.NotEmpty()) {
1333  auth_list->SetAffil().SetStr(NStr::Sanitize(s));
1334  }
1335  }
1336 
1337  if (*s == '\0') {
1338  ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoContactInfo, "Missing contact info : %s", bptr);
1339  }
1340 
1341  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
1342  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Direct submission: missing author (cit-gen created)");
1343 
1344  ret.Reset();
1345  return ret;
1346  }
1347 
1348  ret->SetAuthors(*auth_list);
1349  ret->SetMedium(medium);
1350 
1351  return ret;
1352 }
1353 
1354 /**********************************************************
1355  *
1356  * static CitSubPtr get_sub_gsdb(bptr, auth, title, pp):
1357  *
1358  * GSDB specific format for CitSub :
1359  * REFERENCE 1 (bases 1 to 378)
1360  * AUTHORS Mundt,M.O.
1361  * TITLE Published by M.O. Mundt, Genomics LS-3,
1362  * Los Alamos National Laboratory,
1363  * Mail Stop M888, Los Alamos, NM, USA, 87545
1364  * JOURNAL Published in GSDB (11-OCT-1996)
1365  *
1366  **********************************************************/
1367 static CRef<CCit_sub> get_sub_gsdb(char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, ParserPtr pp)
1368 {
1369  CRef<CCit_sub> cit_sub;
1370 
1371  char* s;
1372 
1373  for (s = bptr; *s != '(' && *s != '\0';)
1374  s++;
1375  if (*s == '\0') {
1376  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Fail to parse submission: missing date");
1377  return cit_sub;
1378  }
1379 
1380  CRef<CDate_std> std_date = get_full_date(s + 1, true, pp->source);
1381  if (std_date.Empty())
1382  return cit_sub;
1383 
1384  CRef<CDate> date;
1385  date->SetStd(*std_date);
1386 
1387  if (auth_list.Empty() || ! auth_list->IsSetNames()) {
1388  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "Direct submission: missing author (cit-gen created)");
1389  return cit_sub;
1390  }
1391 
1392  cit_sub.Reset(new CCit_sub);
1393  cit_sub->SetAuthors(*auth_list);
1394  cit_sub->SetDate(*date);
1395 
1396  if (title.NotEmpty()) {
1397  const Char* s = title->GetName().c_str();
1398  size_t l_str = StringLen("Published by");
1399  if (StringEquN(s, "Published by", l_str)) {
1400  s += l_str;
1401  while (*s == ' ')
1402  s++;
1403  }
1404 
1405  if (*s != '\0') {
1406  auth_list->SetAffil().SetStr(NStr::Sanitize(s));
1407  } else {
1408  ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoContactInfo, "Missing contact info : %s", bptr);
1409  }
1410  } else {
1411  ErrPostEx(SEV_WARNING, ERR_REFERENCE_NoContactInfo, "Missing contact info : %s", bptr);
1412  }
1413 
1414  return cit_sub;
1415 }
1416 
1417 /**********************************************************/
1418 static CRef<CCit_gen> fta_get_citgen(char* bptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title)
1419 {
1420  CRef<CCit_gen> cit_gen;
1421 
1422  char* p;
1423  char* q;
1424  char* r;
1425  Char ch;
1426  Int2 year;
1427 
1428  if (! bptr || auth_list.Empty() || ! auth_list->IsSetNames() || title.Empty())
1429  return cit_gen;
1430 
1431  year = 0;
1432  p = StringChr(bptr, '(');
1433  if (p) {
1434  for (p++; *p == ' ' || *p == '\t';)
1435  p++;
1436  for (q = p; *p >= '0' && *p <= '9';)
1437  p++;
1438  for (r = p; *p == ' ' || *p == '\t' || *p == ')';)
1439  p++;
1440  if (*p == '\n' || *p == '\0') {
1441  ch = *r;
1442  *r = '\0';
1443  year = atoi(q);
1444  if (year < 1900)
1445  *r = ch;
1446  else {
1447  for (q--; *q == ' ' || *q == '\t' || *q == '(';)
1448  q--;
1449  *++q = '\0';
1450  }
1451  }
1452  }
1453 
1454  cit_gen.Reset(new CCit_gen);
1455 
1456  if (bptr)
1457  cit_gen->SetCit(bptr);
1458 
1459  cit_gen->SetAuthors(*auth_list);
1460  cit_gen->SetTitle(title->GetName());
1461 
1462  if (year >= 1900)
1463  cit_gen->SetDate().SetStd().SetYear(year);
1464 
1465  return cit_gen;
1466 }
1467 
/* Turns a JOURNAL string into a CPub, dispatching on its leading keyword.
 *
 *   pp        - parser context (format, source, seqtype are read).
 *   bptr/eptr - start and end of the JOURNAL text; bptr may be null, in which
 *               case an "unpublished" Cit-gen is produced.
 *   auth_list - reference authors, handed to the specific builders.
 *   title     - reference title, may be empty.
 *   has_muid  - forwarded to get_art().
 *   cit_art   - article already fetched by the caller; used as-is when the
 *               text matches none of the special keywords.
 *   er        - forwarded to get_art() and used to suppress a warning.
 *
 * Returns an empty CRef when a thesis, submission, patent, book or cit-gen
 * builder fails; otherwise a CPub of the matching kind.
 * The buffer at bptr is modified in place (trailing characters and prepub
 * markers are overwritten with '\0').
 */
1468 CRef<CPub> journal(ParserPtr pp, char* bptr, char* eptr, CRef<CAuth_list>& auth_list, CRef<CTitle::C_E>& title, bool has_muid, CRef<CCit_art>& cit_art, Int4 er)
1469 {
1470  CImprint::EPrepub pre = static_cast<CImprint::EPrepub>(0);
1471 
1472  char* p;
1473  char* nearend;
1474  char* end;
      /* NOTE(review): all_zeros is read after get_art() below; presumably
       * get_art() always writes it - confirm, it is not initialized here. */
1475  bool all_zeros;
1476  int retval = ParFlat_MISSING_JOURNAL;
1477 
1478  CRef<CPub> ret(new CPub);
1479  if (! bptr) {
1480  const Char* title_str = title.Empty() ? nullptr : title->GetName().c_str();
1481  ret->SetGen(*get_unpub(bptr, eptr, auth_list, title_str));
1482  return ret;
1483  }
1484 
1485  p = bptr;
1486  size_t my_len = StringLen(p);
1487  if (my_len > 7) {
      /* Trim trailing blanks, tabs and periods in place. */
1488  nearend = p + StringLen(p) - 1;
1489  while (*nearend == ' ' || *nearend == '\t' || *nearend == '.')
1490  *nearend-- = '\0';
1491 
      /* Step back to where a trailing "In press" / "Submitted" would start.
       * NOTE(review): the length test above was made before trimming; if the
       * trimmed text is shorter than 8 characters this points before bptr. */
1492  nearend -= 8;
1493  end = nearend + 2;
      /* NOTE(review): lines 1495, 1499 and 1506 are absent from this listing;
       * presumably each sets `pre` for the detected prepub marker. */
1494  if (StringEquNI("In press", nearend + 1, 8)) {
1496  *(nearend + 1) = '\0';
1497  }
1498  if (StringEquNI("Submitted", nearend, 9)) {
1500  *nearend = '\0';
1501  }
      /* "In press (year)" form: look at the word preceding the parenthesis. */
1502  if (pre == 0 && *end == '(' && isdigit(*(end + 1)) != 0) {
1503  for (nearend = end - 1; nearend > bptr && *nearend != ' ';)
1504  nearend--;
1505  if (StringEquNI("In press", nearend + 1, 8)) {
1507  *(nearend + 1) = '\0';
1508  }
1509  }
1510  }
1511 
      /* Leading "(xxxx) In press" form. Line 1516 is absent from this listing. */
1512  if (my_len >= 6 && *p == '(') {
1513  p += 6;
1514  if (StringEquN(" In press", p, 9)) {
1515  retval = ParFlat_IN_PRESS;
1517  }
1518  }
1519 
      /* Keyword dispatch on the start of the JOURNAL text. */
1520  p = bptr;
1521  if (StringEquN("Unpub", p, 5) || StringEquN("Unknown", p, 7)) {
1522  retval = ParFlat_UNPUB_JOURNAL;
1523  const Char* title_str = title.Empty() ? nullptr : title->GetName().c_str();
1524  ret->SetGen(*get_unpub(bptr, eptr, auth_list, title_str));
1525  } else if (StringEquN("(in)", p, 4)) {
      /* Line 1526 is absent from this listing (presumably sets retval). */
1527 
1528  CRef<CCit_art> article = get_book(bptr, auth_list, title, pre, pp->format, p);
1529 
      /* A book chapter that fails to parse degrades to an error Cit-gen. */
1530  if (article.Empty())
1531  ret->SetGen(*get_error(bptr, auth_list, title));
1532  else
1533  ret->SetArticle(*article);
1534 
1535  } else if (StringEquN("Thesis", p, 6)) {
1536  retval = ParFlat_THESIS_CITATION;
1537 
1538  CRef<CCit_let> cit_let = get_thesis(bptr, auth_list, title, pre);
1539  if (cit_let.Empty()) {
1540  ret.Reset();
1541  return ret;
1542  }
1543  ret->SetMan(*cit_let);
1544  } else if (StringEquN("Submi", p, 5)) {
1545  retval = ParFlat_SUBMITTED;
1546 
1547  CRef<CCit_sub> cit_sub = get_sub(pp, bptr, auth_list);
1548  if (cit_sub.Empty()) {
1549  ret.Reset();
1550  return ret;
1551  }
1552 
1553  ret->SetSub(*cit_sub);
1554  } else if (StringEquN("Published in GSDB", p, 17)) {
1555  ErrPostEx(SEV_WARNING, ERR_REFERENCE_GsdbRefDropped, "A published-in-gsdb reference was encountered and has been dropped [%s]", bptr);
1556  retval = ParFlat_SUBMITTED;
1557 
1558  CRef<CCit_sub> cit_sub = get_sub_gsdb(bptr, auth_list, title, pp);
1559  if (cit_sub.Empty()) {
1560  ret.Reset();
1561  return ret;
1562  }
1563 
1564  ret->SetSub(*cit_sub);
1565  } else if (StringEquN("Patent", p, 6) ||
1566  pp->source == Parser::ESource::USPTO) {
1567  retval = ParFlat_PATENT_CITATION;
1568 
      /* Patents are only built for the listed sequence types or USPTO source;
       * for anything else the reference is dropped. */
1569  if (pp->seqtype == CSeq_id::e_Genbank || pp->seqtype == CSeq_id::e_Ddbj ||
1570  pp->seqtype == CSeq_id::e_Embl || pp->seqtype == CSeq_id::e_Other ||
1571  pp->seqtype == CSeq_id::e_Tpe || pp->seqtype == CSeq_id::e_Tpg ||
1572  pp->seqtype == CSeq_id::e_Tpd ||
1573  pp->source == Parser::ESource::USPTO) {
1574  CRef<CCit_pat> cit_pat = get_pat(pp, bptr, auth_list, title, eptr);
1575  if (cit_pat.Empty()) {
1576  ret.Reset();
1577  return ret;
1578  }
1579 
1580  ret->SetPatent(*cit_pat);
1581  } else {
1582  ret.Reset();
1583  return ret;
1584  }
1585  } else if (StringEquN("Book:", p, 5)) {
1586  retval = ParFlat_BOOK_CITATION;
1587 
1588  CRef<CCit_book> book = get_whole_book(bptr, auth_list, title, pre);
1589  if (book.Empty()) {
1590  ret.Reset();
1591  return ret;
1592  }
1593 
1594  ret->SetBook(*book);
1595  } else if (StringEquNI("Published Only in Database", p, 26)) {
1596  retval = ParFlat_GEN_CITATION;
1597  CRef<CCit_gen> cit_gen = fta_get_citgen(bptr, auth_list, title);
1598 
1599  if (cit_gen.Empty()) {
1600  ret.Reset();
1601  return ret;
1602  }
1603 
1604  ret->SetGen(*cit_gen);
1605  } else if (StringEquNI("Online Publication", p, 18)) {
1606  retval = ParFlat_ONLINE_CITATION;
1607 
1608  CRef<CCit_gen> cit_gen = fta_get_citgen(bptr, auth_list, title);
1609 
1610  if (cit_gen.Empty()) {
1611  ret.Reset();
1612  return ret;
1613  }
1614 
1615  ret->SetGen(*cit_gen);
1616  }
1617 
      /* No keyword matched: treat the text as an ordinary journal article,
       * preferring the article supplied by the caller. */
1618  if (retval == ParFlat_MISSING_JOURNAL) {
1619  if (cit_art.NotEmpty())
1620  ret->SetArticle(*cit_art);
1621  else {
1622  CRef<CCit_art> new_art = get_art(pp, bptr, auth_list, title, pre, has_muid, &all_zeros, er);
1623  if (new_art.Empty()) {
1624  if (! all_zeros && ! StringEquN(bptr, "(er)", 4) && er == 0)
1625  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "Journal format error (cit-gen created): %s", bptr);
1626 
1627  ret->SetGen(*get_error(bptr, auth_list, title));
1628  } else
1629  ret->SetArticle(*new_art);
1630  }
1631  }
1632 
1633  return ret;
1634 }
1635 
1636 /**********************************************************/
/* Looks backwards from pchCurrent (exclusive) towards pchStart (inclusive),
 * skipping whitespace, and returns a pointer to the first non-space character
 * if it is a ';'. Returns nullptr when either pointer is null, when
 * pchStart >= pchCurrent, when only whitespace precedes pchCurrent, or when
 * the first non-space character is not ';'.
 */
static char* FindBackSemicolon(char* pchStart, char* pchCurrent)
{
    if (! pchStart || ! pchCurrent || pchStart >= pchCurrent)
        return nullptr;

    /* Decrement only while strictly above pchStart so the pointer never
     * moves before the beginning of the buffer. */
    while (pchCurrent > pchStart) {
        --pchCurrent;

        /* <cctype> functions require a value representable as unsigned char;
         * a plain (possibly negative) char is undefined behaviour. */
        if (isspace(static_cast<unsigned char>(*pchCurrent)) != 0)
            continue;

        return (*pchCurrent == ';') ? pchCurrent : nullptr;
    }

    return nullptr;
}
1652 
1653 /**********************************************************/
/* Skips leading whitespace in str and returns a pointer to the first
 * non-space character if it is a ';'. Returns nullptr for a null or empty
 * string, or when the first non-space character is anything else.
 */
static char* FindSemicolon(char* str)
{
    if (! str || *str == '\0')
        return nullptr;

    /* Cast to unsigned char: passing a negative char to isspace() is
     * undefined behaviour. */
    while (*str && std::isspace(static_cast<unsigned char>(*str)))
        str++;

    return (*str == ';') ? str : nullptr;
}
1667 
1668 /**********************************************************/
1669 static char* ExtractErratum(char* comm)
1670 {
1671  char* start;
1672  char* pchNumber = nullptr;
1673  char* end;
1674  char* p;
1675 
1676  if (! comm)
1677  return nullptr;
1678 
1679  start = StringStr(comm, "Erratum:");
1680  if (! start)
1681  return (comm);
1682 
1683  end = StringChr(start, ']');
1684  if (! end)
1685  return (comm);
1686 
1687  pchNumber = end + 1;
1688  end = FindSemicolon(pchNumber);
1689  if (end)
1690  pchNumber = end + 1;
1691  p = FindBackSemicolon(comm, start);
1692  if (p)
1693  start = p;
1694  fta_StringCpy(start, pchNumber);
1695 
1696  /* Check if the string after cutting signature is empty. If it's really
1697  * empty we have to ignore the whole string (comment).
1698  * Do you want to have a comment which contains nothing!? Probably no.
1699  */
1700  for (p = comm; *p == ' ' || *p == '\t' || *p == '\n';)
1701  p++;
1702  if (*p == '\0')
1703  *comm = '\0';
1704 
1705  return (comm);
1706 }
1707 
1708 /**********************************************************/
1709 static void XMLGetXrefs(char* entry, XmlIndexPtr xip, TQualVector& quals)
1710 {
1711  XmlIndexPtr xipqual;
1712 
1713  if (! entry || ! xip)
1714  return;
1715 
1716  for (; xip; xip = xip->next) {
1717  if (! xip->subtags)
1718  continue;
1719 
1720  CRef<CGb_qual> qual(new CGb_qual);
1721 
1722  for (xipqual = xip->subtags; xipqual; xipqual = xipqual->next) {
1723  if (xipqual->tag == INSDXREF_DBNAME)
1724  qual->SetQual(XMLGetTagValue(entry, xipqual));
1725  else if (xipqual->tag == INSDXREF_ID)
1726  qual->SetVal(XMLGetTagValue(entry, xipqual));
1727  }
1728 
1729  if (qual->IsSetQual() && ! qual->GetQual().empty())
1730  quals.push_back(qual);
1731  }
1732 }
1733 
1734 /**********************************************************/
1735 static void fta_add_article_ids(CPub& pub, const string& doi, const string& agricola)
1736 {
1737  if (doi.empty() && agricola.empty())
1738  return;
1739 
1740  if (pub.IsArticle()) {
1741  CCit_art& cit_art = pub.SetArticle();
1742 
1743  if (! agricola.empty()) {
1744  CRef<CArticleId> id(new CArticleId);
1745  id->SetOther().SetDb("AGRICOLA");
1746  id->SetOther().SetTag().SetStr(agricola);
1747 
1748  cit_art.SetIds().Set().push_front(id);
1749  }
1750 
1751  if (! doi.empty()) {
1752  CRef<CArticleId> id(new CArticleId);
1753  id->SetDoi().Set(doi);
1754 
1755  cit_art.SetIds().Set().push_front(id);
1756  }
1757  }
1758 }
1759 
1760 /**********************************************************/
1762 {
      /* NOTE(review): the signature line (1761) is absent from this listing.
       * Callers in this file invoke fta_remark_is_er() with the comment text
       * or nullptr and store the result in an Int4.
       *
       * Classifies a reference remark by searching it for the phrases listed
       * in ERRemarks:
       *   0 - no phrase found,
       *   1 - one of the first six phrases matched ("epublish"),
       *   2 - a later phrase matched ("aheadofprint").
       */
1763  const char** b;
1764  char* s;
1765  Int4 i;
1766 
      /* Work on a space-normalized private copy so `str` is left untouched.
       * NOTE(review): callers pass nullptr when there is no comment; presumably
       * StringSave/ShrinkSpaces/StringIStr tolerate that - confirm. */
1767  s = StringSave(str);
1768  ShrinkSpaces(s);
      /* i is the 1-based position of the first phrase contained in the remark. */
1769  for (i = 1, b = ERRemarks; *b; b++, i++)
1770  if (StringIStr(s, *b))
1771  break;
1772 
1773  MemFree(s);
1774  if (! *b)
1775  return (0);
1776  if (i < 7)
1777  return (1); /* epublish */
1778  return (2); /* aheadofprint */
1779 }
1780 
1781 /**********************************************************/
/* Builds a Pubdesc from one INSDReference XML block.
 *
 *   pp      - parser context (xml_comp, source, medserver are read).
 *   dbp     - data block; mOffset is the entry text, mpData the XML index.
 *   no_auth - set to true when no author names could be obtained.
 *   rej     - set to true when the reference carries a MEDLINE id.
 *
 * Returns an empty CRef when the arguments are unusable, a MEDLINE id is
 * present, the JOURNAL value is missing or empty, or journal() yields nothing.
 */
1782 static CRef<CPubdesc> XMLRefs(ParserPtr pp, DataBlkPtr dbp, bool& no_auth, bool& rej)
1783 {
1784  char* title;
1785 
1786  char* p;
1787  char* q;
1788  char* r;
1789  bool is_online;
1790  TEntrezId pmid;
1791 
1792  XmlIndexPtr xip;
1793 
1794  Int4 er;
1795 
1796  CRef<CPubdesc> desc;
1797 
1798  if (! pp || ! dbp || ! dbp->mOffset || ! dbp->mpData)
1799  return desc;
1800 
1801  desc.Reset(new CPubdesc);
1802 
      /* Reference number. */
1803  p = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_REFERENCE);
1804  if (p && isdigit((int)*p) != 0) {
1805  desc->SetPub().Set().push_back(get_num(p));
1806  } else {
1807  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
1808  }
1809 
1810  if (p)
1811  MemFree(p);
1812 
      /* A MEDLINE id is not accepted here: flag the rejection and stop. */
1813  p = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_MEDLINE);
1814  if (p) {
1815  rej = true;
1816  MemFree(p);
1817  desc.Reset();
1818  return desc;
1819  }
1820 
1821  pmid = ZERO_ENTREZ_ID;
1822  p = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_PUBMED);
      /* NOTE(review): line 1824 is absent from this listing; presumably it
       * converts the PUBMED text into `pmid`. */
1823  if (p) {
1825  MemFree(p);
1826  }
1827 
1828  CRef<CAuth_list> auth_list;
1829 
      /* Authors: the subtags are joined with ','. */
1830  p = XMLConcatSubTags(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_AUTHORS, ',');
1831  if (p) {
      /* In xml_comp mode make sure the author string ends with a period. */
1832  if (pp->xml_comp) {
1833  q = StringRChr(p, '.');
1834  if (! q || q[1] != '\0') {
1835  string s = p;
1836  s.append(".");
1837  MemFree(p);
1838  p = StringSave(s.c_str());
1839  q = nullptr;
1840  }
1841  }
      /* Only parse when something other than blanks, periods and commas is present. */
1842  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
1843  q++;
1844  if (*q != '\0') {
1845  q = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_JOURNAL);
      /* If no period follows the first comma, that comma is replaced by '|'
       * before the string is handed to get_auth(). */
1846  r = StringChr(p, ',');
1847  if (r && ! StringChr(r + 1, '.'))
1848  *r = '|';
1849  get_auth(p, (pp->source == Parser::ESource::EMBL) ? EMBL_REF : GB_REF, q, auth_list);
1850  MemFree(q);
1851  }
1852  MemFree(p);
1853  }
1854 
      /* Consortium, added to the same author list. */
1855  p = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_CONSORTIUM);
1856  if (p) {
1857  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
1858  q++;
1859 
1860  if (*q != '\0')
1861  get_auth_consortium(p, auth_list);
1862 
1863  MemFree(p);
1864  }
1865 
1866  if (auth_list.Empty() || ! auth_list->IsSetNames())
1867  no_auth = true;
1868 
1869  p = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_TITLE);
1870 
      /* Title: "Direct Submission", empty and ';' titles are ignored.
       * Note that title_art is allocated even when no name gets set. */
1871  CRef<CTitle::C_E> title_art(new CTitle::C_E);
1872  if (p) {
1873  if (! StringEquN(p, "Direct Submission", 17) &&
1874  *p != '\0' && *p != ';') {
1875  title = clean_up(p);
1876  if (title) {
1877  title_art->SetName(tata_save(title));
1878  free(title);
1879  }
1880  }
1881  MemFree(p);
1882  }
1883 
      /* JOURNAL is mandatory; p stays allocated until after journal() below. */
1884  is_online = false;
1885  p = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_JOURNAL);
1886  if (! p) {
1887  ErrPostEx(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No JOURNAL line, reference dropped");
1888  desc.Reset();
1889  return desc;
1890  }
1891 
1892  if (*p == '\0' || *p == ';') {
1893  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "JOURNAL line is empty, reference dropped");
1894  MemFree(p);
1895  desc.Reset();
1896  return desc;
1897  }
1898 
1899  if (NStr::EqualNocase(p, 0, 18, "Online Publication"))
1900  is_online = true;
1901 
      /* Remark becomes the Pubdesc comment, minus any erratum signature;
       * it is normalized unless this is an online publication. */
1902  r = XMLFindTagValue(dbp->mOffset, static_cast<XmlIndex*>(dbp->mpData), INSDREFERENCE_REMARK);
1903  if (r) {
1904  r = ExtractErratum(r);
1905  desc->SetComment(NStr::Sanitize(r));
1906  MemFree(r);
1907 
1908  if (! is_online)
1909  normalize_comment(desc->SetComment());
1910  }
1911 
1912  er = fta_remark_is_er(desc->IsSetComment() ? desc->GetComment().c_str() : nullptr);
1913 
      /* For "(er)" journals or remarks classified by fta_remark_is_er(), try to
       * fetch the article by PubMed id; a failed fetch discards the pmid. */
1914  CRef<CCit_art> cit_art;
1915  if (pp->medserver == 1 && pmid > ZERO_ENTREZ_ID && (StringEquN(p, "(er)", 4) || er > 0)) {
1916  cit_art = FetchPubPmId(pmid);
1917  if (cit_art.Empty())
1918  pmid = ZERO_ENTREZ_ID;
1919  }
1920 
1921  if (pmid > ZERO_ENTREZ_ID) {
1922  CRef<CPub> pub(new CPub);
1923  pub->SetPmid().Set(pmid);
1924  desc->SetPub().Set().push_back(pub);
1925  }
1926 
1927  CRef<CPub> pub_ref = journal(pp, p, p + StringLen(p), auth_list, title_art, false, cit_art, er);
1928  MemFree(p);
1929 
      /* Gather DOI / AGRICOLA cross-references from the XREF subtrees. */
1930  TQualVector xrefs;
1931  for (xip = static_cast<XmlIndex*>(dbp->mpData); xip; xip = xip->next) {
1932  if (xip->tag == INSDREFERENCE_XREF)
1933  XMLGetXrefs(dbp->mOffset, xip->subtags, xrefs);
1934  }
1935 
1936  string doi;
1937  string agricola;
1938  for (const auto& xref : xrefs) {
1939  if (! xref->IsSetQual())
1940  continue;
1941 
      /* NOTE(review): the database name is spelled "ARGICOLA" here, while
       * fta_add_article_ids() and embl_refs() in this file use "AGRICOLA" -
       * looks like a typo that prevents AGRICOLA xrefs from matching. */
1942  if (NStr::EqualNocase(xref->GetQual(), "ARGICOLA") && agricola.empty())
1943  agricola = xref->GetVal();
1944  else if (NStr::EqualNocase(xref->GetQual(), "DOI") && doi.empty())
1945  doi = xref->GetVal();
1946  }
1947 
      /* NOTE(review): pub_ref is dereferenced here BEFORE the Empty() check
       * below, yet journal() in this file returns an empty CRef on several
       * failure paths. embl_refs() checks first and adds the ids afterwards;
       * the same order is needed here. */
1948  fta_add_article_ids(*pub_ref, doi, agricola);
1949 
1950  if (pub_ref.Empty()) {
1951  desc.Reset();
1952  return desc;
1953  }
1954 
1955  if (dbp->mType == ParFlat_REF_NO_TARGET)
1956  desc->SetReftype(CPubdesc::eReftype_no_target);
1957 
1958  desc->SetPub().Set().push_back(pub_ref);
1959 
1960  return desc;
1961 }
1962 
1963 /**********************************************************/
/* Builds a Pubdesc from one GenBank REFERENCE block.
 *
 *   pp       - parser context (medserver is read; passed on to journal()).
 *   dbp      - REFERENCE data block; the reference number starts at
 *              dbp->mOffset + col_data.
 *   bParser  - true when called from the parser, false in the GBDIFF context
 *              (per the comments in the body).
 *   ppInd    - when non-null in the non-parser branch, receives the address of
 *              the function-local static subblock index and the function
 *              returns right away.
 *   no_auth  - set to true when no author names could be obtained.
 *
 * Returns an empty CRef when the JOURNAL subblock is missing or empty, or
 * when journal() yields nothing.
 */
1964 CRef<CPubdesc> gb_refs_common(ParserPtr pp, DataBlkPtr dbp, Int4 col_data, bool bParser, DataBlkPtr** ppInd, bool& no_auth)
1965 {
      /* Static storage: the index is shared by all calls and is what *ppInd
       * is pointed at below. */
1966  static DataBlkPtr ind[MAXKW + 1];
1967 
1968  bool has_muid;
1969  char* p;
1970  char* q;
1971  char* r;
1972  bool is_online;
1973  TEntrezId pmid;
1974  Int4 er;
1975 
1976  CRef<CPubdesc> desc(new CPubdesc);
1977 
1978  p = dbp->mOffset + col_data;
1979  if (bParser) {
1980  /* This branch works when this function called in context of PARSER
1981  */
1982  if (*p >= '0' && *p <= '9')
1983  desc->SetPub().Set().push_back(get_num(p));
1984  else
1985  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
      /* NOTE(review): line 1986 is absent from this listing; presumably it
       * fills `ind` from the subblocks of dbp. */
1987  } else {
1988  /* This branch works when this function is called in context of GBDIFF
1989  */
1990  if (ppInd) {
      /* NOTE(review): line 1991 is absent from this listing. */
1992  *ppInd = &ind[0];
1993 
1994  return desc;
1995  }
1996 
1997  if (*p < '0' || *p > '9')
1998  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
1999  }
2000 
2001  has_muid = false;
2002  if (ind[ParFlat_MEDLINE]) {
2003  p = ind[ParFlat_MEDLINE]->mOffset;
      /* NOTE(review): line 2004, which must declare `pub`, is absent from
       * this listing.
       * NOTE(review): the branch pushes get_num(p) - a reference-number pub
       * built from the MEDLINE text - rather than `pub`; embl_refs() pushes
       * `pub` in the equivalent spot. Confirm which is intended. */
2005  if (pub.NotEmpty()) {
2006  has_muid = true;
2007  desc->SetPub().Set().push_back(get_num(p));
2008  }
2009  }
2010 
2011  pmid = ZERO_ENTREZ_ID;
2012  if (ind[ParFlat_PUBMED]) {
2013  p = ind[ParFlat_PUBMED]->mOffset;
      /* NOTE(review): line 2015 (the statement governed by this `if`) is absent
       * from this listing; presumably it converts the text into `pmid`. */
2014  if (p)
2016  }
2017 
      /* Authors: parsed only when something besides blanks, periods and commas
       * is present; the JOURNAL text, if any, is passed along to get_auth(). */
2018  CRef<CAuth_list> auth_list;
2019  if (ind[ParFlat_AUTHORS]) {
2020  p = ind[ParFlat_AUTHORS]->mOffset;
2021  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2022  q++;
2023 
2024  if (*q != '\0') {
2025  if (ind[ParFlat_JOURNAL])
2026  q = ind[ParFlat_JOURNAL]->mOffset;
2027 
2028  get_auth(p, GB_REF, q, auth_list);
2029  }
2030  }
2031 
2032  if (ind[ParFlat_CONSRTM]) {
2033  p = ind[ParFlat_CONSRTM]->mOffset;
2034  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2035  q++;
2036 
2037  if (*q != '\0')
2038  get_auth_consortium(p, auth_list);
2039  }
2040 
2041  if (auth_list.Empty() || ! auth_list->IsSetNames())
2042  no_auth = true;
2043 
      /* Title: left empty for "Direct Submission", empty and ';' titles. */
2044  CRef<CTitle::C_E> title_art;
2045  if (ind[ParFlat_TITLE]) {
2046  p = ind[ParFlat_TITLE]->mOffset;
2047  if (! StringEquN(p, "Direct Submission", 17) &&
2048  *p != '\0' && *p != ';') {
2049  q = clean_up(p);
2050  if (q) {
2051  title_art.Reset(new CTitle::C_E);
2052  title_art->SetName(NStr::Sanitize(q));
2053  free(q);
2054  }
2055  }
2056  }
2057 
      /* JOURNAL is mandatory. */
2058  if (! ind[ParFlat_JOURNAL]) {
2059  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "No JOURNAL line, reference dropped");
2060 
2061  desc.Reset();
2062  return desc;
2063  }
2064 
2065  p = ind[ParFlat_JOURNAL]->mOffset;
2066  if (*p == '\0' || *p == ';') {
2067  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Fail_to_parse, "JOURNAL line is empty, reference dropped");
2068 
2069  desc.Reset();
2070  return desc;
2071  }
2072 
2073  is_online = StringEquNI(p, "Online Publication", 18);
2074 
      /* REMARK becomes the comment; ExtractErratum() edits the subblock text
       * in place. */
2075  if (ind[ParFlat_REMARK]) {
2076  r = ind[ParFlat_REMARK]->mOffset;
2077  r = ExtractErratum(r);
2078  desc->SetComment(NStr::Sanitize(r));
2079 
2080  if (! is_online)
2081  normalize_comment(desc->SetComment());
2082  }
2083 
2084  er = fta_remark_is_er(desc->IsSetComment() ? desc->GetComment().c_str() : nullptr);
2085 
      /* For "(er)" journals or remarks classified by fta_remark_is_er(), try to
       * fetch the article by PubMed id; a failed fetch discards the pmid. */
2086  CRef<CCit_art> cit_art;
2087  if (pp->medserver == 1 && pmid > ZERO_ENTREZ_ID && (StringEquN(p, "(er)", 4) || er > 0)) {
2088  cit_art = FetchPubPmId(pmid);
2089  if (! cit_art)
2090  pmid = ZERO_ENTREZ_ID;
2091  }
2092 
2093  if (pmid > ZERO_ENTREZ_ID) {
2094  CRef<CPub> pub(new CPub);
2095  pub->SetPmid().Set(pmid);
2096  desc->SetPub().Set().push_back(pub);
2097  }
2098 
2099  CRef<CPub> pub_ref = journal(pp, p, p + ind[ParFlat_JOURNAL]->len, auth_list, title_art, has_muid, cit_art, er);
2100 
2101  if (pub_ref.Empty()) {
2102  desc.Reset();
2103  return desc;
2104  }
2105 
2106  if (dbp->mType == ParFlat_REF_NO_TARGET)
2107  desc->SetReftype(CPubdesc::eReftype_no_target);
2108 
2109  desc->SetPub().Set().push_back(pub_ref);
2110 
2111  return desc;
2112 }
2113 
2114 /**********************************************************
2115  *
2116  * static PubdescPtr embl_refs(pp, dbp, col_data, no_auth):
2117  *
2118  * Parse EMBL references. Return a Pubdesc pointer.
2119  *
2120  * 11-14-93
2121  *
2122  **********************************************************/
/* Builds a Pubdesc from one EMBL reference block.
 *
 *   pp       - parser context (medserver is read; passed on to journal()).
 *   dbp      - reference data block; the reference number is searched from
 *              dbp->mOffset + col_data.
 *   no_auth  - set to true when no author names could be obtained.
 *
 * Returns an empty CRef when the RL subblock is missing or empty, or when
 * journal() yields nothing.
 */
2123 static CRef<CPubdesc> embl_refs(ParserPtr pp, DataBlkPtr dbp, Int4 col_data, bool& no_auth)
2124 {
2125  static DataBlkPtr ind[MAXKW + 1];
2126  char* s;
2127 
2128  char* title;
2129  bool has_muid;
2130  char* p;
2131  char* q;
2132  TEntrezId pmid;
2133 
2134  Int4 er;
2135 
2136  CRef<CPubdesc> desc(new CPubdesc);
2137 
      /* Skip to the first digit of the reference number.
       * NOTE(review): dbp->len is not changed inside the loop, so only a digit
       * stops the scan; a block without digits would be read past its end. */
2138  p = dbp->mOffset + col_data;
2139  while ((*p < '0' || *p > '9') && dbp->len > 0)
2140  p++;
2141  if (*p >= '0' && *p <= '9')
2142  desc->SetPub().Set().push_back(get_num(p));
2143  else
2144  ErrPostEx(SEV_WARNING, ERR_REFERENCE_Illegalreference, "No reference number.");
2145 
      /* NOTE(review): line 2146 is absent from this listing; presumably it
       * fills `ind` from the subblocks of dbp. */
2147 
2148  has_muid = false;
2149  pmid = ZERO_ENTREZ_ID;
2150 
2151  string doi;
2152  string agricola;
2153 
      /* RC: reference comment. */
2154  if (ind[ParFlat_RC])
2155  desc->SetComment(NStr::Sanitize(ind[ParFlat_RC]->mOffset));
2156 
2157  er = fta_remark_is_er(desc->IsSetComment() ? desc->GetComment().c_str() : nullptr);
2158 
      /* RX: cross-references (DOI, AGRICOLA, Medline pub, PubMed id). */
2159  if (ind[ParFlat_RX]) {
2160  p = ind[ParFlat_RX]->mOffset;
      /* NOTE(review): line 2161, which must declare `pub`, is absent from
       * this listing. */
2162 
2163  char* id = get_embl_str_pub_id(p, "DOI;");
2164  if (id) {
2165  doi = id;
2166  MemFree(id);
2167  }
2168 
2169  id = get_embl_str_pub_id(p, "AGRICOLA;");
2170  if (id) {
2171  agricola = id;
2172  MemFree(id);
2173  }
2174 
2175  if (pub.NotEmpty()) {
2176  desc->SetPub().Set().push_back(pub);
2177  has_muid = true;
2178  }
2179 
2180  pmid = get_embl_pmid(p);
2181  }
2182 
      /* RA: authors. A trailing ';' is removed in place first.
       * NOTE(review): for an empty RA string `s` points one before p - confirm
       * the subblock can never be empty. */
2183  CRef<CAuth_list> auth_list;
2184  if (ind[ParFlat_RA]) {
2185  p = ind[ParFlat_RA]->mOffset;
2186  s = p + StringLen(p) - 1;
2187  if (*s == ';')
2188  *s = '\0';
2189  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2190  q++;
2191  if (*q != '\0') {
2192  if (ind[ParFlat_RL])
2193  q = ind[ParFlat_RL]->mOffset;
2194 
2195  get_auth(p, EMBL_REF, q, auth_list);
2196  }
2197  }
2198 
      /* RG: consortium, handled like RA. */
2199  if (ind[ParFlat_RG]) {
2200  p = ind[ParFlat_RG]->mOffset;
2201  s = p + StringLen(p) - 1;
2202  if (*s == ';')
2203  *s = '\0';
2204 
2205  for (q = p; *q == ' ' || *q == '.' || *q == ',';)
2206  q++;
2207 
2208  if (*q != '\0')
2209  get_auth_consortium(p, auth_list);
2210  }
2211 
2212  if (auth_list.Empty() || ! auth_list->IsSetNames())
2213  no_auth = true;
2214 
      /* RT: title; stays empty when the cleaned-up text is empty. */
2215  CRef<CTitle::C_E> title_art;
2216  if (ind[ParFlat_RT]) {
2217  p = ind[ParFlat_RT]->mOffset;
2218  if (*p != '\0' && *p != ';') {
2219  title = clean_up(p);
2220  if (title && title[0]) {
2221  title_art.Reset(new CTitle::C_E);
2222  title_art->SetName(NStr::Sanitize(title));
2223  }
2224  free(title);
2225  }
2226  }
2227 
      /* RL (journal location) is mandatory. */
2228  if (! ind[ParFlat_RL]) {
2229  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Illegalreference, "No JOURNAL line, reference dropped.");
2230 
2231  desc.Reset();
2232  return desc;
2233  }
2234 
2235  p = ind[ParFlat_RL]->mOffset;
2236  if (*p == '\0' || *p == ';') {
2237  ErrPostStr(SEV_ERROR, ERR_REFERENCE_Illegalreference, "JOURNAL line is empty, reference dropped.");
2238 
2239  desc.Reset();
2240  return desc;
2241  }
2242 
      /* For "(er)" journals or remarks classified by fta_remark_is_er(), try to
       * fetch the article by PubMed id; a failed fetch discards the pmid. */
2243  CRef<CCit_art> cit_art;
2244  if (pp->medserver == 1 && pmid > ZERO_ENTREZ_ID && (StringEquN(p, "(er)", 4) || er > 0)) {
2245  cit_art = FetchPubPmId(pmid);
2246  if (! cit_art)
2247  pmid = ZERO_ENTREZ_ID;
2248  }
2249 
2250  if (pmid > ZERO_ENTREZ_ID) {
2251  CRef<CPub> pub(new CPub);
2252  pub->SetPmid().Set(pmid);
2253  desc->SetPub().Set().push_back(pub);
2254  }
2255 
2256  CRef<CPub> pub_ref = journal(pp, p, p + ind[ParFlat_RL]->len, auth_list, title_art, has_muid, cit_art, er);
2257 
      /* The emptiness check comes before pub_ref is dereferenced. */
2258  if (pub_ref.Empty()) {
2259  desc.Reset();
2260  return desc;
2261  }
2262 
2263  fta_add_article_ids(*pub_ref, doi, agricola);
2264 
2265  if (dbp->mType == ParFlat_REF_NO_TARGET)
2266  desc->SetReftype(CPubdesc::eReftype_no_target);
2267 
2268  desc->SetPub().Set().push_back(pub_ref);
2269 
2270  return desc;
2271 }
2272 
2273 /**********************************************************/
2274 static void fta_sort_pubs(TPubList& pubs)
2275 {
2276  for (TPubList::iterator pub = pubs.begin(); pub != pubs.end(); ++pub) {
2277  TPubList::iterator next_pub = pub;
2278  for (++next_pub; next_pub != pubs.end(); ++next_pub) {
2279  if ((*next_pub)->Which() > (*pub)->Which())
2280  continue;
2281 
2282  if ((*next_pub)->Which() == (*pub)->Which()) {
2283  if (! (*pub)->IsMuid() || (*pub)->GetMuid() >= (*next_pub)->GetMuid())
2284  continue;
2285  }
2286 
2287  pub->Swap(*next_pub);
2288  }
2289  }
2290 }
2291 
2292 /**********************************************************/
2293 static void fta_check_long_last_name(const CAuth_list& authors, bool soft_report)
2294 {
2295  static const size_t MAX_LAST_NAME_LEN = 30;
2296 
2297  ErrSev sev;
2298 
2299  if (! authors.IsSetNames() || ! authors.GetNames().IsStd())
2300  return;
2301 
2302  for (const auto& author : authors.GetNames().GetStd()) {
2303  if (! author->IsSetName() || ! author->GetName().IsName())
2304  continue;
2305 
2306  const CName_std& name = author->GetName().GetName();
2307 
2308  if (name.IsSetLast() && name.GetLast().size() > MAX_LAST_NAME_LEN) {
2309  /* Downgrade severity of this error to WARNING
2310  * if in HTGS mode. As of 7/31/2002, very long
2311  * consortium names were treated as if
2312  * they were author last names, for HTGS data.
2313  * This can be reverted to ERROR after the
2314  * consortium name slot is available and utilized
2315  * in the ASN.1.
2316  */
2317  sev = (soft_report ? SEV_WARNING : SEV_ERROR);
2318  ErrPostEx(sev, ERR_REFERENCE_LongAuthorName, "Last name of author exceeds 30 characters in length. A format error in the reference data might have caused the author name to be parsed incorrectly. Name is \"%s\".", name.GetLast().c_str());
2319  }
2320  }
2321 }
2322 
2323 /**********************************************************/
2324 static void fta_check_long_name_in_article(const CCit_art& cit_art, bool soft_report)
2325 {
2326  if (cit_art.IsSetAuthors())
2327  fta_check_long_last_name(cit_art.GetAuthors(), soft_report);
2328 
2329  if (cit_art.IsSetFrom()) {
2330  const CCit_book* book = nullptr;
2331  if (cit_art.GetFrom().IsBook())
2332  book = &cit_art.GetFrom().GetBook();
2333  else if (cit_art.GetFrom().IsProc()) {
2334  if (cit_art.GetFrom().GetProc().IsSetBook())
2335  book = &cit_art.GetFrom().GetProc().GetBook();
2336  }
2337 
2338  if (book && book->IsSetAuthors())
2339  fta_check_long_last_name(book->GetAuthors(), soft_report);
2340  }
2341 }
2342 
2343 /**********************************************************/
2344 static void fta_check_long_names(const CPub& pub, bool soft_report)
2345 {
2346  if (pub.IsGen()) /* CitGen */
2347  {
2348  const CCit_gen& cit_gen = pub.GetGen();
2349  if (cit_gen.IsSetAuthors())
2350  fta_check_long_last_name(cit_gen.GetAuthors(), soft_report);
2351  } else if (pub.IsSub()) /* CitSub */
2352  {
2353  if (! soft_report) {
2354  const CCit_sub& cit_sub = pub.GetSub();
2355  if (cit_sub.IsSetAuthors())
2356  fta_check_long_last_name(cit_sub.GetAuthors(), soft_report);
2357  }
2358  } else if (pub.IsMedline()) /* Medline */
2359  {
2360  const CMedline_entry& medline = pub.GetMedline();
2361  if (medline.IsSetCit()) {
2362  fta_check_long_name_in_article(medline.GetCit(), soft_report);
2363  }
2364  } else if (pub.IsArticle()) /* CitArt */
2365  {
2366  fta_check_long_name_in_article(pub.GetArticle(), soft_report);
2367  } else if (pub.IsBook() || pub.IsProc() || pub.IsMan()) /* CitBook or CitProc or
2368  CitLet */
2369  {
2370  const CCit_book* book = nullptr;
2371 
2372  if (pub.IsBook())
2373  book = &pub.GetBook();
2374  else if (pub.IsProc()) {
2375  if (pub.GetProc().IsSetBook())
2376  book = &pub.GetProc().GetBook();
2377  } else {
2378  if (pub.GetMan().IsSetCit())
2379  book = &pub.GetMan().GetCit();
2380  }
2381 
2382  if (book && book->IsSetAuthors())
2383  fta_check_long_last_name(book->GetAuthors(), soft_report);
2384  } else if (pub.IsPatent()) /* CitPat */
2385  {
2386  const CCit_pat& patent = pub.GetPatent();
2387 
2388  if (patent.IsSetAuthors())
2389  fta_check_long_last_name(patent.GetAuthors(), soft_report);
2390 
2391  if (patent.IsSetApplicants())
2392  fta_check_long_last_name(patent.GetApplicants(), soft_report);
2393 
2394  if (patent.IsSetAssignees())
2395  fta_check_long_last_name(patent.GetAssignees(), soft_report);
2396  } else if (pub.IsEquiv()) /* PubEquiv */
2397  {
2398  for (const auto& cur_pub : pub.GetEquiv().Get()) {
2399  fta_check_long_names(*cur_pub, soft_report);
2400  }
2401  }
2402 }
2403 
2404 /**********************************************************/
2405 static void fta_propagate_pmid_muid(CPub_equiv& pub_equiv)
2406 {
2407  TEntrezId pmid = ZERO_ENTREZ_ID;
2408  TEntrezId muid = ZERO_ENTREZ_ID;
2409 
2410  CCit_art* cit_art = nullptr;
2411  for (auto& pub : pub_equiv.Set()) {
2412  if (pub->IsMuid() && muid == ZERO_ENTREZ_ID)
2413  muid = pub->GetMuid();
2414  else if (pub->IsPmid() && pmid == ZERO_ENTREZ_ID)
2415  pmid = pub->GetPmid().Get();
2416  else if (pub->IsArticle() && ! cit_art)
2417  cit_art = &pub->SetArticle();
2418  }
2419 
2420  if (! cit_art || (muid == ZERO_ENTREZ_ID && pmid == ZERO_ENTREZ_ID))
2421  return;
2422 
2423  if (muid != ZERO_ENTREZ_ID) {
2424  CRef<CArticleId> id(new CArticleId);
2425  id->SetMedline().Set(muid);
2426  cit_art->SetIds().Set().push_front(id);
2427  }
2428 
2429  if (pmid != ZERO_ENTREZ_ID) {
2430  CRef<CArticleId> id(new CArticleId);
2431  id->SetPubmed().Set(pmid);
2432  cit_art->SetIds().Set().push_front(id);
2433  }
2434 }
2435 
2436 /**********************************************************
2437  *
2438  * PubdescPtr DescrRefs(pp, dbp, col_data):
2439  *
2440  * Return a Pubdesc pointer.
2441  *
2442  * 4-14-93
2443  *
2444  **********************************************************/
2446 {
      /* NOTE(review): the signature line (2445) is absent from this listing; the
       * header comment above names it DescrRefs(pp, dbp, col_data).
       *
       * Dispatches to the format-specific reference parser, then post-processes
       * the resulting Pubdesc: comment whitespace is shrunk, missing authors and
       * rejected Medline ids are reported (and may drop the entry), pubs are
       * sorted, checked for over-long author names, and pmid/muid are copied
       * into the article ids. Returns whatever the format parser produced,
       * which may be an empty CRef. */
2447  bool soft_report = false;
2448 
2449  bool rej = false;
2450  bool no_auth = false;
2451 
      /* HTGS mode downgrades the long-author-name report to a warning. */
2452  if (pp->mode == Parser::EMode::HTGS)
2453  soft_report = true;
2454 
2455  CRef<CPubdesc> desc;
2456 
      /* desc stays empty when the format is none of the four handled here. */
2457  if (pp->format == Parser::EFormat::SPROT)
2458  desc = sp_refs(pp, dbp, col_data);
2459  else if (pp->format == Parser::EFormat::XML)
2460  desc = XMLRefs(pp, dbp, no_auth, rej);
2461  else if (pp->format == Parser::EFormat::GenBank)
2462  desc = gb_refs_common(pp, dbp, col_data, true, nullptr, no_auth);
2463  else if (pp->format == Parser::EFormat::EMBL)
2464  desc = embl_refs(pp, dbp, col_data, no_auth);
2465 
      /* NOTE(review): this casts away const from the string's own buffer, edits
       * it in place with ShrinkSpaces(), then assigns the string from its own
       * storage - works only by relying on std::string internals; a local copy
       * would be safer. */
2466  if (desc && desc->IsSetComment()) {
2467  char* comment = (char*)desc->GetComment().c_str();
2468  ShrinkSpaces(comment);
2469  desc->SetComment(comment);
2470  }
2471 
      /* Missing authors: an error for EMBL-sourced data, otherwise the whole
       * entry is marked to be dropped. */
2472  if (no_auth) {
2473  if (pp->source == Parser::ESource::EMBL)
2474  ErrPostEx(SEV_ERROR, ERR_REFERENCE_MissingAuthors, "Reference has no author names.");
2475  else {
2476  ErrPostEx(SEV_REJECT, ERR_REFERENCE_MissingAuthors, "Reference has no author names. Entry dropped.");
2477  pp->entrylist[pp->curindx]->drop = true;
2478  }
2479  }
2480 
      /* rej is only set by XMLRefs() when a Medline id is present. */
2481  if (rej) {
2482  ErrPostEx(SEV_REJECT, ERR_REFERENCE_InvalidMuid, "Use of Medline ID in INSDSeq format is not alowed. Entry dropped.");
2483  pp->entrylist[pp->curindx]->drop = true;
2484  }
2485 
2486  if (desc.NotEmpty() && desc->IsSetPub()) {
2487  fta_sort_pubs(desc->SetPub().Set());
2488 
2489  for (const auto& pub : desc->GetPub().Get()) {
2490  fta_check_long_names(*pub, soft_report);
2491  }
2492 
2493  fta_propagate_pmid_muid(desc->SetPub());
2494  }
2495 
2496  return desc;
2497 }
2498 
User-defined methods of the data storage class.
Data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
char * tata_save(char *str)
Definition: add.cpp:147
char * StringRStr(char *where, const char *what)
Definition: add.cpp:1410
void ShrinkSpaces(char *line)
Definition: asci_blk.cpp:118
@Affil.hpp User-defined methods of the data storage class.
Definition: Affil.hpp:56
CArticleId –.
Definition: ArticleId.hpp:66
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
Definition: Date.hpp:53
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
CImprint –.
Definition: Imprint.hpp:66
@Name_std.hpp User-defined methods of the data storage class.
Definition: Name_std.hpp:56
Definition: Pub.hpp:56
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
C_E –.
Definition: Title_.hpp:96
char * mOffset
Definition: ftablock.h:332
size_t len
Definition: ftablock.h:333
CFlatFileData * mpData
Definition: ftablock.h:331
int mType
Definition: ftablock.h:330
@ ParFlat_RG
Definition: embl.h:66
@ ParFlat_RL
Definition: embl.h:69
@ ParFlat_RT
Definition: embl.h:68
@ ParFlat_RX
Definition: embl.h:65
@ ParFlat_RA
Definition: embl.h:67
@ ParFlat_RC
Definition: embl.h:63
#define ERR_FORMAT_MultiplePatRefs
Definition: flat2err.h:47
#define ERR_REFERENCE_Illegalreference
Definition: flat2err.h:287
#define ERR_REFERENCE_InvalidInPress
Definition: flat2err.h:290
#define ERR_REFERENCE_GsdbRefDropped
Definition: flat2err.h:299
#define ERR_REFERENCE_Fail_to_parse
Definition: flat2err.h:288
#define ERR_REFERENCE_LongAuthorName
Definition: flat2err.h:306
#define ERR_REFERENCE_NoContactInfo
Definition: flat2err.h:286
#define ERR_REFERENCE_MissingAuthors
Definition: flat2err.h:307
#define ERR_REFERENCE_UnusualBookFormat
Definition: flat2err.h:300
#define ERR_REFERENCE_Thesis
Definition: flat2err.h:284
#define ERR_REFERENCE_IllegalDate
Definition: flat2err.h:282
#define ERR_REFERENCE_InvalidMuid
Definition: flat2err.h:309
#define ParFlat_LANL_AC
#define INSDXREF_DBNAME
Definition: fta_xml.h:101
#define INSDREFERENCE_REMARK
Definition: fta_xml.h:98
char * XMLFindTagValue(const char *entry, const XmlIndex *xip, Int4 tag)
Definition: xm_index.cpp:213
#define INSDREFERENCE_PUBMED
Definition: fta_xml.h:97
char * XMLGetTagValue(const char *entry, const XmlIndex *xip)
Definition: xm_index.cpp:197
#define INSDREFERENCE_AUTHORS
Definition: fta_xml.h:92
#define INSDREFERENCE_XREF
Definition: fta_xml.h:99
#define INSDXREF_ID
Definition: fta_xml.h:102
#define INSDREFERENCE_TITLE
Definition: fta_xml.h:94
#define INSDREFERENCE_JOURNAL
Definition: fta_xml.h:95
#define INSDREFERENCE_MEDLINE
Definition: fta_xml.h:96
#define INSDREFERENCE_CONSORTIUM
Definition: fta_xml.h:93
#define INSDREFERENCE_REFERENCE
Definition: fta_xml.h:90
char * XMLConcatSubTags(const char *entry, const XmlIndex *xip, Int4 tag, Char sep)
Definition: xm_index.cpp:1548
std::list< CRef< objects::CPub > > TPubList
Definition: ftablock.h:62
char * StringSave(const char *s)
Definition: ftacpp.hpp:61
bool StringEquNI(const char *s1, const char *s2, size_t n)
Definition: ftacpp.hpp:116
bool StringEquN(const char *s1, const char *s2, size_t n)
Definition: ftacpp.hpp:106
void StringCpy(char *d, const char *s)
Definition: ftacpp.hpp:74
void MemFree(char *p)
Definition: ftacpp.hpp:55
size_t StringLen(const char *s)
Definition: ftacpp.hpp:60
char * StringRChr(char *s, const char c)
Definition: ftacpp.hpp:78
const char * months[]
Definition: ftaerr.cpp:118
CRef< CCit_art > FetchPubPmId(TEntrezId pmid)
Definition: ftamed.cpp:91
@ ParFlat_AUTHORS
Definition: genbank.h:67
@ ParFlat_JOURNAL
Definition: genbank.h:70
@ ParFlat_CONSRTM
Definition: genbank.h:68
@ ParFlat_REMARK
Definition: genbank.h:74
@ ParFlat_MEDLINE
Definition: genbank.h:73
@ ParFlat_TITLE
Definition: genbank.h:69
@ ParFlat_PUBMED
Definition: genbank.h:75
static int type
Definition: getdata.c:31
#define SEV_WARNING
Definition: gicache.c:90
#define SEV_ERROR
Definition: gicache.c:91
#define SEV_REJECT
Definition: gicache.c:92
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ENTREZ_ID_FROM(T, value)
Definition: ncbimisc.hpp:1098
#define ZERO_ENTREZ_ID
Definition: ncbimisc.hpp:1102
#define StringStr
Definition: ncbistr.hpp:322
#define ErrPostStr
Definition: ncbierr.hpp:68
#define StringChr
Definition: ncbistr.hpp:317
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
ErrSev
Definition: ncbierr.hpp:63
TPrim & Set(void)
Definition: serialbase.hpp:351
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
char Char
Alias for char.
Definition: ncbitype.h:93
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
Definition: ncbistr.hpp:2876
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5352
@ fAllowTrailingSymbols
Ignore trailing non-numerics characters.
Definition: ncbistr.hpp:298
bool IsProc(void) const
Check if variant Proc is selected.
Definition: Cit_art_.hpp:507
bool IsSetVolume(void) const
Check if a value has been assigned to Volume data member.
Definition: Imprint_.hpp:746
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_book_.hpp:347
const TPart_supi & GetPart_supi(void) const
Get the Part_supi member data.
Definition: Imprint_.hpp:1139
const TCit & GetCit(void) const
Get the Cit member data.
Definition: Cit_let_.hpp:267
void SetPages(const TPages &value)
Assign a value to Pages data member.
Definition: Imprint_.hpp:861
void SetIds(TIds &value)
Assign a value to Ids data member.
Definition: Cit_art_.cpp:258
const TBook & GetBook(void) const
Get the Book member data.
Definition: Cit_proc_.hpp:214
bool IsSetAuthors(void) const
Check if a value has been assigned to Authors data member.
Definition: Cit_gen_.hpp:623
bool IsSetAuthors(void) const
authors (ANSI requires) Check if a value has been assigned to Authors data member.
Definition: Cit_art_.hpp:534
void SetTitle(TTitle &value)
Assign a value to Title data member.
Definition: Cit_art_.cpp:210
void SetDate(TDate &value)
Assign a value to Date data member.
Definition: Cit_sub_.cpp:101
bool IsSetPrepub(void) const
Check if a value has been assigned to Prepub data member.
Definition: Imprint_.hpp:1080
const TFrom & GetFrom(void) const
Get the From member data.
Definition: Cit_art_.hpp:567
bool IsSetApplicants(void) const
Applicants Check if a value has been assigned to Applicants data member.
Definition: Cit_pat_.hpp:988
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_gen_.hpp:635
bool IsSetAssignees(void) const
Assignees Check if a value has been assigned to Assignees data member.
Definition: Cit_pat_.hpp:1009
void SetIssue(const TIssue &value)
Assign a value to Issue data member.
Definition: Imprint_.hpp:814
void SetTitle(TTitle &value)
Assign a value to Title data member.
Definition: Cit_book_.cpp:62
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_sub_.hpp:357
void SetFrom(TFrom &value)
Assign a value to From data member.
Definition: Cit_art_.cpp:248
const TIssue & GetIssue(void) const
Get the Issue member data.
Definition: Imprint_.hpp:805
void SetAffil(TAffil &value)
Assign a value to Affil data member.
Definition: Auth_list_.cpp:160
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
Definition: Cit_art_.hpp:555
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_art_.cpp:227
void SetSerial_number(TSerial_number value)
Assign a value to Serial_number data member.
Definition: Cit_gen_.hpp:902
bool IsSetAuthors(void) const
not necessarily authors of the paper Check if a value has been assigned to Authors data member.
Definition: Cit_sub_.hpp:345
void ResetPart_supi(void)
Reset Part_supi data member.
Definition: Imprint_.cpp:142
TPrepub GetPrepub(void) const
Get the Prepub member data.
Definition: Imprint_.hpp:1099
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_sub_.cpp:74
void ResetPrepub(void)
Reset Prepub data member.
Definition: Imprint_.hpp:1092
const TProc & GetProc(void) const
Get the variant data.
Definition: Cit_art_.cpp:155
void SetImp(TImp &value)
Assign a value to Imp data member.
Definition: Cit_book_.cpp:107
TStr & SetStr(void)
Select the variant.
Definition: Affil_.hpp:1200
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
Definition: Auth_list_.hpp:464
bool IsSetAuthors(void) const
author/inventor Check if a value has been assigned to Authors data member.
Definition: Cit_pat_.hpp:703
void SetVolume(const TVolume &value)
Assign a value to Volume data member.
Definition: Imprint_.hpp:767
void SetNames(TNames &value)
Assign a value to Names data member.
Definition: Auth_list_.cpp:149
bool IsSetIssue(void) const
Check if a value has been assigned to Issue data member.
Definition: Imprint_.hpp:793
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
Definition: Cit_book_.cpp:93
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_pat_.hpp:715
void SetDate(TDate &value)
Assign a value to Date data member.
Definition: Imprint_.cpp:73
const TApplicants & GetApplicants(void) const
Get the Applicants member data.
Definition: Cit_pat_.hpp:1000
bool IsSetDate(void) const
date of publication Check if a value has been assigned to Date data member.
Definition: Imprint_.hpp:716
EPrepub
for prepublication citations
Definition: Imprint_.hpp:94
bool IsBook(void) const
Check if variant Book is selected.
Definition: Cit_art_.hpp:501
void SetPubstatus(TPubstatus value)
Assign a value to Pubstatus data member.
Definition: Imprint_.hpp:1223
bool IsSetAuthors(void) const
authors Check if a value has been assigned to Authors data member.
Definition: Cit_book_.hpp:335
void SetMedium(TMedium value)
Assign a value to Medium data member.
Definition: Cit_sub_.hpp:424
bool IsSetBook(void) const
citation to meeting Check if a value has been assigned to Book data member.
Definition: Cit_proc_.hpp:202
const TNames & GetNames(void) const
Get the Names member data.
Definition: Auth_list_.hpp:478
bool IsSetCit(void) const
same fields as a book Check if a value has been assigned to Cit data member.
Definition: Cit_let_.hpp:255
const TStd & GetStd(void) const
Get the variant data.
Definition: Auth_list_.hpp:410
void SetPart_sup(const TPart_sup &value)
Assign a value to Part_sup data member.
Definition: Imprint_.hpp:997
EMedium
medium of submission
Definition: Cit_sub_.hpp:95
void SetPrepub(TPrepub value)
Assign a value to Prepub data member.
Definition: Imprint_.hpp:1108
bool IsSetPart_supi(void) const
part/sup on issue Check if a value has been assigned to Part_supi data member.
Definition: Imprint_.hpp:1127
const TAuthors & GetAuthors(void) const
Get the Authors member data.
Definition: Cit_art_.hpp:546
const TAssignees & GetAssignees(void) const
Get the Assignees member data.
Definition: Cit_pat_.hpp:1021
const TImp & GetImp(void) const
Get the Imp member data.
Definition: Cit_book_.hpp:377
bool IsSetPages(void) const
Check if a value has been assigned to Pages data member.
Definition: Imprint_.hpp:840
void SetPart_supi(const TPart_supi &value)
Assign a value to Part_supi data member.
Definition: Imprint_.hpp:1148
bool IsStd(void) const
Check if variant Std is selected.
Definition: Auth_list_.hpp:404
const TBook & GetBook(void) const
Get the variant data.
Definition: Cit_art_.cpp:133
@ ePrepub_in_press
accepted, not published
Definition: Imprint_.hpp:96
@ ePrepub_submitted
submitted, not accepted
Definition: Imprint_.hpp:95
void SetYear(TYear value)
Assign a value to Year data member.
Definition: Date_std_.hpp:435
void SetMonth(TMonth value)
Assign a value to Month data member.
Definition: Date_std_.hpp:482
TStd & SetStd(void)
Select the variant.
Definition: Date_.cpp:115
void SetDay(TDay value)
Assign a value to Day data member.
Definition: Date_std_.hpp:529
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
Definition: Name_std_.hpp:410
const TLast & GetLast(void) const
Get the Last member data.
Definition: Name_std_.hpp:422
const TStd & GetStd(void) const
Get the variant data.
Definition: Date_.cpp:109
bool IsSetCit(void) const
article citation Check if a value has been assigned to Cit data member.
const TCit & GetCit(void) const
Get the Cit member data.
bool IsMedline(void) const
Check if variant Medline is selected.
Definition: Pub_.hpp:596
TPmid & SetPmid(void)
Select the variant.
Definition: Pub_.hpp:690
bool IsBook(void) const
Check if variant Book is selected.
Definition: Pub_.hpp:641
const TMedline & GetMedline(void) const
Get the variant data.
Definition: Pub_.cpp:211
const TMan & GetMan(void) const
Get the variant data.
Definition: Pub_.cpp:365
TMuid & SetMuid(void)
Select the variant.
Definition: Pub_.hpp:615
TBook & SetBook(void)
Select the variant.
Definition: Pub_.cpp:283
Tdata & Set(void)
Assign a value to data member.
Definition: Pub_equiv_.hpp:171
const TArticle & GetArticle(void) const
Get the variant data.
Definition: Pub_.cpp:233
const TSub & GetSub(void) const
Get the variant data.
Definition: Pub_.cpp:189
const TPatent & GetPatent(void) const
Get the variant data.
Definition: Pub_.cpp:321
const Tdata & Get(void) const
Get the member data.
Definition: Pub_equiv_.hpp:165
const TProc & GetProc(void) const
Get the variant data.
Definition: Pub_.cpp:299
const TEquiv & GetEquiv(void) const
Get the variant data.
Definition: Pub_.cpp:387
TMan & SetMan(void)
Select the variant.
Definition: Pub_.cpp:371
bool IsEquiv(void) const
Check if variant Equiv is selected.
Definition: Pub_.hpp:671
bool IsProc(void) const
Check if variant Proc is selected.
Definition: Pub_.hpp:647
TSub & SetSub(void)
Select the variant.
Definition: Pub_.cpp:195
bool IsSub(void) const
Check if variant Sub is selected.
Definition: Pub_.hpp:590
TGen & SetGen(void)
Select the variant.
Definition: Pub_.cpp:173
const TGen & GetGen(void) const
Get the variant data.
Definition: Pub_.cpp:167
TPatent & SetPatent(void)
Select the variant.
Definition: Pub_.cpp:327
bool IsPatent(void) const
Check if variant Patent is selected.
Definition: Pub_.hpp:653
bool IsArticle(void) const
Check if variant Article is selected.
Definition: Pub_.hpp:629
TArticle & SetArticle(void)
Select the variant.
Definition: Pub_.cpp:239
bool IsGen(void) const
Check if variant Gen is selected.
Definition: Pub_.hpp:584
const TBook & GetBook(void) const
Get the variant data.
Definition: Pub_.cpp:277
bool IsMan(void) const
Check if variant Man is selected.
Definition: Pub_.hpp:665
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:221
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
Definition: Gb_qual_.hpp:200
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:268
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Gb_qual_.hpp:212
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ eReftype_no_target
nothing specified (EMBL)
Definition: Pubdesc_.hpp:95
void ind_subdbp(DataBlkPtr dbp, DataBlkPtr ind[], int maxkw, Parser::EFormat bank)
Definition: ind.cpp:122
@ ParFlat_REF_NO_TARGET
Definition: index.h:63
Int4 IsNewAccessFormat(const Char *acnum)
Definition: indx_blk.cpp:995
char * buf
int i
int len
const struct ncbi::grid::netcache::search::fields::SIZE size
#define strdup
Definition: ncbi_ansi_ext.h:70
const char * tag
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int isalnum(Uchar c)
Definition: ncbictype.hpp:62
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
#define nullptr
Definition: ncbimisc.hpp:45
static Format format
Definition: njn_ioutil.cpp:53
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
User-defined methods of the data storage class.
static BOOL number
Definition: pcregrep.c:193
static void XMLGetXrefs(char *entry, XmlIndexPtr xip, TQualVector &quals)
Definition: ref.cpp:1709
static CRef< CPub > get_muid(char *str, Parser::EFormat format)
Definition: ref.cpp:232
USING_SCOPE(objects)
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlkPtr dbp, Int4 col_data)
Definition: ref.cpp:2445
static char * get_embl_str_pub_id(char *str, const Char *tag)
Definition: ref.cpp:263
Int4 fta_remark_is_er(const Char *str)
Definition: ref.cpp:1761
static const char * ERRemarks[]
Definition: ref.cpp:103
static CRef< CCit_art > get_book(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre, Parser::EFormat format, char *jour)
Definition: ref.cpp:982
static void fta_check_long_name_in_article(const CCit_art &cit_art, bool soft_report)
Definition: ref.cpp:2324
static CRef< CPub > get_num(char *str)
Definition: ref.cpp:222
CRef< CPubdesc > gb_refs_common(ParserPtr pp, DataBlkPtr dbp, Int4 col_data, bool bParser, DataBlkPtr **ppInd, bool &no_auth)
Definition: ref.cpp:1964
#define MAXKW
Definition: ref.cpp:86
static void fta_check_long_last_name(const CAuth_list &authors, bool soft_report)
Definition: ref.cpp:2293
static void fta_propagate_pmid_muid(CPub_equiv &pub_equiv)
Definition: ref.cpp:2405
static CRef< CCit_let > get_thesis(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre)
Definition: ref.cpp:1148
static TEntrezId get_embl_pmid(char *str)
Definition: ref.cpp:293
static void fta_sort_pubs(TPubList &pubs)
Definition: ref.cpp:2274
static CRef< CPubdesc > XMLRefs(ParserPtr pp, DataBlkPtr dbp, bool &no_auth, bool &rej)
Definition: ref.cpp:1782
static CRef< CCit_pat > get_pat(ParserPtr pp, char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, char *eptr)
Definition: ref.cpp:380
static const char * strip_sub_str[]
Definition: ref.cpp:92
static CRef< CCit_sub > get_sub_gsdb(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, ParserPtr pp)
Definition: ref.cpp:1367
static void fta_add_article_ids(CPub &pub, const string &doi, const string &agricola)
Definition: ref.cpp:1735
static CRef< CPubdesc > embl_refs(ParserPtr pp, DataBlkPtr dbp, Int4 col_data, bool &no_auth)
Definition: ref.cpp:2123
static CRef< CCit_sub > get_sub(ParserPtr pp, char *bptr, CRef< CAuth_list > &auth_list)
Definition: ref.cpp:1266
static CRef< CCit_book > get_whole_book(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre)
Definition: ref.cpp:1206
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
Definition: ref.cpp:1468
static void fta_check_long_names(const CPub &pub, bool soft_report)
Definition: ref.cpp:2344
static void normalize_comment(string &comment)
Definition: ref.cpp:126
static CRef< CCit_gen > fta_get_citgen(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title)
Definition: ref.cpp:1418
static void fta_get_part_sup(char *parts, CImprint &imp)
Definition: ref.cpp:544
static CRef< CCit_art > get_art(ParserPtr pp, char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, CImprint::EPrepub pre, bool has_muid, bool *all_zeros, Int4 er)
Definition: ref.cpp:703
static char * ExtractErratum(char *comm)
Definition: ref.cpp:1669
static char * FindSemicolon(char *str)
Definition: ref.cpp:1654
static char * check_book_tit(char *title)
Definition: ref.cpp:321
static bool get_parts(char *bptr, char *eptr, CImprint &imp)
Definition: ref.cpp:599
static CRef< CDate > get_lanl_date(char *s)
Definition: ref.cpp:155
static CRef< CCit_gen > get_unpub(char *bptr, char *eptr, CRef< CAuth_list > &auth_list, const Char *title)
Definition: ref.cpp:941
static char * FindBackSemicolon(char *pchStart, char *pchCurrent)
Definition: ref.cpp:1637
static char * clean_up(char *str)
Definition: ref.cpp:197
@ ParFlat_PATENT_CITATION
Definition: ref.h:47
@ ParFlat_ONLINE_CITATION
Definition: ref.h:50
@ ParFlat_MONOGRAPH_NOT_JOURNAL
Definition: ref.h:40
@ ParFlat_THESIS_CITATION
Definition: ref.h:44
@ ParFlat_BOOK_CITATION
Definition: ref.h:48
@ ParFlat_IN_PRESS
Definition: ref.h:46
@ ParFlat_UNPUB_JOURNAL
Definition: ref.h:39
@ ParFlat_GEN_CITATION
Definition: ref.h:49
@ ParFlat_MISSING_JOURNAL
Definition: ref.h:38
@ ParFlat_SUBMITTED
Definition: ref.h:43
CRef< objects::CPubdesc > sp_refs(ParserPtr pp, DataBlkPtr dbp, Int4 col_data)
Definition: sp_ref.cpp:1329
static const char * str(char *buf, int n)
Definition: stats.c:84
CRef< objects::CPatent_seq_id > psip
Definition: ftablock.h:193
bool is_pat
Definition: ftablock.h:205
vector< IndexblkPtr > entrylist
XmlIndex * next
Definition: ftablock.h:161
XmlIndex * subtags
Definition: ftablock.h:160
Int4 tag
Definition: ftablock.h:153
Definition: type.c:6
CRef< CDate_std > get_full_date(const char *s, bool is_ref, Parser::ESource source)
Definition: utilfun.cpp:972
void fta_StringCpy(char *dst, const char *src)
Definition: utilfun.cpp:1641
Char * StringIStr(const Char *where, const Char *what)
Definition: utilfun.cpp:682
void get_auth_consortium(char *cons, CRef< CAuth_list > &auths)
Definition: utilref.cpp:292
CRef< CDate > get_date(const Char *year)
Definition: utilref.cpp:503
Int4 valid_pages_range(char *pages, const Char *title, Int4 er, bool inpress)
Definition: utilref.cpp:419
void get_auth(char *pt, Uint1 format, char *jour, CRef< CAuth_list > &auths)
Definition: utilref.cpp:253
CRef< CCit_gen > get_error(char *bptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title)
Definition: utilref.cpp:541
#define GB_REF
Definition: utilref.h:37
#define EMBL_REF
Definition: utilref.h:38
#define SP_REF
Definition: utilref.h:39
std::vector< CRef< objects::CGb_qual > > TQualVector
Definition: xgbfeat.h:12
int XDateCheck(const CDate_std &date)
Definition: xutils.cpp:113
void free(voidpf ptr)
Modified on Tue Nov 28 02:22:49 2023 by modify_doxy.py rev. 669887