84 #define THIS_FILE "ftanet.cpp"
86 #define HEALTHY_ACC "U12345"
97 {
"Publication Status: Available-Online prior to print", 51 },
98 {
"Publication Status : Available-Online prior to print", 52 },
99 {
"Publication_Status: Available-Online prior to print", 51 },
100 {
"Publication_Status : Available-Online prior to print", 52 },
101 {
"Publication-Status: Available-Online prior to print", 51 },
102 {
"Publication-Status : Available-Online prior to print", 52 },
103 {
"Publication Status: Online-Only", 31 },
104 {
"Publication Status : Online-Only", 32 },
105 {
"Publication_Status: Online-Only", 31 },
106 {
"Publication_Status : Online-Only", 32 },
107 {
"Publication-Status: Online-Only", 31 },
108 {
"Publication-Status : Online-Only", 32 },
109 {
"Publication Status: Available-Online", 36 },
110 {
"Publication Status : Available-Online", 37 },
111 {
"Publication_Status: Available-Online", 36 },
112 {
"Publication_Status : Available-Online", 37 },
113 {
"Publication-Status: Available-Online", 36 },
114 {
"Publication-Status : Available-Online", 37 },
125 for (; kbp->
str; kbp++) {
130 for (q = p + kbp->
len; *q ==
' ' || *q ==
';';)
137 p = (*comment ==
'\0') ?
nullptr :
StringSave(comment);
140 if (p && (
StringEquNI(p,
"Publication Status", 18) ||
143 ErrPostEx(
SEV_WARNING,
ERR_REFERENCE_UnusualPubStatus,
"An unusual Publication Status comment exists for this record: \"%s\". If it is a new variant of the special comments used to indicate ahead-of-print or online-only articles, then the comment must be added to the appropriate table of the parser.", p);
165 size_t i = strlen(
str);
168 if ((*p ==
' ' || *p ==
'.') && !
strcmp(p + 1,
"III.")) {
182 if ((*p ==
' ' || *p ==
'.') &&
185 if (!
strcmp(p + 1,
"III"))
187 else if (!
strcmp(p + 1,
"2nd"))
189 else if (!
strcmp(p + 1,
"Jr."))
205 if ((*p ==
' ' || *p ==
'.') &&
208 if (!
strcmp(p + 1,
"Jr"))
210 else if (!
strcmp(p + 1,
"IV"))
229 for (
const auto& pub : pub_list) {
236 for (
auto& pub : pub_list) {
238 if (pub->IsArticle()) {
244 }
else if (pub->IsSub()) {
250 }
else if (pub->IsGen()) {
252 if (!
gen.IsSetAuthors() || !
gen.CanGetAuthors())
255 authors = &
gen.SetAuthors();
256 }
else if (pub->IsBook()) {
262 }
else if (pub->IsMan()) {
272 }
else if (pub->IsPatent()) {
285 char* aff = affil.
SetStr().data();
293 CAuth_list::TNames::TStd::iterator it = (
names.SetStd()).begin();
294 CAuth_list::TNames::TStd::iterator it_end = (
names.SetStd()).end();
295 for (; it != it_end; it++) {
296 if ((*it)->IsSetAffil() && (*it)->CanGetAffil() &&
298 CAffil& affil = (*it)->SetAffil();
299 char* aff = affil.
SetStr().data();
303 if ((*it)->IsSetName() && (*it)->CanGetName() &&
304 (*it)->GetName().IsName()) {
305 CName_std& namestd = (*it)->SetName().SetName();
320 for (
auto& pub : pub_list) {
321 if (! pub->IsArticle())
331 string language =
journal.GetImp().GetLanguage();
333 char* lang = language.data();
334 for (p = lang; *p !=
'\0'; p++)
335 if (*p >=
'A' && *p <=
'Z')
337 journal.SetImp().SetLanguage(lang);
348 for (
const auto& pub : pub_descr.
GetPub().
Get()) {
349 if (! pub->IsArticle())
352 const CCit_art& art = pub->GetArticle();
360 status =
journal.GetImp().GetPubstatus();
367 if (comment && *comment != 0)
390 if (! taxon_srv.
Init())
430 static Uint1 fta_init_pubseq(
void)
441 char* env_val = getenv(
"ALTER_OPEN_SERVER");
442 string idserver = env_val ? env_val :
"";
444 env_val = getenv(
"ALTER_USER_NAME");
445 string idusername = env_val ? env_val :
"";
447 env_val = getenv(
"ALTER_USER_PASSWORD");
448 string idpassword = env_val ? env_val :
"";
450 s_pubseq.reset(
new CPubseqAccess(idserver.empty() ?
"PUBSEQ_OS_INTERNAL_GI64" : idserver.c_str(),
451 idusername.empty() ?
"anyone" : idusername.c_str(),
452 idpassword.empty() ?
"allowed" : idpassword.c_str()));
454 if (! s_pubseq || ! s_pubseq->CheckConnection())
460 void fta_entrez_fetch_enable(
ParserPtr pp)
470 "Failed to connect to PUBSEQ OS.");
476 "No PUBSEQ Bioseq fetch will be performed.");
481 void fta_entrez_fetch_disable(
ParserPtr pp)
508 findPubOptions.always_look,
509 findPubOptions.replace_cit,
510 findPubOptions.merge_ids,
534 for (
const auto& pub : pub_list) {
535 if (pub->IsArticle()) {
544 for (CPub_equiv::Tdata::iterator pub = pub_list.begin(); pub != pub_list.end();) {
545 if (! (*pub)->IsMuid() && ! (*pub)->IsPmid()) {
550 ErrPostEx(
SEV_ERROR,
ERR_REFERENCE_ArticleIdDiscarded,
"Article identifier was found for an unpublished, direct submission, book or unparsable article reference, and has been discarded : %s %d.", (*pub)->IsMuid() ?
"MUID" :
"PMID", (*pub)->GetMuid());
552 pub = pub_list.erase(pub);
565 list<CRef<CPub>> cit_arts;
566 for (
auto& pPub : pub_equiv.
Set()) {
567 if (! pPub->IsGen()) {
570 const CCit_gen& cit_gen = pPub->SetGen();
573 cit_arts.push_back(pPub);
578 if (cit_arts.empty()) {
584 auto& cit_gen = cit_arts.front();
586 list<CRef<CPub>> others;
589 for (
auto& pPub : pub_equiv.
Set()) {
592 if (pPub->IsMuid() && ! pMuid)
594 else if (pPub->IsPmid() && ! pPmid)
596 else if (! pPub->IsArticle())
597 others.push_back(pPub);
609 if (new_cit_art.
Empty()) {
614 for (
const auto& pId : new_cit_art->
GetIds().
Get()) {
615 if (pId->IsPubmed()) {
616 pmid = pId->GetPubmed();
617 }
else if (pId->IsMedline()) {
618 muid = pId->GetMedline();
626 }
else if (pmid != oldpmid) {
640 cit_arts.push_back(new_pub);
653 auto& pub_list = pub_equiv.
Set();
656 pub_list.push_back(pPmid);
659 pub_list.push_back(pMuid);
661 pub_list.splice(pub_list.end(), cit_arts);
686 if (pDesc->IsPub()) {
687 const auto& pubdesc = pDesc->GetPub();
688 return (pubdesc.IsSetComment() &&
689 fta_remark_is_er(pubdesc.GetComment().c_str()));
695 for (
auto& pDescr : descrs.Set()) {
696 if (! pDescr->IsPub())
700 fix_pub_equiv(pub_descr.
SetPub(), er);
701 if (m_pParser->qamode)
707 for (
auto& pAnnot : annots) {
708 if (! pAnnot->IsSetData() || ! pAnnot->GetData().IsFtable())
711 for (
auto& pFeat : pAnnot->SetData().SetFtable()) {
712 if (pFeat->IsSetData() && pFeat->GetData().IsPub())
714 fix_pub_equiv(pFeat->SetData().SetPub().SetPub(), er);
715 if (m_pParser->qamode)
717 fta_fix_affil(pFeat->SetData().SetPub().SetPub().Set(), m_pParser->source);
721 if (! pFeat->IsSetCit()) {
725 for (
auto& pPub : pFeat->SetCit().SetPub()) {
727 fix_pub_annot(*pPub, er);
738 for (
auto& pEntry : seq_entries) {
740 find_pub(bio_set->SetAnnot(), bio_set->SetDescr());
742 if (bio_set->GetDescr().Get().empty())
743 bio_set->ResetDescr();
745 if (bio_set->SetAnnot().empty())
746 bio_set->ResetAnnot();
750 find_pub(bioseq->SetAnnot(), bioseq->SetDescr());
752 if (bioseq->GetDescr().Get().empty())
753 bioseq->ResetDescr();
755 if (bioseq->SetAnnot().empty())
756 bioseq->ResetAnnot();
772 find_pub.
Apply(seq_entries);
783 for (
const string& org_syn : org_ref.
GetSyn()) {
785 for (
const string& tax_syn : tax_org_ref.
GetSyn()) {
786 if (org_syn == tax_syn) {
795 "New synonym: %s for [%s].",
796 org_syn.c_str(), org_ref.
GetTaxname().c_str());
802 #define TAX_SERVER_TIMEOUT 3
821 bool connection_failed =
false;
822 for (
size_t i = 0;
i < 3 && taxdata.
Empty(); ++
i) {
824 taxdata = taxon.
GetById(taxid);
826 connection_failed =
true;
832 if (taxdata.
Empty()) {
833 if (connection_failed) {
842 if (taxdata->GetIs_species_level() != 1 && ! isoh) {
847 ret->
Assign(taxdata->GetOrg());
861 if (taxid <= ZERO_TAX_ID && pp->taxserver == 0)
890 bool connection_failed =
true;
891 for (
size_t i = 0;
i < 3 && taxdata.
Empty(); ++
i) {
896 taxdata = taxon.
Lookup(org_ref);
897 connection_failed =
false;
905 if (taxdata.
Empty()) {
909 if (connection_failed) {
929 if (taxdata->GetIs_species_level() != 1 && (ibp->
is_pat ==
false ||
939 ret->
Assign(taxdata->GetOrg());
953 size_t last_char = taxname.size();
954 for (; last_char; --last_char) {
955 if (!
isspace(taxname[last_char]))
959 if (!
isspace(taxname[last_char]))
961 org_ref.
SetTaxname(taxname.substr(0, last_char));
973 organelle_str(organelle),
974 space(taxname.size() ?
" " :
"");
976 old_taxname = taxname;
977 taxname = organelle_str + space + taxname;
992 if (new_org_ref.
Empty() && attempt == 1) {
1001 org_ref.
Assign(*new_org_ref);
1021 for (;
i < 5;
i++) {
1064 CPubseqAccess::IdGiClass id_gi;
1065 CPubseqAccess::IdBlobClass id_blob;
1067 if (! s_pubseq->GetIdGiClass(gi, id_gi) || ! s_pubseq->GetIdBlobClass(id_gi, id_blob) ||
1068 id_blob.div[0] ==
'\0') {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void ShrinkSpaces(char *line)
@Affil.hpp User-defined methods of the data storage class.
@Auth_list.hpp User-defined methods of the data storage class.
void Apply(TEntryList &entries)
list< CRef< CSeq_entry > > TEntryList
unique_ptr< edit::CPubFix > m_pPubFix
void find_pub(list< CRef< CSeq_annot >> &annots, CSeq_descr &descrs)
void fix_pub_annot(CPub &pub, bool er)
void fix_pub_equiv(CPub_equiv &pub_equiv, bool er)
unique_ptr< CPubFixMessageListener > m_pPubFixListener
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
@Name_std.hpp User-defined methods of the data storage class.
@Pubdesc.hpp User-defined methods of the data storage class.
@Seq_descr.hpp User-defined methods of the data storage class.
CRef< CTaxon2_data > GetById(TTaxId tax_id)
TTaxId GetTaxIdByOrgRef(const COrg_ref &inp_orgRef)
CConstRef< CTaxon2_data > LookupMerge(COrg_ref &inp_orgRef, string *psLog=0, TOrgRefStatus *pStatusOut=0)
bool SetSynonyms(bool on_off)
CRef< CTaxon2_data > Lookup(const COrg_ref &inp_orgRef, string *psLog=0)
Template class for iteration on objects of class C.
static const struct name_t names[]
#define ERR_REFERENCE_UnusualPubStatus
#define ERR_SERVER_NotUsed
#define ERR_SERVER_NoTaxLookup
#define ERR_SERVER_Failed
#define ERR_REFERENCE_ArticleIdDiscarded
#define ERR_REFERENCE_MuidPmidMissMatch
#define ERR_ORGANISM_TaxIdNotSpecLevel
#define ERR_ACCESSION_CannotGetDivForSecondary
#define ERR_SERVER_NoPubMedLookup
#define ERR_ORGANISM_TaxNameNotFound
#define ERR_REFERENCE_InvalidPmid
#define ERR_SERVER_TaxServerDown
#define ERR_ORGANISM_TaxIdNotUnique
#define ERR_SERVER_TaxNameWasFound
#define ERR_REFERENCE_CitArtLacksPmid
#define ERR_ORGANISM_NewSynonym
#define ERR_REFERENCE_DifferentPmids
list< CRef< objects::CSeq_entry > > TEntryList
std::list< CRef< objects::CPub > > TPubList
char * StringSave(const char *s)
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
void InitPubmedClient(bool normalize)
edit::IPubmedUpdater * GetPubmedClient()
CRef< CCit_art > FetchPubPmId(TEntrezId pmid)
static CRef< COrg_ref > fta_get_orgref_byid(ParserPtr pp, bool *drop, TTaxId taxid, bool isoh)
Int4 fta_is_con_div(ParserPtr pp, const CSeq_id &id, const Char *acc)
#define TAX_SERVER_TIMEOUT
void fta_find_pub_explore(ParserPtr pp, TEntryList &seq_entries)
static void fta_check_pub_ids(TPubList &pub_list)
static TGi fta_get_gi_for_seq_id(const CSeq_id &id)
void fta_fini_servers(ParserPtr pp)
CRef< COrg_ref > fta_fix_orgref_byid(ParserPtr pp, TTaxId taxid, bool *drop, bool isoh)
void fta_init_servers(ParserPtr pp)
static char * fta_strip_pub_comment(char *comment, const KwordBlk *kbp)
static void fta_fix_affil(TPubList &pub_list, Parser::ESource source)
static void fta_fix_last_initials(CName_std &namestd, bool initials)
static Uint1 fta_init_tax_server(void)
void fta_init_gbdataloader()
static const STimeout s_timeout
void fta_fill_find_pub_option(ParserPtr pp, bool htag, bool rtag)
static CRef< COrg_ref > fta_replace_org(ParserPtr pp, bool *drop, COrg_ref &org_ref, const Char *pn, int merge, Int4 attempt)
static void fta_fix_imprint_language(TPubList &pub_list)
static void fix_synonyms(CTaxon1 &taxon, COrg_ref &org_ref)
static const KwordBlk PubStatus[]
static void fta_strip_er_remarks(CPubdesc &pub_descr)
static Uint1 fta_init_med_server(bool normalize)
void fta_fix_orgref(ParserPtr pp, COrg_ref &org_ref, bool *drop, char *organelle)
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
#define TAX_ID_TO(T, tax_id)
SStrictId_Tax::TId TTaxId
Taxon id type.
void DBAPI_RegisterDriver_FTDS(void)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
TGi GetGiForId(const objects::CSeq_id &id, CScope &scope, EGetIdType flags=0)
Given a Seq-id retrieve the corresponding GI.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
bool CanGetAffil(void) const
Check if it is safe to call GetAffil method.
bool IsSetAffil(void) const
author affiliation Check if a value has been assigned to Affil data member.
bool IsSetAuthors(void) const
authors (ANSI requires) Check if a value has been assigned to Authors data member.
const TJournal & GetJournal(void) const
Get the variant data.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
const TFrom & GetFrom(void) const
Get the From member data.
const TCit & GetCit(void) const
Get the Cit member data.
const TAffil & GetAffil(void) const
Get the Affil member data.
void SetFrom(TFrom &value)
Assign a value to From data member.
void SetAffil(TAffil &value)
Assign a value to Affil data member.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
bool IsSetAuthors(void) const
not necessarily authors of the paper Check if a value has been assigned to Authors data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
bool IsSetCit(void) const
anything, not parsable Check if a value has been assigned to Cit data member.
const Tdata & Get(void) const
Get the member data.
TStr & SetStr(void)
Select the variant.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
void SetCit(TCit &value)
Assign a value to Cit data member.
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
bool CanGetCit(void) const
Check if it is safe to call GetCit method.
bool IsSetAuthors(void) const
author/inventor Check if a value has been assigned to Authors data member.
E_Choice Which(void) const
Which variant is currently selected.
void SetNames(TNames &value)
Assign a value to Names data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
bool CanGetNames(void) const
Check if it is safe to call GetNames method.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
bool IsSetIds(void) const
lots of ids Check if a value has been assigned to Ids data member.
bool IsSetAuthors(void) const
authors Check if a value has been assigned to Authors data member.
bool IsJournal(void) const
Check if variant Journal is selected.
const TNames & GetNames(void) const
Get the Names member data.
bool IsSetCit(void) const
same fields as a book Check if a value has been assigned to Cit data member.
const TIds & GetIds(void) const
Get the Ids member data.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
E_Choice Which(void) const
Which variant is currently selected.
@ ePubStatus_ppublish
published in print by publisher
@ ePubStatus_aheadofprint
epublish, but will be followed by print
@ ePubStatus_epublish
published electronically by publisher
void SetInitials(const TInitials &value)
Assign a value to Initials data member.
bool IsSetSuffix(void) const
Jr, Sr, III Check if a value has been assigned to Suffix data member.
void SetLast(const TLast &value)
Assign a value to Last data member.
bool IsSetInitials(void) const
first + middle initials Check if a value has been assigned to Initials data member.
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
void SetSuffix(const TSuffix &value)
Assign a value to Suffix data member.
bool CanGetSyn(void) const
Check if it is safe to call GetSyn method.
void ResetSyn(void)
Reset Syn data member.
TSyn & SetSyn(void)
Assign a value to Syn data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TSyn & GetSyn(void) const
Get the Syn member data.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
bool IsSetSyn(void) const
synonyms for taxname or common Check if a value has been assigned to Syn data member.
TPmid & SetPmid(void)
Select the variant.
TMuid & SetMuid(void)
Select the variant.
const TPmid & GetPmid(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
const Tdata & Get(void) const
Get the member data.
TEquiv & SetEquiv(void)
Select the variant.
bool IsEquiv(void) const
Check if variant Equiv is selected.
TMuid GetMuid(void) const
Get the variant data.
TArticle & SetArticle(void)
Select the variant.
void SetAccession(const TAccession &value)
Assign a value to Accession data member.
TGenbank & SetGenbank(void)
Select the variant.
bool IsSetComment(void) const
any comment on this pub in context Check if a value has been assigned to Comment data member.
void ResetComment(void)
Reset Comment data member.
void SetPub(TPub &value)
Assign a value to Pub data member.
const TComment & GetComment(void) const
Get the Comment member data.
const Tdata & Get(void) const
Get the member data.
void SetComment(const TComment &value)
Assign a value to Comment data member.
const TPub & GetPub(void) const
Get the Pub member data.
const CharType(& source)[N]
int strcmp(const char *str1, const char *str2)
IMessage/IMessageListener interfaces and basic implementations.
void SleepSec(unsigned long sec, EInterruptOnSignal onsignal=eRestartOnSignal)
Sleep.
User-defined methods of the data storage class.
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
static const char * str(char *buf, int n)
vector< IndexblkPtr > entrylist
void fta_StringCpy(char *dst, const char *src)
Char * StringIStr(const Char *where, const Char *what)
static wxAcceleratorEntry entries[3]