84 #define THIS_FILE "ftanet.cpp"
86 #define HEALTHY_ACC "U12345"
97 {
"Publication Status: Available-Online prior to print", 51 },
98 {
"Publication Status : Available-Online prior to print", 52 },
99 {
"Publication_Status: Available-Online prior to print", 51 },
100 {
"Publication_Status : Available-Online prior to print", 52 },
101 {
"Publication-Status: Available-Online prior to print", 51 },
102 {
"Publication-Status : Available-Online prior to print", 52 },
103 {
"Publication Status: Online-Only", 31 },
104 {
"Publication Status : Online-Only", 32 },
105 {
"Publication_Status: Online-Only", 31 },
106 {
"Publication_Status : Online-Only", 32 },
107 {
"Publication-Status: Online-Only", 31 },
108 {
"Publication-Status : Online-Only", 32 },
109 {
"Publication Status: Available-Online", 36 },
110 {
"Publication Status : Available-Online", 37 },
111 {
"Publication_Status: Available-Online", 36 },
112 {
"Publication_Status : Available-Online", 37 },
113 {
"Publication-Status: Available-Online", 36 },
114 {
"Publication-Status : Available-Online", 37 },
124 for (; kbp->
str; kbp++) {
129 size_t i = p - comment.c_str();
130 size_t j =
i + kbp->
len;
131 while (j < comment.size() && (comment[j] ==
' ' || comment[j] ==
';'))
139 p = comment.empty() ?
nullptr : comment.c_str();
140 if (p && (
StringEquNI(p,
"Publication Status", 18) ||
143 ErrPostEx(
SEV_WARNING,
ERR_REFERENCE_UnusualPubStatus,
"An unusual Publication Status comment exists for this record: \"%s\". If it is a new variant of the special comments used to indicate ahead-of-print or online-only articles, then the comment must be added to the appropriate table of the parser.", p);
163 size_t i = strlen(
str);
166 if ((*p ==
' ' || *p ==
'.') && !
strcmp(p + 1,
"III.")) {
180 if ((*p ==
' ' || *p ==
'.') &&
183 if (!
strcmp(p + 1,
"III"))
185 else if (!
strcmp(p + 1,
"2nd"))
187 else if (!
strcmp(p + 1,
"Jr."))
203 if ((*p ==
' ' || *p ==
'.') &&
206 if (!
strcmp(p + 1,
"Jr"))
208 else if (!
strcmp(p + 1,
"IV"))
227 for (
const auto& pub : pub_list) {
234 for (
auto& pub : pub_list) {
236 if (pub->IsArticle()) {
242 }
else if (pub->IsSub()) {
248 }
else if (pub->IsGen()) {
250 if (!
gen.IsSetAuthors() || !
gen.CanGetAuthors())
253 authors = &
gen.SetAuthors();
254 }
else if (pub->IsBook()) {
260 }
else if (pub->IsMan()) {
270 }
else if (pub->IsPatent()) {
289 for (
auto& it :
names) {
290 if (it->IsSetAffil() && it->CanGetAffil() &&
292 CAffil& affil = it->SetAffil();
295 if (it->IsSetName() && it->CanGetName() &&
296 it->GetName().IsName()) {
297 CName_std& namestd = it->SetName().SetName();
312 for (
auto& pub : pub_list) {
313 if (! pub->IsArticle())
323 string language =
journal.GetImp().GetLanguage();
325 char* lang = language.data();
326 for (p = lang; *p !=
'\0'; p++)
327 if (*p >=
'A' && *p <=
'Z')
329 journal.SetImp().SetLanguage(lang);
340 for (
const auto& pub : pub_descr.
GetPub().
Get()) {
341 if (! pub->IsArticle())
344 const CCit_art& art = pub->GetArticle();
352 status =
journal.GetImp().GetPubstatus();
359 if (! comment.empty())
380 if (! taxon_srv.
Init())
420 static Uint1 fta_init_pubseq(
void)
431 char* env_val = getenv(
"ALTER_OPEN_SERVER");
432 string idserver = env_val ? env_val :
"";
434 env_val = getenv(
"ALTER_USER_NAME");
435 string idusername = env_val ? env_val :
"";
437 env_val = getenv(
"ALTER_USER_PASSWORD");
438 string idpassword = env_val ? env_val :
"";
440 s_pubseq.reset(
new CPubseqAccess(idserver.empty() ?
"PUBSEQ_OS_INTERNAL_GI64" : idserver.c_str(),
441 idusername.empty() ?
"anyone" : idusername.c_str(),
442 idpassword.empty() ?
"allowed" : idpassword.c_str()));
444 if (! s_pubseq || ! s_pubseq->CheckConnection())
450 void fta_entrez_fetch_enable(
ParserPtr pp)
460 "Failed to connect to PUBSEQ OS.");
466 "No PUBSEQ Bioseq fetch will be performed.");
471 void fta_entrez_fetch_disable(
ParserPtr pp)
498 findPubOptions.always_look,
499 findPubOptions.replace_cit,
500 findPubOptions.merge_ids,
524 for (
const auto& pub : pub_list) {
525 if (pub->IsArticle()) {
534 for (CPub_equiv::Tdata::iterator pub = pub_list.begin(); pub != pub_list.end();) {
535 if (! (*pub)->IsMuid() && ! (*pub)->IsPmid()) {
540 ErrPostEx(
SEV_ERROR,
ERR_REFERENCE_ArticleIdDiscarded,
"Article identifier was found for an unpublished, direct submission, book or unparsable article reference, and has been discarded : %s %d.", (*pub)->IsMuid() ?
"MUID" :
"PMID", (*pub)->GetMuid());
542 pub = pub_list.erase(pub);
555 list<CRef<CPub>> cit_arts;
556 for (
auto& pPub : pub_equiv.
Set()) {
557 if (! pPub->IsGen()) {
560 const CCit_gen& cit_gen = pPub->SetGen();
563 cit_arts.push_back(pPub);
568 if (cit_arts.empty()) {
574 auto& cit_gen = cit_arts.front();
576 list<CRef<CPub>> others;
579 for (
auto& pPub : pub_equiv.
Set()) {
582 if (pPub->IsMuid() && ! pMuid)
584 else if (pPub->IsPmid() && ! pPmid)
586 else if (! pPub->IsArticle())
587 others.push_back(pPub);
599 if (new_cit_art.
Empty()) {
604 for (
const auto& pId : new_cit_art->
GetIds().
Get()) {
605 if (pId->IsPubmed()) {
606 pmid = pId->GetPubmed();
607 }
else if (pId->IsMedline()) {
608 muid = pId->GetMedline();
616 }
else if (pmid != oldpmid) {
630 cit_arts.push_back(new_pub);
643 auto& pub_list = pub_equiv.
Set();
646 pub_list.push_back(pPmid);
649 pub_list.push_back(pMuid);
651 pub_list.splice(pub_list.end(), cit_arts);
676 if (pDesc->IsPub()) {
677 const auto& pubdesc = pDesc->GetPub();
678 return (pubdesc.IsSetComment() &&
679 fta_remark_is_er(pubdesc.GetComment()));
685 for (
auto& pDescr : descrs.Set()) {
686 if (! pDescr->IsPub())
690 fix_pub_equiv(pub_descr.
SetPub(), er);
691 if (m_pParser->qamode)
697 for (
auto& pAnnot : annots) {
698 if (! pAnnot->IsSetData() || ! pAnnot->GetData().IsFtable())
701 for (
auto& pFeat : pAnnot->SetData().SetFtable()) {
702 if (pFeat->IsSetData() && pFeat->GetData().IsPub())
705 fix_pub_equiv(pub_descr.
SetPub(), er);
706 if (m_pParser->qamode)
712 if (! pFeat->IsSetCit()) {
716 for (
auto& pPub : pFeat->SetCit().SetPub()) {
718 fix_pub_annot(*pPub, er);
729 for (
auto& pEntry : seq_entries) {
731 find_pub(bio_set->SetAnnot(), bio_set->SetDescr());
733 if (bio_set->GetDescr().Get().empty())
734 bio_set->ResetDescr();
736 if (bio_set->SetAnnot().empty())
737 bio_set->ResetAnnot();
741 find_pub(bioseq->SetAnnot(), bioseq->SetDescr());
743 if (bioseq->GetDescr().Get().empty())
744 bioseq->ResetDescr();
746 if (bioseq->SetAnnot().empty())
747 bioseq->ResetAnnot();
763 find_pub.
Apply(seq_entries);
774 for (
const string& org_syn : org_ref.
GetSyn()) {
776 for (
const string& tax_syn : tax_org_ref.
GetSyn()) {
777 if (org_syn == tax_syn) {
786 "New synonym: %s for [%s].",
787 org_syn.c_str(), org_ref.
GetTaxname().c_str());
793 #define TAX_SERVER_TIMEOUT 3
812 bool connection_failed =
false;
813 for (
size_t i = 0;
i < 3 && taxdata.
Empty(); ++
i) {
815 taxdata = taxon.
GetById(taxid);
817 connection_failed =
true;
823 if (taxdata.
Empty()) {
824 if (connection_failed) {
833 if (taxdata->GetIs_species_level() != 1 && ! isoh) {
838 ret->
Assign(taxdata->GetOrg());
852 if (taxid <= ZERO_TAX_ID && pp->taxserver == 0)
881 bool connection_failed =
true;
882 for (
size_t i = 0;
i < 3 && taxdata.
Empty(); ++
i) {
887 taxdata = taxon.
Lookup(org_ref);
888 connection_failed =
false;
896 if (taxdata.
Empty()) {
900 if (connection_failed) {
920 if (taxdata->GetIs_species_level() != 1 && (ibp->
is_pat ==
false ||
930 ret->
Assign(taxdata->GetOrg());
944 size_t last_char = taxname.size();
945 for (; last_char; --last_char) {
946 if (!
isspace(taxname[last_char]))
950 if (!
isspace(taxname[last_char]))
952 org_ref.
SetTaxname(taxname.substr(0, last_char));
964 organelle_str(organelle),
965 space(taxname.size() ?
" " :
"");
967 old_taxname = taxname;
968 taxname = organelle_str + space + taxname;
983 if (new_org_ref.
Empty() && attempt == 1) {
992 org_ref.
Assign(*new_org_ref);
1012 for (;
i < 5;
i++) {
1055 CPubseqAccess::IdGiClass id_gi;
1056 CPubseqAccess::IdBlobClass id_blob;
1058 if (! s_pubseq->GetIdGiClass(gi, id_gi) || ! s_pubseq->GetIdBlobClass(id_gi, id_blob) ||
1059 id_blob.div[0] ==
'\0') {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void ShrinkSpaces(char *line)
@Affil.hpp User-defined methods of the data storage class.
@Auth_list.hpp User-defined methods of the data storage class.
void Apply(TEntryList &entries)
list< CRef< CSeq_entry > > TEntryList
unique_ptr< edit::CPubFix > m_pPubFix
void find_pub(list< CRef< CSeq_annot >> &annots, CSeq_descr &descrs)
void fix_pub_annot(CPub &pub, bool er)
void fix_pub_equiv(CPub_equiv &pub_equiv, bool er)
unique_ptr< CPubFixMessageListener > m_pPubFixListener
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
@Name_std.hpp User-defined methods of the data storage class.
@Pubdesc.hpp User-defined methods of the data storage class.
@Seq_descr.hpp User-defined methods of the data storage class.
CRef< CTaxon2_data > GetById(TTaxId tax_id)
TTaxId GetTaxIdByOrgRef(const COrg_ref &inp_orgRef)
CConstRef< CTaxon2_data > LookupMerge(COrg_ref &inp_orgRef, string *psLog=0, TOrgRefStatus *pStatusOut=0)
bool SetSynonyms(bool on_off)
CRef< CTaxon2_data > Lookup(const COrg_ref &inp_orgRef, string *psLog=0)
Template class for iteration on objects of class C.
#define ERR_REFERENCE_UnusualPubStatus
#define ERR_SERVER_NotUsed
#define ERR_SERVER_NoTaxLookup
#define ERR_SERVER_Failed
#define ERR_REFERENCE_ArticleIdDiscarded
#define ERR_REFERENCE_MuidPmidMissMatch
#define ERR_ORGANISM_TaxIdNotSpecLevel
#define ERR_ACCESSION_CannotGetDivForSecondary
#define ERR_SERVER_NoPubMedLookup
#define ERR_ORGANISM_TaxNameNotFound
#define ERR_REFERENCE_InvalidPmid
#define ERR_SERVER_TaxServerDown
#define ERR_ORGANISM_TaxIdNotUnique
#define ERR_SERVER_TaxNameWasFound
#define ERR_REFERENCE_CitArtLacksPmid
#define ERR_ORGANISM_NewSynonym
#define ERR_REFERENCE_DifferentPmids
list< CRef< objects::CSeq_entry > > TEntryList
std::list< CRef< objects::CPub > > TPubList
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
edit::CEUtilsUpdater * GetPubmedClient()
void InitPubmedClient(bool normalize)
CRef< CCit_art > FetchPubPmId(TEntrezId pmid)
static CRef< COrg_ref > fta_get_orgref_byid(ParserPtr pp, bool *drop, TTaxId taxid, bool isoh)
Int4 fta_is_con_div(ParserPtr pp, const CSeq_id &id, const Char *acc)
#define TAX_SERVER_TIMEOUT
void fta_find_pub_explore(ParserPtr pp, TEntryList &seq_entries)
static void fta_check_pub_ids(TPubList &pub_list)
static TGi fta_get_gi_for_seq_id(const CSeq_id &id)
void fta_fini_servers(ParserPtr pp)
CRef< COrg_ref > fta_fix_orgref_byid(ParserPtr pp, TTaxId taxid, bool *drop, bool isoh)
void fta_init_servers(ParserPtr pp)
static void fta_fix_affil(TPubList &pub_list, Parser::ESource source)
static void fta_fix_last_initials(CName_std &namestd, bool initials)
static Uint1 fta_init_tax_server(void)
void fta_init_gbdataloader()
static const STimeout s_timeout
void fta_fill_find_pub_option(ParserPtr pp, bool htag, bool rtag)
static CRef< COrg_ref > fta_replace_org(ParserPtr pp, bool *drop, COrg_ref &org_ref, const Char *pn, int merge, Int4 attempt)
static void fta_fix_imprint_language(TPubList &pub_list)
static void fix_synonyms(CTaxon1 &taxon, COrg_ref &org_ref)
static const KwordBlk PubStatus[]
void fta_strip_pub_comment(string &comment, const KwordBlk *kbp)
static void fta_strip_er_remarks(CPubdesc &pub_descr)
static Uint1 fta_init_med_server(bool normalize)
void fta_fix_orgref(ParserPtr pp, COrg_ref &org_ref, bool *drop, char *organelle)
static const struct name_t names[]
static const char * str(char *buf, int n)
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
#define TAX_ID_TO(T, tax_id)
SStrictId_Tax::TId TTaxId
Taxon id type.
void DBAPI_RegisterDriver_FTDS(void)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
TGi GetGiForId(const objects::CSeq_id &id, CScope &scope, EGetIdType flags=0)
Given a Seq-id retrieve the corresponding GI.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
bool CanGetAffil(void) const
Check if it is safe to call GetAffil method.
bool IsSetAffil(void) const
Author affiliation. Check if a value has been assigned to Affil data member.
bool IsSetAuthors(void) const
Authors (ANSI requires). Check if a value has been assigned to Authors data member.
const TJournal & GetJournal(void) const
Get the variant data.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
const TFrom & GetFrom(void) const
Get the From member data.
const TCit & GetCit(void) const
Get the Cit member data.
const TAffil & GetAffil(void) const
Get the Affil member data.
void SetFrom(TFrom &value)
Assign a value to From data member.
void SetAffil(TAffil &value)
Assign a value to Affil data member.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
bool IsSetAuthors(void) const
Not necessarily authors of the paper. Check if a value has been assigned to Authors data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
bool IsSetCit(void) const
Anything, not parsable. Check if a value has been assigned to Cit data member.
list< CRef< CAuthor > > TStd
const Tdata & Get(void) const
Get the member data.
TStr & SetStr(void)
Select the variant.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
void SetCit(TCit &value)
Assign a value to Cit data member.
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
bool CanGetCit(void) const
Check if it is safe to call GetCit method.
bool IsSetAuthors(void) const
Author/inventor. Check if a value has been assigned to Authors data member.
E_Choice Which(void) const
Which variant is currently selected.
void SetNames(TNames &value)
Assign a value to Names data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
bool CanGetNames(void) const
Check if it is safe to call GetNames method.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
bool IsSetIds(void) const
Lots of ids. Check if a value has been assigned to Ids data member.
bool IsSetAuthors(void) const
Authors. Check if a value has been assigned to Authors data member.
bool IsJournal(void) const
Check if variant Journal is selected.
const TNames & GetNames(void) const
Get the Names member data.
bool IsSetCit(void) const
Same fields as a book. Check if a value has been assigned to Cit data member.
const TIds & GetIds(void) const
Get the Ids member data.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
bool CanGetAuthors(void) const
Check if it is safe to call GetAuthors method.
E_Choice Which(void) const
Which variant is currently selected.
@ ePubStatus_ppublish
published in print by publisher
@ ePubStatus_aheadofprint
epublish, but will be followed by print
@ ePubStatus_epublish
published electronically by publisher
void SetInitials(const TInitials &value)
Assign a value to Initials data member.
bool IsSetSuffix(void) const
Jr, Sr, III. Check if a value has been assigned to Suffix data member.
void SetLast(const TLast &value)
Assign a value to Last data member.
bool IsSetInitials(void) const
First + middle initials. Check if a value has been assigned to Initials data member.
bool IsSetLast(void) const
Check if a value has been assigned to Last data member.
void SetSuffix(const TSuffix &value)
Assign a value to Suffix data member.
bool CanGetSyn(void) const
Check if it is safe to call GetSyn method.
void ResetSyn(void)
Reset Syn data member.
TSyn & SetSyn(void)
Assign a value to Syn data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TSyn & GetSyn(void) const
Get the Syn member data.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
bool IsSetSyn(void) const
Synonyms for taxname or common. Check if a value has been assigned to Syn data member.
TPmid & SetPmid(void)
Select the variant.
TMuid & SetMuid(void)
Select the variant.
const TPmid & GetPmid(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
const Tdata & Get(void) const
Get the member data.
TEquiv & SetEquiv(void)
Select the variant.
bool IsEquiv(void) const
Check if variant Equiv is selected.
TMuid GetMuid(void) const
Get the variant data.
TArticle & SetArticle(void)
Select the variant.
void SetAccession(const TAccession &value)
Assign a value to Accession data member.
TGenbank & SetGenbank(void)
Select the variant.
bool IsSetComment(void) const
Any comment on this pub in context. Check if a value has been assigned to Comment data member.
void ResetComment(void)
Reset Comment data member.
void SetPub(TPub &value)
Assign a value to Pub data member.
const TComment & GetComment(void) const
Get the Comment member data.
const Tdata & Get(void) const
Get the member data.
void SetComment(const TComment &value)
Assign a value to Comment data member.
const TPub & GetPub(void) const
Get the Pub member data.
const CharType(& source)[N]
int strcmp(const char *str1, const char *str2)
IMessage/IMessageListener interfaces and basic implementations.
void SleepSec(unsigned long sec, EInterruptOnSignal onsignal=eRestartOnSignal)
Sleep.
User-defined methods of the data storage class.
CRef< CPub > journal(ParserPtr pp, char *bptr, char *eptr, CRef< CAuth_list > &auth_list, CRef< CTitle::C_E > &title, bool has_muid, CRef< CCit_art > &cit_art, Int4 er)
vector< IndexblkPtr > entrylist
Char * StringIStr(const Char *where, const Char *what)
static wxAcceleratorEntry entries[3]