NCBI C++ ToolKit
Dbtag.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: Dbtag.cpp 103138 2024-09-12 17:40:17Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the ASN data definition file
34  * 'general.asn'.
35  *
36  * ---------------------------------------------------------------------------
37  */
38 
39 // standard includes
40 
41 // generated includes
42 #include <ncbi_pch.hpp>
45 #include <corelib/ncbistd.hpp>
46 #include <util/compile_time.hpp>
47 
48 // generated classes
49 
51 
52 BEGIN_objects_SCOPE // namespace ncbi::objects::
53 
54 // All these maps are sorted at compile time case insensitive
55 // No need to presort them
56 
58 {
61  std::string_view m_alias;
62 
63  constexpr TApprovedDbTags() = default;
64  constexpr TApprovedDbTags(const CDbtag::TDbtagGroup& _group, CDbtag::EDbtagType _tag) :
65  m_groups{_group}, m_tag{_tag} {}
66  constexpr TApprovedDbTags(const CDbtag::TDbtagGroup& _group, CDbtag::EDbtagType _tag, string_view _alias) :
67  m_groups{_group}, m_tag{_tag}, m_alias{_alias} {}
68 };
69 
71 {
72 #include "Dbtag.inc"
73 });
74 
75 MAKE_CONST_SET(sc_SkippableDbXrefs, ct::tagStrNocase,
76 {
77  "BankIt",
78  "NCBIFILE",
79  "TMSMART"
80 })
81 
// Name components stored for one taxonomy ID in sc_TaxIdTaxnameMap.
// Deliberately a plain aggregate of C-string pointers so that table rows
// can be written as brace-initialized string literals.
struct STaxidTaxname {
    const char* m_genus;        // e.g. "Canis"
    const char* m_species;      // e.g. "lupus"
    const char* m_subspecies;   // e.g. "familiaris"; "" when there is none
};
87 
88 MAKE_CONST_MAP(sc_TaxIdTaxnameMap, TTaxId, STaxidTaxname,
89 {
90  { 7955, { "Danio", "rerio", "" } },
91  { 8022, { "Oncorhynchus", "mykiss", "" } },
92  { 9606, { "Homo", "sapiens", "" } },
93  { 9615, { "Canis", "lupus", "familiaris" } },
94  { 9838, { "Camelus", "dromedarius", "" } },
95  { 9913, { "Bos", "taurus", "" } },
96  { 9986, { "Oryctolagus", "cuniculus", "" } },
97  { 10090, { "Mus", "musculus", "" } },
98  { 10093, { "Mus", "pahari", "" } },
99  { 10094, { "Mus", "saxicola", "" } },
100  { 10096, { "Mus", "spretus", "" } },
101  { 10098, { "Mus", "cookii", "" } },
102  { 10105, { "Mus", "minutoides", "" } },
103  { 10116, { "Rattus", "norvegicus", "" } },
104  { 10117, { "Rattus", "rattus", "" } }
105 })
106 
107 namespace {
108 
109 CDbtag::TDbtagGroup xFindStrict(string_view _key)
110 {
111  const auto& _cont = sc_ApprovedTags;
112  auto it = _cont.find(_key);
113  if (it == _cont.end())
114  return 0;
115 
116  if (_key != it->first && _key != it->second.m_alias)
117  return 0;
118 
119  return it->second.m_groups;
120 }
121 
122 bool xGetStrict(string_view _key, CDbtag::EDbtagType& _retval)
123 {
124  const auto& _cont = sc_ApprovedTags;
125  auto it = _cont.find(_key);
126  if (it == _cont.end())
127  return false;
128 
129  if (_key != it->first && _key != it->second.m_alias)
130  return false;
131 
132  _retval = it->second.m_tag;
133  return true;
134 }
135 
136 CDbtag::TDbtagGroup xFindCorrectCaps(const string& v, string_view& correct_caps)
137 {
138  const auto& _cont = sc_ApprovedTags;
139 
140  if (auto it = _cont.find(v); it != _cont.end()) {
141  if (it->second.m_alias == string_view(v))
142  correct_caps = it->second.m_alias;
143  else
144  correct_caps = it->first;
145  return it->second.m_groups;
146  }
147 
148  return CDbtag::fNone;
149 }
150 
151 }
152 
153 // destructor
155 {
156 }
157 
158 bool CDbtag::Match(const CDbtag& dbt2) const
159 {
160  if (! PNocase().Equals(GetDb(), dbt2.GetDb()))
161  return false;
162  return ((GetTag()).Match((dbt2.GetTag())));
163 }
164 
165 
167 {
168  if ( !SetTag().SetAsMatchingTo(dbt2.GetTag()) ) {
169  return false;
170  }
171  SetDb(dbt2.GetDb());
172  return true;
173 }
174 
175 
176 int CDbtag::Compare(const CDbtag& dbt2) const
177 {
178  int ret = PNocase().Compare(GetDb(), dbt2.GetDb());
179  if (ret == 0) {
180  ret = GetTag().Compare(dbt2.GetTag());
181  }
182  return ret;
183 }
184 
185 
186 // Appends a label to "label" based on content of CDbtag
187 void CDbtag::GetLabel(string* label) const
188 {
189  const CObject_id& id = GetTag();
190  switch (id.Which()) {
191  case CObject_id::e_Str:
192  {
193  const string& db = GetDb();
194  const string& str = id.GetStr();
195  if (str.size() > db.size() && str[db.size()] == ':'
197  *label += str; // already prefixed; no need to re-tag
198  } else {
199  *label += db + ": " + str;
200  }
201  break;
202  }
203  case CObject_id::e_Id:
204  *label += GetDb() + ": " + NStr::IntToString(id.GetId());
205  break;
206  default:
207  *label += GetDb();
208  }
209 }
210 
211 // Test if CDbtag.db is in the approved databases list.
212 // NOTE: 'GenBank', 'EMBL', 'DDBJ' and 'REBASE' are approved only in
213 // the context of a RefSeq record.
214 // NOTE: 'GenBank' is approved in the context of a ProbeDb record.
215 bool CDbtag::IsApproved( EIsRefseq refseq, EIsSource is_source, EIsEstOrGss is_est_or_gss ) const
216 {
217  if ( !CanGetDb() ) {
218  return false;
219  }
220  const string& db = GetDb();
221 
222  CDbtag::TDbtagGroup group = xFindStrict(db);
223  if (group == 0)
224  return false;
225 
226 
227  if( (refseq == eIsRefseq_Yes) && (group & fRefSeq) ) {
228  return true;
229  }
230 
231  if( is_source == eIsSource_Yes ) {
232  bool found = (group & fSrc);
233  if ( ! found && (is_est_or_gss == eIsEstOrGss_Yes) ) {
234  // special case: for EST or GSS, source features are allowed non-src dbxrefs
235  found = ( (group & fGenBank) ||
236  (group & fRefSeq) );
237  }
238  return found;
239  } else {
240  return (group & fGenBank);
241  }
242 }
243 
244 
245 const char* CDbtag::IsApprovedNoCase(EIsRefseq refseq, EIsSource is_source ) const
246 {
247  if ( !CanGetDb() ) {
248  return NULL;
249  }
250  const string& db = GetDb();
251 
252  string_view caps;
253 
254  TDbtagGroup group = xFindCorrectCaps(db, caps);
255 
256  if ( (refseq == eIsRefseq_Yes) && (group & fRefSeq)) {
257  return caps.data();
258  }
259  if ( (is_source == eIsSource_Yes ) && (group & fSrc)) {
260  return caps.data();
261  }
262  if (!caps.empty())
263  return caps.data();
264 
265  return nullptr;
266 }
267 
268 
270 {
271  if ( !CanGetDb() ) {
272  return false;
273  }
274  const string& db = GetDb();
275 
276  auto allowed = xFindStrict(db);
277  return (allowed & group);
278 }
279 
280 
281 bool CDbtag::IsSkippable(void) const
282 {
283  return sc_SkippableDbXrefs.find(GetDb())
284  != sc_SkippableDbXrefs.end();
285 }
286 
287 
288 // Retrieve the enumerated type for the dbtag
290 {
291  if (m_Type == eDbtagType_bad) {
292  if ( !CanGetDb() ) {
293  return m_Type;
294  }
295 
296  const string& db = GetDb();
297 
298  if (xGetStrict(db, m_Type))
299  return m_Type;
300  }
301 
302  return m_Type;
303 }
304 
305 CDbtag::TDbtagGroup CDbtag::GetDBFlags (string& correct_caps) const
306 {
307  correct_caps.clear();
308  CDbtag::TDbtagGroup rsult = fNone;
309 
310  if ( !CanGetDb() ) {
311  return fNone;
312  }
313  const string& db = GetDb();
314 
315  string_view caps;
316 
317  auto groups = xFindCorrectCaps(db, caps);
318  if (groups) {
319  correct_caps = caps;
320  return groups;
321  }
322 
323  return rsult;
324 }
325 
326 
327 bool CDbtag::GetDBFlags (bool& is_refseq, bool& is_src, string& correct_caps) const
328 {
329  CDbtag::TDbtagGroup group = CDbtag::GetDBFlags(correct_caps);
330 
331  is_refseq = ((group & fRefSeq) != 0);
332  is_src = ((group & fSrc) != 0);
333 
334  return group != fNone;
335 }
336 
337 
338 // Force a refresh of the internal type
340 {
342 }
343 
344 
345 //=========================================================================//
346 // URLs //
347 //=========================================================================//
348 
349 // special case URLs
// url not found "Internal Server Error" tested 7/13/2016
static constexpr string_view kFBan =
    "http://www.fruitfly.org/cgi-bin/annot/fban?";

// access forbidden 7/13/2016
static constexpr string_view kHInvDbHIT =
    "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=30&KEN_STR=";

// "Internal Server Error" tested 7/13/2016
static constexpr string_view kHInvDbHIX =
    "http://www.jbirc.aist.go.jp/hinv/hinvsys/servlet/ExecServlet?KEN_INDEX=0&KEN_TYPE=31&KEN_STR=";

// url not found tested 7/13/2016
static constexpr string_view kDictyPrim =
    "http://dictybase.org/db/cgi-bin/gene_page.pl?primary_id=";

// https not available tested 7/13/2016
static constexpr string_view kMiRBaseMat =
    "http://www.mirbase.org/cgi-bin/mature.pl?mature_acc=";

// MaizeGDB: one prefix for integer tags, one for string tags
static constexpr string_view kMaizeGDBInt =
    "https://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?id=";
static constexpr string_view kMaizeGDBStr =
    "https://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?term=";

// https not available tested 7/13/2016
static constexpr string_view kHomdTax =
    "http://www.homd.org/taxon=";

// https not available tested 7/13/2016
static constexpr string_view kHomdSeq =
    "http://www.homd.org/seq=";
359 
360 
361 // mapping of DB to its URL; sorting is not needed
362 
364 {
365  { CDbtag::eDbtagType_AFTOL, "https://wasabi.lutzonilab.net/pub/displayTaxonInfo?aftol_id=" },
366  { CDbtag::eDbtagType_APHIDBASE, "http://bipaa.genouest.org/apps/grs-2.3/grs?reportID=aphidbase_transcript_report&objectID=" }, // "Service Unavailable" tested 7/13/2016
367  { CDbtag::eDbtagType_ASAP, "https://asap.genetics.wisc.edu/asap/feature_info.php?FeatureID=" },
368  { CDbtag::eDbtagType_ATCC, "https://www.atcc.org/Products/All/" },
369  { CDbtag::eDbtagType_AceView_WormGenes, "https://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/av.cgi?db=worm&c=gene&q=" },
370  { CDbtag::eDbtagType_AntWeb, "https://www.antweb.org/specimen.do?name=" },
371  { CDbtag::eDbtagType_ApiDB, "http://www.apidb.org/apidb/showRecord.do?name=GeneRecordClasses.ApiDBGeneRecordClass&primary_key=" }, // https not available tested 7/13/2016
372  { CDbtag::eDbtagType_ApiDB_CryptoDB, "http://cryptodb.org/cryptodb/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=CryptoDB&source_id=" }, // https not available tested 7/13/2016
373  { CDbtag::eDbtagType_ApiDB_PlasmoDB, "http://plasmodb.org/plasmo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=PlasmoDB&source_id=" }, // https not available tested 7/13/2016
374  { CDbtag::eDbtagType_ApiDB_ToxoDB, "http://toxodb.org/toxo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=ToxoDB&source_id=" }, // https not available tested 7/13/2016
375  { CDbtag::eDbtagType_BB, "https://beetlebase.org/cgi-bin/cmap/feature_search?features=" },
376  { CDbtag::eDbtagType_BEETLEBASE, "https://www.beetlebase.org/cgi-bin/report.cgi?name=" },
377  { CDbtag::eDbtagType_BGD, "http://bovinegenome.org/genepages/btau40/genes/" }, // https not available tested 7/13/2016
378  { CDbtag::eDbtagType_BoLD, "http://www.boldsystems.org/connectivity/specimenlookup.php?processid=" }, // https not available tested 7/13/2016
379  { CDbtag::eDbtagType_CCDS, "https://www.ncbi.nlm.nih.gov/CCDS/CcdsBrowse.cgi?REQUEST=CCDS&DATA=" },
380  { CDbtag::eDbtagType_CDD, "https://www.ncbi.nlm.nih.gov/Structure/cdd/cddsrv.cgi?uid=" },
381  { CDbtag::eDbtagType_CGNC, "http://birdgenenames.org/cgnc/GeneReport?id=" }, // https not available tested 7/13/2016
382  { CDbtag::eDbtagType_CK, "http://flybane.berkeley.edu/cgi-bin/cDNA/CK_clone.pl?db=CK&dbid=" }, // url not found tested 7/13/2016
383  { CDbtag::eDbtagType_COG, "https://www.ncbi.nlm.nih.gov/research/cog/cog/" },
384  { CDbtag::eDbtagType_CollecTF, "https://collectf.umbc.edu/" },
385  { CDbtag::eDbtagType_ECOCYC, "http://biocyc.org/ECOLI/new-image?type=GENE&object=" }, // https does not result in security cert warning, but "page can't be displayed", tested 7/13/2016
386  { CDbtag::eDbtagType_FANTOM_DB, "https://fantom.gsc.riken.jp/db/annotate/main.cgi?masterid=" },
387  { CDbtag::eDbtagType_FBOL, "http://www.fungalbarcoding.org/BioloMICS.aspx?Table=Fungal%20barcodes&Fields=All&Rec=" }, // https not available tested 7/13/2016
388  { CDbtag::eDbtagType_FLYBASE, "http://flybase.org/reports/" }, // https not available, http site "experiencing problems" tested 7/13/2016
389  { CDbtag::eDbtagType_Fungorum, "http://www.indexfungorum.org/Names/NamesRecord.asp?RecordID=" }, // https not available tested 7/13/2016
390  { CDbtag::eDbtagType_GABI, "https://www.gabipd.org/database/cgi-bin/GreenCards.pl.cgi?Mode=ShowSequence&App=ncbi&SequenceId=" },
391  { CDbtag::eDbtagType_GEO, "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" },
392  { CDbtag::eDbtagType_GO, "http://amigo.geneontology.org/amigo/term/GO:" },
393  { CDbtag::eDbtagType_GOA, "https://www.ebi.ac.uk/ego/GProtein?ac=" },
394  { CDbtag::eDbtagType_GRIN, "https://www.ars-grin.gov/cgi-bin/npgs/acc/display.pl?" },
395  { CDbtag::eDbtagType_GeneDB, "http://old.genedb.org/genedb/Search?organism=All%3A*&name=" }, // https not available tested 7/13/2016
396  { CDbtag::eDbtagType_GeneID, "https://www.ncbi.nlm.nih.gov/gene/" },
397  { CDbtag::eDbtagType_GrainGenes, "http://wheat.pw.usda.gov/cgi-bin/graingenes/report.cgi?class=marker&name=" }, // https not available tested 7/13/2016
398  { CDbtag::eDbtagType_Greengenes, "http://greengenes.lbl.gov/cgi-bin/show_one_record_v2.pl?prokMSA_id=" }, // https not available tested 7/13/2016
399  { CDbtag::eDbtagType_HGNC, "https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:" }, // https not available tested 7/13/2016
400  { CDbtag::eDbtagType_HMP, "https://www.hmpdacc.org/catalog/grid.php?dataset=genomic&hmp_id=" },
401  { CDbtag::eDbtagType_HOMD, "http://www.homd.org/" }, // https not available tested 7/13/2016
402  { CDbtag::eDbtagType_HPM, "http://www.humanproteomemap.org/protein.php?hpm_id=" }, // https not available tested 7/13/2016
403  { CDbtag::eDbtagType_HPRD, "http://www.hprd.org/protein/" }, // https not available, http returns "Service Temporarily Unavailable" tested 7/13/2016
404  { CDbtag::eDbtagType_HSSP, "http://mrs.cmbi.ru.nl/m6/search?db=all&q=" }, // not sure this points to a useful URL tested 7/13/2016
405  { CDbtag::eDbtagType_H_InvDB, "https://www.h-invitational.jp" },
406  { CDbtag::eDbtagType_IFO, "https://www.nbrc.nite.go.jp/NBRC2/NBRCCatalogueDetailServlet?ID=NBRC&CAT=" },
407  { CDbtag::eDbtagType_IMGT_GENEDB, "http://www.imgt.org/IMGT_GENE-DB/GENElect?species=Homo+sapiens&query=2+" }, // https not available, http "detected an unhandled exception" tested 7/13/2016
408  { CDbtag::eDbtagType_IMGT_HLA, "https://www.ebi.ac.uk/cgi-bin/ipd/imgt/hla/get_allele.cgi?" },
409  { CDbtag::eDbtagType_IMGT_LIGM, "http://www.imgt.org/cgi-bin/IMGTlect.jv?query=201+" }, // https not available, http "detected an unhandled exception" tested 7/13/2016
410  { CDbtag::eDbtagType_IRD, "https://www.fludb.org/brc/fluSegmentDetails.do?irdSubmissionId=" },
411  { CDbtag::eDbtagType_ISD, "http://www.flu.lanl.gov/search/view_record.html?accession=" }, // http "page can't be displayed" tested 7/13/2016
412  { CDbtag::eDbtagType_ISFinder, "http://www-is.biotoul.fr/scripts/is/is_spec.idc?name=" }, // url not found tested 7/13/2016
413  { CDbtag::eDbtagType_InterimID, "https://www.ncbi.nlm.nih.gov/gene/" },
414  { CDbtag::eDbtagType_Interpro, "https://www.ebi.ac.uk/interpro/entry/InterPro/" },
415  { CDbtag::eDbtagType_IntrepidBio, "http://server1.intrepidbio.com/FeatureBrowser/gene/browse/" }, // http request shows "Database is down for maint" tested 7/13/2016
416  { CDbtag::eDbtagType_JCM, "https://www.jcm.riken.go.jp/cgi-bin/jcm/jcm_number?JCM=" },
417  { CDbtag::eDbtagType_JGIDB, "http://genome.jgi-psf.org/cgi-bin/jgrs?id=" }, // https page "can't be displayed" tested 7/13/2016
418  { CDbtag::eDbtagType_LocusID, "https://www.ncbi.nlm.nih.gov/gene/" },
419  { CDbtag::eDbtagType_MGI, "http://www.informatics.jax.org/marker/MGI:" }, // https page "can't be displayed" tested 7/13/2016
420  { CDbtag::eDbtagType_MIM, "https://www.ncbi.nlm.nih.gov/omim/" },
421  { CDbtag::eDbtagType_MaizeGDB, "https://www.maizegdb.org/cgi-bin/displaylocusrecord.cgi?" },
422  { CDbtag::eDbtagType_MycoBank, "http://www.mycobank.org/MycoTaxo.aspx?Link=T&Rec=" }, // https not available tested 7/13/2016
423  { CDbtag::eDbtagType_NMPDR, "http://www.nmpdr.org/linkin.cgi?id=" }, // https not available, http "Internal Server Error" tested 7/13/2016
424  { CDbtag::eDbtagType_NRESTdb, "http://genome.ukm.my/nrestdb/db/single_view_est.php?id=" }, // http "page can't be displayed" tested 7/13/2016
425  { CDbtag::eDbtagType_NextDB, "http://nematode.lab.nig.ac.jp/cgi-bin/db/ShowGeneInfo.sh?celk=" }, // url not found tested 7/13/2016
426  { CDbtag::eDbtagType_OrthoMCL, "http://orthomcl.org/orthomcl/showRecord.do?name=GroupRecordClasses.GroupRecordClass&group_name=" }, // https not available
427  { CDbtag::eDbtagType_Osa1, "http://rice.plantbiology.msu.edu/cgi-bin/gbrowse/rice/?name=" }, // https "page can't be displayed" tested 7/13/2016
428  { CDbtag::eDbtagType_PBR, "https://www.poxvirus.org/query.asp?web_id=" },
429  { CDbtag::eDbtagType_PBmice, "http://www.idmshanghai.cn/PBmice/DetailedSearch.do?type=insert&id=" }, // https not available tested 7/13/2016
430  { CDbtag::eDbtagType_PDB, "http://www.rcsb.org/pdb/cgi/explore.cgi?pdbId=" }, // https "page can't be displayed" tested 7/13/2016
431  { CDbtag::eDbtagType_PFAM, "https://pfam.xfam.org/family/" },
432  { CDbtag::eDbtagType_PGN, "http://pgn.cornell.edu/cgi-bin/search/seq_search_result.pl?identifier=" }, // http page states info no longer avail at this website, includes links to look for a new location tested 7/13/2016
433  { CDbtag::eDbtagType_Phytozome, "https://phytozome.jgi.doe.gov/pz/portal.html#!results?search=0&crown=1&star=1&method=0&searchText=" },
434  { CDbtag::eDbtagType_PomBase, "http://www.pombase.org/spombe/result/" }, // https not available tested 7/13/2016
435  { CDbtag::eDbtagType_RAP_DB, "http://rapdb.dna.affrc.go.jp/cgi-bin/gbrowse_details/latest?name=" }, // https appears available, domain appears to exist but http "page not found" with note about release of a major update tested 7/13/2016
436  { CDbtag::eDbtagType_RATMAP, "https://ratmap.gen.gu.se/ShowSingleLocus.htm?accno=" },
437  { CDbtag::eDbtagType_RBGE_garden, "https://data.rbge.org.uk/living/" },
438  { CDbtag::eDbtagType_RBGE_herbarium, "https://data.rbge.org.uk/herb/" },
439  { CDbtag::eDbtagType_REBASE, "http://rebase.neb.com/rebase/enz/" }, // ID-4590 : https not available 02/14/2018
440  { CDbtag::eDbtagType_RFAM, "http://rfam.xfam.org/family/" }, // https not available tested 7/13/2016
441  { CDbtag::eDbtagType_RGD, "https://rgd.mcw.edu/rgdweb/search/search.html?term=" },
442  { CDbtag::eDbtagType_RiceGenes, "http://ars-genome.cornell.edu/cgi-bin/WebAce/webace?db=ricegenes&class=Marker&object=" }, // http "page can't be displayed" tested 7/13/2016
443  { CDbtag::eDbtagType_SGD, "https://www.yeastgenome.org/locus/" }, // url not found tested 7/13/2016
444  { CDbtag::eDbtagType_SGN, "http://www.sgn.cornell.edu/search/est.pl?request_type=7&request_id=" }, // https not available, http automatically redirects to https, then shows security cert issue, tested 7/13/2016
445  { CDbtag::eDbtagType_SK_FST, "http://aafc-aac.usask.ca/fst/" }, // https not available tested 7/13/2016
446  { CDbtag::eDbtagType_SRPDB, "http://rnp.uthscsa.edu/rnp/SRPDB/rna/sequences/fasta/" }, // https not available tested 7/13/2016
447  { CDbtag::eDbtagType_SubtiList, "http://genolist.pasteur.fr/SubtiList/genome.cgi?external_query+" }, // https not available tested 7/13/2016
448  { CDbtag::eDbtagType_TAIR, "https://www.arabidopsis.org/servlets/TairObject?type=locus&name=" },
449  { CDbtag::eDbtagType_TIGRFAM, "http://www.jcvi.org/cgi-bin/tigrfams/HmmReportPage.cgi?acc=" }, // https not available tested 7/13/2016
450  { CDbtag::eDbtagType_UNITE, "https://unite.ut.ee/bl_forw.php?nimi=" },
451  { CDbtag::eDbtagType_UniGene, "https://www.ncbi.nlm.nih.gov/unigene?term=" },
452  { CDbtag::eDbtagType_UniProt_SwissProt, "https://www.uniprot.org/uniprot/" },
453  { CDbtag::eDbtagType_UniProt_TrEMBL, "https://www.uniprot.org/uniprot/" },
454  { CDbtag::eDbtagType_UniSTS, "https://www.ncbi.nlm.nih.gov/probe?term=" },
455  { CDbtag::eDbtagType_VBASE2, "http://www.vbase2.org/vgene.php?id=" }, // https not available tested 7/13/2016
456  { CDbtag::eDbtagType_VBRC, "http://vbrc.org/query.asp?web_view=curation&web_id=" }, // https not available tested 7/13/2016
457  { CDbtag::eDbtagType_VectorBase, "https://vectorbase.org/gene/" },
458  { CDbtag::eDbtagType_Vega, "http://vega.archive.ensembl.org/id/" },
459  { CDbtag::eDbtagType_WorfDB, "http://worfdb.dfci.harvard.edu/search.pl?form=1&search=" },
460  { CDbtag::eDbtagType_WormBase, "https://www.wormbase.org/search/gene/" },
461  { CDbtag::eDbtagType_Xenbase, "https://www.xenbase.org/entry/gene/showgene.do?method=display&geneId=" },
462  { CDbtag::eDbtagType_ZFIN, "https://zfin.org/" },
463  { CDbtag::eDbtagType_axeldb, "http://www.dkfz-heidelberg.de/tbi/services/axeldb/clone/xenopus?name=" }, // https not available tested 7/13/2016
464  { CDbtag::eDbtagType_dbClone, "https://www.ncbi.nlm.nih.gov/sites/entrez?db=clone&cmd=Retrieve&list_uids=" },
465  { CDbtag::eDbtagType_dbCloneLib, "https://www.ncbi.nlm.nih.gov/sites/entrez?db=clonelib&cmd=Retrieve&list_uids=" },
466  { CDbtag::eDbtagType_dbEST, "https://www.ncbi.nlm.nih.gov/nucest/" },
467  { CDbtag::eDbtagType_dbProbe, "https://www.ncbi.nlm.nih.gov/sites/entrez?db=probe&cmd=Retrieve&list_uids=" },
468  { CDbtag::eDbtagType_dbSNP, "https://www.ncbi.nlm.nih.gov/snp/rs" },
469  { CDbtag::eDbtagType_dbSTS, "https://www.ncbi.nlm.nih.gov/nuccore/" },
470  { CDbtag::eDbtagType_dictyBase, "https://dictybase.org/db/cgi-bin/gene_page.pl?dictybaseid=" },
471  { CDbtag::eDbtagType_miRBase, "http://www.mirbase.org/cgi-bin/mirna_entry.pl?acc=" }, // https not available tested 7/13/2016
472  { CDbtag::eDbtagType_niaEST, "https://lgsun.grc.nia.nih.gov/cgi-bin/pro3?sname1=" }, // project appears to be abandoned, tested 7/16/2021
473  { CDbtag::eDbtagType_taxon, "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?" },
474  { CDbtag::eDbtagType_BEEBASE, "http://hymenopteragenome.org/cgi-bin/gb2/gbrowse/bee_genome45/?name=" }, // https not available tested 7/13/2016
475  { CDbtag::eDbtagType_NASONIABASE, "http://hymenopteragenome.org/cgi-bin/gbrowse/nasonia10_scaffold/?name=" }, // https not available tested 7/13/2016
476  { CDbtag::eDbtagType_BioProject, "https://www.ncbi.nlm.nih.gov/bioproject/" },
477  { CDbtag::eDbtagType_IKMC, "https://www.mousephenotype.org/data/alleles/project_id?ikmc_project_id=" },
478  { CDbtag::eDbtagType_ViPR, "https://www.viprbrc.org/brc/viprStrainDetails.do?viprSubmissionId=" },
479  { CDbtag::eDbtagType_SRA, "https://www.ncbi.nlm.nih.gov/sra/" },
480  { CDbtag::eDbtagType_RefSeq, "https://www.ncbi.nlm.nih.gov/nuccore/" },
481  { CDbtag::eDbtagType_EnsemblGenomes, "http://ensemblgenomes.org/id/" }, // https not available tested 7/13/2016
482  { CDbtag::eDbtagType_EnsemblGenomes_Gn, "http://ensemblgenomes.org/id/" }, // https not available tested 7/13/2016
483  { CDbtag::eDbtagType_EnsemblGenomes_Tr, "http://ensemblgenomes.org/id/" }, // https not available tested 7/13/2016
484  { CDbtag::eDbtagType_TubercuList, "http://tuberculist.epfl.ch/quicksearch.php?gene+name=" }, // https not available tested 7/13/2016
485  { CDbtag::eDbtagType_MedGen, "https://www.ncbi.nlm.nih.gov/medgen/" },
486  { CDbtag::eDbtagType_CGD, "http://www.candidagenome.org/cgi-bin/locus.pl?locus=" }, // https not available tested 7/13/2016
487  { CDbtag::eDbtagType_Assembly, "https://www.ncbi.nlm.nih.gov/assembly/" },
488  { CDbtag::eDbtagType_GenBank, "https://www.ncbi.nlm.nih.gov/nuccore/" },
489  { CDbtag::eDbtagType_BioSample, "https://www.ncbi.nlm.nih.gov/biosample/" },
490  { CDbtag::eDbtagType_ISHAM_ITS, "http://its.mycologylab.org/BioloMICS.aspx?Table=Sequences&ExactMatch=T&Name=MITS" }, // https not available tested 7/13/2016
491  { CDbtag::eDbtagType_I5KNAL, "https://i5k.nal.usda.gov/" },
492  { CDbtag::eDbtagType_VISTA, "https://enhancer.lbl.gov/cgi-bin/dbxref.pl?id=" }, // https not available tested 7/13/2016
493  { CDbtag::eDbtagType_BEI, "https://www.beiresources.org/Catalog/animalViruses/" },
494  { CDbtag::eDbtagType_Araport, "https://bar.utoronto.ca/thalemine/portal.do?externalids=" },
495  { CDbtag::eDbtagType_VGNC, "http://vertebrate.genenames.org/data/gene-symbol-report/#!/vgnc_id/VGNC:" }, // https not available tested 7/13/2016
496  { CDbtag::eDbtagType_RNAcentral, "http://rnacentral.org/rna/" },
497  { CDbtag::eDbtagType_PeptideAtlas, "https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/Search?action=GO&search_key=" },
498  { CDbtag::eDbtagType_EPDnew, "http://epd.vital-it.ch/cgi-bin/get_doc?format=genome&entry=" },
499  { CDbtag::eDbtagType_dbVar, "https://www.ncbi.nlm.nih.gov/dbvar/variants/" },
500  { CDbtag::eDbtagType_EnsemblRapid, "https://rapid.ensembl.org/id/" },
501  { CDbtag::eDbtagType_AllianceGenome, "https://www.alliancegenome.org/gene/" },
502  { CDbtag::eDbtagType_EchinoBase, "https://www.echinobase.org/entry/gene/showgene.do?method=displayGeneSummary&geneId=" },
503 
504  { CDbtag::eDbtagType_ENSEMBL, "https://www.ensembl.org/id/" }, // url seems incorrect, includes msg user has been redirected and "Error 404 Page not found" tested 7/13/2016
505  { CDbtag::eDbtagType_Ensembl, "https://www.ensembl.org/id/" }, // url seems incorrect, includes msg user has been redirected and "Error 404 Page not found" tested 7/13/2016
506  { CDbtag::eDbtagType_PseudoCAP, "http://www.pseudomonas.com/primarySequenceFeature/list?c1=name&e1=1&v1=" }, // url not found tested 7/13/2016
507  { CDbtag::eDbtagType_PseudoCap, "http://www.pseudomonas.com/primarySequenceFeature/list?c1=name&e1=1&v1=" }, // url not found tested 7/13/2016
508 
509  { CDbtag::eDbtagType_AmoebaDB, "https://amoebadb.org/amoeba/app/record/gene/" },
510  { CDbtag::eDbtagType_CryptoDB, "https://cryptodb.org/cryptodb/app/record/gene/" },
511  { CDbtag::eDbtagType_FungiDB, "https://fungidb.org/fungidb/app/record/gene/" },
512  { CDbtag::eDbtagType_GiardiaDB, "https://giardiadb.org/giardiadb/app/record/gene/" },
513  { CDbtag::eDbtagType_MicrosporidiaDB, "https://microsporidiadb.org/micro/app/record/gene/" },
514  { CDbtag::eDbtagType_PiroplasmaDB, "https://piroplasmadb.org/piro/app/record/gene/" },
515  { CDbtag::eDbtagType_PlasmoDB, "https://plasmodb.org/plasmo/app/record/gene/" },
516  { CDbtag::eDbtagType_ToxoDB, "https://toxodb.org/toxo/app/record/gene/" },
517  { CDbtag::eDbtagType_TrichDB, "https://trichdb.org/trichdb/app/record/gene/" },
518  { CDbtag::eDbtagType_TriTrypDB, "https://tritrypdb.org/tritrypdb/app/record/gene/" },
519  { CDbtag::eDbtagType_VEuPathDB, "https://veupathdb.org/gene/" },
520 
521  { CDbtag::eDbtagType_NCBIOrtholog, "https://www.ncbi.nlm.nih.gov/gene/" }, // modified below
522 })
523 
524 string CDbtag::GetUrl(void) const
525 {
526  return GetUrl( kEmptyStr, kEmptyStr, kEmptyStr );
527 }
528 
529 string CDbtag::GetUrl(TTaxId taxid) const
530 {
531  auto find_iter = sc_TaxIdTaxnameMap.find(taxid);
532  if( find_iter == sc_TaxIdTaxnameMap.end() ) {
533  return GetUrl();
534  } else {
535  const STaxidTaxname & taxinfo = find_iter->second;
536  return GetUrl( taxinfo.m_genus, taxinfo.m_species, taxinfo.m_subspecies );
537  }
538 }
539 
540 string CDbtag::GetUrl(const string & taxname_arg ) const
541 {
542  // The exact number doesn't matter, as long as it's long enough
543  // to cover all reasonable cases
544  const static SIZE_TYPE kMaxLen = 500;
545 
546  if( taxname_arg.empty() || taxname_arg.length() > kMaxLen ) {
547  return GetUrl();
548  }
549 
550  // make a copy because we're changing it
551  string taxname = taxname_arg;
552 
553  // convert all non-alpha chars to spaces
554  NON_CONST_ITERATE( string, str_iter, taxname ) {
555  const char ch = *str_iter;
556  if( ! isalpha(ch) ) {
557  *str_iter = ' ';
558  }
559  }
560 
561  // remove initial and final spaces
562  NStr::TruncateSpacesInPlace( taxname );
563 
564  // extract genus, species, subspeces
565 
566  vector<string> taxname_parts;
567  NStr::Split(taxname, " ", taxname_parts, NStr::fSplit_Tokenize);
568 
569  if( taxname_parts.size() == 2 || taxname_parts.size() == 3 ) {
570  string genus;
571  string species;
572  string subspecies;
573 
574  genus = taxname_parts[0];
575  species = taxname_parts[1];
576 
577  if( taxname_parts.size() == 3 ) {
578  subspecies = taxname_parts[2];
579  }
580 
581  return GetUrl( genus, species, subspecies );
582  }
583 
584  // if we couldn't figure out the taxname, use the default behavior
585  return GetUrl();
586 }
587 
588 string CDbtag::GetUrl(const string & genus,
589  const string & species,
590  const string & subspecies) const
591 {
592  auto it = sc_UrlMap.find(GetType());
593  if (it == sc_UrlMap.end()) {
594  return kEmptyStr;
595  }
596 
597  auto prefix = it->second;
598 
599  string tag;
600  bool nonInteger = false;
601  if (GetTag().IsStr()) {
602  tag = GetTag().GetStr();
603  // integer db_xrefs are supposed to be converted to IsId, mark as not an integer
604  nonInteger = true;
605  } else if (GetTag().IsId()) {
607  }
608  if (NStr::IsBlank(tag)) {
609  return kEmptyStr;
610  }
611 
612  // URLs are constructed by catenating the URL prefix with the specific tag
613  // except in a few cases handled below.
614  switch (GetType()) {
616  if (NStr::Find(tag, "FBan") != NPOS) {
617  prefix = kFBan;
618  }
619  break;
620 
621  case eDbtagType_GeneID:
622  if (nonInteger) {
623  // GeneID must be an integer
624  return kEmptyStr;
625  }
626  break;
627 
628  case eDbtagType_BEI:
629  tag += ".aspx";
630  break;
631 
633  {
634  int num_skip = 0;
635  string::const_iterator tag_iter = tag.begin();
636  for ( ; tag_iter != tag.end() && ! isdigit(*tag_iter) ; ++tag_iter ) {
637  num_skip++;
638  }
639  if (num_skip > 0) {
640  tag = tag.substr(num_skip);
641  }
642  }
643  break;
644 
645  case eDbtagType_MGI:
646  case eDbtagType_MGD:
647  if (NStr::StartsWith(tag, "MGI:", NStr::eNocase) ||
648  NStr::StartsWith(tag, "MGD:", NStr::eNocase)) {
649  tag = tag.substr(4);
650  }
651  break;
652 
653  case eDbtagType_HGNC:
654  if (NStr::StartsWith(tag, "HGNC:", NStr::eNocase)) {
655  tag = tag.substr(5);
656  }
657  break;
658 
659  case eDbtagType_VGNC:
660  if (NStr::StartsWith(tag, "VGNC:", NStr::eNocase)) {
661  tag = tag.substr(5);
662  }
663  break;
664 
665  case eDbtagType_RGD:
666  if (NStr::StartsWith(tag, "RGD:", NStr::eNocase)) {
667  tag = tag.substr(4);
668  }
669  break;
670 
671  case eDbtagType_PID:
672  if (tag[0] == 'g') {
673  tag = tag.substr(1);
674  }
675  break;
676 
677  case eDbtagType_SRPDB:
678  tag += ".fasta";
679  break;
680 
681  case eDbtagType_UniSTS:
682  tag += "%20%5BUniSTS%20ID%5D";
683  break;
684 
685  case eDbtagType_dbSNP:
686  if (NStr::StartsWith(tag, "rs", NStr::eNocase)) {
687  tag = tag.substr(2);
688  }
689  break;
690 
691  case eDbtagType_dbSTS:
692  break;
693 
694  case eDbtagType_niaEST:
695  tag += "&val=1";
696  break;
697 
698  case eDbtagType_MaizeGDB:
699  if (GetTag().IsId()) {
700  prefix = kMaizeGDBInt;
701  } else if (GetTag().IsStr()) {
702  prefix = kMaizeGDBStr;
703  }
704  break;
705 
706  case eDbtagType_GDB:
707  {{
708  SIZE_TYPE pos = NStr::Find(tag, "G00-");
709  if (pos != NPOS) {
710  tag = tag.substr(pos + 4);
711  tag.erase(remove(tag.begin(), tag.end(), '-'), tag.end());
712  } else if (!isdigit((unsigned char) tag[0])) {
713  return kEmptyStr;
714  }
715  break;
716  }}
717 
718  case eDbtagType_REBASE:
719  tag += ".html";
720  break;
721 
722  case eDbtagType_H_InvDB:
723  if (NStr::Find(tag, "HIT")) {
724  prefix = kHInvDbHIT;
725  } else if (NStr::Find(tag, "HIX")) {
726  prefix = kHInvDbHIX;
727  }
728  break;
729 
730  case eDbtagType_SK_FST:
731  return prefix;
732  break;
733 
735  if (isdigit((unsigned char) tag[0])) {
736  tag.insert(0, "id=");
737  } else {
738  tag.insert(0, "name=");
739  }
740  break;
741 
743  if (NStr::Find(tag, "_") != NPOS) {
744  prefix = kDictyPrim;
745  }
746  break;
747 
748 
750  if (NStr::Find(tag, "MIMAT") != NPOS) {
751  prefix = kMiRBaseMat;
752  }
753  break;
754 
756  {
757  int num_alpha = 0;
758  int num_digit = 0;
759  int num_unscr = 0;
760  if( x_LooksLikeAccession (tag, num_alpha, num_digit, num_unscr) &&
761  num_alpha == 3 && num_digit == 5 )
762  {
763  prefix = "http://www.wormbase.org/search/protein/";
764  }
765  }
766  break;
767 
769  if( NStr::StartsWith(tag, "tax_") ) {
770  prefix = kHomdTax;
771  tag = tag.substr(4);
772  } else if( NStr::StartsWith(tag, "seq_") ) {
773  prefix = kHomdSeq;
774  tag = tag.substr(4);
775  }
776  break;
777 
778  case eDbtagType_IRD:
779  tag += "&decorator=influenza";
780  break;
781 
782  case eDbtagType_ATCC:
783  tag += ".aspx";
784  break;
785 
786  case eDbtagType_ViPR:
787  tag += "&decorator=vipr";
788  break;
789 
791  if( ! genus.empty() ) {
792  string taxname_url_piece = genus + "+" + species;
793  if( ! subspecies.empty() ) {
794  taxname_url_piece += "+" + subspecies;
795  }
796  string ret = prefix;
797  return NStr::Replace( ret,
798  "species=Homo+sapiens&",
799  "species=" + taxname_url_piece + "&" ) +
800  tag;
801  }
802  break;
803 
805  if( NStr::StartsWith(tag, "HLA") ) {
806  prefix = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=imgthla;id=";
807  }
808  break;
809 
810  case eDbtagType_RefSeq:
811  {{
812  string::const_iterator tag_iter = tag.begin();
813  if (isalpha (*tag_iter)) {
814  ++tag_iter;
815  if (*tag_iter == 'P') {
816  ++tag_iter;
817  if (*tag_iter == '_') {
818  prefix = "https://www.ncbi.nlm.nih.gov/protein/";
819  }
820  }
821  }
822  }}
823  break;
824 
826  if (!tag.empty()){
827  while (tag.size() < SIZE_TYPE(7)){
828  tag = '0' + tag;
829  }
830  }
831  break;
832 
833 
835  if (!tag.empty()){
836  while (tag.size() < SIZE_TYPE(8)){
837  tag = '0' + tag;
838  }
839  }
840  break;
841 
842 
844  if (NStr::StartsWith(tag, "MITS", NStr::eNocase)) {
845  tag = tag.substr(4);
846  }
847  break;
848 
850  if( ! genus.empty() && ! species.empty() ) {
851  string abbrev = "";
852  if (NStr::Equal (genus, "Homo") && NStr::Equal (species, "sapiens")) {
853  abbrev = "hg";
854  } else {
855  string gen = genus;
856  string spc = species;
857  gen = NStr::ToLower(gen);
858  spc = NStr::ToLower(spc);
859  abbrev = gen.substr(0, 1) + spc.substr(0, 1);
860  }
861  tag += "&db=" + abbrev;
862  }
863  break;
864 
866  tag += "/ortholog";
867  break;
868 
869  default:
870  break;
871  }
872 
873  return string(prefix) + tag;
874 }
875 
876 // static
877 bool CDbtag::x_LooksLikeAccession(const string &tag,
878  int &out_num_alpha,
879  int &out_num_digit,
880  int &out_num_unscr)
881 {
882  if ( tag.empty() ) return false;
883 
884  if ( tag.length() >= 16) return false;
885 
886  if ( ! isupper(tag[0]) ) return false;
887 
888  int numAlpha = 0;
889  int numDigits = 0;
890  int numUndersc = 0;
891 
892  string::const_iterator tag_iter = tag.begin();
893  if ( NStr::StartsWith(tag, "NZ_") ) {
894  tag_iter += 3;
895  }
896  for ( ; tag_iter != tag.end() && isalpha(*tag_iter); ++tag_iter ) {
897  numAlpha++;
898  }
899  for ( ; tag_iter != tag.end() && *tag_iter == '_'; ++tag_iter ) {
900  numUndersc++;
901  }
902  for ( ; tag_iter != tag.end() && isdigit(*tag_iter) ; ++tag_iter ) {
903  numDigits++;
904  }
905  if ( tag_iter != tag.end() && *tag_iter != ' ' && *tag_iter != '.') {
906  return false;
907  }
908 
909  if (numUndersc > 1) return false;
910 
911  out_num_alpha = numAlpha;
912  out_num_digit = numDigits;
913  out_num_unscr = numUndersc;
914 
915  if (numUndersc == 0) {
916  if (numAlpha == 1 && numDigits == 5) return true;
917  if (numAlpha == 2 && numDigits == 6) return true;
918  if (numAlpha == 3 && numDigits == 5) return true;
919  if (numAlpha == 4 && numDigits == 8) return true;
920  if (numAlpha == 4 && numDigits == 9) return true;
921  if (numAlpha == 5 && numDigits == 7) return true;
922  } else if (numUndersc == 1) {
923  if (numAlpha != 2 || (numDigits != 6 && numDigits != 8 && numDigits != 9)) return false;
924  if (tag[0] == 'N' || tag[0] == 'X' || tag[0] == 'Z') {
925  if (tag[1] == 'M' ||
926  tag[1] == 'C' ||
927  tag[1] == 'T' ||
928  tag[1] == 'P' ||
929  tag[1] == 'G' ||
930  tag[1] == 'R' ||
931  tag[1] == 'S' ||
932  tag[1] == 'W' ||
933  tag[1] == 'Z') {
934  return true;
935  }
936  }
937  if (tag[0] == 'A' || tag[0] == 'Y') {
938  if (tag[1] == 'P') return true;
939  }
940  }
941 
942  return false;
943 }
944 
945 END_objects_SCOPE // namespace ncbi::objects::
946 
static constexpr string_view kDictyPrim
Definition: Dbtag.cpp:353
static constexpr string_view kHomdTax
Definition: Dbtag.cpp:357
static constexpr string_view kMaizeGDBInt
Definition: Dbtag.cpp:355
MAKE_CONST_SET(sc_SkippableDbXrefs, ct::tagStrNocase, { "BankIt", "NCBIFILE", "TMSMART" }) struct STaxidTaxname
Definition: Dbtag.cpp:75
static constexpr string_view kMaizeGDBStr
Definition: Dbtag.cpp:356
static constexpr auto sc_ApprovedTags
Definition: Dbtag.cpp:70
static constexpr string_view kHomdSeq
Definition: Dbtag.cpp:358
MAKE_CONST_MAP(sc_TaxIdTaxnameMap, TTaxId, STaxidTaxname, { { 7955, { "Danio", "rerio", "" } }, { 8022, { "Oncorhynchus", "mykiss", "" } }, { 9606, { "Homo", "sapiens", "" } }, { 9615, { "Canis", "lupus", "familiaris" } }, { 9838, { "Camelus", "dromedarius", "" } }, { 9913, { "Bos", "taurus", "" } }, { 9986, { "Oryctolagus", "cuniculus", "" } }, { 10090, { "Mus", "musculus", "" } }, { 10093, { "Mus", "pahari", "" } }, { 10094, { "Mus", "saxicola", "" } }, { 10096, { "Mus", "spretus", "" } }, { 10098, { "Mus", "cookii", "" } }, { 10105, { "Mus", "minutoides", "" } }, { 10116, { "Rattus", "norvegicus", "" } }, { 10117, { "Rattus", "rattus", "" } } }) namespace
Definition: Dbtag.cpp:88
static constexpr string_view kMiRBaseMat
Definition: Dbtag.cpp:354
static constexpr string_view kHInvDbHIX
Definition: Dbtag.cpp:352
static constexpr string_view kFBan
Definition: Dbtag.cpp:350
static constexpr string_view kHInvDbHIT
Definition: Dbtag.cpp:351
Definition: Dbtag.hpp:53
void GetLabel(string *label) const
Definition: Dbtag.cpp:187
EDbtagType GetType(void) const
Definition: Dbtag.cpp:289
static bool x_LooksLikeAccession(const string &tag, int &out_num_alpha, int &out_num_digit, int &out_num_unscr)
EIsRefseq
Definition: Dbtag.hpp:272
@ eIsRefseq_Yes
Definition: Dbtag.hpp:274
bool Match(const CDbtag &dbt2) const
Definition: Dbtag.cpp:158
EIsSource
Definition: Dbtag.hpp:276
@ eIsSource_Yes
Definition: Dbtag.hpp:278
@ fGenBank
Definition: Dbtag.hpp:244
@ fSrc
Definition: Dbtag.hpp:246
@ fNone
Definition: Dbtag.hpp:243
@ fRefSeq
Definition: Dbtag.hpp:245
const char * IsApprovedNoCase(EIsRefseq refseq=eIsRefseq_No, EIsSource is_source=eIsSource_No) const
Definition: Dbtag.cpp:245
~CDbtag(void)
Definition: Dbtag.cpp:154
string GetUrl(void) const
void InvalidateType(void)
Definition: Dbtag.cpp:339
bool SetAsMatchingTo(const CDbtag &dbt2)
Definition: Dbtag.cpp:166
EDbtagType m_Type
Definition: Dbtag.hpp:340
int TDbtagGroup
holds bitwise OR of "EDbtagGroup"
Definition: Dbtag.hpp:250
EDbtagType
Definition: Dbtag.hpp:58
@ eDbtagType_EnsemblGenomes_Gn
Definition: Dbtag.hpp:203
@ eDbtagType_ViPR
Definition: Dbtag.hpp:197
@ eDbtagType_FANTOM_DB
Definition: Dbtag.hpp:92
@ eDbtagType_Interpro
Definition: Dbtag.hpp:122
@ eDbtagType_PDB
Definition: Dbtag.hpp:139
@ eDbtagType_dbSNP
Definition: Dbtag.hpp:185
@ eDbtagType_VEuPathDB
Definition: Dbtag.hpp:238
@ eDbtagType_PBmice
Definition: Dbtag.hpp:138
@ eDbtagType_EnsemblGenomes_Tr
Definition: Dbtag.hpp:204
@ eDbtagType_CollecTF
Definition: Dbtag.hpp:85
@ eDbtagType_NASONIABASE
Definition: Dbtag.hpp:194
@ eDbtagType_RAP_DB
Definition: Dbtag.hpp:148
@ eDbtagType_Assembly
Definition: Dbtag.hpp:208
@ eDbtagType_I5KNAL
Definition: Dbtag.hpp:213
@ eDbtagType_EPDnew
Definition: Dbtag.hpp:220
@ eDbtagType_IKMC
Definition: Dbtag.hpp:196
@ eDbtagType_GiardiaDB
Definition: Dbtag.hpp:231
@ eDbtagType_IMGT_LIGM
Definition: Dbtag.hpp:117
@ eDbtagType_JCM
Definition: Dbtag.hpp:124
@ eDbtagType_PFAM
Definition: Dbtag.hpp:140
@ eDbtagType_HOMD
Definition: Dbtag.hpp:109
@ eDbtagType_dictyBase
Definition: Dbtag.hpp:187
@ eDbtagType_NRESTdb
Definition: Dbtag.hpp:133
@ eDbtagType_CGNC
Definition: Dbtag.hpp:81
@ eDbtagType_FungiDB
Definition: Dbtag.hpp:230
@ eDbtagType_dbSTS
Definition: Dbtag.hpp:186
@ eDbtagType_TriTrypDB
Definition: Dbtag.hpp:237
@ eDbtagType_IFO
Definition: Dbtag.hpp:114
@ eDbtagType_HGNC
Definition: Dbtag.hpp:107
@ eDbtagType_EchinoBase
Definition: Dbtag.hpp:227
@ eDbtagType_BoLD
Definition: Dbtag.hpp:78
@ eDbtagType_niaEST
Definition: Dbtag.hpp:189
@ eDbtagType_VectorBase
Definition: Dbtag.hpp:174
@ eDbtagType_NMPDR
Definition: Dbtag.hpp:132
@ eDbtagType_Fungorum
Definition: Dbtag.hpp:95
@ eDbtagType_RefSeq
Definition: Dbtag.hpp:201
@ eDbtagType_BEETLEBASE
Definition: Dbtag.hpp:76
@ eDbtagType_PID
Definition: Dbtag.hpp:192
@ eDbtagType_GeneID
Definition: Dbtag.hpp:104
@ eDbtagType_UniGene
Definition: Dbtag.hpp:168
@ eDbtagType_RATMAP
Definition: Dbtag.hpp:149
@ eDbtagType_BioProject
Definition: Dbtag.hpp:195
@ eDbtagType_GO
Definition: Dbtag.hpp:100
@ eDbtagType_RFAM
Definition: Dbtag.hpp:153
@ eDbtagType_GRIN
Definition: Dbtag.hpp:102
@ eDbtagType_Osa1
Definition: Dbtag.hpp:136
@ eDbtagType_dbVar
Definition: Dbtag.hpp:224
@ eDbtagType_EnsemblRapid
Definition: Dbtag.hpp:225
@ eDbtagType_PomBase
Definition: Dbtag.hpp:146
@ eDbtagType_ApiDB
Definition: Dbtag.hpp:69
@ eDbtagType_axeldb
Definition: Dbtag.hpp:180
@ eDbtagType_RGD
Definition: Dbtag.hpp:154
@ eDbtagType_BB
Definition: Dbtag.hpp:73
@ eDbtagType_IMGT_HLA
Definition: Dbtag.hpp:116
@ eDbtagType_COG
Definition: Dbtag.hpp:83
@ eDbtagType_AceView_WormGenes
Definition: Dbtag.hpp:67
@ eDbtagType_GOA
Definition: Dbtag.hpp:101
@ eDbtagType_dbCloneLib
Definition: Dbtag.hpp:182
@ eDbtagType_GeneDB
Definition: Dbtag.hpp:103
@ eDbtagType_SRPDB
Definition: Dbtag.hpp:161
@ eDbtagType_ASAP
Definition: Dbtag.hpp:63
@ eDbtagType_IMGT_GENEDB
Definition: Dbtag.hpp:115
@ eDbtagType_CCDS
Definition: Dbtag.hpp:79
@ eDbtagType_FLYBASE
Definition: Dbtag.hpp:94
@ eDbtagType_HPM
Definition: Dbtag.hpp:110
@ eDbtagType_UniProt_SwissProt
Definition: Dbtag.hpp:169
@ eDbtagType_bad
Definition: Dbtag.hpp:59
@ eDbtagType_GrainGenes
Definition: Dbtag.hpp:105
@ eDbtagType_dbProbe
Definition: Dbtag.hpp:184
@ eDbtagType_ATCC
Definition: Dbtag.hpp:64
@ eDbtagType_PBR
Definition: Dbtag.hpp:137
@ eDbtagType_SGD
Definition: Dbtag.hpp:158
@ eDbtagType_TAIR
Definition: Dbtag.hpp:164
@ eDbtagType_RiceGenes
Definition: Dbtag.hpp:156
@ eDbtagType_BEI
Definition: Dbtag.hpp:215
@ eDbtagType_SK_FST
Definition: Dbtag.hpp:160
@ eDbtagType_AntWeb
Definition: Dbtag.hpp:68
@ eDbtagType_H_InvDB
Definition: Dbtag.hpp:113
@ eDbtagType_dbEST
Definition: Dbtag.hpp:183
@ eDbtagType_CGD
Definition: Dbtag.hpp:207
@ eDbtagType_HMP
Definition: Dbtag.hpp:108
@ eDbtagType_MGD
Definition: Dbtag.hpp:191
@ eDbtagType_UNITE
Definition: Dbtag.hpp:167
@ eDbtagType_PeptideAtlas
Definition: Dbtag.hpp:219
@ eDbtagType_AmoebaDB
Definition: Dbtag.hpp:228
@ eDbtagType_CryptoDB
Definition: Dbtag.hpp:229
@ eDbtagType_GEO
Definition: Dbtag.hpp:98
@ eDbtagType_CDD
Definition: Dbtag.hpp:80
@ eDbtagType_MIM
Definition: Dbtag.hpp:129
@ eDbtagType_PGN
Definition: Dbtag.hpp:141
@ eDbtagType_TubercuList
Definition: Dbtag.hpp:205
@ eDbtagType_IntrepidBio
Definition: Dbtag.hpp:123
@ eDbtagType_Ensembl
Definition: Dbtag.hpp:221
@ eDbtagType_MGI
Definition: Dbtag.hpp:128
@ eDbtagType_ECOCYC
Definition: Dbtag.hpp:87
@ eDbtagType_ISD
Definition: Dbtag.hpp:119
@ eDbtagType_RBGE_herbarium
Definition: Dbtag.hpp:151
@ eDbtagType_VBASE2
Definition: Dbtag.hpp:172
@ eDbtagType_BGD
Definition: Dbtag.hpp:77
@ eDbtagType_ApiDB_ToxoDB
Definition: Dbtag.hpp:72
@ eDbtagType_HPRD
Definition: Dbtag.hpp:111
@ eDbtagType_Phytozome
Definition: Dbtag.hpp:145
@ eDbtagType_MedGen
Definition: Dbtag.hpp:206
@ eDbtagType_TrichDB
Definition: Dbtag.hpp:236
@ eDbtagType_Araport
Definition: Dbtag.hpp:216
@ eDbtagType_SubtiList
Definition: Dbtag.hpp:163
@ eDbtagType_BEEBASE
Definition: Dbtag.hpp:193
@ eDbtagType_taxon
Definition: Dbtag.hpp:190
@ eDbtagType_REBASE
Definition: Dbtag.hpp:152
@ eDbtagType_APHIDBASE
Definition: Dbtag.hpp:62
@ eDbtagType_FBOL
Definition: Dbtag.hpp:93
@ eDbtagType_AllianceGenome
Definition: Dbtag.hpp:226
@ eDbtagType_PiroplasmaDB
Definition: Dbtag.hpp:233
@ eDbtagType_EnsemblGenomes
Definition: Dbtag.hpp:202
@ eDbtagType_UniProt_TrEMBL
Definition: Dbtag.hpp:170
@ eDbtagType_dbClone
Definition: Dbtag.hpp:181
@ eDbtagType_Vega
Definition: Dbtag.hpp:175
@ eDbtagType_BioSample
Definition: Dbtag.hpp:210
@ eDbtagType_PseudoCAP
Definition: Dbtag.hpp:222
@ eDbtagType_HSSP
Definition: Dbtag.hpp:112
@ eDbtagType_CK
Definition: Dbtag.hpp:82
@ eDbtagType_GDB
Definition: Dbtag.hpp:97
@ eDbtagType_ToxoDB
Definition: Dbtag.hpp:235
@ eDbtagType_OrthoMCL
Definition: Dbtag.hpp:135
@ eDbtagType_NCBIOrtholog
Definition: Dbtag.hpp:239
@ eDbtagType_WorfDB
Definition: Dbtag.hpp:176
@ eDbtagType_TIGRFAM
Definition: Dbtag.hpp:165
@ eDbtagType_InterimID
Definition: Dbtag.hpp:121
@ eDbtagType_ApiDB_CryptoDB
Definition: Dbtag.hpp:70
@ eDbtagType_NextDB
Definition: Dbtag.hpp:134
@ eDbtagType_miRBase
Definition: Dbtag.hpp:188
@ eDbtagType_ZFIN
Definition: Dbtag.hpp:179
@ eDbtagType_IRD
Definition: Dbtag.hpp:118
@ eDbtagType_ISFinder
Definition: Dbtag.hpp:120
@ eDbtagType_VGNC
Definition: Dbtag.hpp:217
@ eDbtagType_MicrosporidiaDB
Definition: Dbtag.hpp:232
@ eDbtagType_ApiDB_PlasmoDB
Definition: Dbtag.hpp:71
@ eDbtagType_GenBank
Definition: Dbtag.hpp:209
@ eDbtagType_VISTA
Definition: Dbtag.hpp:214
@ eDbtagType_RBGE_garden
Definition: Dbtag.hpp:150
@ eDbtagType_UniSTS
Definition: Dbtag.hpp:171
@ eDbtagType_PlasmoDB
Definition: Dbtag.hpp:234
@ eDbtagType_MycoBank
Definition: Dbtag.hpp:131
@ eDbtagType_RNAcentral
Definition: Dbtag.hpp:218
@ eDbtagType_SGN
Definition: Dbtag.hpp:159
@ eDbtagType_ENSEMBL
Definition: Dbtag.hpp:89
@ eDbtagType_ISHAM_ITS
Definition: Dbtag.hpp:211
@ eDbtagType_Greengenes
Definition: Dbtag.hpp:106
@ eDbtagType_MaizeGDB
Definition: Dbtag.hpp:130
@ eDbtagType_Xenbase
Definition: Dbtag.hpp:178
@ eDbtagType_SRA
Definition: Dbtag.hpp:199
@ eDbtagType_JGIDB
Definition: Dbtag.hpp:125
@ eDbtagType_WormBase
Definition: Dbtag.hpp:177
@ eDbtagType_VBRC
Definition: Dbtag.hpp:173
@ eDbtagType_AFTOL
Definition: Dbtag.hpp:61
@ eDbtagType_LocusID
Definition: Dbtag.hpp:127
@ eDbtagType_PseudoCap
Definition: Dbtag.hpp:147
@ eDbtagType_GABI
Definition: Dbtag.hpp:96
EIsEstOrGss
Definition: Dbtag.hpp:280
@ eIsEstOrGss_Yes
Definition: Dbtag.hpp:282
bool GetDBFlags(bool &is_refseq, bool &is_src, string &correct_caps) const
Definition: Dbtag.cpp:327
bool IsApproved(EIsRefseq refseq=eIsRefseq_No, EIsSource is_source=eIsSource_No, EIsEstOrGss is_est_or_gss=eIsEstOrGss_No) const
Definition: Dbtag.cpp:215
int Compare(const CDbtag &dbt2) const
Definition: Dbtag.cpp:176
bool IsSkippable(void) const
Definition: Dbtag.cpp:281
int Compare(const CObject_id &oid2) const
Definition: Object_id.cpp:145
static constexpr auto construct(typename _Enabled::type const (&init)[N])
Include a standard set of the NCBI C++ Toolkit most basic headers.
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:90
static const char * str(char *buf, int n)
Definition: stats.c:84
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
int Compare(const T &s1, const T &s2) const
Return difference between "s1" and "s2".
Definition: ncbistr.hpp:5813
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
PNocase_Generic< string > PNocase
Definition: ncbistr.hpp:4902
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3305
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5378
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
Definition: ncbistr.hpp:2510
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
static const char label[]
TTag & SetTag(void)
Assign a value to Tag data member.
Definition: Dbtag_.hpp:276
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
Definition: Dbtag_.hpp:214
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
TDb & SetDb(void)
Assign a value to Db data member.
Definition: Dbtag_.hpp:243
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
std::false_type tagStrNocase
const char * tag
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
int isupper(Uchar c)
Definition: ncbictype.hpp:70
std::string_view m_alias
Definition: Dbtag.cpp:61
constexpr TApprovedDbTags(const CDbtag::TDbtagGroup &_group, CDbtag::EDbtagType _tag)
Definition: Dbtag.cpp:64
CDbtag::EDbtagType m_tag
Definition: Dbtag.cpp:60
constexpr TApprovedDbTags()=default
constexpr TApprovedDbTags(const CDbtag::TDbtagGroup &_group, CDbtag::EDbtagType _tag, string_view _alias)
Definition: Dbtag.cpp:66
CDbtag::TDbtagGroup m_groups
Definition: Dbtag.cpp:59
Modified on Fri Sep 20 14:58:10 2024 by modify_doxy.py rev. 669887