NCBI C++ ToolKit
dbsource_item.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: dbsource_item.cpp 100638 2023-08-22 16:05:52Z foleyjp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Mati Shomrat, NCBI
27 *
28 * File Description:
29 *
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiutil.hpp>
34 
36 #include <objects/general/Date.hpp>
46 #include <objects/seq/Bioseq.hpp>
50 #include <objmgr/bioseq_handle.hpp>
51 #include <objmgr/scope.hpp>
52 #include <objmgr/feat_ci.hpp>
53 #include <objmgr/seqdesc_ci.hpp>
54 #include <objmgr/bioseq_ci.hpp>
56 #include <objmgr/util/sequence.hpp>
57 
62 #include <objmgr/util/objutil.hpp>
63 
64 
67 
68 
70  CFlatItem(&ctx)
71 {
72  x_GatherInfo(ctx);
73 }
74 
76 {
77  return eItem_DbSource;
78 }
79 
81 (IFormatter& formatter,
82  IFlatTextOStream& text_os) const
83 
84 {
85  formatter.FormatDBSource(*this, text_os);
86 }
87 
88 
89 static int s_ScoreForDBSource(const CSeq_id_Handle& idh)
90 {
91  CConstRef<CSeq_id> id = idh.GetSeqId();
92  switch (id->Which()) {
93  case CSeq_id::e_not_set: return kMax_Int;
94  case CSeq_id::e_Gi: return 31;
95  case CSeq_id::e_Giim: return 30;
96  case CSeq_id::e_Local: case CSeq_id::e_General: return 20;
97  case CSeq_id::e_Other: return 18;
98  case CSeq_id::e_Gibbmt: return 16;
99  case CSeq_id::e_Gibbsq: case CSeq_id::e_Patent: return 15;
100  case CSeq_id::e_Pdb: return 12;
101  default: return 10;
102  }
103 }
104 
105 
107 {
108  return FindBestChoice(scope.GetIds(idh), s_ScoreForDBSource);
109 }
110 
111 
112 static void s_AddToUniqueIdList(const CSeq_id_Handle& idh, vector<CSeq_id_Handle>& unique_ids)
113 {
114  ITERATE (vector<CSeq_id_Handle>, it, unique_ids) {
115  if (idh == *it) {
116  return;
117  }
118  }
119  unique_ids.push_back(idh);
120 }
121 
122 
123 static bool s_HasLocalBioseq(const CSeq_loc& loc, const CSeq_entry_Handle& tse)
124 {
125  CScope& scope = tse.GetScope();
126  for (CSeq_loc_CI li(loc); li; ++li) {
128  scope.GetBioseqHandleFromTSE(li.GetSeq_id(), tse);
129  if (local) {
130  return true;
131  }
132  }
133  return false;
134 }
135 
136 
138 {
139  const bool bHtml = ctx.Config().DoHTML();
140 
141  const CBioseq_Handle& seq = ctx.GetHandle();
142  const CBioseq_Handle::TId& ids = seq.GetId();
144 
145  if (!idh) {
146  m_DBSource.push_back("UNKNOWN");
147  return;
148  }
149 
150  switch (idh.Which()) {
151 // LCOV_EXCL_START
152  case CSeq_id::e_Pir:
153  m_DBSource.push_back(x_FormatDBSourceID(idh));
155  break;
156 // LCOV_EXCL_STOP
157 
159  m_DBSource.push_back(x_FormatDBSourceID(idh));
160  x_AddSPBlock(ctx);
161  break;
162 // LCOV_EXCL_START
163  case CSeq_id::e_Prf:
164  m_DBSource.push_back(x_FormatDBSourceID(idh));
166  break;
167 // LCOV_EXCL_STOP
168 
169  case CSeq_id::e_Pdb:
170  m_DBSource.push_back(x_FormatDBSourceID(idh));
172  break;
173 
174  case CSeq_id::e_General:
175  if (!NStr::StartsWith(idh.GetSeqId()->GetGeneral().GetDb(), "PID")) {
176  m_DBSource.push_back("UNKNOWN");
177  break;
178  }
179  // otherwise, fall through
182  case CSeq_id::e_Gi: case CSeq_id::e_Ddbj:
184  {
185  CScope& scope = ctx.GetScope();
186  vector<CSeq_id_Handle> unique_ids;
187 
188  // find generating feature
189  const CSeq_feat* feat = sequence::GetCDSForProduct(seq);
190  if (! feat) {
191  // may also be protein product of mature peptide feature
192  feat = sequence::GetPROTForProduct(seq);
193  }
194 
195  if (feat) {
196  const CSeq_loc& loc = feat->GetLocation();
197  CSeq_entry_Handle topLevelEntry = seq.GetTopLevelEntry();
198  if (s_HasLocalBioseq(loc, topLevelEntry)) {
199  for (CSeq_loc_CI li(loc); li; ++li) {
200  s_AddToUniqueIdList(li.GetSeq_id_Handle(), unique_ids);
201  }
202  } /* else {
203  const CSeq_id *cds_seq_id = loc.GetId();
204  if (cds_seq_id && cds_seq_id->IsGi()) {
205  CSeq_id_Base::TGi cds_gi = cds_seq_id->GetGi();
206  s_AddToUniqueIdList( CSeq_id_Handle::GetHandle(cds_gi), unique_ids);
207  }
208  } */
209  }
210 
211  string str;
212  ITERATE (vector<CSeq_id_Handle>, it, unique_ids) {
213  CSeq_id_Handle idh2 = s_FindBestChoiceForDbsource(*it, scope);
214  if (idh2) {
215  str.erase();
216  str = x_FormatDBSourceID(idh2);
217  if (!NStr::IsBlank(str)) {
218  m_DBSource.push_back(str);
219  }
220  } else {
221  m_DBSource.push_back( x_FormatDBSourceID( *it ) );
222  }
223  }
224 
225  if (m_DBSource.empty() && feat) {
226  const CSeq_loc& loc = feat->GetLocation();
227  const CSeq_id *cds_seq_id = loc.GetId();
228  if (cds_seq_id && cds_seq_id->IsGi()) {
229  CSeq_id_Base::TGi cds_gi = cds_seq_id->GetGi();
230  // s_AddToUniqueIdList( CSeq_id_Handle::GetHandle(cds_gi), unique_ids);
232  }
233  }
234 
235  if (m_DBSource.empty()) {
236  m_DBSource.push_back(x_FormatDBSourceID(idh));
237  }
238  break;
239  }
240  default:
241  m_DBSource.push_back("UNKNOWN");
242  }
243 
244  // turn double-quotes to single-quotes in all m_DBSources,
245  // except inside HTML tags
246  NON_CONST_ITERATE( list<string>, it, m_DBSource ) {
247  if( bHtml ) {
249  } else {
250  replace( it->begin(), it->end(), '\"', '\'' );
251  }
252  }
253 }
254 
255 // LCOV_EXCL_START
257 {
258  // In this function, the newlines seem weird because the C toolkit
259  // outputs this way. Hopefully in the future we can do something
260  // more consistent.
261 
262 
263  CSeqdesc_CI dsc(ctx.GetHandle(), CSeqdesc::e_Pir);
264  if ( !dsc ) {
265  return;
266  }
267 
268  x_SetObject(*dsc);
269 
270  bool containsHostLine = false; // try to match C's whitespace
271 
272  const CPIR_block& pir = dsc->GetPir();
273  if (pir.CanGetHost()) {
274  m_DBSource.push_back("host:" + pir.GetHost() + "\n");
275  containsHostLine = true;
276  }
277  if (pir.CanGetSource()) {
278  m_DBSource.push_back("source: " + pir.GetSource() + "\n");
279  }
280  if (pir.CanGetSummary()) {
281  m_DBSource.push_back("summary: " + pir.GetSummary() + "\n");
282  }
283  if (pir.CanGetGenetic()) {
284  m_DBSource.push_back("genetic: " + pir.GetGenetic() + "\n");
285  }
286  if (pir.CanGetIncludes()) {
287  m_DBSource.push_back("includes: " + pir.GetIncludes() + "\n");
288  }
289  if (pir.CanGetPlacement()) {
290  m_DBSource.push_back("placement: " + pir.GetPlacement() + "\n");
291  }
292  if (pir.CanGetSuperfamily()) {
293  m_DBSource.push_back("superfamily: " + pir.GetSuperfamily() + "\n");
294  }
295  if (pir.CanGetCross_reference()) {
296  m_DBSource.push_back("xref: " + pir.GetCross_reference() + "\n");
297  }
298  if (pir.CanGetDate()) {
299  m_DBSource.push_back("PIR dates: " + pir.GetDate() + "\n");
300  }
301  if (pir.CanGetHad_punct() && pir.GetHad_punct() ) {
302  m_DBSource.push_back("punctuation in sequence");
303  }
304  if (pir.CanGetSeqref()) {
305  list<string> xrefs;
306  ITERATE (CPIR_block::TSeqref, it, pir.GetSeqref()) {
307  const char* type = nullptr;
308  switch ((*it)->Which()) {
309  case CSeq_id::e_Genbank: type = "genbank "; break;
310  case CSeq_id::e_Embl: type = "embl "; break;
311  case CSeq_id::e_Pir: type = "pir "; break;
312  case CSeq_id::e_Swissprot: type = "swissprot "; break;
313  case CSeq_id::e_Gi: type = "gi: "; break;
314  case CSeq_id::e_Ddbj: type = "ddbj "; break;
315  case CSeq_id::e_Prf: type = "prf "; break;
316  default: break;
317  }
318  if (type) {
319  xrefs.push_back(type + (*it)->GetSeqIdString(true));
320  }
321  }
322  if ( !xrefs.empty() ) {
323  m_DBSource.push_back("xrefs: " + NStr::Join(xrefs, ", "));
324  }
325  }
326 
327  NON_CONST_ITERATE (list<string>, it, m_DBSource) {
328  if( &*it == &m_DBSource.front() ) {
329  // first one has newline AFTER the semicolon
330  *it += ";\n";
331  // match C toolkit
332  /* if( (it + 1) != m_DBSource.end() && ! NStr::StartsWith(*(it + 1), "host") ) {
333  *it += ";\n";
334  } */
335  } else if( &*it == &m_DBSource.back() ) {
336  // last one ends in period
337  *it += ".";
338  } else {
339  // The C version puts newlines before some of these for some reason
340  *it += ";\n";
341  }
342  // *it += (&*it == &m_DBSource.back() ? "." : "\n;");
343  }
344 
345  // match C's whitespace
346  if( ! containsHostLine ) {
347  m_DBSource.front() += "\n";
348  }
349 }
350 // LCOV_EXCL_STOP
351 
352 static void s_FormatDate(const CDate& date, string& str)
353 {
354  CTime time = date.AsCTime();
355  str += time.AsString(CTimeFormat("b d, Y"));
356 }
357 
358 
360 {
361  CSeqdesc_CI dsc(ctx.GetHandle(), CSeqdesc::e_Sp);
362  if ( !dsc ) {
363  return;
364  }
365  x_SetObject(*dsc);
366 
367  const CSP_block& sp = dsc->GetSp();
368  switch (sp.GetClass()) {
370  m_DBSource.push_back("class: standard.");
371  break;
373  m_DBSource.push_back("class: preliminary.");
374  break;
375  default:
376  break;
377  }
378  // laid out slightly differently from the C version, but I think that's
379  // a bug in the latter (which runs some things together)
380  if (sp.CanGetExtra_acc() && !sp.GetExtra_acc().empty() ) {
381  m_DBSource.push_back("extra accessions:"
382  + NStr::Join(sp.GetExtra_acc(), ","));
383  }
384  if (sp.GetImeth()) {
385  m_DBSource.push_back("seq starts with Met");
386  }
387  if (sp.CanGetPlasnm() && !sp.GetPlasnm().empty() ) {
388  m_DBSource.push_back("plasmid:" + NStr::Join(sp.GetPlasnm(), ","));
389  }
390  if (sp.CanGetCreated()) {
391  string s("created: ");
392  //sp.GetCreated().GetDate(&s, "%3N %D %Y");
393  s_FormatDate(sp.GetCreated(), s);
394  m_DBSource.push_back(s + '.');
395  }
396  if (sp.CanGetSequpd()) {
397  string s("sequence updated: ");
398  //sp.GetSequpd().GetDate(&s, "%3N %D %Y");
399  s_FormatDate(sp.GetSequpd(), s);
400  m_DBSource.push_back(s + '.');
401  }
402  if (sp.CanGetAnnotupd()) {
403  string s("annotation updated: ");
404  //sp.GetAnnotupd().GetDate(&s, "%3N %D %Y");
405  s_FormatDate(sp.GetAnnotupd(), s);
406  m_DBSource.push_back(s + '.');
407  }
408  if (sp.CanGetSeqref() && !sp.GetSeqref().empty() ) {
409  list<string> xrefs;
410  ITERATE (CSP_block::TSeqref, it, sp.GetSeqref()) {
412  CSeq_id_Handle best = sequence::GetId(idh, ctx.GetScope(),
414  if ( !best ) {
415  best = idh;
416  }
417  if (best) {
418  string acc = best.GetSeqId()->GetSeqIdString(true);
419  xrefs.push_back(acc);
420  }
421  /**
422  const char* s = nullptr;
423  switch ((*it)->Which()) {
424  case CSeq_id::e_Genbank: s = "genbank accession "; break;
425  case CSeq_id::e_Embl: s = "embl accession "; break;
426  case CSeq_id::e_Pir: s = "pir locus "; break;
427  case CSeq_id::e_Swissprot: s = "swissprot accession "; break;
428  case CSeq_id::e_Gi: s = "gi: "; break;
429  case CSeq_id::e_Ddbj: s = "ddbj accession "; break;
430  case CSeq_id::e_Prf: s = "prf accession "; break;
431  case CSeq_id::e_Pdb: s = "pdb accession "; break;
432  case CSeq_id::e_Tpg: s = "genbank third party accession "; break;
433  case CSeq_id::e_Tpe: s = "embl third party accession "; break;
434  case CSeq_id::e_Tpd: s = "ddbj third party accession "; break;
435  default: break;
436  }
437  if ( s ) {
438  string acc = (*it)->GetSeqIdString(true);
439  xrefs.push_back(s + acc);
440  }
441  **/
442  }
443  if ( !xrefs.empty() ) {
444  m_DBSource.push_back("xrefs: " + NStr::Join(xrefs, ", "));
445  }
446  }
447  if (sp.CanGetDbref() && !sp.GetDbref().empty() ) {
448  list<string> xrefs;
449  ITERATE (CSP_block::TDbref, it, sp.GetDbref()) {
450  const CObject_id& tag = (*it)->GetTag();
451  string id = (tag.IsStr() ? tag.GetStr()
452  : NStr::IntToString(tag.GetId()));
453  string db = (*it)->GetDb();
454  if ( db == "MIM") {
455  if (ctx.Config().DoHTML()) {
456  xrefs.push_back
457  ("MIM <a href=\""
458  "https://omim.org/entry/" + id
459  + "\">" + id + "</a>");
460  } else {
461  xrefs.push_back("MIM:" + id);
462  }
463  } else {
464  // For example, HGNC has HGNC as part of its identifier, so we may need to eliminate
465  // such redundancies (example accession: Q02094.1)
466  if( id.substr(0, db.length() + 1) == (db + ":") ) {
467  xrefs.push_back(id); // in this case, id already has db at beginning
468  } else {
469  xrefs.push_back(db + ':' + id); // no space(!)
470  }
471  }
472  }
473  m_DBSource.push_back
474  ("xrefs (non-sequence databases): " + NStr::Join(xrefs, ", "));
475  }
476 }
477 
478 // LCOV_EXCL_START
480 {
481  CSeqdesc_CI dsc(ctx.GetHandle(), CSeqdesc::e_Prf);
482  if ( !dsc ) {
483  return;
484  }
485 
486  x_SetObject(*dsc);
487 
488  const CPRF_block& prf = dsc->GetPrf();
489  if (prf.CanGetExtra_src()) {
490  const CPRF_ExtraSrc& es = prf.GetExtra_src();
491  if (es.CanGetHost()) {
492  m_DBSource.push_back("host:" + es.GetHost());
493  }
494  if (es.CanGetPart()) {
495  m_DBSource.push_back("part: " + es.GetPart());
496  }
497  if (es.CanGetState()) {
498  m_DBSource.push_back("state: " + es.GetState());
499  }
500  if (es.CanGetStrain()) {
501  m_DBSource.push_back("strain: " + es.GetStrain());
502  }
503  if (es.CanGetTaxon()) {
504  m_DBSource.push_back("taxonomy: " + es.GetTaxon());
505  }
506  }
507  NON_CONST_ITERATE (list<string>, it, m_DBSource) {
508  *it += (&*it == &m_DBSource.back() ? '.' : ';');
509  }
510 }
511 // LCOV_EXCL_STOP
512 
513 
515 {
516  CSeqdesc_CI dsc(ctx.GetHandle(), CSeqdesc::e_Pdb);
517  if ( !dsc ) {
518  return;
519  }
520 
521  x_SetObject(*dsc);
522 
523  const CPDB_block& pdb = dsc->GetPdb();
524  {{
525  string s("deposition: ");
526  s_FormatDate(pdb.GetDeposition(), s);
527  m_DBSource.push_back(s);
528  }}
529  m_DBSource.push_back("class: " + pdb.GetClass());
530  if (!pdb.GetSource().empty() ) {
531  m_DBSource.push_back("source: " + x_FormatPDBSource(pdb));
532  }
533  if (pdb.CanGetExp_method()) {
534  m_DBSource.push_back("Exp. method: " + pdb.GetExp_method());
535  }
536  if (pdb.CanGetReplace()) {
537  const CPDB_replace& rep = pdb.GetReplace();
538  if ( !rep.GetIds().empty() ) {
539  m_DBSource.push_back
540  ("ids replaced: " + x_FormatPDBSource(pdb));
541  }
542  string s("replacement date: ");
543  DateToString(rep.GetDate(), s);
544  m_DBSource.push_back(s);
545  }
546  NON_CONST_ITERATE (list<string>, it, m_DBSource) {
547  *it += (&*it == &m_DBSource.back() ? '.' : ';');
548  }
549 }
550 
551 
553 {
555  if (idh) {
556  id = idh.GetSeqId();
557  }
558  if (!id) {
559  return kEmptyStr;
560  }
561 
562  CSeq_id::E_Choice choice = id->Which();
563 
564  switch (choice) {
565  case CSeq_id::e_Local:
566  {{
567  const CObject_id& oi = id->GetLocal();
568  return (oi.IsStr() ? oi.GetStr() : NStr::IntToString(oi.GetId()));
569  }}
570  case CSeq_id::e_Gi:
571  {{
572  return "gi: " + NStr::NumericToString(id->GetGi());
573  }}
574  case CSeq_id::e_Pdb:
575  {{
576  const CPDB_seq_id& pdb = id->GetPdb();
577  string s("pdb: "), sep;
578  if ( !pdb.GetMol().Get().empty() ) {
579  s += "molecule " + pdb.GetMol().Get();
580  sep = ", ";
581  }
582  if (pdb.IsSetChain() && pdb.GetChain() > 0) {
583  s += sep + "chain " + NStr::IntToString(pdb.GetChain());
584  sep = ", ";
585  }
586  if (pdb.IsSetChain_id()) {
587  s += sep + "chain " + pdb.GetChain_id();
588  sep = ", ";
589  }
590  if (pdb.CanGetRel()) {
591  s += sep + "release ";
592  s_FormatDate(pdb.GetRel(), s);
593  sep = ", ";
594  }
595  return s;
596  }}
597  default:
598  {{
599  const CTextseq_id* tsid = id->GetTextseq_Id();
600  if (! tsid) {
601  return kEmptyStr;
602  }
603  string s, sep, comma, ht;
604  bool is_uniprot = false;
605  switch (choice) {
606  case CSeq_id::e_Embl: s = "embl "; comma = ","; break;
607  case CSeq_id::e_Other: s = "REFSEQ: "; break;
608  case CSeq_id::e_Swissprot: s = "UniProtKB: "; is_uniprot = true; comma = ","; break;
609  case CSeq_id::e_Pir: s = "UniProtKB: "; is_uniprot = true; break;
610  case CSeq_id::e_Prf: s = "prf: "; break;
611  default: break;
612  }
613  if (tsid->CanGetName()) {
614  s += "locus " + tsid->GetName();
615  sep = " ";
616  } else {
617  comma.erase();
618  }
619  if (tsid->CanGetAccession()) {
620  string acc = tsid->GetAccession();
621  if (tsid->CanGetVersion() &&
622  choice != CSeq_id::e_Swissprot) {
623  acc += '.' + NStr::IntToString(tsid->GetVersion());
624  }
625 #if 0
626  GetContext()->Config().GetHTMLFormatter().FormatNucId(ht, *idh.GetSeqId(), GetContext()->GetScope().GetGi(idh), acc);
627 #else
628  if (is_uniprot) {
630  } else {
632  GI_TO(TIntId, GetContext()->GetScope().GetGi(idh)), acc);
633  }
634 #endif
635  s += comma + sep + "accession " + ht;
636  sep = " ";
637  }
638  /**
639  if (tsid->CanGetRelease()) {
640  s += sep + "release " + tsid->GetRelease();
641  }
642  **/
643  if (id->IsSwissprot()) {
644  s += ';';
645  }
646  return s;
647  }}
648  }
649 
650  return kEmptyStr;
651 }
652 
654 {
655  if( ! pdb.IsSetSource() || pdb.GetSource().empty() ) {
656  return kEmptyStr;
657  }
658 
659  const bool bIsHtml = ( GetContext() && GetContext()->Config().DoHTML() );
660 
661  string answer;
662  const CPDB_block::TSource & source = pdb.GetSource();
663  ITERATE( CPDB_block::TSource, source_iter, source ) {
664  const string & a_source = *source_iter;
665  if( ! answer.empty() ) {
666  answer += ", ";
667  }
668 
669  const static string kMmdbIdPrefix = "Mmdb_id:";
670  string prefix;
671  string url;
672  string url_suffix;
673  if( bIsHtml && x_ExtractLinkableSource(a_source, prefix, url, url_suffix) ) {
674  answer += prefix;
675  answer += " <a href=\"" + url + url_suffix + "\">";
676  answer += url_suffix;
677  answer += "</a>";
678  } else {
679  answer += a_source;
680  }
681  }
682 
683  return answer;
684 }
685 
687  const string & a_source,
688  string & out_prefix,
689  string & out_url,
690  string & out_url_suffix )
691 {
692  const static struct {
693  string m_prefix;
694  string m_url;
695  bool m_must_be_all_digits;
696  } potentialPrefixes[] = {
697  { "Mmdb_id:", "https://www.ncbi.nlm.nih.gov/Structure/mmdb/mmdbsrv.cgi?uid=", true }
698  };
699 
700  const static size_t numPotentialPrefixes = sizeof(potentialPrefixes)/sizeof(potentialPrefixes[0]);
701 
702  for( size_t idx = 0; idx < numPotentialPrefixes; ++idx ) {
703  const string & prefix = potentialPrefixes[idx].m_prefix;
704  const string & url = potentialPrefixes[idx].m_url;
705  const bool must_be_all_digits = potentialPrefixes[idx].m_must_be_all_digits;
706 
707  if( a_source.length() <= prefix.length() ) {
708  continue;
709  }
710 
711  if( ! NStr::StartsWith(a_source, prefix, NStr::eNocase) ) {
712  continue;
713  }
714 
715  // first_non_space_pos points to first non-space character after the prefix.
716  string::size_type first_non_space_pos = prefix.length();
717  for( ; first_non_space_pos < a_source.length(); ++first_non_space_pos ) {
718  if( ! isspace(a_source[first_non_space_pos]) ) {
719  break;
720  }
721  }
722  if( first_non_space_pos >= a_source.length() ) {
723  continue;
724  }
725 
726  // some require extra test to make sure they're all digits
727  if( must_be_all_digits ) {
728  bool non_digit_found = false;
729  string::size_type test_pos = first_non_space_pos;
730  for( ; test_pos < a_source.length(); ++test_pos ) {
731  if( ! isdigit(a_source[test_pos]) ) {
732  non_digit_found = true;
733  break;
734  }
735  }
736  if( non_digit_found ) {
737  continue;
738  }
739  }
740 
741  // all tests passed, so prepare to give result to caller
742  out_prefix = prefix;
743  out_url = url;
744  out_url_suffix = NStr::TruncateSpaces(a_source.substr(first_non_space_pos));
745  return true;
746  }
747 
748  // didn't find any matches
749  return false;
750 }
751 
752 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
const CFlatFileConfig & Config(void) const
Definition: context.hpp:689
CBioseq_Handle –.
void x_AddSPBlock(CBioseqContext &ctx)
string x_FormatPDBSource(const CPDB_block &pdb)
void Format(IFormatter &formatter, IFlatTextOStream &text_os) const override
void x_AddPDBBlock(CBioseqContext &ctx)
TDBSource m_DBSource
bool x_ExtractLinkableSource(const string &a_source, string &out_prefix, string &out_url, string &out_url_suffix)
void x_AddPIRBlock(CBioseqContext &ctx)
void x_GatherInfo(CBioseqContext &ctx) override
string x_FormatDBSourceID(const CSeq_id_Handle &idh)
EItem GetItemType() const override
void x_AddPRFBlock(CBioseqContext &ctx)
Definition: Date.hpp:53
CTime AsCTime(CTime::ETimeZone tz=CTime::eLocal) const
Definition: Date.cpp:70
const IHTMLFormatter & GetHTMLFormatter() const
bool DoHTML(void) const
CBioseqContext * GetContext(void)
Definition: item_base.hpp:113
void x_SetObject(const CSerialObject &obj)
Definition: item_base.hpp:160
CPDB_block –.
Definition: PDB_block.hpp:66
CPDB_replace –.
Definition: PDB_replace.hpp:66
CPIR_block –.
Definition: PIR_block.hpp:66
CPRF_ExtraSrc –.
CPRF_block –.
Definition: PRF_block.hpp:66
CSP_block –.
Definition: SP_block.hpp:66
CScope –.
Definition: scope.hpp:92
CSeq_entry_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
CTimeFormat –.
Definition: ncbitime.hpp:131
CTime –.
Definition: ncbitime.hpp:296
@ eItem_DbSource
Definition: item.hpp:67
virtual void FormatDBSource(const CDBSourceItem &dbs, IFlatTextOStream &text_os)=0
virtual void FormatNucId(string &str, const CSeq_id &seq_id, TIntId gi, const string &acc_id) const =0
virtual void FormatUniProtId(string &str, const string &prot_id) const =0
Include a standard set of the NCBI C++ Toolkit most basic headers.
CS_CONTEXT * ctx
Definition: t0006.c:12
static bool s_HasLocalBioseq(const CSeq_loc &loc, const CSeq_entry_Handle &tse)
static void s_FormatDate(const CDate &date, string &str)
static void s_AddToUniqueIdList(const CSeq_id_Handle &idh, vector< CSeq_id_Handle > &unique_ids)
static const CSeq_id_Handle s_FindBestChoiceForDbsource(const CSeq_id_Handle &idh, CScope &scope)
static int s_ScoreForDBSource(const CSeq_id_Handle &idh)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
Int8 TIntId
Definition: ncbimisc.hpp:999
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define GI_TO(T, gi)
Definition: ncbimisc.hpp:1085
const TPrim & Get(void) const
Definition: serialbase.hpp:347
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2144
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
CSeq_id::E_Choice Which(void) const
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
Definition: sequence.cpp:2549
const CSeq_feat * GetPROTForProduct(const CBioseq &product, CScope *scope)
Get the mature peptide feature of a protein.
Definition: sequence.cpp:2593
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
TIds GetIds(const CSeq_id &id, TGetFlags flags=0)
Get "native" bioseq ids without filtering and matching.
Definition: scope.cpp:401
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
Definition: scope.cpp:253
vector< CSeq_id_Handle > TId
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TId & GetId(void) const
#define kMax_Int
Definition: ncbi_limits.h:184
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
Definition: ncbitime.cpp:1511
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
NCBI_NS_NCBI::TGi TGi
Definition: Seq_id_.hpp:180
TChain GetChain(void) const
Get the Chain member data.
bool IsSetChain_id(void) const
chain identifier; length-independent generalization of 'chain' Check if a value has been assigned to ...
bool IsSetChain(void) const
Deprecated: 'chain' can't support multiple character PDB chain identifiers (introduced in 2015).
const TName & GetName(void) const
Get the Name member data.
bool CanGetName(void) const
Check if it is safe to call GetName method.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
bool IsSwissprot(void) const
Check if variant Swissprot is selected.
Definition: Seq_id_.hpp:859
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetVersion(void) const
Check if it is safe to call GetVersion method.
const TMol & GetMol(void) const
Get the Mol member data.
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
bool CanGetRel(void) const
Check if it is safe to call GetRel method.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TChain_id & GetChain_id(void) const
Get the Chain_id member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
const TRel & GetRel(void) const
Get the Rel member data.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Prf
PRF SEQDB.
Definition: Seq_id_.hpp:108
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
const TPdb & GetPdb(void) const
Get the variant data.
Definition: Seqdesc_.cpp:538
const TSp & GetSp(void) const
Get the variant data.
Definition: Seqdesc_.cpp:406
const TPrf & GetPrf(void) const
Get the variant data.
Definition: Seqdesc_.cpp:516
const TPir & GetPir(void) const
Get the variant data.
Definition: Seqdesc_.cpp:312
@ e_Pir
PIR specific info.
Definition: Seqdesc_.hpp:120
@ e_Prf
PRF specific information.
Definition: Seqdesc_.hpp:130
@ e_Sp
SWISSPROT specific info.
Definition: Seqdesc_.hpp:125
@ e_Pdb
PDB specific information.
Definition: Seqdesc_.hpp:131
const TDate & GetDate(void) const
Get the Date member data.
const TSource & GetSource(void) const
Get the Source member data.
Definition: PDB_block_.hpp:472
const TExp_method & GetExp_method(void) const
Get the Exp_method member data.
Definition: PDB_block_.hpp:497
list< string > TSource
Definition: PDB_block_.hpp:95
bool IsSetSource(void) const
Check if a value has been assigned to Source data member.
Definition: PDB_block_.hpp:460
bool CanGetExp_method(void) const
Check if it is safe to call GetExp_method method.
Definition: PDB_block_.hpp:491
const TReplace & GetReplace(void) const
Get the Replace member data.
Definition: PDB_block_.hpp:544
const TClass & GetClass(void) const
Get the Class member data.
Definition: PDB_block_.hpp:400
bool CanGetReplace(void) const
Check if it is safe to call GetReplace method.
Definition: PDB_block_.hpp:538
const TDeposition & GetDeposition(void) const
Get the Deposition member data.
Definition: PDB_block_.hpp:370
const TIds & GetIds(void) const
Get the Ids member data.
bool CanGetSeqref(void) const
Check if it is safe to call GetSeqref method.
const TCross_reference & GetCross_reference(void) const
Get the Cross_reference member data.
THad_punct GetHad_punct(void) const
Get the Had_punct member data.
Definition: PIR_block_.hpp:665
const TIncludes & GetIncludes(void) const
Get the Includes member data.
Definition: PIR_block_.hpp:893
const TSummary & GetSummary(void) const
Get the Summary member data.
Definition: PIR_block_.hpp:799
const TPlacement & GetPlacement(void) const
Get the Placement member data.
Definition: PIR_block_.hpp:940
bool CanGetSummary(void) const
Check if it is safe to call GetSummary method.
Definition: PIR_block_.hpp:793
bool CanGetHad_punct(void) const
Check if it is safe to call GetHad_punct method.
Definition: PIR_block_.hpp:652
bool CanGetCross_reference(void) const
Check if it is safe to call GetCross_reference method.
bool CanGetSource(void) const
Check if it is safe to call GetSource method.
Definition: PIR_block_.hpp:746
const THost & GetHost(void) const
Get the Host member data.
Definition: PIR_block_.hpp:705
bool CanGetDate(void) const
Check if it is safe to call GetDate method.
bool CanGetPlacement(void) const
Check if it is safe to call GetPlacement method.
Definition: PIR_block_.hpp:934
bool CanGetSuperfamily(void) const
Check if it is safe to call GetSuperfamily method.
Definition: PIR_block_.hpp:981
const TSource & GetSource(void) const
Get the Source member data.
Definition: PIR_block_.hpp:752
bool CanGetHost(void) const
Check if it is safe to call GetHost method.
Definition: PIR_block_.hpp:699
list< CRef< CSeq_id > > TSeqref
Definition: PIR_block_.hpp:103
const TSeqref & GetSeqref(void) const
Get the Seqref member data.
const TSuperfamily & GetSuperfamily(void) const
Get the Superfamily member data.
Definition: PIR_block_.hpp:987
bool CanGetIncludes(void) const
Check if it is safe to call GetIncludes method.
Definition: PIR_block_.hpp:887
const TDate & GetDate(void) const
Get the Date member data.
const TGenetic & GetGenetic(void) const
Get the Genetic member data.
Definition: PIR_block_.hpp:846
bool CanGetGenetic(void) const
Check if it is safe to call GetGenetic method.
Definition: PIR_block_.hpp:840
const TTaxon & GetTaxon(void) const
Get the Taxon member data.
const TPart & GetPart(void) const
Get the Part member data.
const TExtra_src & GetExtra_src(void) const
Get the Extra_src member data.
Definition: PRF_block_.hpp:209
bool CanGetState(void) const
Check if it is safe to call GetState method.
const THost & GetHost(void) const
Get the Host member data.
const TStrain & GetStrain(void) const
Get the Strain member data.
bool CanGetHost(void) const
Check if it is safe to call GetHost method.
const TState & GetState(void) const
Get the State member data.
bool CanGetStrain(void) const
Check if it is safe to call GetStrain method.
bool CanGetExtra_src(void) const
Check if it is safe to call GetExtra_src method.
Definition: PRF_block_.hpp:203
bool CanGetTaxon(void) const
Check if it is safe to call GetTaxon method.
bool CanGetPart(void) const
Check if it is safe to call GetPart method.
TImeth GetImeth(void) const
Get the Imeth member data.
Definition: SP_block_.hpp:615
const TExtra_acc & GetExtra_acc(void) const
Get the Extra_acc member data.
Definition: SP_block_.hpp:577
TClass GetClass(void) const
Get the Class member data.
Definition: SP_block_.hpp:537
bool CanGetCreated(void) const
Check if it is safe to call GetCreated method.
Definition: SP_block_.hpp:746
bool CanGetDbref(void) const
Check if it is safe to call GetDbref method.
Definition: SP_block_.hpp:696
bool CanGetPlasnm(void) const
Check if it is safe to call GetPlasnm method.
Definition: SP_block_.hpp:646
const TCreated & GetCreated(void) const
Get the Created member data.
Definition: SP_block_.hpp:752
bool CanGetAnnotupd(void) const
Check if it is safe to call GetAnnotupd method.
Definition: SP_block_.hpp:788
bool CanGetExtra_acc(void) const
Check if it is safe to call GetExtra_acc method.
Definition: SP_block_.hpp:571
bool CanGetSeqref(void) const
Check if it is safe to call GetSeqref method.
Definition: SP_block_.hpp:671
list< CRef< CSeq_id > > TSeqref
Definition: SP_block_.hpp:107
const TDbref & GetDbref(void) const
Get the Dbref member data.
Definition: SP_block_.hpp:702
const TPlasnm & GetPlasnm(void) const
Get the Plasnm member data.
Definition: SP_block_.hpp:652
const TAnnotupd & GetAnnotupd(void) const
Get the Annotupd member data.
Definition: SP_block_.hpp:794
list< CRef< CDbtag > > TDbref
Definition: SP_block_.hpp:108
const TSequpd & GetSequpd(void) const
Get the Sequpd member data.
Definition: SP_block_.hpp:773
const TSeqref & GetSeqref(void) const
Get the Seqref member data.
Definition: SP_block_.hpp:677
bool CanGetSequpd(void) const
Check if it is safe to call GetSequpd method.
Definition: SP_block_.hpp:767
@ eClass_standard
conforms to all SWISSPROT checks
Definition: SP_block_.hpp:94
@ eClass_prelim
only seq and biblio checked
Definition: SP_block_.hpp:95
const CharType(& source)[N]
Definition: pointer.h:1149
const char * tag
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
Useful/utility classes and methods.
void DateToString(const CDate &date, string &str, EDateToString format_choice=eDateToString_regular)
Definition: objutil.cpp:1238
bool ConvertQuotesNotInHTMLTags(string &str)
Definition: objutil.cpp:1774
static const char * prefix[]
Definition: pcregrep.c:405
static const char * str(char *buf, int n)
Definition: stats.c:84
Definition: type.c:6
CScope & GetScope()
#define local
Definition: zutil.h:33
Modified on Sat Dec 02 09:19:54 2023 by modify_doxy.py rev. 669887