NCBI C++ ToolKit
link_utils.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: link_utils.cpp 46289 2021-03-03 00:52:22Z evgeniev $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Liangshou Wu
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
34 
40 
42 #include <objmgr/feat_ci.hpp>
43 #include <objmgr/util/sequence.hpp>
44 #include <objmgr/util/feature.hpp>
45 
48 
49 #include <gui/objutils/label.hpp>
50 #include <gui/objutils/utils.hpp>
51 
52 
54 
56 USING_SCOPE(sequence);
57 
58 
59 /////////////////////////////////////////////////////////////////////////////
60 // CLinkUtils::
61 //
62 
63 static const string kCommonUrl = "https://www.ncbi.nlm.nih.gov";
64 
65 
67  const string& tax_id_file,
68  CScope& scope,
69  TLinksMap& links,
70  TSeqPos from,
71  TSeqPos to,
72  bool relative)
73 {
74  CBioseq_Handle bsh = scope.GetBioseqHandle(idh);
75  if (!bsh)
76  return;
77  bool is_na = bsh.IsNa();
78  CSeq_id_Handle source_idh = sequence::GetId(idh, scope,
80 
81  ///
82  /// check to see if we add a genome-specific search link
83  ///
84  TTaxId tax_id = sequence::GetTaxId(bsh);
85  bool genome_specific_search = false;
86  if ( !tax_id_file.empty() ) {
87  CNcbiIfstream istr(tax_id_file.c_str(), IOS_BASE::in);
88  if (istr.good()) {
89  string line;
90  while (NcbiGetlineEOL(istr, line)) {
91  try {
92  if (tax_id == NStr::StringToNumeric<TTaxId>(line)) {
93  genome_specific_search = true;
94  break;
95  }
96  } catch (...) {
97  break;
98  }
99  }
100  }
101  }
102  string source_id_str;
103  CLabel::GetLabel(*source_idh.GetSeqId(), &source_id_str,
104  CLabel::eDefault, &scope);
105 
106  ///
107  /// add a link for sequence retrieval
108  ///
109  string entrezdb_tag;
110  if (is_na) {
111  CSeq_id::EAccessionInfo acc_info =
112  source_idh.GetSeqId()->IdentifyAccession();
113  if ((acc_info & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_est) {
114  entrezdb_tag = "/nucest/";
115  } else if ((acc_info & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_gss) {
116  entrezdb_tag = "/nucgss/";
117  } else {
118  entrezdb_tag = "/nuccore/";
119  }
120  } else {
121  entrezdb_tag = "/protein/";
122  }
123 
124  ///
125  /// add links to self - that is, a link to view the current sequence
126  ///
127  typedef pair<string, string> TPair;
128  const TPair sc_Pairs[] = {
129  TPair("GenBank record", "report=genbank"),
130  TPair("FASTA record", "report=fasta"),
131  };
132 
133  string type = "Extra";
134  string name = "";
135  string label = "";
136  string url = "";
137  const size_t size = sizeof(sc_Pairs) / sizeof(TPair);
138  for (size_t i = 0; i < size; ++i) {
139  name = sc_Pairs[i].first;
140  label = source_id_str;
141 
142  string tag = sc_Pairs[i].second;
143  if ( !is_na ) {
144  tag = NStr::Replace(tag, "genbank", "genpept");
145  }
146  url = entrezdb_tag + source_id_str + "?" + tag;
147  x_AddLink(links, type, name, label, url, relative);
148  }
149 
150  ///
151  /// add a link to BLAST the genomic location
152  ///
153  {{
154  name = "BLAST ";
155  label = source_id_str;
156  string params("QUERY=" + source_id_str);
157  if (from || to) {
158  params += "&QUERY_FROM=";
159  params += NStr::IntToString(from+1);
160  params += "&QUERY_TO=";
161  params += NStr::IntToString(to+1);
162  }
163 
164  string extra_params("&");
165  if (is_na) {
166  name += "nr";
167  extra_params +=
168  "PAGE=Nucleotides&"
169  "PROGRAM=blastn&"
170  "MEGABLAST=on&"
171  "BLAST_PROGRAMS=megaBlast&"
172  "PAGE_TYPE=BlastSearch&"
173  "SHOW_DEFAULTS=on";
174 
175  } else {
176  name += "protein";
177  extra_params +=
178  "PAGE=Proteins&"
179  "PROGRAM=blastp&"
180  "BLAST_PROGRAMS=blastp&"
181  "PAGE_TYPE=BlastSearch&"
182  "SHOW_DEFAULTS=on";
183  }
184  url = "/blast/Blast.cgi?" + params + extra_params;
185  x_AddLink(links, type, name, label, url, relative);
186 
187  // add a link for genome-specific BLAST
188  if (genome_specific_search) {
189  if (!is_na) {
190  params += "&PROGRAM=tblastn";
191  }
192  name = "BLAST to Genome";
193  url = "/genome/seq/BlastGen/BlastGen.cgi?taxid=";
194  url += NStr::NumericToString(tax_id) + "&" + params;
195  x_AddLink(links, type, name, label, url, relative);
196  }
197  }}
198 }
199 
200 
201 
202 void CLinkUtils::AddFeatureLinks(const CSeq_feat& feat,
203  const CBioseq_Handle& handle,
204  const string& tax_id_file,
205  TLinksMap& links,
206  bool relative,
207  bool exclude_self_links)
208 {
209  const CSeq_feat_Base::TLocation& loc_obj = feat.GetLocation();
211 
212  /// guard against the cases where the feature contains location
213  /// with multiple seq-ids
214  if ( !loc_obj.GetId() ) {
215  loc = CSeqUtils::MixLocToLoc(loc_obj, handle);
216  }
217 
218  if ( !loc ) {
219  loc.Reset(&loc_obj);
220  }
221 
222  AddFeatureLinks(feat, *loc, tax_id_file, handle.GetScope(), links, relative, exclude_self_links);
223 }
224 
225 
226 void CLinkUtils::AddFeatureLinks(const CSeq_feat& feat,
227  const CSeq_loc& loc,
228  const string& tax_id_file,
229  CScope& scope,
230  TLinksMap& links,
231  bool relative,
232  bool exclude_self_links)
233 {
234  /// grab the source location identifier and range
235  CBioseq_Handle bsh = scope.GetBioseqHandle(*loc.GetId());
236  bool is_na = bsh.IsNa();
237  CSeq_id_Handle source_idh =
238  sequence::GetIdHandle(loc, &scope);
239  source_idh = sequence::GetId(source_idh, scope,
241  if (!source_idh || source_idh.GetSeqId()->IsLocal()) {
242  return;
243  }
244 
245  TTaxId tax_id = sequence::GetTaxId(bsh);
246  bool genome_specific_search = false;
247  if ( !tax_id_file.empty() ) {
248  CNcbiIfstream istr(tax_id_file.c_str(), IOS_BASE::in);
249  if (istr.good()) {
250  string line;
251  while (NcbiGetlineEOL(istr, line)) {
252  try {
253  if (tax_id == NStr::StringToNumeric<TTaxId>(line)) {
254  genome_specific_search = true;
255  break;
256  }
257  } catch (...) {
258  break;
259  }
260  }
261  }
262  }
263  string source_id_str;
264  CLabel::GetLabel(*source_idh.GetSeqId(), &source_id_str,
265  CLabel::eDefault, &scope);
266  string range_str(kEmptyStr);
267  if ( !loc.IsWhole() ) {
268  range_str = " (";
269  range_str += NStr::IntToString(loc.GetTotalRange().GetFrom() + 1,
271  range_str += "..";
272  range_str += NStr::IntToString(loc.GetTotalRange().GetTo() + 1,
274  range_str += ")";
275  }
276 
277  string entrezdb_tag;
278  if (is_na) {
279  CSeq_id::EAccessionInfo acc_info =
280  source_idh.GetSeqId()->IdentifyAccession();
281  if ((acc_info & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_est) {
282  entrezdb_tag = "/nucest/";
283  } else if ((acc_info & CSeq_id::eAcc_division_mask) == CSeq_id::eAcc_gss) {
284  entrezdb_tag = "/nucgss/";
285  } else {
286  entrezdb_tag = "/nuccore/";
287  }
288  } else {
289  entrezdb_tag = "/protein/";
290  }
291 
292  TSeqRange range = loc.GetTotalRange();
293  if (range == TSeqRange::GetWhole()) {
294  range.SetFrom(0);
295  range.SetTo(bsh.GetBioseqLength());
296  }
297 
298  /// grab the product location identifier
299  CSeq_id_Handle product_idh;
300  string product_id_str;
301  bool is_product_na = false;
302  if (feat.IsSetProduct()) {
303  product_idh =
304  sequence::GetIdHandle(feat.GetProduct(), &scope);
305  CBioseq_Handle bsh = scope.GetBioseqHandle(product_idh);
306  if (bsh) {
307  is_product_na = bsh.IsNa();
308 
309  product_idh = sequence::GetId(product_idh, scope,
311  CLabel::GetLabel(*product_idh.GetSeqId(), &product_id_str,
312  CLabel::eDefault, &scope);
313  }
314  }
315 
316  ///
317  /// add links to self - that is, a link to view the current sequence
318  ///
319  string type = "Extra";
320  string name = "";
321  string label = "";
322  string url = "";
323  if (range.GetLength() > 5) {
324  typedef pair<string, string> TPair;
325  const TPair sc_Pairs[] = {
326  TPair("GenBank record", "report=genbank"),
327  TPair("FASTA record", "report=fasta"),
328  };
329 
330  const size_t size = sizeof(sc_Pairs) / sizeof(TPair);
331 
332  if (!exclude_self_links) {
333  for (size_t i = 0; i < size; ++i) {
334  name = sc_Pairs[i].first;
335  label = source_id_str + range_str;
336 
337  string tag = sc_Pairs[i].second;
338  if ( !is_na ) {
339  tag = NStr::Replace(tag, "bank", "pept");
340  }
341 
342  url = entrezdb_tag + source_id_str + "?" + tag +
343  "&from=" + NStr::IntToString(range.GetFrom() + 1) +
344  "&to=" + NStr::IntToString(range.GetTo() + 1);
345  x_AddLink(links, type, name, label, url, relative);
346  }
347  }
348 
349  ///
350  /// add a link to BLAST the genomic location
351  ///
352  if (!exclude_self_links || feat.GetData().IsRna())
353  {
354  name = "BLAST ";
355  label = source_id_str + range_str;
356  url = "/blast/Blast.cgi?";
357  string params("QUERY=" + source_id_str);
358  string extra_params("&");
359  if ( !loc.IsWhole() ) {
360  params += "&QUERY_FROM=";
361  params += NStr::IntToString(range.GetFrom() + 1);
362  params += "&QUERY_TO=";
363  params += NStr::IntToString(range.GetTo() + 1);
364  }
365  if (is_na) {
366  name += "nr";
367  extra_params +=
368  "PAGE=Nucleotides&"
369  "PROGRAM=blastn&"
370  "MEGABLAST=on&"
371  "BLAST_PROGRAMS=megaBlast&"
372  "PAGE_TYPE=BlastSearch&"
373  "SHOW_DEFAULTS=on";
374 
375  } else {
376  name += "protein";
377  extra_params +=
378  "PAGE=Proteins&"
379  "PROGRAM=blastp&"
380  "BLAST_PROGRAMS=blastp&"
381  "PAGE_TYPE=BlastSearch&"
382  "SHOW_DEFAULTS=on";
383  }
384  url = url + params + extra_params;
385  x_AddLink(links, type, name, label, url, relative);
386 
387  // add a link for genome-specific BLAST
388  if (genome_specific_search) {
389  if (!is_na) {
390  params += "&PROGRAM=tblastn";
391  }
392  name = "BLAST to Genome";
393  label = source_id_str + range_str;
394  url = "/genome/seq/BlastGen/BlastGen.cgi?taxid=";
395  url += NStr::NumericToString(tax_id) + "&" + params;
396  x_AddLink(links, type, name, label, url, relative);
397  }
398  }
399  }
400 
401 
402  if (feat.IsSetProduct() && product_idh.Which() != CSeq_id::e_Local) {
403  string product_db;
404  if (is_product_na) {
405  product_db = "/nuccore/";
406  } else {
407  product_db = "/protein/";
408  }
409 
410  ///
411  /// add links to self - that is, a link to view the product sequence
412  ///
413  typedef pair<string, string> TPair;
414  const TPair sc_Pairs[] = {
415  TPair("GenBank record", "report=genbank"),
416  TPair("FASTA record", "report=fasta"),
417  TPair("Graphical View", "report=graph")
418  };
419 
420  const size_t size = sizeof(sc_Pairs) / sizeof(TPair);
421 
422  for (size_t i = 0; i < size; ++i) {
423  string tag = sc_Pairs[i].second;
424  if ( !is_product_na ) {
425  tag = NStr::Replace(tag, "bank", "pept");
426  }
427 
428  url = product_db + product_id_str + "?" + tag;
429  x_AddLink(links, type, sc_Pairs[i].first, product_id_str, url, relative);
430  }
431 
432  ///
433  /// add a link to BLAST the product
434  ///
435  {{
436  name = "BLAST ";
437  string params("QUERY=" + product_id_str);
438  string extra_params("&");
439  if (is_product_na) {
440  name += "mRNA";
441  extra_params += "PAGE=Nucleotides&"
442  "PROGRAM=blastn&"
443  "MEGABLAST=on&"
444  "BLAST_PROGRAMS=megaBlast&"
445  "PAGE_TYPE=BlastSearch&"
446  "SHOW_DEFAULTS=on";
447  } else {
448  name += "Protein";
449  extra_params += "PAGE=Proteins&"
450  "PROGRAM=blastp&"
451  "BLAST_PROGRAMS=blastp&"
452  "PAGE_TYPE=BlastSearch&"
453  "SHOW_DEFAULTS=on";
454  }
455 
456  url = "/blast/Blast.cgi?" + params + extra_params;
457  x_AddLink(links, type, name, product_id_str, url, relative);
458 
459  // add a link for genome-specific BLAST
460  if (genome_specific_search) {
461  if (!is_product_na) {
462  params += "&PROGRAM=tblastn";
463  }
464  name = "BLAST to Genome";
465  url = "/genome/seq/BlastGen/BlastGen.cgi?taxid=";
466  url += NStr::NumericToString(tax_id) + "&" + params;
467  x_AddLink(links, type, name, product_id_str, url, relative);
468  }
469  }}
470  }
471 
472  ///
473  /// type-specific links:
474  ///
475 
476 
477  ///
478  /// process dbxrefs
479  ///
480  x_AddDbxrefFeatureLinks(feat, loc, scope, bsh, tax_id, links, true, relative);
481 
482  // extract links stored in seq-feat::exts
483  // example ASN seq-feat that contains links:
484  // seq-feat ::= {
485  // ...
486  // exts {
487  // {
488  // type str "links",
489  // data {
490  // {
491  // label str "GRC: HG-980",
492  // data str "projects/genome/assembly/grc/issue_detail.cgi?ID=HG-980"
493  // }
494  // }
495  // }
496  // }
497  if (feat.IsSetExts()) {
498  ITERATE (CSeq_feat::TExts, iter, feat.GetExts()) {
499  if ( (*iter)->GetType().IsStr() &&
500  NStr::EqualNocase((*iter)->GetType().GetStr(), "links") ) {
501  ITERATE (CUser_object::TData, link_iter, (*iter)->GetData()) {
502  if ((*link_iter)->GetData().IsStr()) {
503  url = (*link_iter)->GetData().GetStr();
504  if ( !url.empty() && (*link_iter)->GetLabel().IsStr()) {
505  label = (*link_iter)->GetLabel().GetStr();
506  name = kEmptyStr;
507  size_t pos = label.find_first_of(":");
508  if (pos != string::npos) {
509  name = NStr::TruncateSpaces(label.substr(0, pos));
510  label = NStr::TruncateSpaces(label.substr(pos + 1));
511  }
512  if ( !NStr::StartsWith(url, "http://") && url[0] != '/') {
513  url = "/" + url;
514  }
515 
516  x_AddLink(links, type, name, label, url, relative);
517  }
518  }
519  }
520  break;
521  }
522  }
523  }
524 
525  if (feat.GetData().Which() == CSeqFeatData::e_Variation &&
526  feat.GetData().GetVariation().CanGetId() &&
527  feat.GetData().GetVariation().GetId().GetDb() == "dbVar") {
528  const CVariation_ref& var = feat.GetData().GetVariation();
529  label = "";
530  if (var.CanGetId() && var.GetId().GetTag().IsStr() &&
531  var.GetId().GetTag().GetStr().find("sv") == 1) {
532  CLabel::GetLabel(feat, &label, CLabel::eContent, &scope);
533  } else if (var.CanGetParent_id()) {
534  if (var.GetParent_id().GetTag().IsId()) {
536  } else {
537  label = var.GetParent_id().GetTag().GetStr();
538  }
539  }
540 
541  if ( !label.empty() ) {
542  name = "dbVar";
543  url = "/dbvar/variants/" + label;
544  x_AddLink(links, type, name, label, url, relative);
545  }
546 
547  if (feat.IsSetExts()) {
548  ITERATE (CSeq_feat::TExts, iter, feat.GetExts()) {
549 
550  if ((*iter)->GetType().IsStr() &&
551  !(*iter)->GetData().empty()) {
552  name = "Other Variant Calls from this Sample";
553  if ((*iter)->GetType().GetStr() == "related calls") {
554  const CUser_object::TData& fields = (*iter)->GetData();
555  if (fields.size() < 5) {
556  ITERATE (CUser_object::TData, f_iter, fields) {
557  label = (*f_iter)->GetLabel().GetStr();
558  url = "/dbvar/variants/" + (*f_iter)->GetData().GetStr();
559  x_AddLink(links, type, name, label, url, relative);
560  }
561  } else if (var.CanGetSample_id()) {
562  string term;
563  if (var.GetSample_id().IsStr()) {
564  term = var.GetSample_id().GetStr();
565  } else {
566  term = NStr::NumericToString(var.GetSample_id().GetId());
567  }
568  CConstRef<CDbtag> study_id = feat.GetNamedDbxref("study_accession");
569  if (study_id) {
570  term += " and " + study_id->GetTag().GetStr();
571  }
572 
573  label = NStr::NumericToString(fields.size());
574  url = "/dbvar/?term=" + term;
575  x_AddLink(links, type, name, label, url, relative);
576  }
577  break;
578  } else if ((*iter)->GetType().GetStr() == "related call count") {
579  string term;
580  ITERATE (CUser_object::TData, f_iter, (*iter)->GetData()) {
581  if ((*f_iter)->GetLabel().IsStr() && (*f_iter)->GetData().IsStr()) {
582  label = (*f_iter)->GetLabel().GetStr();
583  term = (*f_iter)->GetData().GetStr();
584  break;
585  }
586  }
587 
588  if ( !term.empty() ) {
589  CConstRef<CDbtag> study_id = feat.GetNamedDbxref("study_accession");
590  if (study_id) {
591  term += " and " + study_id->GetTag().GetStr();
592  }
593  url = "/dbvar/?term=" + term;
594  x_AddLink(links, type, name, label, url, relative);
595  }
596 
597  break;
598  }
599  }
600  }
601  }
602  }
603 }
604 
606  CScope& scope,
607  TLinksMap& links,
608  bool relative)
609 {
610  if (!feat.IsSetProduct()) {
611  return;
612  }
613 
614  /// grab the product location identifier
615  string product_id_str;
616  bool is_product_na = false;
617  CSeq_id_Handle product_idh = sequence::GetIdHandle(feat.GetProduct(), &scope);
618  CBioseq_Handle bsh = scope.GetBioseqHandle(product_idh);
619  if (bsh) {
620  is_product_na = bsh.IsNa();
621 
622  product_idh = sequence::GetId(product_idh, scope, sequence::eGetId_Best);
623  CLabel::GetLabel(*product_idh.GetSeqId(), &product_id_str, CLabel::eDefault, &scope);
624  }
625 
626  if (!product_idh && product_idh.Which() == CSeq_id::e_Local)
627  return;
628 
629  string type = "Extra";
630  string name = "BLAST ";
631  string label = "";
632  string url = "";
633 
634  string params("QUERY=" + product_id_str);
635  string extra_params("&");
636  if (is_product_na) {
637  name += "mRNA";
638  extra_params += "PAGE=Nucleotides&"
639  "PROGRAM=blastn&"
640  "MEGABLAST=on&"
641  "BLAST_PROGRAMS=megaBlast&"
642  "PAGE_TYPE=BlastSearch&"
643  "SHOW_DEFAULTS=on";
644  }
645  else {
646  name += "Protein";
647  extra_params += "PAGE=Proteins&"
648  "PROGRAM=blastp&"
649  "BLAST_PROGRAMS=blastp&"
650  "PAGE_TYPE=BlastSearch&"
651  "SHOW_DEFAULTS=on";
652  }
653 
654  url = "/blast/Blast.cgi?" + params + extra_params;
655  x_AddLink(links, type, name, product_id_str, url, relative);
656 }
657 
659  const CBioseq_Handle& handle,
660  TLinksMap& links,
661  bool add_parent_gene_dbxrefs,
662  bool relative)
663 {
664  const CSeq_feat_Base::TLocation& loc_obj = feat.GetLocation();
666 
667  /// guard against the cases where the feature contains location
668  /// with multiple seq-ids
669  if (!loc_obj.GetId()) {
670  loc = CSeqUtils::MixLocToLoc(loc_obj, handle);
671  }
672 
673  if (!loc) {
674  loc.Reset(&loc_obj);
675  }
676 
677  AddDbxrefFeatureLinks(feat, *loc, handle.GetScope(), links, add_parent_gene_dbxrefs, relative);
678 }
679 
681  const CSeq_loc& loc,
682  CScope& scope,
683  TLinksMap& links,
684  bool add_parent_gene_dbxrefs,
685  bool relative)
686 {
687  /// grab the source location identifier and range
688  CBioseq_Handle bsh = scope.GetBioseqHandle(*loc.GetId());
689  CSeq_id_Handle source_idh =
690  sequence::GetIdHandle(loc, &scope);
691  source_idh = sequence::GetId(source_idh, scope,
693  if (!source_idh || source_idh.GetSeqId()->IsLocal()) {
694  return;
695  }
696 
697  TTaxId tax_id = sequence::GetTaxId(bsh);
698  x_AddDbxrefFeatureLinks(feat, loc, scope, bsh, tax_id, links, add_parent_gene_dbxrefs, relative);
699 }
700 
// AddDbxrefLinksForSNP: builds SNP-related links for the dbxref 'dbt' and
// appends them to 'links' through x_AddLink(). Does nothing when 'dbt' is empty.
// NOTE(review): the embedded line numbering of this listing skips 701, 726,
// 735, 744, 751, 753-754 and 757, so the first signature line, the conditions
// guarding the Genotype/SNP3D/OMIM/VarVu sections, and the declarations of
// 'sel_gene' and 'location' are not visible here - confirm against the
// original source before editing.
702  const CRef<CDbtag>& dbt,
703  CScope& scope,
704  TLinksMap& links,
705  bool relative)
706 {
707  if (dbt) {
// rsid is the numeric SNP id rendered as text; it is reused in every URL below
708  string rsid{NStr::NumericToString(NSnp::GetRsid(*dbt))};
709 
710  ///
711  /// link for SNP summary
712  ///
713  string type = "Basic";
714  string name = "SNP summary";
715  string label = "rs" + rsid;
716  string url = "/snp/rs" + rsid;
717  x_AddLink(links, type, name, label, url, relative);
718 
// the remaining links are only produced when the feature's SNP bitfield is usable,
// and are filed under the "Extra" type instead of "Basic"
719  CSnpBitfield bf(feat);
720  if(bf.isGood()) {
721  type = "Extra";
722 
723  ///
724  /// link for Genotype information
725  ///
727  name = "Genotype information";
728  url = "/snp/rs" + rsid + "#frequency_tab";
729  x_AddLink(links, type, name, label, url, relative);
730  }
731 
732  ///
733  /// link for SNP3D web page
734  ///
736  name = "SNP3D Page";
737  url = "/projects/SNP/snp3D.cgi?rsnum=" + rsid;
738  x_AddLink(links, type, name, label, url, relative);
739  }
740 
741  ///
742  /// link for OMIM page for this SNP
743  ///
745  name = "OMIM information";
746  url = "/projects/SNP/snp_redirect.cgi?snp_id=" + rsid;
747  x_AddLink(links, type, name, label, url, relative);
748  }
749 
750  // VarVu link for clinical SNPs
752  // try to match genes to the current position
755 
756  if(feat.CanGetLocation()) {
758 
// 'location' is a copy of the feature location with the strand widened to both
759  location.Assign(feat.GetLocation());
760  // allow matching on any strand (SV-3025)
761  location.SetStrand(eNa_strand_both);
762  CFeat_CI feat_iter(scope, location, sel_gene);
763 
// one "Variation viewer" link is added per overlapping gene that has a locus set
764  CConstRef<CSeq_feat> mapped_feat;
765  for ( ; feat_iter; ++feat_iter) {
766  mapped_feat.Reset(&feat_iter->GetMappedFeature());
767  string sGeneLocus;
768  if(mapped_feat->GetData().Which() == CSeqFeatData::e_Gene &&
769  mapped_feat->GetData().GetGene().IsSetLocus()) {
770  sGeneLocus = mapped_feat->GetData().GetGene().GetLocus();
771  }
772  if( !sGeneLocus.empty() ) {
773  name = "Variation viewer";
// NOTE(review): 'label' is appended to, not reset, so with several matching
// genes the label accumulates every earlier locus - confirm this is intended
774  label += " (" + sGeneLocus + ")";
775  url = "/sites/varvu?rs=" + rsid + "&gene=" + sGeneLocus;
776  x_AddLink(links, type, name, label, url, relative);
777  }
778  }
779  }
780  }
781  }
782  }
783 }
784 
785 
787  const string& type, const string& name,
788  const string& label, const string& url,
789  bool relative)
790 {
791  string edited_url(url);
792  if (!relative) {
793  if (!NStr::StartsWith(url, "http://") && !NStr::StartsWith(url, "https://")) {
794  edited_url = kCommonUrl + url;
795  }
796  }
797  else {
798  if (NStr::StartsWith(url, kCommonUrl)) {
799  edited_url = NStr::Replace(url, kCommonUrl, "");
800  }
801  }
802 
803  ((links[type])[name]).push_back(std::pair<string, string>(label, edited_url));
804 }
805 
// x_AddDbxrefFeatureLinks: walks the feature's dbxrefs and adds one "Basic"
// link per dbxref via x_AddLink(); one switch case hands the dbxref to
// AddDbxrefLinksForSNP() instead. For GeneID dbxrefs the label is extended
// with a gene symbol and, optionally, the parent gene's dbxrefs are added too.
// NOTE(review): the embedded line numbering of this listing skips 806, 823,
// 842, 848, 859, 861, 865, 880 and 885, so the first signature line, several
// 'case' labels, the body of the 'relative' URL branch, the tail of the
// selector set-up and the declaration of 'tag' are not visible here - confirm
// against the original source before editing.
807  const CSeq_loc& loc,
808  CScope& scope,
809  CBioseq_Handle& bsh,
810  TTaxId tax_id,
811  TLinksMap& links,
812  bool add_parent_gene_dbxrefs,
813  bool relative)
814 {
815  string type = "Basic";
816  string name = "";
817  string label = "";
818  string url = "";
819  if (feat.IsSetDbxref()) {
820  const CSeq_feat::TDbxref& refs = feat.GetDbxref();
821  ITERATE(CSeq_feat::TDbxref, iter, refs) {
822  switch ((*iter)->GetType()) {
824  AddDbxrefLinksForSNP(feat, *iter, scope, links, relative);
825  break;
826  default:
// split the dbxref label into name and label at the first ':' or,
// failing that, at the last space
827  string label_str;
828  (*iter)->GetLabel(&label_str);
829  size_t pos = label_str.find(":");
830  if (pos == NPOS) {
831  pos = label_str.rfind(" ");
832  }
833 
834  if (pos != NPOS) {
835  name = label_str.substr(0, pos);
836  label = label_str.substr(pos + 1);
837  }
838  else {
// NOTE(review): 'name' is not reset here, so it keeps the value from a
// previously processed dbxref - confirm this is intended
839  label = label_str;
840  }
// the tax-id aware URL is used only when a tax-id is known
841  url = tax_id != ZERO_TAX_ID ? (*iter)->GetUrl(tax_id) : (*iter)->GetUrl();
// dbxrefs without a label or URL produce no link
843  if (label.empty() || url.empty()) {
844  break;
845  }
846 
847  if (relative && NStr::StartsWith(url, kCommonUrl)) {
849  }
850 
851  if ((*iter)->GetType() == CDbtag::eDbtagType_GeneID) {
852  // Here we get Gene ID for a feature, so we can add Gene
853  // Symbol. Unfortunately, Gene Symbol is valid only for
854  // a feature of subtype eSubtype_gene, so we get best
855  // gene here first.
856  CConstRef<CSeq_feat> gene_symbol_feat(&feat);
857  // First attempt - through GetBestGeneFor*
858  switch (feat.GetData().GetSubtype()) {
860  break;
862  gene_symbol_feat =
863  GetBestGeneForCds(feat, scope);
864  break;
866  gene_symbol_feat =
867  GetBestGeneForMrna(feat, scope);
868  break;
869  default:
870  gene_symbol_feat = NULL;
871  break;
872  }
873  if (gene_symbol_feat.Empty()) {
874  // Second attempt, if GetBestGeneFor*
875  // did not find anything, or feature is not CDS or
876  // mRNA and not a gene - get gene with this GeneID
877  int gene_id = (*iter)->GetTag().GetId();
878  SAnnotSelector sel;
879  sel.SetResolveAll()
881  for (CFeat_CI feat_iter(bsh, loc.GetTotalRange(), sel);
882  feat_iter;
883  ++feat_iter)
884  {
886  feat_iter->GetNamedDbxref("GeneID");
887  if (tag && tag->GetTag().GetId() == gene_id) {
888  gene_symbol_feat = feat_iter->GetSeq_feat();
889  break;
890  }
891  }
892  }
893  if (gene_symbol_feat.NotEmpty()) {
// append the gene symbol to the label, e.g. "<id> (<symbol>)"
894  string gene_symbol;
895  feature::GetLabel(gene_symbol_feat.GetObject(),
896  &gene_symbol, feature::fFGL_Content);
897  if (!gene_symbol.empty()) {
898  label += " (" + gene_symbol + ")";
899  }
// for non-gene features, the gene found above is processed for its own dbxrefs
900  if (add_parent_gene_dbxrefs && (CSeqFeatData::eSubtype_gene != feat.GetData().GetSubtype())) {
901  // Add Dbxrefs from the parent gene
902  AddDbxrefFeatureLinks(gene_symbol_feat.GetObject(), loc, scope, links, add_parent_gene_dbxrefs, relative);
903  }
904  }
905  }
906 
907  x_AddLink(links, type, name, label, url, relative);
908  break;
909  }
910  }
911  }
912 }
913 
User-defined methods of the data storage class.
CBioseq_Handle –.
@ eDbtagType_dbSNP
Definition: Dbtag.hpp:185
@ eDbtagType_GeneID
Definition: Dbtag.hpp:104
CFeat_CI –.
Definition: feat_ci.hpp:64
static void AddDbxrefFeatureLinks(const objects::CSeq_feat &feat, const objects::CBioseq_Handle &handle, TLinksMap &links, bool add_parent_gene_dbxrefs=true, bool relative=true)
static void AddSequenceLinks(const objects::CSeq_id_Handle &idh, const string &tax_id_file, objects::CScope &scope, TLinksMap &links, TSeqPos from=0, TSeqPos to=0, bool relative=true)
Definition: link_utils.cpp:66
static void AddDbxrefLinksForSNP(const objects::CSeq_feat &feat, const CRef< objects::CDbtag > &dbt, objects::CScope &scope, TLinksMap &links, bool relative=true)
Definition: link_utils.cpp:701
static void AddBlastProductLink(const objects::CSeq_feat &feat, objects::CScope &scope, TLinksMap &links, bool relative)
Definition: link_utils.cpp:605
static void AddFeatureLinks(const objects::CSeq_feat &feat, const objects::CBioseq_Handle &handle, const string &tax_id_file, TLinksMap &links, bool relative=true, bool exclude_self_links=false)
static void x_AddLink(TLinksMap &links, const string &type, const string &name, const string &label, const string &url, bool relative=true)
Definition: link_utils.cpp:786
static void x_AddDbxrefFeatureLinks(const objects::CSeq_feat &feat, const objects::CSeq_loc &loc, objects::CScope &scope, objects::CBioseq_Handle &bsh, TTaxId tax_id, TLinksMap &links, bool add_parent_gene_dbxrefs=true, bool relative=true)
Definition: link_utils.cpp:806
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CConstRef< CDbtag > GetNamedDbxref(const CTempString &db) const
Return a specified DB xref.
Definition: Seq_feat.cpp:415
CSnpBitfield is a facade for representing any version of the SNP bitfield.
bool IsTrue(EProperty prop) const
bool isGood() const
@ eHasSnp3D
Has 3D structure SNP3D.
static TRsid GetRsid(const CMappedFeat &mapped_feat)
Return rsid of SNP.
Definition: snp_utils.cpp:109
Definition: map.hpp:338
static const TPair sc_Pairs[]
SStaticPair< const char *, const char * > TPair
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static int type
Definition: getdata.c:31
static const char location[]
Definition: config.c:97
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:167
static CRef< objects::CSeq_loc > MixLocToLoc(const objects::CSeq_loc &mix_loc, const objects::CBioseq_Handle &handle)
Create a new seq-loc with a unique seq-id from a "mixed" loc.
Definition: utils.cpp:661
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
@ eDefault
Definition: label.hpp:73
@ eContent
Definition: label.hpp:62
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
CSeq_id::E_Choice Which(void) const
string GetLabel(const CSeq_id &id)
@ eAcc_gss
Definition: Seq_id.hpp:275
@ eAcc_est
Definition: Seq_id.hpp:265
@ eAcc_division_mask
Definition: Seq_id.hpp:299
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
CMappedFeat GetBestGeneForMrna(const CMappedFeat &mrna_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3301
CMappedFeat GetBestGeneForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3321
@ fFGL_Content
Include its content if there is any.
Definition: feature.hpp:73
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
Definition: sequence.cpp:274
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
Definition: sequence.hpp:101
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TSeqPos GetBioseqLength(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
bool IsNa(void) const
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
TObjectType & GetObject(void) const
Get object.
Definition: ncbiobj.hpp:1697
static TThisType GetWhole(void)
Definition: range.hpp:272
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NPOS
Definition: ncbistr.hpp:133
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
static const char label[]
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
E_Choice Which(void) const
Which variant is currently selected.
const TExts & GetExts(void) const
Get the Exts member data.
Definition: Seq_feat_.hpp:1477
list< CRef< CUser_object > > TExts
Definition: Seq_feat_.hpp:127
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1333
bool CanGetLocation(void) const
Check if it is safe to call GetLocation method.
Definition: Seq_feat_.hpp:1111
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetExts(void) const
Set of extensions; will replace 'ext' field. Check if a value has been assigned to Exts data member.
Definition: Seq_feat_.hpp:1465
const TGene & GetGene(void) const
Get the variant data.
bool IsSetDbxref(void) const
Support for xref to other databases. Check if a value has been assigned to Dbxref data member.
Definition: Seq_feat_.hpp:1321
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
const TVariation & GetVariation(void) const
Get the variant data.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TSample_id & GetSample_id(void) const
Get the Sample_id member data.
const TId & GetId(void) const
Get the Id member data.
const TParent_id & GetParent_id(void) const
Get the Parent_id member data.
bool CanGetParent_id(void) const
Check if it is safe to call GetParent_id method.
bool CanGetSample_id(void) const
Check if it is safe to call GetSample_id method.
bool CanGetId(void) const
Check if it is safe to call GetId method.
int i
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const char * tag
std::istream & in(std::istream &in_, double &x_)
SAnnotSelector –.
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
first_type first
Definition: static_set.hpp:64
second_type second
Definition: static_set.hpp:65
Definition: type.c:6
Modified on Wed Apr 17 13:08:58 2024 by modify_doxy.py rev. 669887