/* $Id: snp_utils.cpp 94067 2021-06-22 14:12:27Z grichenk $
2  * ===========================================================================
3  *
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Melvin Quintos, Dmitry Rudnev
27  *
28  * File Description:
29  * Provides implementation of NSnp class. See snp_extra.hpp
30  * for class usage.
31  *
32  */
34 #include <ncbi_pch.hpp>
42 #include <objects/general/Date.hpp>
52 #include <objmgr/seq_vector.hpp>
57 const string NSnp::sm_dbTag_dbSNP("dbSNP");
59 ///////////////////////////////////////////////////////////////////////////////
60 // Public Methods
61 ///////////////////////////////////////////////////////////////////////////////
62 bool NSnp::IsSnp(const CMappedFeat &mapped_feat)
63 {
64  return IsSnp(mapped_feat.GetOriginalFeature());
65 }
67 bool NSnp::IsSnp(const CSeq_feat &feat)
68 {
69  return feat.IsSetData() && feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_variation && GetTag(feat).NotEmpty() && IsSnp(*GetTag(feat));
70 }
72 bool NSnp::IsSnp(const CDbtag& tag)
73 {
74  return tag.GetType() == CDbtag::eDbtagType_dbSNP;
75  }
78 {
    // Returns whatever GetNamedDbxref() finds on SrcFeat for the database
    // name held in sm_dbTag_dbSNP.
    // NOTE(review): the signature line of this definition is absent from this
    // listing. Callers in this file test the result of GetTag() for
    // emptiness, so the result is presumably an empty reference when the
    // feature has no such dbxref -- confirm against the header.
79  return SrcFeat.GetNamedDbxref(sm_dbTag_dbSNP);
80 }
83 {
    // Convenience form: takes the original feature out of SrcFeat and
    // delegates to GetTag() on it.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
84  return GetTag(SrcFeat.GetOriginalFeature());
85 }
89 {
    // Returns the creation date found among the descriptors of the annotation
    // that `feat` belongs to. When the annotation has no descriptors, or none
    // of them is a create-date, the default-constructed CTime is returned.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
90  CTime time;
91  CSeq_annot_Handle h = feat.GetAnnot();
    // Descriptors are checked for availability at both levels before reading.
93  if (h.Seq_annot_CanGetDesc()) {
94  const CAnnot_descr &desc = h.Seq_annot_GetDesc();
95  if (desc.CanGet()) {
96  ITERATE( CAnnot_descr::Tdata, it, desc.Get() ) {
97  const CRef<CAnnotdesc> &d = *it;
98  if (d->IsCreate_date()) {
99  time = d->GetCreate_date().AsCTime();
    // Only the first create-date descriptor is used.
100  break;
101  }
102  }
103  }
104  }
106  return time;
107 }
110 {
    // Convenience form: takes the original feature out of mapped_feat and
    // delegates to GetRsid() on it.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
111  return GetRsid(mapped_feat.GetOriginalFeature());
112 }
115 {
    // Returns the rs id taken from the feature's tag (see GetTag()), or 0 when
    // GetTag() yields an empty reference.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
116  CConstRef<CDbtag> ref = GetTag(feat);
117  if (ref) {
118  return GetRsid(*ref);
119  }
    // No tag on this feature.
120  return 0;
121 }
123 {
    // Extracts the numeric rs id from a database tag.
    //  - String tag containing "rs": the text after the first two characters
    //    is converted with NStr::StringToNumeric<NSnp::TRsid>().
    //  - Otherwise: the tag's GetId8() value is returned.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
124  const auto& dbtag = tag.GetTag();
    // NOTE(review): find("rs") matches "rs" anywhere in the string, while
    // substr(2) assumes it is the two-character prefix. A value such as
    // "xrs12" would pass the guard and then hand "s12" to the converter.
    // A prefix test looks like the intent -- confirm.
125  if (dbtag.IsStr() && (string::npos != dbtag.GetStr().find("rs"))) {
126  return NStr::StringToNumeric<NSnp::TRsid>(dbtag.GetStr().substr(2));
127  }
128  else {
129  return dbtag.GetId8();
130  }
131 }
133 int NSnp::GetLength(const CMappedFeat &mapped_feat)
134 {
135  return GetLength(mapped_feat.GetOriginalFeature());
136 }
/// Returns the length of a SNP feature, or 0 when it cannot be determined.
///
/// Two encodings are handled, selected by the bitfield version of the feature:
///  - version < 20: the length is derived from the "Extra" user field of the
///    feature extension;
///  - otherwise: the length of the total range of the feature location.
138 int NSnp::GetLength(const CSeq_feat &feat)
139 {
140  int length = 0;
142  // features pre-SNP 2.0 have length encoded as neighbors in "Extra"
143  if(GetBitfield(feat).GetVersion() < 20) {
144  if (feat.IsSetExt()) {
145  CConstRef<CUser_field> field =
146  feat.GetExt().GetFieldRef("Extra");
147  if (field) {
148  string s1, s2;
149  const string &str = field->GetData().GetStr();
    // The field must contain '='; the comma split below is then applied to
    // the whole of `str`, and s1/s2 are not used by the lines visible here.
150  if (NStr::SplitInTwo(str, "=", s1, s2)) {
151  vector<string> v;
153  NStr::Split(str, ",", v);
    // Exactly four comma-separated items are expected; anything else leaves
    // length at 0.
154  if (v.size()==4) {
    // NOTE(review): the lines that define `rc` and `lc` are absent from this
    // listing; presumably they are parsed from items of `v` -- confirm.
157  length = rc + lc + 1;
158  }
159  }
160  }
161  }
162  } else {
163  // SNP 2.0 length is feature length
164  if(feat.CanGetLocation()) {
165  length = feat.GetLocation().GetTotalRange().GetLength();
166  }
167  }
169  return length;
170 }
172 string NSnp::ClinSigAsString(const CVariation_ref& var, ELetterCase LetterCase)
173 {
174  ITERATE (CVariation_ref::TPhenotype, pnt_iter, var.GetPhenotype()) {
175  if ((*pnt_iter)->CanGetClinical_significance()) {
176  return ClinSigAsString((*pnt_iter)->GetClinical_significance(), LetterCase);
177  }
178  }
179  return "";
180 }
/// Converts a clinical significance id to its display text.
///
/// @param ClinSigID   Id to convert; ids that reach the default label are
///                    reported as "Other".
/// @param LetterCase  With eLetterCase_ForceLower the text is passed through
///                    NStr::ToLower(); any other value returns it as spelled
///                    below.
/// @return The display text.
///
/// NOTE(review): the `case` labels of the switch are absent from this listing,
/// so the id-to-text mapping cannot be read from here.
182 string NSnp::ClinSigAsString(TClinSigID ClinSigID, ELetterCase LetterCase)
183 {
184  string sResult;
185  switch(ClinSigID)
186  {
188  sResult = "Benign";
189  break;
191  sResult = "Likely benign";
192  break;
194  sResult = "Likely pathogenic";
195  break;
197  sResult = "Pathogenic";
198  break;
200  sResult = "Drug response";
201  break;
203  sResult = "Histocompatibility";
204  break;
206  sResult = "Uncertain significance";
207  break;
209  sResult = "Not tested";
210  break;
    // Fallback for every id without a label of its own.
212  default:
213  sResult = "Other";
214  break;
215  }
216  return LetterCase == eLetterCase_ForceLower ? NStr::ToLower(sResult) : sResult;
217 }
221 {
    // Convenience form: takes the original feature out of mapped_feat and
    // delegates to GetBitfield() on it.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
222  return GetBitfield(mapped_feat.GetOriginalFeature());
223 }
226 {
    // Builds the SNP bitfield of a feature. The bitfield is assigned from the
    // feature only when IsSnp(feat) holds; otherwise the default-constructed
    // CSnpBitfield is returned unchanged.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
227  CSnpBitfield b;
229  if(IsSnp(feat)) {
230  b = feat;
231  }
233  return b;
234 }
237 void NSnp::GetAlleles(const CMappedFeat &mapped_feat, TAlleles& Alleles)
238 {
239  GetAlleles(mapped_feat.GetOriginalFeature(), Alleles);
240 }
242 void NSnp::GetAlleles(const CSeq_feat &feat, TAlleles& Alleles, bool isPadding, CBioseq_Handle* bsh)
243 {
244  bool isRefAlleleEmpty{false};
245  bool isAnyAltAlleleEmpty{false};
247  Alleles.clear();
249  if (feat.CanGetQual()) {
250  Alleles.reserve(feat.GetQual().size());
251  ITERATE (CSeq_feat::TQual, it, feat.GetQual()) {
252  const CGb_qual& qual = **it;
253  if (qual.GetQual() == "replace") {
254  string sQualVal(qual.GetVal());
255  Alleles.push_back(sQualVal.empty() ? "-" : sQualVal);
256  if(sQualVal.empty()) {
257  if(it == feat.GetQual().begin()) {
258  isRefAlleleEmpty = true;
259  } else {
260  isAnyAltAlleleEmpty = true;
261  }
262  }
263  }
264  }
265  }
266  if(isPadding && bsh && (isRefAlleleEmpty || isAnyAltAlleleEmpty)) {
267  string sPadding;
268  const CSeq_loc& feat_seq_loc(feat.GetLocation());
269  CSeqVector seq_vector(*bsh, CBioseq_Handle::eCoding_Iupac);
270  int delta(isRefAlleleEmpty ? 0 : -1);
271  if(feat_seq_loc.GetStart(ESeqLocExtremes::eExtreme_Positional) + delta < 0) {
272  seq_vector.GetSeqData(feat_seq_loc.GetStop(ESeqLocExtremes::eExtreme_Positional), feat_seq_loc.GetStop(ESeqLocExtremes::eExtreme_Positional) + 1, sPadding);
273  } else {
274  seq_vector.GetSeqData(feat_seq_loc.GetStart(ESeqLocExtremes::eExtreme_Positional) + delta, feat_seq_loc.GetStart(ESeqLocExtremes::eExtreme_Positional) + delta + 1, sPadding);
275  }
276  for(auto& allele: Alleles) {
277  allele = allele == "-" ? sPadding : sPadding + allele;
278  }
279  }
280 }
282 bool NSnp::IsSnpKnown( CScope &scope, const CMappedFeat &private_snp, const string &allele)
283 {
284  const CSeq_loc &loc = private_snp.GetLocation();
285  return IsSnpKnown(scope, loc, allele);
286 }
/// Tells whether a known SNP exists at a location.
///
/// Features are searched in @a scope over @a loc using a selector restricted
/// to annotations named "SNP".
///
/// @param scope   Scope to search in.
/// @param loc     Location to search over.
/// @param allele  Allele to look for. When it equals kEmptyStr, any returned
///                feature counts as a match. Otherwise a feature matches when
///                one of its "replace" qualifiers contains @a allele as a
///                substring.
/// @return true when a matching feature was found.
288 bool NSnp::IsSnpKnown( CScope& scope, const CSeq_loc& loc, const string & allele)
289 {
290  bool isKnown = false;
291  SAnnotSelector sel; // annotation selector
293  // Prepare Annotation Selection to find the SNPs
294  //sel = CSeqUtils::GetAnnotSelector(CSeqFeatData::eSubtype_variation);
    // NOTE(review): some lines of the selector chain below are absent from
    // this listing, so further settings may be applied between
    // SetExcludeExternal() and SetMaxSize().
295  sel .SetOverlapTotalRange()
296  .SetResolveAll()
297  .AddNamedAnnots("SNP")
298  .SetExcludeExternal(false)
302  .SetMaxSize(100000); // In case someone does something silly.
304  CFeat_CI feat_it(scope, loc, sel);
306  if (allele == kEmptyStr) {
307  // Don't check for alleles
308  // Existence of any returned SNP means there are known SNPs
309  if (feat_it.GetSize()>0) {
310  isKnown = true;
311  }
312  }
313  else {
314  // Check all the alleles for all the returned SNPs
    // The loop stops at the first feature that yields a match.
315  for (; feat_it && !isKnown; ++feat_it) {
316  const CSeq_feat & or_feat = feat_it->GetOriginalFeature();
317  if (or_feat.CanGetQual()) {
318  ITERATE (CSeq_feat::TQual, it, or_feat.GetQual()) {
319  const CRef<CGb_qual> &qual = *it;
    // NOTE(review): this is a substring test, so allele "A" also matches a
    // qualifier value such as "AT" -- confirm that this is intended.
320  if (qual->GetQual() == "replace" &&
321  qual->GetVal().find(allele) != string::npos) {
322  isKnown = true;
323  break;
324  }
325  }
326  }
327  }
328  }
330  return isKnown;
331 }
334 const string NSNPVariationHelper::sResourceLink_RsID("%rsid%");
/// Fills a CVariation from a SNP feature.
///
/// @param Variation  Object to fill.
/// @param SrcFeat    Source feature.
/// @return false when x_CommonConvertFeat() rejects the feature, true
///         otherwise.
///
/// After the common conversion, a placement carrying a copy of the feature
/// location is appended to the variation. When the feature's SNP bitfield
/// reports isGood(), a user object is also appended to the variation's
/// extensions.
336 bool NSNPVariationHelper::ConvertFeat(CVariation& Variation, const CSeq_feat& SrcFeat)
337 {
338  if(!x_CommonConvertFeat(&Variation, SrcFeat)) {
339  return false;
340  }
    // NOTE(review): the line declaring `pPlacement` is absent from this
    // listing.
342  pPlacement->SetLoc().Assign(SrcFeat.GetLocation());
343  Variation.SetPlacements().push_back(pPlacement);
345  // save a copy of the feature since not every bit
346  // currently is adequately represented in Variation
347  CSnpBitfield bf(NSnp::GetBitfield(SrcFeat));
348  if(bf.isGood()) {
349  CRef<CUser_object> pExt(new CUser_object());
350  CNcbiOstrstream ostr;
    // The whole source feature is serialized as ASN.1 text.
351  ostr << MSerial_AsnText << SrcFeat;
    // NOTE(review): the lines that copy the serialized text into `pExt` are
    // absent from this listing.
354  Variation.SetExt().push_back(pExt);
355  }
356  return true;
357 }
360 {
    // Fills `Variation` from the SNP feature `SrcFeat`.
    // Returns false when x_CommonConvertFeat() rejects the feature, true
    // otherwise.
    // NOTE(review): the signature line of this definition is absent from this
    // listing.
361  if(!x_CommonConvertFeat(&Variation, SrcFeat)) {
362  return false;
363  }
364  // save a copy of the feature since not every bit
365  // currently is adequately represented in Variation
366  CSnpBitfield bf(NSnp::GetBitfield(SrcFeat));
367  if(bf.isGood()) {
368  CNcbiOstrstream ostr;
    // The whole source feature is serialized as ASN.1 text and stored in the
    // SNP_VAR_EXT_BITFIELD field of the extension, whose class is set to
    // SNP_VAR_EXT_CLASS.
369  ostr << MSerial_AsnText << SrcFeat;
370  Variation.SetExt().SetField(SNP_VAR_EXT_BITFIELD).SetData().SetStr(CNcbiOstrstreamToString(ostr));
371  Variation.SetExt().SetClass(SNP_VAR_EXT_CLASS);
372  }
373  return true;
374 }
380 {
    // Transfers the content of the SNP bitfield `bf` into the variant
    // properties object `prop`.
    //
    // The version is always copied. Each remaining property group (resource
    // link, gene location, effect, mapping, frequency-based validation,
    // genotype, quality check) is accumulated in a local int starting at 0
    // and stored in `prop` only when the result is non-zero.
    //
    // NOTE(review): the signature line and most of the lines that test
    // bitfield flags and update the accumulators are absent from this
    // listing. Only closing braces remain in many places, so the exact
    // flag-to-property mapping cannot be read from here.
381  prop.SetVersion(bf.GetVersion());
383  /// resource link
384  int res_link = 0;
387  }
390  }
393  }
396  }
399  }
402  }
403  if (res_link) {
404  prop.SetResource_link(res_link);
405  }
407  /// gene function
408  int gene_location = 0;
409  if (bf.IsTrue(CSnpBitfield::eInGene)) {
411  }
412  if (bf.IsTrue(CSnpBitfield::eInGene5)) {
414  }
415  if (bf.IsTrue(CSnpBitfield::eInGene3)) {
417  }
418  if (bf.IsTrue(CSnpBitfield::eIntron)) {
420  }
421  if (bf.IsTrue(CSnpBitfield::eDonor)) {
423  }
426  }
427  if (bf.IsTrue(CSnpBitfield::eInUTR5)) {
429  }
430  if (bf.IsTrue(CSnpBitfield::eInUTR3)) {
432  }
433  if (gene_location) {
434  prop.SetGene_location(gene_location);
435  }
437  // effect
438  int effect(0);
441  }
444  }
447  }
450  }
453  }
454  if (effect) {
455  prop.SetEffect(effect);
456  }
458  /// mapping
459  int mapping = 0;
462  }
465  }
468  }
469  if (mapping) {
470  prop.SetMapping(mapping);
471  }
475  /// There is no 1:1 correspondence between Bitfield weight
476  /// and VariantProperties map-weight. See SNP-5729.
477  /// So, I am commenting out. JB Holmes, April 2013
478  /// weight
479  // int weight = bf.GetWeight();
480  // if (weight) {
481  // prop.SetMap_weight(weight);
482  // }
484  /// allele frequency
485  int allele_freq = 0;
488  }
491  }
494  }
497  }
498  if (allele_freq) {
499  prop.SetFrequency_based_validation(allele_freq);
500  }
502  /// genotype
503  int genotype = 0;
506  }
509  }
510  if (genotype) {
511  prop.SetGenotype(genotype);
512  }
514  /// quality checking
515  int qual_check = 0;
518  }
521  }
524  }
527  }
530  }
531  if (qual_check) {
532  prop.SetQuality_check(qual_check);
533  }
534 }
/// Renders one group of variant properties as a list of display strings.
///
/// @param ResList    Output list; always cleared first, then filled with the
///                   strings that apply to the selected group.
/// @param prop       Variant properties to read.
/// @param ePropType  Selects the property group to render. Values without a
///                   case of their own leave @a ResList empty.
///
/// For the groups whose guard is visible, nothing is appended when the
/// corresponding property cannot be read from @a prop (CanGet...() is false).
///
/// NOTE(review): most `case` labels and the per-flag conditions in front of
/// the push_back calls are absent from this listing, so which flag produces
/// which string cannot be read from here.
536 void NSNPVariationHelper::VariantPropAsStrings(list<string>& ResList, const CVariantProperties& prop, ESNPPropTypes ePropType)
537 {
538  ResList.clear();
539  switch(ePropType) {
    // Gene location group.
541  if(prop.CanGetGene_location()) {
544  ResList.push_back("In Gene");
546  ResList.push_back("In 5\' Gene");
548  ResList.push_back("In 3\' Gene");
550  ResList.push_back("Intron");
552  ResList.push_back("Donor");
554  ResList.push_back("Acceptor");
556  ResList.push_back("In 5\' UTR");
558  ResList.push_back("In 3\' UTR");
560  ResList.push_back("In Start Codon");
562  ResList.push_back("In Stop Codon");
564  ResList.push_back("Intergenic");
566  ResList.push_back("In Conserved Non-coding region");
567  }
568  break;
    // Effect group; "No change" sits in its own branch, apart from the
    // other effect strings.
569  case eSNPPropName_Effect:
570  if(prop.CanGetEffect()) {
571  CVariantProperties::TEffect effect(prop.GetEffect());
573  ResList.push_back("No change");
574  else {
576  ResList.push_back("Synonymous");
578  ResList.push_back("Nonsense");
580  ResList.push_back("Missense");
582  ResList.push_back("Frameshift");
584  ResList.push_back("Up-regulator");
586  ResList.push_back("Down-regulator");
588  ResList.push_back("Methylation");
590  ResList.push_back("Stop-gain");
592  ResList.push_back("Stop-loss");
593  }
594  }
595  break;
    // Mapping group.
597  if(prop.CanGetMapping()) {
600  ResList.push_back("Has other SNP");
602  ResList.push_back("Has Assembly conflict");
604  ResList.push_back("Is assembly specific");
605  }
606  break;
    // Allele frequency / validation strings (guard line not visible here).
611  ResList.push_back(">1% minor allele freq in 1+ populations");
613  ResList.push_back(">1% minor allele freq in each and all populations");
615  ResList.push_back(">5% minor allele freq in 1+ populations");
617  ResList.push_back(">5% minor allele freq in each and all populations");
619  ResList.push_back("Is mutation");
621  ResList.push_back("Validated (has a minor allele in two or more separate chromosomes)");
622  }
623  break;
    // Quality check group.
625  if(prop.CanGetQuality_check()) {
628  ResList.push_back("Reference allele missing from SNP alleles");
630  ResList.push_back("Genotype conflict");
632  ResList.push_back("Non-overlapping allele sets");
634  ResList.push_back("Strain specific fixed difference");
636  ResList.push_back("Member SS withdrawn by submitter");
637  }
638  break;
    // Resource link group.
640  if(prop.CanGetResource_link()) {
643  ResList.push_back("Clinical");
645  ResList.push_back("Provisional");
647  ResList.push_back("Preserved");
649  ResList.push_back("On high density genotyping kit");
650  if(resource_link & CVariantProperties::eResource_link_has3D)
651  ResList.push_back("SNP3D");
653  ResList.push_back("SubmitterLinkOut");
654  }
655  break;
    // Companion of the resource link group: the visible lines append only
    // empty strings here.
657  // NB: take care to have the same order as in eSNPPropName_ResourceLink
658  if(prop.CanGetResource_link()) {
661  ResList.push_back("");
663  ResList.push_back("");
665  ResList.push_back("");
667  ResList.push_back("");
669  ResList.push_back("");
670  }
671  break;
672  default:
673  break;
674  }
675 }
Modified on Wed Apr 17 13:10:50 2024 by rev. 669887