NCBI C++ ToolKit
feature_per_bioseq.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: feature_per_bioseq.cpp 100215 2023-07-09 00:39:58Z gotvyans $
2  * =========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * =========================================================================
25  *
26  * Authors: Igor Filippov, Sema Kachalo
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 #include "discrepancy_core.hpp"
32 #include "utils.hpp"
33 #include <objmgr/feat_ci.hpp>
34 #include <objmgr/util/feature.hpp>
36 
37 
41 
42 // COUNT_RRNAS
43 
44 static inline string rRnaLabel(const CSeqFeatData& data) // eSubtype_rRNA assumed
45 {
46  return data.IsRna() && data.GetRna().IsSetExt() && data.GetRna().GetExt().Which() == CRNA_ref::C_Ext::e_Name ? data.GetRna().GetExt().GetName() : "==invalid==";
47 }
48 
49 
// COUNT_RRNAS discrepancy case, declared through DISCREPANCY_CASE1 with type SEQUENCE
// and group eDisc.
// Groups the rRNA features returned by context.FeatRRNAs() by the name rRnaLabel() yields,
// records how many rRNA features the current bioseq carries, and adds a separate report
// entry for every name that is shared by more than one feature.
// NOTE(review): the embedded listing numbers jump 52 -> 54 -> 56, so two source lines are
// not visible here. Presumably one is a condition on `biosrc` that opens an extra scope
// (there are three closing braces at the end but only two visible unmatched openers) and
// the other declares `tmp` -- confirm against the repository before editing.
50 DISCREPANCY_CASE1(COUNT_RRNAS, SEQUENCE, eDisc, "Count rRNAs", "FIND_DUP_RRNAS")
51 {
52  const CSeqdesc* biosrc = context.GetBiosource();
54  size_t total = 0;
    // Bucket every rRNA feature under its name and count all of them.
56  for (const CSeq_feat* feat : context.FeatRRNAs()) {
57  tmp[rRnaLabel(feat->GetData())].Add(*context.SeqFeatObjRef(*feat));
58  total++;
59  }
    // Nothing is reported for a sequence without rRNA features.
60  if (total) {
61  auto bsref = context.BioseqObjRef();
    // Summary entry keyed by the feature count; the plural "s" is dropped only for exactly one feature.
62  string item = " [n] sequence[s] [has] [(]" + to_string(total) + "[)] rRNA feature" + (total == 1 ? kEmptyStr : "s");
63  m_Objs[item].Add(*bsref).Incr();
64  string short_name = bsref->GetBioseqLabel();
65  string subitem = "[n] rRNA feature[s] found on [(]" + short_name;
    // Attach all collected features, regardless of name, under the per-sequence sub-entry.
66  for (auto& it : tmp.GetMap()) {
67  m_Objs[item][subitem].Ext().Add(it.second->GetObjects());
68  }
    // Second pass: a name held by more than one feature gets its own top-level entry.
69  for (auto& it : tmp.GetMap()) {
70  if (it.second->GetObjects().size() > 1) {
71  m_Objs["[(]" + to_string(it.second->GetObjects().size()) + "[)] rRNA features on [(]" + short_name + "[)] have the same name [(](" + it.first + ")"].Add(tmp[it.first].GetObjects());
72  }
73  }
74  }
75  }
76 }
77 
78 
79 // COUNT_TRNAS
80 // (also report extra and missing tRNAs)
81 
83 {
    // NOTE(review): the listing numbers skip 82 and 84, so the line before this brace
    // (presumably `struct DesiredAAData`, the element type of desired_aaList) and one
    // member line are not visible here. The table initializers supply a character literal
    // first, so the hidden member is presumably a one-letter code -- confirm.
    // Three-letter symbol; COUNT_TRNAS uses it as the key into its per-name tRNA groups.
85  const char* long_symbol;
    // Number of tRNA features with this symbol that COUNT_TRNAS expects on a sequence.
86  size_t num_expected;
87 };
88 
// Table driving the extra/missing tRNA checks in COUNT_TRNAS. Each row is
// { character code, three-letter symbol (long_symbol), expected tRNA count (num_expected) }.
// Leu and Ser are the only rows expecting 2. Rows with 0 (Asx, Xle, Xxx, Glx, Sec, Pyl, Ter)
// can never be reported as missing, and any occurrence of them exceeds the expected
// count and is therefore reported by the "extra tRNAs" pass.
89 static const DesiredAAData desired_aaList[] = {
90  { 'A', "Ala", 1 },
91  { 'B', "Asx", 0 },
92  { 'C', "Cys", 1 },
93  { 'D', "Asp", 1 },
94  { 'E', "Glu", 1 },
95  { 'F', "Phe", 1 },
96  { 'G', "Gly", 1 },
97  { 'H', "His", 1 },
98  { 'I', "Ile", 1 },
99  { 'J', "Xle", 0 },
100  { 'K', "Lys", 1 },
101  { 'L', "Leu", 2 },
102  { 'M', "Met", 1 },
103  { 'N', "Asn", 1 },
104  { 'P', "Pro", 1 },
105  { 'Q', "Gln", 1 },
106  { 'R', "Arg", 1 },
107  { 'S', "Ser", 2 },
108  { 'T', "Thr", 1 },
109  { 'V', "Val", 1 },
110  { 'W', "Trp", 1 },
111  { 'X', "Xxx", 0 },
112  { 'Y', "Tyr", 1 },
113  { 'Z', "Glx", 0 },
114  { 'U', "Sec", 0 },
115  { 'O', "Pyl", 0 },
116  { '*', "Ter", 0 }
117 };
118 
119 
// COUNT_TRNAS discrepancy case, declared through DISCREPANCY_CASE1 with type SEQUENCE
// and group eDisc.
// Groups the tRNA features returned by context.FeatTRNAs() by amino-acid name, records the
// total per bioseq, and then compares the per-name counts against desired_aaList to report
// extra, unusual (name not in the table) and missing tRNAs.
// NOTE(review): the embedded listing numbers jump 122 -> 124 -> 126 and 132 -> 135, so three
// source lines are not visible here. Presumably they are a condition on `biosrc` that opens
// an extra scope (three closing braces at the end, two visible unmatched openers), the
// declaration of `tmp`, and the statement that fills DesiredCount -- confirm against the
// repository before editing.
120 DISCREPANCY_CASE1(COUNT_TRNAS, SEQUENCE, eDisc, "Count tRNAs", "FIND_DUP_TRNAS")
121 {
122  const CSeqdesc* biosrc = context.GetBiosource();
124  size_t total = 0;
    // Bucket every tRNA feature under the amino-acid name the context reports for it.
126  for (const CSeq_feat* feat : context.FeatTRNAs()) {
127  tmp[context.GetAminoacidName(*feat)].Add(*context.SeqFeatObjRef(*feat));
128  total++;
129  }
    // Nothing is reported for a sequence without tRNA features.
130  if (total) {
    // Name -> count lookup, filled on first use only (guarded by the empty() check).
    // The loop body is not visible in this listing; presumably it copies each table row
    // keyed by long_symbol -- TODO confirm.
131  static CSafeStatic<map<string, size_t> > DesiredCount;
132  if (DesiredCount->empty()) {
133  for (size_t i = 0; i < ArraySize(desired_aaList); i++) {
135  }
136  }
137 
138  auto bsref = context.BioseqObjRef();
    // Summary entry keyed by the feature count; the plural "s" is dropped only for exactly one feature.
139  string item = " [n] sequence[s] [has] [(]" + to_string(total) + "[)] tRNA feature" + (total == 1 ? kEmptyStr : "s");
140  m_Objs[item].NoRec().Add(*bsref);
141  string short_name = bsref->GetBioseqLabel();
142  string subitem = "[n] tRNA feature[s] found on [(]" + short_name;
    // Attach all collected features, regardless of name, under the per-sequence sub-entry.
143  for (auto& it : tmp.GetMap()) {
144  m_Objs[item][subitem].Ext().Add(it.second->GetObjects());
145  }
146  // extra tRNAs
    // A table entry is "extra" when more features carry its symbol than num_expected;
    // the entry lists the bioseq followed by the offending features.
147  for (size_t i = 0; i < ArraySize(desired_aaList); i++) {
148  const size_t n = tmp[desired_aaList[i].long_symbol].GetObjects().size();
149  if (n > desired_aaList[i].num_expected) {
150  subitem = "Sequence [(]" + short_name + "[)] has [(]" + to_string(n) + "[)] trna-[(]" + desired_aaList[i].long_symbol + "[)] feature" + (n == 1 ? kEmptyStr : "s");
151  m_Objs[subitem].Add(*bsref);
152  m_Objs[subitem].Add(tmp[desired_aaList[i].long_symbol].GetObjects());
153  }
154  }
155  // unusual tRNAs
    // A group is "unusual" when its name has no entry in DesiredCount.
    // NOTE(review): unlike the "extra" pass, Add() is called here with a second argument
    // `false`; its meaning is not visible in this file.
156  for (auto& it : tmp.GetMap()) {
157  if (DesiredCount->find(it.first) == DesiredCount->end()) {
158  subitem = "Sequence [(]" + short_name + "[)] has [(]" + to_string(it.second->GetObjects().size()) + "[)] trna-[(]" + it.first + "[)] feature" + (it.second->GetObjects().size() == 1 ? kEmptyStr : "s");
159  m_Objs[subitem].Add(*bsref);
160  m_Objs[subitem].Add(tmp[it.first].GetObjects(), false);
161  }
162  }
163  // missing tRNAs
    // A table entry is "missing" when fewer features carry its symbol than num_expected;
    // only the bioseq is attached because there may be no feature to point at.
164  for (size_t i = 0; i < ArraySize(desired_aaList); i++) {
165  const size_t n = tmp[desired_aaList[i].long_symbol].GetObjects().size();
166  if (n < desired_aaList[i].num_expected) {
167  subitem = "Sequence [(]" + short_name + "[)] is missing trna-[(]" + desired_aaList[i].long_symbol;
168  m_Objs[subitem].Add(*bsref);
169  }
170  }
171  }
172  }
173 }
174 
175 
#define static
CSafeStatic<>::
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
@ eDisc
#define DISCREPANCY_CASE1(name, type, group, descr,...)
vector< CConstRef< CObject > > GetObjects(CSeq_entry_Handle seh, const string &field, CFieldNamePanel::EFieldType field_type, int subtype, const string &ncRNA_class, CConstRef< objects::CSeq_submit > submit, CRef< CEditingActionConstraint > constraint, vector< CSeq_entry_Handle > *descr_context=nullptr)
static string rRnaLabel(const CSeqFeatData &data)
USING_SCOPE(objects)
static const DesiredAAData desired_aaList[]
static char tmp[3200]
Definition: utf8.c:42
char data[12]
Definition: iconv.c:80
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:422
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
Definition: BioSource_.hpp:397
@ e_Name
for naming "other" type
Definition: RNA_ref_.hpp:134
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
int i
yy_size_t n
const char * long_symbol
static CS_CONTEXT * context
Definition: will_convert.c:21
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:58:30 2024 by modify_doxy.py rev. 669887