NCBI C++ ToolKit
prot_prod_tests.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: prot_prod_tests.cpp 90372 2020-06-08 15:55:30Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Josh Cherry
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
34 #include <corelib/ncbitime.hpp>
35 #include <objects/general/Date.hpp>
38 #include <objects/seq/Bioseq.hpp>
39 #include <objects/seq/MolInfo.hpp>
41 #include <objects/seq/Seq_inst.hpp>
42 #include <objects/seq/Seqdesc.hpp>
47 #include <objmgr/util/sequence.hpp>
48 #include <objmgr/seqdesc_ci.hpp>
49 #include <objmgr/bioseq_handle.hpp>
50 #include <objmgr/seq_vector.hpp>
51 #include <objmgr/seqdesc_ci.hpp>
52 #include <objmgr/annot_ci.hpp>
54 
57 
59  const CSeqTestContext* ctx) const
60 {
    // Returns true only when obj is a CSeq_id, ctx is non-null, and the
    // Bioseq handle obtained for that id yields at least one Molinfo
    // descriptor whose biomol is eBiomol_peptide. Returns false otherwise.
    // NOTE(review): the first line of this signature (return type, function
    // name and the `obj` parameter) is not visible in this listing.
61  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
62  if (id && ctx) {
63  CBioseq_Handle handle = ctx->GetScope().GetBioseqHandle(*id);
64 
    // Walk the Molinfo descriptors for this handle; the first peptide
    // biomol found is sufficient.
65  CSeqdesc_CI iter(handle, CSeqdesc::e_Molinfo);
66  for ( ; iter; ++iter) {
67  const CMolInfo& info = iter->GetMolinfo();
68  if (info.GetBiomol() == CMolInfo::eBiomol_peptide) {
69  return true;
70  }
71  }
72  }
73  return false;
74 }
75 
76 
79  const CSeqTestContext* ctx)
80 {
    // Builds a result set containing one result whose output data carries a
    // "length" field: the GetInst_Length() value of the Bioseq handle that
    // obj (a CSeq_id) resolves to in the context's scope.
    // NOTE(review): the function-name line and the declarations of `ref`
    // and `result` are not visible in this listing; `ref` is presumably an
    // empty CRef at the early return below -- confirm against the real file.
82  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
83  if ( !id || !ctx ) {
84  return ref;
85  }
86 
87  ref.Reset(new CSeq_test_result_set());
88 
90  ref->Set().push_back(result);
91 
    // obj is cast again here, this time by reference. `id` was already
    // checked non-null above, so this cast refers to the same object as *id.
92  int len = ctx->GetScope()
93  .GetBioseqHandle(dynamic_cast<const CSeq_id&>(obj)).GetInst_Length();
94  result->SetOutput_data()
95  .AddField("length", len);
96  return ref;
97 }
98 
99 
102  const CSeqTestContext* ctx)
103 {
    // Looks through the annotations of the Bioseq that obj (a CSeq_id)
    // resolves to for one named "CDDSearch", then reports two output fields:
    //   "annotated_cdd_hits"  - size of that annotation's feature table
    //                           (0 if no such annotation is found)
    //   "has_partial_cdd_hit" - true if any feature in that table has its
    //                           partial flag set and true
    // NOTE(review): the function-name line and the declarations of `ref`
    // and `result` are not visible in this listing.
105  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
106  if ( !id || !ctx ) {
107  return ref;
108  }
109 
110  ref.Reset(new CSeq_test_result_set());
111 
113  ref->Set().push_back(result);
114 
115  CBioseq_Handle hand = ctx->GetScope().GetBioseqHandle(*id);
116  CAnnot_CI it(hand);
117  int hits = 0;
118  bool has_partial = false;
119  while (it) {
120  if (it->IsNamed() && it->GetName() == "CDDSearch") {
    // NOTE(review): the initializer of `ftable` (listing line 122) is
    // missing from this listing; presumably it is the feature table of the
    // current annotation -- confirm against the real file.
    // NOTE(review): the ITERATE loop variable below is also called `it` and
    // shadows the outer CAnnot_CI `it` for the duration of the inner loop.
121  const list<CRef<CSeq_feat> >& ftable
123  hits = ftable.size();
124  ITERATE (list<CRef<CSeq_feat> >, it, ftable) {
125  if ((*it)->IsSetPartial() && (*it)->GetPartial()) {
126  has_partial = true;
127  break;
128  }
129  }
    // Only the first annotation named "CDDSearch" is examined.
130  break;
131  }
132  ++it;
133  }
134 
    // NOTE(review): `annot` is constructed here but never used in the
    // visible code.
135  CRef<CSeq_annot> annot(new CSeq_annot);
136  result->SetOutput_data()
137  .AddField("annotated_cdd_hits", hits);
138  result->SetOutput_data()
139  .AddField("has_partial_cdd_hit", has_partial);
140  return ref;
141 }
142 
143 
144 // Find the taxid from a bioseq handle by iterating over
145 // Seqdesc's of type "source". Return 0 if not found.
147 {
    // Returns the first non-zero taxid found among the descriptors visited
    // by `it`, or ZERO_TAX_ID when none of them carries one.
    // NOTE(review): the signature line and the declaration of `it` are not
    // visible in this listing; `it` is presumably a CSeqdesc_CI over the
    // handle's "source" descriptors, as the header comment describes.
149  TTaxId taxid;
150  while (it) {
151  taxid = it->GetSource().GetOrg().GetTaxId();
152  if (taxid != ZERO_TAX_ID) {
153  return taxid;
154  }
    // Move on to the next descriptor. Without this step a descriptor whose
    // taxid is zero would be re-examined forever and the loop would never
    // terminate.
    ++it;
155  }
156  return ZERO_TAX_ID;
157 }
158 
159 
161  const CSeqTestContext* ctx) const
162 {
    // Accepts obj only if CTestProtProd::CanTest() accepts it and the
    // statement(s) inside the try block below complete without throwing a
    // std::exception.
    // NOTE(review): the function-name line and the body of the try block
    // (listing line 171) are not visible in this listing; per the comment
    // below, the missing statement presumably attempts to obtain a gi for
    // the sequence -- confirm against the real file.
163  // Does it pass the base class criteria?
164  if (!CTestProtProd::CanTest(obj, ctx)) {
165  return false;
166  }
167  // If so, it must be a CSeq_id. Is it resolvable to a gi?
    // NOTE(review): `id` and `ctx` are dereferenced without a local null
    // check; this relies on the base-class CanTest() having rejected a
    // non-CSeq_id obj or a null ctx -- confirm.
168  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
169  CBioseq_Handle hand = ctx->GetScope().GetBioseqHandle(*id);
170  try {
172  }
173  catch (std::exception&) {
174  return false;
175  }
176  return true;
177 }
178 
179 
182  const CSeqTestContext* ctx)
183 {
    // Produces an "entrez_neighbors" result for the protein that obj (a
    // CSeq_id) resolves to. Output fields:
    //   "has_swissprot_neighbor_different_taxid" - whether any protein
    //       neighbor passes the Swiss-Prot / different-taxid filter below
    //   "top_match_seq_id", "length_top_match"   - only when such a neighbor
    //       exists: seq-id string and Bioseq length of the chosen neighbor
    // Throws runtime_error when no taxid can be found for the sequence.
    // NOTE(review): the function-name line and the declarations of `ref`,
    // `gi` (listing line 196) and `index` (listing line 228) are not visible
    // in this listing.
185  const CSeq_id* id = dynamic_cast<const CSeq_id*>(&obj);
186  if ( !id || !ctx ) {
187  return ref;
188  }
189 
190  ref.Reset(new CSeq_test_result_set());
191 
192  CRef<CSeq_test_result> result = x_SkeletalTestResult("entrez_neighbors");
193  ref->Set().push_back(result);
194 
195  CBioseq_Handle hand = ctx->GetScope().GetBioseqHandle(*id);
197  TTaxId taxid = s_GetTaxId(hand);
198  if (taxid == ZERO_TAX_ID) {
199  throw runtime_error("CTestProtProd_EntrezNeighbors::RunTest: "
200  "taxid not found for " + id->GetSeqIdString(true));
201  }
202  const unsigned int kChunkSize = 50;
203  CEntrez2Client e2c;
204  vector<TGi> neigh;
205  e2c.GetNeighbors(gi, "protein", "protein", neigh);
206  vector<TGi> sp_neigh;
207  vector<TGi> neigh_subset;
208  neigh_subset.reserve(kChunkSize);
    // Filter the neighbors in chunks of at most kChunkSize ids, in the order
    // GetNeighbors returned them, and stop at the first chunk that yields
    // any match. After the loop neigh_subset still holds the last chunk
    // that was filtered.
209  for (unsigned int start = 0; start < neigh.size(); start += kChunkSize) {
210  neigh_subset.clear();
211  for (unsigned int i = 0; i < kChunkSize; ++i) {
212  if (start + i == neigh.size()) {
213  break;
214  }
215  neigh_subset.push_back(neigh[start + i]);
216  }
    // Query: has the Swiss-Prot source-database property and is NOT from
    // the same taxid as the sequence under test.
217  e2c.FilterIds(neigh_subset, "protein", "srcdb_swiss-prot[PROP] NOT txid"
218  + NStr::NumericToString(taxid) + "[ORGN]", sp_neigh);
219  if (!sp_neigh.empty()) {
220  break;
221  }
222  }
223  result->SetOutput_data().AddField("has_swissprot_neighbor_different_taxid",
224  !sp_neigh.empty());
225  if (!sp_neigh.empty()) {
226  // Order not necessarily preserved by FilterIds, so figure out
227  // which element of sp_neigh comes first in neigh
    // `index` records each id's position within the last filtered chunk
    // (neigh_subset), so the positions compared below are chunk-relative.
229  for (unsigned int i = 0; i < neigh_subset.size(); ++i) {
230  index[neigh_subset[i]] = i;
231  }
232  unsigned int lowest_index = neigh_subset.size();
233  TGi first_gi = ZERO_GI; // initialize to avoid compiler warning
234  for (unsigned int i = 0; i < sp_neigh.size(); ++i) {
235  if (index[sp_neigh[i]] < lowest_index) {
236  lowest_index = index[sp_neigh[i]];
237  first_gi = sp_neigh[i];
238  }
239  }
240  CSeq_id neigh_id;
241  neigh_id.SetGi(first_gi);
242  result->SetOutput_data()
243  .AddField("top_match_seq_id", neigh_id.GetSeqIdString(true));
    // NOTE(review): the handle obtained for neigh_id is used without
    // checking that it resolved -- confirm how GetBioseqLength() behaves
    // on an unresolved handle.
244  int length_top_match =
245  ctx->GetScope().GetBioseqHandle(neigh_id).GetBioseqLength();
246  result->SetOutput_data()
247  .AddField("length_top_match", length_top_match);
248  }
249  return ref;
250 }
251 
252 
User-defined methods of the data storage class.
CAnnot_CI –.
Definition: annot_ci.hpp:59
CBioseq_Handle –.
void FilterIds(const vector< TUid > &query_uids, const string &db, const string &query_string, vector< TUid > &result_uids)
Given some uids, a database, and an entrez query string, determine which of these uids match the quer...
void GetNeighbors(TUid query_uid, const string &db_from, const string &db_to, vector< TUid > &neighbor_uids)
A simplified interface for getting neighbors (links)
TTaxId GetTaxId() const
Definition: Org_ref.cpp:72
CSeqTestContext defines any contextual information that a derived class might need.
Definition: seqtest.hpp:52
CRef< objects::CSeq_test_result > x_SkeletalTestResult(const string &test_name)
Create a Seq-test-result with some fields filled in, including a name for this test,...
Definition: seqtest.cpp:54
CSeq_test_result_set –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
bool CanTest(const CSerialObject &obj, const CSeqTestContext *ctx) const
Test to see whether the given object *can* be used in this test.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
CRef< objects::CSeq_test_result_set > RunTest(const CSerialObject &obj, const CSeqTestContext *ctx)
RunTest() is called for each registered object.
bool CanTest(const CSerialObject &obj, const CSeqTestContext *ctx) const
Test to see whether the given object *can* be used in this test.
Definition: map.hpp:338
CS_CONTEXT * ctx
Definition: t0006.c:12
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define ZERO_GI
Definition: ncbimisc.hpp:1088
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
bool IsNamed(void) const
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
const string & GetName(void) const
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
Tdata & Set(void)
Assign a value to data member.
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
TGi & SetGi(void)
Select the variant.
Definition: Seq_id_.hpp:896
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
int i
int len
static MDB_envinfo info
Definition: mdb_load.c:37
const size_t kChunkSize
Definition: na_utils.cpp:587
Defines: CTimeFormat - storage class for time format.
USING_SCOPE(objects)
TTaxId s_GetTaxId(const CBioseq_Handle &hand)
else result
Definition: token2.c:20
#define ftable
Definition: utilfeat.h:37
Modified on Wed Apr 24 14:13:54 2024 by modify_doxy.py rev. 669887