NCBI C++ ToolKit
struct_cmt_reader.cpp

Search Toolkit Book for _cmt_reader_8cpp_source

Go to the documentation of this file.
1 /* $Id: struct_cmt_reader.cpp 97441 2022-07-18 19:02:48Z ludwigf $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Sergiy Gotvyanskyy, NCBI
27 *
28 * File Description:
29 * Reader for structured comments for sequences
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include <objects/seq/Seqdesc.hpp>
45 
49 
50 #include <util/line_reader.hpp>
52 
53 
54 #include <common/test_assert.h> /* This header must go last */
55 
58 
60 {
61 }
62 
64 {
65 }
66 
67 const string& CStructuredCommentsReader::CStructComment::GetPrefix(const objects::CSeqdesc& desc)
68 {
69  if (!desc.IsUser())
70  return kEmptyStr;
71 
72  auto& user = desc.GetUser();
73  if (user.IsSetType() && user.GetType().IsStr() && NStr::Equal(user.GetType().GetStr(), "StructuredComment"))
74  {
75  if (user.IsSetData() && user.GetData().size() > 0)
76  {
77  const auto& fdata = *user.GetData().front();
78  if (fdata.IsSetLabel() && fdata.GetLabel().IsStr() && NStr::Equal(fdata.GetLabel().GetStr(), "StructuredCommentPrefix"))
79  return fdata.GetData().GetStr();
80  }
81  }
82 
83  return kEmptyStr;
84 }
85 
86 objects::CUser_object* CStructuredCommentsReader::_AddStructuredComment(objects::CUser_object* user_obj, CStructComment& cmt, const CTempString& name, const CTempString& value)
87 {
88  if (name.compare("StructuredCommentPrefix") == 0)
89  user_obj = 0; // reset user obj so to create a new one
90 
91  if (user_obj == 0)
92  {
93  // create new user object
94  CRef<CSeqdesc> user_desc(new CSeqdesc);
95  user_obj = &(user_desc->SetUser());
96  user_obj->SetType().SetStr("StructuredComment");
97  cmt.m_descs.push_back(user_desc);
98  }
99  user_obj->AddField(name, value);
100  // signal to create next user object
101  if (name.compare("StructuredCommentSuffix") == 0)
102  return 0;
103  else
104  return user_obj;
105 }
106 
107 void CStructuredCommentsReader::_BuildStructuredComment(CStructComment& cmt, const vector<string>& cols, const vector<CTempString>& values)
108 {
109  cmt.m_descs.reserve(values.size() - 1);
110  objects::CUser_object* user = 0;
111 
112  for (size_t i = 1; i<values.size(); i++)
113  {
114  if (!values[i].empty())
115  {
116  // create new user object
117  user = _AddStructuredComment(user, cmt, cols[i], values[i]);
118  }
119  }
120 }
121 
122 void CStructuredCommentsReader::_LoadHeaderLine(ILineReader& reader, vector<string>& cols)
123 {
124  cols.clear();
125 
126  while (!reader.AtEOF() && cols.empty())
127  {
128  reader.ReadLine();
129  // First line is a collumn definitions
130  CTempString current = reader.GetCurrentLine();
131  if (NStr::StartsWith(current, '#'))
132  continue;
133 
134  NStr::Split(current, "\t", cols);
135  }
136 }
137 
139  const CSeq_id& seqID, const CSeq_id& commentID)
140 {
141  // idea: try match the raw text of the commentID with the "money" part of the given ID
142 
143  if (seqID.Compare(commentID) == CSeq_id::e_YES) {
144  return true;
145  }
146  if (!commentID.IsLocal()) {
147  return false;
148  }
149 
150  const auto& commentIdText = commentID.GetLocal().GetStr();
151  const CTextseq_id* pTsid = seqID.GetTextseq_Id();
152  if (pTsid) {
153  if (pTsid->IsSetAccession()) {
154  return (pTsid->GetAccession() == commentIdText);
155  }
156  if (pTsid->IsSetName()) {
157  return (pTsid->GetName() == commentIdText);
158  }
159  return false;
160  }
161 
162  string seqIdText;
163  switch (seqID.Which()) {
164  default:
165  return false;
166  case CSeq_id::e_Gibbsq:
167  seqIdText = NStr::IntToString(seqID.GetGibbsq());
168  break;
169  case CSeq_id::e_Gibbmt:
170  seqIdText = NStr::IntToString(seqID.GetGibbmt());
171  break;
172  case CSeq_id::e_Giim:
173  seqIdText = NStr::IntToString(seqID.GetGiim().GetId());
174  break;
175  case CSeq_id::e_General: {
176  const auto& general = seqID.GetGeneral();
177  if (general.IsSetTag()) {
178  if (general.GetTag().IsStr()) {
179  seqIdText = general.GetTag().GetStr();
180  }
181  else {
182  seqIdText = NStr::IntToString(general.GetTag().GetId());
183  }
184  }
185  break;
186  }
187  case CSeq_id::e_Patent: {
188  const CId_pat& idp = seqID.GetPatent().GetCit();
189  seqIdText = idp.GetId().IsNumber() ?
190  idp.GetId().GetNumber() : idp.GetId().GetApp_number();
191  seqIdText += '_';
192  seqIdText += NStr::IntToString(seqID.GetPatent().GetSeqid());
193  break;
194  }
195  case CSeq_id::e_Gi:
196  seqIdText = NStr::NumericToString(seqID.GetGi());
197  break;
198  case CSeq_id::e_Pdb: {
199  const CPDB_seq_id& pid = seqID.GetPdb();
200  seqIdText = pid.GetMol().Get();
201  if (pid.IsSetChain_id()) {
202  seqIdText += '_';
203  seqIdText += pid.GetChain_id();
204  }
205  else if (pid.IsSetChain()) {
206  unsigned char chain = static_cast<unsigned char>(pid.GetChain());
207  if (chain > ' ') {
208  seqIdText += '_';
209  seqIdText += static_cast<char>(chain);
210  }
211  }
212  break;
213  }
214  }
215  return (seqIdText == commentIdText);
216 }
217 
218 
219 
220 
User-defined methods of the data storage class.
static const string & GetPrefix(const objects::CSeqdesc &)
vector< CRef< objects::CSeqdesc > > m_descs
objects::CUser_object * _AddStructuredComment(objects::CUser_object *user_obj, CStructComment &cmt, const CTempString &name, const CTempString &value)
static bool SeqIdMatchesCommentId(const objects::CSeq_id &seqID, const objects::CSeq_id &commentID)
void _BuildStructuredComment(CStructComment &cmt, const vector< string > &cols, const vector< CTempString > &values)
void _LoadHeaderLine(ILineReader &reader, vector< string > &cols)
CStructuredCommentsReader(objects::ILineErrorListener *logger)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
const TPrim & Get(void) const
Definition: serialbase.hpp:347
CTempString GetCurrentLine(void) const
void ReadLine(void)
Definition: line_reader.hpp:88
virtual bool AtEOF(void) const =0
Indicates (negatively) whether there is any more input.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_YES
SeqIds compared, and are equivalent.
Definition: Seq_id.hpp:583
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5086
int compare(const CTempString str) const
Compare the current string with a given string.
Definition: tempstr.hpp:806
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5414
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5386
bool IsNumber(void) const
Check if variant Number is selected.
Definition: Id_pat_.hpp:426
const TId & GetId(void) const
Get the Id member data.
Definition: Id_pat_.hpp:525
const TNumber & GetNumber(void) const
Get the variant data.
Definition: Id_pat_.hpp:432
const TApp_number & GetApp_number(void) const
Get the variant data.
Definition: Id_pat_.hpp:452
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TChain GetChain(void) const
Get the Chain member data.
const TPdb & GetPdb(void) const
Get the variant data.
Definition: Seq_id_.cpp:435
TGibbsq GetGibbsq(void) const
Get the variant data.
Definition: Seq_id_.hpp:787
TId GetId(void) const
Get the Id member data.
bool IsSetChain_id(void) const
chain identifier; length-independent generalization of 'chain' Check if a value has been assigned to ...
bool IsSetChain(void) const
Deprecated: 'chain' can't support multiple character PDB chain identifiers (introduced in 2015).
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
const TName & GetName(void) const
Get the Name member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
const TMol & GetMol(void) const
Get the Mol member data.
TSeqid GetSeqid(void) const
Get the Seqid member data.
const TGiim & GetGiim(void) const
Get the variant data.
Definition: Seq_id_.cpp:215
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
const TChain_id & GetChain_id(void) const
Get the Chain_id member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
const TPatent & GetPatent(void) const
Get the variant data.
Definition: Seq_id_.cpp:325
TGibbmt GetGibbmt(void) const
Get the variant data.
Definition: Seq_id_.hpp:814
const TCit & GetCit(void) const
Get the Cit member data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_General
for other databases
Definition: Seq_id_.hpp:105
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
TUser & SetUser(void)
Select the variant.
Definition: Seqdesc_.cpp:390
int i
Lightweight interface for getting lines of data with minimal memory copying.
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
USING_SCOPE(objects)
Modified on Wed Sep 04 15:06:07 2024 by modify_doxy.py rev. 669887