NCBI C++ ToolKit
cuUtils.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuUtils.cpp 97211 2022-06-27 17:11:47Z dzhang $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Chris Lanczycki
27  *
28  * File Description:
29  *
30  * Various utility functions for CDTree
31  *
32  * ===========================================================================
33  */
34 
35 #include <ncbi_pch.hpp>
39 #include <objects/cdd/Cdd.hpp>
40 #include <objects/cdd/Cdd_id.hpp>
44 
45 #include <algorithm>
46 #include <stdio.h>
48 
49 // comma is now allowed (4/15/04)
50 // single-quote is now allowed (4/28/04)
51 
53 BEGIN_SCOPE(cd_utils)
54 
56  0x806000, // dark brown
57  0xff0000, // red
58  0x149600, // dark green
59  0x0000ff, // dark blue
60  0xb428ff, // purple
61  0xff8c1a, // orange
62  0x00ffff, // light blue
63  0xff499b, // pink
64  0xc89600, // brown
65  0x00ff00, // light green
66  0xaaaaaa, // gray
67  0xcee000, // olive
68  0x4a85ff, // blue
69  0xffc800, // light orange
70  0xc800bc, // maroon
71  0xeeee00, // yellow
72 };
74 
76 //-------------------------------------------------------------------
77 // make a string for a seq-id (underscore delimiter for PDBs)
78 //-------------------------------------------------------------------
79  TGi GI;
80 
81  if (SeqID.Empty()) {
82  Str += "<Empty Sequence>";
83  return;
84  } else if (!SeqID->IsGi() && !SeqID->IsPdb()) {
85  Str += "<Non-gi/pdb Sequence Types Unsupported>";
86  return;
87  }
88 
89  if (SeqID->IsGi()) {
90  GI = SeqID->GetGi();
91  Str += NStr::NumericToString<TGi>(GI);
92  } else if (SeqID->IsPdb()) {
93 
94  const CPDB_seq_id& pPDB_ID = SeqID->GetPdb();
95  char buf[1024];
96  char chain=pPDB_ID.GetChain();
97  sprintf(buf,"pdb %s_%c",pPDB_ID.GetMol().Get().c_str(),chain);
98  int len=strlen(buf);
99  if(chain==' ')buf[len-2]=0; // if there is no chain info (i.e., uses the default chain value)
100  Str += string(buf);
101 
102  }
103 
104 }
105 
106 string Make_SeqID_String(const CRef< CSeq_id > SeqID, bool Pad, int Len) {
107 //-------------------------------------------------------------------
108 // make a string for a seq-id (space delimiter for PDBs)
109 //-------------------------------------------------------------------
110  TGi GI;
111  string Str = kEmptyStr;
112 
113  if (SeqID.Empty()) {
114  return "<Empty Sequence>";
115  } else if (!SeqID->IsGi() && !SeqID->IsPdb() && !SeqID->IsOther() && !SeqID->IsLocal()) {
116  return SeqID->GetSeqIdString();
117  }
118 
119  // Custom string construction for Gi, PDB, Other, or Local types
120  if (SeqID->IsGi()) {
121  GI = SeqID->GetGi();
122  Str = NStr::NumericToString<TGi>(GI);
123  } else if (SeqID->IsPdb()) {
124  const CPDB_seq_id& pPDB_ID = SeqID->GetPdb();
125  Str = pPDB_ID.GetMol().Get() + " " + string(1, (char) pPDB_ID.GetChain());
126  } else if (SeqID->IsOther()) {
127  Str = SeqID->GetOther().GetAccession();
128  } else if (SeqID->IsLocal()) {
129  if (SeqID->GetLocal().IsId()) {
130  Str = SeqID->GetLocal().GetId();
131  } else if (SeqID->GetLocal().IsStr()) {
132  Str = SeqID->GetLocal().GetStr();
133  }
134  }
135 
136  // pad with spaces to length of Len
137  if (Pad) {
138  if ( (int)Str.size() < Len) {
139  Str.append(Len - Str.size(), ' ');
140  }
141  }
142  return Str;
143 }
144 
145 string GetSeqIDStr(const CRef< CSeq_id >& SeqID)
146 {
147  string Str = Make_SeqID_String(SeqID, false, 0);
148  return Str;
149 }
150 
151 // Return the first CCdd_id object in a CD.
152 string CddIdString(const CCdd& cdd) {
153  string s;
154  list< CRef< CCdd_id > >::const_iterator i, ibegin, iend;
155 
156  ibegin = cdd.GetId().Get().begin();
157  iend = cdd.GetId().Get().end();
158  for (i = ibegin; i != iend; ++i) {
159  if (i != ibegin) {
160  s.append(", ");
161  }
162  s.append(CddIdString(**i));
163  }
164  return s;
165 }
166 
167 
168 string CddIdString(const CCdd_id& id) {
169 
170  CCdd_id::E_Choice e = id.Which();
171  if (e == CCdd_id::e_Uid) {
172  return "UID " + NStr::IntToString(id.GetUid());
173  } else if (e == CCdd_id::e_Gid) {
174  string s = "Accession " + id.GetGid().GetAccession();
175  if (id.GetGid().IsSetDatabase()) {
176  s.append(" Database " + id.GetGid().GetDatabase());
177  }
178  if (id.GetGid().IsSetRelease()) {
179  s.append(" Release " + id.GetGid().GetRelease());
180  }
181  if (id.GetGid().IsSetVersion()) {
182  s.append(" Version " + NStr::IntToString(id.GetGid().GetVersion()));
183  }
184  return s;
185  } else {
186  return "Unset/Unknown Cdd_id";
187  }
188 
189 }
190 
191 bool SameCDAccession(const CCdd_id& id1, const CCdd_id& id2) {
192 
193  bool result = false;
194  CCdd_id::E_Choice e1 = id1.Which(), e2 = id2.Which();
195  if (e1 == CCdd_id::e_Gid && e1 == e2) {
196  if (id1.GetGid().GetAccession() == id2.GetGid().GetAccession()) {
197  result = true;
198  }
199  }
200  return result;
201 }
202 
bool Prosite2Regex(const std::string& prosite, std::string* regex, std::string* errString) {
//-------------------------------------------------------------------
// copied from Paul. see sequence_set.cpp
//
// Translate a ProSite pattern into regular-expression syntax.
//
//   prosite    pattern to translate; letters are case-insensitive and the
//              pattern must end with '.'
//   regex      receives the translation; only modified when the pattern
//              passes validation
//   errString  cleared on entry; receives a message when validation fails
//
// Returns true when the pattern was translated, false when it was
// rejected (invalid character, empty pattern, or missing final '.').
//
// Translation rules:
//   '-' and '.'   element separators; close the currently open group
//   '<'  '>'      become '^' and '$'
//   (n)           becomes {n}
//   {ABC}         becomes [^ABC]
//   x / X         becomes '.'
//   other letter, '[...]' or '{...}' elements are wrapped in a (...) group
//-------------------------------------------------------------------
    errString->erase();

    // check allowed characters
    static const std::string allowed = "-ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789[],(){}<>.";
    for (std::string::size_type i = 0; i < prosite.size(); ++i) {
        const char upper = static_cast<char>(toupper(static_cast<unsigned char>(prosite[i])));
        if (allowed.find(upper) == std::string::npos) {
            *errString = "invalid ProSite character";
            return false;
        }
    }

    // test for emptiness first: indexing size()-1 on an empty string is out of range
    if (prosite.empty() || prosite[prosite.size() - 1] != '.') {
        *errString = "ProSite pattern must end with '.'";
        return false;
    }

    // translate into real regex syntax;
    regex->erase();

    bool inGroup = false;
    for (std::string::size_type i = 0; i < prosite.size(); ++i) {
        const char c = prosite[i];
        const unsigned char uc = static_cast<unsigned char>(c);

        // handle grouping and termini
        if (c == '-' || c == '.' || c == '>') {
            if (inGroup) {
                *regex += ')';
                inGroup = false;
            }
            if (c == '>') *regex += '$';
            continue;
        }
        if (c == '<') {
            *regex += '^';
            continue;
        }

        // a residue (other than the wildcard X) or a bracketed set opens a group
        if (!inGroup &&
            ((isalpha(uc) && toupper(uc) != 'X') || c == '[' || c == '{')) {
            *regex += '(';
            inGroup = true;
        }

        // translate syntax
        switch (c) {
            case '(':
                *regex += '{';
                break;
            case ')':
                *regex += '}';
                break;
            case '{':
                *regex += "[^";
                break;
            case '}':
                *regex += ']';
                break;
            case 'X': case 'x':
                *regex += '.';
                break;
            default:
                *regex += static_cast<char>(toupper(uc));
                break;
        }
    }

    return true;
}
280 
281 END_SCOPE(cd_utils)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CCdd_id –.
Definition: Cdd_id.hpp:66
Definition: Cdd.hpp:51
#define Len
bool Prosite2Regex(const std::string &prosite, std::string *regex, std::string *errString)
Definition: cuUtils.cpp:203
string CddIdString(const CCdd &cdd)
Definition: cuUtils.cpp:152
bool SameCDAccession(const CCdd_id &id1, const CCdd_id &id2)
Definition: cuUtils.cpp:191
const int CDTreeColorCycle[]
Definition: cuUtils.cpp:55
void Make_GI_or_PDB_String_CN3D(const CRef< CSeq_id > SeqID, std::string &Str)
Definition: cuUtils.cpp:75
string GetSeqIDStr(const CRef< CSeq_id > &SeqID)
Definition: cuUtils.cpp:145
const int kNumColorsInCDTreeColorCycle
Definition: cuUtils.cpp:73
string Make_SeqID_String(const CRef< CSeq_id > SeqID, bool Pad, int Len)
Definition: cuUtils.cpp:106
string
Definition: cgiapp.hpp:687
const TPrim & Get(void) const
Definition: serialbase.hpp:347
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2144
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
const TId & GetId(void) const
Get the Id member data.
Definition: Cdd_.hpp:1171
const TAccession & GetAccession(void) const
Get the Accession member data.
Definition: Global_id_.hpp:306
E_Choice Which(void) const
Which variant is currently selected.
Definition: Cdd_id_.hpp:231
const TGid & GetGid(void) const
Get the variant data.
Definition: Cdd_id_.cpp:100
const Tdata & Get(void) const
Get the member data.
E_Choice
Choice variants.
Definition: Cdd_id_.hpp:86
@ e_Gid
holds accession/version pairs
Definition: Cdd_id_.hpp:89
@ e_Uid
for synchronization with Entrez holds PSSM-Ids
Definition: Cdd_id_.hpp:88
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
TChain GetChain(void) const
Get the Chain member data.
const TPdb & GetPdb(void) const
Get the variant data.
Definition: Seq_id_.cpp:435
bool IsOther(void) const
Check if variant Other is selected.
Definition: Seq_id_.hpp:871
bool IsPdb(void) const
Check if variant Pdb is selected.
Definition: Seq_id_.hpp:922
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
const TMol & GetMol(void) const
Get the Mol member data.
const TOther & GetOther(void) const
Get the variant data.
Definition: Seq_id_.cpp:347
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
const TAccession & GetAccession(void) const
Get the Accession member data.
char * buf
int i
int len
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int toupper(Uchar c)
Definition: ncbictype.hpp:73
else result
Definition: token2.c:20
#define const
Definition: zconf.h:230
Modified on Tue Nov 28 02:29:52 2023 by modify_doxy.py rev. 669887