NCBI C++ ToolKit
raw_scoremat.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: raw_scoremat.c 72951 2016-06-09 18:13:53Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aaron Ucko
27  *
28  * File Description:
29  * Protein alignment score matrices; shared between the two toolkits.
30  *
31  */
32 
34 
35 #include <ctype.h>
36 #include <string.h>
37 
38 #include "sm_blosum45.c"
39 #include "sm_blosum50.c"
40 #include "sm_blosum62.c"
41 #include "sm_blosum80.c"
42 #include "sm_blosum90.c"
43 #include "sm_pam30.c"
44 #include "sm_pam70.c"
45 #include "sm_pam250.c"
46 #include "sm_identity.c"
47 
48 static const char kNCBIstdaa[] = "-ABCDEFGHIKLMNPQRSTVWXYZU*OJ";
49 
50 
52 {
53  const char *p;
54 
55  /* Translate to NCBIeaa */
56  if (aa >= 0 && aa < sizeof(kNCBIstdaa)) {
57  aa = kNCBIstdaa[aa];
58  } else if (islower((unsigned char) aa)) {
59  aa = toupper((unsigned char) aa);
60  }
61 
62  p = strchr(sm->symbols, aa);
63  return p ? (int)(p - sm->symbols) : -1;
64 }
65 
66 
68  int aa1, int aa2)
69 {
70  int i1, i2;
71  i1 = NCBISM_GetIndex(sm, aa1);
72  i2 = NCBISM_GetIndex(sm, aa2);
73  if (i1 >=0 && i2 >= 0) {
74  return sm->scores[(size_t)i1 * strlen(sm->symbols) + (size_t)i2];
75  } else {
76  return sm->defscore;
77  }
78 }
79 
80 
83 {
84  const char* sym;
85  int dim, i, j, aa1, aa2;
86 
87  sym = psm->symbols;
88  dim = (int)strlen(sym);
89  /* fill with default */
90  for (i = 0; i < NCBI_FSM_DIM; ++i) {
91  fsm->s[0][i] = psm->defscore;
92  }
93  for (i = 1; i < NCBI_FSM_DIM; ++i) {
94  memcpy(fsm->s[i], fsm->s[0], NCBI_FSM_DIM * sizeof(fsm->s[0][0]));
95  }
96  for (i = 0; i < dim; ++i) {
97  aa1 = sym[i];
98  /* get core (NCBIeaa x NCBIeaa) */
99  for (j = 0; j < dim; ++j) {
100  aa2 = sym[j];
101  fsm->s[aa1][aa2] = psm->scores[i * dim + j];
102  }
103  /* extend horizontally */
104  for (aa2 = 0; aa2 < sizeof(kNCBIstdaa); ++aa2) {
105  fsm->s[aa1][aa2] = fsm->s[aa1][(int)kNCBIstdaa[aa2]];
106  }
107  for (aa2 = 'a'; aa2 <= 'z'; ++aa2) {
108  fsm->s[aa1][aa2] = fsm->s[aa1][toupper((unsigned char) aa2)];
109  }
110  }
111  /* extend vertically */
112  for (aa1 = 0; aa1 < sizeof(kNCBIstdaa); ++aa1) {
113  memcpy(fsm->s[aa1], fsm->s[(int)kNCBIstdaa[aa1]], NCBI_FSM_DIM);
114  }
115  for (aa1 = 'a'; aa1 <= 'z'; ++aa1) {
116  memcpy(fsm->s[aa1], fsm->s[toupper((unsigned char) aa1)], NCBI_FSM_DIM);
117  }
118 }
119 
/*
 * Report whether str begins with pfx, ignoring the case of str.
 * Characters of str are lowercased before comparison while pfx is compared
 * as given, so pfx is expected to be supplied in lowercase.
 * Returns 1 on a match (including an empty pfx) and 0 otherwise.
 */
static
int /* bool */ s_NCBISM_StartsWith(const char* str, const char* pfx)
{
    size_t k = 0;

    while (pfx[k] != '\0') {
        /* A str shorter than pfx fails here: its terminator never equals
         * a non-NUL prefix character. */
        if (tolower((unsigned char) str[k]) != pfx[k]) {
            return 0;
        }
        ++k;
    }
    return 1;
}
130 
132 {
133  switch (name[0]) {
134  case 'B': case 'b':
135  if ( !s_NCBISM_StartsWith(name, "blosum") ) {
136  return NULL;
137  }
138  switch (name[6]) {
139  case '4': return strcmp(name + 6, "45") ? NULL : &NCBISM_Blosum45;
140  case '5': return strcmp(name + 6, "50") ? NULL : &NCBISM_Blosum50;
141  case '6': return strcmp(name + 6, "62") ? NULL : &NCBISM_Blosum62;
142  case '8': return strcmp(name + 6, "80") ? NULL : &NCBISM_Blosum80;
143  case '9': return strcmp(name + 6, "90") ? NULL : &NCBISM_Blosum90;
144  default: return NULL;
145  }
146 
147  case 'P': case 'p':
148  if ( !s_NCBISM_StartsWith(name, "pam") ) {
149  return NULL;
150  }
151  switch (name[3]) {
152  case '2': return strcmp(name + 3, "250") ? NULL : &NCBISM_Pam250;
153  case '3': return strcmp(name + 3, "30") ? NULL : &NCBISM_Pam30;
154  case '7': return strcmp(name + 3, "70") ? NULL : &NCBISM_Pam70;
155  }
156 
157  case 'I': case 'i':
158  if ( !s_NCBISM_StartsWith(name, "identity") ) {
159  return NULL;
160  }
161  return &NCBISM_Identity;
162 
163  default:
164  return NULL;
165  }
166 }
static const char * str(char *buf, int n)
Definition: stats.c:84
#define NULL
Definition: ncbistd.hpp:225
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int toupper(Uchar c)
Definition: ncbictype.hpp:73
int islower(Uchar c)
Definition: ncbictype.hpp:66
static int s_NCBISM_StartsWith(const char *str, const char *pfx)
Definition: raw_scoremat.c:121
int NCBISM_GetIndex(const SNCBIPackedScoreMatrix *sm, int aa)
Map a standard residue code into an index suitable for a particular packed score matrix.
Definition: raw_scoremat.c:51
const SNCBIPackedScoreMatrix * NCBISM_GetStandardMatrix(const char *name)
Definition: raw_scoremat.c:131
TNCBIScore NCBISM_GetScore(const SNCBIPackedScoreMatrix *sm, int aa1, int aa2)
Look up an entry in a packed score matrix.
Definition: raw_scoremat.c:67
void NCBISM_Unpack(const SNCBIPackedScoreMatrix *psm, SNCBIFullScoreMatrix *fsm)
Expand a packed score matrix into an unpacked one, which callers can proceed to index directly by standard residue code (an NCBIstdaa value, or an upper- or lowercase NCBIeaa letter).
Definition: raw_scoremat.c:81
static const char kNCBIstdaa[]
Definition: raw_scoremat.c:48
const SNCBIPackedScoreMatrix NCBISM_Pam30
Definition: sm_pam30.c:92
const SNCBIPackedScoreMatrix NCBISM_Blosum62
Definition: sm_blosum62.c:92
const SNCBIPackedScoreMatrix NCBISM_Pam250
Definition: sm_pam250.c:92
const SNCBIPackedScoreMatrix NCBISM_Blosum50
Definition: sm_blosum50.c:92
#define NCBI_FSM_DIM
Recommended approach: unpack and index directly.
Definition: raw_scoremat.h:85
const SNCBIPackedScoreMatrix NCBISM_Blosum80
Definition: sm_blosum80.c:92
const SNCBIPackedScoreMatrix NCBISM_Pam70
Definition: sm_pam70.c:92
const SNCBIPackedScoreMatrix NCBISM_Blosum45
The standard matrices.
Definition: sm_blosum45.c:92
const SNCBIPackedScoreMatrix NCBISM_Identity
Definition: sm_identity.c:92
const SNCBIPackedScoreMatrix NCBISM_Blosum90
Definition: sm_blosum90.c:92
int TNCBIScore
data types
Definition: raw_scoremat.h:45
TNCBIScore s[128][128]
Definition: raw_scoremat.h:87
const TNCBIScore * scores
strlen(symbols) x strlen(symbols)
Definition: raw_scoremat.h:48
TNCBIScore defscore
score for unknown residues
Definition: raw_scoremat.h:49
const char * symbols
order of residues
Definition: raw_scoremat.h:47
Modified on Fri Sep 20 14:57:21 2024 by modify_doxy.py rev. 669887