NCBI C++ ToolKit
thrdzsc.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: thrdzsc.cpp 33815 2007-05-04 17:18:18Z kazimird $
2 *===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: thrdzsc.cpp
27 *
28 * Author: Stephen Bryant
29 *
30 * Initial Version Creation Date: 08/16/2000
31 *
32 * $Revision: 33815 $
33 *
34 * File Description: threader
35 */
36 
37 #ifdef _MSC_VER
38 #pragma warning(disable:4244) // disable double->float warning in MSVC
39 #endif
40 
41 #include <ncbi_pch.hpp>
42 #include <corelib/ncbistd.hpp>
43 #include <util/random_gen.hpp>
44 
/* File-local random number generator; it is re-seeded through RandomSeed()
   and sampled through RandomNum(), both defined just below. */
45 static ncbi::CRandom randGenerator;
/* Re-seeds the file-local generator with s. The int argument is converted
   to the generator's own value type; the conversion is spelled out here
   instead of being left implicit. */
46 static void RandomSeed(int s)
47 {
48  randGenerator.SetSeed(static_cast<ncbi::CRandom::TValue>(s));
49 }
/* Returns the next value drawn from the file-local generator, converted to
   int (the conversion the original performed implicitly on return). */
50 static int RandomNum(void)
51 {
52  return static_cast<int>(randGenerator.GetRand());
53 }
54 
55 extern "C" {
56 
59 #include <math.h>
60 
61 /* Computes the Z-score of the top thread(s) recorded in ttb by shuffling the
62 aligned region of the query sequence n_perm (10000) times.

   Threads are visited starting at index ttb->mx and following the ttb->nx
   links; no sorting is done in this function. n_thr is fixed to 1 below, so
   only the thread at ttb->mx is processed.

   For each thread:
     1. The query positions covered by the core segments, by the inter-core
        loops that pass the length test, and by the terminal windows are
        collected into pvl->aqi.
     2. Iteration k==0 evaluates the energy of the unshuffled thread (tg_max).
     3. Iterations k==1..n_perm seed the generator with k, permute the
        residues among the collected positions and re-evaluate the energy.
     4. Mean and sample standard deviation over the n_perm shuffles are
        turned into the values stored in ttb->zsc, ttb->g0, ttb->m0,
        ttb->errm and ttb->errp at the thread's index.

   Side effects: overwrites the work arrays in pvl (sq, aqi, r, o, lsg),
   sai->cf, and spe->gs, spe->ms, spe->gss. spe->cs and spe->ls are only
   read. */
63
64 void zsc(Thd_Tbl* ttb, Seq_Mtf* psm, Qry_Seq* qsq, Cxl_Los** cpr, Cor_Def* cdf,
65  Rcx_Ptl* pmf, Seg_Gsm* spe, Cur_Aln* sai, Rnd_Smp* pvl, double ScalingFactor) {
66 /*---------------------------------------------------------*/
67 /* ttb: Tables to hold Results of Gibbs sampled threading */
68 /* qsq: Sequence to thread with alignment constraints */
69 /* cpr: Contacts by segment, largest possible set */
70 /* cdf: Core segment locations and loop length limits */
71 /* pmf: Potential of mean force as a 3-d lookup table */
72 /* psm: Sequence motif parameters */
73 /* spe: Partial sums of contact energies by segment pair */
74 /* sai: Current alignment of query sequence with core */
75 /* pvl: Storage for sequence permutation parameters */
/* ScalingFactor: integer energies are divided by this value */
/*      before statistics are gathered; stored results are */
/*      multiplied by it again */
76 /*---------------------------------------------------------*/
77
78 int i,j,k,nmt,
79  itr,ii,jj; /* Counters */
80 int i_max; /* Index of the top hit */
81 int ntr; /* Number of positions to be shuffled */
82 int nsc; /* Number of core segments */
83 int ppi; /* Index of the peptide group in potential */
84 int n_perm; /* Number of permutations */
85 int mn,mx; /* Inclusive index range covered by a core segment */
86 int n_thr; /* Number of top threads to calculate Z-score for */
87 int to,from,
88  tlf=0,tls=0,dln; /* Loop/tail boundaries (inclusive) and loop length */
89 int *cf; /* Flags residues within the core */
90 int t1,t2; /* Motif residue types */
91 int r1,r2; /* Motif residue positions */
92 int s1,s2; /* Core segment indices */
93 int d; /* Distance interval */
94 int ms,cs,
95  ls; /* Energy terms */
96 float avg,avg2,
97  avgm,avgp,
98  avgm2,avgp2; /* Averages */
99 int nl,ot; /* Upper limit of the current sort pass; swap temporary */
100 float disp,dispm,dispp;/* Square root of variance */
101 float tg,tgp,tgm; /* Energy of shuffled sequence */
102 float tg_max=0.0f; /* Energy of a given thread */
103 int *gi; /* Row of spe->gss currently being summed */
104 int *lsg,*aqi,
105  *r,*o,*sq; /* Work arrays borrowed from pvl, described below */
106 int g; /* Sum of the spe->gs and spe->gss energy terms */
107
108 Cxl_Los *cr; /* Pointer to segment reference contact lists */
109
110 /* Parameters */
111
112 nsc=cdf->sll.n;
113 n_perm=10000;
114 n_thr=1; /* Should be less than ttb->n */
115 ppi=pmf->ppi;
116 nmt=psm->n;
117 cf=sai->cf;
118 sq=pvl->sq;
119 aqi=pvl->aqi;
120 r=pvl->r;
121 o=pvl->o;
122 lsg=pvl->lsg;
/* Work arrays as used in this function:
   sq  - residue type per query position; -1 marks a position that takes
         no part in the current shuffle
   aqi - query positions whose residues are shuffled
   r   - random sort keys first, then reused for the source positions
   o   - index order obtained by sorting on r
   lsg - per loop/tail flag (0..nsc): 1 = include its positions in aqi */
123
124
125 /* Start loop over n_thr top threads */
126
127 i_max=ttb->mx;
128 for(ii=0;ii<n_thr;ii++){
129
130  avg = avg2 = avgm = avgp = avgm2 = avgp2 = 0.0;
131  ntr=0;
132
/* Start from the unmodified query sequence; iteration k==0 of the
   permutation loop evaluates this copy as is. */
133  for(jj=0;jj<qsq->n;jj++)sq[jj]=qsq->sq[jj];
134
135 /* Collect sequence indices of threaded residues of sequence */
136
/* Every loop/tail is included until the length test below excludes it. */
137 for(i=0;i<=nsc;i++) lsg[i]=1;
138
139 itr=0;
140
141 for(j=0;j<nsc;j++){
142
143  mn=ttb->al[j][i_max]-ttb->no[j][i_max];
144  mx=ttb->al[j][i_max]+ttb->co[j][i_max];
145
/* Query positions covered by core segment j. */
146  for(k=mn;k<=mx;k++){
147  aqi[itr]=k;
148  itr++;}
149
/* Exclude the loop between core j and core j+1 when it is empty or
   longer than cdf->lll.lrfs[j+1]. Only lsg[1..nsc-1] can be cleared
   here; lsg[0] and lsg[nsc] (the two tails) always stay set. */
150  if(j<nsc-1){
151  to=ttb->al[j+1][i_max]-ttb->no[j+1][i_max];
152  from=ttb->al[j][i_max]+ttb->co[j][i_max];
153  dln=to-from-1;
154  if(dln > cdf->lll.lrfs[j+1] || dln < 1) lsg[j+1]=0;}
155
156  }
157
158
/* Append the positions of the included loops and of the two terminal
   windows: j==0 is the stretch before the first core, j==nsc the stretch
   after the last core, anything else the loop between core j-1 and j. */
159 for(j=0;j<=nsc;j++){
160
161  if(lsg[j]==0) continue;
162
/* Window of at most lrfs[0] positions ending right before the first core.
   NOTE(review): the lower clip is 1 and the upper clip below is qsq->n,
   while sq is filled for indices 0..qsq->n-1 above; this looks like
   1-based bounds - confirm that index qsq->n is valid for sq and qsq->sq. */
163  if(j==0){tlf=ttb->al[j][i_max]-ttb->no[j][i_max]-1;
164  tls=tlf-cdf->lll.lrfs[j]+1;
165  if(tls<1) tls=1;}
166
/* Window of at most lrfs[nsc] positions starting right after the last core. */
167  if(j==nsc){tls=ttb->al[j-1][i_max]+ttb->co[j-1][i_max]+1;
168  tlf=tls+cdf->lll.lrfs[j]-1;
169  if(tlf>qsq->n) tlf=qsq->n;}
170
/* All positions strictly between core j-1 and core j. */
171  if(j!=0 && j!=nsc){tls=ttb->al[j-1][i_max]+ttb->co[j-1][i_max]+1;
172  tlf=ttb->al[j][i_max]-ttb->no[j][i_max]-1;}
173
174
175  for(k=tls;k<=tlf;k++){
176  aqi[itr]=k;
177  itr++; } }
178
/* ntr is the last valid index of aqi (count minus one); the loops below
   therefore run with i<=ntr. */
179  ntr=itr-1;
180
181
182 /* Flag residues within the cores */
183
/* cf[p] becomes the core segment index for motif position p, or -1 when
   p lies outside every core extent of this thread. */
184 for(i=0;i<nmt; i++) cf[i]=(-1);
185 for(i=0; i<nsc; i++) {
186
187  mn=cdf->sll.rfpt[i]-ttb->no[i][i_max];
188  mx=cdf->sll.rfpt[i]+ttb->co[i][i_max];
189
190  for(j=mn; j<=mx; j++) cf[j]=i;
191
192  }
193
194 /* Loop over n_perm permutations */
195
/* k==0 scores the unshuffled thread; k==1..n_perm score the shuffles. */
196 for(k=0; k<=n_perm; k++) {
197
198 /* srand48(k); */
/* The generator is re-seeded with the iteration number on every pass. */
199  RandomSeed(k);
200
201 /* Perform the permutation of the residues of aligned part together with the
202 intercore loops. Tail loops are not included. */
/* NOTE(review): the collection step above does append the two terminal
   windows whenever lrfs[0] / lrfs[nsc] allow a non-empty range, which
   disagrees with "Tail loops are not included" - confirm which is intended. */
203
204 /* for(i=0;i<=ntr;i++)r[i]=lrand48(); */
/* One random key per collected position, and the identity order to sort. */
205 for(i=0;i<=ntr;i++)r[i]=RandomNum();
206 for(i=0;i<=ntr;i++)o[i]=i;
207
/* Bubble sort of o by ascending key r[o[.]]. After each pass i holds the
   position of the last swap, which becomes the upper limit of the next
   pass; the sort ends when a pass makes no swap beyond position 0. */
208 for(i=ntr; i>0; ) {nl=i;
209  i=0;
210  for(j=0; j<nl; j=j+1){
211  if(r[o[j]]>r[o[j+1]]){
212  ot=o[j];
213  o[j]=o[j+1];
214  o[j+1]=ot;
215  i=j;
216  }}}
217
/* Build the shuffled sequence: every position is first marked unused (-1),
   then position aqi[i] receives the residue found at aqi[o[i]] in the
   original query. r is reused here to hold the source positions. Skipped
   for k==0 so that sq still holds the unmodified query. */
218 if(k!=0){
219  for(i=0;i<qsq->n;i++) sq[i]=-1;
220
221  for(i=0;i<=ntr;i++){r[i]=aqi[o[i]];
222  sq[aqi[i]]=qsq->sq[r[i]];}
223  }
224
225 /* Calculate the energy for a given permuted sequence aligned with
226 the structure*/
227
228 /* Loop over core segments */
229
230 for(i=0; i<nsc; i++) {
231
232  cr=cpr[i];
233
234  spe->gs[i]=0; spe->ms[i]=0;
235
/* NOTE(review): this clears row i and column i of gss, including anything
   that earlier iterations of this loop added to those entries; presumably
   the contact lists are arranged so that this is intended - confirm. */
236  for(j=0;j<nsc;j++) { spe->gss[i][j]=0; spe->gss[j][i]=0;}
237
238  /* Loop over residue-residue contacts in the reference list */
239
240  for(j=0; j<cr->rr.n; j++) {
241
242  /* Test that the contact is within the allowed extent range */
/* A motif position r is mapped to a query position by applying its offset
   from cdf->sll.rfpt[s] to the aligned position ttb->al[s]. Contacts are
   skipped when either end is outside the cores (cf<0) or maps to a
   position marked -1 in sq. */
243  r1=cr->rr.r1[j];
244  s1=cf[r1];
245  if(s1<0) continue;
246  t1=sq[ttb->al[s1][i_max]-(cdf->sll.rfpt[s1]-r1)];
247  if(t1<0) continue;
248  r2=cr->rr.r2[j];
249  s2=cf[r2];
250  if(s2<0) continue;
251  t2=sq[ttb->al[s2][i_max]-(cdf->sll.rfpt[s2]-r2)];
252  if(t2<0) continue;
253  d=cr->rr.d[j];
254
255  spe->gss[s1][s2]+=pmf->rrt[d][t1][t2];
256
257  }
258
259
260  /*Loop over residue-peptide contacts in the reference list */
261
262  for(j=0; j<cr->rp.n; j++) {
263
264  /* Test that the contact is present in the current core */
265
/* The second partner is a peptide group: only its core membership is
   tested, and the potential is looked up with the peptide index ppi. */
266  r1=cr->rp.r1[j];
267  s1=cf[r1];
268  if(s1<0) continue;
269  t1=sq[ttb->al[s1][i_max]-(cdf->sll.rfpt[s1]-r1)];
270  if(t1<0) continue;
271  r2=cr->rp.p2[j];
272  s2=cf[r2];
273  if(s2<0) continue;
274  d=cr->rp.d[j];
275
276  spe->gss[s1][s2]+=pmf->rrt[d][t1][ppi];
277
278  }
279
280  /* Loop over residue-fixed contacts in the reference list */
281
282  for(j=0; j<cr->rf.n; j++) {
283
284  /* Test that the contact is present in the current list */
/* The partner type t2 comes straight from the contact list; the energy is
   added to the per-segment sum gs[i] rather than to gss. */
285  r1=cr->rf.r1[j];
286  s1=cf[r1];
287  if(s1<0) continue;
288  t1=sq[ttb->al[s1][i_max]-(cdf->sll.rfpt[s1]-r1)];
289  if(t1<0) continue;
290  t2=cr->rf.t2[j];
291  d=cr->rf.d[j];
292
293  spe->gs[i]+=pmf->rrt[d][t1][t2];
294
295  }
296
297 /* Sum motif energies */
298
299  mn=cdf->sll.rfpt[i]-ttb->no[i][i_max];
300  mx=cdf->sll.rfpt[i]+ttb->co[i][i_max];
301
/* psm->ww[j][t1] is added for every motif position j of core i whose
   mapped query position holds a residue (t1>=0). */
302  for(j=mn; j<=mx; j++) {
303
304  t1=sq[ttb->al[i][i_max]-(cdf->sll.rfpt[i]-j)];
305  if(t1<0) continue;
306  spe->ms[i]+=psm->ww[j][t1]; }
307
308
309 }
310
/* Totals over all segments. spe->cs and spe->ls are not recomputed in this
   function; whatever values they hold on entry are added to every
   iteration's total. */
311 g=0; ms=0; cs=0; ls=0;
312 for(i=0;i<nsc;i++) {
313
314  g+=spe->gs[i];
315  ms+=spe->ms[i];
316  cs+=spe->cs[i];
317  ls+=spe->ls[i];
318
319  gi=spe->gss[i];
320  for(j=0;j<nsc;j++) g+=gi[j]; }
321
322  /*ms=ms-psm->score0;*/
323
324 /* Entire energy for a current permutation */
325
/* tg is the total, tgm the motif part, tgp the contact-potential part, all
   divided by ScalingFactor. For k==0 the three are set to zero so that the
   unshuffled thread adds nothing to the sums below, and its total is kept
   in tg_max instead. */
326  if(k!=0) {
327  tg=((float)(g+ms+cs+ls))/ScalingFactor;
328  tgm=((float)(ms))/ScalingFactor;
329  tgp=((float)(g))/ScalingFactor;}
330
331  else {
332  tg = tgm = tgp = 0.0;
333  tg_max=((float)(g+ms+cs+ls))/ScalingFactor;}
334
/* Running sums and sums of squares over the shuffles. */
335  avg+=tg;
336  avg2+=tg*tg;
337
338  avgm+=tgm;
339  avgm2+=tgm*tgm;
340
341  avgp+=tgp;
342  avgp2+=tgp*tgp;
343
344  } /* End of loop over permutations */
345
346 /* Calculate the mean, variance and Z-score */
347
/* Sample standard deviation from the running sums:
   sqrt((sum of squares - (sum)^2 / n_perm) / (n_perm - 1)).
   NOTE(review): the sums are single-precision floats, so the subtraction
   can lose accuracy - confirm this is acceptable for the energy range. */
348  disp=sqrt(((float)avg2 - (avg*avg)/n_perm)/(n_perm-1));
349  dispm=sqrt(((float)avgm2 - (avgm*avgm)/n_perm)/(n_perm-1));
350  dispp=sqrt(((float)avgp2 - (avgp*avgp)/n_perm)/(n_perm-1));
351
352  avg=avg/n_perm;
353  avgm=avgm/n_perm;
354  avgp=avgp/n_perm;
355
/* Stored results for this thread. The Z-score (tg_max-avg)/disp is stored
   multiplied by ScalingFactor; g0 and m0 are the mean total and mean motif
   energies, errm and errp the standard deviations of the motif and
   contact-potential energies, all converted back to unscaled units. The
   mean avgp is computed above but not stored.
   NOTE(review): disp is not checked for zero before the division. */
356  ttb->zsc[i_max]=ScalingFactor*(tg_max-avg)/disp;
357  ttb->g0[i_max]=avg*ScalingFactor;
358  ttb->m0[i_max]=avgm*ScalingFactor;
359  ttb->errm[i_max]=dispm*ScalingFactor;
360  ttb->errp[i_max]=dispp*ScalingFactor;
361
/* Advance to the next thread along the ttb->nx links. */
362  i_max=ttb->nx[i_max];
363
364  } /* End of loop over threads */
365
366
367
368 }
369 
370 } // extern "C"
Include a standard set of the NCBI C++ Toolkit most basic headers.
int i
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
int * lrfs
Definition: thrdatd.h:79
struct _Cor_Def::@23 sll
struct _Cor_Def::@24 lll
int * rfpt
Definition: thrdatd.h:69
int n
Definition: thrdatd.h:74
int * cf
Definition: thrdatd.h:193
int * r2
Definition: thrdatd.h:276
int n
Definition: thrdatd.h:279
struct _Cxl_Los::@29 rr
struct _Cxl_Los::@30 rp
int * p2
Definition: thrdatd.h:283
int * r1
Definition: thrdatd.h:275
int * t2
Definition: thrdatd.h:290
struct _Cxl_Los::@31 rf
int * d
Definition: thrdatd.h:277
int * sq
Definition: thrdatd.h:92
int n
Definition: thrdatd.h:93
int *** rrt
Definition: thrdatd.h:105
int ppi
Definition: thrdatd.h:108
int * o
Definition: thrdatd.h:312
int * lsg
Definition: thrdatd.h:310
int * r
Definition: thrdatd.h:312
int * aqi
Definition: thrdatd.h:311
int * sq
Definition: thrdatd.h:313
int * cs
Definition: thrdatd.h:228
int ** gss
Definition: thrdatd.h:225
int * gs
Definition: thrdatd.h:226
int * ls
Definition: thrdatd.h:229
int * ms
Definition: thrdatd.h:227
int ** ww
Definition: thrdatd.h:318
int n
Definition: thrdatd.h:320
int ** no
Definition: thrdatd.h:153
float * zsc
Definition: thrdatd.h:144
float * errp
Definition: thrdatd.h:148
int * nx
Definition: thrdatd.h:156
int ** al
Definition: thrdatd.h:152
float * m0
Definition: thrdatd.h:146
float * errm
Definition: thrdatd.h:147
int ** co
Definition: thrdatd.h:154
float * g0
Definition: thrdatd.h:145
int mx
Definition: thrdatd.h:157
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
static ncbi::CRandom randGenerator
Definition: thrdzsc.cpp:45
static int RandomNum(void)
Definition: thrdzsc.cpp:50
static void RandomSeed(int s)
Definition: thrdzsc.cpp:46
void zsc(Thd_Tbl *ttb, Seq_Mtf *psm, Qry_Seq *qsq, Cxl_Los **cpr, Cor_Def *cdf, Rcx_Ptl *pmf, Seg_Gsm *spe, Cur_Aln *sai, Rnd_Smp *pvl, double ScalingFactor)
Definition: thrdzsc.cpp:64
Modified on Mon Mar 04 05:10:25 2024 by modify_doxy.py rev. 669887