NCBI C++ ToolKit
sls_alp_data.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sls_alp_data.hpp 89176 2020-03-04 15:49:30Z gouriano $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: sls_alp_data.hpp
29 
30 Author: Sergey Sheetlin
31 
32 Contents: Contains input data
33 
34 ******************************************************************************/
35 
36 #ifndef ALGO_BLAST_GUMBEL_PARAMS__INCLUDED_SLS_ALP_DATA
37 #define ALGO_BLAST_GUMBEL_PARAMS__INCLUDED_SLS_ALP_DATA
38 
39 #include <ncbiconf.h>
40 #include <complex>
41 #include <iostream>
42 #include <map>
43 #include <vector>
44 #include <fstream>
45 #include <float.h>
46 #include <ctime>
47 #include <stdlib.h>
48 #include <limits>
49 
50 #ifndef NCBI_OS_MSWIN
51 #include <sys/time.h>
52 
53 #else
54 #include <sys/timeb.h>
55 
56 #define _CRTDBG_MAP_ALLOC
57 #include <crtdbg.h>
58 
59 #endif
60 
61 #include <corelib/ncbistl.hpp>
62 #include <util/random_gen.hpp>
64 
65 #include "sls_alp_regression.hpp"
66 
67 
69 BEGIN_SCOPE(blast)
70 
// Number of bytes in one megabyte (1024*1024); used as the divisor when
// byte counts are converted into the MB figures tracked in d_memory_size_in_MB.
const double mb_bytes = 1024.0 * 1024.0;
72 
73 BEGIN_SCOPE(Sls)
74 
76  {
83  };
84 
85 
86 
87  struct error//struct to handle exceptions
88  {
90  error(std::string st_,Int4 error_code_){st=st_;error_code=error_code_;};
92  //if=0 Result was calculated and returned
93 
94  //if =1 //Computation stopped because time
95  // or memory requirements exceeded
96  // user-specified thresholds
97 
98  //if =2 //Computation stopped due to different
99  // reasons than time or memory
100  // repeating computation with the same
101  // input parameters may be successful
102  //if =3 //Result can not be computed for current
103  // input parameters
104 
105  //if =4 //Other cases
106  //if =41 //memory allocation error
107  };
108 
109  struct error_for_single_realization//struct to handle exceptions during calculation of a single realization
110  {
113  };
114 
115 
// Plain bundle of the inputs needed by the lambda equation.
// NOTE(review): presumably handed to importance_sampling::lambda_equation
// through its void* argument — confirm against the implementation file.
116  struct data_for_lambda_equation//struct for lambda_equation
117  {
118  Int4 d_number_of_AA;//number of AA
119  Int4** d_smatr;//scoring matrix
120  double *d_RR1;//AA probabilities (presumably the first of the two distributions — confirm)
121  double *d_RR2;//AA probabilities (presumably the second of the two distributions — confirm)
122  };
123 
124 
125  class alp_data;
126 
127  template<typename T> class array_positive{
128  public:
129  array_positive(alp_data *alp_data_)// constructor
130  {
131  d_elem=NULL;
132  d_alp_data=alp_data_;
133  if(!d_alp_data)
134  {
135  throw error("Unexpected error",4);
136  };
137  d_dim=-1;
138  d_step=200;
139  };
140 
142 
143 
145 
146 
147  inline void set_elem(
148  Int4 ind_,
149  T elem_)
150  {
151  while(ind_>d_dim)
152  {
153  increment_array();
154  };
155 
156  d_elem[ind_]=elem_;
157  };
158 
159  inline void increase_elem_by_1(
160  Int4 ind_)
161  {
162  while(ind_>d_dim)
163  {
164  increment_array();
165  };
166 
167  d_elem[ind_]++;
168  };
169 
170  inline void increase_elem_by_x(
171  Int4 ind_,
172  T x_)
173  {
174  while(ind_>d_dim)
175  {
176  increment_array();
177  };
178 
179  d_elem[ind_]+=x_;
180  };
181 
182 
183 
184  public:
185 
187  Int4 d_dim;//dimension of the array is d_dim+1
189  alp_data *d_alp_data;//initial data
190  };
191 
192 
193  template<typename T> class array{
194  public:
195  array(alp_data *alp_data_)// constructor
196  {
197  d_elem=NULL;
198  d_alp_data=alp_data_;
199  d_dim=-1;
200  d_ind0=0;
201  d_step=200;
203  };
204 
206 
208 
210 
211 
212  inline void set_elem(
213  Int4 ind_,
214  T elem_)
215  {
216  while(ind_>d_dim_plus_d_ind0)
217  {
219  };
220 
221  while(ind_<d_ind0)
222  {
224  };
225 
226  d_elem[ind_-d_ind0]=elem_;
227  };
228 
229  inline void increase_elem_by_1(
230  Int4 ind_)
231  {
232  while(ind_>d_dim_plus_d_ind0)
233  {
235  };
236 
237  while(ind_<d_ind0)
238  {
240  };
241 
242  d_elem[ind_-d_ind0]++;
243  };
244 
245 
246  public:
247 
249  Int4 d_dim;//dimension of the array is d_dim+1
250  Int4 d_ind0;//the leftmost index of the array
253  alp_data *d_alp_data;//initial data
254  };
255 
256 
257  struct q_elem
258  {
261  };
262 
264 
265  public:
267  alp_data *alp_data_,
268  Int4 open_,
269  Int4 epen_,
270  Int4 number_of_AA_,
271  Int4 **smatr_,
272  double *RR1_,
273  double *RR2_);
274 
275  double d_mu;
276  double d_nu;
277  double d_eta;
278  double d_mu_SI;
279  double d_mu_DS;
280  double d_mu_ID;
281  double d_mu_IS;
282  double d_mu_SD;
285 
286 
287  double d_for_D[3];
288  double d_for_I[2];
289  double d_for_S[3];
290 
291  char d_for_D_states[3];
292  char d_for_I_states[2];
293  char d_for_S_states[3];
294 
295  double **d_exp_s;
296  double d_lambda;
298 
299 
300 
302 
303  static double lambda_equation(double x_,void* func_number_);
304 
305 
307  alp_data *d_alp_data;//initial data
308 
309  };
310 
311 
312 
313  class alp_data{
314 
315 
316 
317  public:
318 
319  alp_data(//constructor
320  Int4 rand_,//randomization number
321  Int4 open_,//gap opening penalty
322  Int4 epen_,//gap extension penalty
323  string smatr_file_name_,//scoring matrix file name
324  string RR1_file_name_,//probabilities1 file name
325  string RR2_file_name_,//probabilities2 file name
326  double max_time_,//maximum allowed calculation time in seconds
327  double max_mem_,//maximum allowed memory usage in MB
328  double eps_lambda_,//relative error for lambda calculation
329  double eps_K_,//relative error for K calculation
330  string out_file_name_);//output file name
331 
332  alp_data(//constructor
334  CRef<CGumbelParamsRandDiagnostics>& rand_params_);
335 
336 
337 
338  ~alp_data();//destructor
339 
340  inline double ran2()//generates the next random value
341  {
342  return (double)(d_rand_object->GetRand())/(double)(d_rand_object->GetMax());
343  };
344 
345  void read_smatr(
346  string smatr_file_name_,
347  Int4 **&smatr_,
348  Int4 &number_of_AA_smatr_);
349 
350  void check_out_file(
351  string out_file_name_);
352 
353 
354 
355  static double round(//returns nearest integer to x_
356  const double &x_);
357 
358  static string long_to_string(//convert integer to string
359  Int4 number_);
360 
361  static char digit_to_string(//convert a digit to a character (presumably a single decimal digit — confirm)
362  Int4 digit_);
363 
364  static void get_current_time(
365  double &seconds_);
366 
367 
368 
369 
370 
371 
372  void read_RR(
373  string RR_file_name_,
374  double *&RR_,
375  double *&RR_sum_,
376  Int4 *&RR_sum_elements_,
377  Int4 &number_of_AA_RR_);
378 
379  void read_RR(
380  const vector<double> &vector_,
381  double *&RR_,
382  double *&RR_sum_,
383  Int4 *&RR_sum_elements_,
384  Int4 &number_of_AA_RR_);
385 
386 
388 
389  static void assert_mem(void *pointer_);
390 
391 
392 
393  template<typename T>
395  Int4 dim_,
396  T ** &matr_)
397  {
398  matr_=NULL;
399  bool ee_error_flag=false;
400  error ee_error("",0);
401 
402  try
403  {
404  try
405  {
406 
407  Int4 i;
408  matr_=new T *[dim_];
409  assert_mem(matr_);
410 
411  for(i=0;i<dim_;i++)
412  {
413  matr_[i]=NULL;
414  };
415 
416  for(i=0;i<dim_;i++)
417  {
418  matr_[i]=new T [dim_];
419  assert_mem(matr_[i]);
420  };
421  d_memory_size_in_MB+=(double)sizeof(T)*(double)dim_*(double)dim_/mb_bytes;
422 
423  }
424  catch (error er)
425  {
426  ee_error_flag=true;
427  ee_error=er;
428  };
429  }
430  catch (...)
431  {
432  ee_error_flag=true;
433  ee_error=error("Internal error in the program\n",4);
434  };
435 
436  //memory release
437 
438  if(ee_error_flag)
439  {
440 
441  if(matr_)
442  {
443  Int4 i;
444  for(i=0;i<dim_;i++)
445  {
446  if(matr_[i])
447  {
448  delete[]matr_[i];matr_[i]=NULL;
449  };
450  };
451 
452  delete[]matr_;matr_=NULL;
453  };
454 
455  throw error(ee_error.st,ee_error.error_code);
456  };
457 
458  };
459 
460  template<typename T>
462  Int4 dim_,
463  T ** &matr_)
464  {
465  Int4 i;
466  if(matr_)
467  {
468  for(i=0;i<dim_;i++)
469  {
470  delete []matr_[i];matr_[i]=NULL;
471  };
472  delete []matr_;matr_=NULL;
473  };
474 
475  d_memory_size_in_MB-=(double)sizeof(T)*(double)dim_*(double)dim_/mb_bytes;
476  };
477 
478  static Int4 random_long(
479  double value_,
480  Int4 dim_);
481 
482 
483  template<typename T>
484  static T random_long(
485  double value_,
486  Int4 dim_,
487  double *sum_distr_,
488  T* elements_)//sum_distr_[dim_-1] must be equal to 1
489  {
490  if(value_<0||value_>1)
491  {
492  throw error("Unexpected error in q_elem importance_sampling::get_random_pair\n",4);
493  };
494 
495  Int4 v1=0;
496  Int4 v2=dim_;
497 
498  while(v2-v1>1)
499  {
500  Int4 v3=(Int4)(alp_data::round(double(v2+v1)/2.0));
501  if(sum_distr_[v3-1]==value_)
502  {
503  v1=v3-1;
504  break;
505  };
506 
507  if(sum_distr_[v3-1]>value_)
508  {
509  v2=v3;
510  }
511  else
512  {
513  v1=v3;
514  };
515  };
516 
517  return elements_[v2-1];
518 
519  };
520 
521 
// Larger of two values; returns j_ when the two compare equal.
template<class T>
static inline T Tmax(T i_, T j_)
{
    return (i_>j_) ? i_ : j_;
}

// Smaller of two values; returns j_ when the two compare equal.
template<class T>
static inline T Tmin(T i_, T j_)
{
    return (i_<j_) ? i_ : j_;
}


// Largest of three values.
template<class T>
static inline T Tmax(T x_,T y_,T z_)
{
    const T first_two=Tmax(x_,y_);
    return Tmax(first_two,z_);
}

// Smallest of three values.
template<class T>
static inline T Tmin(T x_,T y_,T z_)
{
    const T first_two=Tmin(x_,y_);
    return Tmin(first_two,z_);
}

// Largest of four values (pairwise maxima, then their maximum).
template<class T>
static inline T Tmax(T x_,T y_,T z_,T w_)
{
    const T left=Tmax(x_,y_);
    const T right=Tmax(z_,w_);
    return Tmax(left,right);
}

// Smallest of four values (pairwise minima, then their minimum).
template<class T>
static inline T Tmin(T x_,T y_,T z_,T w_)
{
    const T left=Tmin(x_,y_);
    const T right=Tmin(z_,w_);
    return Tmin(left,right);
}
566 
567 
568 
569 
570 
571 
572  public:
573 
574 
575 
576  //input parameters
577  Int4 d_open;//gap opening penalty
578  Int4 d_epen;//gap extension penalty
579  double d_max_time;//maximum allowed calculation time in seconds
580  double d_max_mem;//maximum allowed memory usage in MB
581  double d_eps_lambda;//relative error for lambda calculation
582  double d_eps_K;//relative error for K calculation
583  string d_out_file_name;//output file name
584 
585 
586  //additional parameters
587 
588  bool d_smatr_symmetric_flag;//true if the scoring matrix is symmetric
589 
590  Int4 d_number_of_AA;//number of AA
592 
593  Int4** d_smatr;//scoring matrix
594 
595  double *d_RR1;//AA probabilities
596  double *d_RR1_sum;//probability distribution function for d_RR
597  Int4 *d_RR1_sum_elements;//numbers of AA corresponded to d_RR
598 
599  double *d_RR2;//AA probabilities
600  double *d_RR2_sum;//probability distribution function for d_RR
601  Int4 *d_RR2_sum_elements;//numbers of AA corresponded to d_RR
602 
605 
606 
607  double d_memory_size_in_MB;//approximate current allocated memory size
608 
609  importance_sampling *d_is;//data for the importance sampling
610 
611  double *d_r_i_dot;
612  double *d_r_dot_j;
613 
615 
617 
618  //for debugging
621 
623 
625 
628 
629 
630 
631 
632 private:
633 
634  #ifndef NCBI_OS_MSWIN
635 
636  #else
637  _CrtMemState d_s1, d_s2, d_s3;
638  #endif
639 
640 
641 
642 
643  };
644 
645  //array_positive functions
// NOTE(review): the signature line (647) is absent from this listing;
// judging by the body this is presumably the array_positive<T> destructor — confirm.
// Frees the element buffer and removes the array's (d_dim+1) elements from
// the owning alp_data's memory counter.
646  template<class T>
648  {
649  delete[]d_elem;d_elem=NULL;
650  if(d_alp_data)
651  {
652  d_alp_data->d_memory_size_in_MB-=(double)sizeof(T)*(double)(d_dim+1)/mb_bytes;//d_dim+1 is the number of elements
653  };
654 
655  };
656 
657 
658  template<class T>
660  {
661  bool ee_error_flag=false;
662  error ee_error("",0);
663  T *d_elem_new=NULL;
664 
665  try
666  {
667  try
668  {
669 
670  d_dim+=d_step;
671 
672  d_elem_new=new T[d_dim+1];
673  alp_data::assert_mem(d_elem_new);
674 
675  Int4 i;
676  for(i=0;i<d_dim+1-d_step;i++)
677  {
678  d_elem_new[i]=d_elem[i];
679  };
680 
681  for(i=d_dim+1-d_step;i<d_dim+1;i++)
682  {
683  d_elem_new[i]=0;
684  };
685 
686 
687  delete[]d_elem;d_elem=NULL;
688  if(d_alp_data)
689  {
690  d_alp_data->d_memory_size_in_MB+=(double)sizeof(T)*(double)d_step/mb_bytes;
691  };
692 
693  d_elem=d_elem_new;d_elem_new=NULL;
694 
695  }
696  catch (error er)
697  {
698  ee_error_flag=true;
699  ee_error=er;
700  };
701  }
702  catch (...)
703  {
704  ee_error_flag=true;
705  ee_error=error("Internal error in the program\n",4);
706  };
707 
708  //memory release
709 
710  if(ee_error_flag)
711  {
712  delete[]d_elem_new;d_elem_new=NULL;
713  throw error(ee_error.st,ee_error.error_code);
714  };
715 
716  };
717 
718  //array functions
719 
// NOTE(review): the signature line (721) is absent from this listing;
// judging by the body this is presumably the array<T> destructor — confirm.
// Frees the element buffer and removes the array's (d_dim+1) elements from
// the owning alp_data's memory counter.
720  template<class T>
722  {
723  delete[]d_elem;d_elem=NULL;
724  if(d_alp_data)
725  {
726  d_alp_data->d_memory_size_in_MB-=(double)sizeof(T)*(double)(d_dim+1)/mb_bytes;//d_dim+1 is the number of elements
727  };
728 
729  };
730 
731  template<class T>
733  {
734  bool ee_error_flag=false;
735  error ee_error("",0);
736  T *d_elem_new=NULL;
737 
738  try
739  {
740  try
741  {
742 
743 
744  d_dim+=d_step;
745 
746  d_elem_new=new T[d_dim+1];
747  alp_data::assert_mem(d_elem_new);
748 
749  Int4 i;
750  for(i=0;i<d_dim+1-d_step;i++)
751  {
752  d_elem_new[i]=d_elem[i];
753  };
754 
755  for(i=d_dim+1-d_step;i<d_dim+1;i++)
756  {
757  d_elem_new[i]=0;
758  };
759 
760  d_dim_plus_d_ind0=d_dim+d_ind0;
761 
762  if(d_alp_data)
763  {
764  d_alp_data->d_memory_size_in_MB+=(double)sizeof(T)*(double)d_step/mb_bytes;
765  };
766 
767 
768  delete[]d_elem;d_elem=NULL;
769  d_elem=d_elem_new;d_elem_new=NULL;
770 
771 
772  }
773  catch (error er)
774  {
775  ee_error_flag=true;
776  ee_error=er;
777  };
778  }
779  catch (...)
780  {
781  ee_error_flag=true;
782  ee_error=error("Internal error in the program\n",4);
783  };
784 
785  //memory release
786 
787  if(ee_error_flag)
788  {
789  delete[]d_elem_new;d_elem_new=NULL;
790  throw error(ee_error.st,ee_error.error_code);
791  };
792 
793  };
794 
795  template<class T>
797  {
798  bool ee_error_flag=false;
799  error ee_error("",0);
800  T *d_elem_new=NULL;
801 
802  try
803  {
804  try
805  {
806  d_dim+=d_step;
807  d_ind0-=d_step;
808 
809  d_elem_new=new T[d_dim+1];
810  alp_data::assert_mem(d_elem_new);
811 
812  Int4 i;
813 
814  for(i=0;i<d_step;i++)
815  {
816  d_elem_new[i]=0;
817  };
818 
819  for(i=0;i<d_dim+1-d_step;i++)
820  {
821  d_elem_new[i+d_step]=d_elem[i];
822  };
823 
824  if(d_alp_data)
825  {
826  d_alp_data->d_memory_size_in_MB+=(double)sizeof(T)*(double)d_step/mb_bytes;
827  };
828 
829  delete[]d_elem;d_elem=NULL;
830  d_elem=d_elem_new;d_elem_new=NULL;
831 
832 
833  }
834  catch (error er)
835  {
836  ee_error_flag=true;
837  ee_error=er;
838  };
839  }
840  catch (...)
841  {
842  ee_error_flag=true;
843  ee_error=error("Internal error in the program\n",4);
844  };
845 
846  //memory release
847 
848  if(ee_error_flag)
849  {
850  delete[]d_elem_new;d_elem_new=NULL;
851  throw error(ee_error.st,ee_error.error_code);
852  };
853 
854  };
855 
856 
857 END_SCOPE(Sls)
858 
859 END_SCOPE(blast)
861 
862 
863 #endif //! ALGO_BLAST_GUMBEL_PARAMS__INCLUDED_SLS_ALP_DATA
CRandom::
Definition: random_gen.hpp:66
static T Tmax(T x_, T y_, T z_)
double * d_r_i_dot
bool d_smatr_symmetric_flag
double d_eps_K
importance_sampling * d_is
double * d_RR1_sum
struct_for_randomization * d_rand_all
void get_memory_for_matrix(Int4 dim_, T **&matr_)
static double round(const double &x_)
_CrtMemState d_s1
alp_data(Int4 rand_, Int4 open_, Int4 epen_, string smatr_file_name_, string RR1_file_name_, string RR2_file_name_, double max_time_, double max_mem_, double eps_lambda_, double eps_K_, string out_file_name_)
double d_eps_lambda
double * d_r_dot_j
Int4 d_dim1_tmp
Int4 * d_RR2_sum_elements
static T Tmax(T i_, T j_)
double * d_RR1
static T Tmin(T x_, T y_, T z_)
static T Tmin(T x_, T y_, T z_, T w_)
double d_max_mem
static void get_current_time(double &seconds_)
static T Tmax(T x_, T y_, T z_, T w_)
void read_smatr(string smatr_file_name_, Int4 **&smatr_, Int4 &number_of_AA_smatr_)
string d_out_file_name
static T Tmin(T i_, T j_)
Int4 d_minimum_realizations_number
_CrtMemState d_s3
double d_time_before1
double * d_RR2_sum
static Int4 random_long(double value_, Int4 dim_)
Int4 d_dim2_tmp
bool d_rand_flag
_CrtMemState d_s2
double ran2()
Uint4 d_random_factor
CRandom * d_rand_object
Int4 d_number_of_AA_smatr
double get_allocated_memory_in_MB()
void delete_memory_for_matrix(Int4 dim_, T **&matr_)
Int4 * d_RR1_sum_elements
Int4 ** d_smatr
double * d_RR2
static void assert_mem(void *pointer_)
static T random_long(double value_, Int4 dim_, double *sum_distr_, T *elements_)
static char digit_to_string(Int4 digit_)
void check_out_file(string out_file_name_)
double d_memory_size_in_MB
double d_max_time
bool d_sentinels_flag
Int4 d_realizations_number2
void read_RR(string RR_file_name_, double *&RR_, double *&RR_sum_, Int4 *&RR_sum_elements_, Int4 &number_of_AA_RR_)
static string long_to_string(Int4 number_)
Int4 d_number_of_AA
alp_data * d_alp_data
void increase_elem_by_x(Int4 ind_, T x_)
void increase_elem_by_1(Int4 ind_)
void increment_array()
void set_elem(Int4 ind_, T elem_)
array_positive(alp_data *alp_data_)
void increment_array_on_the_rigth()
Int4 d_dim_plus_d_ind0
void increase_elem_by_1(Int4 ind_)
alp_data * d_alp_data
Int4 d_ind0
void increment_array_on_the_left()
T * d_elem
array(alp_data *alp_data_)
Int4 d_dim
Int4 d_step
void set_elem(Int4 ind_, T elem_)
importance_sampling(alp_data *alp_data_, Int4 open_, Int4 epen_, Int4 number_of_AA_, Int4 **smatr_, double *RR1_, double *RR2_)
static double lambda_equation(double x_, void *func_number_)
#define T(s)
Definition: common.h:230
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
const CVect2< U > & v2
Definition: globals.hpp:440
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
static TValue GetMax(void)
The max. value GetRand() returns.
Definition: random_gen.hpp:295
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
int i
Front end for a platform-specific configuration summary.
The NCBI C++/STL use hints.
const double mb_bytes
error(std::string st_, Int4 error_code_)
std::string st
Int4 error_code
vector< Int4 > d_preliminary_realizations_numbers_ALP
Int4 d_total_realizations_number_with_killing
vector< Int4 > d_preliminary_realizations_numbers_killing
vector< Int4 > d_first_stage_preliminary_realizations_numbers_ALP
#define const
Definition: zconf.h:232
Modified on Sun Apr 14 05:26:55 2024 by modify_doxy.py rev. 669887