NCBI C++ ToolKit
sls_alp_data.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sls_alp_data.cpp 62325 2014-04-01 19:20:49Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: sls_alp_data.cpp
29 
30 Author: Sergey Sheetlin
31 
32 Contents: Input data for the ascending ladder points simulation
33 
34 ******************************************************************************/
35 
36 
37 #include <ncbi_pch.hpp>
38 
39 #include <ncbi_pch.hpp>
40 #include "sls_alp_data.hpp"
41 
43 USING_SCOPE(blast);
45 
46 
// Constructor that takes its inputs from an options object (`options_`) and an
// optional randomization-diagnostics object (`rand_params_`) rather than from files.
// NOTE(review): the embedded source-line numbers in this listing skip values
// (47 -> 50, 57 -> 60, 102 -> 104, 249 -> 251, ...), so the parameter list and
// several statements are not shown; the comments describe only the visible code.
47 alp_data::alp_data(//constructor
50 {
51
52
53  bool ee_error_flag=false;
54  error ee_error("",0);
55
// Owned pointers are nulled first: the destructor is invoked explicitly on the
// error path at the end of this constructor and deletes them.
56  d_smatr=NULL;
57  d_RR1=NULL;
60
61  d_RR2=NULL;
64
65  d_is=NULL;
68
70
72
73
// A null options object is reported with error code 4 before anything else is done.
74  if(!options_)
75  {
76  throw error("Unexpected error\n",4);
77  };
78
79
// Nested try: the inner handler records an `error`, the outer catch(...) turns any
// other exception into "Internal error" (code 4); the rethrow happens after cleanup.
80  try
81  {
82  try
83  {
84
85  d_sentinels_flag=false;
86
87
89
// On Windows builds a CRT memory checkpoint is taken into d_s1.
90  #ifndef NCBI_OS_MSWIN
91
92  #else
93  _CrtMemCheckpoint( &d_s1 );
94  #endif
95
96
97  Int4 number_of_AA_RR1;
98  Int4 number_of_AA_RR2;
99
100
101
102  Int4 i,j;
104
// NOTE(review): the assignment of d_number_of_AA_smatr is not visible in this listing.
105  if(d_number_of_AA_smatr<=0)
106  {
107  throw error("Error - number of letters in the scoring matrix file must be greater than 0\n",3);
108  };
109
111
112
// Copy the score matrix returned by options_->GetScoreMatrix() into d_smatr.
// NOTE(review): the allocation of d_smatr is not visible in this listing.
113  for(i=0;i<d_number_of_AA_smatr;i++)
114  {
115  for(j=0;j<d_number_of_AA_smatr;j++)
116  {
117  d_smatr[i][j]=(options_->GetScoreMatrix())[i][j];
118  };
119  };
120
122
123
// Load the residue probabilities of sequence 1 and sequence 2 via read_RR.
124  read_RR(
125  options_->GetSeq1ResidueProbs(),
126  d_RR1,
127  d_RR1_sum,
129  number_of_AA_RR1);
130
131
132  read_RR(
133  options_->GetSeq2ResidueProbs(),
134  d_RR2,
135  d_RR2_sum,
137  number_of_AA_RR2);
138
139
// Both probability arrays must have as many letters as the scoring matrix.
140  if(number_of_AA_RR1==d_number_of_AA_smatr)
141  {
143  }
144  else
145  {
146  throw error("Number of letters is different for the scoring matrix and probabilities array\n",3);
147  };
148
149  if(number_of_AA_RR2!=d_number_of_AA_smatr)
150  {
151  throw error("Number of letters is different for the scoring matrix and probabilities array\n",3);
152  };
153
154
// d_open stores gap opening plus gap extension; d_epen stores the extension alone.
155  d_open=options_->GetGapOpening()+options_->GetGapExtension();
156
157  d_epen=options_->GetGapExtension();
158
159  d_max_time=options_->GetMaxCalcTime();
160
161  d_max_mem=options_->GetMaxCalcMemory();
162
163  d_eps_lambda=options_->GetLambdaAccuracy();
164
165  d_eps_K=options_->GetKAccuracy();
166
// The output file name is hard-coded in this constructor.
167  d_out_file_name="test.out";
169
173
174
175  //randomization
176  Uint4 random_factor = 0;
177
178  CRef<CGumbelParamsRandDiagnostics> AdvancedParams_tmp
179  = rand_params_;
180
// No diagnostics object: derive the seed from the clock and clear d_rand_flag.
181  if(AdvancedParams_tmp.Empty())
182  {
183  random_factor=(Uint4)time(NULL);
184  #ifndef NCBI_OS_MSWIN //UNIX program
185  struct timeval tv;
186  struct timezone tz;
187  gettimeofday(&tv, &tz);
// NOTE(review): tv_usec*10000000 can exceed the range of a 32-bit value before it is
// added to the Uint4 seed - confirm the intended arithmetic.
188  random_factor+=tv.tv_usec*10000000;
189  #else
190  struct _timeb timebuffer;
191  char *timeline;
192  _ftime( &timebuffer );
193  timeline = ctime( & ( timebuffer.time ) );
194  random_factor+=timebuffer.millitm*10000000;
195  #endif
196
197  d_rand_flag=false;
198
199  }
200  else
201  {
// Diagnostics object supplied: use its seed. The inner test is always true here
// because d_rand_flag was just set.
202  d_rand_flag=true;
203  if(d_rand_flag)
204  {
205  random_factor=AdvancedParams_tmp->GetRandomSeed();
206
207  Int4 size=AdvancedParams_tmp->GetFirstStagePrelimReNumbers().size();
209
// NOTE(review): the bodies of the three loops below are empty in this listing
// (numbering skips 213, 221, 229); the per-element statements are not shown.
210  Int4 i;
211  for(i=0;i<size;i++)
212  {
214  };
215
216
217  size=AdvancedParams_tmp->GetPrelimReNumbers().size();
219  for(i=0;i<size;i++)
220  {
222  };
223
224
225  size=AdvancedParams_tmp->GetPrelimReNumbersKilling().size();
227  for(i=0;i<size;i++)
228  {
230  };
231
232
233
236
237
238  };
239  };
240
241
242  d_random_factor=random_factor;
243
246
247
248
249
// NOTE(review): the line opening this call is missing (numbering skips 250); the
// arguments resemble the importance_sampling constructor - presumably this
// constructs d_is; confirm against the full source.
251  this,
252  d_open,
253  d_epen,
255  d_smatr,
256  d_RR1,
257  d_RR2);
258
260
261  d_memory_size_in_MB+=sizeof(*d_is)/mb_bytes;
262
263  d_r_i_dot=new double[d_number_of_AA];
265  d_r_dot_j=new double[d_number_of_AA];
267  Int4 k;
// d_r_i_dot[k] = sum over i of d_exp_s[k][i]*d_RR2[i]; left at 0 when d_RR1[k] is 0.
268  for(k=0;k<d_number_of_AA;k++)
269  {
270  d_r_i_dot[k]=0;
271  if(d_RR1[k]!=0)
272  {
273  Int4 i;
274  for(i=0;i<d_number_of_AA;i++)
275  {
276  if(d_RR2[i]!=0)
277  {
278  d_r_i_dot[k]+=d_is->d_exp_s[k][i]*d_RR2[i];
279  };
280  };
281  };
282  };
283
// d_r_dot_j[k] = sum over i of d_exp_s[i][k]*d_RR1[i]; left at 0 when d_RR2[k] is 0.
284  for(k=0;k<d_number_of_AA;k++)
285  {
286  d_r_dot_j[k]=0;
287  if(d_RR2[k]!=0)
288  {
289  Int4 i;
290  for(i=0;i<d_number_of_AA;i++)
291  {
292  if(d_RR1[i]!=0)
293  {
294  d_r_dot_j[k]+=d_is->d_exp_s[i][k]*d_RR1[i];
295  };
296  };
297  };
298  };
299
300
// Account for the two double arrays allocated above.
301  d_memory_size_in_MB+=(double)(sizeof(double)*d_number_of_AA*2.0)/mb_bytes;
302
303  double tmp_size1=kMax_I4;
304
// Upper bound for the temporary dimensions, capped at kMax_I4.
// NOTE(review): the numerator of the quotient is missing (numbering skips 308).
305  double tmp_size=Tmin((double)(tmp_size1),
306  (
307
309  )
310  /(
311  (double)(sizeof(double)*12)+(double)(sizeof(Int4)*17)
312  )
313  );
314
315  d_dim1_tmp=(Int4)tmp_size;
316  d_dim2_tmp=(Int4)tmp_size;
317  }
318  catch (error er)
319  {
320  ee_error_flag=true;
321  ee_error=er;
322  };
323  }
324  catch (...)
325  {
326  ee_error_flag=true;
327  ee_error=error("Internal error in the program\n",4);
328  };
329
330
// On failure, release whatever was allocated by running the destructor body, then
// rethrow the recorded error.
331  if(ee_error_flag)
332  {
333  this->~alp_data();
334  throw error(ee_error.st,ee_error.error_code);
335  };
336
337 };
338 
// Constructor that reads the scoring matrix and the two probability arrays from
// files and stores the remaining parameters passed by value.
// NOTE(review): the embedded source-line numbers skip values inside the body
// (395 -> 397, 434 -> 436, 548 -> 550, ...), so some statements are not shown;
// the comments describe only the visible code.
339 alp_data::alp_data(//constructor
340 Int4 rand_,//randomization number
341 Int4 open_,//gap opening penalty
342 Int4 epen_,//gap extension penalty
343 string smatr_file_name_,//scoring matrix file name
344 string RR1_file_name_,//probabilities1 file name
345 string RR2_file_name_,//probabilities2 file name
346 double max_time_,//maximum allowed calculation time in seconds
347 double max_mem_,//maximum allowed memory usage in MB
348 double eps_lambda_,//relative error for lambda calculation
349 double eps_K_,//relative error for K calculation
350 string out_file_name_)//output file name
351 {
352
353  ifstream frand;
354  bool ee_error_flag=false;
355  error ee_error("",0);
356
// Owned pointers are nulled first: the destructor is invoked explicitly on the
// error path at the end of this constructor and deletes them.
357  d_smatr=NULL;
358  d_RR1=NULL;
359  d_RR1_sum=NULL;
361
362  d_RR2=NULL;
363  d_RR2_sum=NULL;
365
366  d_is=NULL;
367  d_r_i_dot=NULL;
368  d_r_dot_j=NULL;
369
371
372
373
// Nested try: the inner handler records an `error`, the outer catch(...) turns any
// other exception into "Internal error" (code 4); the rethrow happens after cleanup.
374  try
375  {
376  try
377  {
378  d_sentinels_flag=false;
379
380
382
// On Windows builds a CRT memory checkpoint is taken into d_s1.
383  #ifndef NCBI_OS_MSWIN //UNIX program
384
385  #else
386  _CrtMemCheckpoint( &d_s1 );
387  #endif
388
389
390  Int4 number_of_AA_RR1;
391  Int4 number_of_AA_RR2;
392
// NOTE(review): the final argument and closing parenthesis of this read_smatr call
// are missing from the listing (numbering skips 396).
393  read_smatr(
394  smatr_file_name_,
395  d_smatr,
397
398
399
// Load the two probability files.
400  read_RR(
401  RR1_file_name_,
402  d_RR1,
403  d_RR1_sum,
405  number_of_AA_RR1);
406
407
408  read_RR(
409  RR2_file_name_,
410  d_RR2,
411  d_RR2_sum,
413  number_of_AA_RR2);
414
415
// Both probability files must list as many letters as the scoring matrix.
416  if(number_of_AA_RR1==d_number_of_AA_smatr)
417  {
419  }
420  else
421  {
422  throw error("Number of letters is different in the files "+smatr_file_name_+" and "+RR1_file_name_+"\n",3);
423  };
424
425  if(number_of_AA_RR2!=d_number_of_AA_smatr)
426  {
427  throw error("Number of letters is different in the files "+smatr_file_name_+" and "+RR2_file_name_+"\n",3);
428  };
429
// Stop at the first position where the two probability arrays differ.
// NOTE(review): the statement executed before `break` is missing (numbering skips 435).
430  Int4 t;
431  for(t=0;t<number_of_AA_RR1;t++)
432  {
433  if(d_RR1[t]!=d_RR2[t])
434  {
436  break;
437  };
438  };
439
440
// Validate an already existing output file before any calculation starts.
441  check_out_file(out_file_name_);
442
// d_open stores gap opening plus gap extension; d_epen stores the extension alone.
443  d_open=open_+epen_;
444  d_epen=epen_;
445  d_max_time=max_time_;
446  d_max_mem=max_mem_;
447  d_eps_lambda=eps_lambda_;
448  d_eps_K=eps_K_;
449  d_out_file_name=out_file_name_;
451
455
456  //randomization
457  Uint4 random_factor=rand_;
458
459
// A negative rand_ requests a clock-derived seed and clears d_rand_flag.
460  if((Int4)random_factor<0)
461  {
462  random_factor=(Uint4)time(NULL);
463  #ifndef NCBI_OS_MSWIN //UNIX program
464  struct timeval tv;
465  struct timezone tz;
466  gettimeofday(&tv, &tz);
// NOTE(review): tv_usec*10000000 can exceed the range of a 32-bit value before it is
// added to the Uint4 seed - confirm the intended arithmetic.
467  random_factor+=tv.tv_usec*10000000;
468  #else
469  struct _timeb timebuffer;
470  char *timeline;
471  _ftime( &timebuffer );
472  timeline = ctime( & ( timebuffer.time ) );
473  random_factor+=timebuffer.millitm*10000000;
474  #endif
475
476  d_rand_flag=false;
477
478  }
479  else
480  {
// Non-negative rand_: look for a file named "rand_<seed>.out"; if it cannot be
// opened, d_rand_flag is cleared, otherwise its contents are read below.
481  d_rand_flag=true;
482  if(d_rand_flag)
483  {
484  string rand_st="rand_"+alp_data::long_to_string(random_factor)+".out";
485  frand.open(rand_st.data(),ios::in);
486  if(!frand)
487  {
488  d_rand_flag=false;
489  }
490  else
491  {
492
493  Int4 i,size;
494
495
496
// NOTE(review): the allocation of d_rand_all is not visible in this listing.
497  frand>>d_rand_all->d_random_factor;
498
499
// The seed stored in the file must match the requested one.
500  if((Int4)random_factor!=d_rand_all->d_random_factor)
501  {
502  throw error("Unexpected error in randomization seed\n",3);
503  };
504
505
506
// Three counted sequences of integers follow in the file.
// NOTE(review): the statement that consumes each `tmp` is missing from the listing
// (numbering skips 512, 520, 528).
507  frand>>size;
508  for(i=0;i<size;i++)
509  {
510  Int4 tmp;
511  frand>>tmp;
513  };
514
515  frand>>size;
516  for(i=0;i<size;i++)
517  {
518  Int4 tmp;
519  frand>>tmp;
521  };
522
523  frand>>size;
524  for(i=0;i<size;i++)
525  {
526  Int4 tmp;
527  frand>>tmp;
529  };
530
531
534
535  frand.close();
536  };
537  };
538  };
539
540
541  d_random_factor=random_factor;
542
545
546
547
548
// NOTE(review): the line opening this call is missing (numbering skips 549); the
// arguments resemble the importance_sampling constructor - presumably this
// constructs d_is; confirm against the full source.
550  this,
551  d_open,
552  d_epen,
554  d_smatr,
555  d_RR1,
556  d_RR2);
557
559
560  d_memory_size_in_MB+=sizeof(*d_is)/mb_bytes;
561
562  d_r_i_dot=new double[d_number_of_AA];
564  d_r_dot_j=new double[d_number_of_AA];
566  Int4 k;
// d_r_i_dot[k] = sum over i of d_exp_s[k][i]*d_RR2[i]; left at 0 when d_RR1[k] is 0.
567  for(k=0;k<d_number_of_AA;k++)
568  {
569  d_r_i_dot[k]=0;
570  if(d_RR1[k]!=0)
571  {
572  Int4 i;
573  for(i=0;i<d_number_of_AA;i++)
574  {
575  if(d_RR2[i]!=0)
576  {
577  d_r_i_dot[k]+=d_is->d_exp_s[k][i]*d_RR2[i];
578  };
579  };
580  };
581  };
582
// d_r_dot_j[k] = sum over i of d_exp_s[i][k]*d_RR1[i]; left at 0 when d_RR2[k] is 0.
583  for(k=0;k<d_number_of_AA;k++)
584  {
585  d_r_dot_j[k]=0;
586  if(d_RR2[k]!=0)
587  {
588  Int4 i;
589  for(i=0;i<d_number_of_AA;i++)
590  {
591  if(d_RR1[i]!=0)
592  {
593  d_r_dot_j[k]+=d_is->d_exp_s[i][k]*d_RR1[i];
594  };
595  };
596  };
597  };
598
599
// Account for the two double arrays allocated above.
600  d_memory_size_in_MB+=(double)(sizeof(double)*d_number_of_AA*2.0)/mb_bytes;
601
602  double tmp_size1=kMax_I4;
603
// Upper bound for the temporary dimensions, capped at kMax_I4.
// NOTE(review): the numerator of the quotient is missing (numbering skips 607).
604  double tmp_size=Tmin((double)(tmp_size1),
605  (
606
608  )
609  /(
610  (double)(sizeof(double)*12)+(double)(sizeof(Int4)*17)
611  )
612  );
613
614  d_dim1_tmp=(Int4)tmp_size;
615  d_dim2_tmp=(Int4)tmp_size;
616  }
617  catch (error er)
618  {
619  ee_error_flag=true;
620  ee_error=er;
621  };
622  }
623  catch (...)
624  {
625  ee_error_flag=true;
626  ee_error=error("Internal error in the program\n",4);
627  };
628
// The seed file may still be open if an exception interrupted the reading above.
629  if(frand.is_open())
630  {
631  frand.close();
632  };
633
// On failure, release whatever was allocated by running the destructor body, then
// rethrow the recorded error.
634  if(ee_error_flag)
635  {
636  this->~alp_data();
637  throw error(ee_error.st,ee_error.error_code);
638  };
639
640 };
641 
// Parameters and body of random_long (the declaration index of this listing shows
// `static Int4 random_long(double value_, Int4 dim_)`).
// NOTE(review): the line carrying the return type and function name is missing
// (numbering jumps from 640 to 643).
// Maps value_ from [0,1] to an index in [0,dim_-1] as floor(value_*dim_), capped at dim_-1.
// Throws error code 4 when value_ lies outside [0,1] or dim_ is not positive.
643 double value_,
644 Int4 dim_)
645 {
646  if(value_<0||value_>1.0||dim_<=0)
647  {
648  throw error("Unexpected error",4);
649  };
650
// With a single slot the only possible index is 0.
651  if(dim_==1)
652  {
653  return 0;
654  };
655
// value_==1.0 would give dim_, so the result is capped at the last valid index.
656  Int4 tmp=(Int4)floor(value_*(double)dim_);
657  tmp=Tmin(tmp,dim_-1);
658  return tmp;
659 };
660 
661 
// Destructor: frees the arrays and helper objects owned by alp_data and subtracts
// their sizes from d_memory_size_in_MB. The constructors also call it explicitly on
// their error paths.
// NOTE(review): numbering gaps (667 -> 669, 671 -> 673, 676 -> 678, 688) show that
// some statements of the original destructor are missing from this listing.
662 alp_data::~alp_data()//destructor
663 {
// NOTE(review): unlike the other members, d_rand_object is not reset to NULL after deletion.
664  delete d_rand_object;
665
666  delete[]d_RR1;d_RR1=NULL;
667  delete[]d_RR1_sum;d_RR1_sum=NULL;
669
670  delete[]d_RR2;d_RR2=NULL;
671  delete[]d_RR2_sum;d_RR2_sum=NULL;
673
674
// Bookkeeping for two double arrays plus one Int4 array of d_number_of_AA elements.
675  d_memory_size_in_MB-=(double)(2.0*sizeof(double)+sizeof(Int4))*(double)d_number_of_AA/mb_bytes;
676
678
679  delete d_is;d_is=NULL;
680
// sizeof does not evaluate its operand, so using *d_is after the reset above is harmless.
681  d_memory_size_in_MB-=sizeof(*d_is)/mb_bytes;
682
683  delete[]d_r_i_dot;d_r_i_dot=NULL;
684  delete[]d_r_dot_j;d_r_dot_j=NULL;
685  d_memory_size_in_MB-=(double)(sizeof(double)*d_number_of_AA*2.0)/mb_bytes;
686
687  delete d_rand_all;d_rand_all=NULL;
689
690
691 };
692 
// Parameter and body of check_out_file (the declaration index of this listing shows
// `void check_out_file(string out_file_name_)`).
// NOTE(review): the line carrying the return type and function name is missing
// (numbering jumps from 692 to 694).
// Inspects the first line of an existing output file: it must contain the text
// "number of realizations with killing"; the substring "0.5*" marks a symmetric-case
// file. Errors are thrown with code 3. If the file cannot be opened the function
// returns without doing anything.
694  string out_file_name_)
695 {
696  bool ee_error_flag=false;
697  error ee_error("",0);
698  ifstream f;
699  char *str_ch=NULL;
700
701  try
702  {
703  try
704  {
705  f.open(out_file_name_.data(),ios::in);
706  if(!f)
707  {
// Nothing to validate when the file cannot be opened.
708  return;
709  };
710
711  bool symmetric_case_flag;
712
// Copy the first line into a NUL-terminated buffer so strstr can be applied to it.
713  string str;
714  getline(f,str);
715  str_ch=new char[str.length()+1];
// A plain new-expression reports failure by throwing, which the outer catch(...) handles.
716  if(!str_ch)
717  {
718  throw error("Memory allocation error\n",41);
719  };
720
721  Int4 k;
722  for(k=0;k<(Int4)str.length();k++)
723  {
724  str_ch[k]=str[k];
725  };
726  str_ch[str.length()]='\0';
727
728
// Format check: the header line must mention the killing realizations.
729  char str_for_test0[]="number of realizations with killing";
730  char *test_flag0= strstr(str_ch,str_for_test0);
731
732  if(!test_flag0)
733  {
734  throw error("The output file "+out_file_name_+" exists and does not have correct format;\nplease delete the file and rerun the program\n",3);
735  };
736
// Presence of "0.5*" in the header line marks a symmetric-case file.
737  char str_for_test[]="0.5*";
738
739  char*test_flag= strstr(str_ch,str_for_test);
740  if(test_flag)
741  {
742  symmetric_case_flag=true;
743  }
744  else
745  {
746  symmetric_case_flag=false;
747  };
748
749
750
751
// NOTE(review): the conditions guarding the two inner blocks below are missing
// (numbering skips 754 and 762); as listed, each throw looks unconditional inside
// its branch. The messages suggest a comparison with the current symmetry setting -
// confirm against the full source.
752  if(symmetric_case_flag)
753  {
755  {
756  throw error("The output file "+out_file_name_+" exists and corresponds to symmetric case; \ncurrent calculation uses non-symmetric parameters;\nplease define another output file name\n",3);
757  };
758  };
759
760  if(!symmetric_case_flag)
761  {
763  {
764  throw error("The output file "+out_file_name_+" exists and corresponds to non-symmetric case; \ncurrent calculation uses symmetric parameters;\nplease define another output file name\n",3);
765  };
766  };
767
768  f.close();
769  }
770  catch (error er)
771  {
772  ee_error_flag=true;
773  ee_error=er;
774  };
775  }
776  catch (...)
777  {
778  ee_error_flag=true;
779  ee_error=error("Internal error in the program\n",4);
780  };
781
// Cleanup that runs on both the normal and the error path (not on the early return,
// where nothing has been allocated yet).
782  delete[]str_ch;str_ch=NULL;
783
784  if(f.is_open())
785  {
786  f.close();
787  };
788
789  if(ee_error_flag)
790  {
791  throw error(ee_error.st,ee_error.error_code);
792  };
793
794 };
795 
796 
// Body of get_allocated_memory_in_MB (the declaration index of this listing shows
// `double get_allocated_memory_in_MB()`).
// NOTE(review): the line carrying the return type and function name is missing
// (numbering jumps from 796 to 798).
// Returns 0 on non-Windows builds; on Windows returns the CRT-reported difference
// between the checkpoint d_s1 and the current state, in units of 1048576 bytes.
798 {
799
800  #ifndef NCBI_OS_MSWIN //UNIX program
801
802  return 0;
803
804  #else
// Take a new checkpoint and diff it against d_s1.
805  _CrtMemCheckpoint( &d_s2 );
806
807  _CrtMemDifference( &d_s3, &d_s1, &d_s2);
808
// Sum the per-block-type byte counts of the difference.
809  double total=0;
810  int use;
811  for (use = 0; use < _MAX_BLOCKS; use++)
812  {
813  total+=d_s3.lSizes[use];
814  }
815
816  total/=(double)1048576;
817  return total;
818
819  #endif
820
821 };
822 
823 // Kludge: limit optimization by ICC 10.x to avoid undesired references to
824 // __svml_exp2 (problematic to use from DLLs on x86_64 or at all on ia32).
825 #if defined(NCBI_COMPILER_ICC) && defined(__OPTIMIZE__) \
826  && NCBI_COMPILER_VERSION >= 1000 && NCBI_COMPILER_VERSION < 1100
827 # pragma optimization_level 1
828 #endif
829 double importance_sampling::lambda_equation(double x_,void* func_number_)
830 {
832  Int4 d_number_of_AA=data->d_number_of_AA;
833  Int4** d_smatr=data->d_smatr;
834  double *d_RR1=data->d_RR1;
835  double *d_RR2=data->d_RR2;
836 
837  double res=0;
838  Int4 i,j;
839 
840  for(i=0;i<d_number_of_AA;i++)
841  {
842  for(j=0;j<d_number_of_AA;j++)
843  {
844  res+=d_RR1[i]*d_RR2[j]*exp(x_*d_smatr[i][j]);
845  };
846  };
847 
848  return res-1.0;
849 };
850 
// Parameters and body of read_smatr (the declaration index of this listing shows
// `void read_smatr(string smatr_file_name_, Int4 **&smatr_, Int4 &number_of_AA_smatr_)`).
// NOTE(review): the line carrying the return type and function name is missing
// (numbering jumps from 849 to 852).
// Reads the alphabet size followed by a square integer matrix from the named file.
// smatr_file_name_    - path of the scoring matrix file
// smatr_              - receives the matrix allocated by get_memory_for_matrix
// number_of_AA_smatr_ - receives the number of letters read from the file
// Throws error code 3 when the file cannot be opened or the size is not positive.
852 string smatr_file_name_,
853 Int4 **&smatr_,
854 Int4 &number_of_AA_smatr_)
855 {
856  bool ee_error_flag=false;
857  error ee_error("",0);
858  ifstream f;
859
860  try
861  {
862  try
863  {
864
865  Int4 i,j;
866  f.open(smatr_file_name_.data(),ios::in);
867  if(!f)
868  {
869  throw error("Error - file "+smatr_file_name_+" is not found\n",3);
870  };
871
// First token of the file is the alphabet size.
872  f>>number_of_AA_smatr_;
873
874  if(number_of_AA_smatr_<=0)
875  {
876  throw error("Error - number of letters in the scoring matrix file must be greater than 0\n",3);
877  };
878
879  get_memory_for_matrix(number_of_AA_smatr_,smatr_);
880
881
// The matrix entries follow in row-major order.
882  for(i=0;i<number_of_AA_smatr_;i++)
883  {
884  for(j=0;j<number_of_AA_smatr_;j++)
885  {
886  f>>smatr_[i][j];
887  };
888  };
889
890  f.close();
891
// Symmetry test over the lower triangle: flag stays true only if smatr_[i][j]==smatr_[j][i].
// NOTE(review): the statement that uses `flag` is missing (numbering skips 904 and 906).
892  bool flag=true;
893  for(i=0;i<number_of_AA_smatr_;i++)
894  {
895  for(j=0;j<i;j++)
896  {
897  if(smatr_[i][j]!=smatr_[j][i])
898  {
899  flag=false;
900  };
901  };
902  };
903
905
907
908  }
909  catch (error er)
910  {
911  ee_error_flag=true;
912  ee_error=er;
913  };
914  }
915  catch (...)
916  {
917  ee_error_flag=true;
918  ee_error=error("Internal error in the program\n",4);
919  };
920
921  //memory release
922  if(f.is_open())
923  {
924  f.close();
925  };
926
// Rethrow the recorded error once the file has been closed.
927  if(ee_error_flag)
928  {
929  throw error(ee_error.st,ee_error.error_code);
930  };
931
932 };
933 
// Parameters and body of the file-based read_RR (the declaration index of this
// listing shows `void read_RR(string RR_file_name_, double *&RR_, double *&RR_sum_,
// Int4 *&RR_sum_elements_, Int4 &number_of_AA_RR_)`).
// NOTE(review): the line carrying the return type and function name is missing
// (numbering jumps from 933 to 935).
// Reads the number of letters and then one probability per letter from the file.
// RR_file_name_    - path of the probabilities file
// RR_              - receives a new array with the probabilities
// RR_sum_          - receives a new array with the running (cumulative) sums of RR_
// RR_sum_elements_ - receives a new array holding the indices 0..n-1
// number_of_AA_RR_ - receives the number of letters
// Throws error code 3 when the file cannot be opened, the count is not positive, or
// a probability is negative or greater than 1.0.
935 string RR_file_name_,
936 double *&RR_,
937 double *&RR_sum_,
938 Int4 *&RR_sum_elements_,
939 Int4 &number_of_AA_RR_)
940 {
941  bool ee_error_flag=false;
942  error ee_error("",0);
943  ifstream f;
944
945  try
946  {
947  try
948  {
949
950  Int4 i;
951  f.open(RR_file_name_.data(),ios::in);
952  if(!f)
953  {
954  throw error("Error - file "+RR_file_name_+" is not found\n",3);
955  };
956
957  f>>number_of_AA_RR_;
958
959  if(number_of_AA_RR_<=0)
960  {
961  throw error("Error - number of letters in the probabilities file must be greater than 0\n",3);
962  };
963
964  RR_=new double[number_of_AA_RR_];
965  assert_mem(RR_);
966
967  RR_sum_=new double[number_of_AA_RR_];
968  assert_mem(RR_sum_);
969
970  RR_sum_elements_=new Int4 [number_of_AA_RR_];
971  assert_mem(RR_sum_elements_);
972
// Account for the two double arrays and the Int4 array just allocated.
973  d_memory_size_in_MB+=(double)(2.0*sizeof(double)+sizeof(Int4))*(double)number_of_AA_RR_/mb_bytes;
974
975
976  for(i=0;i<number_of_AA_RR_;i++)
977  {
978  f>>RR_[i];
979
980  if(RR_[i]<0)
981  {
982  throw error("Error - input letter's probability number "+long_to_string(i+1)+" is negative\n",3);
983  };
984
985  if(RR_[i]>1.0)
986  {
987  throw error("Error - input letter's probability number "+long_to_string(i+1)+" is greater than 1.0\n",3);
988  };
989
990
// Running sum: RR_sum_[i] = RR_[0] + ... + RR_[i].
991  if(i!=0)
992  {
993  RR_sum_[i]=RR_sum_[i-1]+RR_[i];
994  }
995  else
996  {
997  RR_sum_[i]=RR_[i];
998  };
999  RR_sum_elements_[i]=i;
1000  };
1001
// The normalization check is a no-op: the warning it used to print is commented out.
1002  if(fabs(RR_sum_[number_of_AA_RR_-1]-1.0)>0.000000000001)
1003  {
1004  //cout<<"Warning: sum of probabilities in the file "<<RR_file_name_<<" is not equal to 1\n\n";
1005  };
1006
1007
1008  f.close();
1009  }
1010  catch (error er)
1011  {
1012  ee_error_flag=true;
1013  ee_error=er;
1014  };
1015  }
1016  catch (...)
1017  {
1018  ee_error_flag=true;
1019  ee_error=error("Internal error in the program\n",4);
1020  };
1021
1022  //memory release
1023  if(f.is_open())
1024  {
1025  f.close();
1026  };
1027
// Rethrow the recorded error once the file has been closed; arrays allocated above
// are left in the output parameters for the caller to release.
1028  if(ee_error_flag)
1029  {
1030  throw error(ee_error.st,ee_error.error_code);
1031  };
1032
1033 };
1034 
// Parameters and body of the in-memory read_RR overload, which takes the
// probabilities from a vector instead of a file.
// NOTE(review): the line carrying the return type and function name is missing
// (numbering jumps from 1033 to 1036).
// vector_          - probabilities, one entry per letter
// RR_              - receives a new array with a copy of the probabilities
// RR_sum_          - receives a new array with the running (cumulative) sums of RR_
// RR_sum_elements_ - receives a new array holding the indices 0..n-1
// number_of_AA_RR_ - receives vector_.size()
// Throws error code 3 when the vector is empty or a probability is negative or
// greater than 1.0.
1036 const vector<double> &vector_,
1037 double *&RR_,
1038 double *&RR_sum_,
1039 Int4 *&RR_sum_elements_,
1040 Int4 &number_of_AA_RR_)
1041 {
1042  bool ee_error_flag=false;
1043  error ee_error("",0);
1044
1045  try
1046  {
1047  try
1048  {
1049
1050  Int4 i;
1051
1052  number_of_AA_RR_=vector_.size();
1053
// The message text mentions a "file" although the input here is a vector.
1054  if(number_of_AA_RR_<=0)
1055  {
1056  throw error("Error - number of letters in the probabilities file must be greater than 0\n",3);
1057  };
1058
1059  RR_=new double[number_of_AA_RR_];
1060  assert_mem(RR_);
1061
1062  RR_sum_=new double[number_of_AA_RR_];
1063  assert_mem(RR_sum_);
1064
1065  RR_sum_elements_=new Int4 [number_of_AA_RR_];
1066  assert_mem(RR_sum_elements_);
1067
// Account for the two double arrays and the Int4 array just allocated.
1068  d_memory_size_in_MB+=(double)(2.0*sizeof(double)+sizeof(Int4))*(double)number_of_AA_RR_/mb_bytes;
1069
1070
1071  for(i=0;i<number_of_AA_RR_;i++)
1072  {
1073  RR_[i]=vector_[i];
1074
1075  if(RR_[i]<0)
1076  {
1077  throw error("Error - input letter's probability number "+long_to_string(i+1)+" is negative\n",3);
1078  };
1079
1080  if(RR_[i]>1.0)
1081  {
1082  throw error("Error - input letter's probability number "+long_to_string(i+1)+" is greater than 1.0\n",3);
1083  };
1084
1085
// Running sum: RR_sum_[i] = RR_[0] + ... + RR_[i].
1086  if(i!=0)
1087  {
1088  RR_sum_[i]=RR_sum_[i-1]+RR_[i];
1089  }
1090  else
1091  {
1092  RR_sum_[i]=RR_[i];
1093  };
1094  RR_sum_elements_[i]=i;
1095  };
1096
// The normalization check is a no-op: the warning it used to print is commented out.
1097  if(fabs(RR_sum_[number_of_AA_RR_-1]-1.0)>0.000000000001)
1098  {
1099  //cout<<"Warning: sum of probabilities in the file "<<RR_file_name_<<" is not equal to 1\n\n";
1100  };
1101
1102
1103  }
1104  catch (error er)
1105  {
1106  ee_error_flag=true;
1107  ee_error=er;
1108  };
1109  }
1110  catch (...)
1111  {
1112  ee_error_flag=true;
1113  ee_error=error("Internal error in the program\n",4);
1114  };
1115
// Rethrow the recorded error; arrays allocated above are left in the output
// parameters for the caller to release.
1116  if(ee_error_flag)
1117  {
1118  throw error(ee_error.st,ee_error.error_code);
1119  };
1120
1121 };
1122 
1123 
1124 string alp_data::long_to_string(//convert interer ot string
1125 Int4 number_)
1126 {
1127  string res_="";
1128  string tmp_string;
1129  if(number_>0)
1130  {
1131  tmp_string="";
1132  }
1133  else
1134  {
1135  if(number_==0)
1136  {
1137  tmp_string="";
1138  }
1139  else
1140  {
1141  tmp_string="-";
1142  };
1143  };
1144  number_=abs(number_);
1145  do{
1146  Int4 reminder=number_%10;
1147  number_=(number_-reminder)/10;
1148  res_=digit_to_string(reminder)+res_;
1149  if (number_==0)
1150  {
1151  break;
1152  };
1153  }
1154  while (true);
1155 
1156  return tmp_string+res_;
1157 };
1158 
1159 char alp_data::digit_to_string(//convert interer ot string
1160 Int4 digit_)
1161 {
1162  switch(digit_)
1163  {
1164  case 0:return '0';
1165  case 1:return '1';
1166  case 2:return '2';
1167  case 3:return '3';
1168  case 4:return '4';
1169  case 5:return '5';
1170  case 6:return '6';
1171  case 7:return '7';
1172  case 8:return '8';
1173  case 9:return '9';
1174  default:return '?';
1175  };
1176 };
1177 
1178 
1179 
1180 
1181 void alp_data::assert_mem(void *pointer_)
1182 {
1183  if(!pointer_)
1184  {
1185  throw error("Memory allocation error\n",41);
1186  };
1187 };
1188 
1189 double alp_data::round(//returns nearest integer to x_
1190 const double &x_)
1191 {
1192  double x_floor=floor(x_);
1193  double x_ceil=ceil(x_);
1194  if(fabs(x_-x_floor)<0.5)
1195  {
1196  return x_floor;
1197  };
1198  return x_ceil;
1199 };
1200 
1201 
1202 
// Parameters and body of the importance_sampling constructor (the declaration index
// of this listing shows `importance_sampling(alp_data *alp_data_, Int4 open_,
// Int4 epen_, Int4 number_of_AA_, Int4 **smatr_, double *RR1_, double *RR2_)`).
// NOTE(review): the line carrying the constructor name is missing (numbering jumps
// from 1202 to 1204) and several interior lines are missing as well; the comments
// describe only the visible code.
// Steps visible here: find the ungapped lambda as a root of lambda_equation, scale
// it by 1.07, build the normalized table d_exp_s and the element arrays, and derive
// the transition quantities (d_mu, d_nu, d_eta, ...) from the gap penalties.
1204 alp_data *alp_data_,
1205 Int4 open_,
1206 Int4 epen_,
1207 Int4 number_of_AA_,
1208 Int4 **smatr_,
1209 double *RR1_,
1210 double *RR2_)
1211 {
1212  d_elements=NULL;
1214
1215  d_exp_s=NULL;
1216
1217
// The owning alp_data object is mandatory; a null pointer is reported with code 4.
1218  d_alp_data=alp_data_;
1219  if(!d_alp_data)
1220  {
1221  throw error("Unexpected error",4);
1222  };
1223
1224  bool ee_error_flag=false;
1225  error ee_error("",0);
1226
1227  try
1228  {
1229  try
1230  {
1231
1232
1233
1234  {
1235
1236  //calculation of the importance sampling theta
1237
// Parameter block handed to lambda_equation through a void pointer.
1238  data_for_lambda_equation tmp_ptr;
1239  tmp_ptr.d_number_of_AA=number_of_AA_;
1240  tmp_ptr.d_RR1=RR1_;
1241  tmp_ptr.d_RR2=RR2_;
1242  tmp_ptr.d_smatr=smatr_;
1243
1244  //calculate maximum of smatr_ elements
1245  Int4 smatr_max=smatr_[0][0];
1246  Int4 smatr_max_i=0;
1247  Int4 smatr_max_j=0;
1248  Int4 smatr_min=smatr_[0][0];
1249
1250  Int4 smatr_pos_max=kMin_I4;
1251  Int4 smatr_neg_min=kMax_I4;
1252
1253  double eps=0.00001;
1254  double threshold=DBL_MIN*10.0;
1255
1256  double aver_score=0;
1257  Int4 i,j;
1258  for(i=0;i<number_of_AA_;i++)
1259  {
1260  for(j=0;j<number_of_AA_;j++)
1261  {
// Letter pairs with (near-)zero probability are skipped.
// NOTE(review): this test uses RR1_[j]*RR2_[i] while the accumulation below uses
// RR1_[i]*RR2_[j]; the indices look swapped - confirm which pairing is intended.
1262  if(RR1_[j]*RR2_[i]<=threshold)
1263  {
1264  continue;
1265  };
1266
// Expected score per aligned pair.
1267  aver_score+=RR1_[i]*RR2_[j]*smatr_[i][j];
1268
// Track the largest score and where it occurs, plus the smallest score.
1269  if(smatr_max<smatr_[i][j])
1270  {
1271  smatr_max=smatr_[i][j];
1272  smatr_max_i=i;
1273  smatr_max_j=j;
1274  };
1275  smatr_min=alp_data::Tmin(smatr_min,smatr_[i][j]);
1276
1277
1278  if(smatr_[i][j]>0)
1279  {
1280  smatr_pos_max=alp_data::Tmax(smatr_pos_max,smatr_[i][j]);
1281  };
1282
1283  if(smatr_[i][j]<0)
1284  {
1285  smatr_neg_min=alp_data::Tmin(smatr_neg_min,smatr_[i][j]);
1286  };
1287
1288  };
1289  };
1290
// The expected score must be negative and at least one score must be positive.
1291  if(aver_score>=-threshold)
1292  {
1293  throw error("Error - sum[i,j] RR1[i]*RR2[j]*smatr[i][j]>=0; the program cannot continue the calculation\n",3);
1294  };
1295
1296  if(smatr_max<=0)
1297  {
1298  throw error("Error - at least one element of the scoring matrix must be positive\n",3);
1299  };
1300
1301
1302
// Halve the left end `a` of the search interval until lambda_equation(a) is no
// longer positive; give up once `a` falls below threshold*100.
1303  double a=eps;
1304
1305  while(importance_sampling::lambda_equation(a,(void*)(&tmp_ptr))>0)
1306  {
1307  a/=2.0;
1308
1309  if(a<threshold*100.0)
1310  {
1311  throw error("Error - the input parameters correspond to non-logarithmic regime\n",3);
1312  };
1313  };
1314
1315  if(a<threshold*100.0)
1316  {
1317  throw error("Error - the input parameters define the regime which is too close to the critical regime\n",3);
1318  };
1319
1320  eps=a/10.0;
1321
1322
// Right end `b` of the search interval, computed from the probability of the
// best-scoring letter pair and the maximal score.
1323  double tmp_pr=RR1_[smatr_max_i]*RR2_[smatr_max_j];
1324  double b=(log(1+10*eps)-log(tmp_pr))/(double)smatr_max;
1325
1326
1327  Int4 n_partition=2;
1328  std::vector<double> res_lambda;
1329
1330
// NOTE(review): the line opening this call is missing (numbering skips 1331-1332);
// the arguments match the find_tetta_general signature shown in the declaration
// index - presumably that is the callee; confirm against the full source.
1333  (void*)(&tmp_ptr),
1334  a,
1335  b,
1336  n_partition,
1337  eps,
1338  res_lambda);
1339
1340  sort(res_lambda.begin(),res_lambda.end());
1341
1342  if(res_lambda.size()==0)
1343  {
1344  throw error("Error - the program is not able to find the ungapped lambda\n",3);
1345  };
1346
// Take the largest root found.
1347  d_lambda=res_lambda[res_lambda.size()-1];
1349
1350  //cout<<"\nUngapped lambda is "<<d_ungap_lambda<<endl;
1351
// The value used from here on is the root scaled by 1.07.
1352  d_lambda*=1.07;
1353  };
1354
1355
1356
1357  d_is_number_of_AA=number_of_AA_;
1358
1359  d_elements=new q_elem[number_of_AA_*number_of_AA_];
1361
1362  d_elements_values=new double[number_of_AA_*number_of_AA_];
1364
1365
1366
1368
// Fill d_exp_s[a][b]=exp(d_lambda*smatr_[a][b]) and the weighted values
// RR1_[a]*RR2_[b]*d_exp_s[a][b], accumulating their sum for normalization.
// NOTE(review): the allocation of d_exp_s is not visible in this listing.
1369  Int4 ind=0;
1370  double sum=0;
1371  Int4 a,b;
1372  for(a=0;a<number_of_AA_;a++)
1373  {
1374  for(b=0;b<number_of_AA_;b++)
1375  {
1376  d_exp_s[a][b]=exp(d_lambda*smatr_[a][b]);
1377  d_elements_values[ind]=RR1_[a]*RR2_[b]*d_exp_s[a][b];
1378  sum+=d_elements_values[ind];
1379  ind++;
1380  };
1381  };
1382
1383
// Normalize both tables by the accumulated sum.
1384  for(a=0;a<number_of_AA_;a++)
1385  {
1386  for(b=0;b<number_of_AA_;b++)
1387  {
1388  d_exp_s[a][b]/=sum;
1389  };
1390  };
1391
1392
1393  for(ind=0;ind<number_of_AA_*number_of_AA_;ind++)
1394  {
1395  d_elements_values[ind]/=sum;
1396  };
1397
1398
// NOTE(review): the body of this loop is missing from the listing (numbering skips 1401).
1399  for(ind=1;ind<number_of_AA_*number_of_AA_;ind++)
1400  {
1402  };
1403
1404
// Record the letter pair (a,b) that corresponds to each flat index.
1405  ind=0;
1406  for(a=0;a<number_of_AA_;a++)
1407  {
1408  for(b=0;b<number_of_AA_;b++)
1409  {
1410  q_elem elem_tmp;
1411
1412  elem_tmp.d_a=a;
1413  elem_tmp.d_b=b;
1414
1415  d_elements[ind]=elem_tmp;
1417
1418  ind++;
1419
1420  };
1421  };
1422
1423
1424
// Gap-related quantities derived from |d_lambda| and the two penalties.
1425  d_mu=exp(-fabs(d_lambda)*open_);
1426  d_nu=exp(-fabs(d_lambda)*epen_);
1427
1428  double tmp=1+d_mu-d_nu;
1429
1430  d_eta=(1-d_nu)*(1-d_nu)/(tmp*tmp);
1431  d_mu_SI=1-d_nu;
1432  d_mu_IS=d_mu*(1-d_nu)/(tmp*tmp);
1433  d_mu_DS=d_mu/tmp;
1434  d_mu_SD=(1-d_nu)*(1-d_nu)/tmp;
1435  d_mu_ID=d_mu*(1-d_nu)/tmp;
1436
1437
// Cumulative tables (with matching state letters) for leaving states D, I and S.
1438  d_for_D[0]=d_nu; d_for_D_states[0]='D';
1439  d_for_D[1]=d_for_D[0]+d_mu_SD; d_for_D_states[1]='S';
1440  d_for_D[2]=d_for_D[1]+d_mu_ID; d_for_D_states[2]='I';
1441
1442  d_for_I[0]=d_nu; d_for_I_states[0]='I';
1443  d_for_I[1]=d_for_I[0]+d_mu_SI; d_for_I_states[1]='S';
1444
1445  d_for_S[0]=d_eta; d_for_S_states[0]='S';
1446  d_for_S[1]=d_for_S[0]+d_mu_DS; d_for_S_states[1]='D';
1447  d_for_S[2]=d_for_S[1]+d_mu_IS; d_for_S_states[2]='I';
1448
// NOTE(review): the accounting below uses number_of_AA_ elements although the arrays
// allocated above hold number_of_AA_*number_of_AA_ elements - confirm.
1449  d_alp_data->d_memory_size_in_MB+=sizeof(double)*number_of_AA_/mb_bytes;
1450  d_alp_data->d_memory_size_in_MB+=sizeof(q_elem)*number_of_AA_/mb_bytes;
1451  }
1452  catch (error er)
1453  {
1454  ee_error_flag=true;
1455  ee_error=er;
1456  };
1457  }
1458  catch (...)
1459  {
1460  ee_error_flag=true;
1461  ee_error=error("Internal error in the program\n",4);
1462  };
1463
1464  //memory release
1465
// On failure, run the destructor body to free what was allocated, then rethrow.
1466  if(ee_error_flag)
1467  {
1468  this->~importance_sampling();
1469  throw error(ee_error.st,ee_error.error_code);
1470  };
1471
1472 };
1473 
// Body of a cleanup routine that frees d_elements; it is presumably the
// importance_sampling destructor, which the constructor invokes on its error path.
// NOTE(review): the header line and several interior lines are missing (numbering
// skips 1474, 1477 and 1481-1483), so the block guarded by d_alp_data is empty in
// this listing.
1475 {
1476  delete []d_elements;d_elements=NULL;
1478
1479  if(d_alp_data)
1480  {
1484  };
1485
1486 };
1487 
// Parameter and body of get_current_time (the declaration index of this listing
// shows `static void get_current_time(double &seconds_)`).
// NOTE(review): the line carrying the return type and function name is missing
// (numbering jumps from 1486 to 1489).
// Stores the current wall-clock time in seconds_ as seconds with a fractional part:
// microsecond resolution via gettimeofday on non-Windows builds, millisecond
// resolution via _ftime on Windows.
1489 double &seconds_)
1490 {
1491 #ifndef NCBI_OS_MSWIN //UNIX program
1492  struct timeval tv;
1493  struct timezone tz;
1494  time_t t;
1495
1496  gettimeofday(&tv, &tz);
1497  t = tv.tv_sec;
// The result of localtime is not used.
1498  localtime(&t);
1499
1500  seconds_=(double)(t)+(double)(tv.tv_usec) * 0.000001;
1501
1502 #else
1503
1504  struct _timeb timebuffer;
1505
1506  _ftime( &timebuffer );
1507
1508  seconds_=timebuffer.time+(double)(timebuffer.millitm)/1000.0;
1509
1510 #endif
1511 };
1512 
1513 
const TFrequencies & GetSeq2ResidueProbs(void) const
Get sequence 2 residue probabilities.
const TFrequencies & GetSeq1ResidueProbs(void) const
Get sequence 1 residue probabilities.
double GetMaxCalcMemory(void) const
Get maximum memory allowed for computation.
double GetMaxCalcTime(void) const
Get maximum calculation time allowed.
Int4 GetGapOpening(void) const
Get gap opening penalty.
double GetKAccuracy(void) const
Get relative error threshold for K parameter calculation for gapped alignment.
Int4 GetNumResidues(void) const
Get number of residues in utilized alphabet.
const Int4 ** GetScoreMatrix(void) const
Get score matrix.
double GetLambdaAccuracy(void) const
Get relative error threshold for lambda parameter calculation for gapped alignment.
Int4 GetGapExtension(void) const
Get gap extension penalty.
Int4 GetTotalReNumber(void) const
Get total realizations number.
const vector< Int4 > & GetPrelimReNumbersKilling(void) const
Get preliminary realizations numbers killing array.
const vector< Int4 > & GetFirstStagePrelimReNumbers(void) const
Get first stage preliminary realizations numbers.
Int4 GetTotalReNumberKilling(void) const
Get total realizations number killing.
Uint4 GetRandomSeed(void) const
Get random seed.
const vector< Int4 > & GetPrelimReNumbers(void) const
Get preliminary realizations numbers.
CRandom::
Definition: random_gen.hpp:66
double * d_r_i_dot
bool d_smatr_symmetric_flag
double d_eps_K
importance_sampling * d_is
double * d_RR1_sum
struct_for_randomization * d_rand_all
void get_memory_for_matrix(Int4 dim_, T **&matr_)
static double round(const double &x_)
_CrtMemState d_s1
alp_data(Int4 rand_, Int4 open_, Int4 epen_, string smatr_file_name_, string RR1_file_name_, string RR2_file_name_, double max_time_, double max_mem_, double eps_lambda_, double eps_K_, string out_file_name_)
double d_eps_lambda
double * d_r_dot_j
Int4 d_dim1_tmp
Int4 * d_RR2_sum_elements
static T Tmax(T i_, T j_)
double * d_RR1
double d_max_mem
static void get_current_time(double &seconds_)
void read_smatr(string smatr_file_name_, Int4 **&smatr_, Int4 &number_of_AA_smatr_)
string d_out_file_name
static T Tmin(T i_, T j_)
Int4 d_minimum_realizations_number
_CrtMemState d_s3
double * d_RR2_sum
static Int4 random_long(double value_, Int4 dim_)
Int4 d_dim2_tmp
bool d_rand_flag
_CrtMemState d_s2
Uint4 d_random_factor
CRandom * d_rand_object
Int4 d_number_of_AA_smatr
double get_allocated_memory_in_MB()
void delete_memory_for_matrix(Int4 dim_, T **&matr_)
Int4 * d_RR1_sum_elements
Int4 ** d_smatr
double * d_RR2
static void assert_mem(void *pointer_)
static char digit_to_string(Int4 digit_)
void check_out_file(string out_file_name_)
double d_memory_size_in_MB
double d_max_time
bool d_sentinels_flag
void read_RR(string RR_file_name_, double *&RR_, double *&RR_sum_, Int4 *&RR_sum_elements_, Int4 &number_of_AA_RR_)
static string long_to_string(Int4 number_)
Int4 d_number_of_AA
static void find_tetta_general(function_type *func_, void *func_pointer_, double a_, double b_, Int4 n_partition_, double eps_, std::vector< double > &res_)
importance_sampling(alp_data *alp_data_, Int4 open_, Int4 epen_, Int4 number_of_AA_, Int4 **smatr_, double *RR1_, double *RR2_)
static double lambda_equation(double x_, void *func_number_)
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
#define NULL
Definition: ncbistd.hpp:225
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define kMin_I4
Definition: ncbi_limits.h:217
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_I4
Definition: ncbi_limits.h:218
void SetSeed(TValue seed)
Seed the random number generator with "seed".
Definition: random_gen.cpp:287
int i
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
#define fabs(v)
Definition: ncbi_dispd.c:46
#define abs(a)
Definition: ncbi_heapmgr.c:130
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
std::istream & in(std::istream &in_, double &x_)
USING_SCOPE(blast)
USING_NCBI_SCOPE
const double mb_bytes
std::string st
Int4 error_code
vector< Int4 > d_preliminary_realizations_numbers_ALP
Int4 d_total_realizations_number_with_killing
vector< Int4 > d_preliminary_realizations_numbers_killing
vector< Int4 > d_first_stage_preliminary_realizations_numbers_ALP
Modified on Fri Sep 20 14:58:15 2024 by modify_doxy.py rev. 669887