84 const int query_offset[],
85 const int query_end[],
86 const int subject_offset[],
87 const int subject_end[],
88 const int query_gapped_start[],
89 const int subject_gapped_start[],
91 const double evalue[] =
NULL,
92 const int num_ident[] =
NULL)
94 const int kQueryContext = 0;
95 const int kSubjectFrame = 0;
96 const int kQueryFrame = 0;
103 for (
int i = 0;
i < num_hsps;
i++) {
106 subject_offset[
i], subject_end[
i],
107 query_gapped_start[
i],
108 subject_gapped_start[
i],
109 kQueryContext, kQueryFrame, kSubjectFrame, score[
i],
124 unsigned int query_length)
126 ifstream
in(filename.c_str());
128 throw runtime_error(filename +
" could not be found");
135 for (
unsigned int i = 0;
i < query_length;
i++) {
142 throw runtime_error(
"Error reading from " + filename);
152 throw runtime_error(
"NULL BlastScoreBlk*!");
156 loadPssmFromFile(
"data/aa.129295.pssm.txt",
175 Int8 effective_searchsp,
177 bool doSmithWaterman,
178 double evalue_threshold =
189 Int8 effective_searchsp,
191 bool doSmithWaterman,
192 double evalue_threshold =
203 Int8 effective_searchsp,
205 bool doSmithWaterman,
206 double evalue_threshold,
213 runRedoAlignmentCoreUnitTest(program, *qsl, *ssl, init_hsp_list,
214 ending_hsp_list, effective_searchsp, compositonBasedStatsMode,
215 doSmithWaterman, evalue_threshold, hit_list_size);
226 Int8 effective_searchsp,
228 bool doSmithWaterman,
229 double evalue_threshold,
233 char* program_buffer =
NULL;
235 BOOST_REQUIRE_MESSAGE(rv == (
Int2)0,
"BlastNumber2Program failed");
237 sfree(program_buffer);
249 core_prog, strand_opt,&query_info);
251 query_info, &query_blk, core_prog, strand_opt, blast_msg);
253 BOOST_REQUIRE(m->empty());
258 BOOST_REQUIRE(rv == 0);
269 BOOST_REQUIRE(rv == 0);
294 BOOST_REQUIRE(writer_info ==
NULL);
306 const double k_rps_scale_factor = 1.0;
309 program, &sbp, k_rps_scale_factor,
316 setupPositionBasedBlastScoreBlk(sbp,
324 &effective_searchsp, 1);
329 eff_len_params, sbp, query_info,
NULL);
337 BOOST_REQUIRE(eff_len_opts ==
NULL);
339 BOOST_REQUIRE(eff_len_params ==
NULL);
345 const int kAvgSubjLen = 0;
348 sbp, query_info, kAvgSubjLen,
359 BOOST_REQUIRE(results);
362 query_info, sbp,
NULL, seq_src,
365 ext_params, hit_params, psi_options,
367 BOOST_REQUIRE_MESSAGE(rv == (
Int2)0,
"Blast_RedoAlignmentCore failed!");
370 BOOST_REQUIRE(hsp_stream ==
NULL);
372 BOOST_REQUIRE(ext_params ==
NULL);
374 BOOST_REQUIRE(ext_options ==
NULL);
376 BOOST_REQUIRE(hit_params ==
NULL);
378 BOOST_REQUIRE(scoring_params ==
NULL);
380 BOOST_REQUIRE(psi_options ==
NULL);
382 BOOST_REQUIRE(sbp ==
NULL);
384 BOOST_REQUIRE(results && results->
num_queries == 1);
389 BOOST_REQUIRE_EQUAL(ending_hsp_list->
hspcnt, hsp_list->
hspcnt);
393 cout <<
"Expected num hsps=" << ending_hsp_list->
hspcnt;
394 cout <<
" Actual num hsps=" << hsp_list->
hspcnt << endl;
397 for (
int index=0; index<hsp_list->
hspcnt; index++)
403 cout << index <<
": query_offset="
405 cout << index <<
": query_end="
407 cout << index <<
": subject_offset="
409 cout << index <<
": subject_end="
411 cout << index <<
": score="
412 << actual_hsp->
score << endl;
413 cout << index <<
": bit_score="
415 cout << index <<
": evalue="
416 << actual_hsp->
evalue << endl;
417 cout << index <<
": num_ident="
422 BOOST_REQUIRE_EQUAL(expected_hsp->
query.
end,
426 BOOST_REQUIRE_EQUAL(expected_hsp->
subject.
end,
428 BOOST_REQUIRE_EQUAL(expected_hsp->
score,
430 BOOST_REQUIRE_EQUAL(expected_hsp->
num_ident,
434 cerr <<
"Diff in evalues for " << index <<
"=" << diff << endl;
436 BOOST_REQUIRE_CLOSE(expected_hsp->
evalue, actual_hsp->
evalue, 10.0);
441 BOOST_REQUIRE(results ==
NULL);
449 const int k_num_hsps_start = 3;
450 const int k_num_hsps_end = 2;
458 const int query_offset[k_num_hsps_start] = { 28, 46, 463};
459 const int query_end[k_num_hsps_start] = { 485, 331, 488};
460 const int subject_offset[k_num_hsps_start] = { 36, 327, 320};
461 const int subject_end[k_num_hsps_start] = { 512, 604, 345};
462 const int score[k_num_hsps_start] = { 554, 280, 28};
463 const int query_gapped_start[k_num_hsps_start] = { 431, 186, 480};
464 const int subject_gapped_start[k_num_hsps_start] = { 458, 458, 337};
468 setUpHSPList(k_num_hsps_start,
469 query_offset, query_end,
470 subject_offset, subject_end,
472 subject_gapped_start,
475 const int query_offset_final[k_num_hsps_end] = { 2, 46};
476 const int query_end_final[k_num_hsps_end] = { 485, 331};
477 const int subject_offset_final[k_num_hsps_end] = { 9, 327};
478 const int subject_end_final[k_num_hsps_end] = { 512, 604};
479 const int score_final[k_num_hsps_end] = { 510, 282};
480 const double evalue_final[k_num_hsps_end] = {7.0065e-61, 1.6958e-30};
481 const int num_idents_final[k_num_hsps_end] = { 171, 94 };
484 setUpHSPList(k_num_hsps_end,
487 subject_offset_final,
490 subject_offset_final,
495 const Int8 kEffSearchSp = 500000;
496 const bool kSmithWaterman =
false;
498 runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
499 init_hsp_list, ending_hsp_list,
504 BOOST_REQUIRE(ending_hsp_list ==
NULL);
509 const int k_num_hsps_start = 3;
510 const int k_num_hsps_end = 2;
518 const int query_offset[k_num_hsps_start] = { 28, 46, 463};
519 const int query_end[k_num_hsps_start] = { 485, 331, 488};
520 const int subject_offset[k_num_hsps_start] = { 36, 327, 320};
521 const int subject_end[k_num_hsps_start] = { 512, 604, 345};
522 const int score[k_num_hsps_start] = { 554, 280, 28};
523 const int query_gapped_start[k_num_hsps_start] = { 431, 186, 480};
524 const int subject_gapped_start[k_num_hsps_start] = { 458, 458, 337};
528 setUpHSPList(k_num_hsps_start,
529 query_offset, query_end,
530 subject_offset, subject_end,
532 subject_gapped_start,
535 const int query_offset_final[k_num_hsps_end] = { 2, 46};
536 const int query_end_final[k_num_hsps_end] = { 517, 331};
537 const int subject_offset_final[k_num_hsps_end] = { 9, 327};
538 const int subject_end_final[k_num_hsps_end] = { 546, 604};
539 const int score_final[k_num_hsps_end] = { 537, 298};
540 const double evalue_final[k_num_hsps_end] = {1.1954e-64, 1.5494e-32};
541 const int num_idents_final[k_num_hsps_end] = { 177, 95 };
544 setUpHSPList(k_num_hsps_end,
547 subject_offset_final,
550 subject_offset_final,
555 const Int8 kEffSearchSp = 500000;
556 const bool kSmithWaterman =
false;
558 runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
559 init_hsp_list, ending_hsp_list,
564 BOOST_REQUIRE(ending_hsp_list ==
NULL);
569 const int k_num_hsps_start = 6;
570 const int k_num_hsps_end = 2;
574 const int query_offset[k_num_hsps_start] = { 24, 99, 16, 84, 6, 223 };
575 const int query_end[k_num_hsps_start] = { 62, 128, 24, 114, 25, 231 };
576 const int subject_offset[k_num_hsps_start] =
577 { 245, 0, 198, 86, 334, 151 };
578 const int subject_end[k_num_hsps_start] =
579 { 287, 29, 206, 119, 353, 159 };
580 const int score[k_num_hsps_start] = { 37, 26, 25, 25, 24, 24 };
581 const int query_gapped_start[k_num_hsps_start] =
582 { 29, 104, 20, 91, 19, 227 };
583 const int subject_gapped_start[k_num_hsps_start] =
584 { 250, 5, 202, 93, 347, 155 };
589 setUpHSPList(k_num_hsps_start,
590 query_offset, query_end,
591 subject_offset, subject_end,
593 subject_gapped_start,
596 const int query_offset_final[k_num_hsps_end] = { 24, 18 };
597 const int query_end_final[k_num_hsps_end] = { 30, 31 };
598 const int subject_offset_final[k_num_hsps_end] = { 245, 200 };
599 const int subject_end_final[k_num_hsps_end] = { 251, 210 };
600 const int score_final[k_num_hsps_end] = { 29, 24 };
601 const double evalue_final[k_num_hsps_end] =
602 { 1.361074 , 6.425098 };
603 const int ident_final[k_num_hsps_end] = { 3, 6};
607 setUpHSPList(k_num_hsps_end,
610 subject_offset_final,
613 subject_offset_final,
619 const Int8 kEffSearchSp = 84660;
620 const bool kSmithWaterman =
false;
622 runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
623 init_hsp_list, ending_hsp_list,
627 BOOST_REQUIRE(ending_hsp_list ==
NULL);
632 const int k_num_hsps_start = 6;
633 const int k_num_hsps_end = 5;
635 CSeq_id subj_id(
"gb|AAA22059|");
639 const bool is_protein(
true);
648 const int query_offset[k_num_hsps_start] = { 3, 1, 4, 3, 0, 1 };
649 const int query_end[k_num_hsps_start] = { 236, 232, 236, 235, 226, 233 };
650 const int subject_offset[k_num_hsps_start] =
651 { 1, 1, 6, 6, 12, 22 };
652 const int subject_end[k_num_hsps_start] =
653 { 238, 238, 238, 238, 238, 254 };
654 const int score[k_num_hsps_start] = { 345, 344, 343, 339, 332, 320 };
655 const int query_gapped_start[k_num_hsps_start] =
656 { 32, 194, 9, 8, 104, 9 };
657 const int subject_gapped_start[k_num_hsps_start] =
658 { 30, 200, 11, 11, 116, 30 };
663 setUpHSPList(k_num_hsps_start,
664 query_offset, query_end,
665 subject_offset, subject_end,
667 subject_gapped_start,
670 const int query_offset_final[k_num_hsps_end] = { 4, 3, 3, 0, 0};
671 const int query_end_final[k_num_hsps_end] = { 236, 235, 220, 226, 232};
672 const int subject_offset_final[k_num_hsps_end] = { 6, 6, 1, 12, 6};
673 const int subject_end_final[k_num_hsps_end] = { 238, 238, 218, 238, 238};
674 const int score_final[k_num_hsps_end] = { 73, 72, 69, 68, 66};
675 const double evalue_final[k_num_hsps_end] =
676 { 1.26e-05 , 1.7e-5 , 4.0e-5, 5.1e-5, 0.0000775};
677 const int num_idents_final[k_num_hsps_end] = { 87, 85, 81, 84, 85 };
681 setUpHSPList(k_num_hsps_end,
684 subject_offset_final,
687 subject_offset_final,
693 const Int8 kEffSearchSp = 84660;
694 const bool kSmithWaterman =
false;
696 runRedoAlignmentCoreUnitTest(kProgram, query_seqs[0], *ssl,
697 init_hsp_list, ending_hsp_list,
701 BOOST_REQUIRE(ending_hsp_list ==
NULL);
706 const int k_num_hsps_start = 3;
707 const int k_num_hsps_end = 5;
715 const int query_offset[k_num_hsps_start] = { 28, 46, 463 };
716 const int query_end[k_num_hsps_start] = { 485, 331, 488 };
717 const int subject_offset[k_num_hsps_start] = { 36, 327, 320 };
718 const int subject_end[k_num_hsps_start] = { 512, 604, 345 };
719 const int score[k_num_hsps_start] = { 554, 280, 28 };
720 const int query_gapped_start[k_num_hsps_start] = { 431, 186, 480 };
721 const int subject_gapped_start[k_num_hsps_start] = { 458, 458, 337 };
725 setUpHSPList(k_num_hsps_start,
726 query_offset, query_end,
727 subject_offset, subject_end,
729 subject_gapped_start,
732 const int query_offset_final[k_num_hsps_end] = { 2, 250, 494, 67, 2 };
733 const int query_end_final[k_num_hsps_end] = { 485, 331, 530, 86, 24 };
734 const int subject_offset_final[k_num_hsps_end] = { 9, 523, 261, 585, 570 };
735 const int subject_end_final[k_num_hsps_end] = { 512, 604, 297, 604, 592 };
736 const int score_final[k_num_hsps_end] = { 591, 39, 37, 33, 32 };
737 const double evalue_final[k_num_hsps_end] = { 2.3451e-72, 0.387,
738 0.6692, 1.9988, 2.6256 };
739 const int num_idents_final[k_num_hsps_end] = { 172, 22, 9, 8, 7 };
742 setUpHSPList(k_num_hsps_end,
745 subject_offset_final,
748 subject_offset_final,
753 const Int8 kEffSearchSp = 500000;
754 const bool kSmithWaterman =
true;
756 runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
757 init_hsp_list, ending_hsp_list,
761 BOOST_REQUIRE(ending_hsp_list ==
NULL);
766 const int k_num_hsps_start = 3;
767 const int k_num_hsps_end = 3;
771 const int query_offset[k_num_hsps_start] = { 28, 46, 463};
772 const int query_end[k_num_hsps_start] = { 485, 331, 488};
773 const int subject_offset[k_num_hsps_start] = { 36, 327, 320};
774 const int subject_end[k_num_hsps_start] = { 512, 604, 345};
775 const int score[k_num_hsps_start] = { 554, 280, 28};
776 const int query_gapped_start[k_num_hsps_start] = { 431, 186, 480};
777 const int subject_gapped_start[k_num_hsps_start] = { 458, 458, 337};
781 setUpHSPList(k_num_hsps_start,
782 query_offset, query_end,
783 subject_offset, subject_end,
785 subject_gapped_start,
788 const int query_offset_final[k_num_hsps_end] = { 2, 250, 67 };
789 const int query_end_final[k_num_hsps_end] = { 485, 331, 86};
790 const int subject_offset_final[k_num_hsps_end] = { 9, 523, 585};
791 const int subject_end_final[k_num_hsps_end] = { 512, 604, 604};
792 const int score_final[k_num_hsps_end] = { 510, 34, 31};
793 const double evalue_final[k_num_hsps_end] = {7.0065e-61, 1.349, 3.7944};
794 const int num_idents_final[k_num_hsps_end] = { 171, 22, 8 };
797 setUpHSPList(k_num_hsps_end,
800 subject_offset_final,
803 subject_offset_final,
808 const Int8 kEffSearchSp = 500000;
809 const bool kSmithWaterman =
true;
811 runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
812 init_hsp_list, ending_hsp_list,
817 BOOST_REQUIRE(ending_hsp_list ==
NULL);
822 const int k_num_hsps_start = 6;
823 const int k_num_hsps_end = 8;
827 const int query_offset[k_num_hsps_start] =
828 { 24, 99, 16, 84, 6, 223 };
829 const int query_end[k_num_hsps_start] =
830 { 62, 128, 24, 114, 25, 231 };
831 const int subject_offset[k_num_hsps_start] =
832 { 245, 0, 198, 86, 334, 151 };
833 const int subject_end[k_num_hsps_start] =
834 { 287, 29, 206, 119, 353, 159 };
835 const int score[k_num_hsps_start] =
836 { 37, 26, 25, 25, 24, 24 };
837 const int query_gapped_start[k_num_hsps_start] =
838 { 29, 104, 20, 91, 19, 227 };
839 const int subject_gapped_start[k_num_hsps_start] =
840 { 250, 5, 202, 93, 347, 155 };
846 setUpHSPList(k_num_hsps_start,
847 query_offset, query_end,
848 subject_offset, subject_end,
850 subject_gapped_start,
853 const int query_offset_final[k_num_hsps_end] =
854 { 24, 140, 126, 10, 137, 198, 18, 137 };
855 const int query_end_final[k_num_hsps_end] =
856 { 30, 171, 205, 35, 157, 208, 31, 152 };
857 const int subject_offset_final[k_num_hsps_end] =
858 { 245, 408, 212, 130, 339, 388, 200, 186 };
859 const int subject_end_final[k_num_hsps_end] =
860 { 251, 439, 287, 155, 359, 398, 210, 201 };
861 const int score_final[k_num_hsps_end] =
862 { 29, 28, 28, 28, 25, 24, 24, 22 };
863 const double evalue_final[k_num_hsps_end] =
864 { 1.361074, 1.837947, 2.118044, 2.153685, 4.198304, 5.529096,
865 6.425098, 8.532644 };
866 const int ident_final[k_num_hsps_end] =
867 { 3, 8, 23, 10, 6, 5, 6, 5};
870 setUpHSPList(k_num_hsps_end,
873 subject_offset_final,
876 subject_offset_final,
882 const Int8 kEffSearchSp = 84660;
883 const bool kSmithWaterman =
true;
885 runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
886 init_hsp_list, ending_hsp_list,
891 BOOST_REQUIRE(ending_hsp_list ==
NULL);
905 const int k_num_hsps_start = 4;
906 const int k_num_hsps_end = 4;
907 CSeq_id query_id(
"gi|48100936");
910 const int query_offset[k_num_hsps_start] = { 995, 1004, 995, 973};
911 const int query_end[k_num_hsps_start] = { 1314, 1314, 1403, 1316};
912 const int subject_offset[k_num_hsps_start] = { 61, 36, 61, 106};
913 const int subject_end[k_num_hsps_start] = { 384, 384, 455, 420};
914 const int score[k_num_hsps_start] = { 341, 327, 314, 301};
915 const int query_gapped_start[k_num_hsps_start] = { 1233, 1017, 1310,
917 const int subject_gapped_start[k_num_hsps_start] = { 303, 49, 347, 331};
921 setUpHSPList(k_num_hsps_start,
922 query_offset, query_end,
923 subject_offset, subject_end,
925 subject_gapped_start,
927 const int query_offset_final[k_num_hsps_end] =
928 { 995, 1261, 1025, 1210};
929 const int query_end_final[k_num_hsps_end] =
930 { 1314, 1341, 1125, 1243};
931 const int subject_offset_final[k_num_hsps_end] =
933 const int subject_end_final[k_num_hsps_end] =
934 { 384, 115, 482, 50};
935 const int score_final[k_num_hsps_end] =
937 const double evalue_final[k_num_hsps_end] =
938 { 2.712e-34, 3.6003e-05, 0.00048334, 0.00441};
939 const int num_idents_final[k_num_hsps_end] = { 108, 31, 30, 12 };
942 setUpHSPList(k_num_hsps_end,
945 subject_offset_final,
948 subject_offset_final,
953 const Int8 kEffSearchSp = 1000*1000;
954 const bool kSmithWaterman =
true;
956 const int kHitListSize = 1;
957 const double kEvalueThreshold = 0.005;
959 runRedoAlignmentCoreUnitTest(kProgram, query_id, subj_id,
960 init_hsp_list, ending_hsp_list,
962 kSmithWaterman, kEvalueThreshold,
966 BOOST_REQUIRE(ending_hsp_list ==
NULL);
Declares the CBl2Seq (BLAST 2 Sequences) class.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Structures and API used for saving BLAST hits.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
BlastHSPWriter * BlastHSPWriterNew(BlastHSPWriterInfo **writer_info, BlastQueryInfo *query_info, BLAST_SequenceBlk *query)
A generic function to create writer.
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
int BlastHSPStreamWrite(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified write function for this BlastHSPStream implementation.
BlastHSPStream * BlastHSPStreamFree(BlastHSPStream *hsp_stream)
Frees the BlastHSPStream structure by invoking the destructor function set by the user-defined constr...
BlastHSPStream * BlastHSPStreamNew(EBlastProgramType program, const BlastExtensionOptions *extn_opts, Boolean sort_on_read, Int4 num_queries, BlastHSPWriter *writer)
Initialize the HSP stream.
Header file for composition-based statistics.
Int2 Blast_RedoAlignmentCore(EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Top level routine to recompute alignments for each match found by the gapped BLAST algorithm (single-...
Definitions which are dependent on the NCBI C++ Object Manager.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
#define BLAST_HITLIST_SIZE
Number of database sequences to save hits for.
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
Int2 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions *options, Int4 dbseq_num, Int8 db_length, Int8 *searchsp_eff, Int4 num_searchsp)
Fill the non-default values in the BlastEffectiveLengthsOptions structure.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
@ eSmithWatermanTbck
Smith-waterman finds optimal scores, then ALIGN_EX to find alignment.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingParameters*.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
Int2 BlastExtensionParametersNew(EBlastProgramType blast_program, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
BlastExtensionParameters * BlastExtensionParametersFree(BlastExtensionParameters *parameters)
Deallocate memory for BlastExtensionParameters.
Int2 BlastScoringParametersNew(const BlastScoringOptions *options, BlastScoreBlk *sbp, BlastScoringParameters **parameters)
Calculate scaled cutoff scores and gap penalties.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
BlastScoringParameters * BlastScoringParametersFree(BlastScoringParameters *parameters)
Deallocate memory for BlastScoringParameters.
Int2 BlastEffectiveLengthsParametersNew(const BlastEffectiveLengthsOptions *options, Int8 db_length, Int4 num_seqs, BlastEffectiveLengthsParameters **parameters)
Allocate memory for BlastEffectiveLengthsParameters.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Declares the CBlastProteinOptionsHandle class.
void ** _PSIAllocateMatrix(unsigned int ncols, unsigned int nrows, unsigned int data_type_sz)
Generic 2 dimensional matrix allocator.
void ** _PSIDeallocateMatrix(void **matrix, unsigned int ncols)
Generic 2 dimensional matrix deallocator.
void _PSICopyMatrix_int(int **dest, int **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows...
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
Utilities initialize/setup BLAST.
Int2 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastScoringOptions *scoring_options, EBlastProgramType program_number, BlastScoreBlk **sbpp, double scale_factor, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
Initializes the score block structure.
Int2 BLAST_CalcEffLengths(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsParameters *eff_len_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Blast_Message **blast_message)
Function to calculate effective query length and db length as well as effective search space.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)
Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Int2 BlastNumber2Program(EBlastProgramType number, char **program)
Return string name for program given a number.
Wrapper class for BLAST_SequenceBlk .
Runs the BLAST algorithm between 2 sequences.
Wrapper class for BlastExtensionOptions .
Wrapper class for BlastHitSavingOptions .
Encapsulates ALL the BLAST algorithm's options.
Wrapper class for BlastQueryInfo .
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
Wrapper class for BlastScoringOptions .
Wrapper class for BlastSeqSrc .
static CTestObjMgr & Instance()
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
Constants used in compositional score matrix adjustment.
ECompoAdjustModes
A collection of constants that specify all permissible modes of composition adjustment.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
@ eCompositionMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...
const CBlastOptionsHandle & GetOptionsHandle() const
Retrieve the options handle.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
BlastSeqSrc * MultiSeqBlastSeqSrcInit(TSeqLocVector &seq_vector, EBlastProgramType program, bool dbscan_mode=false)
Initialize the sequence source structure.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
const TSeqLocVector & GetQueries() const
Retrieve a vector of query sequences.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
EProgram ProgramNameToEnum(const std::string &program_name)
Map a string into an element of the ncbi::blast::EProgram enumeration (except eBlastProgramMax).
const TSeqLocVector & GetSubjects() const
Retrieve a vector of subject sequences.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
int16_t Int2
2-byte (16-bit) signed integer
int64_t Int8
8-byte (64-bit) signed integer
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
ENa_strand
strand of nucleic acid
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
BlastHSPCollectorParams * BlastHSPCollectorParamsNew(const BlastHitSavingOptions *hit_options, Int4 compositionBasedStats, Boolean gapped_calculation)
Sets up parameter set for use by collector.
BlastHSPWriterInfo * BlastHSPCollectorInfoNew(BlastHSPCollectorParams *params)
WriterInfo to create a default writer: the collector.
Definitions used to get joint probabilities for a scoring matrix.
int Blast_FrequencyDataIsAvailable(const char *matrix_name)
Retrieve the background letter probabilities implicitly used in constructing the score matrix matrix_...
void CheckForBlastSeqSrcErrors(const BlastSeqSrc *seqsrc)
Magic spell ;-) needed for some weird compilers... very empiric.
Defines: CTimeFormat - storage class for time format.
std::istream & in(std::istream &in_, double &x_)
BOOST_AUTO_TEST_CASE(testRedoAlignmentWithCompBasedStats)
Implementation of the BlastSeqSrc interface for a vector of sequence locations.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Int8 eff_searchsp
Effective search space for this context.
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Computed values used as parameters for gapped alignments.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Int4 num_queries
Number of query sequences.
Default implementation of BlastHSPStream.
A wrap of data structure used to create a writer.
ADT definition of BlastHSPWriter.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
double bit_score
Bit score, calculated from score.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 hitlist_size
Maximal number of database sequences to return results for.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Structure used for scoring calculations.
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Blast_KarlinBlk ** kbp_psi
K-A parameters for position-based alignments.
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
Blast_KarlinBlk ** kbp_gap_psi
K-A parameters for psi alignments.
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Structure to hold a message from the core of the BLAST engine.
static int ** loadPssmFromFile(const string &filename, unsigned int query_length)
CRedoAlignmentTestFixture()
static BlastHSPList * setUpHSPList(int num_hsps, const int query_offset[], const int query_end[], const int subject_offset[], const int subject_end[], const int query_gapped_start[], const int subject_gapped_start[], const int score[], const double evalue[]=NULL, const int num_ident[]=NULL)
~CRedoAlignmentTestFixture()
static void runRedoAlignmentCoreUnitTest(EBlastProgramType program, CSeq_id &qid, CSeq_id &sid, BlastHSPList *init_hsp_list, const BlastHSPList *ending_hsp_list, Int8 effective_searchsp, ECompoAdjustModes compositonBasedStatsMode, bool doSmithWaterman, double evalue_threshold=BLAST_EXPECT_VALUE, int hit_list_size=BLAST_HITLIST_SIZE)
static void setupPositionBasedBlastScoreBlk(BlastScoreBlk *sbp, unsigned int qlen)
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
int ** data
actual scoring matrix data, stored in row-major form
SBlastScoreMatrix * pssm
position-specific score matrix
Structure to represent a single sequence to be fed to BLAST.
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST The types of Seq-loc currently suppo...
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager.
Utilities for more convenient use of the Boost.Test library.