47 #include "../core/jumper.h"
60 #ifndef SMALLEST_EVALUE
62 #define SMALLEST_EVALUE 1.0e-180
95 retval = -(pos + pos2advance - 1);
116 ENa_strand strand,
bool translate,
int length,
int original_length,
124 retval = original_length -
170 unsigned int first,
unsigned int nsegs,
174 Int4 query_length,
Int4 subject_length,
175 bool translate1,
bool translate2)
183 int length1 = query_length;
184 int length2 = subject_length;
186 lengths.reserve(nsegs);
198 for (
unsigned int esp_index =
first; esp_index< (
unsigned int)esp->
size && esp_index < (
unsigned int)(
first+nsegs); esp_index++) {
199 switch (esp->
op_type[esp_index]) {
210 strands.push_back(m_strand);
211 strands.push_back(s_strand);
212 starts.push_back(m_start);
213 starts.push_back(s_start);
224 strands.push_back(m_strand);
226 starts.push_back(m_start);
227 starts.push_back(s_start);
239 strands.push_back(s_strand);
240 starts.push_back(m_start);
241 starts.push_back(s_start);
248 lengths.push_back(esp->
num[esp_index]);
252 if (lengths.size() != nsegs)
253 lengths.resize(nsegs);
255 if (starts.size() != nsegs*2)
256 starts.resize(nsegs*2);
258 if (strands.size() != nsegs*2)
259 strands.resize(nsegs*2);
287 ids.push_back(master);
288 ids.push_back(slave);
290 dense_seg.
SetNumseg((
int) lengths.size());
291 dense_seg.
SetLens().swap(lengths);
311 bool translate_master,
312 bool translate_slave)
318 int nsegs = (
int) lengths.size();
322 for (
int i = 0;
i < nsegs;
i++) {
331 master_loc->
SetInt().SetId(*master);
332 master_loc->
SetInt().SetFrom(m_start);
333 if (translate_master)
336 m_stop = m_start + lengths[
i] - 1;
337 master_loc->
SetInt().SetTo(m_stop);
338 master_loc->
SetInt().SetStrand(strands[2*
i]);
344 if ( (s_start = starts[2*
i+1]) !=
GAP_VALUE) {
345 slave_loc->
SetInt().SetId(*slave);
346 slave_loc->
SetInt().SetFrom(s_start);
350 s_stop = s_start + lengths[
i] - 1;
351 slave_loc->
SetInt().SetTo(s_stop);
352 slave_loc->
SetInt().SetStrand(strands[2*
i+1]);
359 std_seg->SetIds().push_back(master);
360 std_seg->SetIds().push_back(slave);
361 std_seg->SetLoc().push_back(master_loc);
362 std_seg->SetLoc().push_back(slave_loc);
364 retval.push_back(std_seg);
378 for (
int index=0; index<esp->
size; index++)
386 int temp_num = esp->
num[index];
389 esp->
num[index] = esp->
num[index-1];
391 esp->
num[index-1] = temp_num;
392 esp->
op_type[index-1] = temp_op;
408 int product_length = product_end - product_start + 1;
409 int genomic_length = genomic_end - genomic_start + 1;
412 for (
const auto& it : exon.
GetParts()) {
413 switch (it->Which()) {
420 p += it->GetMismatch();
421 g += it->GetMismatch();
425 p += it->GetProduct_ins();
429 g += it->GetGenomic_ins();
433 cerr <<
"Urecognized exon part\t" << product_id.
AsFastaString()
438 if (p != product_length) {
441 << product_length <<
"\t" << p << endl;
444 if (
g != genomic_length) {
447 << genomic_length <<
"\t" <<
g << endl;
475 const Uint1 kGap = 15;
483 while (last_h->
next &&
486 last_h = last_h->
next;
496 exon->SetProduct_start().SetNucpos(hsp->
query.
offset);
497 exon->SetProduct_end().SetNucpos(last_hsp->
query.
end - 1);
499 exon->SetGenomic_end(last_hsp->
subject.
end - 1);
501 exon->SetProduct_strand(product_strand);
502 exon->SetGenomic_strand(genomic_strand);
512 exon->SetAcceptor_before_exon().SetBases(l_bases);
522 exon->SetDonor_after_exon().SetBases(r_bases);
527 prev = hh, hh = hh->next) {
529 int query_pos = hh->hsp->query.offset;
530 int subject_pos = hh->hsp->subject.offset;
536 _ASSERT(hh->hsp->query.offset >=
prev->hsp->query.end);
537 _ASSERT(hh->hsp->subject.offset >=
prev->hsp->subject.end);
538 if (hh->hsp->query.offset >
prev->hsp->query.end) {
540 chunk->SetProduct_ins(hh->hsp->query.offset -
541 prev->hsp->query.end);
543 exon->SetParts().push_back(chunk);
546 if (hh->hsp->subject.offset >
prev->hsp->subject.end) {
548 chunk->SetGenomic_ins(hh->hsp->subject.offset -
549 prev->hsp->subject.end);
551 exon->SetParts().push_back(chunk);
558 query_pos += num_matches;
559 subject_pos += num_matches;
561 if (num_matches > 0) {
564 chunk->SetMatch(num_matches);
565 exon->SetParts().push_back(chunk);
574 chunk->SetGenomic_ins(1);
578 chunk->SetProduct_ins(1);
582 chunk->SetMismatch(1);
587 exon->SetParts().push_back(chunk);
590 num_matches =
MAX(hh->hsp->query.end - query_pos, 0);
591 _ASSERT(hh->hsp->query.end - query_pos >= -1);
595 if (num_matches > 0) {
597 chunk->SetMatch(num_matches);
598 exon->SetParts().push_back(chunk);
607 exons.push_back(exon);
632 bool translate_master,
bool translate_slave)
638 if (translate_master || translate_slave) {
641 translate_master, translate_slave);
662 Int4 query_length,
Int4 subject_length)
669 bool translate1, translate2;
670 bool is_disc_align =
false;
672 if (hsp->
score == 0) {
677 for (
int i=0;
i<
t->size;
i++) {
680 is_disc_align =
true;
706 for (
int index=0; index< esp->
size; index++)
711 for (index2=
first; index2<esp->
size; index2++, nsegs++){
730 strands, query_length, subject_length,
731 translate1, translate2);
735 translate1, translate2);
739 seqalign->
SetSegs().SetDisc().Set().push_back(sa_tmp);
748 strands, query_length, subject_length,
749 translate1, translate2);
771 Int4 query_length,
Int4 subject_length)
780 Int4 original_length1, original_length2;
788 Int4 from1, from2, to1, to2;
796 original_length1 = subject_length;
797 original_length2 = query_length;
798 id1.
Reset(subject_id);
805 original_length1 = query_length;
806 original_length2 = subject_length;
808 id2.
Reset(subject_id);
822 for (
int index=0; index<esp->
size; index++)
833 slp1->
SetInt().SetTo(
MIN(start1,original_length1) - 1);
834 slp1->
SetInt().SetId(*id1);
835 slp1->
SetInt().SetStrand(strand1);
853 to1 =
MIN(start1,original_length1) - 1;
854 slp1->
SetInt().SetTo(to1);
855 slp1->
SetInt().SetId(*id1);
856 slp1->
SetInt().SetStrand(strand1);
860 to2 =
MIN(start2,original_length2) - 1;
861 slp2->
SetInt().SetFrom(from2);
862 slp2->
SetInt().SetTo(to2);
863 if (start2 > original_length2)
864 slp1->
SetInt().SetTo(to1 - 1);
868 slp2->
SetInt().SetTo(original_length2 - from2 - 1);
869 slp2->
SetInt().SetFrom(original_length2 - to2 - 1);
872 slp2->
SetInt().SetId(*id2);
873 slp2->
SetInt().SetStrand(strand2);
883 seg->SetLoc().push_back(slp2);
884 seg->SetLoc().push_back(slp1);
888 seg->SetLoc().push_back(slp1);
889 seg->SetLoc().push_back(slp2);
893 ids.resize(seg->GetDim());
895 seqalign->
SetSegs().SetStd().push_back(seg);
905 to2 =
MIN(start2,original_length2) - 1;
906 slp2->
SetInt().SetFrom(from2);
907 slp2->
SetInt().SetTo(to2);
911 slp2->
SetInt().SetTo(original_length2 - from2 - 1);
912 slp2->
SetInt().SetFrom(original_length2 - to2 - 1);
914 slp2->
SetInt().SetId(*id2);
915 slp2->
SetInt().SetStrand(strand2);
928 to1 =
MIN(start1, original_length1) - 1;
936 while (to2 >= original_length2) {
944 to2 = original_length2 - from2 - 1;
945 from2 = original_length2 - tmp_int - 1;
948 slp1->
SetInt().SetFrom(from1);
949 slp1->
SetInt().SetTo(to1);
950 slp1->
SetInt().SetId(*id1);
951 slp1->
SetInt().SetStrand(strand1);
952 slp2->
SetInt().SetFrom(from2);
953 slp2->
SetInt().SetTo(to2);
954 slp2->
SetInt().SetId(*id2);
955 slp2->
SetInt().SetStrand(strand2);
970 to1 =
MIN(start1,original_length1) - 1;
975 if (to2 >= original_length2) {
976 to2 = original_length2 -1;
983 to2 = original_length2 - from2 - 1;
984 from2 = original_length2 - tmp_int - 1;
987 slp1->
SetInt().SetFrom(from1);
988 slp1->
SetInt().SetTo(to1);
989 slp1->
SetInt().SetId(*id1);
990 slp1->
SetInt().SetStrand(strand1);
991 slp2->
SetInt().SetFrom(from2);
992 slp2->
SetInt().SetTo(to2);
993 slp2->
SetInt().SetId(*id2);
994 slp2->
SetInt().SetStrand(strand2);
1011 seq_int2_last->
SetTo(start2 - 1);
1014 seq_int2_last->
SetFrom(original_length2 - start2);
1019 if(seq_int2_last->
GetFrom() > seq_int2_last->
GetTo()) {
1022 seq_int2_last->
SetTo(seq_int2_last->
GetTo() + 3);
1028 seq_int1_last->
GetTo() != 0)
1029 seq_int1_last->
SetTo(seq_int1_last->
GetTo() + 1);
1038 to2 =
MIN(start2,original_length2) - 1;
1044 to2 = original_length2 - from2 - 1;
1045 from2 = original_length2 - tmp_int - 1;
1047 slp2->
SetInt().SetFrom(from2);
1048 slp2->
SetInt().SetTo(to2);
1050 slp2->
SetInt().SetId(*id2);
1073 seg->SetLoc().push_back(slp2);
1074 seg->SetLoc().push_back(slp1);
1078 seg->SetLoc().push_back(slp1);
1079 seg->SetLoc().push_back(slp2);
1083 ids.resize(seg->GetDim());
1085 seqalign->
SetSegs().SetStd().push_back(seg);
1102 retval->SetId().SetStr(ident_string);
1105 retval->SetValue().SetInt(
i);
1107 retval->SetValue().SetReal(d);
1132 if (evalue >= 0.0) {
1152 if ( !seqid_list.empty() ) {
1153 retval += seqid_list.size();
1165 const vector<string> & seqid_list,
1174 static const string kScore(
"score");
1176 static const string kBlastScore(
"blast_score");
1181 static const string kSumN(
"sum_n");
1187 if (evalue >= 0.0) {
1188 string score_type = (hsp->
num <= 1) ?
"e_value" :
"sum_e";
1189 scores.push_back(
s_MakeScore(score_type, evalue, 0,
false));
1195 static const string kBitScore(
"bit_score");
1201 static const string kNumIdent(
"num_ident");
1206 static const string kCompAdj(
"comp_adjustment_method");
1211 if ( !seqid_list.empty() ) {
1212 ITERATE(vector<string>, sid, seqid_list) {
1213 scores.push_back(
s_MakeScore(*sid, 0.0, 0,
true));
1218 static const string kNumPositives(
"num_positives");
1222 if(query_length > 0) {
1223 static const string kQueryCovHsp(
"hsp_percent_coverage");
1225 scores.push_back(
s_MakeScore(kQueryCovHsp, hsp_coverage, 0,
false));
1235 const vector<string> & seqid_list)
1237 if (seqid_list.empty())
1241 userObject->
SetType().SetStr(
"use_this_seqid");
1242 userObject->
AddField(
"SEQIDS", seqid_list);
1243 seqalign->
SetExt().push_back(userObject);
1255 const vector<string> & seqid_list,
1276 Int4 query_length,
Int4 subject_length,
1277 const vector<string> & seqid_list)
1286 ids.push_back(query_id);
1287 ids.push_back(subject_id);
1300 starts.push_back(query_length - hsp->
query.
end);
1305 starts.push_back(subject_length - hsp->
subject.
end);
1326 Int4 query_length,
Int4 subject_length,
1327 const vector<string> & seqid_list)
1336 query_loc->
SetInt().SetId(*query_id);
1337 subject_loc->
SetInt().SetId(*subject_id);
1342 ids.push_back(query_id);
1343 ids.push_back(subject_id);
1357 query_loc->
SetInt().SetFrom(query_length -
1373 subject_loc->
SetInt().SetFrom(subject_length -
1375 subject_loc->
SetInt().SetTo(subject_length -
1379 retval->SetLoc().push_back(query_loc);
1380 retval->SetLoc().push_back(subject_loc);
1402 Int4 subject_length,
1403 const vector<string> & seqid_list,
1416 vector<string> emptyList;
1423 for (index=0; index<hsp_list->
hspcnt; index++) {
1425 seqalign->
SetSegs().SetDendiag().push_back(
1434 for (index=0; index<hsp_list->
hspcnt; index++) {
1436 seqalign->
SetSegs().SetStd().push_back(
1446 sa_vector.push_back(seqalign);
1463 Int4 query_length,
Int4 subject_length,
bool is_ooframe,
1464 const vector<string> & seqid_list,
1473 sa_vector.reserve(hsp_list->
hspcnt);
1474 vector<string> emptyList;
1476 for (
int index = 0; index < hsp_list->
hspcnt; index++) {
1483 query_length, subject_length);
1487 query_length, subject_length);
1490 if (seqalign.
Empty())
continue;
1501 sa_vector.push_back(seqalign);
1510 retval->
Set().clear();
1518 const int query_row = 0;
1522 if (
query.IsInt()) {
1523 q_shift =
query.GetInt().GetFrom();
1539 const int kSubjDimension = 1;
1564 for (
int index = 0; index < hit_list->
hsplist_count; index++) {
1578 if(subject_id.
Empty()){
1584 vector <TSeqRange> ranges;
1585 for (
int i=0;
i<hsp_list->
hspcnt;
i++) {
1590 ranges.push_back(rg);
1595 if (!ranges.empty() && seqinfo_src->
GetMasks(kOid, ranges, masks)) {
1596 subj_masks.push_back(masks);
1600 vector<string> seqid_list;
1603 vector<CRef<CSeq_align > > hit_align;
1632 seq_aligns->
Set().push_back(*iter);
1639 seq_aligns->
Set().push_back(*iter);
1652 vector<TSeqLocInfoVector>& subj_masks)
1666 for (
int pattern_index = 0; pattern_index <
pattern_info->num_patterns;
1670 if (one_phi_results) {
1681 *
query.GetSeq_loc(0),
1686 subj_masks[pattern_index]));
1688 retval.push_back(seq_aligns);
1696 *
query.GetSeq_loc(0),
1701 subj_masks[pattern_index]));
1702 retval.push_back(seq_aligns);
1731 for (
int index = 0; index < hsp_list->
hspcnt; index++) {
1759 vector<TSeqLocInfoVector>& subj_masks)
1773 vector<CRef<CSeq_align > > hit_align;
1774 retval.reserve(
results->num_queries);
1777 for (
int qindex = 0; qindex <
results->num_queries; qindex++) {
1785 for (sindex = 0; sindex < hit_list->
hsplist_count; ++sindex) {
1787 if (hsp_list->
oid ==
static_cast<Int4>(subj_idx))
1807 vector<string> seqid_list;
1812 vector <TSeqRange> ranges;
1813 for (
int i=0;
i<hsp_list->
hspcnt;
i++) {
1818 ranges.push_back(rg);
1823 if (!ranges.empty() &&
1824 seqinfo_src.
GetMasks(subj_idx, ranges, masks)) {
1825 subj_masks[qindex].push_back(masks);
1855 seq_aligns->
Set().push_back(*iter);
1860 retval.push_back(seq_aligns);
1876 const size_t num_queries,
1877 const size_t num_subjects)
1880 result_alnvec.reserve(alnvec.size());
1882 for (
size_t iQuery = 0; iQuery < num_queries; iQuery++)
1884 for (
size_t iSubject = 0; iSubject < num_subjects; iSubject++)
1886 size_t iLinearIndex = iSubject * num_queries + iQuery;
1888 result_alnvec.push_back(aln_set);
1892 _ASSERT(result_alnvec.size() == alnvec.size());
1893 return result_alnvec;
1903 vector<TSeqLocInfoVector>& subj_masks)
1906 size_t seqinfo_size = seqinfo_src->
Size();
1909 int num_of_queries =
results->num_queries;
1914 subj_masks.resize(num_of_queries *seqinfo_size);
1916 for (
Uint4 index = 0; index < seqinfo_size; index++) {
1917 vector<TSeqLocInfoVector> tmp_subj_masks(num_of_queries);
1920 *seqinfo_src,
prog, index,
1921 is_gapped, is_ooframe,
1929 for (TSeqAlignVector::size_type
i = 0;
i < seqalign.size(); ++
i) {
1930 retval.push_back(seqalign[
i]);
1932 subj_masks[ seqinfo_size *
i + index] = tmp_subj_masks[
i];
1948 vector<TSeqLocInfoVector>& subj_masks)
1956 subj_masks.resize(
results->num_queries);
1957 retval.reserve(
results->num_queries);
1960 for (
int index = 0; index <
results->num_queries; index++) {
1966 *
query.GetSeq_loc(index),
1971 subj_masks[index]));
1973 retval.push_back(seq_aligns);
1974 _TRACE(
"Query " << index <<
": " << seq_aligns->
Get().size()
1989 vector<TSeqLocInfoVector>& subj_masks,
2015 local_data, &seqinfo_src,
2022 &seqinfo_src, gapped,
2023 oof_mode, subj_masks);
2042 Int4 query_length,
Int4 subject_length,
2043 const vector<string> & seqid_list)
2052 query_loc->
SetInt().SetId(*query_id);
2053 subject_loc->
SetInt().SetId(*subject_id);
2058 ids.push_back(query_id);
2059 ids.push_back(subject_id);
2080 retval->SetLoc().push_back(query_loc);
2081 retval->SetLoc().push_back(subject_loc);
2106 const vector<string> & ) =
NULL;
2125 vector<string> seqid_list;
2129 for (
int j = 0; j < hsp_list->
hspcnt; j++)
2136 seg_list.push_back((*fun_ptr) (hsp, query_id, subject_id,
2137 query_length, subject_length, seqid_list));
CRef< CSeq_align > RemapAlignToLoc(const CSeq_align &align, CSeq_align::TDim row, const CSeq_loc &loc)
Remap seq-align row to the seq-loc.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define CODON_LENGTH
Codons are always of length 3.
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
double Blast_HSPGetQueryCoverage(const BlastHSP *hsp, Int4 query_length)
Calculate query coverage percentage of an hsp.
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
#define TRANSLATED_SUBJECT_MASK
This bit is on if the subject is translated.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
#define TRANSLATED_QUERY_MASK
This bit is on if the query is translated.
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
EResultType
Specifies the style of Seq-aligns that should be built from the internal BLAST data structures.
@ eSequenceComparison
Seq-aligns in the BLAST 2 Sequence style (one alignment per query-subject pair)
Wrapper class for BlastHSPResults .
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
void Validate(bool full_test=false) const
Validators.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Abstract base class to encapsulate retrieval of sequence identifiers.
Provides access (not ownership) to the C structures used to configure local BLAST search class implem...
Collection of masked regions for a single query sequence.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
EGapAlignOpType
Operation types within the edit script.
@ eGapAlignDel2
Frame shift deletion of two nucleotides.
@ eGapAlignIns2
Frame shift insertion of two nucleotides.
@ eGapAlignIns1
Frame shift insertion of one nucleotide.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignDel1
Frame shift deletion of one nucleotide.
@ eGapAlignDecline
Non-aligned region.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0
Get the Seq_loc for the sequence indicated by index.
void BLASTPrelminSearchHitListToStdSeg(EBlastProgramType program, BlastHitList *hit_list, const CSeq_loc &query_loc, TSeqPos query_length, const IBlastSeqInfoSrc *subject_seqinfo, list< CRef< CStd_seg > > &seg_list)
CRef< CDense_diag > x_UngappedHSPToDenseDiag(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Dense-diag object from HSP information and sequence identifiers for a non-translated ungapp...
static CSeq_align::C_Segs::TStd s_CreateStdSegs(CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands, bool translate_master, bool translate_slave)
Creates a Std-seg object from the starts, lengths and strands vectors and two Seq-ids for a translate...
virtual CConstRef< objects::CSeq_loc > GetSeqLoc(Uint4 index) const =0
Method to retrieve the sequence location given its ordinal number.
static int s_GetCurrPos(int &pos, int pos2advance)
Advances position in a sequence, according to an edit script instruction.
static const TSeqPos kBlastAlignmentDim
BLAST alignments have always 2 dimensions (i.e.
static void s_CollectSeqAlignData(const BlastHSP *hsp, const GapEditScript *esp, unsigned int first, unsigned int nsegs, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands, Int4 query_length, Int4 subject_length, bool translate1, bool translate2)
Fills vectors of start positions, lengths and strands for all alignment segments.
static void s_CorrectUASequence(BlastHSP *hsp)
Checks if any decline-to-align segments immediately follow an insertion or deletion,...
#define SMALLEST_EVALUE
Threshold below which e-values are saved as 0.
static CRef< CSeq_align > s_BlastHSP2SeqAlign(EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > id1, CRef< CSeq_id > id2, Int4 query_length, Int4 subject_length)
Converts a traceback editing block to a Seq-align, provided the 2 sequence identifiers.
static void s_CreateDenseg(CDense_seg &dense_seg, CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts &starts, CDense_seg::TLens &lengths, CDense_seg::TStrands &strands)
Creates a Dense-seg object from the starts, lengths and strands vectors and two Seq-ids.
const char BLASTNA_TO_IUPACNA[]
Translates between blastna and iupacna.
void GetFilteredRedundantSeqids(const IBlastSeqInfoSrc &sisrc, int oid, vector< string > &seqids, bool use_gis=true)
Get Seqids for a sequence in a redundant database.
static size_t s_CalculateScoreVectorSize(const BlastHSP *hsp, const vector< string > &seqid_list)
Computes the exact size of a CSeq_align::TScore for a given HSP.
static TSeqPos s_GetAlignmentStart(int &curr_pos, int num, ENa_strand strand, bool translate, int length, int original_length, short frame)
Finds the starting position of a sequence segment in an alignment, given an editing script.
static CRef< CScore > s_MakeScore(const string &ident_string, double d, int i, bool is_integer)
Creates and initializes CScore with a given name, and with integer or double value.
CRef< CSeq_align_set > BlastHitList2SeqAlign_OMF(const BlastHitList *hit_list, EBlastProgramType prog, const CSeq_loc &query_loc, TSeqPos query_length, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, TSeqLocInfoVector &subj_masks)
static Int4 s_GetProteinFrameLength(Int4 nuc_length, Int2 frame)
Finds length of a protein frame given a nucleotide length and a frame number.
static TSeqAlignVector s_BLAST_OneSubjectResults2CSeqAlign(const BlastHSPResults *results, ILocalQueryData &query_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType prog, Uint4 subj_idx, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
Extracts results from the BlastHSPResults structure for only one subject sequence,...
CRef< CStd_seg > x_NonTranslatedHSPToStdSeg(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Std-seg object from HSP information and sequence identifiers for a non-translated ungapped ...
static void s_AddUserObjectToSeqAlign(CRef< CSeq_align > &seqalign, const vector< string > &seqid_list)
Produce UserObject with Seq-ids to limit formatting to ("use_this_seqid")
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
static ENa_strand s_Frame2Strand(short frame)
Converts a frame into the appropriate strand.
TSeqAlignVector PhiBlastResults2SeqAlign_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, const SPHIQueryInfo *pattern_info, vector< TSeqLocInfoVector > &subj_masks)
static TSeqAlignVector s_BlastResults2SeqAlignDatabaseSearch_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
#define GAP_VALUE
Value in the Dense-seg indicating a gap.
static CRef< CSeq_align > s_CreateSeqAlign(CRef< CSeq_id > master, CRef< CSeq_id > slave, CDense_seg::TStarts starts, CDense_seg::TLens lengths, CDense_seg::TStrands strands, bool translate_master, bool translate_slave)
Creates a Seq-align for a single HSP from precalculated vectors of start positions,...
static void s_ValidateExon(const CSpliced_exon &exon, const CSeq_id &product_id, const CSeq_id &genomic_id)
virtual size_t GetNumQueries()=0
Get the number of queries.
static void s_RemapToSubjectLoc(CRef< CSeq_align > &subj_aligns, const CSeq_loc &subj_loc)
Remap subject alignment if its location specified the reverse strand or a starting location other tha...
TSeqAlignVector LocalBlastResults2SeqAlign(BlastHSPResults *hsp_results, ILocalQueryData &local_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType program, bool gapped, bool oof_mode, vector< TSeqLocInfoVector > &subj_masks, EResultType result_type)
Convert traceback output into Seq-align format.
CRef< CSeq_align_set > CreateEmptySeq_align_set()
Constructs an empty Seq-align-set containing an empty discontinuous seq-align, and appends it to a pr...
void BLASTHspListToSeqAlign(EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, bool is_ooframe, const vector< string > &seqid_list, vector< CRef< CSeq_align > > &sa_vector)
This is called for each query and each subject in a BLAST search.
virtual size_t Size() const =0
Returns the size of the underlying container of sequences.
void MakeSplicedSeg(CSpliced_seg &spliced_seg, CRef< CSeq_id > product_id, CRef< CSeq_id > genomic_id, int product_length, const HSPChain *chain)
Convert a spliced alignment in BlastHSPChain into Spliced_seg.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
void BLASTUngappedHspListToSeqAlign(EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list, vector< CRef< CSeq_align > > &sa_vector)
Creates a Seq-align from an HSP list for an ungapped search.
CRef< CStd_seg > x_UngappedHSPToStdSeg(BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< string > &seqid_list)
Creates a Std-seg object from HSP information and sequence identifiers for a translated ungapped sear...
void RemapToQueryLoc(CRef< CSeq_align > sar, const CSeq_loc &query)
Remaps Seq-align offsets relative to the query Seq-loc.
virtual bool CanReturnPartialSequence() const =0
Return true if the implementation can return anything besides a seq-loc for the entire sequence.
static TSeqAlignVector s_BlastResults2SeqAlignSequenceCmp_OMF(const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query_data, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
static void s_AddScoresToSeqAlign(CRef< CSeq_align > &seqalign, const BlastHSP *hsp, const vector< string > &seqid_list, Int4 query_length)
Given an HSP structure, creates a list of scores and inserts them into a Seq-align.
static void s_BuildScoreList(const BlastHSP *hsp, CSeq_align::TScore &scores, const vector< string > &seqid_list, Int4 query_length)
Creates a list of score objects for a Seq-align, given an HSP structure.
void GetSequenceLengthAndId(const IBlastSeqInfoSrc *seqinfo_src, int oid, CRef< objects::CSeq_id > &seqid, TSeqPos *length)
Retrieves subject sequence Seq-id and length.
static void s_AdjustNegativeSubjFrameInBlastn(ENa_strand subj_strand, EBlastProgramType program, BlastHSPList *hsp_list)
This function changes the subject frame for HSPs if the program is blastn and the subject was specifi...
virtual bool GetMasks(Uint4 index, const TSeqRange &target_range, TMaskedSubjRegions &retval) const =0
Retrieves the subject masks for the corresponding index.
static TSeqAlignVector s_TransposeSeqAlignVector(const TSeqAlignVector &alnvec, const size_t num_queries, const size_t num_subjects)
Transpose the (linearly organized) seqalign set matrix from (q1 s1 q2 s1 ...
static CRef< CSeq_align > s_OOFBlastHSP2SeqAlign(EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length)
This function is used for out-of-frame traceback conversion. Converts an OOF editing script chain to a...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Info(CExceptionArgs_Base &args)
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const string AsFastaString(void) const
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetTo(TTo value)
Assign a value to To data member.
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
TScore & SetScore(void)
Assign a value to Score data member.
TLens & SetLens(void)
Assign a value to Lens data member.
vector< CRef< CScore > > TScore
list< CRef< CStd_seg > > TStd
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
vector< ENa_strand > TStrands
TExons & SetExons(void)
Assign a value to Exons data member.
void SetProduct_length(TProduct_length value)
Assign a value to Product_length data member.
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSeqPos > TStarts
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CSeq_id > > TIds
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
vector< CRef< CSeq_id > > TIds
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
vector< CRef< CSeq_id > > TIds
TExt & SetExt(void)
Assign a value to Ext data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
list< CRef< CSpliced_exon > > TExons
vector< ENa_strand > TStrands
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
TIds & SetIds(void)
Assign a value to Ids data member.
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eType_partial
mapping pieces together
@ eType_diags
unbroken, but not ordered, diagonals
@ eProduct_type_transcript
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsGi(void) const
Check if variant Gi is selected.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
unsigned int
A callback function used to compare two keys in a database.
#define MAPPER_SPLICE_SIGNAL
if(yy_accept[yy_current_state])
const struct ncbi::grid::netcache::search::fields::SIZE size
#define MIN(a, b)
returns smaller of a and b.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ABS(a)
returns absolute value of a (|a|)
#define MAX(a, b)
returns larger of a and b.
static int pattern_info(int what, void *where, BOOL unsetok)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
static const char * kScore
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
double bit_score
Bit score, calculated from score.
Int4 num
How many HSPs are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
BlastSeg subject
Subject sequence info.
GapEditScript * gap_info
ALL gapped alignment is here.
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Int4 score
This HSP's raw score.
BlastHSPMappingInfo * map_info
The structure to contain all BLAST results for one query sequence.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
The query related information.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Int2 frame
Translation frame.
Edit script: linked list of correspondences between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
EGapAlignOpType * op_type
Array of type of operation.
A chain of HSPs: spliced alignment.
HSPContainer * hsps
A list of HSPs that belong to this chain.
struct HSPContainer * next
Uint1 query_base
Query base at this position.
Uint1 subject_base
Subject base at this position.
Int4 query_pos
Query position.
Alignment edit script for gapped alignment.
In PHI BLAST, structure containing information about all pattern occurrences in query.
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)