70 if (
CFile(fn).Exists() && ! fs.fail()) {
73 fs.getline(line, 256);
75 if (line[0] ==
'#')
continue;
88 const string suffix = (ig_opt->
m_IsProtein) ?
".pdm." :
".ndm.";
98 "Domain annotation data file could not be found in [internal_data] directory");
103 vector<string> tokens;
105 if (!tokens.empty()) {
107 for (
int i=1;
i<11; ++
i) {
124 if (lines.size() == 0) {
128 vector<string> tokens;
130 if (!tokens.empty()) {
135 if (tokens.size() == 3) {
137 }
else if (tokens.size() == 4) {
140 }
else if (tokens.size() == 5) {
155 if (lines.size() == 0) {
156 ERR_POST(
Warning <<
"D gene frame definition file could not be found");
159 vector<string> tokens;
161 if (!tokens.empty()) {
175 if ((*result)->HasAlignments()) {
177 CSeq_align_set::Tdata::iterator it = align_list.begin();
178 while (it != align_list.end()) {
179 if((
int)((*it)->GetAlignLength()) - (
int)((*it)->GetTotalGapCount(0)) < length){
180 it = align_list.erase(it);
194 if ((*result)->HasAlignments()) {
198 int top_hit_actual_len = 0;
201 int highest_score = 0;
213 if (score >= highest_score) {
214 highest_score = score;
215 extend_strand = (*align)->GetSeqStrand(0);
217 (*align)->GetSegs().GetDenseg().GetStarts()[1]);
221 int allowed_len =
min ((*align)->GetSegs().GetDenseg().GetStarts()[1],
222 query_len - ((*align)->GetSegs().GetDenseg().GetStarts()[0] +
223 (
int)(*align)->GetSegs().GetDenseg().GetLens()[0]));
224 top_hit_actual_len =
min(desired_len, allowed_len);
229 top_hit_actual_len =
min(desired_len,
230 min((*align)->GetSegs().GetDenseg().GetStarts()[0],
231 (*align)->GetSegs().GetDenseg().GetStarts()[1]));
238 int allowed_len =
min ((*align)->GetSegs().GetDenseg().GetStarts()[1],
239 query_len - ((*align)->GetSegs().GetDenseg().GetStarts()[0] +
240 (
int)(*align)->GetSegs().GetDenseg().GetLens()[0]));
241 actual_len =
min(top_hit_actual_len,
min(desired_len, allowed_len));
246 actual_len =
min(top_hit_actual_len,
min(desired_len,
247 min((*align)->GetSegs().GetDenseg().GetStarts()[0],
248 (*align)->GetSegs().GetDenseg().GetStarts()[1])));
254 if (actual_len > 0 && (*align)->GetSeqStrand(0) == extend_strand) {
257 (*align)->SetSegs().SetDenseg().SetStarts()[1] -= actual_len;
258 (*align)->SetSegs().SetDenseg().SetLens()[0] += actual_len;
262 (*align)->SetSegs().SetDenseg().SetStarts()[0] -= actual_len;
263 (*align)->SetSegs().SetDenseg().SetStarts()[1] -= actual_len;
264 (*align)->SetSegs().SetDenseg().SetLens()[0] += actual_len;
279 if ((*result)->HasAlignments()) {
283 int top_hit_actual_len = 0;
285 int highest_score = 0;
297 if (score >= highest_score) {
298 highest_score = score;
299 extend_strand = (*align)->GetSeqStrand(0);
302 int j_align_stop = (*align)->GetSegs().GetDenseg().GetSeqStop(1);
304 j_stop - j_align_stop);
308 int query_align_start = (*align)->GetSegs().GetDenseg().GetSeqStart(0);
309 int allowed_query_length = query_align_start;
311 top_hit_actual_len =
min(desired_len, allowed_query_length);
314 int allowed_query_length = query_stop - (*align)->GetSegs().GetDenseg().GetSeqStop(0);
315 top_hit_actual_len =
min(desired_len, allowed_query_length);
322 int query_align_start = (*align)->GetSegs().GetDenseg().GetSeqStart(0);
323 int allowed_query_length = query_align_start;
324 actual_len =
min(allowed_query_length, top_hit_actual_len);
328 int allowed_query_length = query_stop - (*align)->GetSegs().GetDenseg().GetSeqStop(0);
329 actual_len =
min(top_hit_actual_len, allowed_query_length);
334 if (actual_len > 0 && (*align)->GetSeqStrand(0) == extend_strand) {
337 int num_seg = (*align)->GetSegs().GetDenseg().GetNumseg();
338 int num_dim = (*align)->GetSegs().GetDenseg().GetDim();
339 (*align)->SetSegs().SetDenseg().SetStarts()[num_seg*num_dim - 2] -= actual_len;
340 (*align)->SetSegs().SetDenseg().SetLens()[num_seg-1] += actual_len;
343 int num_seg = (*align)->GetSegs().GetDenseg().GetNumseg();
344 (*align)->SetSegs().SetDenseg().SetLens()[num_seg-1] += actual_len;
358 vector<CRef <CIgAnnotation> > annots;
414 for (
int gene = 1; gene < num_genes; ++gene) {
447 cerr <<
"blast failed" << endl;
455 for (
int gene = 0; gene < num_genes; ++gene) {
469 cerr <<
"blast failed" << endl;
484 bool skipped =
false;
525 return final_results;
535 if (sx != sy)
return (sx > sy);
547 return (x_id < y_id);
596 if ((*annot)->m_GeneInfo[0] == -1 || (*annot)->m_GeneInfo[4] == -1 || (*annot)->m_GeneInfo[5] == -1) {
599 mask_list.push_back(
mask);
602 bool ms = (*annot)->m_MinusStrand;
606 mask_list.push_back(
mask);
610 mask_list.push_back(
mask);
652 if ((*annot)->m_GeneInfo[0] == -1) {
657 mask_list.push_back(
mask);
668 bool ms = (*annot)->m_MinusStrand;
673 if (begin > 0 && begin <=
len-1) {
678 if (end < len -1 && end >= 0) {
718 if ((*previous_d_results)[iq].HasAlignments()){
719 align_d = (*previous_d_results)[iq].SetSeqAlign();
722 if ((*annot)->m_GeneInfo[0] == -1 || !align_d || align_d.
Empty() || align_d->
IsEmpty()) {
727 mask_list.push_back(
mask);
731 bool ms = (*annot)->m_MinusStrand;
732 int v_end_or_j_begin = (
ms)?
734 int j_begin_or_v_end = (
ms)?
736 if (v_end_or_j_begin > 0) {
741 if (j_begin_or_v_end < len-1 && j_begin_or_v_end > 0) {
764 if ((*annot)->m_GeneInfo[0] ==-1) {
768 mask_list.push_back(
mask);
771 int begin = (*annot)->m_GeneInfo[0];
772 int end = (*annot)->m_GeneInfo[1];
776 mask_list.push_back(
mask);
781 mask_list.push_back(
mask);
797 if (sx < 0.999999 * sy || sy < 0.999999 * sx)
return false;
801 if (ix > iy)
return false;
811 if (sid.substr(0, 4) ==
"lcl|")
return(sid.substr(4, sid.length()));
823 if (ids.find(this_id) == string::npos) {
847 if ((*result)->HasAlignments()) {
861 if ((*it)->GetSeq_id(1).Match(align->
GetSeq_id(1)) &&
863 (*it)->GetSeqStop(1) == align->
GetSeqStop(1))
return true;
875 if (sx < 0.999999 * sy)
return true;
876 if (sy < 0.999999 * sx)
return false;
880 if (ix != iy)
return (ix > iy);
892 return (x_id < y_id);
901 if (sx != sy)
return (sx > sy);
921 if (ds < js || de < je + margin)
return true;
923 if (ds > js - margin || de > je)
return true;
979 bool va_or_vd_as_heavy_chain) {
983 if (align_D && !align_D->
Get().empty()) {
985 CSeq_align_set::Tdata::iterator it = align_list.begin();
987 if (q_ct!=
"VH" && q_ct!=
"VD" && q_ct!=
"VA" && q_ct!=
"VB" ) {
988 while (it != align_list.end()) {
989 it = align_list.erase(it);
992 }
else if (q_ct ==
"VA" || q_ct ==
"VD") {
993 if (va_or_vd_as_heavy_chain) {
999 while (it != align_list.end()) {
1000 it = align_list.erase(it);
1006 it = align_list.begin();
1007 while (it != align_list.end()) {
1011 char s_ct = q_ct[1];
1015 if (d_chain_type !=
"N/A"){
1016 if (d_chain_type[1] != q_ct[1]) keep =
false;
1018 string sid = (*it)->GetSeq_id(1).AsFastaString();
1020 if (sid.substr(0, 4) ==
"LCL|") sid = sid.substr(4, sid.length());
1021 if ((sid.substr(0, 2) ==
"IG" || sid.substr(0, 2) ==
"TR")
1025 if (s_ct!=
'B' && s_ct!=
'D') s_ct = q_ct[1];
1026 if (s_ct != q_ct[1]) keep =
false;
1031 if (!keep) it = align_list.erase(it);
1037 bool strand_found =
false;
1039 if ((*it)->GetSeqStrand(0) == q_st) {
1040 strand_found =
true;
1045 it = align_list.begin();
1046 while (it != align_list.end()) {
1047 if ((*it)->GetSeqStrand(0) != q_st) {
1048 it = align_list.erase(it);
1053 it = align_list.begin();
1054 while (it != align_list.end()) {
1056 int q_ds = (*it)->GetSeqStart(0);
1057 int q_de = (*it)->GetSeqStop(0);
1060 if (!keep) it = align_list.erase(it);
1068 if (align_J && !align_J->
Get().empty()) {
1070 CSeq_align_set::Tdata::iterator it = align_list.begin();
1071 while (it != align_list.end()) {
1075 char s_ct = q_ct[1];
1079 if (j_chain_type !=
"N/A"){
1080 if (j_chain_type[1] != q_ct[1]) keep =
false;
1082 string sid = (*it)->GetSeq_id(1).AsFastaString();
1084 if (sid.substr(0, 4) ==
"LCL|") sid = sid.substr(4, sid.length());
1085 if ((sid.substr(0, 2) ==
"IG" || sid.substr(0, 2) ==
"TR")
1088 }
else if (sid[0] ==
'J') {
1091 if (s_ct!=
'H' && s_ct!=
'L' && s_ct!=
'K' &&
1092 s_ct!=
'A' && s_ct!=
'B' && s_ct!=
'D' && s_ct!=
'G') s_ct = q_ct[1];
1093 if (s_ct != q_ct[1]) keep =
false;
1099 if ((*it)->GetSeqStrand(0) != q_st) keep =
false;
1103 int q_js = (*it)->GetSeqStart(0);
1104 int q_je = (*it)->GetSeqStop(0);
1106 if (q_je < q_ve - allowed_VJ_distance || q_js > q_ve -
j_wordsize) keep =
false;
1108 if (q_js > q_ve + allowed_VJ_distance || q_je < q_ve +
j_wordsize) keep =
false;
1111 if (!keep) it = align_list.erase(it);
1123 CSeq_align_set::Tdata::iterator it;
1127 while (it != al_D.end()) {
1129 it = al_D.erase(it);
1138 while (it != al_J.end()) {
1140 it = al_J.erase(it);
1148 while (it != al_J.end()) {
1150 it = al_J.erase(it);
1155 while (it != al_D.end()) {
1157 it = al_D.erase(it);
1189 original_align_D->
Assign(*align_D);
1198 original_align_J->
Assign(*align_J);
1202 x_FindDJAln(align_D, align_J, q_ct, q_ms, q_st, q_ve, iq,
false);
1203 if ((original_align_D.
NotEmpty() && !original_align_D->
Get().empty()) && (q_ct ==
"VA" || q_ct ==
"VD")) {
1207 x_FindDJAln(original_align_D, original_align_J, q_ct, q_ms, q_st, q_ve, iq,
true);
1208 int as_heavy_chain_score = 0;
1209 int as_light_chain_score = 0;
1211 if(original_align_J.
NotEmpty() && !original_align_J->
Get().empty()){
1215 if(original_align_D.
NotEmpty() && !original_align_D->
Get().empty()){
1218 if (align_J.
NotEmpty() && !align_J->
Get().empty()){
1223 if (as_heavy_chain_score + d_score> as_light_chain_score){
1225 align_D->
Assign(*original_align_D);
1228 align_J->
Assign(*original_align_J);
1244 if (j_cdr3end > 0 && subject_start - j_cdr3end <= 1) {
1250 max(subject_start,
min(j_cdr3end + 1,
1259 if (subject_end > j_cdr3end) {
1263 }
else if (j_cdr3end > 0 && subject_start - j_cdr3end <= 2) {
1275 if (subject_end > j_cdr3end) {
1278 }
else if (j_cdr3end > 0 && subject_start - j_cdr3end <= 4) {
1290 if (subject_end > j_cdr3end) {
1299 if (j_fwr4end_offset >= 0) {
1326 string q_ct = (*annot)->m_ChainType[0];
1327 bool q_ms = (*annot)->m_MinusStrand;
1329 int q_ve = (q_ms) ? (*annot)->m_GeneInfo[0] : (*annot)->m_GeneInfo[1] - 1;
1338 if (align_D && !align_D.Empty() && !align_D->IsEmpty()) {
1340 CSeq_align_set::Tdata::iterator it = align_list.begin();
1343 it = align_list.begin();
1344 while (it != align_list.end()) {
1348 char s_ct = q_ct[1];
1352 if (d_chain_type !=
"N/A"){
1353 if (d_chain_type[1] != q_ct[1]) keep =
false;
1355 string sid = (*it)->GetSeq_id(1).AsFastaString();
1357 if (sid.substr(0, 4) ==
"LCL|") sid = sid.substr(4, sid.length());
1358 if ((sid.substr(0, 2) ==
"IG" || sid.substr(0, 2) ==
"TR")
1362 if (s_ct!=
'B' && s_ct!=
'D') s_ct = q_ct[1];
1363 if (s_ct != q_ct[1]) keep =
false;
1368 if (!keep) it = align_list.erase(it);
1374 bool strand_found =
false;
1376 if ((*it)->GetSeqStrand(0) == q_st) {
1377 strand_found =
true;
1382 it = align_list.begin();
1383 while (it != align_list.end()) {
1384 if ((*it)->GetSeqStrand(0) != q_st) {
1385 it = align_list.erase(it);
1390 it = align_list.begin();
1391 while (it != align_list.end()) {
1393 int q_ds = (*it)->GetSeqStart(0);
1394 int q_de = (*it)->GetSeqStop(0);
1397 if (!keep) it = align_list.erase(it);
1409 if (align_J && align_J.
NotEmpty() && !align_J->
IsEmpty() && !align_list.empty()) {
1412 CSeq_align_set::Tdata::iterator it = al_J.begin();
1413 while (it != al_J.end()) {
1415 it = al_J.erase(it);
1430 bool q_ms = (*annot)->m_MinusStrand;
1435 if (align_C && !align_C->
Get().empty()) {
1437 CSeq_align_set::Tdata::iterator it = align_list.begin();
1438 while (it != align_list.end()) {
1442 if ((*it)->GetSeqStrand(0) != q_st) keep =
false;
1445 if (!keep) it = align_list.erase(it);
1461 string q_ct = (*annot)->m_ChainType[0];
1462 bool q_ms = (*annot)->m_MinusStrand;
1464 int q_ve = (q_ms) ? (*annot)->m_GeneInfo[0] : (*annot)->m_GeneInfo[1] - 1;
1468 x_FindDJ( results_D, results_J, *annot, align_D, align_J, q_ct, q_ms, q_st, q_ve, iq);
1480 string q_ct = (*annot)->m_ChainType[0];
1483 if (align_D && !align_D.
Empty() && !align_D->
IsEmpty()) {
1487 (*annot)->m_GeneInfo[3] = align->
GetSeqStop(0)+1;
1518 (*annot)->m_GeneInfo[7] = align->
GetSeqStop(0)+1;
1519 if ((*annot)->m_JDomain[3] > 0 && (*annot)->m_JDomain[1] > 0) {
1529 (*annot)->m_CDomain[1] - 1;
1537 (*annot)->m_CDomain[0] = query_start;
1541 int diff =
max(0, (*annot)->m_CDomain[0] - (*annot)->m_JDomain[3] - 1);
1544 if ((*annot)->m_JDomain[4] > 0) {
1545 j_end -= (*annot)->m_JDomain[4];
1547 int j_stop = align_j->
Get().front()->GetSeqStop(1);
1548 int j_extend_max =
max(0, j_end - j_stop);
1549 int extend_len =
min(diff, j_extend_max);
1550 if (extend_len > 0) {
1551 (*annot)->m_JDomain[3] += extend_len;
1567 bool q_ms = (*annot)->m_MinusStrand;
1578 (*annot)->m_GeneInfo[5] = align->
GetSeqStop(0)+1;
1581 if (frame_offset >= 0) {
1582 int frame_adj = (align->
GetSeqStart(1) + 3 - frame_offset) % 3;
1583 (*annot)->m_FrameInfo[2] = (q_ms) ?
1602 string q_ct = (*annot)->m_ChainType[0];
1603 bool q_ms = (*annot)->m_MinusStrand;
1614 (*annot)->m_GeneInfo[3] = align->
GetSeqStop(0)+1;
1627 (*annot)->m_GeneInfo[5] = align->
GetSeqStop(0)+1;
1630 if (frame_offset >= 0) {
1631 int frame_adj = (align->
GetSeqStart(1) + 3 - frame_offset) % 3;
1632 (*annot)->m_FrameInfo[2] = (q_ms) ?
1650 CScope scope_q(*mgr), scope_s(*mgr);
1652 bool annotate_subject =
false;
1659 if (db_name_V == db_name_domain) {
1660 db_domain.
Reset(&(*db_V));
1662 db_domain.
Reset(
new CSeqDB(db_name_domain, db_type));
1664 annotate_subject =
true;
1673 if ((*result)->HasAlignments() && (*gl_results)[iq].HasAlignments()) {
1677 (*gl_results)[iq].GetSeqAlign()->Get().front();
1684 int q_ends[2], q_dir;
1704 int domain_info[10];
1709 CAlnMap s_map((*it)->GetSegs().GetDenseg());
1710 int s_start = (*it)->GetSeqStart(1);
1711 int s_stop = (*it)->GetSeqStop(1);
1727 query.SetId((*it)->GetSeq_id(1));
1734 if (
result.HasAlignments()) {
1740 scope_q.RemoveBioseq(hdl_q);
1744 for (
int i =0;
i<10;
i+=2) {
1746 start = domain_info[
i] - 1;
1747 stop = domain_info[
i+1] - 1;
1754 if (start <= d_stop && stop >= d_start) {
1755 int start_copy = start;
1756 int stop_copy = stop;
1757 if (start_copy < d_start) start_copy = d_start;
1758 if (stop_copy > d_stop) stop_copy = d_stop;
1759 if (start_copy <= stop_copy) {
1772 if (start > s_stop || stop < s_start)
continue;
1774 if (start < s_start) start = s_start;
1776 if (stop > s_stop) stop = s_stop;
1778 if (start > stop)
continue;
1783 if ((start - q_ends[1])*q_dir > 0 || (stop - q_ends[0])*q_dir < 0)
continue;
1785 if ((start - q_ends[0])*q_dir < 0) start = q_ends[0];
1787 if ((stop - q_ends[1])*q_dir > 0) stop = q_ends[1];
1789 if ((start - stop)*q_dir > 0)
continue;
1800 seg = q_map.
GetSeg(aln_stop);
1807 if ((start - stop)*q_dir > 0)
continue;
1818 while (i<10 && annot->m_DomainInfo[
i] < 0)
i+=2;
1819 if (
i < 10 && domain_info[
i] > 0) {
1820 extension = (domain_info[
i] - 1 -
1831 while (i<10 && annot->m_DomainInfo[
i] >=0) {
1848 if (start >= 0 && (start - q_ends[1])*q_dir < 0) {
1852 if ((start - q_ends[1])*q_dir <= 0) {
1860 if (frame_offset >= 0) {
1861 int q_start = (*it)->GetSeqStart(0);
1862 int q_stop = (*it)->GetSeqStop(0);
1863 int q_mid = q_start + q_stop;
1864 int q_dif = q_stop - q_start;
1865 int frame_adj = (3 - ((*it)->GetSeqStart(1) + 3 - frame_offset) % 3) %3;
1866 annot->
m_FrameInfo[0] = (q_mid - q_dir *q_dif)/2 + q_dir * frame_adj;
1875 q_start =
max(q_start, fwr3_stop);
1876 q_mid = q_start + q_stop;
1877 q_dif = q_stop - q_start;
1880 q_stop =
min(q_stop, fwr3_stop);
1881 q_mid = q_start + q_stop;
1882 q_dif = q_stop - q_start;
1886 frame_adj = ((*it)->GetSeqStop(1) + 3 - frame_offset) % 3;
1889 annot->
m_FrameInfo[1] = (q_mid + q_dir *q_dif)/2 - q_dir * frame_adj;
1912 if ((*result)->HasAlignments()) {
1913 int num_aligns = (*result)->GetSeqAlign()->Size();
1928 for (
int i=0;
i<num_aligns; ++
i) {
1938 if ((*result)->HasAlignments()) {
1940 (&*((*result)->GetSeqAlign())));
1955 int num_results =
result->GetNumResults();
1959 for (
int iq = 0; iq< num_queries && ir< num_results; ++iq) {
1970 while(!qid->
Match(*rid)) {
1977 while(ir < num_results && (*
result)[ir].
GetSeqId()->Match(*qid)) {
1983 align_list.insert(align_list.end(), add_list.begin(), add_list.end());
1997 bool new_result = (final_results.
Empty());
2006 int actual_align = 0;
2008 if ((*result)->HasAlignments()) {
2010 (&*((*result)->GetSeqAlign())));
2013 if (num_aligns >= 0) {
2015 if (align_list.size() > (CSeq_align_set::Tdata::size_type)num_aligns) {
2016 CSeq_align_set::Tdata::iterator it = align_list.begin();
2017 for (
int i=0;
i<num_aligns; ++
i) ++it;
2018 align_list.erase(it, align_list.end());
2019 actual_align = num_aligns;
2021 actual_align = align_list.size();
2037 while( !(*final_results)[iq].
GetSeqId()->Match(*
query)) ++iq;
2039 if (!align.
Empty()) {
2045 CSeq_align_set::Tdata::iterator it = align_list.begin();
2046 while (it != align_list.end()) {
2052 if (!align_list.empty()) {
2053 ig_list.insert(ig_list.end(), align_list.begin(), align_list.end());
2079 if ((*result)->HasAlignments()){
2080 (*result)->SetSeqAlign()->Set().clear();
Declares the CBl2Seq (BLAST 2 Sequences) class.
@ eSequenceComparison
Seq-aligns in the BLAST 2 Sequence style (one alignment per query-subject pair)
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eBlastp
Protein-Protein.
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
TNumseg GetSeg(TSeqPos aln_pos) const
CDense_seg::TNumseg TNumseg
TSignedSeqPos GetSeqPosFromSeqPos(TNumrow for_row, TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Runs the BLAST algorithm between 2 sequences.
Defines BLAST error codes (user errors included)
Creates BlastOptionsHandle objects with default values for the programs/tasks requested.
Encapsulates ALL the BLAST algorithm's options.
size_type Size() const
Returns the number of queries found in this query vector.
void SetMaskedRegions(size_type i, TMaskedQueryRegions mqr)
Assign a list of masked regions to one query.
void AddMask(size_type i, CRef< CSeqLocInfo > sli)
Add a masked region to the set for a query.
CRef< CBlastSearchQuery > GetBlastSearchQuery(size_type i) const
Get the CBlastSearchQuery object at index i.
Class to perform a BLAST search on local BLAST databases Note that PHI-BLAST can be run using this cl...
NCBI C++ Object Manager dependant implementation of IQueryFactory.
API for Remote Blast Requests.
Search Results for All Queries.
Search Results for One Query.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
CRef< CBioseq > SeqidToBioseq(const CSeq_id &seqid) const
Get a CBioseq for a given Seq-id.
structure for seqloc info
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
@ eRight
Towards higher aln coord (always to the right)
@ eBackwards
Towards lower seq coord (to the left if plus strand, right if minus)
@ eForward
Towards higher seq coord (to the right if plus strand, left if minus)
Collection of masked regions for a single query sequence.
Class for the messages for an individual query sequence.
Constants used in compositional score matrix adjustment.
@ eNoCompositionBasedStats
Don't use composition based statistics.
void x_AnnotateDomain(CRef< CSearchResultSet > &gl_results, CRef< CSearchResultSet > &dm_results, vector< CRef< CIgAnnotation > > &annot)
Annotate the query chaintype and domains based on blast results.
static bool s_DJNotCompatible(const CSeq_align &d, const CSeq_align &j, bool ms, int margin)
string GetDatabaseName() const
Returns the database name if appropriate, else kEmptyStr for subject sequences.
void SetCompositionBasedStats(ECompoAdjustModes mode)
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
CRef< IQueryFactory > m_Subject
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
double GetEvalueThreshold() const
CRef< CSearchResultSet > Run()
Run the Ig-BLAST engine.
static bool s_CompareSeqAlignByScore(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
CRef< CSeq_align_set > & SetSeqAlign()
CIgAnnotationInfo(CConstRef< CIgBlastOptions > &ig_options)
int GetJDomain(const string &sid)
void x_AnnotateC(CRef< CSearchResultSet > &results_c, CRef< CSearchResultSet > &results_j, vector< CRef< CIgAnnotation > > &annot)
static int max_allowed_VD_distance
CRef< CLocalDbAdapter > m_LocalDb
bool GetDomainInfo(const string sid, int *domain_info)
void SetGapOpeningCost(int g)
static void s_ReadLinesFromFile(const string &fn, vector< string > &lines)
CConstRef< objects::CSeq_align_set > GetSeqAlign() const
Accessor for the Seq-align results.
vector< string > m_ChainType
static void s_SortResultsByEvalue(CRef< CSearchResultSet > &results)
Sort blast results according to evalue.
const string & GetRID(void)
Gets the request id (RID) associated with the search.
static int extend_length3end
void x_AnnotateDJ(CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annot)
Annotate the D and J genes based on blast results.
static bool s_SeqAlignInSet(CSeq_align_set::Tdata &align_list, CRef< CSeq_align > &align)
CRef< CLocalDbAdapter > m_Db[5]
static int max_allowed_VJ_distance_with_D
CRef< CSearchResultSet > Run()
Executes the search.
void x_FindDJAln(CRef< CSeq_align_set > &align_D, CRef< CSeq_align_set > &align_J, string q_ct, bool q_ms, ENa_strand q_st, int q_ve, int iq, bool va_or_vd_as_heavy_chain)
void x_ProcessCResult(CRef< CSearchResultSet > &results_C, vector< CRef< CIgAnnotation > > &annots)
void x_SetAnnotation(vector< CRef< CIgAnnotation > > &annot, CRef< CSearchResultSet > &final_results)
Append annotation info to the final results.
void x_SetupNoOverlapDSearch(const vector< CRef< CIgAnnotation > > &annots, CRef< CSearchResultSet > &results, CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl, int db_type)
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
map< string, int > m_Fwr4EndOffset
void SetGapExtensionCost(int e)
map< string, int > m_DomainIndex
bool IsBlastDb() const
Returns true if this object represents a BLAST database.
static int max_allowed_V_end_to_J_end
CRef< CIgAnnotation > & SetIgAnnotation()
void x_ScreenByAlignLength(CRef< CSearchResultSet > &results, int length)
int GetFrameOffset(const string sid)
void x_SetupDJSearch(const vector< CRef< CIgAnnotation > > &annots, CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl, int db_type)
Prepare blast option handle and query for D, J germline database search.
void SetMismatchPenalty(int p)
vector< string > m_TopGeneIds
CRef< CBlastAncillaryData > GetAncillaryData() const
Accessor for the query's search ancillary.
static string s_RemoveLocalPrefix(const string &sid)
void SetHitlistSize(int s)
Sets HitlistSize.
CRef< objects::CSeq_align_set > SetSeqAlign()
Only intended to be used if you need to edit the seqlign.
CConstRef< CIgBlastOptions > m_IgOptions
void x_SetupCRegionSearch(const vector< CRef< CIgAnnotation > > &annots, CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl)
CRef< CBlastOptionsHandle > m_Options
vector< int > m_DomainData
static void s_AppendResults(CRef< CSearchResultSet > &results, int num_aligns, int gene, CRef< CSearchResultSet > &final_results)
Append blast results to the final results.
const string GetDomainChainType(const string sid)
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
bool Submit(void)
This submits the search (if necessary) and returns immediately.
void x_SetupDbSearch(vector< CRef< CIgAnnotation > > &annot, CRef< IQueryFactory > &qf)
Prepare blast option handle and query for specified database search.
CRef< CSearchResultSet > RunEx()
Performs the same functionality as Run(), but it returns a different data type.
map< string, int > m_FrameOffset
map< string, string > m_DJChainType
void x_AnnotateD(CRef< CSearchResultSet > &results_D, vector< CRef< CIgAnnotation > > &annot)
void x_FillJDomain(CRef< CSeq_align > &align, CRef< CIgAnnotation > &annot)
static bool s_CompareSeqAlignByScoreAndName(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
static int max_v_j_overlap
void x_ProcessDJResult(CRef< CSearchResultSet > &results_V, CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annots)
const string GetDJChainType(const string sid)
void x_ExtendAlign3end(CRef< CSearchResultSet > &results)
CIgAnnotationInfo m_AnnotationInfo
static int extend_length5end
int GetFwr4EndOffset(const string &sid)
static int max_allowed_j_deletion
void push_back(value_type &element)
Add a value to the back of this container.
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
map< string, int > m_JDomainInfo
TQueryMessages GetErrors(int min_severity=eBlastSevError) const
Accessor for the error/warning messsages for this query.
void x_FindDJ(CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, CRef< CIgAnnotation > &annot, CRef< CSeq_align_set > &align_D, CRef< CSeq_align_set > &align_J, string q_ct, bool q_ms, ENa_strand q_st, int q_ve, int iq)
CConstRef< objects::CSeq_id > GetSeqId() const
Accessor for the query's sequence identifier.
void Combine(const TQueryMessages &other)
Combine other messages with these.
void x_AnnotateJ(CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annot)
CRef< CBlastQueryVector > m_Query
static int max_allowed_VJ_distance_without_D
void x_ExtendAlign5end(CRef< CSearchResultSet > &results)
void x_AnnotateV(CRef< CSearchResultSet > &results, vector< CRef< CIgAnnotation > > &annot)
Annotate the V gene based on blast results.
void x_ProcessDGeneResult(CRef< CSearchResultSet > &results_V, CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annots)
static bool s_CompareSeqAlignByEvalue(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
void x_SetChainType(CRef< CSearchResultSet > &results, vector< CRef< CIgAnnotation > > &annot)
Set the subject chain type and frame info.
static string s_MakeTopHitsId(const CSeq_align_set::Tdata &align_list, int num_align)
string m_CustomInternalData
map< string, string > m_DomainChainType
CRef< CSearchDatabase > m_RemoteDb
void x_SetupVSearch(CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl)
Prepare blast option handle and query for V germline database search.
static bool s_IsSeqAlignAsGood(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
void x_ConvertResultType(CRef< CSearchResultSet > &results)
Convert bl2seq result to database search mode.
void SetEntrezQuery(const char *x)
Restrict search to sequences matching this Entrez query.
void SetMatchReward(int r)
bool HasAlignments() const
Return true if there are any alignments for this query.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
CDiagContext_Extra & Print(const string &name, const string &value)
The method does not print the argument, but adds it to the string.
CDiagContext & GetDiagContext(void)
Get diag context instance.
CDiagContext_Extra Extra(void) const
Create a temporary CDiagContext_Extra object.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const string AsFastaString(void) const
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
@ eContent
Untagged human-readable accession or the like.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void RemoveBioseq(const CBioseq_Handle &seq)
Revoke Bioseq previously added using AddBioseq().
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TSeqPos GetBioseqLength(void) const
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
ENa_strand
strand of nucleic acid
Declares CIgBlast, the C++ API for the IG-BLAST engine.
Main class to perform a BLAST search on the local machine.
constexpr bool empty(list< Ts... >) noexcept
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
static SLJIT_INLINE sljit_ins ms(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to represent a single sequence to be fed to BLAST.