103 template<
class container>
bool
110 list<TGi>& use_this_gi,
111 int& comp_adj_method)
113 const string k_GiPrefix =
"gi:";
114 bool hasScore =
false;
115 ITERATE (
typename container, iter, scoreList) {
118 if (
id.GetStr()==
"score"){
119 score = (*iter)->GetValue().GetInt();
120 }
else if (
id.GetStr()==
"bit_score"){
121 bits = (*iter)->GetValue().GetReal();
122 }
else if (
id.GetStr()==
"e_value" ||
id.GetStr()==
"sum_e") {
123 evalue = (*iter)->GetValue().GetReal();
125 }
else if (
id.GetStr()==
"use_this_gi"){
126 Uint4 gi_v = (
Uint4)((*iter)->GetValue().GetInt());
128 }
else if (
id.GetStr()==
"sum_n"){
129 sum_n = (*iter)->GetValue().GetInt();
130 }
else if (
id.GetStr()==
"num_ident"){
131 num_ident = (*iter)->GetValue().GetInt();
132 }
else if (
id.GetStr()==
"comp_adjustment_method") {
133 comp_adj_method = (*iter)->GetValue().GetInt();
137 TGi gi = NStr::StringToNumeric<TGi>(strGi);
138 use_this_gi.push_back(gi);
156 list<string> string_l;
163 list<string>::iterator iter = string_l.begin();
164 while(iter != string_l.end())
177 string errsevmsg[] = {
"UNKNOWN",
"INFO",
"WARNING",
"ERROR",
186 if(iter->level == 4){
// Self-assignment: leaves iter->level unchanged (no effect at runtime).
189 iter->level = iter->level;
195 out << errsevmsg[iter->level] <<
": " << iter->message <<
"\n";
204 vector<string> split_line;
206 ITERATE(vector<string>, iter, split_line) {
216 static bool s_FillDbInfoRemotely(
const string&
dbname,
222 blastdb->SetType() =
info.is_protein
231 info.definition = dbinfo->GetDescription();
232 if (
info.definition.empty())
236 info.total_length = dbinfo->GetTotal_length();
237 info.number_seqs =
static_cast<int>(dbinfo->GetNum_sequences());
252 int dbfilt_algorithm)
261 if (
info.definition.empty())
268 info.filt_algorithm_name.clear();
269 info.filt_algorithm_options.clear();
270 if (dbfilt_algorithm == -1) {
274 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
275 (!defined(NCBI_COMPILER_MIPSPRO)) )
276 string filtering_algorithm;
279 info.filt_algorithm_name,
280 info.filt_algorithm_options);
287 bool is_protein,
int numSeqs,
Int8 numLetters,
string&
tag)
291 info.is_protein = is_protein;
293 info.definition =
string(
"User specified sequence set.");
296 info.definition =
string(
"User specified sequence set ") +
299 info.number_seqs = numSeqs;
300 info.total_length = numLetters;
301 retval.push_back(
info);
306 const string& blastdb_names,
bool is_protein,
307 int dbfilt_algorithm ,
312 bool found_all =
false;
314 vector<string> missing_names;
315 vector< CRef<objects::CBlast4_database_info> > all_db_info =
316 rmt_blast_services.
GetDatabaseInfo(blastdb_names,is_protein,&found_all,&missing_names);
317 if( !missing_names.empty() ){
319 for(
size_t ndx=0 ; ndx < missing_names.size(); ndx++){
320 msg += missing_names[ndx];
322 msg +=
string(
"' not found on NCBI servers.\n");
325 for(
size_t ndx=0 ; ndx < all_db_info.size(); ndx++){
327 objects::CBlast4_database_info &dbinfo = *all_db_info[ndx];
328 info.name = dbinfo.GetDatabase().GetName();
329 info.definition = dbinfo.GetDescription();
330 if (
info.definition.empty())
334 info.total_length = dbinfo.GetTotal_length();
335 info.number_seqs =
static_cast<int>(dbinfo.GetNum_sequences());
336 if (
info.total_length < 0) {
341 msg +=
string(
"' has bad total length on NCBI servers.\n");
345 retval.push_back(
info);
350 vector<CTempString> dbs;
352 retval.reserve(dbs.size());
354 ITERATE(vector<CTempString>,
i, dbs) {
356 info.is_protein = is_protein;
357 bool success =
false;
366 retval.push_back(
info);
371 msg +=
string(
"' not found on NCBI servers.\n");
393 for (
size_t i = 1;
i < dbinfo_list.size();
i++) {
394 db_titles +=
"; " + dbinfo_list[
i].definition;
395 tot_num_seqs +=
static_cast<Int8>(dbinfo_list[
i].number_seqs);
396 tot_length += dbinfo_list[
i].total_length;
411 " total letters\n\n";
415 ITERATE(vector<SDbInfo>, dbinfo, dbinfo_list) {
416 if (dbinfo->subset ==
false) {
417 out <<
" Database: ";
420 if ( !dbinfo->filt_algorithm_name.empty() ) {
421 out <<
" Masked using: '" << dbinfo->filt_algorithm_name <<
"'";
422 if ( !dbinfo->filt_algorithm_options.empty() ) {
423 out <<
", options: '" << dbinfo->filt_algorithm_options <<
"'";
428 out <<
" Posted date: ";
429 out << dbinfo->date <<
"\n";
431 out <<
" Number of letters in database: ";
434 out <<
" Number of sequences in database: ";
439 out <<
" Subset of the database(s) listed below" <<
"\n";
440 out <<
" Number of letters searched: ";
443 out <<
" Number of sequences searched: ";
460 out <<
"Gapped" <<
"\n";
465 out <<
" a alpha sigma";
473 sprintf(
buffer,
"%#8.3g ", k);
475 sprintf(
buffer,
"%#8.3g ", h);
479 sprintf(
buffer,
"%#8.3g ", gbp->
a);
512 bool use_long_seqids =
false;
516 use_long_seqids = (
registry.
Get(
"BLAST",
"LONG_SEQID") ==
"1");
518 if (!use_long_seqids) {
551 if((*iter)->IsTitle()) {
552 all_descr_str += (*iter)->GetTitle();
556 return all_descr_str;
567 const string label(
"Query");
570 label, tabular, rid);
581 const string label(
"Subject");
600 }
else if (tabular) {
624 out <<
"\n" <<
"# RID: " << rid;
626 out <<
"\n" <<
"RID: " << rid <<
"\n";
632 const string& pattern,
637 out << num_patterns <<
" occurrence(s) of pattern: " <<
"\n"
638 << pattern <<
" at position(s) ";
641 for (vector<int>::iterator it =
offsets.begin();
651 out <<
" of query sequence" <<
"\n";
652 out <<
"pattern probability=" <<
prob <<
"\n";
662 list<TGi>& use_this_gi)
664 int comp_adj_method = 0;
667 num_ident, use_this_gi, comp_adj_method);
676 list<string>& use_this_seq)
678 int comp_adj_method = 0;
681 num_ident, use_this_seq, comp_adj_method);
691 list<TGi>& use_this_gi,
692 int& comp_adj_method)
694 bool hasScore =
false;
704 sum_n, num_ident, use_this_gi, comp_adj_method);
711 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
714 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
717 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
720 if(use_this_gi.size() == 0) {
728 const string k_GiPrefix =
"gi:";
729 list<string> use_this_seq;
730 ITERATE(list<TGi>, iter_gi, use_this_gi){
732 use_this_seq.push_back(strSeq);
743 list<string>& use_this_seq,
744 int& comp_adj_method)
746 bool hasScore =
false;
754 list<TGi> use_this_gi;
757 sum_n, num_ident, use_this_gi, comp_adj_method);
764 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
767 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
770 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
773 if(use_this_gi.size() == 0) {
797 const CDbtag& dtg =
id->GetGeneral();
801 retval =
id->GetSeqIdString(with_version);
816 double total_bit_score,
819 string& bit_score_str,
820 string& total_bit_score_str,
821 string& raw_score_str)
823 char evalue_buf[100], bit_score_buf[100], total_bit_score_buf[100];
826 static string kBitScoreFormat(
"%4.1lf");
827 #ifdef CTOOLKIT_COMPATIBLE
828 static bool ctoolkit_compatible =
false;
829 static bool value_set =
false;
831 if (getenv(
"CTOOLKIT_COMPATIBLE")) {
832 kBitScoreFormat.assign(
"%4.0lf");
833 ctoolkit_compatible =
true;
839 if (evalue < 1.0e-180) {
840 snprintf(evalue_buf,
sizeof(evalue_buf),
"0.0");
841 }
else if (evalue < 1.0e-99) {
842 snprintf(evalue_buf,
sizeof(evalue_buf),
"%2.0le", evalue);
843 #ifdef CTOOLKIT_COMPATIBLE
844 if (ctoolkit_compatible) {
// Drop the first character of the formatted e-value by shifting the rest of
// the string, terminating NUL included, one position to the left.
// Source and destination overlap, so memmove is required: strncpy has
// undefined behavior on overlapping ranges, and the previous length argument
// sizeof(evalue_buf-1) was the size of a pointer rather than of the buffer.
845 memmove(evalue_buf, evalue_buf+1,
strlen(evalue_buf+1) + 1);
848 }
else if (evalue < 0.0009) {
849 snprintf(evalue_buf,
sizeof(evalue_buf),
"%3.0le", evalue);
850 }
else if (evalue < 0.1) {
851 snprintf(evalue_buf,
sizeof(evalue_buf),
"%4.3lf", evalue);
852 }
else if (evalue < 1.0) {
853 snprintf(evalue_buf,
sizeof(evalue_buf),
"%3.2lf", evalue);
854 }
else if (evalue < 10.0) {
855 snprintf(evalue_buf,
sizeof(evalue_buf),
"%2.1lf", evalue);
857 snprintf(evalue_buf,
sizeof(evalue_buf),
"%2.0lf", evalue);
860 if (bit_score > 99999){
861 snprintf(bit_score_buf,
sizeof(bit_score_buf),
"%5.3le", bit_score);
862 }
else if (bit_score > 99.9){
863 snprintf(bit_score_buf,
sizeof(bit_score_buf),
"%3.0ld",
866 snprintf(bit_score_buf,
sizeof(bit_score_buf), kBitScoreFormat.c_str(),
869 if (total_bit_score > 99999){
870 snprintf(total_bit_score_buf,
sizeof(total_bit_score_buf),
"%5.3le",
872 }
else if (total_bit_score > 99.9){
873 snprintf(total_bit_score_buf,
sizeof(total_bit_score_buf),
"%3.0ld",
874 (
long)total_bit_score);
876 snprintf(total_bit_score_buf,
sizeof(total_bit_score_buf),
"%2.1lf",
879 evalue_str = evalue_buf;
880 bit_score_str = bit_score_buf;
881 total_bit_score_str = total_bit_score_buf;
893 bool is_first_aln =
true;
894 unsigned int num_align = 0;
897 if ((*iter)->GetSegs().IsDisc()) {
900 subid = &((*iter)->GetSeq_id(1));
901 if(is_first_aln || (!is_first_aln && !subid->
Match(*previous_id))){
909 is_first_aln =
false;
912 new_aln.
Set().push_back(*iter);
921 bool is_first_aln =
true;
922 unsigned int num_align = 0;
925 if ((*iter)->GetSegs().IsDisc()) {
928 subid = &((*iter)->GetSeq_id(1));
929 if(is_first_aln || (!is_first_aln && !subid->
Match(*previous_id))){
937 is_first_aln =
false;
950 bool is_first_aln =
true;
951 unsigned int num_align = 0;
952 bool finishCurrent =
false;
954 if ((*iter)->GetSegs().IsDisc()) {
957 subid = &((*iter)->GetSeq_id(1));
958 if(is_first_aln || (!is_first_aln && !subid->
Match(*previous_id))){
959 finishCurrent = (num_align + 1 ==
number) ?
true :
false;
962 is_first_aln =
false;
965 if(num_align >
number && !finishCurrent) {
968 new_aln.
Set().push_back(*iter);
975 int& num_gaps,
int& num_gap_opens)
977 num_gaps = num_gap_opens = align_length = 0;
982 for (
int i=0;
i<chunk_vec->size();
i++) {
984 int chunk_length = chunk->GetAlnRange().GetLength();
987 if (chunk->IsGap()) {
989 num_gaps += chunk_length;
993 align_length += chunk_length;
1004 for(CSeq_align_set::Tdata::const_iterator iter =
source.Get().begin();
1005 iter !=
source.Get().end(); iter++) {
1006 if((*iter)->IsSetSegs()){
1010 for(CSeq_align_set::Tdata::const_iterator iter2 =
1013 target.
Set().push_back(*iter2);
1016 target.
Set().push_back(*iter);
1051 if((*iter)->IsSetDim()){
1052 ds.
SetDim((*iter)->GetDim());
1054 if((*iter)->IsSetIds()){
1055 ds.
SetIds() = (*iter)->GetIds();
1059 if((*iter)->IsSetStarts()){
1064 if((*iter)->IsSetLen()){
1065 ds.
SetLens().push_back((*iter)->GetLen());
1067 if((*iter)->IsSetStrands()){
1072 if((*iter)->IsSetScores()){
1095 if(bdl_id && bdl_id->
Match(
id) &&
1096 (*iter_bdl)->IsSetTaxid() && (*iter_bdl)->CanGetTaxid()){
1097 taxid = (*iter_bdl)->GetTaxid();
1112 frame = (start % 3) + 1;
1139 int score1, sum_n1, num_ident1;
1140 double bits1, evalue1;
1141 list<TGi> use_this_gi1;
1143 int score2, sum_n2, num_ident2;
1144 double bits2, evalue2;
1145 list<TGi> use_this_gi2;
1148 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1149 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1153 bool retval =
false;
1156 if(length1 > 0 && length2 > 0 && num_ident1 > 0 &&num_ident2 > 0 ) {
1157 if (((
double)num_ident1)/length1 == ((
double)num_ident2)/length2) {
1159 retval = evalue1 < evalue2;
1162 retval = ((double)num_ident1)/length1 >= ((double)num_ident2)/length2;
1166 retval = evalue1 < evalue2;
1181 int score1, sum_n1, num_ident1;
1182 double bits1, evalue1;
1183 list<TGi> use_this_gi1;
1185 int score2, sum_n2, num_ident2;
1186 double bits2, evalue2;
1187 list<TGi> use_this_gi2;
1189 GetAlnScores(*(info1->
Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1190 GetAlnScores(*(info2->
Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1191 return bits1 > bits2;
1200 bool retval =
false;
1203 retval = cov1 > cov2;
1204 }
else if (cov1 == cov2) {
1205 int score1, sum_n1, num_ident1;
1206 double bits1, evalue1;
1207 list<TGi> use_this_gi1;
1209 int score2, sum_n2, num_ident2;
1210 double bits2, evalue2;
1211 list<TGi> use_this_gi2;
1212 GetAlnScores(*(info1->
Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1213 GetAlnScores(*(info2->
Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1214 retval = evalue1 < evalue2;
1223 int start1 = 0, start2 = 0;
1230 start1 =
min(info1->
Get().front()->GetSeqStart(0),
1231 info1->
Get().front()->GetSeqStop(0));
1232 start2 =
min(info2->
Get().front()->GetSeqStart(0),
1233 info2->
Get().front()->GetSeqStop(0));
1235 if (start1 == start2) {
1237 int score1, sum_n1, num_ident1;
1238 double bits1, evalue1;
1239 list<TGi> use_this_gi1;
1241 int score2, sum_n2, num_ident2;
1242 double bits2, evalue2;
1243 list<TGi> use_this_gi2;
// Equal start positions are ordered by e-value, so fetch the scores of the
// front element of each input. The second call must read info2: reading
// info1 twice made evalue1 and evalue2 identical, so the comparison that
// follows could never be true.
1246 GetAlnScores(*(info1->
Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1247 GetAlnScores(*(info2->
Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1248 return evalue1 < evalue2;
1251 return start1 < start2;
1261 int score1, sum_n1, num_ident1;
1262 double bits1, evalue1;
1263 list<TGi> use_this_gi1;
1265 int score2, sum_n2, num_ident2;
1266 double bits2, evalue2;
1267 list<TGi> use_this_gi2;
1270 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1271 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1272 return bits1 > bits2;
1280 int start1 = 0, start2 = 0;
1285 if (start1 == start2) {
1287 int score1, sum_n1, num_ident1;
1288 double bits1, evalue1;
1289 list<TGi> use_this_gi1;
1291 int score2, sum_n2, num_ident2;
1292 double bits2, evalue2;
1293 list<TGi> use_this_gi2;
1296 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1297 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1298 return evalue1 < evalue2;
1302 return start1 < start2;
1310 int start1 = 0, start2 = 0;
1315 if (start1 == start2) {
1317 int score1, sum_n1, num_ident1;
1318 double bits1, evalue1;
1319 list<TGi> use_this_gi1;
1321 int score2, sum_n2, num_ident2;
1322 double bits2, evalue2;
1323 list<TGi> use_this_gi2;
1326 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1327 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1328 return evalue1 < evalue2;
1332 return start1 < start2;
1349 if (do_translation) {
1352 final_aln = denseg_aln;
1368 bool do_translation) {
1369 double identity = 0;
1380 if (do_translation) {
1383 final_aln = denseg_aln;
1397 alnvec.GetWholeAlnSeqString(0,
query);
1398 alnvec.GetWholeAlnSeqString(1,
subject);
1403 for (
int i = 0;
i < length; ++
i) {
1410 identity = ((double)num_ident)/length;
1419 double &percentIdent1,
1420 double &percentIdent2)
1449 double evalue1 = seqSetInfo1->evalue;
1450 double evalue2 = seqSetInfo2->evalue;
1451 double percentIdent1 = seqSetInfo1->percent_identity;
1452 double percentIdent2 = seqSetInfo2->percent_identity;
1454 bool retval =
false;
1455 if(percentIdent1 < 0 || percentIdent2 < 0) {
1458 if(percentIdent1 > 0 &&percentIdent2 > 0) {
1459 if (percentIdent1 == percentIdent2) {
1460 retval = evalue1 < evalue2;
1463 retval = percentIdent1 >= percentIdent2;
1466 retval = evalue1 < evalue2;
1474 int score1, score2, sum_n, num_ident;
1475 double bits, evalue;
1476 list<TGi> use_this_gi;
1477 double total_bits1 = 0, total_bits2 = 0;
1481 sum_n, num_ident, use_this_gi);
1482 total_bits1 += bits;
1487 sum_n, num_ident, use_this_gi);
1488 total_bits2 += bits;
// Returns true when the first accumulated bit-score total is greater than or
// equal to the second.
// NOTE(review): the result is also true when both totals are equal. If this
// function is passed to std::sort / list::sort as the ordering predicate, the
// Compare requirements call for a strict comparison (`>`) - confirm against
// the callers before changing.
1492 return total_bits1 >= total_bits2;
1496 #ifndef NCBI_COMPILER_WORKSHOP
1505 const string& mv_build_name)
1511 id1 = &(info1->
Get().front()->GetSeq_id(1));
1512 id2 = &(info2->
Get().front()->GetSeq_id(1));
1514 int linkout1 = 0, linkout2 = 0;
1533 const string& mv_build_name)
1537 #ifndef NCBI_COMPILER_WORKSHOP
1543 bool do_translation,
CScope& scope,
int
1545 const string& mv_build_name)
1550 if (sort_method == 1) {
1551 #ifndef NCBI_COMPILER_WORKSHOP
1555 }
else if (sort_method == 2) {
1557 }
else if (sort_method == 3) {
1569 const string& mv_build_name)
1573 int linkoutPrev = 0;
1576 const CSeq_id&
id = (*iter)->GetSeq_id(1);
1581 if(prevSubjectId.
Empty() || !
id.Match(*prevSubjectId)){
1582 prevSubjectId = &id;
1583 linkout = linkoutdb ? linkoutdb->
GetLinkout(
id, mv_build_name): 0;
1584 linkoutPrev = linkout;
1588 linkout = linkoutPrev;
1591 if (sort_method == 1) {
1592 target[1]->Set().push_back(*iter);
1593 }
else if (sort_method == 2){
1594 target[0]->Set().push_back(*iter);
1596 target[1]->Set().push_back(*iter);
1599 if (sort_method == 1) {
1600 target[0]->Set().push_back(*iter);
1601 }
else if (sort_method == 2) {
1602 target[1]->Set().push_back(*iter);
1604 target[0]->Set().push_back(*iter);
1608 target[0]->Set().push_back(*iter);
1612 target[0]->Set().push_back(*iter);
1624 const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1625 if(previous_id.
Empty()) {
1627 temp->
Set().push_back(*iter);
1628 target.push_back(temp);
1629 }
else if (cur_id.
Match(*previous_id)){
1630 temp->
Set().push_back(*iter);
1634 temp->
Set().push_back(*iter);
1635 target.push_back(temp);
1637 previous_id = &cur_id;
1652 align_set->
Set().push_back(*iter2);
1666 for(
size_t i = 0;
i < seqIdList.size();
i++) {
1672 const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1673 if(previous_id.
Empty() || !cur_id.
Match(*previous_id)) {
1674 if(
count >= seqIdList.size()) {
1678 if(hitsMap.
find(idString) != hitsMap.
end()) {
1680 temp->
Set().push_back(*iter);
1681 hitsMap[idString] = temp;
1688 else if (cur_id.
Match(*previous_id)){
1690 temp->
Set().push_back(*iter);
1693 previous_id = &cur_id;
1700 vector <string> seqIds;
1708 list< CRef<CSeq_align_set> > orderedSet;
1710 for(
size_t i = 0;
i < seqIds.size();
i++) {
1711 if(hitsMap.
find(seqIds[
i]) != hitsMap.
end()) {
1712 orderedSet.push_back(hitsMap[seqIds[
i]]);
1721 bool success =
false;
1734 if (!strTag.empty())
1736 vector<string> vecInfo;
1746 if (vecInfo.size() != 3)
1751 strRun = vecInfo[0];
1752 strSpotId = vecInfo[1];
1753 strReadIndex = vecInfo[2];
1763 string strRun, strSpotId,strReadIndex;
1770 link +=
"?run=" + strRun;
1771 link +=
"." + strSpotId;
1772 link +=
"." + strReadIndex;
1785 if(!id_general.
Empty() && id_general->
AsFastaString().find(
"gnl|BL_ORD_ID") != string::npos){
1790 if (id_general.
Empty()){
1792 if (id_other.
Empty()){
1793 bestid = id_accession;
1796 bestid = id_general;
1807 bool db_is_na,
string rid,
int query_number,
1808 bool for_alignment) {
1813 if(!id_general.
Empty()
1814 && id_general->
AsFastaString().find(
"gnl|BL_ORD_ID") != string::npos){
1822 bool nodb_path =
false;
1824 if (user_url.find(
"dumpgnl.cgi") ==string::npos){
1829 char *chptr, *dbtmp;
1831 char*
dbname =
new char[
sizeof(char)*length + 2];
1835 dbtmp =
new char[
sizeof(char)*length + 2];
1836 memset(dbtmp,
'\0',
sizeof(
char)*length + 2);
1837 for(
i = 0;
i < length;
i++) {
1845 while (!
isspace((
unsigned char)
dbname[
i]) && j < 256 &&
i < length) {
1853 if((chptr = strrchr(tmpbuff,
'/')) !=
NULL) {
1854 strcat(dbtmp, (
char*)(chptr+1));
// Copy the best ID string into gnl when it is non-empty.
// NOTE(review): strcpy performs no bounds check and the declaration of gnl is
// outside this view - confirm that bestID can never be longer than gnl's
// capacity minus one, or switch to a bounded copy.
1865 if (!bestID.empty()){
1866 strcpy(gnl, bestID.c_str());
1874 if (user_url.find(
"?") == string::npos){
1875 link += user_url +
"?" +
"db=" +
str +
"&na=" + (db_is_na?
"1" :
"0");
1877 if (user_url.find(
"=") != string::npos) {
1880 link += user_url +
"db=" +
str +
"&na=" + (db_is_na?
"1" :
"0");
1883 if (gnl[0] !=
'\0'){
1896 link +=
"&RID=" + rid;
1899 if (query_number > 0){
1903 if (user_url.find(
"dumpgnl.cgi") ==string::npos){
1905 link +=
"&log$=nuclalign";
1907 link +=
"&log$=nucltop";
1934 value_type(
"composition_based_statistics",
""));
1940 bool is_first =
true;
1943 string parameter = it->first;
1945 if (parameters_to_change.count(
NStr::ToLower(parameter)) > 0 ||
1955 it->first +
"=" + parameters_to_change[it->first];
1962 cgi_query += it->first +
"=" + it->second;
1972 string format_type =
ctx.GetRequestValue(
"FORMAT_TYPE").GetValue();
1973 string ridstr =
ctx.GetRequestValue(
"RID").GetValue();
1974 string align_view =
ctx.GetRequestValue(
"ALIGNMENT_VIEW").GetValue();
1976 cgi_query +=
"RID=" + ridstr;
1977 cgi_query +=
"&FORMAT_TYPE=" + format_type;
1978 cgi_query +=
"&ALIGNMENT_VIEW=" + align_view;
1980 cgi_query +=
"&QUERY_NUMBER=" +
ctx.GetRequestValue(
"QUERY_NUMBER").GetValue();
1981 cgi_query +=
"&FORMAT_OBJECT=" +
ctx.GetRequestValue(
"FORMAT_OBJECT").GetValue();
1982 cgi_query +=
"&RUN_PSIBLAST=" +
ctx.GetRequestValue(
"RUN_PSIBLAST").GetValue();
1983 cgi_query +=
"&I_THRESH=" +
ctx.GetRequestValue(
"I_THRESH").GetValue();
1985 cgi_query +=
"&DESCRIPTIONS=" +
ctx.GetRequestValue(
"DESCRIPTIONS").GetValue();
1987 cgi_query +=
"&ALIGNMENTS=" +
ctx.GetRequestValue(
"ALIGNMENTS").GetValue();
1989 cgi_query +=
"&NUM_OVERVIEW=" +
ctx.GetRequestValue(
"NUM_OVERVIEW").GetValue();
1991 cgi_query +=
"&NCBI_GI=" +
ctx.GetRequestValue(
"NCBI_GI").GetValue();
1993 cgi_query +=
"&SHOW_OVERVIEW=" +
ctx.GetRequestValue(
"SHOW_OVERVIEW").GetValue();
1995 cgi_query +=
"&SHOW_LINKOUT=" +
ctx.GetRequestValue(
"SHOW_LINKOUT").GetValue();
1997 cgi_query +=
"&GET_SEQUENCE=" +
ctx.GetRequestValue(
"GET_SEQUENCE").GetValue();
1999 cgi_query +=
"&MASK_CHAR=" +
ctx.GetRequestValue(
"MASK_CHAR").GetValue();
2000 cgi_query +=
"&MASK_COLOR=" +
ctx.GetRequestValue(
"MASK_COLOR").GetValue();
2002 cgi_query +=
"&SHOW_CDS_FEATURE=" +
ctx.GetRequestValue(
"SHOW_CDS_FEATURE").GetValue();
2005 cgi_query +=
"&FORMAT_EQ_TEXT=" +
2007 GetRequestValue(
"FORMAT_EQ_TEXT").
2012 cgi_query +=
"&FORMAT_EQ_OP=" +
2014 GetRequestValue(
"FORMAT_EQ_OP").
2019 cgi_query +=
"&FORMAT_EQ_MENU=" +
2021 GetRequestValue(
"FORMAT_EQ_MENU").
2025 cgi_query +=
"&EXPECT_LOW=" +
ctx.GetRequestValue(
"EXPECT_LOW").GetValue();
2026 cgi_query +=
"&EXPECT_HIGH=" +
ctx.GetRequestValue(
"EXPECT_HIGH").GetValue();
2028 cgi_query +=
"&BL2SEQ_LINK=" +
ctx.GetRequestValue(
"BL2SEQ_LINK").GetValue();
2035 const string& mv_build_name)
2037 bool is_mixed =
false;
2038 bool is_first =
true;
2039 int prev_database = 0;
2043 const CSeq_id&
id = (*iter)->GetSeq_id(1);
2044 int linkout = linkoutdb
2048 if (!is_first && cur_database != prev_database) {
2052 prev_database = cur_database;
2063 bool formatAsMixedDbs =
false;
2064 string mixedDbs =
ctx.GetRequestValue(
"MIXED_DATABASE").GetValue();
2065 if(!mixedDbs.empty()) {
2067 formatAsMixedDbs = (mixedDbs ==
"on" || mixedDbs ==
"true" || mixedDbs ==
"yes") ?
true :
false;
2069 return formatAsMixedDbs;
2079 string lnk_tl_info =
"",
2080 string lnk_title =
"")
2082 const string kLinkTitle=
" title=\"View <@lnk_tl_info@> for <@label@>\" ";
2083 const string kLinkTarget=
"target=\"lnk" + rid +
"\"";
2084 string lnkTitle = (lnk_title.empty()) ? kLinkTitle : lnk_title;
2106 bool textLink =
true)
2109 list<string> linkout_list;
2110 string url_link,lnk_displ,lnk_title,lnkTitleInfo;
2112 vector<string> accs;
2114 string firstAcc = (accs.size() > 0)? accs[0] : labelList;
2123 lnkTitleInfo =
"UniGene cluster";
2124 string uid = !linkoutInfo.
is_na ?
"[Protein Accession]" :
"[Nucleotide Accession]";
2132 linkout_list.push_back(url_link);
2139 url_link = struct_link.empty() ?
kStructureUrl : struct_link;
2143 linkTitle =
" title=\"View 3D structure <@label@>\"";
2147 lnk_displ = textLink ?
"AlphaFold Structure" :
kStructureImg;
2148 linkTitle =
" title=\"View AlphaFold 3D structure <@label@>\"";
2153 string molID,chainID;
2162 linkout_list.push_back(url_link);
2164 if (linkout &
eGeo){
2166 lnk_displ = textLink ?
"GEO Profiles" :
kGeoImg;
2168 lnkTitleInfo =
"Expression profiles";
2177 linkout_list.push_back(url_link);
2179 if(linkout &
eGene){
2183 lnkTitleInfo =
"gene information";
2191 string uid = !linkoutInfo.
is_na ?
"[Protein Accession]" :
"[Nucleotide Accession]";
2200 linkout_list.push_back(url_link);
2206 lnk_displ =
"Map Viewer";
2208 lnkTitleInfo =
"BLAST hits on the " + linkoutInfo.
taxName +
" genome";
2229 linkout_list.push_back(url_link);
2236 string linkTitle =
" title=\"View <@label@> aligned to the " + linkoutInfo.
taxName +
" genome\"";
2243 linkout_list.push_back(url_link);
2250 string linkTitle =
" title=\"View Bioassays involving <@label@>\"";
2259 linkout_list.push_back(url_link);
2265 lnkTitleInfo =
"Bioassay data";
2266 string linkTitle =
" title=\"View Bioassays involving <@label@>\"";
2275 linkout_list.push_back(url_link);
2281 lnkTitleInfo =
"genomic information";
2283 string uid = !linkoutInfo.
is_na ?
"Protein Accession" :
"Nucleotide Accession";
2291 linkout_list.push_back(url_link);
2297 urlTag =
"GENOME_DATA_VIEWER_TRANSCR";
2298 lnkTitleInfo =
"title=\"View the annotation of the transcript <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2301 urlTag = linkoutInfo.
is_na ?
"GENOME_DATA_VIEWER_NUC" :
"GENOME_DATA_VIEWER_PROT";
2302 lnkTitleInfo = linkoutInfo.
is_na ?
2303 "title=\"View BLAST hits for <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as hits and browse to other regions.\""
2305 "title=\"View the annotation of the protein <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2313 seqFrom = (seqFrom == 0) ? seqFrom : seqFrom - 1;
2316 seqTo = (seqTo == 0) ? seqTo : seqTo - 1;
2325 linkout_list.push_back(url_link);
2327 return linkout_list;
2333 const string& cdd_rid,
2334 const string& entrez_term,
2337 bool structure_linkout_as_group,
2338 bool for_alignment,
int cur_align,
2339 string preComputedResID)
2342 list<string> linkout_list;
2348 first_gi = (first_gi ==
ZERO_GI) ? gi : first_gi;
2353 linkoutInfo.
Init(rid,
2362 structure_linkout_as_group,
2375 return linkout_list;
2381 if(linkLetter ==
"U") {
2384 else if(linkLetter ==
"S") {
2387 else if(linkLetter ==
"E") {
2390 else if(linkLetter ==
"G") {
2393 else if(linkLetter ==
"M") {
2396 else if(linkLetter ==
"N") {
2399 else if(linkLetter ==
"B") {
2402 else if(linkLetter ==
"R") {
2405 else if(linkLetter ==
"V") {
2408 else if(linkLetter ==
"T") {
2418 if(linkout_map.count(linkout) > 0){
2419 linkout_map[linkout].push_back(cur_id);
2422 vector <CBioseq::TId > idList;
2423 idList.push_back(cur_id);
2430 const string& mv_build_name,
2441 linkout = (*linkoutdb)->GetLinkout(gi, mv_build_name);
2445 linkout = (*linkoutdb)->GetLinkout(*seqID, mv_build_name);
2448 int linkoutWithoutVersion = (*linkoutdb)->GetLinkout(*seqIDNew, mv_build_name);
2449 if(linkoutWithoutVersion && (linkoutWithoutVersion |
eStructure)) {
2450 linkout = linkout | linkoutWithoutVersion;
2456 cerr <<
"[BLAST FORMATTER EXCEPTION] Problem with linkoutdb: " << e.
GetMsg() << endl;
2465 map<
int, vector <CBioseq::TId > > &linkout_map,
2467 const string& mv_build_name)
2469 if(!linkoutdb)
return;
2475 if(linkout &
eGene){
2481 if (linkout &
eGeo){
2512 map<
int, vector <CBioseq::TId > > &linkout_map,
2514 const string& mv_build_name)
2516 const int kMaxDeflineNum = 10;
2519 iter != bdl.end(); iter++){
2527 if(num > kMaxDeflineNum)
break;
2538 taxName =
info.common_name;
2552 list<string> &linkout_list)
2562 string lnk_displ =
"Identical Proteins";
2566 linkout_list.push_back(url_link);
2575 map<
int, vector < CBioseq::TId > > &linkout_map,
2576 bool getIdentProteins)
2579 list<string> linkout_list;
2581 vector<string> linkLetters;
2583 for(
size_t i = 0;
i < linkLetters.size();
i++) {
2585 vector < CBioseq::TId > idList;
2592 if(linkout_map.
find(linkout) != linkout_map.
end()) {
2593 idList = linkout_map[linkout];
2595 bool disableLink = (linkout == 0 || idList.size() == 0 || ( (linkout &
eStructure) && (linkoutInfo.
cdd_rid ==
"" || linkoutInfo.
cdd_rid ==
"0")));
2597 string giList,labelList;
2599 for (
size_t i = 0;
i < idList.size();
i++) {
2602 if (first_gi ==
ZERO_GI) first_gi = gi;
2607 if(!labelList.empty()) labelList +=
",";
2611 if(!giList.empty() && (linkout &
eBioAssay) && !linkoutInfo.
is_na)
continue;
2612 if(!giList.empty()) giList +=
",";
2616 linkoutInfo.
gnl.clear();
2628 if(one_linkout.size() > 0) {
2629 list<string>::iterator iter = one_linkout.begin();
2630 linkout_list.push_back(*iter);
2634 if(getIdentProteins) {
2637 return linkout_list;
2643 list<string> linkout_list;
2645 if(bdl.size() > 0) {
2647 list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2652 !linkoutInfo.
is_na && bdl.size() > 1);
2654 return linkout_list;
2660 const string& cdd_rid,
2661 const string& entrez_term,
2663 bool structure_linkout_as_group,
2666 string& linkoutOrder,
2671 string &preComputedResID,
2673 const string& mv_build_name)
2676 list<string> linkout_list;
2678 if(bdl.size() > 0) {
2680 list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2683 SLinkoutInfo linkoutInfo;
2684 linkoutInfo.
Init(rid,
2693 structure_linkout_as_group,
2696 linkoutInfo.cur_align = cur_align;
2697 linkoutInfo.taxid = taxid;
2702 !is_na && bdl.size() > 1);
2704 return linkout_list;
2710 bool getIdentProteins)
2712 list<string> linkout_list;
2720 return linkout_list;
2725 const string& cdd_rid,
2726 const string& entrez_term,
2728 bool structure_linkout_as_group,
2731 string& linkoutOrder,
2736 string &preComputedResID,
2738 const string& mv_build_name,
2739 bool getIdentProteins)
2742 list<string> linkout_list;
2746 SLinkoutInfo linkoutInfo;
2747 linkoutInfo.
Init(rid,
2756 structure_linkout_as_group,
2759 linkoutInfo.cur_align = cur_align;
2760 linkoutInfo.taxid = taxid;
2766 return linkout_list;
2782 bool oppositeStrands =
false;
2783 bool isFirst =
false;
2790 query_list.push_back(query_range);
2797 subject_list.push_back(subject_range);
2799 oppositeStrands = (!isFirst) ? (*iter)->GetSeqStrand(0) != (*iter)->GetSeqStrand(1) : oppositeStrands;
2805 return oppositeStrands;
2814 list<CRange<TSeqPos> > merge_list;
2816 bool is_first =
true;
2821 merge_list.push_back(*iter);
2826 merge_list.pop_back();
2828 merge_list.push_back(temp_range);
2829 prev_range = temp_range;
2831 merge_list.push_back(*iter);
2843 list<CRange<TSeqPos> > merge_list;
2845 list<CRange<TSeqPos> > temp;
2852 temp.push_back(seq_range);
2859 int master_covered_lenghth = 0;
2861 master_covered_lenghth += iter->GetLength();
2863 return master_covered_lenghth;
2872 list<CRange<TSeqPos> > query_list;
2873 list<CRange<TSeqPos> > subject_list;
2880 *master_covered_lenghth = 0;
2882 *master_covered_lenghth += iter->GetLength();
2887 from = (from == 0) ? iter->GetFrom() :
min(from,iter->GetFrom());
2888 to =
max(to,iter->GetTo());
2892 return subjectRange;
2899 bool nuc_to_nuc_translation,
2904 const string& mv_build_name) {
2907 if (db_sort == 0 && hit_sort < 1 && hsp_sort < 1)
2910 list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2911 vector< CRef<CSeq_align_set> > seqalign_vec(2);
2917 linkoutdb, mv_build_name);
2925 nuc_to_nuc_translation,
2929 seqalign_hit_total_list.splice(seqalign_hit_total_list.end(),one_seqalign_hit_total_list);
2935 list< CRef<CSeq_align_set> >
2937 bool nuc_to_nuc_translation,
2941 list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2942 list< CRef<CSeq_align_set> > seqalign_hit_list;
2951 nuc_to_nuc_translation);
2962 }
else if (hsp_sort ==
eScore) {
2968 seqalign_hit_total_list.push_back(temp);
2970 return seqalign_hit_total_list;
2975 bool nuc_to_nuc_translation,
2985 nuc_to_nuc_translation,
2996 int score, sum_n, num_ident;
2997 double bits, evalue;
2998 list<TGi> use_this_gi;
3004 sum_n, num_ident, use_this_gi);
3009 if(evalue >= evalueLow && evalue <= evalueHigh) {
3010 new_aln->
Set().push_back(*iter);
3025 if (numerator == denominator)
3028 int retval =(
int) (0.5 + 100.0*((
double)numerator)/((double)denominator));
3029 retval =
min(99, retval);
3036 if (numerator == denominator)
3039 double retval =100*(double)numerator/(
double)denominator;
3045 double percentIdentLow,
3046 double percentIdentHigh)
3048 int score, sum_n, num_ident;
3049 double bits, evalue;
3050 list<TGi> use_this_gi;
3056 sum_n, num_ident, use_this_gi);
3058 if(seqAlnLength > 0 && num_ident > 0) {
3060 if(alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh) {
3061 new_aln->
Set().push_back(*iter);
3071 double percentIdentLow,
3072 double percentIdentHigh)
3074 int score, sum_n, num_ident;
3075 double bits, evalue;
3076 list<TGi> use_this_gi;
3082 sum_n, num_ident, use_this_gi);
3088 if(seqAlnLength > 0 && num_ident > 0) {
3090 if( (evalue >= evalueLow && evalue <= evalueHigh) &&
3091 (alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh)) {
3092 new_aln->Set().push_back(*iter);
3108 double percentIdent,
3112 double percentIdentLow,
3113 double percentIdentHigh,
3119 bool isInRange =
false;
3122 string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3127 if(evalueLow >= 0 && percentIdentLow >= 0 && queryCoverLow >= 0) {
3128 isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3129 (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh) &&
3130 (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3132 else if(evalueLow >= 0 && percentIdentLow >= 0) {
3133 isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3134 (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3136 else if(evalueLow >= 0 && queryCoverLow >= 0) {
3137 isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3138 (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3140 else if(queryCoverLow >= 0 && percentIdentLow >= 0) {
3141 isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh) &&
3142 (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3144 else if(evalueLow >= 0) {
3145 isInRange = (evalue >= evalueLow && evalue <= evalueHigh);
3147 else if(percentIdentLow >= 0) {
3148 isInRange = (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3150 else if(queryCoverLow >= 0) {
3151 isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3159 double percentIdentLow,
3160 double percentIdentHigh,
3164 list< CRef<CSeq_align_set> > seqalign_hit_total_list;
3165 list< CRef<CSeq_align_set> > seqalign_hit_list;
3182 seqalign_hit_total_list.push_back(temp);
3195 int alignCount = 0,hspCount = 0;
3197 const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
3198 if(prevQueryId.
Empty() || !newQueryId.
Match(*prevQueryId)){
3199 if (hspCount >= maxHsps) {
3203 prevQueryId = &newQueryId;
3205 if (alignCount < maxAligns) {
3206 const CSeq_id& newSubjectId = (*iter)->GetSeq_id(1);
3208 if(prevSubjectId.
Empty() || !newSubjectId.
Match(*prevSubjectId)){
3210 prevSubjectId = &newSubjectId;
3214 new_aln->
Set().push_back(*iter);
3225 if(queryNumber == 0) {
3231 int currQueryNum = 0;
3234 const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
3235 if(prevQueryId.
Empty() || !newQueryId.
Match(*prevQueryId)){
3237 prevQueryId = &newQueryId;
3240 if(currQueryNum == queryNumber) {
3241 if(new_aln.
Empty()) {
3244 new_aln->
Set().push_back(*iter);
3246 else if(currQueryNum > queryNumber) {
3256 string l_cfg_file_name;
3260 bool cfgExists =
true;
3262 string l_fmtcfg_env;
3263 if(
NULL != getenv(
"NCBI") ) l_ncbi_env = getenv(
"NCBI");
3264 if(
NULL != getenv(
"FMTCFG") ) l_fmtcfg_env = getenv(
"FMTCFG");
3266 if( l_fmtcfg_env.empty() )
3267 l_cfg_file_name =
".ncbirc";
3269 l_cfg_file_name = l_fmtcfg_env;
3271 CFile l_fchecker( l_cfg_file_name );
3272 cfgExists = l_fchecker.
Exists();
3273 if( (!cfgExists) && (!l_ncbi_env.empty()) ) {
3274 if( l_ncbi_env.rfind(
"/") != (l_ncbi_env.length() -1 ))
3275 l_ncbi_env.append(
"/");
3276 l_cfg_file_name = l_ncbi_env + l_cfg_file_name;
3277 CFile l_fchecker2( l_cfg_file_name );
3278 cfgExists = l_fchecker2.
Exists();
3283 if( l_dbg ) fprintf(stderr,
"REGISTRY: %s\n",l_cfg_file_name.c_str());
3305 string l_key, l_host_port, l_format;
3306 string l_secion_name =
"BLASTFMTUTIL";
3307 string l_fmt_suffix =
"_FORMAT";
3308 string l_host_port_suffix =
"_HOST_PORT";
3309 string l_subst_pattern;
3315 string l_base_dir =
m_Reg->Get(l_secion_name,
"INCLUDE_BASE_DIR");
3316 if( !l_base_dir.empty() && ( l_base_dir.rfind(
"/") != (l_base_dir.length()-1)) ) {
3317 l_base_dir.append(
"/");
3321 string default_host_port;
3325 l_subst_pattern=
"<@"+l_key_ndx+
"@>";
3326 l_host_port =
m_Reg->Get(l_secion_name, l_key_ndx);
3329 if( l_host_port.empty()){
3330 l_key = url_name + l_host_port_suffix; l_subst_pattern=
"<@"+l_key+
"@>";
3331 l_host_port =
m_Reg->Get(l_secion_name, l_key);
3333 if( l_host_port.empty())
return GetURLDefault(url_name,index);
3336 l_key = url_name + l_fmt_suffix ;
3339 l_format =
m_Reg->Get(l_secion_name, l_key_ndx);
3342 if( l_format.empty() ) l_format =
m_Reg->Get(l_secion_name, l_key);
3345 string l_format_file = l_base_dir + l_format;
3346 CFile l_fchecker( l_format_file );
3347 bool file_name_mode = l_fchecker.
Exists();
3348 if( file_name_mode ) {
3349 string l_inc_file_name = l_format_file;
3353 char *l_mem =
new char [ (size_t) l_inc_size + 1];
3354 memset( l_mem,0, (
size_t) l_inc_size + 1 ) ;
3355 l_file.seekg( 0, ios::beg );
3356 l_file.read(l_mem, l_inc_size);
3358 l_format.erase(); l_format.reserve( (
size_t)l_inc_size + 1 );
3363 result_url =
NStr::Replace(l_format,l_subst_pattern,l_host_port);
3365 if( result_url.empty())
return GetURLDefault(url_name,index);
3373 string search_name = url_name;
3377 if( (url_it = sm_TagUrlMap.find( search_name ) ) != sm_TagUrlMap.end()) {
3382 string error_msg =
"CAlignFormatUtil::GetURLDefault:no_defualt_for"+url_name;
3392 if (matrix_name ==
NULL ||
3399 if (packed_mtx ==
NULL) {
3416 retval(
'*',
'*') = 1;
3418 retval(
'U',
'U') = retval(
'C',
'C');
3419 retval(
'U',
'C') = retval(
'C',
'C');
3420 retval(
'C',
'U') = retval(
'C',
'C');
3427 string tmplParam =
"<@" + tmplParamName +
"@>";
3435 string tmplParam =
"<@" + tmplParamName +
"@>";
3442 templParamVal =
AddSpaces(templParamVal, maxParamValLength, spacesFormatFlag);
3443 string outString =
MapTemplate(inpString,tmplParamName,templParamVal);
3453 if(maxParamValLength >= paramVal.size()) {
3454 size_t numSpaces = maxParamValLength - paramVal.size() + 1;
3456 numSpaces = numSpaces/2;
3458 spaceString.assign(numSpaces,
' ');
3461 paramVal = paramVal.substr(0, maxParamValLength - 3) +
"...";
3465 paramVal = paramVal + spaceString;
3468 paramVal = spaceString + paramVal + spaceString;
3471 paramVal = spaceString + paramVal;
3486 string httpProt =
"https:";
3487 if(!config_reg.
Empty()) {
3488 if(config_reg.
HasEntry(
"BLASTFMTUTIL",
"PROTOCOL")) {
3489 httpProt = config_reg.
Get(
"BLASTFMTUTIL",
"PROTOCOL");
3514 string db,logstr_moltype;
3517 logstr_moltype =
"nucl";
3520 logstr_moltype =
"prot";
3522 string logstr_location = (seqUrlInfo->
isAlignLink) ?
"align" :
"top";
3536 string url_link = urlTemplate;
3537 if (seqUrlInfo->
user_url.find(
"sra.cgi") != string::npos) {
3538 string strRun, strSpotId,strReadIndex;
3555 const unsigned int kWgsProjLength = 4;
3556 const unsigned int kWgsProjIDLengthMin = 8;
3557 const unsigned int kWgsProjIDLengthMax = 10;
3560 if (wgsAccession.size() < 6) {
3569 string wgsProj = wgsAccession.substr(0,kWgsProjLength);
3570 for (
size_t i = 0;
i < wgsProj.length();
i ++){
3577 string wgsId = wgsAccession.substr(kWgsProjLength);
3578 if(wgsId.length() >= kWgsProjIDLengthMin && wgsId.length() <= kWgsProjIDLengthMax) {
3579 for (
size_t i = 0;
i < wgsId.length();
i ++){
3596 const unsigned int kWgsProgNameLength = 6;
3599 wgsProjName = wgsAccession.substr(0,kWgsProgNameLength);
3611 string title =
"title=\"Show report for " + seqUrlInfo->accession +
"\" ";
3613 string temp_class_info =
kClassInfo; temp_class_info +=
" ";
3615 string wgsAccession = seqUrlInfo->accession;
3620 if(isWGS && seqUrlInfo->useTemplates) {
3626 else if (hasTextSeqID) {
3627 string entrezTag = (seqUrlInfo->useTemplates) ?
"ENTREZ_TM" :
"ENTREZ";
3631 if(!seqUrlInfo->useTemplates) {
3644 if(seqUrlInfo->useTemplates) {
3646 url_link = l_TraceUrl + (
string)
"?cmd=retrieve&dopt=fasta&val=" + actual_id +
"&RID=" + seqUrlInfo->rid;
3650 temp_class_info = (!seqUrlInfo->defline.empty())?
CAlignFormatUtil::MapTemplate(temp_class_info,
"defline",seqUrlInfo->defline):temp_class_info;
3661 user_url = (seqUrlInfo->addCssInfo) ?
m_Reg->Get(
"LOCAL_ID",
"TOOL_URL_ALIGN") :
m_Reg->Get(
"LOCAL_ID",
"TOOL_URL");
3668 temp_class_info = (!seqUrlInfo->defline.empty())?
CAlignFormatUtil::MapTemplate(temp_class_info,
"defline",seqUrlInfo->defline):temp_class_info;
3675 seqUrlInfo->seqUrl = url_link;
3693 string title =
"title=\"Show report for " + seqUrlInfo->accession +
"\" ";
3696 !((seqUrlInfo->user_url.find(
"dumpgnl.cgi") != string::npos && seqUrlInfo->gi >
ZERO_GI) ||
3697 (seqUrlInfo->user_url.find(
"maps.cgi") != string::npos))) {
3699 string url_with_parameters,toolURLParams;
3700 if(
m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType !=
"newblast") {
3701 toolURLParams =
m_Reg->Get(seqUrlInfo->blastType,
"TOOL_URL_PARAMS");
3703 if(!toolURLParams.empty()) {
3704 string urlLinkTemplate = seqUrlInfo->user_url + toolURLParams;
3705 url_with_parameters =
s_MapURLLink(urlLinkTemplate, seqUrlInfo, *ids);
3708 if (seqUrlInfo->user_url.find(
"sra.cgi") != string::npos) {
3713 seqUrlInfo->database,
3714 seqUrlInfo->isDbNa, seqUrlInfo->rid,
3715 seqUrlInfo->queryNumber,
3716 seqUrlInfo->isAlignLink);
3720 if (!seqUrlInfo->useTemplates) {
3722 if(seqUrlInfo->addCssInfo) {
3725 url_link +=
"<a " + title + deflineInfo +
"href=\"";
3727 url_link += url_with_parameters;
3728 if (!seqUrlInfo->useTemplates) url_link +=
"\">";
3735 seqUrlInfo->seqUrl = url_link;
3750 if ((seqUrlInfo->advancedView || seqUrlInfo->blastType ==
"mapview" || seqUrlInfo->blastType ==
"mapview_prev") ||
3751 seqUrlInfo->blastType ==
"gsfasta" || seqUrlInfo->blastType ==
"gsfasta_prev") {
3755 string url_link =
GetIDUrl(seqUrlInfo,ids);
3763 string linkURL =
GetIDUrl(seqUrlInfo,ids);
3764 if(!linkURL.empty()) {
3778 static string s_MapCustomLink(
string linkUrl,
string reportType,
string accession,
string linkText,
string linktrg,
string linkTitle =
kCustomLinkTitle,
string linkCls =
"")
3796 list<string> customLinksList;
3801 linkUrl = seqUrlInfo->
seqUrl;
3805 string linkText = (seqUrlInfo->
isDbNa) ?
"GenBank" :
"GenPept";
3807 linkUrl +=
"&from=<@fromHSP@>&to=<@toHSP@>";
3808 linkTiltle =
"Aligned region spanning positions <@fromHSP@> to <@toHSP@> on <@seqid@>";
3811 customLinksList.push_back(link);
3813 return customLinksList;
3820 string dbtype = (seqUrlInfo->
isDbNa) ?
"nuccore" :
"protein";
3825 string seqViewerParams;
3827 seqViewerParams =
m_Reg->Get(seqUrlInfo->
blastType,
"SEQVIEW_PARAMS");
3829 seqViewerParams = seqViewerParams.empty() ?
kSeqViewerParams : seqViewerParams;
3834 string linkTitle =
"Show alignment to <@seqid@> in <@custom_report_type@>";
3840 link_loc =
"fromSubj";
3844 link_loc =
"fromHSP";
3845 linkTitle +=
" for <@fromHSP@> to <@toHSP@> range";
3849 string title = (seqUrlInfo->
isDbNa) ?
"Nucleotide Graphics" :
"Protein Graphics";
3859 list<string> customLinksList =
GetGiLinksList(seqUrlInfo,hspRange);
3861 if(!graphicLink.empty()) {
3862 customLinksList.push_back(graphicLink);
3864 return customLinksList;
3869 int customLinkTypes = customLinkTypesInp;
3877 else if(seqUrlInfo->
blastType ==
"sra") {
3880 else if(seqUrlInfo->
blastType ==
"snp") {
3883 else if(seqUrlInfo->
blastType ==
"gsfasta") {
3886 return customLinkTypes;
3894 objects::CScope &scope,
3895 int customLinkTypes)
3897 list<string> customLinksList;
3898 string linkUrl,link;
3904 linkUrl = seqUrlInfo->
seqUrl;
3906 customLinksList.push_back(link);
3910 customLinksList.push_back(link);
3914 customLinksList.push_back(link);
3918 customLinksList.push_back(link);
3921 linkUrl = seqUrlInfo->
seqUrl;
3923 customLinksList.push_back(link);
3926 linkUrl = seqUrlInfo->
seqUrl;
3928 customLinksList.push_back(link);
3933 linkUrl = seqUrlInfo->
resourcesUrl + rs +
"?report=FLT";
3937 customLinksList.push_back(link);
3941 customLinksList.push_back(link);
3945 customLinksList.push_back(link);
3948 linkUrl = seqUrlInfo->
seqUrl;
3950 customLinksList.push_back(link);
3952 return customLinksList;
3958 objects::CScope &scope)
3962 string linkUrl,link;
3973 if(!linkUrl.empty()) {
3974 linkUrl +=
"&segs="+ seqUrlInfo->
segs;
3984 objects::CScope &scope)
3992 linkUrl = seqUrlInfo->
seqUrl;
3996 linkUrl = seqUrlInfo->
seqUrl;
3997 vector<string> parts;
4001 if(parts.size() > 1) {
4004 linkUrl = seqUrlInfo->
resourcesUrl + rs +
"?report=fasta";
4025 const CDbtag& dtg = subject_id.GetGeneral();
4026 const string& dbName = dtg.
GetDb();
4044 list<TGi> use_this_gi;
4046 use_this_gi.clear();
4049 num_ident, use_this_gi);
4052 seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4054 seqSetInfo->use_this_gi = use_this_gi;
4055 seqSetInfo->bit_score = bits;
4056 seqSetInfo->raw_score = score;
4057 seqSetInfo->evalue = evalue;
4058 seqSetInfo->match = num_ident;
4061 seqSetInfo->flip =
false;
4063 return seqSetInfo.release();
4078 if(aln.
Get().empty())
4083 double total_bits = 0;
4084 double highest_bits = 0;
4085 double lowest_evalue = 0;
4086 int highest_length = 1;
4087 int highest_ident = 0;
4089 double totalLen = 0;
4091 list<TGi> use_this_gi;
4098 totalLen += align_length;
4101 num_ident, use_this_gi);
4102 use_this_gi.clear();
4112 if (bits > highest_bits) {
4113 highest_length = align_length;
4114 highest_ident = num_ident;
4118 if (bits > highest_bits) {
4119 highest_bits = bits;
4120 lowest_evalue = evalue;
4123 seqSetInfo->
match = highest_ident;
4129 seqSetInfo->
evalue = lowest_evalue;
4130 seqSetInfo->
hspNum =
static_cast<int>(aln.
Size());
4144 if(aln.
Get().empty())
4147 double highest_bits = 0;
4148 int highest_length = 1;
4149 int highest_ident = 0;
4151 list<TGi> use_this_gi;
4157 num_ident, use_this_gi);
4166 if (bits > highest_bits) {
4167 highest_length = align_length;
4168 highest_ident = num_ident;
4170 highest_bits = bits;
4175 return percent_identity;
4179 template<
class container>
bool
4183 double& totalBitScore,
4184 int& percentCoverage,
4185 double& percentIdent,
4190 list<TGi>& use_this_gi)
4192 const string k_GiPrefix =
"gi:";
4193 bool hasScore =
false;
4196 ITERATE (
typename container, iter, scoreList) {
4200 if (
id.GetStr()==
"seq_evalue") {
4201 evalue = (*iter)->GetValue().GetReal();
4202 }
else if (
id.GetStr()==
"seq_bit_score"){
4203 bitScore = (*iter)->GetValue().GetReal();
4204 }
else if (
id.GetStr()==
"seq_total_bit_score"){
4205 totalBitScore = (*iter)->GetValue().GetReal();
4206 }
else if (
id.GetStr()==
"seq_percent_coverage"){
4207 percentCoverage = (*iter)->GetValue().GetInt();
4208 }
else if (
id.GetStr()==
"seq_percent_identity" && (*iter)->GetValue().IsInt()){
4209 percentIdent = (*iter)->GetValue().GetInt();
4210 }
else if (
id.GetStr()==
"seq_percent_identity" && (*iter)->GetValue().IsReal()){
4211 percentIdent = (*iter)->GetValue().GetReal();
4212 }
else if (
id.GetStr()==
"seq_hspnum"){
4213 hspNum = (*iter)->GetValue().GetInt();
4214 }
else if (
id.GetStr()==
"seq_align_totlen"){
4215 totalLen = (*iter)->GetValue().GetReal();
4216 }
else if (
id.GetStr()==
"score"){
4217 rawScore = (*iter)->GetValue().GetInt();
4218 }
else if (
id.GetStr()==
"use_this_gi"){
4219 Uint4 gi_v = (
Uint4) ((*iter)->GetValue().GetInt());
4221 }
else if (
id.GetStr()==
"sum_n"){
4222 sum_n = (*iter)->GetValue().GetInt();
4226 TGi gi = NStr::StringToNumeric<TGi>(strGi);
4227 use_this_gi.push_back(gi);
4238 const string k_GiPrefix =
"gi:";
4245 for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4254 TGi gi = NStr::StringToNumeric<TGi>(strGi);
4255 use_this_gi.push_back(gi);
4273 for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4280 use_this_seq.push_back(*acc_iter);
4292 bool hasScore =
false;
4294 double bitScore = -1;
4295 double totalBitScore = -1;
4296 int percentCoverage = -1;
4297 double percentIdent = -1;
4299 double totalLen = 0;
4302 list<TGi> use_this_gi;
4303 list<string> use_this_seq;
4307 hasScore =
s_GetBlastScore(aln.
GetScore(),evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4313 evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4316 evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4319 evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4323 if(use_this_gi.size() == 0) {
4332 seqSetInfo->evalue = evalue;
4333 seqSetInfo->bit_score = bitScore;
4334 seqSetInfo->total_bit_score = totalBitScore;
4335 seqSetInfo->percent_coverage = percentCoverage;
4336 seqSetInfo->percent_identity = percentIdent;
4337 seqSetInfo->hspNum = hspNum;
4338 seqSetInfo->totalLen = (
Int8)totalLen;
4340 seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4343 seqSetInfo->use_this_seq = use_this_seq;
4344 seqSetInfo->raw_score = rawScore;
4347 seqSetInfo->flip =
false;
4349 return seqSetInfo.release();
4354 list<TGi>& use_this_gi,
4365 list<TGi>& use_this_gi,
4384 iter != bdl.end(); iter++){
4388 if ((*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4389 taxid = (*iter)->GetTaxid();
4391 if (!use_this_gi.empty()) {
4392 ITERATE(list<TGi>, iter_gi, use_this_gi){
4393 if(cur_gi == *iter_gi){
4400 if ((*iter_id)->Match(aln_id)
4402 (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4422 const string k_GiPrefix =
"gi:";
4423 const string k_SeqIDPrefix =
"seqid:";
4447 ITERATE(list<string>, iter_seq, use_this_seq){
4456 list<TGi> use_this_gi;
4457 ITERATE(list<string>, iter_seq, use_this_seq){
4460 if(isGi) use_this_gi.push_back(NStr::StringToNumeric<TGi>(strGI));
4473 ITERATE(list<string>, iter_seq, use_this_seq){
4476 if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4481 if(isGiList) *isGiList = isGi;
4490 if(alnSeqID->
IsGi()) {
4497 bool found = std::find(seqList.begin(), seqList.end(), curSeqID) != seqList.end();
4500 ITERATE(list<string>, iter_seq, use_this_seq){
4502 found = std::find(seqList.begin(), seqList.end(), useThisSeq) != seqList.end();
4513 bool has_match =
false;
4515 ITERATE(list<string>, iter_seq, use_this_seq) {
4518 if(useThisSeq == textSeqIDToMatch) {
4528 list<string> new_use_this_seq;
4529 bool hasAccType =
false;
4532 ITERATE(list<string>, iter_seq, use_this_seq) {
4535 if(useThisSeqAccType != accessionType) {
4536 new_use_this_seq.push_back(useThisSeq);
4542 use_this_seq = new_use_this_seq;
4548 list<string>& use_this_seq,
4564 if(gi) *gi =
FindGi(*ids);
4565 if(textSeqID) *textSeqID =
GetLabel(wid,
true);
4569 iter != bdl.end(); iter++){
4573 string curSeqID =
GetLabel(wid,
true);
4574 if (taxid && (*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4575 *taxid = (*iter)->GetTaxid();
4577 if (!use_this_seq.empty()) {
4578 ITERATE(list<string>, iter_seq, use_this_seq){
4581 if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4588 if ((*iter_id)->Match(aln_id)
4590 (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4597 if(gi) *gi = cur_gi;
4598 if(textSeqID) *textSeqID = curSeqID;
4610 list<TGi>& use_this_gi)
4619 iter != bdl.end(); iter++){
4622 if (!use_this_gi.empty()) {
4623 ITERATE(list<TGi>, iter_gi, use_this_gi){
4624 if(cur_gi == *iter_gi){
4631 if ((*iter_id)->Match(aln_id)
4633 (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4665 list<CRef<CSeq_align> >::iterator mItr=alnset.
Set().begin();
4679 for(;mItr != alnset.
Set().end(); ++mItr) {
4689 if(coll[0] == align_subj_rng) {
4693 subj_rng_coll += align_subj_rng;
4705 query_rng.
SetFrom(
map.GetSeqPosFromAlnPos(0,subj_aln_start));
4706 query_rng.
SetTo(
map.GetSeqPosFromAlnPos(0,subj_aln_end));
4710 subj_rng_coll += subj_rng;
4728 for (CBioseq::TId::const_iterator iter = ids.begin(); iter != ids.end();
4730 if ((*iter)->Which() == choice){
4757 for (;desc_t; ++desc_t) {
4767 if (
id.IsGi() ||
id.IsPrf() ||
id.IsPir()) {
4768 retval =
id.AsFastaString();
4771 retval =
id.GetSeqIdString(
true);
4780 bool hasTextSeqID =
true;
4786 hasTextSeqID =
false;
4790 if(hasTextSeqID && textSeqID) {
4793 return hasTextSeqID;
4800 bool hasTextSeqID =
false;
4810 if(!seqID.
Empty()) {
4811 hasTextSeqID =
true;
4814 return hasTextSeqID;
4818 vector <string> &seqList)
4821 list<string> use_this_seq;
4826 subid = &((*iter)->GetSeq_id(1));
4827 if(previous_id.
Empty() || !subid->
Match(*previous_id)){
4828 use_this_seq.clear();
4833 previous_id = subid;
4835 new_aln->
Set().push_back(*iter);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBlastServices class.
TDim GetNumRows(void) const
TSeqPos GetAlnStop(TNumseg seg) const
TSignedRange GetSeqAlnRange(TNumrow row) const
CRef< CAlnChunkVec > GetAlnChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
API for Remote Blast Services.
static CNcbiApplication * Instance(void)
Singleton method.