88 "ARNDCQEGHILKMFPSTWYVBZX";
104 template<
class container>
bool
111 list<TGi>& use_this_gi,
112 int& comp_adj_method)
114 const string k_GiPrefix =
"gi:";
115 bool hasScore =
false;
116 ITERATE (
typename container, iter, scoreList) {
119 if (
id.GetStr()==
"score"){
120 score = (*iter)->GetValue().GetInt();
121 }
else if (
id.GetStr()==
"bit_score"){
122 bits = (*iter)->GetValue().GetReal();
123 }
else if (
id.GetStr()==
"e_value" ||
id.GetStr()==
"sum_e") {
124 evalue = (*iter)->GetValue().GetReal();
126 }
else if (
id.GetStr()==
"use_this_gi"){
127 Uint4 gi_v = (
Uint4)((*iter)->GetValue().GetInt());
129 }
else if (
id.GetStr()==
"sum_n"){
130 sum_n = (*iter)->GetValue().GetInt();
131 }
else if (
id.GetStr()==
"num_ident"){
132 num_ident = (*iter)->GetValue().GetInt();
133 }
else if (
id.GetStr()==
"comp_adjustment_method") {
134 comp_adj_method = (*iter)->GetValue().GetInt();
138 TGi gi = NStr::StringToNumeric<TGi>(strGi);
139 use_this_gi.push_back(gi);
157 list<string> string_l;
164 list<string>::iterator iter = string_l.begin();
165 while(iter != string_l.end())
178 string errsevmsg[] = {
"UNKNOWN",
"INFO",
"WARNING",
"ERROR",
187 if(iter->level == 4){
// NOTE(review): the assignment below stores iter->level into itself and has
// no effect. The branch it belongs to is not visible here - confirm whether
// a different value was intended or the statement can be dropped.
190 iter->level = iter->level;
196 out << errsevmsg[iter->level] <<
": " << iter->message <<
"\n";
205 vector<string> split_line;
207 ITERATE(vector<string>, iter, split_line) {
217 static bool s_FillDbInfoRemotely(
const string&
dbname,
223 blastdb->SetType() =
info.is_protein
232 info.definition = dbinfo->GetDescription();
233 if (
info.definition.empty())
237 info.total_length = dbinfo->GetTotal_length();
238 info.number_seqs =
static_cast<int>(dbinfo->GetNum_sequences());
253 int dbfilt_algorithm)
262 if (
info.definition.empty())
269 info.filt_algorithm_name.clear();
270 info.filt_algorithm_options.clear();
271 if (dbfilt_algorithm == -1) {
275 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
276 (!defined(NCBI_COMPILER_MIPSPRO)) )
277 string filtering_algorithm;
280 info.filt_algorithm_name,
281 info.filt_algorithm_options);
288 bool is_protein,
int numSeqs,
Int8 numLetters,
string&
tag)
292 info.is_protein = is_protein;
294 info.definition =
string(
"User specified sequence set.");
297 info.definition =
string(
"User specified sequence set ") +
300 info.number_seqs = numSeqs;
301 info.total_length = numLetters;
302 retval.push_back(
info);
307 const string& blastdb_names,
bool is_protein,
308 int dbfilt_algorithm ,
313 bool found_all =
false;
315 vector<string> missing_names;
316 vector< CRef<objects::CBlast4_database_info> > all_db_info =
317 rmt_blast_services.
GetDatabaseInfo(blastdb_names,is_protein,&found_all,&missing_names);
318 if( !missing_names.empty() ){
320 for(
size_t ndx=0 ; ndx < missing_names.size(); ndx++){
321 msg += missing_names[ndx];
323 msg +=
string(
"' not found on NCBI servers.\n");
326 for(
size_t ndx=0 ; ndx < all_db_info.size(); ndx++){
328 objects::CBlast4_database_info &dbinfo = *all_db_info[ndx];
329 info.name = dbinfo.GetDatabase().GetName();
330 info.definition = dbinfo.GetDescription();
331 if (
info.definition.empty())
335 info.total_length = dbinfo.GetTotal_length();
336 info.number_seqs =
static_cast<int>(dbinfo.GetNum_sequences());
337 if (
info.total_length < 0) {
342 msg +=
string(
"' has bad total length on NCBI servers.\n");
346 retval.push_back(
info);
351 vector<CTempString> dbs;
353 retval.reserve(dbs.size());
355 ITERATE(vector<CTempString>,
i, dbs) {
357 info.is_protein = is_protein;
358 bool success =
false;
367 retval.push_back(
info);
372 msg +=
string(
"' not found on NCBI servers.\n");
374 msg +=
string(
"' not found.\n");
394 for (
size_t i = 1;
i < dbinfo_list.size();
i++) {
395 db_titles +=
"; " + dbinfo_list[
i].definition;
396 tot_num_seqs +=
static_cast<Int8>(dbinfo_list[
i].number_seqs);
397 tot_length += dbinfo_list[
i].total_length;
412 " total letters\n\n";
416 ITERATE(vector<SDbInfo>, dbinfo, dbinfo_list) {
417 if (dbinfo->subset ==
false) {
418 out <<
" Database: ";
421 if ( !dbinfo->filt_algorithm_name.empty() ) {
422 out <<
" Masked using: '" << dbinfo->filt_algorithm_name <<
"'";
423 if ( !dbinfo->filt_algorithm_options.empty() ) {
424 out <<
", options: '" << dbinfo->filt_algorithm_options <<
"'";
429 out <<
" Posted date: ";
430 out << dbinfo->date <<
"\n";
432 out <<
" Number of letters in database: ";
435 out <<
" Number of sequences in database: ";
440 out <<
" Subset of the database(s) listed below" <<
"\n";
441 out <<
" Number of letters searched: ";
444 out <<
" Number of sequences searched: ";
461 out <<
"Gapped" <<
"\n";
466 out <<
" a alpha sigma";
474 sprintf(
buffer,
"%#8.3g ", k);
476 sprintf(
buffer,
"%#8.3g ", h);
480 sprintf(
buffer,
"%#8.3g ", gbp->
a);
513 bool use_long_seqids =
false;
517 use_long_seqids = (
registry.
Get(
"BLAST",
"LONG_SEQID") ==
"1");
519 if (!use_long_seqids) {
552 if((*iter)->IsTitle()) {
553 all_descr_str += (*iter)->GetTitle();
557 return all_descr_str;
568 const string label(
"Query");
571 label, tabular, rid);
582 const string label(
"Subject");
601 }
else if (tabular) {
625 out <<
"\n" <<
"# RID: " << rid;
627 out <<
"\n" <<
"RID: " << rid <<
"\n";
633 const string& pattern,
635 vector<int>& offsets,
638 out << num_patterns <<
" occurrence(s) of pattern: " <<
"\n"
639 << pattern <<
" at position(s) ";
642 for (vector<int>::iterator it = offsets.begin();
643 it != offsets.end(); it++)
652 out <<
" of query sequence" <<
"\n";
653 out <<
"pattern probability=" <<
prob <<
"\n";
663 list<TGi>& use_this_gi)
665 int comp_adj_method = 0;
668 num_ident, use_this_gi, comp_adj_method);
677 list<string>& use_this_seq)
679 int comp_adj_method = 0;
682 num_ident, use_this_seq, comp_adj_method);
692 list<TGi>& use_this_gi,
693 int& comp_adj_method)
695 bool hasScore =
false;
705 sum_n, num_ident, use_this_gi, comp_adj_method);
712 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
715 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
718 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
721 if(use_this_gi.size() == 0) {
729 const string k_GiPrefix =
"gi:";
730 list<string> use_this_seq;
731 ITERATE(list<TGi>, iter_gi, use_this_gi){
733 use_this_seq.push_back(strSeq);
744 list<string>& use_this_seq,
745 int& comp_adj_method)
747 bool hasScore =
false;
755 list<TGi> use_this_gi;
758 sum_n, num_ident, use_this_gi, comp_adj_method);
765 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
768 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
771 score, bits, evalue, sum_n, num_ident, use_this_gi, comp_adj_method);
774 if(use_this_gi.size() == 0) {
798 const CDbtag& dtg =
id->GetGeneral();
802 retval =
id->GetSeqIdString(with_version);
818 double total_bit_score,
821 string& bit_score_str,
822 string& total_bit_score_str,
823 string& raw_score_str)
825 char evalue_buf[100], bit_score_buf[100], total_bit_score_buf[100];
828 static string kBitScoreFormat(
"%4.1lf");
829 #ifdef CTOOLKIT_COMPATIBLE
830 static bool ctoolkit_compatible =
false;
831 static bool value_set =
false;
833 if (getenv(
"CTOOLKIT_COMPATIBLE")) {
834 kBitScoreFormat.assign(
"%4.0lf");
835 ctoolkit_compatible =
true;
841 if (evalue < 1.0e-180) {
842 snprintf(evalue_buf,
sizeof(evalue_buf),
"0.0");
843 }
else if (evalue < 1.0e-99) {
844 snprintf(evalue_buf,
sizeof(evalue_buf),
"%2.0le", evalue);
845 #ifdef CTOOLKIT_COMPATIBLE
846 if (ctoolkit_compatible) {
// Drop the first character of the formatted e-value in place. Source and
// destination overlap, so memmove is used (strncpy on overlapping buffers is
// undefined). The byte count comes from the string itself, terminator
// included: the previous sizeof(evalue_buf-1) measured a pointer, not the
// buffer.
847 memmove(evalue_buf, evalue_buf+1,
strlen(evalue_buf+1) + 1);
850 }
else if (evalue < 0.0009) {
851 snprintf(evalue_buf,
sizeof(evalue_buf),
"%3.0le", evalue);
852 }
else if (evalue < 0.1) {
853 snprintf(evalue_buf,
sizeof(evalue_buf),
"%4.3lf", evalue);
854 }
else if (evalue < 1.0) {
855 snprintf(evalue_buf,
sizeof(evalue_buf),
"%3.2lf", evalue);
856 }
else if (evalue < 10.0) {
857 snprintf(evalue_buf,
sizeof(evalue_buf),
"%2.1lf", evalue);
859 snprintf(evalue_buf,
sizeof(evalue_buf),
"%2.0lf", evalue);
862 if (bit_score > 99999){
863 snprintf(bit_score_buf,
sizeof(bit_score_buf),
"%5.3le", bit_score);
864 }
else if (bit_score > 99.9){
865 snprintf(bit_score_buf,
sizeof(bit_score_buf),
"%3.0ld",
868 snprintf(bit_score_buf,
sizeof(bit_score_buf), kBitScoreFormat.c_str(),
871 if (total_bit_score > 99999){
872 snprintf(total_bit_score_buf,
sizeof(total_bit_score_buf),
"%5.3le",
874 }
else if (total_bit_score > 99.9){
875 snprintf(total_bit_score_buf,
sizeof(total_bit_score_buf),
"%3.0ld",
876 (
long)total_bit_score);
878 snprintf(total_bit_score_buf,
sizeof(total_bit_score_buf),
"%2.1lf",
881 evalue_str = evalue_buf;
882 bit_score_str = bit_score_buf;
883 total_bit_score_str = total_bit_score_buf;
895 bool is_first_aln =
true;
896 unsigned int num_align = 0;
899 if ((*iter)->GetSegs().IsDisc()) {
902 subid = &((*iter)->GetSeq_id(1));
903 if(is_first_aln || (!is_first_aln && !subid->
Match(*previous_id))){
911 is_first_aln =
false;
914 new_aln.
Set().push_back(*iter);
923 bool is_first_aln =
true;
924 unsigned int num_align = 0;
927 if ((*iter)->GetSegs().IsDisc()) {
930 subid = &((*iter)->GetSeq_id(1));
931 if(is_first_aln || (!is_first_aln && !subid->
Match(*previous_id))){
939 is_first_aln =
false;
952 bool is_first_aln =
true;
953 unsigned int num_align = 0;
954 bool finishCurrent =
false;
956 if ((*iter)->GetSegs().IsDisc()) {
959 subid = &((*iter)->GetSeq_id(1));
960 if(is_first_aln || (!is_first_aln && !subid->
Match(*previous_id))){
// True exactly when num_align + 1 equals number; the comparison already
// yields a bool, so no conditional operator is needed.
961 finishCurrent = (num_align + 1 == number);
964 is_first_aln =
false;
967 if(num_align >
number && !finishCurrent) {
970 new_aln.
Set().push_back(*iter);
977 int& num_gaps,
int& num_gap_opens)
979 num_gaps = num_gap_opens = align_length = 0;
981 for (
int row = 0; row < salv.
GetNumRows(); row++) {
984 for (
int i=0;
i<chunk_vec->size();
i++) {
986 int chunk_length = chunk->GetAlnRange().GetLength();
989 if (chunk->IsGap()) {
991 num_gaps += chunk_length;
995 align_length += chunk_length;
1006 for(CSeq_align_set::Tdata::const_iterator iter =
source.Get().begin();
1007 iter !=
source.Get().end(); iter++) {
1008 if((*iter)->IsSetSegs()){
1012 for(CSeq_align_set::Tdata::const_iterator iter2 =
1015 target.
Set().push_back(*iter2);
1018 target.
Set().push_back(*iter);
1053 if((*iter)->IsSetDim()){
1054 ds.
SetDim((*iter)->GetDim());
1056 if((*iter)->IsSetIds()){
1057 ds.
SetIds() = (*iter)->GetIds();
1061 if((*iter)->IsSetStarts()){
1066 if((*iter)->IsSetLen()){
1067 ds.
SetLens().push_back((*iter)->GetLen());
1069 if((*iter)->IsSetStrands()){
1074 if((*iter)->IsSetScores()){
1097 if(bdl_id && bdl_id->
Match(
id) &&
1098 (*iter_bdl)->IsSetTaxid() && (*iter_bdl)->CanGetTaxid()){
1099 taxid = (*iter_bdl)->GetTaxid();
1114 frame = (start % 3) + 1;
1141 int score1, sum_n1, num_ident1;
1142 double bits1, evalue1;
1143 list<TGi> use_this_gi1;
1145 int score2, sum_n2, num_ident2;
1146 double bits2, evalue2;
1147 list<TGi> use_this_gi2;
1150 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1151 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1155 bool retval =
false;
1158 if(length1 > 0 && length2 > 0 && num_ident1 > 0 &&num_ident2 > 0 ) {
1159 if (((
double)num_ident1)/length1 == ((
double)num_ident2)/length2) {
1161 retval = evalue1 < evalue2;
1164 retval = ((double)num_ident1)/length1 >= ((double)num_ident2)/length2;
1168 retval = evalue1 < evalue2;
1183 int score1, sum_n1, num_ident1;
1184 double bits1, evalue1;
1185 list<TGi> use_this_gi1;
1187 int score2, sum_n2, num_ident2;
1188 double bits2, evalue2;
1189 list<TGi> use_this_gi2;
1191 GetAlnScores(*(info1->
Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1192 GetAlnScores(*(info2->
Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1193 return bits1 > bits2;
1202 bool retval =
false;
1205 retval = cov1 > cov2;
1206 }
else if (cov1 == cov2) {
1207 int score1, sum_n1, num_ident1;
1208 double bits1, evalue1;
1209 list<TGi> use_this_gi1;
1211 int score2, sum_n2, num_ident2;
1212 double bits2, evalue2;
1213 list<TGi> use_this_gi2;
1214 GetAlnScores(*(info1->
Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1215 GetAlnScores(*(info2->
Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1216 retval = evalue1 < evalue2;
1225 int start1 = 0, start2 = 0;
1232 start1 =
min(info1->
Get().front()->GetSeqStart(0),
1233 info1->
Get().front()->GetSeqStop(0));
1234 start2 =
min(info2->
Get().front()->GetSeqStart(0),
1235 info2->
Get().front()->GetSeqStop(0));
1237 if (start1 == start2) {
1239 int score1, sum_n1, num_ident1;
1240 double bits1, evalue1;
1241 list<TGi> use_this_gi1;
1243 int score2, sum_n2, num_ident2;
1244 double bits2, evalue2;
1245 list<TGi> use_this_gi2;
// Fetch the scores of the first alignment of each hit; they are only needed
// to order two hits whose start positions are equal, by e-value.
1248 GetAlnScores(*(info1->
Get().front()), score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
// The second set of scores must come from info2. Reading info1 twice made
// evalue1 and evalue2 identical, so the e-value comparison that follows
// could never be true.
1249 GetAlnScores(*(info2->
Get().front()), score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1250 return evalue1 < evalue2;
1253 return start1 < start2;
1263 int score1, sum_n1, num_ident1;
1264 double bits1, evalue1;
1265 list<TGi> use_this_gi1;
1267 int score2, sum_n2, num_ident2;
1268 double bits2, evalue2;
1269 list<TGi> use_this_gi2;
1272 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1273 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1274 return bits1 > bits2;
1282 int start1 = 0, start2 = 0;
1287 if (start1 == start2) {
1289 int score1, sum_n1, num_ident1;
1290 double bits1, evalue1;
1291 list<TGi> use_this_gi1;
1293 int score2, sum_n2, num_ident2;
1294 double bits2, evalue2;
1295 list<TGi> use_this_gi2;
1298 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1299 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1300 return evalue1 < evalue2;
1304 return start1 < start2;
1312 int start1 = 0, start2 = 0;
1317 if (start1 == start2) {
1319 int score1, sum_n1, num_ident1;
1320 double bits1, evalue1;
1321 list<TGi> use_this_gi1;
1323 int score2, sum_n2, num_ident2;
1324 double bits2, evalue2;
1325 list<TGi> use_this_gi2;
1328 GetAlnScores(*info1, score1, bits1, evalue1, sum_n1, num_ident1, use_this_gi1);
1329 GetAlnScores(*info2, score2, bits2, evalue2, sum_n2, num_ident2, use_this_gi2);
1330 return evalue1 < evalue2;
1334 return start1 < start2;
1351 if (do_translation) {
1354 final_aln = denseg_aln;
1370 bool do_translation) {
1371 double identity = 0;
1382 if (do_translation) {
1385 final_aln = denseg_aln;
1399 alnvec.GetWholeAlnSeqString(0,
query);
1400 alnvec.GetWholeAlnSeqString(1,
subject);
1405 for (
int i = 0;
i < length; ++
i) {
1412 identity = ((double)num_ident)/length;
1421 double &percentIdent1,
1422 double &percentIdent2)
1451 double evalue1 = seqSetInfo1->evalue;
1452 double evalue2 = seqSetInfo2->evalue;
1453 double percentIdent1 = seqSetInfo1->percent_identity;
1454 double percentIdent2 = seqSetInfo2->percent_identity;
1456 bool retval =
false;
1457 if(percentIdent1 < 0 || percentIdent2 < 0) {
1460 if(percentIdent1 > 0 &&percentIdent2 > 0) {
1461 if (percentIdent1 == percentIdent2) {
1462 retval = evalue1 < evalue2;
1465 retval = percentIdent1 >= percentIdent2;
1468 retval = evalue1 < evalue2;
1476 int score1, score2, sum_n, num_ident;
1477 double bits, evalue;
1478 list<TGi> use_this_gi;
1479 double total_bits1 = 0, total_bits2 = 0;
1483 sum_n, num_ident, use_this_gi);
1484 total_bits1 += bits;
1489 sum_n, num_ident, use_this_gi);
1490 total_bits2 += bits;
// NOTE(review): `>=` yields true when both totals are equal. If this
// function is passed to a sort as its ordering predicate (not visible here),
// the standard requires a strict ordering, i.e. `>` - confirm against the
// callers before changing.
1494 return total_bits1 >= total_bits2;
1498 #ifndef NCBI_COMPILER_WORKSHOP
1507 const string& mv_build_name)
1513 id1 = &(info1->
Get().front()->GetSeq_id(1));
1514 id2 = &(info2->
Get().front()->GetSeq_id(1));
1516 int linkout1 = 0, linkout2 = 0;
1535 const string& mv_build_name)
1539 #ifndef NCBI_COMPILER_WORKSHOP
1545 bool do_translation,
CScope& scope,
int
1547 const string& mv_build_name)
1552 if (sort_method == 1) {
1553 #ifndef NCBI_COMPILER_WORKSHOP
1557 }
else if (sort_method == 2) {
1559 }
else if (sort_method == 3) {
1571 const string& mv_build_name)
1575 int linkoutPrev = 0;
1578 const CSeq_id&
id = (*iter)->GetSeq_id(1);
1583 if(prevSubjectId.
Empty() || !
id.Match(*prevSubjectId)){
1584 prevSubjectId = &id;
1585 linkout = linkoutdb ? linkoutdb->
GetLinkout(
id, mv_build_name): 0;
1586 linkoutPrev = linkout;
1590 linkout = linkoutPrev;
1593 if (sort_method == 1) {
1594 target[1]->Set().push_back(*iter);
1595 }
else if (sort_method == 2){
1596 target[0]->Set().push_back(*iter);
1598 target[1]->Set().push_back(*iter);
1601 if (sort_method == 1) {
1602 target[0]->Set().push_back(*iter);
1603 }
else if (sort_method == 2) {
1604 target[1]->Set().push_back(*iter);
1606 target[0]->Set().push_back(*iter);
1610 target[0]->Set().push_back(*iter);
1614 target[0]->Set().push_back(*iter);
1626 const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1627 if(previous_id.
Empty()) {
1629 temp->
Set().push_back(*iter);
1630 target.push_back(temp);
1631 }
else if (cur_id.
Match(*previous_id)){
1632 temp->
Set().push_back(*iter);
1636 temp->
Set().push_back(*iter);
1637 target.push_back(temp);
1639 previous_id = &cur_id;
1654 align_set->
Set().push_back(*iter2);
1668 for(
size_t i = 0;
i < seqIdList.size();
i++) {
1674 const CSeq_id& cur_id = (*iter)->GetSeq_id(1);
1675 if(previous_id.
Empty() || !cur_id.
Match(*previous_id)) {
1676 if(count >= seqIdList.size()) {
1680 if(hitsMap.
find(idString) != hitsMap.
end()) {
1682 temp->
Set().push_back(*iter);
1683 hitsMap[idString] = temp;
1690 else if (cur_id.
Match(*previous_id)){
1692 temp->
Set().push_back(*iter);
1695 previous_id = &cur_id;
1702 vector <string> seqIds;
1710 list< CRef<CSeq_align_set> > orderedSet;
1712 for(
size_t i = 0;
i < seqIds.size();
i++) {
1713 if(hitsMap.
find(seqIds[
i]) != hitsMap.
end()) {
1714 orderedSet.push_back(hitsMap[seqIds[
i]]);
1723 bool success =
false;
1736 if (!strTag.empty())
1738 vector<string> vecInfo;
1748 if (vecInfo.size() != 3)
1753 strRun = vecInfo[0];
1754 strSpotId = vecInfo[1];
1755 strReadIndex = vecInfo[2];
1765 string strRun, strSpotId,strReadIndex;
1772 link +=
"?run=" + strRun;
1773 link +=
"." + strSpotId;
1774 link +=
"." + strReadIndex;
1787 if(!id_general.
Empty() && id_general->
AsFastaString().find(
"gnl|BL_ORD_ID") != string::npos){
1792 if (id_general.
Empty()){
1794 if (id_other.
Empty()){
1795 bestid = id_accession;
1798 bestid = id_general;
1809 bool db_is_na,
string rid,
int query_number,
1810 bool for_alignment) {
1815 if(!id_general.
Empty()
1816 && id_general->
AsFastaString().find(
"gnl|BL_ORD_ID") != string::npos){
1824 bool nodb_path =
false;
1826 if (user_url.find(
"dumpgnl.cgi") ==string::npos){
1831 char *chptr, *dbtmp;
1833 char*
dbname =
new char[
sizeof(char)*length + 2];
1837 dbtmp =
new char[
sizeof(char)*length + 2];
1838 memset(dbtmp,
'\0',
sizeof(
char)*length + 2);
1839 for(
i = 0;
i < length;
i++) {
1847 while (!
isspace((
unsigned char)
dbname[
i]) && j < 256 &&
i < length) {
1855 if((chptr = strrchr(tmpbuff,
'/')) !=
NULL) {
1856 strcat(dbtmp, (
char*)(chptr+1));
1867 if (!bestID.empty()){
1868 strcpy(gnl, bestID.c_str());
1876 if (user_url.find(
"?") == string::npos){
1877 link += user_url +
"?" +
"db=" +
str +
"&na=" + (db_is_na?
"1" :
"0");
1879 if (user_url.find(
"=") != string::npos) {
1882 link += user_url +
"db=" +
str +
"&na=" + (db_is_na?
"1" :
"0");
1885 if (gnl[0] !=
'\0'){
1898 link +=
"&RID=" + rid;
1901 if (query_number > 0){
1905 if (user_url.find(
"dumpgnl.cgi") ==string::npos){
1907 link +=
"&log$=nuclalign";
1909 link +=
"&log$=nucltop";
1936 value_type(
"composition_based_statistics",
""));
1942 bool is_first =
true;
1945 string parameter = it->first;
1947 if (parameters_to_change.count(
NStr::ToLower(parameter)) > 0 ||
1957 it->first +
"=" + parameters_to_change[it->first];
1964 cgi_query += it->first +
"=" + it->second;
1974 string format_type =
ctx.GetRequestValue(
"FORMAT_TYPE").GetValue();
1975 string ridstr =
ctx.GetRequestValue(
"RID").GetValue();
1976 string align_view =
ctx.GetRequestValue(
"ALIGNMENT_VIEW").GetValue();
1978 cgi_query +=
"RID=" + ridstr;
1979 cgi_query +=
"&FORMAT_TYPE=" + format_type;
1980 cgi_query +=
"&ALIGNMENT_VIEW=" + align_view;
1982 cgi_query +=
"&QUERY_NUMBER=" +
ctx.GetRequestValue(
"QUERY_NUMBER").GetValue();
1983 cgi_query +=
"&FORMAT_OBJECT=" +
ctx.GetRequestValue(
"FORMAT_OBJECT").GetValue();
1984 cgi_query +=
"&RUN_PSIBLAST=" +
ctx.GetRequestValue(
"RUN_PSIBLAST").GetValue();
1985 cgi_query +=
"&I_THRESH=" +
ctx.GetRequestValue(
"I_THRESH").GetValue();
1987 cgi_query +=
"&DESCRIPTIONS=" +
ctx.GetRequestValue(
"DESCRIPTIONS").GetValue();
1989 cgi_query +=
"&ALIGNMENTS=" +
ctx.GetRequestValue(
"ALIGNMENTS").GetValue();
1991 cgi_query +=
"&NUM_OVERVIEW=" +
ctx.GetRequestValue(
"NUM_OVERVIEW").GetValue();
1993 cgi_query +=
"&NCBI_GI=" +
ctx.GetRequestValue(
"NCBI_GI").GetValue();
1995 cgi_query +=
"&SHOW_OVERVIEW=" +
ctx.GetRequestValue(
"SHOW_OVERVIEW").GetValue();
1997 cgi_query +=
"&SHOW_LINKOUT=" +
ctx.GetRequestValue(
"SHOW_LINKOUT").GetValue();
1999 cgi_query +=
"&GET_SEQUENCE=" +
ctx.GetRequestValue(
"GET_SEQUENCE").GetValue();
2001 cgi_query +=
"&MASK_CHAR=" +
ctx.GetRequestValue(
"MASK_CHAR").GetValue();
2002 cgi_query +=
"&MASK_COLOR=" +
ctx.GetRequestValue(
"MASK_COLOR").GetValue();
2004 cgi_query +=
"&SHOW_CDS_FEATURE=" +
ctx.GetRequestValue(
"SHOW_CDS_FEATURE").GetValue();
2007 cgi_query +=
"&FORMAT_EQ_TEXT=" +
2009 GetRequestValue(
"FORMAT_EQ_TEXT").
2014 cgi_query +=
"&FORMAT_EQ_OP=" +
2016 GetRequestValue(
"FORMAT_EQ_OP").
2021 cgi_query +=
"&FORMAT_EQ_MENU=" +
2023 GetRequestValue(
"FORMAT_EQ_MENU").
2027 cgi_query +=
"&EXPECT_LOW=" +
ctx.GetRequestValue(
"EXPECT_LOW").GetValue();
2028 cgi_query +=
"&EXPECT_HIGH=" +
ctx.GetRequestValue(
"EXPECT_HIGH").GetValue();
2030 cgi_query +=
"&BL2SEQ_LINK=" +
ctx.GetRequestValue(
"BL2SEQ_LINK").GetValue();
2037 const string& mv_build_name)
2039 bool is_mixed =
false;
2040 bool is_first =
true;
2041 int prev_database = 0;
2045 const CSeq_id&
id = (*iter)->GetSeq_id(1);
2046 int linkout = linkoutdb
2050 if (!is_first && cur_database != prev_database) {
2054 prev_database = cur_database;
2065 bool formatAsMixedDbs =
false;
2066 string mixedDbs =
ctx.GetRequestValue(
"MIXED_DATABASE").GetValue();
2067 if(!mixedDbs.empty()) {
// The comparison chain already yields a bool: true only when mixedDbs is
// "on", "true" or "yes".
2069 formatAsMixedDbs = (mixedDbs == "on" ||
mixedDbs == "true" ||
mixedDbs == "yes");
2071 return formatAsMixedDbs;
2081 string lnk_tl_info =
"",
2082 string lnk_title =
"")
2084 const string kLinkTitle=
" title=\"View <@lnk_tl_info@> for <@label@>\" ";
2085 const string kLinkTarget=
"target=\"lnk" + rid +
"\"";
2086 string lnkTitle = (lnk_title.empty()) ? kLinkTitle : lnk_title;
2108 bool textLink =
true)
2111 list<string> linkout_list;
2112 string url_link,lnk_displ,lnk_title,lnkTitleInfo;
2114 vector<string> accs;
2116 string firstAcc = (accs.size() > 0)? accs[0] : labelList;
2125 lnkTitleInfo =
"UniGene cluster";
2126 string uid = !linkoutInfo.
is_na ?
"[Protein Accession]" :
"[Nucleotide Accession]";
2134 linkout_list.push_back(url_link);
2141 url_link = struct_link.empty() ?
kStructureUrl : struct_link;
2145 linkTitle =
" title=\"View 3D structure <@label@>\"";
2149 lnk_displ = textLink ?
"AlphaFold Structure" :
kStructureImg;
2150 linkTitle =
" title=\"View AlphaFold 3D structure <@label@>\"";
2155 string molID,chainID;
2164 linkout_list.push_back(url_link);
2166 if (linkout &
eGeo){
2168 lnk_displ = textLink ?
"GEO Profiles" :
kGeoImg;
2170 lnkTitleInfo =
"Expression profiles";
2179 linkout_list.push_back(url_link);
2181 if(linkout &
eGene){
2186 lnkTitleInfo =
"gene " + geneSym;
2194 string uid = !linkoutInfo.
is_na ?
"[Protein Accession]" :
"[Nucleotide Accession]";
2203 linkout_list.push_back(url_link);
2209 lnk_displ =
"Map Viewer";
2211 lnkTitleInfo =
"BLAST hits on the " + linkoutInfo.
taxName +
" genome";
2232 linkout_list.push_back(url_link);
2239 string linkTitle =
" title=\"View <@label@> aligned to the " + linkoutInfo.
taxName +
" genome\"";
2246 linkout_list.push_back(url_link);
2253 string linkTitle =
" title=\"View Bioassays involving <@label@>\"";
2262 linkout_list.push_back(url_link);
2268 lnkTitleInfo =
"Bioassay data";
2269 string linkTitle =
" title=\"View Bioassays involving <@label@>\"";
2278 linkout_list.push_back(url_link);
2284 lnkTitleInfo =
"genomic information";
2286 string uid = !linkoutInfo.
is_na ?
"Protein Accession" :
"Nucleotide Accession";
2294 linkout_list.push_back(url_link);
2300 urlTag =
"GENOME_DATA_VIEWER_TRANSCR";
2301 lnkTitleInfo =
"title=\"View the annotation of the transcript <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2304 urlTag = linkoutInfo.
is_na ?
"GENOME_DATA_VIEWER_NUC" :
"GENOME_DATA_VIEWER_PROT";
2305 lnkTitleInfo = linkoutInfo.
is_na ?
2306 "title=\"View BLAST hits for <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as hits and browse to other regions.\""
2308 "title=\"View the annotation of the protein <@label@> within a genomic context in NCBI's Genome Data Viewer (GDV)- genome browser for RefSeq annotated assemblies. See other genomic features annotated at the same location as the protein annotation and browse to other regions.\"";
2316 seqFrom = (seqFrom == 0) ? seqFrom : seqFrom - 1;
2319 seqTo = (seqTo == 0) ? seqTo : seqTo - 1;
2328 linkout_list.push_back(url_link);
2330 return linkout_list;
2336 const string& cdd_rid,
2337 const string& entrez_term,
2340 bool structure_linkout_as_group,
2341 bool for_alignment,
int cur_align,
2342 string preComputedResID)
2345 list<string> linkout_list;
2351 first_gi = (first_gi ==
ZERO_GI) ? gi : first_gi;
2356 linkoutInfo.
Init(rid,
2365 structure_linkout_as_group,
2378 return linkout_list;
2384 if(linkLetter ==
"U") {
2387 else if(linkLetter ==
"S") {
2390 else if(linkLetter ==
"E") {
2393 else if(linkLetter ==
"G") {
2396 else if(linkLetter ==
"M") {
2399 else if(linkLetter ==
"N") {
2402 else if(linkLetter ==
"B") {
2405 else if(linkLetter ==
"R") {
2408 else if(linkLetter ==
"V") {
2411 else if(linkLetter ==
"T") {
2421 if(linkout_map.count(linkout) > 0){
2422 linkout_map[linkout].push_back(cur_id);
2425 vector <CBioseq::TId > idList;
2426 idList.push_back(cur_id);
2433 const string& mv_build_name,
2444 linkout = (*linkoutdb)->GetLinkout(gi, mv_build_name);
2448 linkout = (*linkoutdb)->GetLinkout(*seqID, mv_build_name);
2451 int linkoutWithoutVersion = (*linkoutdb)->GetLinkout(*seqIDNew, mv_build_name);
2452 if(linkoutWithoutVersion && (linkoutWithoutVersion |
eStructure)) {
2453 linkout = linkout | linkoutWithoutVersion;
2459 cerr <<
"[BLAST FORMATTER EXCEPTION] Problem with linkoutdb: " << e.
GetMsg() << endl;
2468 map<
int, vector <CBioseq::TId > > &linkout_map,
2470 const string& mv_build_name)
2472 if(!linkoutdb)
return;
2478 if(linkout &
eGene){
2484 if (linkout &
eGeo){
2515 map<
int, vector <CBioseq::TId > > &linkout_map,
2517 const string& mv_build_name)
2519 const int kMaxDeflineNum = 10;
2522 iter != bdl.end(); iter++){
2530 if(num > kMaxDeflineNum)
break;
2541 taxName =
info.common_name;
2555 list<string> &linkout_list)
2565 string lnk_displ =
"Identical Proteins";
2569 linkout_list.push_back(url_link);
2578 map<
int, vector < CBioseq::TId > > &linkout_map,
2579 bool getIdentProteins)
2582 list<string> linkout_list;
2584 vector<string> linkLetters;
2586 for(
size_t i = 0;
i < linkLetters.size();
i++) {
2588 vector < CBioseq::TId > idList;
2595 if(linkout_map.
find(linkout) != linkout_map.
end()) {
2596 idList = linkout_map[linkout];
2598 bool disableLink = (linkout == 0 || idList.size() == 0 || ( (linkout &
eStructure) && (linkoutInfo.
cdd_rid ==
"" || linkoutInfo.
cdd_rid ==
"0")));
2600 string giList,labelList;
2602 for (
size_t i = 0;
i < idList.size();
i++) {
2605 if (first_gi ==
ZERO_GI) first_gi = gi;
2610 if(!labelList.empty()) labelList +=
",";
2614 if(!giList.empty() && (linkout &
eBioAssay) && !linkoutInfo.
is_na)
continue;
2615 if(!giList.empty()) giList +=
",";
2619 linkoutInfo.
gnl.clear();
2631 if(one_linkout.size() > 0) {
2632 list<string>::iterator iter = one_linkout.begin();
2633 linkout_list.push_back(*iter);
2637 if(getIdentProteins) {
2640 return linkout_list;
2646 list<string> linkout_list;
2648 if(bdl.size() > 0) {
2650 list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2655 !linkoutInfo.
is_na && bdl.size() > 1);
2657 return linkout_list;
2663 const string& cdd_rid,
2664 const string& entrez_term,
2666 bool structure_linkout_as_group,
2669 string& linkoutOrder,
2674 string &preComputedResID,
2676 const string& mv_build_name)
2679 list<string> linkout_list;
2681 if(bdl.size() > 0) {
2683 list< CRef< CBlast_def_line > >::const_iterator iter = bdl.begin();
2686 SLinkoutInfo linkoutInfo;
2687 linkoutInfo.
Init(rid,
2696 structure_linkout_as_group,
2699 linkoutInfo.cur_align = cur_align;
2700 linkoutInfo.taxid = taxid;
2705 !is_na && bdl.size() > 1);
2707 return linkout_list;
2713 bool getIdentProteins)
2715 list<string> linkout_list;
2723 return linkout_list;
2728 const string& cdd_rid,
2729 const string& entrez_term,
2731 bool structure_linkout_as_group,
2734 string& linkoutOrder,
2739 string &preComputedResID,
2741 const string& mv_build_name,
2742 bool getIdentProteins)
2745 list<string> linkout_list;
2749 SLinkoutInfo linkoutInfo;
2750 linkoutInfo.
Init(rid,
2759 structure_linkout_as_group,
2762 linkoutInfo.cur_align = cur_align;
2763 linkoutInfo.taxid = taxid;
2769 return linkout_list;
2785 bool oppositeStrands =
false;
2786 bool isFirst =
false;
2793 query_list.push_back(query_range);
2800 subject_list.push_back(subject_range);
2802 oppositeStrands = (!isFirst) ? (*iter)->GetSeqStrand(0) != (*iter)->GetSeqStrand(1) : oppositeStrands;
2808 return oppositeStrands;
2817 list<CRange<TSeqPos> > merge_list;
2819 bool is_first =
true;
2824 merge_list.push_back(*iter);
2829 merge_list.pop_back();
2831 merge_list.push_back(temp_range);
2832 prev_range = temp_range;
2834 merge_list.push_back(*iter);
2846 list<CRange<TSeqPos> > merge_list;
2848 list<CRange<TSeqPos> > temp;
2855 temp.push_back(seq_range);
2862 int master_covered_lenghth = 0;
2864 master_covered_lenghth += iter->GetLength();
2866 return master_covered_lenghth;
2875 list<CRange<TSeqPos> > query_list;
2876 list<CRange<TSeqPos> > subject_list;
2883 *master_covered_lenghth = 0;
2885 *master_covered_lenghth += iter->GetLength();
2890 from = (from == 0) ? iter->GetFrom() :
min(from,iter->GetFrom());
2891 to =
max(to,iter->GetTo());
2895 return subjectRange;
2902 bool nuc_to_nuc_translation,
2907 const string& mv_build_name) {
2910 if (db_sort == 0 && hit_sort < 1 && hsp_sort < 1)
2913 list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2914 vector< CRef<CSeq_align_set> > seqalign_vec(2);
2920 linkoutdb, mv_build_name);
2928 nuc_to_nuc_translation,
2932 seqalign_hit_total_list.splice(seqalign_hit_total_list.end(),one_seqalign_hit_total_list);
2938 list< CRef<CSeq_align_set> >
2940 bool nuc_to_nuc_translation,
2944 list< CRef<CSeq_align_set> > seqalign_hit_total_list;
2945 list< CRef<CSeq_align_set> > seqalign_hit_list;
2954 nuc_to_nuc_translation);
2965 }
else if (hsp_sort ==
eScore) {
2971 seqalign_hit_total_list.push_back(temp);
2973 return seqalign_hit_total_list;
2978 bool nuc_to_nuc_translation,
2988 nuc_to_nuc_translation,
2999 int score, sum_n, num_ident;
3000 double bits, evalue;
3001 list<TGi> use_this_gi;
3007 sum_n, num_ident, use_this_gi);
3012 if(evalue >= evalueLow && evalue <= evalueHigh) {
3013 new_aln->
Set().push_back(*iter);
3028 if (numerator == denominator)
3031 int retval =(
int) (0.5 + 100.0*((
double)numerator)/((double)denominator));
3032 retval =
min(99, retval);
3039 if (numerator == denominator)
3042 double retval =100*(double)numerator/(
double)denominator;
3048 double percentIdentLow,
3049 double percentIdentHigh)
3051 int score, sum_n, num_ident;
3052 double bits, evalue;
3053 list<TGi> use_this_gi;
3059 sum_n, num_ident, use_this_gi);
3061 if(seqAlnLength > 0 && num_ident > 0) {
3063 if(alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh) {
3064 new_aln->
Set().push_back(*iter);
3074 double percentIdentLow,
3075 double percentIdentHigh)
3077 int score, sum_n, num_ident;
3078 double bits, evalue;
3079 list<TGi> use_this_gi;
3085 sum_n, num_ident, use_this_gi);
3091 if(seqAlnLength > 0 && num_ident > 0) {
3093 if( (evalue >= evalueLow && evalue <= evalueHigh) &&
3094 (alnPercentIdent >= percentIdentLow && alnPercentIdent <= percentIdentHigh)) {
3095 new_aln->Set().push_back(*iter);
3111 double percentIdent,
3115 double percentIdentLow,
3116 double percentIdentHigh,
3122 bool isInRange =
false;
3125 string evalue_buf, bit_score_buf, total_bit_buf, raw_score_buf;
3130 if(evalueLow >= 0 && percentIdentLow >= 0 && queryCoverLow >= 0) {
3131 isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3132 (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh) &&
3133 (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3135 else if(evalueLow >= 0 && percentIdentLow >= 0) {
3136 isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3137 (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3139 else if(evalueLow >= 0 && queryCoverLow >= 0) {
3140 isInRange = (evalue >= evalueLow && evalue <= evalueHigh) &&
3141 (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3143 else if(queryCoverLow >= 0 && percentIdentLow >= 0) {
3144 isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh) &&
3145 (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3147 else if(evalueLow >= 0) {
3148 isInRange = (evalue >= evalueLow && evalue <= evalueHigh);
3150 else if(percentIdentLow >= 0) {
3151 isInRange = (percentIdent >= percentIdentLow && percentIdent <= percentIdentHigh);
3153 else if(queryCoverLow >= 0) {
3154 isInRange = (queryCover >= queryCoverLow && queryCover <= queryCoverHigh);
3162 double percentIdentLow,
3163 double percentIdentHigh,
3167 list< CRef<CSeq_align_set> > seqalign_hit_total_list;
3168 list< CRef<CSeq_align_set> > seqalign_hit_list;
3185 seqalign_hit_total_list.push_back(temp);
3198 int alignCount = 0,hspCount = 0;
3200 const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
3201 if(prevQueryId.
Empty() || !newQueryId.
Match(*prevQueryId)){
3202 if (hspCount >= maxHsps) {
3206 prevQueryId = &newQueryId;
3208 if (alignCount < maxAligns) {
3209 const CSeq_id& newSubjectId = (*iter)->GetSeq_id(1);
3211 if(prevSubjectId.
Empty() || !newSubjectId.
Match(*prevSubjectId)){
3213 prevSubjectId = &newSubjectId;
3217 new_aln->
Set().push_back(*iter);
3228 if(queryNumber == 0) {
3234 int currQueryNum = 0;
3237 const CSeq_id& newQueryId = (*iter)->GetSeq_id(0);
3238 if(prevQueryId.
Empty() || !newQueryId.
Match(*prevQueryId)){
3240 prevQueryId = &newQueryId;
3243 if(currQueryNum == queryNumber) {
3244 if(new_aln.
Empty()) {
3247 new_aln->
Set().push_back(*iter);
3249 else if(currQueryNum > queryNumber) {
3259 string l_cfg_file_name;
3263 bool cfgExists =
true;
3265 string l_fmtcfg_env;
3266 if(
NULL != getenv(
"NCBI") ) l_ncbi_env = getenv(
"NCBI");
3267 if(
NULL != getenv(
"FMTCFG") ) l_fmtcfg_env = getenv(
"FMTCFG");
3269 if( l_fmtcfg_env.empty() )
3270 l_cfg_file_name =
".ncbirc";
3272 l_cfg_file_name = l_fmtcfg_env;
3274 CFile l_fchecker( l_cfg_file_name );
3275 cfgExists = l_fchecker.
Exists();
3276 if( (!cfgExists) && (!l_ncbi_env.empty()) ) {
3277 if( l_ncbi_env.rfind(
"/") != (l_ncbi_env.length() -1 ))
3278 l_ncbi_env.append(
"/");
3279 l_cfg_file_name = l_ncbi_env + l_cfg_file_name;
3280 CFile l_fchecker2( l_cfg_file_name );
3281 cfgExists = l_fchecker2.
Exists();
3286 if( l_dbg ) fprintf(stderr,
"REGISTRY: %s\n",l_cfg_file_name.c_str());
3308 string l_key, l_host_port, l_format;
3309 string l_secion_name =
"BLASTFMTUTIL";
3310 string l_fmt_suffix =
"_FORMAT";
3311 string l_host_port_suffix =
"_HOST_PORT";
3312 string l_subst_pattern;
3318 string l_base_dir =
m_Reg->Get(l_secion_name,
"INCLUDE_BASE_DIR");
3319 if( !l_base_dir.empty() && ( l_base_dir.rfind(
"/") != (l_base_dir.length()-1)) ) {
3320 l_base_dir.append(
"/");
3324 string default_host_port;
3328 l_subst_pattern=
"<@"+l_key_ndx+
"@>";
3329 l_host_port =
m_Reg->Get(l_secion_name, l_key_ndx);
3332 if( l_host_port.empty()){
3333 l_key = url_name + l_host_port_suffix; l_subst_pattern=
"<@"+l_key+
"@>";
3334 l_host_port =
m_Reg->Get(l_secion_name, l_key);
3336 if( l_host_port.empty())
return GetURLDefault(url_name,index);
3339 l_key = url_name + l_fmt_suffix ;
3342 l_format =
m_Reg->Get(l_secion_name, l_key_ndx);
3345 if( l_format.empty() ) l_format =
m_Reg->Get(l_secion_name, l_key);
3348 string l_format_file = l_base_dir + l_format;
3349 CFile l_fchecker( l_format_file );
3350 bool file_name_mode = l_fchecker.
Exists();
3351 if( file_name_mode ) {
3352 string l_inc_file_name = l_format_file;
3356 char *l_mem =
new char [ (size_t) l_inc_size + 1];
3357 memset( l_mem,0, (
size_t) l_inc_size + 1 ) ;
3358 l_file.seekg( 0, ios::beg );
3359 l_file.read(l_mem, l_inc_size);
3361 l_format.erase(); l_format.reserve( (
size_t)l_inc_size + 1 );
3366 result_url =
NStr::Replace(l_format,l_subst_pattern,l_host_port);
3368 if( result_url.empty())
return GetURLDefault(url_name,index);
3376 string search_name = url_name;
3380 if( (url_it = sm_TagUrlMap.find( search_name ) ) != sm_TagUrlMap.end()) {
3385 string error_msg =
"CAlignFormatUtil::GetURLDefault:no_defualt_for"+url_name;
3395 if (matrix_name ==
NULL ||
3402 if (packed_mtx ==
NULL) {
3419 retval(
'*',
'*') = 1;
3421 retval(
'U',
'U') = retval(
'C',
'C');
3422 retval(
'U',
'C') = retval(
'C',
'C');
3423 retval(
'C',
'U') = retval(
'C',
'C');
3430 string tmplParam =
"<@" + tmplParamName +
"@>";
3438 string tmplParam =
"<@" + tmplParamName +
"@>";
3445 templParamVal =
AddSpaces(templParamVal, maxParamValLength, spacesFormatFlag);
3446 string outString =
MapTemplate(inpString,tmplParamName,templParamVal);
3456 if(maxParamValLength >= paramVal.size()) {
3457 size_t numSpaces = maxParamValLength - paramVal.size() + 1;
3459 numSpaces = numSpaces/2;
3461 spaceString.assign(numSpaces,
' ');
3464 paramVal = paramVal.substr(0, maxParamValLength - 3) +
"...";
3468 paramVal = paramVal + spaceString;
3471 paramVal = spaceString + paramVal + spaceString;
3474 paramVal = spaceString + paramVal;
3489 string httpProt =
"https:";
3490 if(!config_reg.
Empty()) {
3491 if(config_reg.
HasEntry(
"BLASTFMTUTIL",
"PROTOCOL")) {
3492 httpProt = config_reg.
Get(
"BLASTFMTUTIL",
"PROTOCOL");
3517 string db,logstr_moltype;
3520 logstr_moltype =
"nucl";
3523 logstr_moltype =
"prot";
3525 string logstr_location = (seqUrlInfo->
isAlignLink) ?
"align" :
"top";
3539 string url_link = urlTemplate;
3540 if (seqUrlInfo->
user_url.find(
"sra.cgi") != string::npos) {
3541 string strRun, strSpotId,strReadIndex;
3558 const unsigned int kWgsProjLength = 4;
3559 const unsigned int kWgsProjIDLengthMin = 8;
3560 const unsigned int kWgsProjIDLengthMax = 10;
3563 if (wgsAccession.size() < 6) {
3572 string wgsProj = wgsAccession.substr(0,kWgsProjLength);
3573 for (
size_t i = 0;
i < wgsProj.length();
i ++){
3580 string wgsId = wgsAccession.substr(kWgsProjLength);
3581 if(wgsId.length() >= kWgsProjIDLengthMin && wgsId.length() <= kWgsProjIDLengthMax) {
3582 for (
size_t i = 0;
i < wgsId.length();
i ++){
3599 const unsigned int kWgsProgNameLength = 6;
3602 wgsProjName = wgsAccession.substr(0,kWgsProgNameLength);
3614 string title =
"title=\"Show report for " + seqUrlInfo->accession +
"\" ";
3616 string temp_class_info =
kClassInfo; temp_class_info +=
" ";
3618 string wgsAccession = seqUrlInfo->accession;
3623 if(isWGS && seqUrlInfo->useTemplates) {
3629 else if (hasTextSeqID) {
3630 string entrezTag = (seqUrlInfo->useTemplates) ?
"ENTREZ_TM" :
"ENTREZ";
3634 if(!seqUrlInfo->useTemplates) {
3647 if(seqUrlInfo->useTemplates) {
3649 url_link = l_TraceUrl + (
string)
"?cmd=retrieve&dopt=fasta&val=" + actual_id +
"&RID=" + seqUrlInfo->rid;
3653 temp_class_info = (!seqUrlInfo->defline.empty())?
CAlignFormatUtil::MapTemplate(temp_class_info,
"defline",seqUrlInfo->defline):temp_class_info;
3664 user_url = (seqUrlInfo->addCssInfo) ?
m_Reg->Get(
"LOCAL_ID",
"TOOL_URL_ALIGN") :
m_Reg->Get(
"LOCAL_ID",
"TOOL_URL");
3671 temp_class_info = (!seqUrlInfo->defline.empty())?
CAlignFormatUtil::MapTemplate(temp_class_info,
"defline",seqUrlInfo->defline):temp_class_info;
3678 seqUrlInfo->seqUrl = url_link;
3696 string title =
"title=\"Show report for " + seqUrlInfo->accession +
"\" ";
3699 !((seqUrlInfo->user_url.find(
"dumpgnl.cgi") != string::npos && seqUrlInfo->gi >
ZERO_GI) ||
3700 (seqUrlInfo->user_url.find(
"maps.cgi") != string::npos))) {
3702 string url_with_parameters,toolURLParams;
3703 if(
m_Reg && !seqUrlInfo->blastType.empty() && seqUrlInfo->blastType !=
"newblast") {
3704 toolURLParams =
m_Reg->Get(seqUrlInfo->blastType,
"TOOL_URL_PARAMS");
3706 if(!toolURLParams.empty()) {
3707 string urlLinkTemplate = seqUrlInfo->user_url + toolURLParams;
3708 url_with_parameters =
s_MapURLLink(urlLinkTemplate, seqUrlInfo, *ids);
3711 if (seqUrlInfo->user_url.find(
"sra.cgi") != string::npos) {
3716 seqUrlInfo->database,
3717 seqUrlInfo->isDbNa, seqUrlInfo->rid,
3718 seqUrlInfo->queryNumber,
3719 seqUrlInfo->isAlignLink);
3723 if (!seqUrlInfo->useTemplates) {
3725 if(seqUrlInfo->addCssInfo) {
3728 url_link +=
"<a " + title + deflineInfo +
"href=\"";
3730 url_link += url_with_parameters;
3731 if (!seqUrlInfo->useTemplates) url_link +=
"\">";
3738 seqUrlInfo->seqUrl = url_link;
3753 if ((seqUrlInfo->advancedView || seqUrlInfo->blastType ==
"mapview" || seqUrlInfo->blastType ==
"mapview_prev") ||
3754 seqUrlInfo->blastType ==
"gsfasta" || seqUrlInfo->blastType ==
"gsfasta_prev") {
3758 string url_link =
GetIDUrl(seqUrlInfo,ids);
3766 string linkURL =
GetIDUrl(seqUrlInfo,ids);
3767 if(!linkURL.empty()) {
3781 static string s_MapCustomLink(
string linkUrl,
string reportType,
string accession,
string linkText,
string linktrg,
string linkTitle =
kCustomLinkTitle,
string linkCls =
"")
3799 list<string> customLinksList;
3804 linkUrl = seqUrlInfo->
seqUrl;
3808 string linkText = (seqUrlInfo->
isDbNa) ?
"GenBank" :
"GenPept";
3810 linkUrl +=
"&from=<@fromHSP@>&to=<@toHSP@>";
3811 linkTiltle =
"Aligned region spanning positions <@fromHSP@> to <@toHSP@> on <@seqid@>";
3814 customLinksList.push_back(link);
3816 return customLinksList;
3823 string dbtype = (seqUrlInfo->
isDbNa) ?
"nuccore" :
"protein";
3828 string seqViewerParams;
3830 seqViewerParams =
m_Reg->Get(seqUrlInfo->
blastType,
"SEQVIEW_PARAMS");
3832 seqViewerParams = seqViewerParams.empty() ?
kSeqViewerParams : seqViewerParams;
3837 string linkTitle =
"Show alignment to <@seqid@> in <@custom_report_type@>";
3843 link_loc =
"fromSubj";
3847 link_loc =
"fromHSP";
3848 linkTitle +=
" for <@fromHSP@> to <@toHSP@> range";
3852 string title = (seqUrlInfo->
isDbNa) ?
"Nucleotide Graphics" :
"Protein Graphics";
3862 list<string> customLinksList =
GetGiLinksList(seqUrlInfo,hspRange);
3864 if(!graphicLink.empty()) {
3865 customLinksList.push_back(graphicLink);
3867 return customLinksList;
3872 int customLinkTypes = customLinkTypesInp;
3880 else if(seqUrlInfo->
blastType ==
"sra") {
3883 else if(seqUrlInfo->
blastType ==
"snp") {
3886 else if(seqUrlInfo->
blastType ==
"gsfasta") {
3889 return customLinkTypes;
3897 objects::CScope &scope,
3898 int customLinkTypes)
3900 list<string> customLinksList;
3901 string linkUrl,link;
3907 linkUrl = seqUrlInfo->
seqUrl;
3909 customLinksList.push_back(link);
3913 customLinksList.push_back(link);
3917 customLinksList.push_back(link);
3921 customLinksList.push_back(link);
3924 linkUrl = seqUrlInfo->
seqUrl;
3926 customLinksList.push_back(link);
3929 linkUrl = seqUrlInfo->
seqUrl;
3931 customLinksList.push_back(link);
3936 linkUrl = seqUrlInfo->
resourcesUrl + rs +
"?report=FLT";
3940 customLinksList.push_back(link);
3944 customLinksList.push_back(link);
3948 customLinksList.push_back(link);
3951 linkUrl = seqUrlInfo->
seqUrl;
3953 customLinksList.push_back(link);
3955 return customLinksList;
3961 objects::CScope &scope)
3965 string linkUrl,link;
3976 if(!linkUrl.empty()) {
3977 linkUrl +=
"&segs="+ seqUrlInfo->
segs;
3987 objects::CScope &scope)
3995 linkUrl = seqUrlInfo->
seqUrl;
3999 linkUrl = seqUrlInfo->
seqUrl;
4000 vector<string> parts;
4004 if(parts.size() > 1) {
4007 linkUrl = seqUrlInfo->
resourcesUrl + rs +
"?report=fasta";
4030 CGeneInfoFileReader::TGeneInfoList::const_iterator itInfo = infoList.begin();
4031 for (; itInfo != infoList.end(); itInfo++)
4034 geneSym =
info->GetSymbol();
4041 geneSym =
"(Gene info extraction error: " + e.
GetMsg() +
")";
4042 cerr <<
"[BLAST FORMATTER EXCEPTION] Gene info extraction error: " << e.
GetMsg() << endl;
4046 geneSym =
"(Gene info extraction error)";
4047 cerr <<
"[BLAST FORMATTER EXCEPTION] Gene info extraction error " << endl;
4068 const CDbtag& dtg = subject_id.GetGeneral();
4069 const string& dbName = dtg.
GetDb();
4087 list<TGi> use_this_gi;
4089 use_this_gi.clear();
4092 num_ident, use_this_gi);
4095 seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4097 seqSetInfo->use_this_gi = use_this_gi;
4098 seqSetInfo->bit_score = bits;
4099 seqSetInfo->raw_score = score;
4100 seqSetInfo->evalue = evalue;
4101 seqSetInfo->match = num_ident;
4104 seqSetInfo->flip =
false;
4106 return seqSetInfo.release();
4121 if(aln.
Get().empty())
4126 double total_bits = 0;
4127 double highest_bits = 0;
4128 double lowest_evalue = 0;
4129 int highest_length = 1;
4130 int highest_ident = 0;
4132 double totalLen = 0;
4134 list<TGi> use_this_gi;
4141 totalLen += align_length;
4144 num_ident, use_this_gi);
4145 use_this_gi.clear();
4155 if (bits > highest_bits) {
4156 highest_length = align_length;
4157 highest_ident = num_ident;
4161 if (bits > highest_bits) {
4162 highest_bits = bits;
4163 lowest_evalue = evalue;
4166 seqSetInfo->
match = highest_ident;
4172 seqSetInfo->
evalue = lowest_evalue;
4173 seqSetInfo->
hspNum =
static_cast<int>(aln.
Size());
4187 if(aln.
Get().empty())
4190 double highest_bits = 0;
4191 int highest_length = 1;
4192 int highest_ident = 0;
4194 list<TGi> use_this_gi;
4200 num_ident, use_this_gi);
4209 if (bits > highest_bits) {
4210 highest_length = align_length;
4211 highest_ident = num_ident;
4213 highest_bits = bits;
4218 return percent_identity;
4222 template<
class container>
bool
4226 double& totalBitScore,
4227 int& percentCoverage,
4228 double& percentIdent,
4233 list<TGi>& use_this_gi)
4235 const string k_GiPrefix =
"gi:";
4236 bool hasScore =
false;
4239 ITERATE (
typename container, iter, scoreList) {
4243 if (
id.GetStr()==
"seq_evalue") {
4244 evalue = (*iter)->GetValue().GetReal();
4245 }
else if (
id.GetStr()==
"seq_bit_score"){
4246 bitScore = (*iter)->GetValue().GetReal();
4247 }
else if (
id.GetStr()==
"seq_total_bit_score"){
4248 totalBitScore = (*iter)->GetValue().GetReal();
4249 }
else if (
id.GetStr()==
"seq_percent_coverage"){
4250 percentCoverage = (*iter)->GetValue().GetInt();
4251 }
else if (
id.GetStr()==
"seq_percent_identity" && (*iter)->GetValue().IsInt()){
4252 percentIdent = (*iter)->GetValue().GetInt();
4253 }
else if (
id.GetStr()==
"seq_percent_identity" && (*iter)->GetValue().IsReal()){
4254 percentIdent = (*iter)->GetValue().GetReal();
4255 }
else if (
id.GetStr()==
"seq_hspnum"){
4256 hspNum = (*iter)->GetValue().GetInt();
4257 }
else if (
id.GetStr()==
"seq_align_totlen"){
4258 totalLen = (*iter)->GetValue().GetReal();
4259 }
else if (
id.GetStr()==
"score"){
4260 rawScore = (*iter)->GetValue().GetInt();
4261 }
else if (
id.GetStr()==
"use_this_gi"){
4262 Uint4 gi_v = (
Uint4) ((*iter)->GetValue().GetInt());
4264 }
else if (
id.GetStr()==
"sum_n"){
4265 sum_n = (*iter)->GetValue().GetInt();
4269 TGi gi = NStr::StringToNumeric<TGi>(strGi);
4270 use_this_gi.push_back(gi);
4281 const string k_GiPrefix =
"gi:";
4288 for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4297 TGi gi = NStr::StringToNumeric<TGi>(strGi);
4298 use_this_gi.push_back(gi);
4316 for (CUser_object::TData::const_iterator fit = fields.begin(); fit != fields.end(); ++fit) {
4323 use_this_seq.push_back(*acc_iter);
4335 bool hasScore =
false;
4337 double bitScore = -1;
4338 double totalBitScore = -1;
4339 int percentCoverage = -1;
4340 double percentIdent = -1;
4342 double totalLen = 0;
4345 list<TGi> use_this_gi;
4346 list<string> use_this_seq;
4350 hasScore =
s_GetBlastScore(aln.
GetScore(),evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4356 evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4359 evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4362 evalue,bitScore, totalBitScore,percentCoverage,percentIdent,hspNum,totalLen,rawScore,sum_n,use_this_gi);
4366 if(use_this_gi.size() == 0) {
4375 seqSetInfo->evalue = evalue;
4376 seqSetInfo->bit_score = bitScore;
4377 seqSetInfo->total_bit_score = totalBitScore;
4378 seqSetInfo->percent_coverage = percentCoverage;
4379 seqSetInfo->percent_identity = percentIdent;
4380 seqSetInfo->hspNum = hspNum;
4381 seqSetInfo->totalLen = (
Int8)totalLen;
4383 seqSetInfo->sum_n = sum_n == -1 ? 1:sum_n ;
4386 seqSetInfo->use_this_seq = use_this_seq;
4387 seqSetInfo->raw_score = rawScore;
4390 seqSetInfo->flip =
false;
4392 return seqSetInfo.release();
4397 list<TGi>& use_this_gi,
4408 list<TGi>& use_this_gi,
4427 iter != bdl.end(); iter++){
4431 if ((*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4432 taxid = (*iter)->GetTaxid();
4434 if (!use_this_gi.empty()) {
4435 ITERATE(list<TGi>, iter_gi, use_this_gi){
4436 if(cur_gi == *iter_gi){
4443 if ((*iter_id)->Match(aln_id)
4445 (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4465 const string k_GiPrefix =
"gi:";
4466 const string k_SeqIDPrefix =
"seqid:";
4490 ITERATE(list<string>, iter_seq, use_this_seq){
4499 list<TGi> use_this_gi;
4500 ITERATE(list<string>, iter_seq, use_this_seq){
4503 if(isGi) use_this_gi.push_back(NStr::StringToNumeric<TGi>(strGI));
4516 ITERATE(list<string>, iter_seq, use_this_seq){
4519 if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4524 if(isGiList) *isGiList = isGi;
4533 if(alnSeqID->
IsGi()) {
4540 bool found = std::find(seqList.begin(), seqList.end(), curSeqID) != seqList.end();
4543 ITERATE(list<string>, iter_seq, use_this_seq){
4545 found = std::find(seqList.begin(), seqList.end(), useThisSeq) != seqList.end();
4556 bool has_match =
false;
4558 ITERATE(list<string>, iter_seq, use_this_seq) {
4561 if(useThisSeq == textSeqIDToMatch) {
4571 list<string> new_use_this_seq;
4572 bool hasAccType =
false;
4575 ITERATE(list<string>, iter_seq, use_this_seq) {
4578 if(useThisSeqAccType != accessionType) {
4579 new_use_this_seq.push_back(useThisSeq);
4585 use_this_seq = new_use_this_seq;
4591 list<string>& use_this_seq,
4607 if(gi) *gi =
FindGi(*ids);
4608 if(textSeqID) *textSeqID =
GetLabel(wid,
true);
4612 iter != bdl.end(); iter++){
4616 string curSeqID =
GetLabel(wid,
true);
4617 if (taxid && (*iter)->IsSetTaxid() && (*iter)->CanGetTaxid()){
4618 *taxid = (*iter)->GetTaxid();
4620 if (!use_this_seq.empty()) {
4621 ITERATE(list<string>, iter_seq, use_this_seq){
4624 if((isGi && cur_gi == NStr::StringToNumeric<TGi>((useThisSeq))) || (!isGi && curSeqID == useThisSeq)){
4631 if ((*iter_id)->Match(aln_id)
4633 (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4640 if(gi) *gi = cur_gi;
4641 if(textSeqID) *textSeqID = curSeqID;
4653 list<TGi>& use_this_gi)
4662 iter != bdl.end(); iter++){
4665 if (!use_this_gi.empty()) {
4666 ITERATE(list<TGi>, iter_gi, use_this_gi){
4667 if(cur_gi == *iter_gi){
4674 if ((*iter_id)->Match(aln_id)
4676 (*iter_id)->IsGeneral() && (*iter_id)->GetGeneral().CanGetDb() &&
4708 list<CRef<CSeq_align> >::iterator mItr=alnset.
Set().begin();
4722 for(;mItr != alnset.
Set().end(); ++mItr) {
4732 if(coll[0] == align_subj_rng) {
4736 subj_rng_coll += align_subj_rng;
4748 query_rng.
SetFrom(
map.GetSeqPosFromAlnPos(0,subj_aln_start));
4749 query_rng.
SetTo(
map.GetSeqPosFromAlnPos(0,subj_aln_end));
4753 subj_rng_coll += subj_rng;
4771 for (CBioseq::TId::const_iterator iter = ids.begin(); iter != ids.end();
4773 if ((*iter)->Which() == choice){
4800 for (;desc_t; ++desc_t) {
4810 if (
id.IsGi() ||
id.IsPrf() ||
id.IsPir()) {
4811 retval =
id.AsFastaString();
4814 retval =
id.GetSeqIdString(
true);
4823 bool hasTextSeqID =
true;
4829 hasTextSeqID =
false;
4833 if(hasTextSeqID && textSeqID) {
4836 return hasTextSeqID;
4843 bool hasTextSeqID =
false;
4853 if(!seqID.
Empty()) {
4854 hasTextSeqID =
true;
4857 return hasTextSeqID;
4861 vector <string> &seqList)
4864 list<string> use_this_seq;
4869 subid = &((*iter)->GetSeq_id(1));
4870 if(previous_id.
Empty() || !subid->
Match(*previous_id)){
4871 use_this_seq.clear();
4876 previous_id = subid;
4878 new_aln->
Set().push_back(*iter);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBlastServices class.
TDim GetNumRows(void) const
TSeqPos GetAlnStop(TNumseg seg) const