82 colinfo +=
"ID: missing ";
126 vector<CTableAnnotDataSource::STableLocation>
129 vector<STableLocation> locations;
132 bool missing_assembly =
false;
133 bool is_rsid =
false;
144 msg =
"SeqTable not found in SeqAnnot";
151 msg =
"SeqAnnot does not contain a SeqTable";
159 if (column_meta_info ==
NULL)
162 bool prev_is_rsid =
false;
163 int prev_id_idx = -1;
164 int prev_start_idx = -1;
165 int prev_end_idx = -1;
166 int prev_strand_idx = -1;
167 int prev_length_idx = -1;
168 int prev_data_region_idx = -1;
169 int prev_genotype_idx = -1;
172 for (
size_t idx=0; idx<cols.size(); ++idx) {
180 if (idx < column_meta_info->GetData().
size()) {
194 if (assembly !=
"") {
198 msg =
"Assembly not provided for chromosome";
203 missing_assembly =
true;
250 (prev_id_idx >= 0 && (prev_is_rsid ||
251 (prev_start_idx > prev_id_idx &&
252 (prev_end_idx > prev_start_idx ||
253 prev_length_idx > prev_start_idx) &&
254 (prev_strand_idx > prev_id_idx ||
255 strand_required ==
false))) )) {
260 prev_length_idx, prev_strand_idx, prev_data_region_idx, prev_genotype_idx, prev_is_rsid);
261 locations.push_back(
fc);
270 prev_length_idx = -1;
271 prev_data_region_idx = -1;
272 prev_genotype_idx = -1;
276 prev_id_idx = (
int)idx;
277 prev_is_rsid = is_rsid;
280 prev_start_idx = (
int)idx;
283 prev_end_idx = (
int)idx;
286 prev_length_idx = (
int)idx;
292 if (prev_strand_idx == -1 || prev_strand_idx < prev_start_idx)
293 prev_strand_idx = (
int)idx;
296 prev_data_region_idx = (
int)idx;
299 prev_genotype_idx = (
int)idx;
305 if ((prev_id_idx >= 0) && (prev_is_rsid ||
306 (prev_start_idx > prev_id_idx &&
307 (prev_end_idx > prev_start_idx || prev_length_idx > prev_id_idx) &&
308 (prev_strand_idx > prev_id_idx || strand_required ==
false))) ) {
312 prev_length_idx, prev_strand_idx, prev_data_region_idx, prev_genotype_idx, prev_is_rsid);
313 locations.push_back(
fc);
317 if (locations.size() == 0) {
318 if (prev_id_idx == -1) {
319 if (missing_assembly)
320 msg =
"Assembly not provided for chromosome";
322 msg =
"Missing ID column";
325 if (prev_start_idx == -1)
326 msg =
"Start Position column not defined";
328 if (prev_end_idx == -1 && prev_length_idx == -1) {
330 msg +=
" and neither a Stop Position nor Length column was given";
332 msg =
"Neither a Stop Position nor Length column was defined";
335 if (strand_required && prev_strand_idx == -1) {
337 msg =
"Strand column not defined";
340 msg +=
" Also, no Strand column was identified.";
354 if (++err_count < 100) {
355 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
376 if (column_meta_info ==
NULL)
383 LOG_POST(
"Error - SeqTable not found in SeqAnnot");
389 LOG_POST(
"Error - SeqAnnot does not contain a SeqTable");
397 bool start_one_based =
true;
402 bool stop_one_based =
true;
403 if (
fc.m_StopPosCol >= 0) {
411 stop_one_based = start_one_based;
417 string id_meta_info_str;
419 bool is_chromosome =
false;
431 if (!assembly.empty() ) {
442 is_chromosome =
true;
452 string properties =
string(
"&xtype=SeqLoc") +
" &derived_field=true";
459 column->SetHeader(*cinfo);
493 for (
size_t row=0; row<cols[
fc.m_IdCol]->GetData().GetSize(); ++row) {
497 bool null_seqloc =
false;
502 if (!cols[
fc.m_StartPosCol]->TryGetInt(row, from)) {
503 logstr =
"cannot convert start position to an integer";
505 if (++err_count < 100) {
506 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
511 if (start_one_based) {
514 logstr =
"start column is one-based but value is < 1";
518 if (++err_count < 100) {
519 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
524 cols[
fc.m_StartPosCol]->SetData().SetInt()[row] = from;
528 logstr =
"start column is < 0";
533 if (++err_count < 100) {
534 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
539 if (
fc.m_StopPosCol >= 0) {
540 if (!cols[
fc.m_StopPosCol]->TryGetInt(row, to)) {
541 logstr =
"cannot convert stop position to an integer";
545 if (++err_count < 100) {
546 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
551 if (stop_one_based) {
553 logstr =
"stop column is one-based but value is < 1";
559 if (++err_count < 100) {
560 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
565 cols[
fc.m_StopPosCol]->SetData().SetInt()[row] = to;
569 logstr =
"stop column is < 0";
575 if (++err_count < 100) {
576 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
581 else if (
fc.m_LengthCol >= 0) {
583 if (!cols[
fc.m_LengthCol]->TryGetInt(row,
len)) {
584 logstr =
"cannot convert length to an integer";
588 if (++err_count < 100) {
589 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
598 if (
fc.m_StrandCol != -1) {
599 if (cols[
fc.m_StrandCol]->GetData().IsString()) {
600 string strand = *cols[
fc.m_StrandCol]->GetStringPtr(row);
614 if (++err_count < 100) {
615 logstr =
" strand identifier: " + strand +
616 " must be +,-,pos, or neg";
620 LOG_POST(
"Invalid row " << row+1 <<
" " << logstr);
624 else if (cols[
fc.m_StrandCol]->GetData().IsInt()) {
626 if (cols[
fc.m_StrandCol]->TryGetInt(row, strand_int)) {
628 if ((strand_int >= 0 && strand_int <= 4) || strand_int==255) {
642 if (++err_count < 100) {
648 LOG_POST(logstr <<
" in row: " << row+1);
655 logstr =
"Unable to retrieve strand value";
659 if (++err_count < 100)
660 LOG_POST(logstr <<
" in row: " << row+1);
676 if (
fc.m_StrandCol == -1) {
693 else if (!is_chromosome) {
696 if (cols[
fc.m_IdCol]->GetData().IsInt()) {
697 int gid = cols[
fc.m_IdCol]->GetData().GetInt()[row];
705 logstr =
"Error constructing seq-id from GI " +
710 if (++err_count < 100) {
711 LOG_POST(logstr <<
" in row: " << row+1);
718 seq_id = cols[
fc.m_IdCol]->GetSeq_id(row);
725 logstr =
"Exception getting seq-id " + e.
GetMsg();
729 if (++err_count < 100)
730 LOG_POST(logstr <<
" in row: " << row+1);
736 string idstr = *cols[
fc.m_IdCol]->GetStringPtr(row);
743 logstr =
"Error creating id for: " + idstr +
" : " + ex.
GetMsg();
747 if (++err_count < 100) {
748 LOG_POST(
"Invalid row " << row + 1 <<
" " << logstr);
755 logstr =
"Unable to generate seq-id from id: " + idstr;
759 if (++err_count < 100) {
760 LOG_POST(
"Invalid row " << row + 1 <<
" " << logstr);
764 if (seq_id && !null_seqloc) {
782 error_column->
SetData().SetString().push_back(err_str);
791 table.SetColumns().push_back(error_column);
793 string properties =
string(
"&xtype=") +
795 " &derived_field=true";
811 if (column_meta_info ==
NULL)
818 LOG_POST(
"Error - SeqTable not found in SeqAnnot");
824 LOG_POST(
"Error - SeqAnnot does not contain a SeqTable");
835 string id_meta_info_str;
855 string properties =
string(
"&xtype=SeqLoc") +
" &derived_field=true";
862 column->SetHeader(*cinfo);
888 size_t rsid_search_size = 100;
900 size_t search_row = 0;
901 size_t current_size = 0;
902 size_t row_count = cols[
fc.m_IdCol]->GetData().GetSize();
903 while (search_row < row_count) {
907 for (current_size=0; search_row < row_count && current_size<rsid_search_size; ++current_size, ++search_row) {
908 rsids += *cols[
fc.m_IdCol]->GetStringPtr(search_row);
912 rsids = rsids.substr(0, rsids.length()-1);
921 LOG_POST(
"Exception while searching SNP database: " + e.
GetMsg());
925 if (rsid_search_size > 2) {
927 rsid_search_size = rsid_search_size/2;
932 LOG_POST(
"Unidentified exception while searching SNP database");
936 if (rsid_search_size > 2) {
938 rsid_search_size = rsid_search_size/2;
945 NSNPWebServices::TSNPSearchCompoundResultList::iterator current_iter = SNPSearchResultList.begin();
947 for (; row < search_row; ++row) {
955 string rsid = *cols[
fc.m_IdCol]->GetStringPtr(row);
959 NSNPWebServices::TSNPSearchCompoundResultList::iterator iter = current_iter;
962 if (iter->first != rsid) {
964 iter = std::find_if(SNPSearchResultList.begin(), SNPSearchResultList.end(), pred);
967 if (iter == SNPSearchResultList.end()) {
968 x_LogErr(
"No matches for given rsid", errstr, err_count,
static_cast<int>(row));
970 else if ((*iter).second.size() == 0) {
971 x_LogErr(
"No matches for rsid and assembly: " + assembly,
972 errstr, err_count,
static_cast<int>(row));
978 if (first_result->CanGetPlacements() == 0) {
979 x_LogErr(
"Unexpected absence of placements in SNP Search Result", errstr, err_count,
static_cast<int>(row));
983 if (placements.size() == 0) {
984 x_LogErr(
"Unexpected number of placements (0) in SNP Search Result", errstr, err_count,
static_cast<int>(row));
987 location->Assign(placements.front()->GetLoc());
995 if (!found || errstr !=
"") {
1003 error_column->
SetData().SetString().push_back(errstr);
1015 if (err_count > 0) {
1016 table.SetColumns().push_back(error_column);
1018 string properties =
string(
"&xtype=") +
1020 " &derived_field=true";
1036 if (column_meta_info ==
NULL)
1043 LOG_POST(
"Error - SeqTable not found in SeqAnnot");
1049 LOG_POST(
"Error - SeqAnnot does not contain a SeqTable");
1064 for (
size_t col_num=0; col_num<cols.size(); ++col_num) {
1065 if (col_num !=
fc.m_IdCol &&
1066 col_num !=
fc.m_LengthCol &&
1067 col_num !=
fc.m_StartPosCol &&
1068 col_num !=
fc.m_StopPosCol &&
1069 col_num !=
fc.m_StrandCol) {
1090 for (
size_t row=0; row<col_data.size(); ++row) {
1097 cols[col_num]->SetData().SetString().push_back(
val);
1111 for (
size_t row=0; row<col_data.size(); ++row) {
1118 cols[col_num]->SetData().SetString().push_back(
val);
1127 bool start_one_based =
true;
1134 if (start_one_based) {
1137 start_meta_info.
SetData().SetStr(meta_info_str);
1141 bool stop_one_based =
true;
1142 if (
fc.m_StopPosCol >= 0) {
1149 if (stop_one_based) {
1152 stop_meta_info.
SetData().SetStr(meta_info_str);
1158 stop_one_based = start_one_based;
1178 if (!assembly.empty() ) {
1207 if (
fc.m_StrandCol == -1 ||
1208 cols[
fc.m_StrandCol]->GetData().IsString()) {
1212 if (
fc.m_StrandCol != -1) {
1214 cols[
fc.m_StrandCol]->GetHeader();
1218 cinfo->
SetTitle(
string(
"Column.") +
1231 if (
fc.m_DataRegionCol == -1) {
1233 cinfo->
SetTitle(
"Data Region Col");
1246 if (
fc.m_StopPosCol == -1 &&
fc.m_LengthCol >= 0) {
1252 stop_position_column->
SetHeader(*cinfo);
1262 bool errors_occured =
false;
1305 if (!xform_strand_column.
IsNull() ||
1306 !stop_position_column.
IsNull() ||
1307 !data_region_column.
IsNull() ||
1308 !(mapper ==
NULL) ||
1313 for (
size_t row=0; row<cols[
fc.m_IdCol]->GetData().GetSize(); ++row) {
1320 if (!cols[
fc.m_StartPosCol]->TryGetInt(row, from)) {
1321 logstr =
" cannot convert start position to an integer";
1322 if (++err_count < 100) {
1323 LOG_POST(
"Invalid row " << row+1 << logstr);
1327 if (start_one_based) {
1329 logstr =
" start column is one-based but value is < 1";
1330 if (++err_count < 100) {
1331 LOG_POST(
"Invalid row " << row+1 << logstr);
1336 cols[
fc.m_StartPosCol]->SetData().SetInt()[row] = from;
1338 else if (from < 0) {
1339 logstr =
" start column is < 0";
1340 if (++err_count < 100) {
1341 LOG_POST(
"Invalid row " << row+1 << logstr);
1350 if (!stop_position_column.
IsNull()) {
1353 if (!cols[
fc.m_LengthCol]->TryGetInt(row,
len)) {
1354 logstr =
" cannot convert length to an integer";
1355 if (errstr !=
"") errstr +=
" | ";
1358 if (++err_count < 100) {
1359 LOG_POST(
"Invalid row " << row+1 << logstr);
1365 logstr =
" length column is < 0";
1366 if (errstr !=
"") errstr +=
" | ";
1369 if (++err_count < 100) {
1370 LOG_POST(
"Invalid row " << row+1 << logstr);
1377 stop_position_column->
SetData().SetInt().push_back(to);
1382 if (!cols[
fc.m_StopPosCol]->TryGetInt(row, to)) {
1383 logstr =
" cannot convert stop position to an integer";
1384 if (errstr !=
"") errstr +=
" | ";
1387 if (++err_count < 100) {
1388 LOG_POST(
"Invalid row " << row+1 << logstr);
1392 if (stop_one_based) {
1394 logstr =
" stop column is one-based but value is < 1";
1395 if (errstr !=
"") errstr +=
" | ";
1398 if (++err_count < 100) {
1399 LOG_POST(
"Invalid row " << row+1 << logstr);
1404 cols[
fc.m_StopPosCol]->SetData().SetInt()[row] = to;
1407 logstr =
" stop column is < 0";
1408 if (errstr !=
"") errstr +=
" | ";
1411 if (++err_count < 100) {
1412 LOG_POST(
"Invalid row " << row+1 << logstr);
1421 if (!xform_strand_column.
IsNull()) {
1423 if (
fc.m_StrandCol != -1) {
1424 string strand = *cols[
fc.m_StrandCol]->GetStringPtr(row);
1429 logstr =
string(
" error - strand: \"") + strand +
"\" not valid";
1430 if (errstr !=
"") errstr +=
" | ";
1433 if (++err_count < 100)
1434 LOG_POST(
"Invalid row " << row+1 << logstr);
1453 cols[
fc.m_StartPosCol]->SetData().SetInt()[row] = from;
1458 if (!stop_position_column.
IsNull())
1459 stop_position_column->
SetData().SetInt().push_back(to);
1461 cols[
fc.m_StopPosCol]->SetData().SetInt()[row] = to;
1465 xform_strand_column->
SetData().SetInt().push_back((
int)strand_e);
1470 if (!data_region_column.
IsNull()) {
1472 data_region_column->
SetData().SetString().push_back(region_name);
1475 if (mapper !=
NULL) {
1478 string idstr = *cols[
fc.m_IdCol]->GetStringPtr(row);
1482 xform_ids_column->
SetData().SetId().push_back(seq_id);
1487 xform_ids_column->
SetData().SetId().push_back(seq_id);
1489 logstr =
string(
" Error id: ") + idstr +
" - " + ex.
GetMsg();
1490 if (errstr !=
"") errstr +=
" | ";
1493 if (++err_count < 100) {
1494 LOG_POST(
"Invalid row " << row + 1 << logstr);
1507 xform_ids_column->
SetData().SetId().push_back(seq_id);
1514 errors_occured =
true;
1515 disabled_column->
SetSparse().SetIndexes().push_back(
static_cast<int>(row));
1521 error_column->
SetData().SetString().push_back(errstr);
1535 if (!xform_strand_column.
IsNull()) {
1536 if (
fc.m_StrandCol != -1) {
1537 table.SetColumns().erase(
table.SetColumns().begin() +
fc.m_StrandCol);
1538 table.SetColumns().insert(
table.SetColumns().begin() +
fc.m_StrandCol,
1539 xform_strand_column);
1542 table.SetColumns().push_back(xform_strand_column);
1544 string properties =
string(
"&xtype=") +
1546 " &derived_field=true";
1552 if (!data_region_column.
IsNull()) {
1553 table.SetColumns().push_back(data_region_column);
1556 string properties =
string(
"&xtype=") +
1558 " &derived_field=true";
1562 if (!stop_position_column.
IsNull()) {
1563 table.SetColumns().push_back(stop_position_column);
1566 string properties =
string(
"&xtype=") +
1568 " &derived_field=true";
1572 if (!xform_ids_column.
IsNull()) {
1573 table.SetColumns().erase(
table.SetColumns().begin() +
fc.m_IdCol);
1574 table.SetColumns().insert(
table.SetColumns().begin() +
fc.m_IdCol,
1578 if (errors_occured) {
1579 table.SetColumns().push_back(error_column);
1581 string properties =
string(
"&xtype=") +
1583 " &derived_field=true";
1586 table.SetColumns().push_back(disabled_column);
1588 properties =
string(
"&xtype=disabled_indices") +
1590 " &derived_field=true";
1605 if (column_meta_info ==
NULL)
1612 LOG_POST(
"Error - SeqTable not found in SeqAnnot");
1618 LOG_POST(
"Error - SeqAnnot does not contain a SeqTable");
1633 for (
size_t col_num=0; col_num<cols.size(); ++col_num) {
1653 for (
size_t row=0; row<col_data.size(); ++row) {
1660 cols[col_num]->SetData().SetString().push_back(
val);
1674 for (
size_t row=0; row<col_data.size(); ++row) {
1681 cols[col_num]->SetData().SetString().push_back(
val);
1711 if (
fc.m_StrandCol != -1) {
1713 cols[
fc.m_StrandCol]->GetHeader();
1730 if (
fc.m_StrandCol == -1 ||
1731 cols[
fc.m_StrandCol]->GetData().IsString()) {
1735 if (
fc.m_StrandCol != -1) {
1737 cols[
fc.m_StrandCol]->GetHeader();
1753 if (
fc.m_DataRegionCol == -1) {
1755 cinfo->
SetTitle(
"Data Region Col");
1768 if (
fc.m_StopPosCol == -1) {
1774 stop_position_column->
SetHeader(*cinfo);
1780 if (
fc.m_StartPosCol == -1) {
1786 start_position_column->
SetHeader(*cinfo);
1796 bool errors_occured =
false;
1831 size_t rsid_search_size = 100;
1847 size_t search_row = 0;
1848 size_t current_size = 0;
1849 size_t row_count = cols[
fc.m_IdCol]->GetData().GetSize();
1850 while (search_row < row_count) {
1854 for (current_size=0; search_row < row_count && current_size<rsid_search_size; ++current_size, ++search_row) {
1855 rsids += *cols[
fc.m_IdCol]->GetStringPtr(search_row);
1859 rsids = rsids.substr(0, rsids.length()-1);
1868 LOG_POST(
"Exception while searching SNP database: " + e.
GetMsg());
1872 if (rsid_search_size > 2) {
1874 rsid_search_size = rsid_search_size/2;
1879 LOG_POST(
"Unidentified exception while searching SNP database");
1883 if (rsid_search_size > 2) {
1885 rsid_search_size = rsid_search_size/2;
1892 NSNPWebServices::TSNPSearchCompoundResultList::iterator current_iter = SNPSearchResultList.begin();
1894 for (; row < search_row; ++row) {
1902 string rsid = *cols[
fc.m_IdCol]->GetStringPtr(row);
1906 NSNPWebServices::TSNPSearchCompoundResultList::iterator iter = current_iter;
1909 if (iter->first != rsid) {
1911 iter = std::find_if(SNPSearchResultList.begin(), SNPSearchResultList.end(), pred);
1914 if (iter == SNPSearchResultList.end()) {
1915 x_LogErr(
"No matches for given rsid", errstr, err_count,
static_cast<int>(row));
1917 else if ((*iter).second.size() == 0) {
1918 x_LogErr(
"No matches for rsid and assembly: " + assembly,
1919 errstr, err_count,
static_cast<int>(row));
1925 if (first_result->CanGetPlacements() == 0) {
1926 x_LogErr(
"Unexpected absence of placements in SNP Search Result", errstr, err_count,
static_cast<int>(row));
1930 if (placements.size() == 0) {
1931 x_LogErr(
"Unexpected number of placements (0) in SNP Search Result", errstr, err_count,
static_cast<int>(row));
1934 const CSeq_id*
id = placements.front()->GetLoc().GetId();
1938 seqid_column->
SetData().SetId().push_back(rid);
1941 start_position_column->
SetData().SetInt().push_back(from);
1944 stop_position_column->
SetData().SetInt().push_back(to);
1946 xform_strand_column->
SetData().SetInt().push_back(placements.front()->GetLoc().GetStrand());
1955 seqid_column->
SetData().SetId().push_back(empty_id);
1956 start_position_column->
SetData().SetInt().push_back(0);
1957 stop_position_column->
SetData().SetInt().push_back(0);
1963 if (!data_region_column.
IsNull()) {
1965 data_region_column->
SetData().SetString().push_back(region_name);
1971 errors_occured =
true;
1972 disabled_column->
SetSparse().SetIndexes().push_back(
static_cast<int>(row));
1975 error_column->
SetData().SetString().push_back(errstr);
1986 if (!seqid_column.
IsNull()) {
1987 table.SetColumns().push_back(seqid_column);
1989 string properties =
string(
"&xtype=") +
1991 " &derived_field=true";
1995 if (!xform_strand_column.
IsNull()) {
1996 if (
fc.m_StrandCol != -1) {
1997 table.SetColumns().erase(
table.SetColumns().begin() +
fc.m_StrandCol);
1998 table.SetColumns().insert(
table.SetColumns().begin() +
fc.m_StrandCol,
1999 xform_strand_column);
2002 table.SetColumns().push_back(xform_strand_column);
2004 string properties =
string(
"&xtype=") +
2006 " &derived_field=true";
2012 if (!data_region_column.
IsNull()) {
2013 table.SetColumns().push_back(data_region_column);
2016 string properties =
string(
"&xtype=") +
2018 " &derived_field=true";
2022 if (!start_position_column.
IsNull()) {
2023 table.SetColumns().push_back(start_position_column);
2026 string properties =
string(
"&xtype=") +
2028 " &derived_field=true";
2032 if (!stop_position_column.
IsNull()) {
2033 table.SetColumns().push_back(stop_position_column);
2036 string properties =
string(
"&xtype=") +
2038 " &derived_field=true";
2042 if (errors_occured) {
2043 table.SetColumns().push_back(error_column);
2045 string properties =
string(
"&xtype=") +
2047 " &derived_field=true";
2050 table.SetColumns().push_back(disabled_column);
2052 properties =
string(
"&xtype=disabled_indices") +
2054 " &derived_field=true";
2069 if (column_meta_info ==
NULL)
2076 LOG_POST(
"Error - SeqTable not found in SeqAnnot");
2082 LOG_POST(
"Error - SeqAnnot does not contain a SeqTable");
2099 for (
size_t col_num=0; col_num<cols.size(); ++col_num) {
2119 for (
size_t row=0; row<col_data.size(); ++row) {
2126 cols[col_num]->SetData().SetString().push_back(
val);
2140 for (
size_t row=0; row<col_data.size(); ++row) {
2147 cols[col_num]->SetData().SetString().push_back(
val);
2197 if (
fc.m_StrandCol != -1) {
2199 cols[
fc.m_StrandCol]->GetHeader();
2214 if (
fc.m_StartPosCol == -1) {
2220 start_position_column->
SetHeader(*cinfo);
2258 bool errors_occured =
false;
2293 size_t rsid_search_size = 100;
2305 size_t search_row = 0;
2306 size_t current_size = 0;
2307 size_t row_count = cols[
fc.m_IdCol]->GetData().GetSize();
2308 while (search_row < row_count) {
2312 for (current_size=0; search_row < row_count && current_size<rsid_search_size; ++current_size, ++search_row) {
2313 rsids += *cols[
fc.m_IdCol]->GetStringPtr(search_row);
2317 rsids = rsids.substr(0, rsids.length()-1);
2326 LOG_POST(
"Exception while searching SNP database: " + e.
GetMsg());
2330 if (rsid_search_size > 2) {
2332 rsid_search_size = rsid_search_size/2;
2337 LOG_POST(
"Unidentified exception while searching SNP database");
2341 if (rsid_search_size > 2) {
2343 rsid_search_size = rsid_search_size/2;
2350 NSNPWebServices::TSNPSearchCompoundResultList::iterator current_iter = SNPSearchResultList.begin();
2352 for (; row < search_row; ++row) {
2359 string rsid = *cols[
fc.m_IdCol]->GetStringPtr(row);
2363 NSNPWebServices::TSNPSearchCompoundResultList::iterator iter = current_iter;
2366 if (iter->first != rsid) {
2368 iter = std::find_if(SNPSearchResultList.begin(), SNPSearchResultList.end(), pred);
2371 if (iter == SNPSearchResultList.end()) {
2372 x_LogErr(
"No matches for given rsid", errstr, err_count,
static_cast<int>(row));
2374 else if ((*iter).second.size() == 0) {
2375 x_LogErr(
"No matches for rsid and assembly: " + assembly,
2376 errstr, err_count,
static_cast<int>(row));
2382 if (first_result->CanGetPlacements() == 0) {
2383 x_LogErr(
"Unexpected absence of placements in SNP Search Result", errstr, err_count,
static_cast<int>(row));
2387 if (placements.size() == 0) {
2388 x_LogErr(
"Unexpected number of placements (0) in SNP Search Result", errstr, err_count,
static_cast<int>(row));
2391 const CSeq_id*
id = placements.front()->GetLoc().GetId();
2395 seqid_column->
SetData().SetId().push_back(rid);
2398 start_position_column->
SetData().SetInt().push_back(from);
2405 if (
fc.m_GenotypeCol != -1) {
2406 string geno_str = *cols[
fc.m_GenotypeCol]->GetStringPtr(row);
2410 if (geno_str.length() >= 1)
2412 if (geno_str.length() >= 2)
2417 CCommonString_table_Base::TStrings::iterator iter = std::find(strs.begin(), strs.end(), val1);
2418 if (iter != strs.end()) {
2419 size_t idx = iter-strs.begin();
2420 replace_column1->
SetData().SetCommon_string().SetIndexes().push_back(
static_cast<int>(idx));
2423 strs.push_back(val1);
2424 replace_column1->
SetData().SetCommon_string().SetIndexes().push_back(
static_cast<int>(strs.size()-1));
2430 CCommonString_table_Base::TStrings::iterator iter = std::find(strs.begin(), strs.end(), val2);
2431 if (iter != strs.end()) {
2432 size_t idx = iter-strs.begin();
2433 replace_column2->
SetData().SetCommon_string().SetIndexes().push_back(
static_cast<int>(idx));
2436 strs.push_back(val2);
2437 replace_column2->
SetData().SetCommon_string().SetIndexes().push_back(
static_cast<int>(strs.size()-1));
2445 seqid_column->
SetData().SetId().push_back(empty_id);
2446 start_position_column->
SetData().SetInt().push_back(0);
2452 errors_occured =
true;
2453 disabled_column->
SetSparse().SetIndexes().push_back(
static_cast<int>(row));
2459 error_column->
SetData().SetString().push_back(errstr);
2471 if (!variation_column.
IsNull()) {
2472 table.SetColumns().push_back(variation_column);
2474 string properties =
string(
"&xtype=") +
2476 " &derived_field=true";
2480 if (!seqid_column.
IsNull()) {
2481 table.SetColumns().push_back(seqid_column);
2483 string properties =
string(
"&xtype=") +
2485 " &derived_field=true";
2489 if (!start_position_column.
IsNull()) {
2490 table.SetColumns().push_back(start_position_column);
2493 string properties =
string(
"&xtype=") +
2495 " &derived_field=true";
2500 if (!replace_column1.
IsNull()) {
2501 table.SetColumns().push_back(replace_column1);
2504 string properties =
string(
"&xtype=") +
2506 " &derived_field=true";
2510 if (!replace_column1.
IsNull()) {
2511 table.SetColumns().push_back(replace_column1);
2514 string properties =
string(
"&xtype=") +
2516 " &derived_field=true";
2520 if (errors_occured) {
2521 table.SetColumns().push_back(error_column);
2523 string properties =
string(
"&xtype=") +
2525 " &derived_field=true";
2528 table.SetColumns().push_back(disabled_column);
2530 properties =
string(
"&xtype=disabled_indices") +
2532 " &derived_field=true";
2562 if (column_meta_info ==
NULL)
2566 for (
int idx=(
int)cols.size()-1; idx>=0; --idx) {
2572 table.SetColumns().erase(
table.SetColumns().begin() + idx);
2573 column_meta_info->
SetData().erase(column_meta_info->
SetData().begin() + idx);
2583 ofstream ofs(fname.c_str());
2605 const string& tag_name)
2609 string tag =
"&" + tag_name +
"=";
2611 size_t tag_idx = meta_string.find(
tag);
2612 if (tag_idx != string::npos) {
2615 size_t tag_end_idx = meta_string.find_first_of(
"&", tag_idx+1);
2616 size_t start_idx = tag_idx +
string(
tag).length();
2618 if (tag_end_idx == string::npos) {
2619 value = meta_string.substr(start_idx, meta_string.length()-start_idx);
2622 value = meta_string.substr(start_idx, tag_end_idx-start_idx);
2632 const string& tag_name,
2633 const string& new_value)
2637 string tag =
"&" + tag_name +
"=";
2639 size_t tag_idx = meta_string.find(
tag);
2640 if (tag_idx != string::npos) {
2643 size_t tag_end_idx = meta_string.find_first_of(
"&", tag_idx+1);
2644 size_t start_idx = tag_idx +
string(
tag).length();
2648 if (tag_end_idx == string::npos) {
2649 meta_string.erase(start_idx, meta_string.length()-start_idx);
2650 meta_string += trim_value;
2653 meta_string.erase(start_idx, tag_end_idx-start_idx);
2655 meta_string.insert(start_idx, trim_value +
" ");
2668 LOG_POST(
"Error - annot-desc not initialized.");
2673 if (!annot_desc.
CanGet()) {
2674 LOG_POST(
"Error - annot-desc list not initialized.");
2680 CAnnot_descr_Base::Tdata::iterator tditer = desc_list_data.begin();
2682 for (; tditer!=desc_list_data.end(); ++tditer) {
2683 if ( (*tditer)->IsUser() )
2687 if (tditer == desc_list_data.end()) {
2688 LOG_POST(
"Error - User data not found in annot-desc");
2698 return &column_meta_info;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsRefSeq() const
Is this assembly a RefSeq assembly?
static CRef< CGencollSvc > GetInstance(void)
IdMapper implementation using a GC-Assembly.
EAliasMapping
Mapping destination type.
@ eGenBankAcc
GenBank accession.
@ eRefSeqAcc
RefSeq accession.
IdMapper base class implementation.
ENa_strand x_GetStrand(string strand)
Return enumerated strand type based on string.
void x_LogErr(const string &logstr, string &errstr, int &err_count, int row)
Log an error (but stop logging if the error count gets high).
bool x_UpdateMetaInfoTag(string &meta_string, const string &tag_name, const string &new_value)
Change the value for 'tag_name' in 'meta_string' to 'new_value'.
CTableAnnotDataSource()
ctor
bool AddSeqLoc(const STableLocation &fc, int loc_number, ICanceled *cancel=NULL)
Add a location to the table based on the specified columns.
CAnnotdesc::TUser * x_GetColumnMetaInfo()
Get meta information stored in User_Data in m_AnnotContainer.
string GetField(size_t row, size_t col) const
return a specific field from a specific row, based on current table type and delimiter
vector< STableLocation > FindLocations(string &msg, bool strand_required)
Find columns that can be combined to form locations based on column info. If no locations are found,...
bool CreateSnps(const STableLocation &fc, ICanceled *cancel=NULL)
Create SNPs (features that show variations) for each row.
string x_GetMetaInfoTag(const string &meta_string, const string &tag_name)
Search string 'meta_string' for the value assigned to 'tag_name', e.g.
bool CreateSnpFeature(const STableLocation &fc, ICanceled *cancel=NULL)
Create a region feature for each snp using specified snp (rsid) column.
void RemoveSeqLocs()
Remove any seqloc columns (can be used to undo the AddSeqLoc actions)
bool AddSnpSeqLoc(const STableLocation &fc, int loc_number, ICanceled *cancel=NULL)
Add a location to the table using the rsid (snp/variation) ids location from the snp db.
void Clear()
clears all columns, rows and delimiters
bool CreateFeature(const STableLocation &fc, ICanceled *cancel=NULL)
Create a feature using specified columns.
CRef< CSeq_annot > m_AnnotContainer
void WriteAsn(const string &fname)
Write seqtable to file 'fname'.
static string GetStringFromDataType(eDataType t)
Return a string version of a data-type (e.g. "Length" for eLength)
static eDataType GetDataTypeFromString(const string &s)
Return enum data-type from string.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Interface for testing cancellation request in a long lasting operation.
static const char location[]
static const char si[8][64]
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated, and it is strongly recommended to move all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
const string & GetMsg(void) const
Get message string.
static CRef< objects::CSeq_id > MapStringId(const string &str, objects::IIdMapper *mapper)
list< TSNPSearchCompoundResult > TSNPSearchCompoundResultList
results of a search for one or several ids
pair< string, TSNPSearchResultList > TSNPSearchCompoundResult
results of a search for a given id (string); TSNPSearchResultList may be empty if the id has been foun...
static void Search(const std::string &sTerms, const std::string &sAssemblyAccession, TSNPSearchCompoundResultList &ResultList)
search for given SNP ID(s) and get a list of results
#define MSerial_AsnText
I/O stream manipulators.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
TObjectType & GetObject(void)
Get object.
TObjectType & GetNCObject(void) const
Get object.
TObjectType & GetObject(void) const
Get object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
virtual bool IsCanceled(void) const =0
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
static const char label[]
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
TData & SetData(void)
Assign a value to Data data member.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
TBit & SetBit(void)
Select the variant.
TField_id GetField_id(void) const
Get the Field_id member data.
TLoc & SetLoc(void)
Select the variant.
void SetHeader(THeader &value)
Assign a value to Header data member.
void SetField_name(const TField_name &value)
Assign a value to Field_name data member.
const TTitle & GetTitle(void) const
Get the Title member data.
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
vector< CRef< CSeqTable_column > > TColumns
void SetSparse(TSparse &value)
Assign a value to Sparse data member.
const THeader & GetHeader(void) const
Get the Header member data.
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
TString & SetString(void)
Select the variant.
void ResetField_id(void)
Reset Field_id data member.
void SetData(TData &value)
Assign a value to Data data member.
vector< CStringUTF8 > TStrings
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetDefault(TDefault &value)
Assign a value to Default data member.
void SetField_id(TField_id value)
Assign a value to Field_id data member.
@ eField_id_location
location as Seq-loc
@ eField_id_location_id
location Seq-id
@ eField_id_location_strand
location strand
@ eField_id_id_local
main feature fields id.local.id
@ eField_id_location_to
interval to
@ eField_id_location_gi
gi
@ eField_id_data_imp_key
various data fields
@ eField_id_location_from
interval from
@ e_Real
a set of reals, one per row
@ e_Loc
a set of locations, one per row
@ e_String
a set of strings, one per row
@ e_Int
a set of 4-byte integers, one per row
@ e_Common_string
a set of strings with a small set of possible values
@ e_Indexes
Indexes of rows with values.
@ e_Region
named region (globin locus)
ENa_strand
strand of nucleic acid
@ e_Gi
GenInfo Integrated Database.
TSeq_table & SetSeq_table(void)
Select the variant.
Tdata & Set(void)
Assign a value to data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
TName & SetName(void)
Select the variant.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
bool IsSeq_table(void) const
Check if variant Seq_table is selected.
bool CanGet(void) const
Check if it is safe to call Get method.
list< CRef< CAnnotdesc > > Tdata
list< CRef< CVariantPlacement > > TPlacements
unsigned int
A callback function used to compare two keys in a database.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::SIZE size
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
CRef< objects::CObjectManager > om
static const char * column
string GetColumnInfo() const
Return informational string as to which columns are selected.
CompareSNPResults(const string &rsid)
bool operator()(const NSNPWebServices::TSNPSearchCompoundResult &rhs) const