NCBI C++ ToolKit
table_annot_data_source.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: table_annot_data_source.cpp 47485 2023-05-02 14:46:59Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Bob Falk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
36 #include <corelib/ncbistre.hpp>
37 
60 
62 #include <gui/objutils/snp_gui.hpp>
64 #include <gui/objutils/utils.hpp>
65 
66 
67 #include <math.h>
68 #include <sstream>
69 
70 
73 
74 /*****************************************************************************/
75 /*************************** CTableAnnotDataSource::STableLocation ***********/
76 
78 {
79  string colinfo;
80 
81  if (m_IdCol == -1)
82  colinfo += "ID: missing ";
83  else
84  colinfo += "ID: " + NStr::NumericToString(m_IdCol);
85 
86  if (m_StartPosCol != -1)
87  colinfo += " Start Pos: " + NStr::NumericToString(m_StartPosCol);
88 
89  if (m_StopPosCol != -1)
90  colinfo += " Stop Pos: " + NStr::NumericToString(m_StopPosCol);
91 
92  if (m_LengthCol != -1)
93  colinfo += " Length: " + NStr::NumericToString(m_LengthCol);
94 
95  if (m_StrandCol != -1)
96  colinfo += " Strand: " + NStr::NumericToString(m_StrandCol);
97 
98  if (m_GenotypeCol != -1)
99  colinfo += " Genotype: " + NStr::NumericToString(m_GenotypeCol);
100 
101  if (m_DataRegionCol != -1)
102  colinfo += " Data Region: " + NStr::NumericToString(m_DataRegionCol);
103 
104  return colinfo;
105 }
106 
107 /*****************************************************************************/
108 /*************************** CTableAnnotDataSource **************************/
109 
110 
112 {
114 }
115 
117 {
118 }
119 
120 
121 string CTableAnnotDataSource::GetField(size_t row, size_t col) const
122 {
123  return "";
124 }
125 
126 vector<CTableAnnotDataSource::STableLocation>
127 CTableAnnotDataSource::FindLocations(string& msg, bool strand_required)
128 {
129  vector<STableLocation> locations;
130 
131  msg = "";
132  bool missing_assembly = false;
133  bool is_rsid = false;
134 
135  // Since there may be multiple locations on a single
136  // row, we need to have rules for when one location ends and
137  // another one starts. Since we want to allow rows with a single
138  // id (and possibly strand) and multiple start/stop/length's, we
139  // decide that a new location should be saved whenever:
140  // 1. a new id is encountered
141  // 2. a new start position is encountered
142  // 3. The last column is reached
143  if (!m_AnnotContainer->IsSetData()) {
144  msg = "SeqTable not found in SeqAnnot";
145  LOG_POST(Error << msg);
146  return locations;
147  }
148  CSeq_annot::TData& table_container = m_AnnotContainer->SetData();
149 
150  if (!table_container.IsSeq_table()) {
151  msg = "SeqAnnot does not contain a SeqTable";
152  LOG_POST(Error << msg);
153  return locations;
154  }
155  CSeq_table& table = table_container.SetSeq_table();
156  CSeq_table::TColumns& cols = table.SetColumns();
157 
158  CAnnotdesc::TUser* column_meta_info = x_GetColumnMetaInfo();
159  if (column_meta_info == NULL)
160  return locations;
161 
162  bool prev_is_rsid = false;
163  int prev_id_idx = -1;
164  int prev_start_idx = -1;
165  int prev_end_idx = -1;
166  int prev_strand_idx = -1;
167  int prev_length_idx = -1;
168  int prev_data_region_idx = -1;
169  int prev_genotype_idx = -1;
170 
171  // Iterate over columns looking for possible locations
172  for (size_t idx=0; idx<cols.size(); ++idx) {
173 
174  CSeqTable_column_info& header = cols[idx]->SetHeader();
175 
176  string field_name;
177  string assembly;
179 
180  if (idx < column_meta_info->GetData().size()) {
181  const CUser_field& field_meta_info = column_meta_info->GetData()[idx].GetObject();
182  field_name = x_GetMetaInfoTag(field_meta_info.GetData().GetStr(), "xtype");
183  assembly = x_GetMetaInfoTag(field_meta_info.GetData().GetStr(),
184  "genome_assembly");
185  }
186 
191  // Chromosomes need an assembly otherwise we can't use them
194  if (assembly != "") {
196  }
197  else {
198  msg = "Assembly not provided for chromosome";
199  LOG_POST(Error << msg);
200  // If there is no assembly, set the type to a non-id type so that it is not
201  // seen as an id field when/if the table is converted to a feature table.
202  //header.SetField_id(CSeqTable_column_info::eField_id_comment);
203  missing_assembly = true;
204  }
205  }
206  else {
207  // accept any other kind of id here, so just call it unspecified
209  }
210 
211  // SNP/Variation ids have locations that we will look up rather than using
212  // the value in the table
213  is_rsid = false;
214  const CUser_field& id_meta_info = column_meta_info->GetData()[idx].GetObject();
215  if (id_meta_info.IsSetData() && id_meta_info.GetData().IsStr())
216  is_rsid = x_GetMetaInfoTag(id_meta_info.GetData().GetStr(), "xtype") == "Rsid";
217  }
220  }
221  else if (CTableImportColumn::GetDataTypeFromString(field_name) ==
224  }
225  else if (CTableImportColumn::GetDataTypeFromString(field_name) ==
228  }
229  else if (CTableImportColumn::GetDataTypeFromString(field_name) ==
231  col_type = CTableImportColumn::eLength;
232  }
233  else if (CTableImportColumn::GetDataTypeFromString(field_name) ==
235  col_type = CTableImportColumn::eStrand;
236  }
237  else if (CTableImportColumn::GetDataTypeFromString(field_name) ==
240  }
241 
242  // Is the current set of fields sufficient to create a
243  // location? Since there may be multiple locations, new ones
244  // are created when a new start position or ID column is
245  // encountered (or after the last column, handled at function end)
246  // Note that rsids (snips) have implied locations (we can look it up)
247  // so that other fields are not needed
248  if ((col_type == CTableImportColumn::eUnspecifiedID ||
249  col_type == CTableImportColumn::eStartPosition) &&
250  (prev_id_idx >= 0 && (prev_is_rsid || // we have an id
251  (prev_start_idx > prev_id_idx && // we have a start position
252  (prev_end_idx > prev_start_idx || // we have an end position or length
253  prev_length_idx > prev_start_idx) &&
254  (prev_strand_idx > prev_id_idx || // we have a strand if required
255  strand_required == false))) )) {
256 
257  // The strand may be -1 and either length OR stop position
258  // is not negative 1. Dataregion may also be -1 (undefined)
259  STableLocation fc(prev_id_idx, prev_start_idx, prev_end_idx,
260  prev_length_idx, prev_strand_idx, prev_data_region_idx, prev_genotype_idx, prev_is_rsid);
261  locations.push_back(fc);
262 
263  // Before a new location is created, a new start position
264  // and stop position OR length must be found. The ID and
265  // strand may be reused.
266  //prev_id_idx = -1;
267  //prev_strand_idx = -1;
268  prev_start_idx = -1;
269  prev_end_idx = -1;
270  prev_length_idx = -1;
271  prev_data_region_idx = -1;
272  prev_genotype_idx = -1;
273  }
274 
275  if (col_type == CTableImportColumn::eUnspecifiedID) {
276  prev_id_idx = (int)idx;
277  prev_is_rsid = is_rsid;
278  }
279  else if (col_type == CTableImportColumn::eStartPosition) {
280  prev_start_idx = (int)idx;
281  }
282  else if (col_type == CTableImportColumn::eStopPosition) {
283  prev_end_idx = (int)idx;
284  }
285  else if (col_type == CTableImportColumn::eLength) {
286  prev_length_idx = (int)idx;
287  }
288  else if (col_type == CTableImportColumn::eStrand) {
289  // If there are multiple locs they can use a strand field before
290  // the id or start position, but prioritize the closest strand after
291  // the current start position. (every loc has a new start position)
292  if (prev_strand_idx == -1 || prev_strand_idx < prev_start_idx)
293  prev_strand_idx = (int)idx;
294  }
295  else if (col_type == CTableImportColumn::eDataRegion) {
296  prev_data_region_idx = (int)idx;
297  }
298  else if (col_type == CTableImportColumn::eGenotype) {
299  prev_genotype_idx = (int)idx;
300  }
301  }
302 
303  // We are at the end of the columns. Can we form another location
304  // with the data we have?:
305  if ((prev_id_idx >= 0) && (prev_is_rsid ||
306  (prev_start_idx > prev_id_idx &&
307  (prev_end_idx > prev_start_idx || prev_length_idx > prev_id_idx) &&
308  (prev_strand_idx > prev_id_idx || strand_required == false))) ) {
309  // The strand may be -1 and either length OR stop position
310  // is not negative 1.
311  STableLocation fc(prev_id_idx, prev_start_idx, prev_end_idx,
312  prev_length_idx, prev_strand_idx, prev_data_region_idx, prev_genotype_idx, prev_is_rsid);
313  locations.push_back(fc);
314  }
315 
316  // If no locations are found, return a user-friendly message as to why.
317  if (locations.size() == 0) {
318  if (prev_id_idx == -1) {
319  if (missing_assembly)
320  msg = "Assembly not provided for chromosome";
321  else
322  msg = "Missing ID column";
323  }
324  else {
325  if (prev_start_idx == -1)
326  msg = "Start Position column not defined";
327 
328  if (prev_end_idx == -1 && prev_length_idx == -1) {
329  if (msg != "")
330  msg += " and neither a Stop Position nor Length column was given";
331  else
332  msg = "Neither a Stop Position nor Length column was defined";
333  }
334 
335  if (strand_required && prev_strand_idx == -1) {
336  if (msg == "") {
337  msg = "Strand column not defined";
338  }
339  else {
340  msg += " Also, no Strand column was identified.";
341  }
342  }
343  }
344  }
345 
346  return locations;
347 }
348 
349 void CTableAnnotDataSource::x_LogErr(const string& logstr, string& errstr, int& err_count, int row)
350 {
351  if (errstr != "")
352  errstr += " | ";
353  errstr += logstr;
354  if (++err_count < 100) {
355  LOG_POST("Invalid row " << row+1 << " " << logstr);
356  }
357 }
358 
360  CompareSNPResults(const string& rsid) : m_Rsid(rsid) {}
361 
362  bool operator()(const NSNPWebServices::TSNPSearchCompoundResult& rhs) const { return m_Rsid == rhs.first; }
363 
364  string m_Rsid;
365 };
366 
368  int loc_number,
369  ICanceled* call)
370 {
371  // Get User data from Annot Descriptor this is the Properties data
372  // structure, where each entry is a combination of column number
373  // and field properties, e.g.
374  // Column.1 &xtype=StartPosition &one_based=true
375  CAnnotdesc::TUser* column_meta_info = x_GetColumnMetaInfo();
376  if (column_meta_info == NULL)
377  return false;
378 
379  // Now get the actual columns (which have headers with descriptive
380  // info as well - as much as we currently need for finding features
381  // actually)
382  if (!m_AnnotContainer->IsSetData()) {
383  LOG_POST("Error - SeqTable not found in SeqAnnot");
384  return false;
385  }
386  CSeq_annot::TData& table_container = m_AnnotContainer->SetData();
387 
388  if (!table_container.IsSeq_table()) {
389  LOG_POST("Error - SeqAnnot does not contain a SeqTable");
390  return false;
391  }
392  CSeq_table& table = table_container.SetSeq_table();
393  CSeq_table::TColumns& cols = table.SetColumns();
394 
395  const CUser_field& id_meta_info = column_meta_info->GetData()[fc.m_IdCol].GetObject();
396 
397  bool start_one_based = true;
398  const CUser_field& start_meta_info = column_meta_info->GetData()[fc.m_StartPosCol].GetObject();
399  if (start_meta_info.IsSetData() && start_meta_info.GetData().IsStr())
400  start_one_based = x_GetMetaInfoTag(start_meta_info.GetData().GetStr(), "one_based") == "true";
401 
402  bool stop_one_based = true;
403  if (fc.m_StopPosCol >= 0) {
404  const CUser_field& stop_meta_info = column_meta_info->GetData()[fc.m_StopPosCol].GetObject();
405  if (stop_meta_info.IsSetData() && stop_meta_info.GetData().IsStr())
406  stop_one_based = x_GetMetaInfoTag(stop_meta_info.GetData().GetStr(), "one_based") == "true";
407  }
408  else {
409  // we are using length instead of a 'to' position. Stop will be
410  // start + len, so if start is one-based, stop will be too.
411  stop_one_based = start_one_based;
412  }
413 
414  //
415  /// Chromosomes need to be re-mapped using the assembly
416  CIdMapper* mapper = NULL;
417  string id_meta_info_str;
418 
419  bool is_chromosome = false;
420  if (id_meta_info.IsSetData() && id_meta_info.GetData().IsStr())
421  id_meta_info_str = id_meta_info.GetData().GetStr();
422 
423  string column_type = x_GetMetaInfoTag(id_meta_info_str, "xtype");
426  string assembly;
427  string meta_info;
428 
429  assembly = x_GetMetaInfoTag(id_meta_info_str, "genome_assembly");
430 
431  if (!assembly.empty() ) {
432  // used for initializing CIdMapperGCAssembly; having a full assembly seems to be inevitable here
433  // this is used only by Gbench, so using seqconfig as assembly service should be safe
434  CRef<CGC_Assembly> assm = CGencollSvc::GetInstance()->GetGCAssembly(assembly, true, "Gbench", nullptr, true);
435  if (assm) {
437  CRef<CScope> scope(new CScope(*obj_mgr));
438  scope->AddDefaults();
441  mapper = new CIdMapperGCAssembly(*scope, *assm, alias);
442  is_chromosome = true;
443  }
444  }
445  }
446 
447  //
448  // Add a new column for the SeqLoc
450 
451  string label = "Column." + NStr::NumericToString(cols.size() + 1);
452  string properties = string("&xtype=SeqLoc") + " &derived_field=true";
453  column_meta_info->AddField(label, properties);
454 
456  cinfo->SetTitle(string("Location ") + NStr::NumericToString(loc_number));
457 
459  column->SetHeader(*cinfo);
462 
463  // If there are errors we can put them in an error column
464  // at the end of conversion, if no errors have occurred, the column
465  // will not be added to the table. Columns with errors will also
466  // be marked as disabled to keep the table valid.
467  CRef< CSeqTable_column > error_column;
468  {
470  cinfo->SetTitle("Error Messages");
472 
473  error_column.Reset(new CSeqTable_column());
474  error_column->SetHeader(*cinfo);
477  error_column->SetData(data.GetNCObject());
478 
479  // I think our table viewer is not handling sparse data...
480  //CSeqTable_sparse_index* si = new CSeqTable_sparse_index();
481  //si->Select(CSeqTable_sparse_index_Base::e_Indexes);
482  //error_column->SetSparse(*si);
483  }
484 
485  // if call is asynchronous, return if/when cancelled.
486  if (call != NULL &&
487  call->IsCanceled())
488  return false;
489 
490  // Iterate over all rows and attempt to add a location for each row. If there
491  // is an error, add a null location for that row.
492  int err_count = 0;
493  for (size_t row=0; row<cols[fc.m_IdCol]->GetData().GetSize(); ++row) {
494  int from = 0;
495  int to = 0;
496 
497  bool null_seqloc = false;
498  string logstr;
499  string err_str;
500 
502  if (!cols[fc.m_StartPosCol]->TryGetInt(row, from)) {
503  logstr = "cannot convert start position to an integer";
504  err_str = logstr;
505  if (++err_count < 100) {
506  LOG_POST("Invalid row " << row+1 << " " << logstr);
507  }
508  null_seqloc = true;
509  }
510  else {
511  if (start_one_based) {
512  if (from < 1) {
513  null_seqloc = true;
514  logstr = "start column is one-based but value is < 1";
515  if (err_str != "")
516  err_str += " | ";
517  err_str += logstr;
518  if (++err_count < 100) {
519  LOG_POST("Invalid row " << row+1 << " " << logstr);
520  }
521  }
522  else {
523  from -= 1;
524  cols[fc.m_StartPosCol]->SetData().SetInt()[row] = from;
525  }
526  }
527  else if (from < 0) {
528  logstr = "start column is < 0";
529  if (err_str != "")
530  err_str += " | ";
531  err_str += logstr;
532  null_seqloc = true;
533  if (++err_count < 100) {
534  LOG_POST("Invalid row " << row+1 << " " << logstr);
535  }
536  }
537  }
538 
539  if (fc.m_StopPosCol >= 0) {
540  if (!cols[fc.m_StopPosCol]->TryGetInt(row, to)) {
541  logstr = "cannot convert stop position to an integer";
542  if (err_str != "")
543  err_str += " | ";
544  err_str += logstr;
545  if (++err_count < 100) {
546  LOG_POST("Invalid row " << row+1 << " " << logstr);
547  }
548  null_seqloc = true;
549  }
550  else {
551  if (stop_one_based) {
552  if (to < 1) {
553  logstr = "stop column is one-based but value is < 1";
554  if (err_str != "")
555  err_str += " | ";
556  err_str += logstr;
557  null_seqloc = true;
558 
559  if (++err_count < 100) {
560  LOG_POST("Invalid row " << row+1 << " " << logstr);
561  }
562  }
563  else {
564  to -= 1;
565  cols[fc.m_StopPosCol]->SetData().SetInt()[row] = to;
566  }
567  }
568  else if (to < 0) {
569  logstr = "stop column is < 0";
570  if (err_str != "")
571  err_str += " | ";
572  err_str += logstr;
573  null_seqloc = true;
574 
575  if (++err_count < 100) {
576  LOG_POST("Invalid row " << row+1 << " " << logstr);
577  }
578  }
579  }
580  }
581  else if (fc.m_LengthCol >= 0) {
582  int len = 0;
583  if (!cols[fc.m_LengthCol]->TryGetInt(row, len)) {
584  logstr = "cannot convert length to an integer";
585  if (err_str != "")
586  err_str += " | ";
587  err_str += logstr;
588  if (++err_count < 100) {
589  LOG_POST("Invalid row " << row+1 << " " << logstr);
590  }
591  null_seqloc = true;
592  }
593  to = from + len;
594  }
595 
596  ENa_strand strand_e = eNa_strand_unknown;
597 
598  if (fc.m_StrandCol != -1) {
599  if (cols[fc.m_StrandCol]->GetData().IsString()) {
600  string strand = *cols[fc.m_StrandCol]->GetStringPtr(row);
601  strand_e = x_GetStrand(strand);
602 
603  // Invalid strand string - log an error and set the strand type based
604  // on the to and from positions.
605  if (strand_e == eNa_strand_unknown) {
606  if (from > to) {
607  strand_e = eNa_strand_minus;
608  std::swap(from, to);
609  }
610  else {
611  strand_e = eNa_strand_plus;
612  }
613 
614  if (++err_count < 100) {
615  logstr = " strand identifier: " + strand +
616  " must be +,-,pos, or neg";
617  if (err_str != "")
618  err_str += " | ";
619  err_str += logstr;
620  LOG_POST("Invalid row " << row+1 << " " << logstr);
621  }
622  }
623  }
624  else if (cols[fc.m_StrandCol]->GetData().IsInt()) {
625  int strand_int;
626  if (cols[fc.m_StrandCol]->TryGetInt(row, strand_int)) {
627  // As an integer, strand must match values defined for ENa_strand:
628  if ((strand_int >= 0 && strand_int <= 4) || strand_int==255) {
629  strand_e = ENa_strand(strand_int);
630  }
631  else {
632  // Invalid strand number - guess strand based on
633  // relative start and stop positions.
634  if (from > to) {
635  strand_e = eNa_strand_minus;
636  std::swap(from, to);
637  }
638  else {
639  strand_e = eNa_strand_plus;
640  }
641 
642  if (++err_count < 100) {
643  logstr = "Integer strand value: " + NStr::IntToString(strand_int) +
644  " not valid";
645  if (err_str != "")
646  err_str += " | ";
647  err_str += logstr;
648  LOG_POST(logstr << " in row: " << row+1);
649  }
650  }
651 
652  }
653  else {
654  null_seqloc = true;
655  logstr = "Unable to retrieve strand value";
656  if (err_str != "")
657  err_str += " | ";
658  err_str += logstr;
659  if (++err_count < 100)
660  LOG_POST(logstr << " in row: " << row+1);
661  }
662  }
663  }
664 
665  /// From and to are the same - single position
666  if (from == to) {
667  location->SetPnt().SetPoint(from);
668  }
669  else {
670  location->SetInt().SetFrom(from);
671  location->SetInt().SetTo(to);
672  }
673 
674 
675  // Set the strand automatically based on whether to position > from position
676  if (fc.m_StrandCol == -1) {
677  strand_e = (to >= from) ? eNa_strand_plus : eNa_strand_minus;
678 
679  // from and to on minus strand should still have (to < from)
680  if (strand_e == eNa_strand_minus) {
681  location->SetInt().SetFrom(to);
682  location->SetInt().SetTo(from);
683  }
684  }
685 
686  location->SetStrand(strand_e);
687 
688  // Create the id - could be chromosome that needs mapping, an ID or a GI. If
689  // it is a GI it may be saved in a column of type int instead of ID.
690  if (null_seqloc) {
691  location->SetNull();
692  }
693  else if (!is_chromosome) {
694  CConstRef<CSeq_id> seq_id;
695 
696  if (cols[fc.m_IdCol]->GetData().IsInt()) {
697  int gid = cols[fc.m_IdCol]->GetData().GetInt()[row];
698 
699  try {
700  seq_id.Reset(new CSeq_id(CSeq_id_Base::e_Gi, gid));
701  location->SetId(seq_id.GetObject());
702  }
703  catch(CException& ex) {
704  null_seqloc = true;
705  logstr = "Error constructing seq-id from GI " +
706  NStr::IntToString(gid) + " | " + ex.GetMsg();
707  if (err_str != "")
708  err_str += " | ";
709  err_str += logstr;
710  if (++err_count < 100) {
711  LOG_POST(logstr << " in row: " << row+1);
712  }
713  location->SetNull();
714  }
715  }
716  else {
717  try {
718  seq_id = cols[fc.m_IdCol]->GetSeq_id(row);
719  if (seq_id) {
720  location->SetId(seq_id.GetObject());
721  }
722  }
723  catch(CException& e) {
724  null_seqloc = true;
725  logstr = "Exception getting seq-id " + e.GetMsg();
726  if (err_str != "")
727  err_str += " | ";
728  err_str += logstr;
729  if (++err_count < 100)
730  LOG_POST(logstr << " in row: " << row+1);
731  location->SetNull();
732  }
733  }
734  }
735  else { // id is chromosome
736  string idstr = *cols[fc.m_IdCol]->GetStringPtr(row);
737  CRef<CSeq_id> seq_id(nullptr);
738  try {
739  seq_id = CSeqUtils::MapStringId(idstr, mapper);
740  }
741  catch (const CException& ex) {
742  null_seqloc = true;
743  logstr = "Error creating id for: " + idstr + " : " + ex.GetMsg();
744  if (err_str != "")
745  err_str += " | ";
746  err_str += logstr;
747  if (++err_count < 100) {
748  LOG_POST("Invalid row " << row + 1 << " " << logstr);
749  }
750  }
751 
752  if (seq_id.IsNull()) {
753  // Error - can't create an id - a null one will be saved
754  null_seqloc = true;
755  logstr = "Unable to generate seq-id from id: " + idstr;
756  if (err_str != "")
757  err_str += " | ";
758  err_str += logstr;
759  if (++err_count < 100) {
760  LOG_POST("Invalid row " << row + 1 << " " << logstr);
761  }
762  }
763 
764  if (seq_id && !null_seqloc) {
765  location->SetId(seq_id.GetObject());
766  }
767  else {
768  // Any errors with seq-id result in the null location being
769  // created and added here.
770  location->SetNull();
771  }
772  }
773 
774  // if call is asynchronous, return if/when cancelled.
775  if (call != NULL &&
776  call->IsCanceled())
777  return false;
778 
779  data->SetLoc().push_back(location);
780 
781  // Add error (or blank if no error for this row)
782  error_column->SetData().SetString().push_back(err_str);
783  }
784 
785  delete mapper;
786 
787  column->SetData(*data);
788  table.SetColumns().push_back(column);
789 
790  if (err_count > 0) {
791  table.SetColumns().push_back(error_column);
792  string label = error_column->GetHeader().GetTitle();
793  string properties = string("&xtype=") +
795  " &derived_field=true";
796  column_meta_info->AddField(label, properties);
797  }
798 
799  return true;
800 }
801 
803  int loc_number,
804  ICanceled* call)
805 {
806  // Get User data from Annot Descriptor this is the Properties data
807  // structure, where each entry is a combination of column number
808  // and field properties, e.g.
809  // Column.1 &xtype=StartPosition &one_based=true
810  CAnnotdesc::TUser* column_meta_info = x_GetColumnMetaInfo();
811  if (column_meta_info == NULL)
812  return false;
813 
814  // Now get the actual columns (which have headers with descriptive
815  // info as well - as much as we currently need for finding features
816  // actually)
817  if (!m_AnnotContainer->IsSetData()) {
818  LOG_POST("Error - SeqTable not found in SeqAnnot");
819  return false;
820  }
821  CSeq_annot::TData& table_container = m_AnnotContainer->SetData();
822 
823  if (!table_container.IsSeq_table()) {
824  LOG_POST("Error - SeqAnnot does not contain a SeqTable");
825  return false;
826  }
827  CSeq_table& table = table_container.SetSeq_table();
828  CSeq_table::TColumns& cols = table.SetColumns();
829 
830  const CUser_field& id_meta_info = column_meta_info->GetData()[fc.m_IdCol].GetObject();
831 
832  //
833  /// Chromosomes need to be re-mapped using the assembly
834  //CIdMapperConfig* mapper = NULL;
835  string id_meta_info_str;
836 
837  string assembly;
838 
839  if (id_meta_info.IsSetData() && id_meta_info.GetData().IsStr())
840  id_meta_info_str = id_meta_info.GetData().GetStr();
841 
842  string column_type = x_GetMetaInfoTag(id_meta_info_str, "xtype");
845  string meta_info;
846 
847  assembly = x_GetMetaInfoTag(id_meta_info_str, "genome_assembly");
848  }
849 
850  //
851  // Add a new column for the SeqLoc
853 
854  string label = "Column." + NStr::NumericToString(cols.size() + 1);
855  string properties = string("&xtype=SeqLoc") + " &derived_field=true";
856  column_meta_info->AddField(label, properties);
857 
859  cinfo->SetTitle(string("Location ") + NStr::NumericToString(loc_number));
860 
862  column->SetHeader(*cinfo);
865 
866  // If there are errors we can put them in an error column
867  // at the end of conversion, if no errors have occurred, the column
868  // will not be added to the table. Columns with errors will also
869  // be marked as disabled to keep the table valid.
870  CRef< CSeqTable_column > error_column;
871  {
873  cinfo->SetTitle("Error Messages");
875 
876  error_column.Reset(new CSeqTable_column());
877  error_column->SetHeader(*cinfo);
880  error_column->SetData(data.GetNCObject());
881  }
882 
883  // if call is asynchronous, return if/when cancelled.
884  if (call != NULL &&
885  call->IsCanceled())
886  return false;
887 
888  size_t rsid_search_size = 100;
889 
890  // Visit all rows in the table and add any columns needed to create a
891  // complete feature. If there are no columns to add/update, skip
892  // this step.
893  int err_count = 0;
894 
896  CRef<CScope> scope(new CScope(*om));
897  scope->AddDefaults();
898 
899  size_t row = 0;
900  size_t search_row = 0;
901  size_t current_size = 0;
902  size_t row_count = cols[fc.m_IdCol]->GetData().GetSize();
903  while (search_row < row_count) {
904 
905  // could resize rsids to begin with (rsids have predictable lengths...)
906  string rsids = "";
907  for (current_size=0; search_row < row_count && current_size<rsid_search_size; ++current_size, ++search_row) {
908  rsids += *cols[fc.m_IdCol]->GetStringPtr(search_row);
909  rsids += ",";
910  }
911  // remove last comma
912  rsids = rsids.substr(0, rsids.length()-1);
913 
914  // Query for current set of rsids
916  try {
917  // assembly may be blank.
918  NSNPWebServices::Search(rsids, assembly, SNPSearchResultList);
919  }
920  catch (CException& e) {
921  LOG_POST("Exception while searching SNP database: " + e.GetMsg());
922  // If there is an error getting a group of ids continually reduce the batch
923  // size until there is only 1 id retrieved and then if there is still an
924  // error process it as an error
925  if (rsid_search_size > 2) {
926  search_row = row;
927  rsid_search_size = rsid_search_size/2;
928  continue;
929  }
930  }
931  catch(...) {
932  LOG_POST("Unidentified exception while searching SNP database");
933  // If there is an error getting a group of ids continually reduce the batch
934  // size until there is only 1 id retrieved and then if there is still an
935  // error process it as an error
936  if (rsid_search_size > 2) {
937  search_row = row;
938  rsid_search_size = rsid_search_size/2;
939  continue;
940  }
941  }
942 
943  // process the current set of rows. Results in result list should be in same order
944  // as the rows, but we will search the result set if there is a mismatch.
945  NSNPWebServices::TSNPSearchCompoundResultList::iterator current_iter = SNPSearchResultList.begin();
946 
947  for (; row < search_row; ++row) {
948 
949  string logstr;
950  string errstr = "";
951 
953 
954  bool found = false;
955  string rsid = *cols[fc.m_IdCol]->GetStringPtr(row);
956 
957  // See if current snp query result matches current row. If not, search for current
958  // row rsid in entire result set. (usually they should match)
959  NSNPWebServices::TSNPSearchCompoundResultList::iterator iter = current_iter;
960  ++current_iter;
961 
962  if (iter->first != rsid) {
963  CompareSNPResults pred(rsid);
964  iter = std::find_if(SNPSearchResultList.begin(), SNPSearchResultList.end(), pred);
965  }
966 
967  if (iter == SNPSearchResultList.end()) {
968  x_LogErr("No matches for given rsid", errstr, err_count, static_cast<int>(row));
969  }
970  else if ((*iter).second.size() == 0) {
971  x_LogErr("No matches for rsid and assembly: " + assembly,
972  errstr, err_count, static_cast<int>(row));
973  }
974  else {
975  const NSNPWebServices::TSNPSearchResultList::value_type& first_result = (*iter).second.front();
976 
977  // CVariation used as a search result can have one and only one placement
978  if (first_result->CanGetPlacements() == 0) {
979  x_LogErr("Unexpected absence of placements in SNP Search Result", errstr, err_count, static_cast<int>(row));
980  }
981  else {
982  const CVariation::TPlacements& placements(first_result->GetPlacements());
983  if (placements.size() == 0) {
984  x_LogErr("Unexpected number of placements (0) in SNP Search Result", errstr, err_count, static_cast<int>(row));
985  }
986  else {
987  location->Assign(placements.front()->GetLoc());
988 
989  found = true;
990  }
991  }
992  }
993 
994 
995  if (!found || errstr != "") {
996  location->SetNull();
997  }
998 
999 
1000  data->SetLoc().push_back(location);
1001 
1002  // Will add blanks too for cols w/o errors
1003  error_column->SetData().SetString().push_back(errstr);
1004 
1005  // if call is asynchronous, return if/when cancelled.
1006  if (call != NULL &&
1007  call->IsCanceled())
1008  return false;
1009  }
1010  }
1011 
1012  column->SetData(*data);
1013  table.SetColumns().push_back(column);
1014 
1015  if (err_count > 0) {
1016  table.SetColumns().push_back(error_column);
1017  string label = error_column->GetHeader().GetTitle();
1018  string properties = string("&xtype=") +
1020  " &derived_field=true";
1021  column_meta_info->AddField(label, properties);
1022  }
1023 
1024  return true;
1025 }
1026 
1028  ICanceled* call)
1029 {
// Turns the seq-table stored in m_AnnotContainer into a table of region
// features, in place:
//  - sets the table's feature type/subtype to region,
//  - converts int/real columns that are not location columns to strings,
//  - rewrites one-based start/stop values as zero-based and updates the
//    "one_based" meta-info tag to match,
//  - when an id mapper could be created, fills a column of mapped seq-ids,
//  - generates strand, data-region and stop-position columns when needed,
//  - collects per-row error text; rows with errors are also listed in a
//    sparse "disabled" column.
// fc supplies the column indices used below (m_IdCol, m_StartPosCol,
// m_StopPosCol, m_LengthCol, m_StrandCol, m_DataRegionCol); an index of -1
// is treated as "column not present".
// call may be NULL; when non-NULL it is polled for cancellation.
// Returns false if the column meta info or the seq-table is unavailable, or
// if the call was cancelled; returns true otherwise.
// NOTE(review): edits already applied to the table (string conversion,
// decremented positions, updated meta tags) are not rolled back when the
// function returns false because of cancellation.
1030 
1031  // Get User data from Annot Descriptor this is the Properties data
1032  // structure, where each entry is a combination of column number
1033  // and field properties, e.g.
1034  // Column.1 &xtype=StartPosition &one_based=true
1035  CAnnotdesc::TUser* column_meta_info = x_GetColumnMetaInfo();
1036  if (column_meta_info == NULL)
1037  return false;
1038 
1039  // Now get the actual columns (which have headers with descriptive
1040  // info as well - as much as we currently need for finding features
1041  // actually)
1042  if (!m_AnnotContainer->IsSetData()) {
1043  LOG_POST("Error - SeqTable not found in SeqAnnot");
1044  return false;
1045  }
1046  CSeq_annot::TData& table_container = m_AnnotContainer->SetData();
1047 
1048  if (!table_container.IsSeq_table()) {
1049  LOG_POST("Error - SeqAnnot does not contain a SeqTable");
1050  return false;
1051  }
1052  CSeq_table& table = table_container.SetSeq_table();
1053  CSeq_table::TColumns& cols = table.SetColumns();
1054 
1055  /// To make the table a valid source for features first need to set its type
1056  /// and subtype to indicate that the rows represent region features.
1057  table.SetFeat_type(CSeqFeatData_Base::e_Region);
1058  table.SetFeat_subtype(CSeqFeatData::eSubtype_region);
1059 
// NOTE(review): fc.m_IdCol is used as an index here without a range check -
// presumably the caller guarantees a valid id column; confirm.
1060  const CSeqTable_column_info& id_header = cols[fc.m_IdCol]->GetHeader();
1061 
1062  // Can only have one ID field if the table is a feature table -search for any others here
1063  // and switch them to comment fields. Also, no comment fields may be of type int or real.
1064  for (size_t col_num=0; col_num<cols.size(); ++col_num) {
1065  if (col_num != fc.m_IdCol &&
1066  col_num != fc.m_LengthCol &&
1067  col_num != fc.m_StartPosCol &&
1068  col_num != fc.m_StopPosCol &&
1069  col_num != fc.m_StrandCol) {
1070 
1071  // Can only have 1 id field in region feature so set others to comment
1072  CSeqTable_column_info& header = cols[col_num]->SetHeader();
1078  }
1079  // Can't have Numeric fields other than start, stop length. In table loader
1080  // integer fields not being used in locations have id type as comment and data
1081  // type as int. switch data type to text.
1082  else if (cols[col_num]->GetData().Which() == CSeqTable_multi_data_Base::e_Int &&
1084 
1085  // Take a copy of the current data for this integer field (the copy is
// needed because the Select() call below resets the column's data)
1086  CSeqTable_multi_data_Base::TInt col_data = cols[col_num]->GetData().GetInt();
1087  // delete field data and set new type to string
1088  cols[col_num]->SetData().Select(CSeqTable_multi_data_Base::e_String, eDoResetVariant);
1089  // copy int data to new field, converting it to string
1090  for (size_t row=0; row<col_data.size(); ++row) {
1091  string val;
1092  try {
1093  val = NStr::IntToString(col_data[row]);
1094  }
// A failed conversion is ignored on purpose: the row keeps an empty string
1095  catch (CException&) { }
1096 
1097  cols[col_num]->SetData().SetString().push_back(val);
1098  }
1099  // done - we converted the int field to a string field
1100  // (because int fields can't be comment fields - in
1101  // region features)
1102  }
1103  else if (cols[col_num]->GetData().Which() == CSeqTable_multi_data_Base::e_Real &&
1105 
1106  // Take a copy of the current data for this real field
1107  CSeqTable_multi_data_Base::TReal col_data = cols[col_num]->GetData().GetReal();
1108  // delete field data and set new type to string
1109  cols[col_num]->SetData().Select(CSeqTable_multi_data_Base::e_String, eDoResetVariant);
1110  // copy real data to new field, converting it to string
1111  for (size_t row=0; row<col_data.size(); ++row) {
1112  string val;
1113  try {
1114  val = NStr::DoubleToString(col_data[row]);
1115  }
// A failed conversion is ignored on purpose: the row keeps an empty string
1116  catch (CException&) { }
1117 
1118  cols[col_num]->SetData().SetString().push_back(val);
1119  }
1120  // done - we converted the real field to a string field
1121  // (because real fields can't be comment fields - in
1122  // region features)
1123  }
1124  }
1125  }
1126 
// Start positions are assumed one-based unless the start column's meta info
// says otherwise.
1127  bool start_one_based = true;
1128  CUser_field& start_meta_info = column_meta_info->SetData()[fc.m_StartPosCol].GetObject();
1129  if (start_meta_info.IsSetData() && start_meta_info.GetData().IsStr()) {
1130  start_one_based = x_GetMetaInfoTag(start_meta_info.GetData().GetStr(), "one_based") == "true";
1131 
1132  // To make a feature table we need 0-based indices, so before we update
1133  // below change the meta-data to indicate it is no longer 1-based:
1134  if (start_one_based) {
1135  string meta_info_str = start_meta_info.GetData().GetStr();
1136  x_UpdateMetaInfoTag(meta_info_str, "one_based", "false");
1137  start_meta_info.SetData().SetStr(meta_info_str);
1138  }
1139  }
1140 
1141  bool stop_one_based = true;
1142  if (fc.m_StopPosCol >= 0) {
1143  CUser_field& stop_meta_info = column_meta_info->SetData()[fc.m_StopPosCol].GetObject();
1144  if (stop_meta_info.IsSetData() && stop_meta_info.GetData().IsStr())
1145  stop_one_based = x_GetMetaInfoTag(stop_meta_info.GetData().GetStr(), "one_based") == "true";
1146 
1147  // To make a feature table we need 0-based indices, so before we update
1148  // below change the meta-data to indicate it is no longer 1-based:
// NOTE(review): unlike the start-column code above, this block is outside
// the IsSetData()/IsStr() check (that 'if' has no braces). Because
// stop_one_based defaults to true, GetStr() is called here even when the
// meta info is unset or not a string - confirm this cannot happen.
1149  if (stop_one_based) {
1150  string meta_info_str = stop_meta_info.GetData().GetStr();
1151  x_UpdateMetaInfoTag(meta_info_str, "one_based", "false");
1152  stop_meta_info.SetData().SetStr(meta_info_str);
1153  }
1154  }
1155  else {
1156  // we are using length instead of a 'to' position. Stop will be
1157  // start + len, so if start is one-based, stop will be too.
1158  stop_one_based = start_one_based;
1159  }
1160 
1161  /// Chromosomes need to be re-mapped using the accession
// NOTE(review): mapper is an owning raw pointer released by the
// 'delete mapper' after the row loop; both 'return false' cancellation
// exits below skip that delete.
1162  CIdMapper* mapper = nullptr;
1163  CRef< CSeqTable_column > xform_ids_column;
1164 
1165  const CUser_field& id_meta_info = column_meta_info->GetData()[fc.m_IdCol].GetObject();
1166 
// NOTE(review): GetData().GetStr() is called here before the
// IsSetData()/IsStr() check that guards the same access a few lines below.
1167  string column_type = x_GetMetaInfoTag(id_meta_info.GetData().GetStr(), "xtype");
1168  if (CTableImportColumn::GetDataTypeFromString(column_type) ==
1170  string assembly;
1171  string meta_info;
1172 
1173  if (id_meta_info.IsSetData() && id_meta_info.GetData().IsStr())
1174  meta_info = id_meta_info.GetData().GetStr();
1175 
1176  assembly = x_GetMetaInfoTag(meta_info, "genome_assembly");
1177 
1178  if (!assembly.empty() ) {
1179  // used for initializing CIdMapperGCAssembly; having a full assembly seems to be inevitable here
1180  // this is used only by Gbench, so using seqconfig as assembly service should be safe
1181  CRef<CGC_Assembly> assm = CGencollSvc::GetInstance()->GetGCAssembly(assembly, true, "Gbench", nullptr, true);
1182  if (assm) {
1184  CRef<CScope> scope(new CScope(*obj_mgr));
1185  scope->AddDefaults();
1188  mapper = new CIdMapperGCAssembly(*scope, *assm, alias);
1189  }
1190 
1191  // Create a new column to put the mapped ids into
// NOTE(review): this column is created whenever an assembly name is present,
// but it is only filled when mapper != NULL. If the assembly lookup returned
// a null ref, the original id column appears to be replaced by an empty
// column at the end of this function - confirm.
1193  cinfo->SetTitle(id_header.GetTitle());
1195 
1196  xform_ids_column.Reset(new CSeqTable_column());
1197  xform_ids_column->SetHeader(*cinfo);
1200  xform_ids_column->SetData(data.GetNCObject());
1201  }
1202  }
1203 
1204  // Do we need a new strand column? For features it has to be numeric. If
1205  // there is no strand column, create one based on start>stop or start<stop.
1206  CRef< CSeqTable_column > xform_strand_column;
1207  if (fc.m_StrandCol == -1 ||
1208  cols[fc.m_StrandCol]->GetData().IsString()) {
1210 
// Reuse the existing strand column's title, or name the new column after
// its position ("Column.<number of columns + 1>")
1212  if (fc.m_StrandCol != -1) {
1213  const CSeqTable_column_info& strand_header =
1214  cols[fc.m_StrandCol]->GetHeader();
1215  cinfo->SetTitle(strand_header.GetTitle());
1216  }
1217  else {
1218  cinfo->SetTitle(string("Column.") +
1219  NStr::NumericToString(cols.size() + 1));
1220  }
1221 
1222  xform_strand_column.Reset(new CSeqTable_column());
1223  xform_strand_column->SetHeader(*cinfo);
1226  xform_strand_column->SetData(data.GetNCObject());
1227  }
1228 
1229  //If there is not already a data-region column, we need to add one.
1230  CRef< CSeqTable_column > data_region_column;
1231  if (fc.m_DataRegionCol == -1) {
1233  cinfo->SetTitle("Data Region Col");
1235 
1236  data_region_column.Reset(new CSeqTable_column());
1237  data_region_column->SetHeader(*cinfo);
1240  data_region_column->SetData(data.GetNCObject());
1241  }
1242 
1243  // Need to have a stop-position column (not just a length
1244  // column) for features
1245  CRef< CSeqTable_column > stop_position_column;
1246  if (fc.m_StopPosCol == -1 && fc.m_LengthCol >= 0) {
1248  cinfo->SetTitle("Stop Column");
1250 
1251  stop_position_column.Reset(new CSeqTable_column());
1252  stop_position_column->SetHeader(*cinfo);
1255  stop_position_column->SetData(data.GetNCObject());
1256  }
1257 
1258  // If there are errors we can put them in an error column
1259  // at the end of conversion, if no errors have occurred, the column
1260  // will not be added to the table. Rows with errors will also
1261  // be marked as disabled to keep the table valid.
1262  bool errors_occured = false;
1263  CRef< CSeqTable_column > error_column;
1264  {
1266  cinfo->SetTitle("Error Messages");
1268 
1269  error_column.Reset(new CSeqTable_column());
1270  error_column->SetHeader(*cinfo);
1273  error_column->SetData(data.GetNCObject());
1274 
1275  // I think our table viewer is not handling sparse data...
1276  //CSeqTable_sparse_index* si = new CSeqTable_sparse_index();
1277  //si->Select(CSeqTable_sparse_index_Base::e_Indexes);
1278  //error_column->SetSparse(*si);
1279  }
1280 
// Sparse "disabled" column: the default value is the bit 'true' and the
// sparse index lists the rows it applies to (filled in the row loop below)
1281  CRef< CSeqTable_column > disabled_column;
1282  {
1284  cinfo->SetField_name("disabled");
1285 
1286  disabled_column.Reset(new CSeqTable_column());
1287  disabled_column->SetHeader(*cinfo);
1289  sd->SetBit(true);
1290  disabled_column->SetDefault(*sd);
1291 
1294  disabled_column->SetSparse(*si);
1295  }
1296 
1297 
// NOTE(review): returning here skips 'delete mapper' below, so a mapper
// allocated above is not released on this path.
1298  if (call != NULL &&
1299  call->IsCanceled())
1300  return false;
1301 
1302  // Visit all rows in the table and add any columns needed to create a
1303  // complete feature. If there are no columns to add/update, skip
1304  // this step.
1305  if (!xform_strand_column.IsNull() ||
1306  !stop_position_column.IsNull() ||
1307  !data_region_column.IsNull() ||
1308  !(mapper == NULL) ||
1309  start_one_based ||
1310  stop_one_based) {
1311 
// err_count counts every error found; only the first 99 are written to the
// log (each check below is '++err_count < 100'), but every error is still
// recorded in the row's errstr.
1312  int err_count = 0;
1313  for (size_t row=0; row<cols[fc.m_IdCol]->GetData().GetSize(); ++row) {
1314 
1315  int from = 0;
1316  int to = 0;
1317  string logstr;
1318  string errstr = "";
1319 
1320  if (!cols[fc.m_StartPosCol]->TryGetInt(row, from)) {
1321  logstr = " cannot convert start position to an integer";
1322  if (++err_count < 100) {
1323  LOG_POST("Invalid row " << row+1 << logstr);
1324  }
1325  }
1326  else {
1327  if (start_one_based) {
1328  if (from < 1) {
1329  logstr = " start column is one-based but value is < 1";
1330  if (++err_count < 100) {
1331  LOG_POST("Invalid row " << row+1 << logstr);
1332  }
1333  }
1334 
// The value is shifted to zero-based and written back even when it was
// flagged as invalid just above
1335  from -= 1;
1336  cols[fc.m_StartPosCol]->SetData().SetInt()[row] = from;
1337  }
1338  else if (from < 0) {
1339  logstr = " start column is < 0";
1340  if (++err_count < 100) {
1341  LOG_POST("Invalid row " << row+1 << logstr);
1342  }
1343  }
1344  }
1345 
// errstr accumulates all messages for this row, separated by " | "
1346  errstr = logstr;
1347 
1348  // Inserting a stop position column if the original table had only
1349  // a start and length column (stop = start + length)
1350  if (!stop_position_column.IsNull()) {
1351  int len = 0;
1352  // Read this row's length; an unreadable or negative length is
// reported as an error and treated as 0
1353  if (!cols[fc.m_LengthCol]->TryGetInt(row, len)) {
1354  logstr = " cannot convert length to an integer";
1355  if (errstr != "") errstr += " | ";
1356  errstr += logstr;
1357 
1358  if (++err_count < 100) {
1359  LOG_POST("Invalid row " << row+1 << logstr);
1360  }
1361  len = 0;
1362  }
1363 
1364  if (len < 0) {
1365  logstr = " length column is < 0";
1366  if (errstr != "") errstr += " | ";
1367  errstr += logstr;
1368 
1369  if (++err_count < 100) {
1370  LOG_POST("Invalid row " << row+1 << logstr);
1371  }
1372  len = 0;
1373  }
1374 
1375  to = from + len;
1376 
1377  stop_position_column->SetData().SetInt().push_back(to);
1378  }
1379  else {
1380  // Get stop column, check if value is valid and update it
1381  // to be 0-based if needed.
// NOTE(review): this branch is also taken when there is neither a stop
// column nor a length column (m_StopPosCol == -1 and m_LengthCol < 0), in
// which case cols[] would be indexed with -1 - presumably callers always
// supply one of the two; confirm.
1382  if (!cols[fc.m_StopPosCol]->TryGetInt(row, to)) {
1383  logstr = " cannot convert stop position to an integer";
1384  if (errstr != "") errstr += " | ";
1385  errstr += logstr;
1386 
1387  if (++err_count < 100) {
1388  LOG_POST("Invalid row " << row+1 << logstr);
1389  }
1390  }
1391  else {
1392  if (stop_one_based) {
1393  if (to < 1) {
1394  logstr = " stop column is one-based but value is < 1";
1395  if (errstr != "") errstr += " | ";
1396  errstr += logstr;
1397 
1398  if (++err_count < 100) {
1399  LOG_POST("Invalid row " << row+1 << logstr);
1400  }
1401  }
1402 
1403  to -= 1;
1404  cols[fc.m_StopPosCol]->SetData().SetInt()[row] = to;
1405  }
1406  else if (to < 0) {
1407  logstr = " stop column is < 0";
1408  if (errstr != "") errstr += " | ";
1409  errstr += logstr;
1410 
1411  if (++err_count < 100) {
1412  LOG_POST("Invalid row " << row+1 << logstr);
1413  }
1414  }
1415  }
1416  }
1417 
1418 
1419  // Convert the strand column to a numeric as required for
1420  // making seqtable features.
1421  if (!xform_strand_column.IsNull()) {
1422  ENa_strand strand_e;
1423  if (fc.m_StrandCol != -1) {
1424  string strand = *cols[fc.m_StrandCol]->GetStringPtr(row);
1425  strand_e = x_GetStrand(strand);
1426 
1427  // Invalid strand string - log an error.
1428  if (strand_e == eNa_strand_unknown) {
1429  logstr = string(" error - strand: \"") + strand + "\" not valid";
1430  if (errstr != "") errstr += " | ";
1431  errstr += logstr;
1432 
1433  if (++err_count < 100)
1434  LOG_POST("Invalid row " << row+1 << logstr);
1435  }
1436  }
1437  else {
1438  // Infer strand from start and stop position. These
1439  // have already been retrieved above
1440  // If to>=from assume positive strand, otherwise negative.
1441  if (to >= from) {
1442  strand_e = eNa_strand_plus;
1443  }
1444  else {
1445  // If a negative strand is inferred, switch the 'from' and 'to'
1446  // values so that 'to' is greater than 'from':
1447  strand_e = eNa_strand_minus;
1448  int tmp = to;
1449  to = from;
1450  from = tmp;
1451  }
1452 
1453  cols[fc.m_StartPosCol]->SetData().SetInt()[row] = from;
1454 
1455  // Update the stop column which may have been in the
1456  // original table or may be a generated column we are
1457  // filling based on the length field.
// NOTE(review): when the stop column is generated, 'to' was already appended
// for this row above (to = from + len), so this push_back appends a second
// value for the same row and the generated column ends up with two entries
// per row - this looks like it should overwrite the last entry instead.
1458  if (!stop_position_column.IsNull())
1459  stop_position_column->SetData().SetInt().push_back(to);
1460  else
1461  cols[fc.m_StopPosCol]->SetData().SetInt()[row] = to;
1462 
1463 
1464  }
1465  xform_strand_column->SetData().SetInt().push_back((int)strand_e);
1466  }
1467 
1468  // Add a (generated) data-region column if none was previously
1469  // specified. Region names use the zero-based row index.
1470  if (!data_region_column.IsNull()) {
1471  string region_name = "Region " + NStr::IntToString((int)row);
1472  data_region_column->SetData().SetString().push_back(region_name);
1473  }
1474 
1475  if (mapper != NULL) {
1476  // Handle chromosomes of the forms:
1477  // "1", "20", "chr20", "X", "y", "Chr[x,y,X,Y], "20|text..."
1478  string idstr = *cols[fc.m_IdCol]->GetStringPtr(row);
1479  CRef<CSeq_id> seq_id(nullptr);
1480  try {
1481  seq_id = CSeqUtils::MapStringId(idstr, mapper);
1482  xform_ids_column->SetData().SetId().push_back(seq_id);
1483  }
1484  catch (const CException& ex) {
1485  // Error - chromosome didn't work
1486  seq_id.Reset(new CSeq_id());
1487  xform_ids_column->SetData().SetId().push_back(seq_id);
1488 
1489  logstr = string(" Error id: ") + idstr + " - " + ex.GetMsg();
1490  if (errstr != "") errstr += " | ";
1491  errstr += logstr;
1492 
1493  if (++err_count < 100) {
1494  LOG_POST("Invalid row " << row + 1 << logstr);
1495  }
1496  }
1497 
// NOTE(review): seq_id can only be null here if MapStringId() returned a
// null ref without throwing; in that case a null entry was already appended
// in the try block above, so the push_back below adds a second entry for
// this row - confirm whether MapStringId() can return null.
1498  if (seq_id.IsNull()) {
1499  // Could not identify as a chromosome - try to create id with
1500  // unmodified string.
1501  try {
1502  seq_id.Reset(new CSeq_id(idstr));
1503  }
1504  catch (CException&) {
1505  seq_id.Reset(new CSeq_id());
1506  }
1507  xform_ids_column->SetData().SetId().push_back(seq_id);
1508  }
1509  }
1510 
1511  // will only add the column at the end if 1 or more errors
1512  // occurred.
1513  if (errstr != "") {
1514  errors_occured = true;
1515  disabled_column->SetSparse().SetIndexes().push_back(static_cast<int>(row));
1516 
1517  // our viewer doesn't currently handle sparse indices with data
1518  //error_column->SetSparse().SetIndexes().push_back(row);
1519  }
1520  // Will add blanks too for rows w/o errors
1521  error_column->SetData().SetString().push_back(errstr);
1522 
1523  // if call is asynchronous, return if/when cancelled.
// NOTE(review): this return also skips 'delete mapper' below.
1524  if (call != NULL &&
1525  call->IsCanceled())
1526  return false;
1527  }
1528  }
1529 
1530  delete mapper;
1531 
1532 
1533  /// Add any new columns that were required to make a valid feature:
1534  // (strand, data region , stop position and ID)
1535  if (!xform_strand_column.IsNull()) {
// An existing (string) strand column is replaced at the same position;
// a newly generated one is appended and registered in the meta info
1536  if (fc.m_StrandCol != -1) {
1537  table.SetColumns().erase(table.SetColumns().begin() + fc.m_StrandCol);
1538  table.SetColumns().insert(table.SetColumns().begin() + fc.m_StrandCol,
1539  xform_strand_column);
1540  }
1541  else {
1542  table.SetColumns().push_back(xform_strand_column);
1543  string label = xform_strand_column->GetHeader().GetTitle();
1544  string properties = string("&xtype=") +
1546  " &derived_field=true";
1547  column_meta_info->AddField(label, properties);
1548  }
1549 
1550  }
1551 
1552  if (!data_region_column.IsNull()) {
1553  table.SetColumns().push_back(data_region_column);
1554 
1555  string label = data_region_column->GetHeader().GetTitle();
1556  string properties = string("&xtype=") +
1558  " &derived_field=true";
1559  column_meta_info->AddField(label, properties);
1560  }
1561 
1562  if (!stop_position_column.IsNull()) {
1563  table.SetColumns().push_back(stop_position_column);
1564 
1565  string label = stop_position_column->GetHeader().GetTitle();
1566  string properties = string("&xtype=") +
1568  " &derived_field=true";
1569  column_meta_info->AddField(label, properties);
1570  }
1571 
// The mapped-id column replaces the original id column at the same position
1572  if (!xform_ids_column.IsNull()) {
1573  table.SetColumns().erase(table.SetColumns().begin() + fc.m_IdCol);
1574  table.SetColumns().insert(table.SetColumns().begin() + fc.m_IdCol,
1575  xform_ids_column);
1576  }
1577 
// The error-message and "disabled" columns are only added when at least one
// row reported an error
1578  if (errors_occured) {
1579  table.SetColumns().push_back(error_column);
1580  string label = error_column->GetHeader().GetTitle();
1581  string properties = string("&xtype=") +
1583  " &derived_field=true";
1584  column_meta_info->AddField(label, properties);
1585 
1586  table.SetColumns().push_back(disabled_column);
1587  label = "disabled";
1588  properties = string("&xtype=disabled_indices") +
1589  //CTableImportColumn::GetStringFromDataType(CTableImportColumn::eUndefined) +
1590  " &derived_field=true";
1591  column_meta_info->AddField(label, properties);
1592  }
1593 
1594  return true;
1595 }
1596 
1598  ICanceled* call)
1599 {
// Turns the seq-table stored in m_AnnotContainer into a table of region
// features by looking up each row's id (an rsid string taken from column
// fc.m_IdCol) with NSNPWebServices::Search():
//  - sets the table's feature type/subtype to region,
//  - converts int/real columns to strings,
//  - queries the ids in batches and, for each row, takes seq-id, start, stop
//    and strand from the first placement of the first search result,
//  - rows without a usable result get an empty seq-id, 0/0 positions, an
//    unknown strand and an error message, and are listed in a sparse
//    "disabled" column.
// call may be NULL; when non-NULL it is polled for cancellation.
// Returns false if the column meta info or the seq-table is unavailable, or
// if the call was cancelled; returns true otherwise.
// NOTE(review): edits already applied to the table are not rolled back when
// the function returns false because of cancellation.
1600  // Get User data from Annot Descriptor this is the Properties data
1601  // structure, where each entry is a combination of column number
1602  // and field properties, e.g.
1603  // Column.1 &xtype=StartPosition &one_based=true
1604  CAnnotdesc::TUser* column_meta_info = x_GetColumnMetaInfo();
1605  if (column_meta_info == NULL)
1606  return false;
1607 
1608  // Now get the actual columns (which have headers with descriptive
1609  // info as well - as much as we currently need for finding features
1610  // actually)
1611  if (!m_AnnotContainer->IsSetData()) {
1612  LOG_POST("Error - SeqTable not found in SeqAnnot");
1613  return false;
1614  }
1615  CSeq_annot::TData& table_container = m_AnnotContainer->SetData();
1616 
1617  if (!table_container.IsSeq_table()) {
1618  LOG_POST("Error - SeqAnnot does not contain a SeqTable");
1619  return false;
1620  }
1621  CSeq_table& table = table_container.SetSeq_table();
1622  CSeq_table::TColumns& cols = table.SetColumns();
1623 
1624  /// To make the table a valid source for features first need to set its type
1625  /// and subtype to indicate that the rows represent region features.
1626  table.SetFeat_type(CSeqFeatData_Base::e_Region);
1627  table.SetFeat_subtype(CSeqFeatData::eSubtype_region);
1628 
1629  // Can only have one ID field if the table is a feature table.
1630  // Also, no comment fields may be of type int or real. So since we are adding
1631  // all the feature fields (start/stop/strand/id) set any other numeric or id fields
1632  // to comments.
1633  for (size_t col_num=0; col_num<cols.size(); ++col_num) {
1634  // Can only have 1 id field in region feature so set others to comment
1635  CSeqTable_column_info& header = cols[col_num]->SetHeader();
1641  }
1642  // Can't have Numeric fields other than start, stop length. In table loader
1643  // integer fields not being used in locations have id type as comment and data
1644  // type as int. switch data type to text.
1645  else if (cols[col_num]->GetData().Which() == CSeqTable_multi_data_Base::e_Int &&
1647 
1648  // Take a copy of the current data for this integer field (the copy is
// needed because the Select() call below resets the column's data)
1649  CSeqTable_multi_data_Base::TInt col_data = cols[col_num]->GetData().GetInt();
1650  // delete field data and set new type to string
1651  cols[col_num]->SetData().Select(CSeqTable_multi_data_Base::e_String, eDoResetVariant);
1652  // copy int data to new field, converting it to string
1653  for (size_t row=0; row<col_data.size(); ++row) {
1654  string val;
1655  try {
1656  val = NStr::IntToString(col_data[row]);
1657  }
// A failed conversion is ignored on purpose: the row keeps an empty string
1658  catch (CException&) { }
1659 
1660  cols[col_num]->SetData().SetString().push_back(val);
1661  }
1662  // done - we converted the int field to a string field
1663  // (because int fields can't be comment fields - in
1664  // region features)
1665  }
1666  else if (cols[col_num]->GetData().Which() == CSeqTable_multi_data_Base::e_Real &&
1668 
1669  // Take a copy of the current data for this real field
1670  CSeqTable_multi_data_Base::TReal col_data = cols[col_num]->GetData().GetReal();
1671  // delete field data and set new type to string
1672  cols[col_num]->SetData().Select(CSeqTable_multi_data_Base::e_String, eDoResetVariant);
1673  // copy real data to new field, converting it to string
1674  for (size_t row=0; row<col_data.size(); ++row) {
1675  string val;
1676  try {
1677  val = NStr::DoubleToString(col_data[row]);
1678  }
// A failed conversion is ignored on purpose: the row keeps an empty string
1679  catch (CException&) { }
1680 
1681  cols[col_num]->SetData().SetString().push_back(val);
1682  }
1683  // done - we converted the real field to a string field
1684  // (because real fields can't be comment fields - in
1685  // region features)
1686  }
1687  }
1688 
1689  /// Chromosomes need to be re-mapped using the accession
// assembly stays empty unless the id column's meta info carries a
// "genome_assembly" tag; an empty assembly is passed to the search as-is
1690  string assembly;
1691 
1692  const CUser_field& id_meta_info = column_meta_info->GetData()[fc.m_IdCol].GetObject();
1693 
// NOTE(review): GetData().GetStr() is called here before the
// IsSetData()/IsStr() check that guards the same access a few lines below.
1694  string column_type = x_GetMetaInfoTag(id_meta_info.GetData().GetStr(), "xtype");
1695  if (CTableImportColumn::GetDataTypeFromString(column_type) ==
1697  string meta_info;
1698 
1699  if (id_meta_info.IsSetData() && id_meta_info.GetData().IsStr())
1700  meta_info = id_meta_info.GetData().GetStr();
1701 
1702  assembly = x_GetMetaInfoTag(meta_info, "genome_assembly");
1703  }
1704 
1705  // Add id column for seq-id of molecule on which rsid is found
1706  CRef< CSeqTable_column > seqid_column;
1707  {
1709 
// NOTE(review): the seq-id column takes its title from the *strand* column's
// header when a strand column exists - this looks copied from the strand
// block below; confirm the intended title.
1711  if (fc.m_StrandCol != -1) {
1712  const CSeqTable_column_info& strand_header =
1713  cols[fc.m_StrandCol]->GetHeader();
1714  cinfo->SetTitle(strand_header.GetTitle());
1715  }
1716  else {
1717  cinfo->SetTitle(string("Seq-ID"));
1718  }
1719 
1720  seqid_column.Reset(new CSeqTable_column());
1721  seqid_column->SetHeader(*cinfo);
1724  seqid_column->SetData(data.GetNCObject());
1725  }
1726 
1727  // Do we need a new strand column? For features it has to be numeric.
// Here the strand values are taken from the SNP search results (see the row
// loop below), not from start/stop.
// NOTE(review): xform_strand_column, stop_position_column and
// start_position_column are only created under the conditions below, but the
// row loop dereferences all three unconditionally. If the table already has a
// non-string strand column, a stop column or a start column, the
// corresponding CRef is null when used - presumably this function is only
// called for tables without those columns; confirm.
1729  CRef< CSeqTable_column > xform_strand_column;
1730  if (fc.m_StrandCol == -1 ||
1731  cols[fc.m_StrandCol]->GetData().IsString()) {
1733 
1735  if (fc.m_StrandCol != -1) {
1736  const CSeqTable_column_info& strand_header =
1737  cols[fc.m_StrandCol]->GetHeader();
1738  cinfo->SetTitle(strand_header.GetTitle());
1739  }
1740  else {
1741  cinfo->SetTitle(string("Strand"));
1742  }
1743 
1744  xform_strand_column.Reset(new CSeqTable_column());
1745  xform_strand_column->SetHeader(*cinfo);
1748  xform_strand_column->SetData(data.GetNCObject());
1749  }
1750 
1751  //If there is not already a data-region column, we need to add one.
1752  CRef< CSeqTable_column > data_region_column;
1753  if (fc.m_DataRegionCol == -1) {
1755  cinfo->SetTitle("Data Region Col");
1757 
1758  data_region_column.Reset(new CSeqTable_column());
1759  data_region_column->SetHeader(*cinfo);
1762  data_region_column->SetData(data.GetNCObject());
1763  }
1764 
1765  // Need to have a stop-position column (not just a length
1766  // column) for features
1767  CRef< CSeqTable_column > stop_position_column;
1768  if (fc.m_StopPosCol == -1) {
1770  cinfo->SetTitle("Stop Column");
1772 
1773  stop_position_column.Reset(new CSeqTable_column());
1774  stop_position_column->SetHeader(*cinfo);
1777  stop_position_column->SetData(data.GetNCObject());
1778  }
1779  CRef< CSeqTable_column > start_position_column;
1780  if (fc.m_StartPosCol == -1) {
1782  cinfo->SetTitle("Start Column");
1784 
1785  start_position_column.Reset(new CSeqTable_column());
1786  start_position_column->SetHeader(*cinfo);
1789  start_position_column->SetData(data.GetNCObject());
1790  }
1791 
1792  // If there are errors we can put them in an error column
1793  // at the end of conversion, if no errors have occurred, the column
1794  // will not be added to the table. Rows with errors will also
1795  // be marked as disabled to keep the table valid.
1796  bool errors_occured = false;
1797  CRef< CSeqTable_column > error_column;
1798  {
1800  cinfo->SetTitle("Error Messages");
1802 
1803  error_column.Reset(new CSeqTable_column());
1804  error_column->SetHeader(*cinfo);
1807  error_column->SetData(data.GetNCObject());
1808  }
1809 
// Sparse "disabled" column: the default value is the bit 'true' and the
// sparse index lists the rows it applies to (filled in the row loop below)
1810  CRef< CSeqTable_column > disabled_column;
1811  {
1813  cinfo->SetField_name("disabled");
1814 
1815  disabled_column.Reset(new CSeqTable_column());
1816  disabled_column->SetHeader(*cinfo);
1818  sd->SetBit(true);
1819  disabled_column->SetDefault(*sd);
1820 
1823  disabled_column->SetSparse(*si);
1824  }
1825 
1826 
1827  if (call != NULL &&
1828  call->IsCanceled())
1829  return false;
1830 
// Number of ids sent per search request; halved whenever a request throws
1831  size_t rsid_search_size = 100;
1832 
1833  // Visit all rows in the table and fill the columns needed to create a
1834  // complete feature. (Unlike the non-SNP conversion, this step is never
1835  // skipped.)
1836  int err_count = 0;
1837 
1839  CRef<CScope> scope(new CScope(*om));
1840  scope->AddDefaults();
1841 
1842 
1843  // We have an outer loop to retrieve the snp information from the snp database in
1844  // groups of 'rsid_search_size' at a time since thats more efficient, and then an
1845  // inner loop to process those results one at a time.
// row        - next row to be processed by the inner loop
// search_row - one past the last row included in the current query
1846  size_t row = 0;
1847  size_t search_row = 0;
1848  size_t current_size = 0;
1849  size_t row_count = cols[fc.m_IdCol]->GetData().GetSize();
1850  while (search_row < row_count) {
1851 
1852  // could resize rsids to begin with (rsids have predictable lengths...)
// Build a comma-separated list of the ids in this batch
1853  string rsids = "";
1854  for (current_size=0; search_row < row_count && current_size<rsid_search_size; ++current_size, ++search_row) {
1855  rsids += *cols[fc.m_IdCol]->GetStringPtr(search_row);
1856  rsids += ",";
1857  }
1858  // remove last comma
1859  rsids = rsids.substr(0, rsids.length()-1);
1860 
1861  // Query for current set of rsids
1863  try {
1864  // assembly may be blank.
1865  NSNPWebServices::Search(rsids, assembly, SNPSearchResultList);
1866  }
1867  catch (CException& e) {
1868  LOG_POST("Exception while searching SNP database: " + e.GetMsg());
1869  // If there is an error getting a group of ids continually reduce the batch
1870  // size until there is only 1 id retrieved and then if there is still an
1871  // error process it as an error
// (rewinding search_row to row makes the next iteration re-query the same
// rows with the smaller batch; once the batch size is 2 or less, execution
// falls through to the row loop with whatever the result list holds)
1872  if (rsid_search_size > 2) {
1873  search_row = row;
1874  rsid_search_size = rsid_search_size/2;
1875  continue;
1876  }
1877  }
1878  catch(...) {
1879  LOG_POST("Unidentified exception while searching SNP database");
1880  // If there is an error getting a group of ids continually reduce the batch
1881  // size until there is only 1 id retrieved and then if there is still an
1882  // error process it as an error
1883  if (rsid_search_size > 2) {
1884  search_row = row;
1885  rsid_search_size = rsid_search_size/2;
1886  continue;
1887  }
1888  }
1889 
1890  // process the current set of rows. Results in result list should be in same order
1891  // as the rows, but we will search the result set if there is a mismatch.
1892  NSNPWebServices::TSNPSearchCompoundResultList::iterator current_iter = SNPSearchResultList.begin();
1893 
1894  for (; row < search_row; ++row) {
1895 
1896  int from = 0;
1897  int to = 0;
// NOTE(review): logstr is declared but not used anywhere in this loop.
1898  string logstr;
1899  string errstr = "";
1900 
1901  bool found = false;
1902  string rsid = *cols[fc.m_IdCol]->GetStringPtr(row);
1903 
1904  // See if current snp query result matches current row. If not, search for current
1905  // row rsid in entire result set. (usually they should match)
// NOTE(review): current_iter is incremented and iter dereferenced without
// comparing against end(). If the result list has fewer entries than the
// batch has rows (e.g. it is empty after a failed search), this steps past /
// dereferences end() - confirm the list always has one entry per queried id.
1906  NSNPWebServices::TSNPSearchCompoundResultList::iterator iter = current_iter;
1907  ++current_iter;
1908 
1909  if (iter->first != rsid) {
1910  CompareSNPResults pred(rsid);
1911  iter = std::find_if(SNPSearchResultList.begin(), SNPSearchResultList.end(), pred);
1912  }
1913 
1914  if (iter == SNPSearchResultList.end()) {
1915  x_LogErr("No matches for given rsid", errstr, err_count, static_cast<int>(row));
1916  }
1917  else if ((*iter).second.size() == 0) {
1918  x_LogErr("No matches for rsid and assembly: " + assembly,
1919  errstr, err_count, static_cast<int>(row));
1920  }
1921  else {
// Only the first result for the rsid is used
1922  const NSNPWebServices::TSNPSearchResultList::value_type& first_result = (*iter).second.front();
1923 
1924  // CVariation used as a search result can have one and only one placement
1925  if (first_result->CanGetPlacements() == 0) {
1926  x_LogErr("Unexpected absence of placements in SNP Search Result", errstr, err_count, static_cast<int>(row));
1927  }
1928  else {
1929  const CVariation::TPlacements& placements(first_result->GetPlacements());
1930  if (placements.size() == 0) {
1931  x_LogErr("Unexpected number of placements (0) in SNP Search Result", errstr, err_count, static_cast<int>(row));
1932  }
1933  else {
// Seq-id, start, stop and strand all come from the first placement's
// location.
// NOTE(review): id is dereferenced below without a null check - confirm
// GetLoc().GetId() cannot return a null pointer for these locations.
1934  const CSeq_id* id = placements.front()->GetLoc().GetId();
1935  CRef<CSeq_id> rid(new CSeq_id());
1936  rid->Assign(*id);
1937 
1938  seqid_column->SetData().SetId().push_back(rid);
1939 
1940  from = placements.front()->GetLoc().GetStart(eExtreme_Positional);
1941  start_position_column->SetData().SetInt().push_back(from);
1942 
1943  to = placements.front()->GetLoc().GetStop(eExtreme_Positional);
1944  stop_position_column->SetData().SetInt().push_back(to);
1945 
1946  xform_strand_column->SetData().SetInt().push_back(placements.front()->GetLoc().GetStrand());
1947  found = true;
1948  }
1949  }
1950  }
1951 
// No usable result: append placeholder values so every generated column
// still gets exactly one entry for this row
1952  if (!found) {
1953  CRef<CSeq_id> empty_id(new CSeq_id());
1954 
1955  seqid_column->SetData().SetId().push_back(empty_id);
1956  start_position_column->SetData().SetInt().push_back(0);
1957  stop_position_column->SetData().SetInt().push_back(0);
1958  xform_strand_column->SetData().SetInt().push_back(eNa_strand_unknown);
1959  }
1960 
1961  // Add a (generated) data-region column if none was previously
1962  // specified. Region names use the zero-based row index.
1963  if (!data_region_column.IsNull()) {
1964  string region_name = "Region " + NStr::IntToString((int)row);
1965  data_region_column->SetData().SetString().push_back(region_name);
1966  }
1967 
1968  // will only add the column at the end if 1 or more errors
1969  // occurred.
1970  if (errstr != "") {
1971  errors_occured = true;
1972  disabled_column->SetSparse().SetIndexes().push_back(static_cast<int>(row));
1973  }
1974  // Will add blanks too for rows w/o errors
1975  error_column->SetData().SetString().push_back(errstr);
1976 
1977  // if call is asynchronous, return if/when cancelled.
1978  if (call != NULL &&
1979  call->IsCanceled())
1980  return false;
1981  }
1982  }
1983 
1984  /// Add any new columns that were required to make a valid feature:
1985  // (seq-id, strand, data region, start position and stop position)
1986  if (!seqid_column.IsNull()) {
1987  table.SetColumns().push_back(seqid_column);
1988  string label = seqid_column->GetHeader().GetTitle();
1989  string properties = string("&xtype=") +
1991  " &derived_field=true";
1992  column_meta_info->AddField(label, properties);
1993  }
1994 
1995  if (!xform_strand_column.IsNull()) {
// An existing (string) strand column is replaced at the same position by the
// strand values taken from the search results; a newly generated one is
// appended and registered in the meta info
1996  if (fc.m_StrandCol != -1) {
1997  table.SetColumns().erase(table.SetColumns().begin() + fc.m_StrandCol);
1998  table.SetColumns().insert(table.SetColumns().begin() + fc.m_StrandCol,
1999  xform_strand_column);
2000  }
2001  else {
2002  table.SetColumns().push_back(xform_strand_column);
2003  string label = xform_strand_column->GetHeader().GetTitle();
2004  string properties = string("&xtype=") +
2006  " &derived_field=true";
2007  column_meta_info->AddField(label, properties);
2008  }
2009 
2010  }
2011 
2012  if (!data_region_column.IsNull()) {
2013  table.SetColumns().push_back(data_region_column);
2014 
2015  string label = data_region_column->GetHeader().GetTitle();
2016  string properties = string("&xtype=") +
2018  " &derived_field=true";
2019  column_meta_info->AddField(label, properties);
2020  }
2021 
2022  if (!start_position_column.IsNull()) {
2023  table.SetColumns().push_back(start_position_column);
2024 
2025  string label = start_position_column->GetHeader().GetTitle();
2026  string properties = string("&xtype=") +
2028  " &derived_field=true";
2029  column_meta_info->AddField(label, properties);
2030  }
2031 
2032  if (!stop_position_column.IsNull()) {
2033  table.SetColumns().push_back(stop_position_column);
2034 
2035  string label = stop_position_column->GetHeader().GetTitle();
2036  string properties = string("&xtype=") +
2038  " &derived_field=true";
2039  column_meta_info->AddField(label, properties);
2040  }
2041 
// The error-message and "disabled" columns are only added when at least one
// row reported an error
2042  if (errors_occured) {
2043  table.SetColumns().push_back(error_column);
2044  string label = error_column->GetHeader().GetTitle();
2045  string properties = string("&xtype=") +
2047  " &derived_field=true";
2048  column_meta_info->AddField(label, properties);
2049 
2050  table.SetColumns().push_back(disabled_column);
2051  label = "disabled";
2052  properties = string("&xtype=disabled_indices") +
2053  //CTableImportColumn::GetStringFromDataType(CTableImportColumn::eUndefined) +
2054  " &derived_field=true";
2055  column_meta_info->AddField(label, properties);
2056  }
2057 
2058  return true;
2059 }
2060 
2062  ICanceled* call)
2063 {
      // Purpose: turn the seq-table held in m_AnnotContainer into a table of
      // SNP (variation) features keyed by the rsid column fc.m_IdCol.
      //  - marks the table as feature type Imp / subtype variation,
      //  - converts int and real columns to string columns,
      //  - looks the rsids up in batches through NSNPWebServices::Search and
      //    fills derived seq-id / start-position columns from the first
      //    placement of the first result for each row,
      //  - fills two "Q.replace" columns from the genotype column (if any),
      //  - records per-row error text and disables rows that had errors,
      //  - appends the derived columns plus a matching meta-info field each,
      //  - adds an annot descriptor named "SNP".
      // 'call' may be NULL; when set, cancellation is checked before the
      // search starts and after every processed row.
      // Returns false when the meta info or the seq-table is missing or the
      // operation was cancelled, true otherwise.
2064  // Get User data from Annot Descriptor this is the Properties data
2065  // structure, where each entry is a combination of column number
2066  // and field properties, e.g.
2067  // Column.1 &xtype=StartPosition &one_based=true
2068  CAnnotdesc::TUser* column_meta_info = x_GetColumnMetaInfo();
2069  if (column_meta_info == NULL)
2070  return false;
2071 
2072  // Now get the actual columns (which have headers with descriptive
2073  // info as well - as much as we currently need for finding features
2074  // actually)
2075  if (!m_AnnotContainer->IsSetData()) {
2076  LOG_POST("Error - SeqTable not found in SeqAnnot");
2077  return false;
2078  }
2079  CSeq_annot::TData& table_container = m_AnnotContainer->SetData();
2080 
2081  if (!table_container.IsSeq_table()) {
2082  LOG_POST("Error - SeqAnnot does not contain a SeqTable");
2083  return false;
2084  }
2085  CSeq_table& table = table_container.SetSeq_table();
2086  CSeq_table::TColumns& cols = table.SetColumns();
2087 
2088  /// To make the table a valid source for features first need to set its type
2089  /// and subtype to indicate that the rows represent region features.
2090  table.SetFeat_type(CSeqFeatData_Base::e_Imp);
2091  table.SetFeat_subtype(CSeqFeatData::eSubtype_variation);
2092 
      // NOTE(review): fc.m_IdCol is used as an index here (and below) without
      // a visible check that it is set; -1 is reported as "ID: missing"
      // elsewhere in this file - confirm callers guarantee a valid id column.
2093  const CUser_field& id_meta_info = column_meta_info->GetData()[fc.m_IdCol].GetObject();
2094 
2095  // Can only have one ID field if the table is a feature table.
2096  // Also, no comment fields may be of type int or real. So since we are adding
2097  // all the feature fields (start/stop/strand/id) set any other numeric or id fields
2098  // to comments.
2099  for (size_t col_num=0; col_num<cols.size(); ++col_num) {
2100  // Can only have 1 id field in region feature so set others to comment
2101  CSeqTable_column_info& header = cols[col_num]->SetHeader();
2107  }
2108  // Can't have Numeric fields other than start, stop length. In table loader
2109  // integer fields not being used in locations have id type as comment and data
2110  // type as int. switch data type to text.
2111  else if (cols[col_num]->GetData().Which() == CSeqTable_multi_data_Base::e_Int &&
2113 
2114  // Get a copy of the current data for this integer field (taken by value
      // because the column data is reset on the next statement)
2115  CSeqTable_multi_data_Base::TInt col_data = cols[col_num]->GetData().GetInt();
2116  // delete field data and set new type to string
2117  cols[col_num]->SetData().Select(CSeqTable_multi_data_Base::e_String, eDoResetVariant);
2118  // copy int data to new field, converting it to string
2119  for (size_t row=0; row<col_data.size(); ++row) {
2120  string val;
2121  try {
2122  val = NStr::IntToString(col_data[row]);
2123  }
      // conversion failure is deliberately ignored: the row gets an empty string
2124  catch (CException&) { }
2125 
2126  cols[col_num]->SetData().SetString().push_back(val);
2127  }
2128  // done - we converted the int field to a string field
2129  // (because int fields can't be comment fields - in
2130  // region features)
2131  }
2132  else if (cols[col_num]->GetData().Which() == CSeqTable_multi_data_Base::e_Real &&
2134 
2135  // Get a copy of the current data for this real field
2136  CSeqTable_multi_data_Base::TReal col_data = cols[col_num]->GetData().GetReal();
2137  // delete field data and set new type to string
2138  cols[col_num]->SetData().Select(CSeqTable_multi_data_Base::e_String, eDoResetVariant);
2139  // copy real data to new field, converting it to string
2140  for (size_t row=0; row<col_data.size(); ++row) {
2141  string val;
2142  try {
2143  val = NStr::DoubleToString(col_data[row]);
2144  }
      // conversion failure is deliberately ignored: the row gets an empty string
2145  catch (CException&) { }
2146 
2147  cols[col_num]->SetData().SetString().push_back(val);
2148  }
2149  // done - we converted the real field to a string field
2150  // (because real fields can't be comment fields - in
2151  // region features)
2152  }
2153  }
2154 
2155  /// Chromosomes need to be re-mapped using the accession
2156  string assembly;
2157 
      // NOTE(review): GetData().GetStr() is called here before the
      // IsSetData()/IsStr() guard a few lines below - confirm the id column
      // meta info is always a string.
2158  string column_type = x_GetMetaInfoTag(id_meta_info.GetData().GetStr(), "xtype");
2159  if (CTableImportColumn::GetDataTypeFromString(column_type) ==
2161  string meta_info;
2162 
2163  // Change the field name and type to match requirements for dbsnp field
2164  CSeqTable_column_info& header = cols[fc.m_IdCol]->SetHeader();
2165  header.ResetField_id();
2166  header.SetField_name("D.dbSNP");
2167 
2168  if (id_meta_info.IsSetData() && id_meta_info.GetData().IsStr())
2169  meta_info = id_meta_info.GetData().GetStr();
2170 
      // assembly stays empty when the tag is absent; it is passed to the
      // SNP search below either way
2171  assembly = x_GetMetaInfoTag(meta_info, "genome_assembly");
2172  }
2173 
2174  // Add data-imp column which represents variations
2175  CRef< CSeqTable_column > variation_column;
2176  {
2178 
2180  cinfo->SetTitle(string("Var"));
2181 
2182  variation_column.Reset(new CSeqTable_column());
2183  variation_column->SetHeader(*cinfo);
2184 
      // the column carries no per-row data, only the default value "variation"
2186  default_data->Select(CSeqTable_single_data::e_String);
2187  default_data->SetString("variation");
2188  variation_column->SetDefault(*default_data);
2189  }
2190 
2191  // Add id column for seq-id of molecule on which rsid is found
2192  CRef< CSeqTable_column > seqid_column;
2193  {
2195 
      // NOTE(review): the seq-id column title is copied from the *strand*
      // column header when a strand column exists - confirm this is intended.
2197  if (fc.m_StrandCol != -1) {
2198  const CSeqTable_column_info& strand_header =
2199  cols[fc.m_StrandCol]->GetHeader();
2200  cinfo->SetTitle(strand_header.GetTitle());
2201  }
2202  else {
2203  cinfo->SetTitle(string("Seq-ID"));
2204  }
2205 
2206  seqid_column.Reset(new CSeqTable_column());
2207  seqid_column->SetHeader(*cinfo);
2210  seqid_column->SetData(data.GetNCObject());
2211  }
2212 
      // NOTE(review): start_position_column is only allocated when the table
      // has no start-position column, yet it is dereferenced unconditionally
      // in the row loop below - confirm m_StartPosCol is always -1 here.
2213  CRef< CSeqTable_column > start_position_column;
2214  if (fc.m_StartPosCol == -1) {
2216  cinfo->SetTitle("Start Column");
2218 
2219  start_position_column.Reset(new CSeqTable_column());
2220  start_position_column->SetHeader(*cinfo);
2223  start_position_column->SetData(data.GetNCObject());
2224  }
2225 
      // Two columns, both with field name "Q.replace", filled from the
      // genotype column in the row loop below.
2226  CRef< CSeqTable_column > replace_column1;
2227  {
2229  cinfo->SetTitle("Replace 1");
2230  cinfo->SetField_name("Q.replace");
2231 
2232  replace_column1.Reset(new CSeqTable_column());
2233  replace_column1->SetHeader(*cinfo);
2236  replace_column1->SetData(data.GetNCObject());
2237  }
2238 
2239 
2240  CRef< CSeqTable_column > replace_column2;
2241  {
2243  cinfo->SetTitle("Replace 2");
2244  cinfo->SetField_name("Q.replace");
2245 
2246  replace_column2.Reset(new CSeqTable_column());
2247  replace_column2->SetHeader(*cinfo);
2250  replace_column2->SetData(data.GetNCObject());
2251  }
2252 
2253 
2254  // If there are errors we can put them in an error column
2255  // at the end of conversion, if no errors have occurred, the column
2256  // will not be added to the table. Columns with errors will also
2257  // be marked as disabled to keep the table valid.
2258  bool errors_occured = false;
2259  CRef< CSeqTable_column > error_column;
2260  {
2262  cinfo->SetTitle("Error Messages");
2264 
2265  error_column.Reset(new CSeqTable_column());
2266  error_column->SetHeader(*cinfo);
2269  error_column->SetData(data.GetNCObject());
2270  }
2271 
      // Sparse bit column: default value true, row indexes are appended for
      // every row that produced an error.
2272  CRef< CSeqTable_column > disabled_column;
2273  {
2275  cinfo->SetField_name("disabled");
2276 
2277  disabled_column.Reset(new CSeqTable_column());
2278  disabled_column->SetHeader(*cinfo);
2280  sd->SetBit(true);
2281  disabled_column->SetDefault(*sd);
2282 
2285  disabled_column->SetSparse(*si);
2286  }
2287 
2288 
2289  if (call != NULL &&
2290  call->IsCanceled())
2291  return false;
2292 
      // number of rsids sent per search request; halved after a failed request
2293  size_t rsid_search_size = 100;
2294 
2295  // Visit all rows in the table and add any columns needed to create a
2296  // complete feature. If there are no columns to add/update, skip
2297  // this step.
2298  int err_count = 0;
2299 
2301  CRef<CScope> scope(new CScope(*om));
2302  scope->AddDefaults();
2303 
      // row        - next row whose search result still has to be processed
      // search_row - one past the last row included in the current batch
2304  size_t row = 0;
2305  size_t search_row = 0;
2306  size_t current_size = 0;
2307  size_t row_count = cols[fc.m_IdCol]->GetData().GetSize();
2308  while (search_row < row_count) {
2309 
2310  // could resize rsids to begin with (rsids have predictable lengths...)
2311  string rsids = "";
2312  for (current_size=0; search_row < row_count && current_size<rsid_search_size; ++current_size, ++search_row) {
2313  rsids += *cols[fc.m_IdCol]->GetStringPtr(search_row);
2314  rsids += ",";
2315  }
2316  // remove last comma
2317  rsids = rsids.substr(0, rsids.length()-1);
2318 
2319  // Query for current set of rsids
2321  try {
2322  // assembly may be blank.
2323  NSNPWebServices::Search(rsids, assembly, SNPSearchResultList);
2324  }
2325  catch (CException& e) {
2326  LOG_POST("Exception while searching SNP database: " + e.GetMsg());
2327  // If there is an error getting a group of ids continually reduce the batch
2328  // size until there is only 1 id retrieved and then if there is still an
2329  // error process it as an error
2330  if (rsid_search_size > 2) {
      // rewind to the first unprocessed row and retry with half the batch
2331  search_row = row;
2332  rsid_search_size = rsid_search_size/2;
2333  continue;
2334  }
2335  }
2336  catch(...) {
2337  LOG_POST("Unidentified exception while searching SNP database");
2338  // If there is an error getting a group of ids continually reduce the batch
2339  // size until there is only 1 id retrieved and then if there is still an
2340  // error process it as an error
2341  if (rsid_search_size > 2) {
2342  search_row = row;
2343  rsid_search_size = rsid_search_size/2;
2344  continue;
2345  }
2346  }
2347 
2348  // process the current set of rows. Results in result list should be in same order
2349  // as the rows, but we will search the result set if there is a mismatch.
2350  NSNPWebServices::TSNPSearchCompoundResultList::iterator current_iter = SNPSearchResultList.begin();
2351 
2352  for (; row < search_row; ++row) {
2353 
2354  int from = 0;
2355  string logstr;
2356  string errstr = "";
2357 
2358  bool found = false;
2359  string rsid = *cols[fc.m_IdCol]->GetStringPtr(row);
2360 
2361  // See if current snp query result matches current row. If not, search for current
2362  // row rsid in entire result set. (usually they should match)
      // NOTE(review): current_iter is incremented and iter->first is read
      // without comparing against SNPSearchResultList.end(); if the result
      // list is empty (e.g. after a failed search) or shorter than the
      // batch this dereferences end() - confirm and guard.
2363  NSNPWebServices::TSNPSearchCompoundResultList::iterator iter = current_iter;
2364  ++current_iter;
2365 
2366  if (iter->first != rsid) {
2367  CompareSNPResults pred(rsid);
2368  iter = std::find_if(SNPSearchResultList.begin(), SNPSearchResultList.end(), pred);
2369  }
2370 
2371  if (iter == SNPSearchResultList.end()) {
2372  x_LogErr("No matches for given rsid", errstr, err_count, static_cast<int>(row));
2373  }
2374  else if ((*iter).second.size() == 0) {
2375  x_LogErr("No matches for rsid and assembly: " + assembly,
2376  errstr, err_count, static_cast<int>(row));
2377  }
2378  else {
      // only the first result for the rsid is used
2379  const NSNPWebServices::TSNPSearchResultList::value_type& first_result = (*iter).second.front();
2380 
2381  // CVariation used as a search result can have one and only one placement
2382  if (first_result->CanGetPlacements() == 0) {
2383  x_LogErr("Unexpected absence of placements in SNP Search Result", errstr, err_count, static_cast<int>(row));
2384  }
2385  else {
2386  const CVariation::TPlacements& placements(first_result->GetPlacements());
2387  if (placements.size() == 0) {
2388  x_LogErr("Unexpected number of placements (0) in SNP Search Result", errstr, err_count, static_cast<int>(row));
2389  }
2390  else {
      // copy the id so the column owns its own CSeq_id object
2391  const CSeq_id* id = placements.front()->GetLoc().GetId();
2392  CRef<CSeq_id> rid(new CSeq_id());
2393  rid->Assign(*id);
2394 
2395  seqid_column->SetData().SetId().push_back(rid);
2396 
2397  from = placements.front()->GetLoc().GetStart(eExtreme_Positional);
2398  start_position_column->SetData().SetInt().push_back(from);
2399 
2400  found = true;
2401  }
2402  }
2403  }
2404 
2405  if (fc.m_GenotypeCol != -1) {
2406  string geno_str = *cols[fc.m_GenotypeCol]->GetStringPtr(row);
2407  string val1 = "-";
2408  string val2 = "-";
2409 
      // NOTE(review): val2 is taken from geno_str[0], the same character as
      // val1, although it is guarded by length() >= 2 - this looks like it
      // should read geno_str[1]; confirm before changing.
2410  if (geno_str.length() >= 1)
2411  val1 = geno_str[0];
2412  if (geno_str.length() >= 2)
2413  val2 = geno_str[0];
2414 
      // common-string encoding: reuse the index of an existing string,
      // otherwise append the string and use its new index
2415  {
2416  CCommonString_table_Base::TStrings& strs = replace_column1->SetData().SetCommon_string().SetStrings();
2417  CCommonString_table_Base::TStrings::iterator iter = std::find(strs.begin(), strs.end(), val1);
2418  if (iter != strs.end()) {
2419  size_t idx = iter-strs.begin();
2420  replace_column1->SetData().SetCommon_string().SetIndexes().push_back(static_cast<int>(idx));
2421  }
2422  else {
2423  strs.push_back(val1);
2424  replace_column1->SetData().SetCommon_string().SetIndexes().push_back(static_cast<int>(strs.size()-1));
2425  }
2426  }
2427 
2428  {
2429  CCommonString_table_Base::TStrings& strs = replace_column2->SetData().SetCommon_string().SetStrings();
2430  CCommonString_table_Base::TStrings::iterator iter = std::find(strs.begin(), strs.end(), val2);
2431  if (iter != strs.end()) {
2432  size_t idx = iter-strs.begin();
2433  replace_column2->SetData().SetCommon_string().SetIndexes().push_back(static_cast<int>(idx));
2434  }
2435  else {
2436  strs.push_back(val2);
2437  replace_column2->SetData().SetCommon_string().SetIndexes().push_back(static_cast<int>(strs.size()-1));
2438  }
2439  }
2440  }
2441 
      // keep the derived columns the same length as the table: rows without
      // a usable result get an empty id and start position 0
2442  if (!found) {
2443  CRef<CSeq_id> empty_id(new CSeq_id());
2444 
2445  seqid_column->SetData().SetId().push_back(empty_id);
2446  start_position_column->SetData().SetInt().push_back(0);
2447  }
2448 
2449  // will only add the column at the end if 1 or more errors
2450  // occurred.
2451  if (errstr != "") {
2452  errors_occured = true;
2453  disabled_column->SetSparse().SetIndexes().push_back(static_cast<int>(row));
2454 
2455  // our viewer doesn't currently handle sparse indices with data
2456  //error_column->SetSparse().SetIndexes().push_back(row);
2457  }
2458  // Will add blanks too for cols w/o errors
2459  error_column->SetData().SetString().push_back(errstr);
2460 
2461  // if call is asynchronous, return if/when cancelled.
2462  if (call != NULL &&
2463  call->IsCanceled())
2464  return false;
2465  }
2466  }
2467 
2468 
2469  /// Add any new columns that were required to make a valid feature:
2470  // (variation, seq-id, start position, replace and error/disabled columns)
      // Each added column gets a matching meta-info field flagged
      // "&derived_field=true".
2471  if (!variation_column.IsNull()) {
2472  table.SetColumns().push_back(variation_column);
2473  string label = variation_column->GetHeader().GetTitle();
2474  string properties = string("&xtype=") +
2476  " &derived_field=true";
2477  column_meta_info->AddField(label, properties);
2478  }
2479 
2480  if (!seqid_column.IsNull()) {
2481  table.SetColumns().push_back(seqid_column);
2482  string label = seqid_column->GetHeader().GetTitle();
2483  string properties = string("&xtype=") +
2485  " &derived_field=true";
2486  column_meta_info->AddField(label, properties);
2487  }
2488 
2489  if (!start_position_column.IsNull()) {
2490  table.SetColumns().push_back(start_position_column);
2491 
2492  string label = start_position_column->GetHeader().GetTitle();
2493  string properties = string("&xtype=") +
2495  " &derived_field=true";
2496  column_meta_info->AddField(label, properties);
2497  }
2498 
2499 
2500  if (!replace_column1.IsNull()) {
2501  table.SetColumns().push_back(replace_column1);
2502 
2503  string label = replace_column1->GetHeader().GetTitle();
2504  string properties = string("&xtype=") +
2506  " &derived_field=true";
2507  column_meta_info->AddField(label, properties);
2508  }
2509 
      // NOTE(review): this block repeats replace_column1; replace_column2 is
      // filled above but never added to the table in the code shown here -
      // it looks like this block was meant to add replace_column2.
2510  if (!replace_column1.IsNull()) {
2511  table.SetColumns().push_back(replace_column1);
2512 
2513  string label = replace_column1->GetHeader().GetTitle();
2514  string properties = string("&xtype=") +
2516  " &derived_field=true";
2517  column_meta_info->AddField(label, properties);
2518  }
2519 
2520  if (errors_occured) {
2521  table.SetColumns().push_back(error_column);
2522  string label = error_column->GetHeader().GetTitle();
2523  string properties = string("&xtype=") +
2525  " &derived_field=true";
2526  column_meta_info->AddField(label, properties);
2527 
2528  table.SetColumns().push_back(disabled_column);
2529  label = "disabled";
2530  properties = string("&xtype=disabled_indices") +
2531  //CTableImportColumn::GetStringFromDataType(CTableImportColumn::eUndefined) +
2532  " &derived_field=true";
2533  column_meta_info->AddField(label, properties);
2534  }
2535 
2536  // Container description must be SNP to make snp features visible in Genome Workbench
      // (the raw pointer is wrapped in a CRef on the push_back line below)
2537  CAnnotdesc* adesc = new CAnnotdesc();
2538  adesc->SetName() = "SNP";
2539  m_AnnotContainer->SetDesc().Set().push_back(CRef<CAnnotdesc>(adesc));
2540 
2541 
2542  return true;
2543 }
2544 
2546 {
      // Removes seq-loc columns from the seq-table in m_AnnotContainer
      // together with the meta-info entry at the same index.
      // Returns silently (after logging where shown) when the annot has no
      // data, the data is not a seq-table, or no column meta info is found.
2547  if (!m_AnnotContainer->IsSetData()) {
2548  LOG_POST(Error << "SeqTable not found in SeqAnnot");
2549  return;
2550  }
2551  CSeq_annot::TData& table_container = m_AnnotContainer->SetData();
2552 
2553  if (!table_container.IsSeq_table()) {
2554  LOG_POST(Error << "SeqAnnot does not contain a SeqTable");
2555  return;
2556  }
2557  CSeq_table& table = table_container.SetSeq_table();
2558  CSeq_table::TColumns& cols = table.SetColumns();
2559 
2560 
2561  CAnnotdesc::TUser* column_meta_info = x_GetColumnMetaInfo();
2562  if (column_meta_info == NULL)
2563  return;
2564 
2565  // Iterate over columns looking for seq-loc fields and erase them.
      // A signed index is used so the descending loop can terminate at idx < 0.
2566  for (int idx=(int)cols.size()-1; idx>=0; --idx) {
2567  const CSeqTable_column_info& header = cols[(size_t)idx]->GetHeader();
2569 
2570  // delete the column from the vector of columns. Since we are
2571  // iterating backwards, it will not affect our iteration
2572  table.SetColumns().erase(table.SetColumns().begin() + idx);
      // assumes the meta-info fields are stored one per column in column
      // order, so the same index applies - TODO confirm
2573  column_meta_info->SetData().erase(column_meta_info->SetData().begin() + idx);
2574  }
2575  }
2576 }
2577 
2578 void CTableAnnotDataSource::WriteAsn(const string& fname)
2579 {
2580  if (m_AnnotContainer.IsNull())
2581  return;
2582 
2583  ofstream ofs(fname.c_str());
2584 
2585  try {
2586  ofs << MSerial_AsnText << *m_AnnotContainer;
2587  }
2588  catch (CException& c) {
2589  LOG_POST(Error << "Error writing asn: " << c.GetMsg());
2590  }
2591 }
2592 
2594 
      // Maps a strand string to the strand enum:
      //   "+" or "pos" (any letter case) -> eNa_strand_plus
      //   "-" or "neg" (any letter case) -> eNa_strand_minus
      //   anything else                  -> eNa_strand_unknown
      // NStr::ToLower is the in-place (string&) overload, so 'strand' itself
      // is lower-cased by the first comparison that reaches it.
2595  if (strand == "+" || NStr::ToLower(strand) == "pos")
2596  return eNa_strand_plus;
2597  else if (strand == "-" || NStr::ToLower(strand) == "neg")
2598  return eNa_strand_minus;
2599  else {
2600  return eNa_strand_unknown;
2601  }
2602 }
2603 
/// Look up the value assigned to 'tag_name' in 'meta_string'.
///
/// The tag is searched for in the form "&<tag_name>=". Its value runs from
/// just after the '=' up to the next '&' or to the end of the string.
///
/// @param meta_string  properties string, e.g.
///                     "&xtype=StartPosition &one_based=true"
/// @param tag_name     tag to look for, without the '&' and '='
/// @return the value text, or an empty string when the tag is not present
2604 string CTableAnnotDataSource::x_GetMetaInfoTag(const string& meta_string,
2605  const string& tag_name)
2606 {
2607  string value;
2608 
2609  string tag = "&" + tag_name + "=";
2610 
2611  size_t tag_idx = meta_string.find(tag);
2612  if (tag_idx != string::npos) {
2613  // End of value is the start of the next value qualifier (this allows
2614  // embedded blanks in values)
      // search from tag_idx+1 so the tag's own leading '&' is skipped
2615  size_t tag_end_idx = meta_string.find_first_of("&", tag_idx+1);
2616  size_t start_idx = tag_idx + string(tag).length();
2617 
2618  if (tag_end_idx == string::npos) {
2619  value = meta_string.substr(start_idx, meta_string.length()-start_idx);
2620  }
2621  else {
2622  value = meta_string.substr(start_idx, tag_end_idx-start_idx);
2623  }
2624 
      // NOTE(review): confirm whether blanks around the value (e.g. the one
      // separating it from the next '&') are trimmed before returning.
2626  }
2627 
2628  return value;
2629 }
2630 
2632  const string& tag_name,
2633  const string& new_value)
2634 {
      // Replaces, in place, the value of "&<tag_name>=" inside 'meta_string'
      // with 'new_value' after truncating its spaces.
      // Returns true when the tag was found and updated, false when the tag
      // is absent (meta_string is then left untouched).
      // NOTE(review): 'value' below is never used in this function.
2635  string value;
2636 
2637  string tag = "&" + tag_name + "=";
2638 
2639  size_t tag_idx = meta_string.find(tag);
2640  if (tag_idx != string::npos) {
2641  // End of value is the start of the next value qualifier (this allows
2642  // embedded blanks in values)
      // search from tag_idx+1 so the tag's own leading '&' is skipped
2643  size_t tag_end_idx = meta_string.find_first_of("&", tag_idx+1);
2644  size_t start_idx = tag_idx + string(tag).length();
2645 
2646  string trim_value = NStr::TruncateSpaces(new_value);
2647 
2648  if (tag_end_idx == string::npos) {
      // tag is the last one: drop the old value and append the new one
2649  meta_string.erase(start_idx, meta_string.length()-start_idx);
2650  meta_string += trim_value;
2651  }
2652  else {
2653  meta_string.erase(start_idx, tag_end_idx-start_idx);
2654  // insert value in previous value position and end with a blank
2655  meta_string.insert(start_idx, trim_value + " ");
2656  }
2657 
2658  return true;
2659  }
2660 
2661  return false;
2662 }
2663 
2666 {
      // Returns a pointer to the user-data object of the first user-type
      // descriptor found in m_AnnotContainer's descriptor list.
      // Returns NULL, after logging, when the descriptor set or its list is
      // not available or when no descriptor of user type exists.
      // The pointer refers to data inside m_AnnotContainer; it is not a copy.
2667  if (!m_AnnotContainer->CanGetDesc()) {
2668  LOG_POST("Error - annot-desc not initialized.");
2669  return NULL;
2670  }
2671 
2672  CSeq_annot::TDesc& annot_desc = m_AnnotContainer->SetDesc();
2673  if (!annot_desc.CanGet()) {
2674  LOG_POST("Error - annot-desc list not initialized.");
2675  return NULL;
2676  }
2677 
2678 
2679  CAnnot_descr_Base::Tdata& desc_list_data = annot_desc.Set();
2680  CAnnot_descr_Base::Tdata::iterator tditer = desc_list_data.begin();
2681 
      // stop at the first descriptor that holds user data
2682  for (; tditer!=desc_list_data.end(); ++tditer) {
2683  if ( (*tditer)->IsUser() )
2684  break;
2685  }
2686 
2687  if (tditer == desc_list_data.end()) {
2688  LOG_POST("Error - User data not found in annot-desc");
2689  return NULL;
2690  }
2691 
2692  // Get User data from Annot Descriptor this is the Properties data
2693  // structure, where each entry is a combination of column number
2694  // and field properties, e.g.
2695  // Column.1 &xtype=StartPosition &one_based=true
2696  CAnnotdesc::TUser& column_meta_info = (*tditer)->SetUser();
2697 
2698  return &column_meta_info;
2699 }
2700 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAnnot_descr –.
Definition: Annot_descr.hpp:66
CAnnotdesc –.
Definition: Annotdesc.hpp:66
bool IsRefSeq() const
Is this assembly a RefSeq assembly?
static CRef< CGencollSvc > GetInstance(void)
Definition: gencoll_svc.cpp:54
IdMapper implementation using a GC-Assembly.
Definition: idmapper.hpp:370
EAliasMapping
Mapping destination type.
Definition: idmapper.hpp:375
@ eGenBankAcc
GenBank accession.
Definition: idmapper.hpp:377
@ eRefSeqAcc
RefSeq accession.
Definition: idmapper.hpp:379
IdMapper base class implementation.
Definition: idmapper.hpp:56
CScope –.
Definition: scope.hpp:92
ENa_strand x_GetStrand(string strand)
Return enumerated strand type based on string.
void x_LogErr(const string &logstr, string &errstr, int &err_count, int row)
Log an error (but stop logging if the error count gets high).
bool x_UpdateMetaInfoTag(string &meta_string, const string &tag_name, const string &new_value)
Change the value for 'tag_name' in 'meta_string' to 'new_value'.
bool AddSeqLoc(const STableLocation &fc, int loc_number, ICanceled *cancel=NULL)
Add a location to the table based on the specified columns.
CAnnotdesc::TUser * x_GetColumnMetaInfo()
Get meta information stored in User_Data in m_AnnotContainer.
string GetField(size_t row, size_t col) const
return a specific field from a specific row, based on current table type and delimiter
vector< STableLocation > FindLocations(string &msg, bool strand_required)
Find columns that can be combined to form locations based on column info. If no locations are found,...
bool CreateSnps(const STableLocation &fc, ICanceled *cancel=NULL)
Create SNPs (features that show variations) for each row.
string x_GetMetaInfoTag(const string &meta_string, const string &tag_name)
Search string 'meta_string' for the value assigned to 'tag_name', e.g.
bool CreateSnpFeature(const STableLocation &fc, ICanceled *cancel=NULL)
Create a region feature for each snp using specified snp (rsid) column.
void RemoveSeqLocs()
Remove any seqloc columns (can be used to undo the AddSeqLoc actions)
bool AddSnpSeqLoc(const STableLocation &fc, int loc_number, ICanceled *cancel=NULL)
Add a location to the table using the rsid (snp/variation) ids location from the snp db.
void Clear()
clears all columns rows and delimiters
bool CreateFeature(const STableLocation &fc, ICanceled *cancel=NULL)
Create a feature using specified columns.
CRef< CSeq_annot > m_AnnotContainer
void WriteAsn(const string &fname)
Write seqtable to file 'fname'.
static string GetStringFromDataType(eDataType t)
Return a string version of a data-type (e.g. "Length" for eLength)
static eDataType GetDataTypeFromString(const string &s)
Return enum data-type from string.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Interface for testing cancellation request in a long lasting operation.
Definition: icanceled.hpp:51
static const char location[]
Definition: config.c:97
char value[7]
Definition: config.c:431
static const char si[8][64]
Definition: des.c:146
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
static CRef< objects::CSeq_id > MapStringId(const string &str, objects::IIdMapper *mapper)
Definition: utils.cpp:2394
list< TSNPSearchCompoundResult > TSNPSearchCompoundResultList
results of a search for one or several ids
Definition: snp_gui.hpp:234
pair< string, TSNPSearchResultList > TSNPSearchCompoundResult
results of a search for a given id (string) TSNPSearchResultList may be empty if the id has been foun...
Definition: snp_gui.hpp:231
static void Search(const std::string &sTerms, const std::string &sAssemblyAccession, TSNPSearchCompoundResultList &ResultList)
search for given SNP ID(s) and get a list of results
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
@ eDoResetVariant
Definition: serialbase.hpp:77
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
TObjectType & GetObject(void) const
Get object.
Definition: ncbiobj.hpp:1697
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
virtual bool IsCanceled(void) const =0
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5186
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3182
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
static const char label[]
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
TData & SetData(void)
Assign a value to Data data member.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
TBit & SetBit(void)
Select the variant.
TField_id GetField_id(void) const
Get the Field_id member data.
TLoc & SetLoc(void)
Select the variant.
void SetHeader(THeader &value)
Assign a value to Header data member.
void SetField_name(const TField_name &value)
Assign a value to Field_name data member.
const TTitle & GetTitle(void) const
Get the Title member data.
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
vector< CRef< CSeqTable_column > > TColumns
Definition: Seq_table_.hpp:92
void SetSparse(TSparse &value)
Assign a value to Sparse data member.
const THeader & GetHeader(void) const
Get the Header member data.
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
TString & SetString(void)
Select the variant.
void ResetField_id(void)
Reset Field_id data member.
void SetData(TData &value)
Assign a value to Data data member.
vector< CStringUTF8 > TStrings
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetDefault(TDefault &value)
Assign a value to Default data member.
void SetField_id(TField_id value)
Assign a value to Field_id data member.
@ eField_id_location
location as Seq-loc
@ eField_id_location_strand
location strand
@ eField_id_id_local
main feature fields id.local.id
@ eField_id_data_imp_key
various data fields
@ e_Real
a set of reals, one per row
@ e_Loc
a set of locations, one per row
@ e_String
a set of strings, one per row
@ e_Int
a set of 4-byte integers, one per row
@ e_Common_string
a set of string with small set of possible values
@ e_Indexes
Indexes of rows with values.
@ e_Region
named region (globin locus)
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
TSeq_table & SetSeq_table(void)
Select the variant.
Definition: Seq_annot_.cpp:159
Tdata & Set(void)
Assign a value to data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
TName & SetName(void)
Select the variant.
Definition: Annotdesc_.hpp:508
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
Definition: Seq_annot_.hpp:861
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
Definition: Seq_annot_.hpp:846
bool IsSeq_table(void) const
Check if variant Seq_table is selected.
Definition: Seq_annot_.hpp:715
bool CanGet(void) const
Check if it is safe to call Get method.
list< CRef< CAnnotdesc > > Tdata
list< CRef< CVariantPlacement > > TPlacements
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int len
string GetHeader()
Definition: file_names.hpp:62
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::SIZE size
const char * tag
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
The Object manager core.
static char tmp[2048]
Definition: utf8.c:42
#define fc
CRef< objects::CObjectManager > om
static const char * column
Definition: stats.c:23
string GetColumnInfo() const
Return informational string as to which columns are selected.
CompareSNPResults(const string &rsid)
bool operator()(const NSNPWebServices::TSNPSearchCompoundResult &rhs) const
USING_SCOPE(objects)
Modified on Mon Dec 11 02:41:17 2023 by modify_doxy.py rev. 669887