NCBI C++ ToolKit
subprep_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: subprep_util.cpp 47464 2023-04-20 00:19:10Z evgeniev $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Colleen Bollin
27  */
28 
29 
30 #include <ncbi_pch.hpp>
31 
37 #include <objects/biblio/Affil.hpp>
55 #include <objects/seq/Bioseq.hpp>
56 #include <objects/seq/Seq_ext.hpp>
60 #include <objects/seq/Seq_data.hpp>
61 #include <objects/seq/IUPACaa.hpp>
63 #include <objmgr/seqdesc_ci.hpp>
64 #include <objmgr/bioseq_ci.hpp>
65 #include <objmgr/feat_ci.hpp>
66 #include <objmgr/seq_vector.hpp>
67 
85 
89 
90 #include <objmgr/util/sequence.hpp>
99 #include <objtools/readers/source_mod_parser.hpp>
100 #include <objects/seq/Seq_annot.hpp>
101 #include <wx/msgdlg.h>
102 #include <objtools/edit/cds_fix.hpp>
103 
104 
106 
107 
114  TWizardName(CSourceRequirements::eWizardType_igs, "Intergenic Spacer (IGS) sequences"),
116  TWizardName(CSourceRequirements::eWizardType_d_loop, "D-loops and control regions")
117 };
// Number of entries in s_WizardNames, derived from the array's byte size.
118 static int k_NumWizardNames = sizeof (s_WizardNames) / sizeof (TWizardName);
119 
120 
// Builds and returns a TWizardNameList holding a copy of every entry of
// s_WizardNames, in array order.
122 {
123  TWizardNameList list;
124  list.clear();
125  for (int i = 0; i < k_NumWizardNames; i++) {
126  list.push_back(s_WizardNames[i]);
127  }
128  return list;
129 }
130 
131 
// Looks wizard_name up among the display names (the .second members) of
// s_WizardNames, ignoring case, and returns the paired .first member of the
// first match.
133 {
134  for (int pos = 0; pos < k_NumWizardNames; pos++) {
135  if (NStr::EqualNocase(wizard_name, s_WizardNames[pos].second)) {
136  return s_WizardNames[pos].first;
137  }
138  }
140 }
141 
142 
143 /*
144 // This looks like a copy-paste of RelaxedMatch from srcedit_util. Since we're making it available there, let's remove it here.
145 static bool RelaxedMatch (CRef<objects::CSeq_id> id1, CRef<objects::CSeq_id> id2)
146 {
147  bool found = false;
148  if (id1->IsLocal()) {
149  string id1_label = "";
150  id1->GetLabel(&id1_label, objects::CSeq_id::eContent);
151  string id2_label = "";
152  id2->GetLabel(&id2_label, objects::CSeq_id::eContent);
153  id2->GetLabel(&id2_label, objects::CSeq_id::eContent);
154  size_t pos = NStr::Find (id2_label, id1_label);
155  if (pos == 0) {
156  if (NStr::Equal(id2_label.substr(id1_label.length(), 1), ".")) {
157  found = true;
158  }
159  } else if (pos != string::npos && NStr::EndsWith(id2_label, id1_label)) {
160  string delim = id2_label.substr(pos - 1, 1);
161  if (NStr::Equal(delim, "|") || NStr::Equal(delim, "/") || NStr::Equal(delim, ":")) {
162  found = true;
163  }
164  }
165  }
166  return found;
167 }
168 */
169 
170 static bool OneRowOk (CRef<objects::CSeq_id> id, CRef<objects::CSeqTable_column> id_col, objects::CSeq_entry_Handle seh)
171 {
172  if (!id || !id_col) {
173  return false;
174  }
175 
176  size_t row = 0;
177  bool found = false;
178  while (row < id_col->GetData().GetSize() && !found) {
179  CRef<objects::CSeq_id> row_id = id_col->GetData().GetId()[row];
180  objects::CSeq_id::E_SIC compare = id->Compare(*row_id);
181  if (compare == objects::CSeq_id::e_YES) {
182  found = true;
183  } else if (compare == objects::CSeq_id::e_DIFF) {
184  if (RelaxedMatch(id, row_id)) {
185  found = true;
186  id->Assign(*row_id);
187  }
188  }
189  row++;
190  }
191  row = 0;
192  while (row < id_col->GetData().GetSize() && !found) {
193  CRef<objects::CSeq_id> row_id = id_col->GetData().GetId()[row];
194  objects::CBioseq_Handle bsh = seh.GetBioseqHandle (*row_id);
195  if (bsh) {
196  CConstRef<objects::CBioseq> b = bsh.GetCompleteBioseq();
197  ITERATE (objects::CBioseq::TId, id_it, b->GetId()) {
198  objects::CSeq_id::E_SIC compare = id->Compare(**id_it);
199  if (compare == objects::CSeq_id::e_YES) {
200  found = true;
201  id->Assign(*row_id);
202  break;
203  } else if (compare == objects::CSeq_id::e_DIFF) {
204  if (RelaxedMatch(id, *id_it)) {
205  found = true;
206  id->Assign(*row_id);
207  break;
208  }
209  }
210  }
211  }
212  row++;
213  }
214 
215  return found;
216 }
217 
218 
220 {
221  if (!input_table || !input_table->IsSetColumns() || input_table->GetColumns().size() < 1) {
222  return;
223  }
224 
225  bool all_default = true;
226  int col_num = 1;
227  ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
228  string expected_title = "Column " + NStr::NumericToString(col_num);
229  if (!(*cit)->IsSetHeader() || !(*cit)->GetHeader().IsSetTitle()
230  || !NStr::Equal(expected_title, (*cit)->GetHeader().GetTitle())) {
231  all_default = false;
232  break;
233  }
234  col_num++;
235  }
236 
237  bool all_match = true;
238  // make titles
239  NON_CONST_ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
240  if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
241  // no data, skip
242  } else {
243  // find first value
244  string first_val = "";
245  if ((*cit)->GetData().IsString()) {
246  first_val = (*cit)->GetData().GetString()[0];
247  } else if ((*cit)->GetData().IsId()) {
248  (*cit)->GetData().GetId()[0]->GetLabel(&first_val, objects::CSeq_id::eContent);
249  }
250 
251  if (all_default || !(*cit)->IsSetHeader() || !(*cit)->GetHeader().IsSetTitle() || NStr::IsBlank((*cit)->GetHeader().GetTitle())) {
252  // set header if missing
253  (*cit)->SetHeader().SetTitle(first_val);
254  } else if (!NStr::Equal((*cit)->GetHeader().GetTitle(), first_val)) {
255  all_match = false;
256  }
257  }
258  }
259 
260  // if all titles match data, delete first row
261  if (all_match) {
262  DeleteTableRow(input_table, 0);
263  }
264 
265  CRef<objects::CSeqTable_column> id_col = FindSeqIDColumn (*input_table);
266  if (!id_col) {
267  // if an ID column wasn't specified, go look for it
268  NON_CONST_ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
269  if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
270  // no data, skip
271  } else if ((*cit)->GetData().IsString()) {
272  string title = (*cit)->GetHeader().GetTitle();
273  NStr::ReplaceInPlace(title, " ", "");
274  NStr::ReplaceInPlace(title, "-", "");
275  NStr::ReplaceInPlace(title, "_", "");
276  if (NStr::EqualNocase(title, "SeqId") || NStr::EqualNocase(title, "SequenceID")) {
277  // title looks right
278  vector<CRef<objects::CSeq_id> > new_ids;
279  for (size_t j = 0; j < (*cit)->GetData().GetString().size(); j++) {
280  string val = (*cit)->GetData().GetString()[j];
281  try {
282  CRef<objects::CSeq_id> id(new objects::CSeq_id(val));
283  new_ids.push_back(id);
284  } catch (exception &) {
285  CRef<objects::CSeq_id> id(new objects::CSeq_id());
286  id->SetLocal().SetStr(val);
287  new_ids.push_back(id);
288  }
289  }
290  for (size_t j = 0; j < new_ids.size(); j++) {
291  (*cit)->SetData().SetId().push_back(new_ids[j]);
292  }
293  // found the ID column, stop looking now
294  break;
295  }
296  }
297  }
298  }
299 
300  // change numbers to strings
301  NON_CONST_ITERATE (objects::CSeq_table::TColumns, cit, input_table->SetColumns()) {
302  if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
303  // skip - no data
304  } else if ((*cit)->GetData().IsInt()) {
305  vector<string> values;
306  for (size_t j = 0; j < (*cit)->GetData().GetInt().size(); j++) {
307  values.push_back(NStr::NumericToString((*cit)->GetData().GetInt()[j]));
308  }
309  for (size_t j = 0; j < values.size(); j++) {
310  (*cit)->SetData().SetString().push_back(values[j]);
311  }
312  } else if ((*cit)->GetData().IsReal()) {
313  vector<string> values;
314  for (size_t j = 0; j < (*cit)->GetData().GetInt().size(); j++) {
315  values.push_back(NStr::NumericToString((*cit)->GetData().GetReal()[j]));
316  }
317  for (size_t j = 0; j < values.size(); j++) {
318  (*cit)->SetData().SetString().push_back(values[j]);
319  }
320  }
321  }
322 
323 
324 }
325 
326 
// Validates the sequence IDs held in src_col against dst_col and the
// sequences reachable through seh.
//
// For each ID row in src_col:
//   - if OneRowOk() cannot match it, an "Unable to find ..." entry is added;
//   - otherwise it is compared (Compare() == e_YES) with every previously
//     accepted ID, and a "refer to the same sequence" entry is added per hit.
// Row numbers in the messages are 1-based.
//
// Returns the entries joined with "; " (trailing separator removed), an empty
// string when nothing is wrong, or a fixed message when src_col is null.
// Note that OneRowOk() may rewrite the ID stored in src_col.
327 string FindBadRows (CRef<objects::CSeq_table> src, CRef<objects::CSeq_table> dst, objects::CSeq_entry_Handle seh)
328 {
331 
332  if (!src_col) {
333  return "No sequence ID column selected!";
334  }
335 
336  string error = "";
337  size_t row = 0;
// IDs of the rows accepted so far, in row order of acceptance.
338  vector<CRef<objects::CSeq_id> > already_seen;
339  while (row < src_col->GetData().GetSize()) {
340  if (!OneRowOk(src_col->GetData().GetId()[row], dst_col, seh)) {
341  string id_label = "";
342  src_col->GetData().GetId()[row]->GetLabel(&id_label, objects::CSeq_id::eContent);
343  error += "Unable to find " + id_label + " for row " + NStr::NumericToString(row + 1) + "; ";
344  } else {
// NOTE(review): as_row counts positions in already_seen, which skips rows
// that failed OneRowOk, so it can differ from the original row number.
345  int as_row = 1;
346  ITERATE (vector<CRef<objects::CSeq_id> >, sit, already_seen) {
347  if ((*sit)->Compare(*(src_col->GetData().GetId()[row])) == objects::CSeq_id::e_YES) {
348  error += "Row " + NStr::NumericToString(row + 1)
349  + " and row " + NStr::NumericToString(as_row)
350  + " refer to the same sequence; ";
351  }
352  as_row++;
353  }
354  already_seen.push_back(src_col->GetData().GetId()[row]);
355  }
356  row++;
357  }
// Drop the trailing "; " separator.
358  if (!NStr::IsBlank(error)) {
359  error = error.substr(0, error.length() - 2);
360  }
361  return error;
362 }
363 
364 
// Returns the first column of `table` whose data is set and is of the ID
// variety; falls through to returning no_col when there is none.
366 {
367  ITERATE (objects::CSeq_table::TColumns, cit, table.GetColumns()) {
368  if ((*cit)->IsSetData() && (*cit)->GetData().IsId()) {
369  return *cit;
370  }
371  }
373  return no_col;
374 }
375 
376 
// Returns the 0-based index of the first row in id_col whose ID compares
// e_YES with `id`, or -1 when there is no such row, when either argument is
// null, or when id_col holds no ID data.
378 {
379  if (!id_col || !id || !id_col->IsSetData() || !id_col->GetData().IsId()) {
380  return -1;
381  }
382  int row = 0;
// NOTE(review): signed `row` is compared with the value returned by GetSize().
383  while (row < id_col->GetData().GetSize()) {
384  if (id->Compare(*(id_col->GetData().GetId()[row])) == objects::CSeq_id::e_YES) {
385  return row;
386  }
387  row++;
388  }
389  return -1;
390 }
391 
392 
// Counts source rows whose value would overwrite a different, non-blank
// value in the destination column.
//
// For each index i present in both src_id and src_col, the destination row is
// found with FindRowForSeqId(dst_id, ...). A conflict is counted when that row
// exists in dst_col, its string is not blank, and it differs from the source
// string at i.
// NOTE(review): both columns are read through GetString(); confirm callers
// only pass string columns.
395 {
396  int count = 0;
397  for (size_t i = 0; i < src_id->GetData().GetSize() && i < src_col->GetData().GetSize(); i++) {
398  int row = FindRowForSeqId(dst_id, src_id->GetData().GetId()[i]);
399  if (row > -1) {
400  if (dst_col->GetData().GetString().size() > row
401  && !NStr::IsBlank(dst_col->GetData().GetString()[row])
402  && !NStr::Equal(dst_col->GetData().GetString()[row], src_col->GetData().GetString()[i])) {
403  count++;
404  }
405  }
406  }
407  return count;
408 }
409 
410 
// Returns the number of non-ID data columns of `src` that also exist in `dst`
// (looked up by title) and for which CountColumnRowConflicts() reports at
// least one conflicting row. Each such column is counted once, however many
// rows conflict.
// NOTE(review): the title is read without checking that the header/title is
// set on the source column.
411 int CountTableColumnConflicts (CRef<objects::CSeq_table> dst, const objects::CSeq_table& src)
412 {
415  int conflicting_columns = 0;
416 
417  // for every column in source, if in dst, combine columns, otherwise add column to dst
418  ITERATE (objects::CSeq_table::TColumns, cit, src.GetColumns()) {
419  if ((*cit)->IsSetData() && !(*cit)->GetData().IsId()) {
420  CRef<objects::CSeqTable_column> dst_col = FindSeqTableColumnByName (dst, (*cit)->GetHeader().GetTitle());
421  if (dst_col) {
422  if (CountColumnRowConflicts (dst_id, dst_col, src_id, *cit) > 0) {
423  conflicting_columns++;
424  }
425  }
426  }
427  }
428  return conflicting_columns;
429 }
430 
431 
// Removes row `row` from every column of `table` that has data.
//
// For each supported data variety the elements after `row` are shifted down
// by one position and the last element is popped. Columns with no data, or
// with `row` at/after their size, are left untouched. For ID and Loc columns
// the shift copies object contents with Assign() rather than moving the
// references.
// NOTE(review): `row` is not checked for being negative.
// NOTE(review): num_vals comes from GetSize(), while the loops index the
// underlying Bit/Bytes containers directly - confirm both count the same unit
// for those two varieties.
433 {
434  NON_CONST_ITERATE (objects::CSeq_table::TColumns, cit, table->SetColumns()) {
435  if (!(*cit)->IsSetData() || (*cit)->GetData().GetSize() == 0) {
436  // no data, skip
437  } else {
438  int num_vals = static_cast<int>((*cit)->GetData().GetSize());
439  if (row >= num_vals) {
440  continue;
441  }
442  if ((*cit)->GetData().IsString()) {
443  for (int j = row; j < num_vals - 1; j++) {
444  (*cit)->SetData().SetString()[j] = (*cit)->GetData().GetString()[j + 1];
445  }
446  (*cit)->SetData().SetString().pop_back();
447  } else if ((*cit)->GetData().IsId()) {
448  for (int j = row; j < num_vals - 1; j++) {
449  (*cit)->SetData().SetId()[j]->Assign(*((*cit)->GetData().GetId()[j + 1]));
450  }
451  (*cit)->SetData().SetId().pop_back();
452  } else if ((*cit)->GetData().IsLoc()) {
453  for (int j = row; j < num_vals - 1; j++) {
454  (*cit)->SetData().SetLoc()[j]->Assign(*((*cit)->GetData().GetLoc()[j + 1]));
455  }
456  (*cit)->SetData().SetLoc().pop_back();
457  } else if ((*cit)->GetData().IsInt()) {
458  for (int j = row; j < num_vals - 1; j++) {
459  (*cit)->SetData().SetInt()[j] = (*cit)->GetData().GetInt()[j + 1];
460  }
461  (*cit)->SetData().SetInt().pop_back();
462  } else if ((*cit)->GetData().IsReal()) {
463  for (int j = row; j < num_vals - 1; j++) {
464  (*cit)->SetData().SetReal()[j] = (*cit)->GetData().GetReal()[j + 1];
465  }
466  (*cit)->SetData().SetReal().pop_back();
467  } else if ((*cit)->GetData().IsBit()) {
468  for (int j = row; j < num_vals - 1; j++) {
469  (*cit)->SetData().SetBit()[j] = (*cit)->GetData().GetBit()[j + 1];
470  }
471  (*cit)->SetData().SetBit().pop_back();
472  } else if ((*cit)->GetData().IsBytes()) {
473  for (int j = row; j < num_vals - 1; j++) {
474  (*cit)->SetData().SetBytes()[j] = (*cit)->GetData().GetBytes()[j + 1];
475  }
476  (*cit)->SetData().SetBytes().pop_back();
477  }
478  }
479  }
480 }
481 
482 
// Copies string values from src_col into dst_col, matching rows by sequence
// ID: source row i goes to the destination row that FindRowForSeqId() finds
// for src_id's i-th ID. Source rows with no matching destination row are
// ignored.
//
// Before each write dst_col is padded with empty strings up to the number of
// rows in dst_id, so the target index exists. Existing destination values are
// overwritten unconditionally.
485 {
486  size_t i = 0;
487 
488  for (i = 0; i < src_id->GetData().GetSize() && i < src_col->GetData().GetSize(); i++) {
489  int row = FindRowForSeqId(dst_id, src_id->GetData().GetId()[i]);
490  if (row > -1) {
491  while (dst_col->SetData().SetString().size() < dst_id->GetData().GetSize()) {
492  dst_col->SetData().SetString().push_back ("");
493  }
// Any exception raised by the copy is deliberately swallowed (best effort).
494  try {
495  dst_col->SetData().SetString()[row] = src_col->GetData().GetString()[i];
496  } catch (exception &) {
497  // int x = 0; // compiler warning here, commented out unused "x"
498  }
499  }
500  }
501 }
502 
503 
504 // return number of new columns
// Merges the non-ID data columns of `src` into `dst`.
//
// A source column whose title already exists in `dst` is merged into that
// column; otherwise a new column is appended to `dst` with a copy of the
// source header (the title falls back to the field name when no title is
// set). In both cases the values are transferred with CombineColumns().
//
// Returns the number of columns newly added to `dst`.
505 int CombineTables (CRef<objects::CSeq_table> dst, const objects::CSeq_table& src)
506 {
507  int num_new = 0;
510 
511  // for every column in source, if in dst, combine columns, otherwise add column to dst
512  ITERATE (objects::CSeq_table::TColumns, cit, src.GetColumns()) {
513  if ((*cit)->IsSetData() && !(*cit)->GetData().IsId()) {
514  CRef<objects::CSeqTable_column> dst_col = FindSeqTableColumnByName (dst, (*cit)->GetHeader().GetTitle());
515  if (dst_col) {
516  // combine columns
517  } else {
518  dst_col = new objects::CSeqTable_column();
519  dst_col->SetHeader().Assign((*cit)->GetHeader());
520  if (!dst_col->GetHeader().IsSetTitle() && dst_col->GetHeader().IsSetField_name()) {
521  dst_col->SetHeader().SetTitle(dst_col->GetHeader().GetField_name());
522  }
523  dst->SetColumns().push_back(dst_col);
524  num_new++;
525  }
526  CombineColumns(dst_id, dst_col, src_id, *cit);
527  }
528  }
529  return num_new;
530 }
531 
532 
// Asks the user for a file through a save dialog and writes values_table to
// it as tab-delimited text (fields quoted with '"' by CCSVExporter).
//
// The first output row holds the column titles; each following row holds one
// table row. ID cells are written as their content label, string cells as-is,
// and a blank field is written when a column is shorter than the row index.
// NOTE(review): cells of any other data variety emit no field at all, which
// would shift later fields left in that row - confirm this cannot occur.
//
// On success save_file_dir/save_file_name are updated from the dialog and
// true is returned. Returns false when the dialog is cancelled, the chosen
// name is blank, or the file cannot be opened (the latter two show an error
// box).
533 bool SaveTableFile (wxWindow *parent, wxString& save_file_dir, wxString& save_file_name, CRef<objects::CSeq_table> values_table)
534 {
535  wxFileDialog table_save_file(parent, wxT("Select a file"), save_file_dir, save_file_name,
538  wxFD_SAVE|wxFD_OVERWRITE_PROMPT);
539 
540  if (table_save_file.ShowModal() == wxID_OK)
541  {
542  wxString path = table_save_file.GetPath();
543  wxString name = table_save_file.GetFilename();
544 
// Strip whitespace from both ends of the file name.
545  name.Trim(false);
546  name.Trim(true);
547 
548  if (name.IsEmpty())
549  {
550  wxMessageBox(wxT("Please, select file name"), wxT("Error"),
551  wxOK | wxICON_ERROR, parent);
552  return false;
553  }
554  ios::openmode mode = ios::out;
555 
556  CNcbiOfstream os(path.fn_str(), mode);
557  if (!os)
558  {
559  wxMessageBox(wxT("Cannot open file ")+name, wxT("Error"),
560  wxOK | wxICON_ERROR, parent);
561  return false;
562  }
563 
564  CCSVExporter exporter(os, '\t', '"');
// Header row: one field per column title.
565  ITERATE (objects::CSeq_table::TColumns, cit, values_table->GetColumns()) {
566  exporter.Field((*cit)->GetHeader().GetTitle());
567  }
568  exporter.NewRow();
569  for (int i = 0; i < values_table->GetNum_rows(); ++i) {
570  ITERATE (objects::CSeq_table::TColumns, cit, values_table->GetColumns()) {
571  if (i < (*cit)->GetData().GetSize()) {
572  if ((*cit)->GetData().IsId()) {
573  string label = "";
574  (*cit)->GetData().GetId()[i]->GetLabel(&label, objects::CSeq_id::eContent);
575  exporter.Field(label);
576  } else if ((*cit)->GetData().IsString()) {
577  exporter.Field((*cit)->GetData().GetString()[i]);
578  }
579  } else {
580  string blank = "";
581  exporter.Field(blank);
582  }
583  }
584  exporter.NewRow();
585  }
586 
// Remember the user's choice for the next invocation.
587  save_file_dir = table_save_file.GetDirectory();
588  save_file_name = table_save_file.GetFilename();
589  return true;
590  } else {
591  return false;
592  }
593 }
594 
595 
// Reports whether every row of `col` holds the same value.
//
// String columns: blank values are replaced by default_val before comparing.
// Integer columns: values are compared directly.
// A null column, or one with fewer than two rows, counts as "all the same".
// Any other data variety yields false (not implemented).
// NOTE(review): unlike the neighbouring predicates this one does not test
// col->IsSetData() before calling GetData().
597 {
598  if (!col) {
599  return true;
600  }
601  size_t num_rows = col->GetData().GetSize();
602  if (num_rows < 2) {
603  return true;
604  }
605  if (col->GetData().IsString()) {
606  string val = col->GetData().GetString()[0];
607  if (NStr::IsBlank(val)) {
608  val = default_val;
609  }
610  for (size_t row = 1; row < num_rows; row++) {
611  string new_val = col->GetData().GetString()[row];
612  if (NStr::IsBlank(new_val)) {
613  new_val = default_val;
614  }
615  if (!NStr::Equal(val, new_val)) {
616  return false;
617  }
618  }
619  return true;
620  } else if (col->GetData().IsInt()) {
621  int val = col->GetData().GetInt()[0];
622  for (size_t row = 1; row < num_rows; row++) {
623  if (val != col->GetData().GetInt()[row]) {
624  return false;
625  }
626  }
627  return true;
628  }
629  // TODO: implement other data types
630  return false;
631 }
632 
633 
// Reports whether `col` has at least one missing value.
//
// Returns true when the column is null, has no data, has zero rows, or (for a
// string column) contains a blank string. Non-string columns with rows yield
// false (not implemented).
635 {
636  if (!col || !col->IsSetData()) {
637  return true;
638  }
639  size_t num_rows = col->GetData().GetSize();
640  if (num_rows < 1) {
641  return true;
642  }
643  if (col->GetData().IsString()) {
644  for (size_t row = 0; row < num_rows; row++) {
645  string new_val = col->GetData().GetString()[row];
646  if (NStr::IsBlank(new_val)) {
647  return true;
648  }
649  }
650  }
651  // TODO: implement other data types
652  return false;
653 }
654 
655 
// Reports whether any row of the string column `col` equals `val`
// (case-sensitive).
//
// NOTE(review): a null column, one without data, or one with zero rows
// returns true here, although nothing was compared - confirm callers rely on
// this. Non-string columns with rows yield false (not implemented).
657 {
658  if (!col || !col->IsSetData()) {
659  return true;
660  }
661  size_t num_rows = col->GetData().GetSize();
662  if (num_rows < 1) {
663  return true;
664  }
665  if (col->GetData().IsString()) {
666  for (size_t row = 0; row < num_rows; row++) {
667  string new_val = col->GetData().GetString()[row];
668  if (NStr::Equal(new_val, val)) {
669  return true;
670  }
671  }
672  }
673  // TODO: implement other data types
674  return false;
675 }
676 
677 
// Reports whether the string column `col` holds at least one meaningful
// value, i.e. a row that is neither blank nor equal (ignoring case) to
// default_val.
//
// Returns false for a null column, one without data, one with zero rows, and
// for non-string columns (not implemented).
679 {
680  if (!col || !col->IsSetData()) {
681  return false;
682  }
683  size_t num_rows = col->GetData().GetSize();
684  if (num_rows < 1) {
685  return false;
686  }
687  if (col->GetData().IsString()) {
688  for (size_t row = 0; row < num_rows; row++) {
689  string new_val = col->GetData().GetString()[row];
690  if (!NStr::EqualNocase(new_val, default_val) && !NStr::IsBlank(new_val)) {
691  return true;
692  }
693  }
694  }
695  // TODO: implement other data types
696  return false;
697 }
698 
699 
700 static void s_ExtendIntervalToEnd (objects::CSeq_interval& ival, objects::CBioseq_Handle bsh)
701 {
702  if (ival.IsSetStrand() && ival.GetStrand() == objects::eNa_strand_minus) {
703  if (ival.GetFrom() > 3) {
704  ival.SetFrom(ival.GetFrom() - 3);
705  } else {
706  ival.SetFrom(0);
707  }
708  } else {
709  size_t len = bsh.GetBioseqLength();
710  if (ival.GetTo() < len - 4) {
711  ival.SetTo(ival.GetTo() + 3);
712  } else {
713  ival.SetTo(static_cast<CSeq_interval::TTo>(len) - 1);
714  }
715  }
716 }
717 
718 
// Adds an imported protein sequence to `seh` by queueing commands on `cmd`:
// one command that adds the protein entry next to the matching nucleotide
// sequence, and one that creates a coding-region feature whose product is the
// protein.
//
// Steps:
//   1. Pick the nucleotide sequence: the first protein ID that resolves in
//      the scope, or - when none resolves - the only nucleotide sequence in
//      `seh`. Zero or several candidates produce an error box and false.
//   2. If the match was by ID, replace the protein's IDs with a new one from
//      GetNewProtId() (create_general_only is passed through).
//   3. Temporarily add the protein to the scope and align it to the
//      nucleotide with CProSplign; the exons of a spliced alignment become
//      the CDS location.
//   4. Mark the location 5'/3' partial when the alignment reports no start /
//      stop codon; when a stop codon was found, extend the last interval with
//      s_ExtendIntervalToEnd().
//   5. Remove the temporary entry from the scope, set molinfo and a protein
//      feature on the protein entry, and queue the two commands.
//
// Returns true when the commands were queued, false after any reported error.
719 bool AddProteinToSeqEntry(const objects::CSeq_entry* protein, objects::CSeq_entry_Handle seh, CRef<CCmdComposite> cmd, bool create_general_only)
720 {
721  CProSplign prosplign(CProSplignScoring(), false, true, false, false);
722 
723  // bool rval = false;
724  objects::CBioseq_Handle bsh_match;
725  bool id_match = false;
726 
727  // only add protein if we can match it to a nucleotide sequence via the ID,
728  // or if there is only one nucleotide sequence
729 
730  ITERATE (objects::CBioseq::TId, id_it, protein->GetSeq().GetId()) {
731  bsh_match = seh.GetScope().GetBioseqHandle(**id_it);
732  if (bsh_match) {
733  id_match = true;
734  break;
735  }
736  }
737  if (!bsh_match) {
738  // if there is only one nucleotide sequence, we will use that one
739  int nuc_count = 0;
// Counting stops at 2: only "none", "exactly one" and "several" matter.
740  for (objects::CBioseq_CI b_iter(seh, objects::CSeq_inst::eMol_na); b_iter ; ++b_iter ) {
741  bsh_match = *b_iter;
742  nuc_count++;
743  if (nuc_count > 1) {
744  break;
745  }
746  }
747 
748  if (nuc_count == 0) {
749  wxMessageBox(wxT("You must import nucleotide sequences before importing protein sequences"), wxT("Error"),
750  wxOK | wxICON_ERROR, NULL);
751  return false;
752  } else if (nuc_count > 1) {
753  wxMessageBox(wxT("If you have more than one nucleotide sequence, each protein sequence must use the ID of the nucleotide sequence where the coding region is found."), wxT("Error"),
754  wxOK | wxICON_ERROR, NULL);
755  return false;
756  }
757  }
758 
// Location covering the whole matched nucleotide sequence; the alignment is
// searched within it.
759  CRef<objects::CSeq_id> seq_id(new objects::CSeq_id());
760  seq_id->Assign(*(bsh_match.GetSeqId()));
761  CRef<objects::CSeq_loc> match_loc(new objects::CSeq_loc(*seq_id, 0, bsh_match.GetBioseqLength() - 1));
762 
763  CRef<objects::CSeq_entry> protein_entry(new objects::CSeq_entry());
764  protein_entry->Assign(*protein);
// The protein's ID resolved to the nucleotide, so it cannot stay on the
// protein; give the protein a new ID instead.
765  if (id_match) {
766  int offset = 1;
767  string id_label;
768  CRef<objects::CSeq_id> product_id = objects::edit::GetNewProtId(bsh_match, offset, id_label, create_general_only);
769  protein_entry->SetSeq().ResetId();
770  protein_entry->SetSeq().SetId().push_back(product_id);
771  }
772 
// Temporary: the protein is added to the scope for the alignment and removed
// again further down.
773  objects::CSeq_entry_Handle protein_h = seh.GetScope().AddTopLevelSeqEntry(*protein_entry);
774 
775  //time_t t1 = time(NULL);
776  CRef<objects::CSeq_align> alignment = prosplign.FindAlignment(seh.GetScope(), *protein_entry->GetSeq().GetId().front(), *match_loc,
778  //time_t t2 = time(NULL);
779  //time_t elapsed = t2 - t1;
780  CRef<objects::CSeq_loc> cds_loc(new objects::CSeq_loc());
781  bool found_start_codon = false;
782  bool found_stop_codon = false;
783  if (alignment && alignment->IsSetSegs() && alignment->GetSegs().IsSpliced()) {
784  CRef<objects::CSeq_id> seq_id (new objects::CSeq_id());
785  seq_id->Assign(*match_loc->GetId());
// One location per exon, collected into a mix.
786  ITERATE (objects::CSpliced_seg::TExons, exon_it, alignment->GetSegs().GetSpliced().GetExons()) {
787  CRef<objects::CSeq_loc> exon(new objects::CSeq_loc(*seq_id,
788  (*exon_it)->GetGenomic_start(),
789  (*exon_it)->GetGenomic_end()));
790  if ((*exon_it)->IsSetGenomic_strand()) {
791  exon->SetStrand((*exon_it)->GetGenomic_strand());
792  }
793  cds_loc->SetMix().Set().push_back(exon);
794  }
795  ITERATE (objects::CSpliced_seg::TModifiers, mod_it,
796  alignment->GetSegs().GetSpliced().GetModifiers()) {
797  if ((*mod_it)->IsStart_codon_found()) {
798  found_start_codon = (*mod_it)->GetStart_codon_found();
799  }
800  if ((*mod_it)->IsStop_codon_found()) {
801  found_stop_codon = (*mod_it)->GetStop_codon_found();
802  }
803  }
804 
805  }
806  if (!cds_loc->IsMix()) {
807  //no exons, no match
// NOTE(review): this early return is reached after AddTopLevelSeqEntry() but
// before RemoveTopLevelSeqEntry(), so the temporary protein entry stays in
// the scope - confirm whether that is intended.
808  string label = "";
809  protein->GetSeq().GetId().front()->GetLabel(&label, objects::CSeq_id::eContent);
810  string error = "Unable to find coding region location for protein sequence " + label + ". Import failed.";
811  wxMessageBox(ToWxString(error), wxT("Error"),
812  wxOK | wxICON_ERROR, NULL);
813  return false;
814  } else {
// A single exon is stored as a plain location instead of a one-element mix.
815  if (cds_loc->GetMix().Get().size() == 1) {
816  CRef<objects::CSeq_loc> exon = cds_loc->SetMix().Set().front();
817  cds_loc->Assign(*exon);
818  }
819  }
820  if (!found_start_codon) {
821  cds_loc->SetPartialStart(true, objects::eExtreme_Biological);
822  }
823  if (found_stop_codon) {
824  // extend to cover stop codon
825  if (cds_loc->IsMix()) {
826  s_ExtendIntervalToEnd(cds_loc->SetMix().Set().back()->SetInt(), bsh_match);
827  } else {
828  s_ExtendIntervalToEnd(cds_loc->SetInt(), bsh_match);
829  }
830  } else {
831  cds_loc->SetPartialStop(true, objects::eExtreme_Biological);
832  }
833 
834  // if we add the protein sequence, we'll do it in the new nuc-prot set
835  seh.GetScope().RemoveTopLevelSeqEntry(protein_h);
836  bool partial5 = cds_loc->IsPartialStart(objects::eExtreme_Biological);
837  bool partial3 = cds_loc->IsPartialStop(objects::eExtreme_Biological);
838  SetMolinfoForProtein(protein_entry, partial5, partial3);
839  AddProteinFeatureToProtein(protein_entry, partial5, partial3);
840 
// Queue: add the protein next to the matched nucleotide sequence ...
841  CRef<objects::CCmdAddSeqEntry> add_seqentry(new objects::CCmdAddSeqEntry(protein_entry, bsh_match.GetParentEntry()));
842  cmd->AddCommand(*add_seqentry);
// ... and create the coding region that points at it as its product.
843  CRef<objects::CSeq_feat> new_cds(new objects::CSeq_feat());
844  new_cds->SetLocation(*cds_loc);
845  if (partial5 || partial3) {
846  new_cds->SetPartial(true);
847  }
848  new_cds->SetData().SetCdregion();
849  CRef<objects::CSeq_id> product_id(new objects::CSeq_id());
850  product_id->Assign(*(protein_entry->GetSeq().GetId().front()));
851  new_cds->SetProduct().SetWhole(*product_id);
852  CRef<CCmdCreateFeat> add_cds(new CCmdCreateFeat(seh, *new_cds));
853  cmd->AddCommand(*add_cds);
854 
855  return true;
856 }
857 
858 
859 static void s_ReportMixError()
860 {
861  wxMessageBox(wxT("Cannot import a mix of protein and nucleotide sequences unless proteins are already packaged in nuc-prot sets"), wxT("Error"),
862  wxOK | wxICON_ERROR, NULL);
863 }
864 
865 
// Applies source modifiers found in sequence titles to the entries under
// `entry`.
//
// For a non-protein sequence with a title descriptor, the title is run
// through CSourceModParser::ParseTitle(), the descriptor's title is replaced
// by the string that call returns, and the parsed modifiers are applied to
// the sequence with ApplyAllMods(). Unrecognised modifiers are ignored
// (eHandleBadMod_Ignore). Protein sequences and null entries are left alone.
867 {
868  if (!entry) {
869  return;
870  }
871  if (entry->IsSet()) {
872  NON_CONST_ITERATE(objects::CBioseq_set::TSeq_set, it, entry->SetSet().SetSeq_set()) {
874  }
875  } else if (entry->IsSeq()) {
876  if (!entry->GetSeq().IsAa()) {
877 
878  objects::CSourceModParser smp(
879  objects::CSourceModParser::eHandleBadMod_Ignore );
880  // later - fix title by removing attributes used?
882  = entry->GetSeq().GetClosestDescriptor(objects::CSeqdesc::e_Title);
883  if (title_desc) {
// The descriptor is only reachable as const here; the const_cast lets the
// title be rewritten in place.
884  string& title(const_cast<string&>(title_desc->GetTitle()));
885  title = smp.ParseTitle(title, CConstRef<objects::CSeq_id>(entry->GetSeq().GetFirstId()) );
886  smp.ApplyAllMods(entry->SetSeq());
887  }
888  }
889  }
890 }
891 
892 
// Queues on `cmd` the commands needed to import `entry` into `seh`.
//
// has_nuc / has_prot are in-out flags recording whether a nucleotide entry
// (or nuc-prot set) or a loose protein has already been imported; mixing the
// two kinds makes the function return false.
//
//   - Protein sequence:  handled by AddProteinToSeqEntry().
//   - Nucleotide sequence or nuc-prot set: copied, titles parsed with
//     ParseTitlesToNewSeqEntries(), then added as a whole.
//   - Any other set: when `seh` is an empty set it takes over the imported
//     set's class and descriptors; every member is then imported recursively,
//     with the set's descriptors copied onto each member first.
//
// Returns false for a null entry or as soon as any part fails; commands
// queued before the failure remain on `cmd`.
893 bool AddSeqEntryToSeqEntry(const objects::CSeq_entry* entry, objects::CSeq_entry_Handle seh, CRef<CCmdComposite> cmd, bool& has_nuc, bool& has_prot, bool create_general_only)
894 {
895  if (!entry) {
896  return false;
897  }
898  if (entry->IsSeq()) {
899  if (entry->GetSeq().IsAa()) {
900  if (has_nuc) {
902  return false;
903  } else if (!AddProteinToSeqEntry (entry, seh, cmd, create_general_only)) {
904  return false;
905  }
906  has_prot = true;
907  } else {
908  if (has_prot) {
910  return false;
911  }
// Work on a copy so the caller's entry is not modified by title parsing.
912  CRef<objects::CSeq_entry> new_entry(new objects::CSeq_entry());
913  new_entry->Assign(*entry);
914  ParseTitlesToNewSeqEntries(new_entry);
915  CRef<objects::CCmdAddSeqEntry> subcmd(new objects::CCmdAddSeqEntry(new_entry, seh));
916  cmd->AddCommand(*subcmd);
917  has_nuc = true;
918  }
919  } else if (entry->IsSet()) {
920  if (entry->GetSet().GetClass() == objects::CBioseq_set::eClass_nuc_prot) {
921  if (has_prot) {
923  return false;
924  }
// A nuc-prot set is imported as one unit and counts as nucleotide content.
925  CRef<objects::CSeq_entry> new_entry(new objects::CSeq_entry());
926  new_entry->Assign(*entry);
927  ParseTitlesToNewSeqEntries(new_entry);
928  CRef<objects::CCmdAddSeqEntry> subcmd(new objects::CCmdAddSeqEntry(new_entry, seh));
929  cmd->AddCommand(*subcmd);
930  has_nuc = true;
931  } else {
// An empty destination set adopts the class of the imported set ...
932  if (seh.IsSet() && seh.GetSet().IsEmptySeq_set() && entry->GetSet().IsSetClass()) {
933  CRef<objects::CBioseq_set> new_set(new objects::CBioseq_set());
934  new_set->Assign(*(seh.GetSet().GetCompleteBioseq_set()));
935  new_set->SetClass(entry->GetSet().GetClass());
936  CCmdChangeBioseqSet *set_cmd = new CCmdChangeBioseqSet(seh.GetSet(), *new_set);
937  cmd->AddCommand(*set_cmd);
938  }
// ... and its set-level descriptors.
939  if (seh.IsSet() && seh.GetSet().IsEmptySeq_set() && entry->GetSet().IsSetDescr()) {
940  ITERATE (objects::CBioseq_set::TDescr::Tdata, dit, entry->GetSet().GetDescr().Get()) {
941  CRef<objects::CSeqdesc> desc(new objects::CSeqdesc());
942  desc->Assign(**dit);
943  CIRef<IEditCommand> cmdAddDesc(new CCmdCreateDesc(seh, *desc));
944  cmd->AddCommand(*cmdAddDesc);
945  }
946  }
947 
// Import each member separately, carrying the set's descriptors along.
948  ITERATE (objects::CBioseq_set::TSeq_set, it, entry->GetSet().GetSeq_set()) {
949  CRef<objects::CSeq_entry> new_entry(new objects::CSeq_entry());
950  new_entry->Assign(**it);
951  if (entry->GetSet().IsSetDescr()) {
952  ITERATE (objects::CBioseq_set::TDescr::Tdata, dit, entry->GetSet().GetDescr().Get()) {
953  CRef<objects::CSeqdesc> desc(new objects::CSeqdesc());
954  desc->Assign(**dit);
955  new_entry->SetDescr().Set().push_back(desc);
956  }
957  }
958  if (!AddSeqEntryToSeqEntry(new_entry, seh, cmd, has_nuc, has_prot, create_general_only)) {
959  return false;
960  }
961  }
962  }
963  }
964  return true;
965 }
966 
967 
968 string CheckFeatureAnnotation (objects::CSeq_entry_Handle entry, bool& is_ok)
969 {
970  vector<int> counts;
971  counts.clear();
972  bool all = true;
973  bool any = false;
974  int num_with = 0;
975  int common_num = -1;
976  bool same_num = true;
977  is_ok = true;
978 
979  objects::CBioseq_CI b_iter(entry, objects::CSeq_inst::eMol_na);
980  for ( ; b_iter ; ++b_iter ) {
981  int this_count = 0;
982  objects::CFeat_CI fit (*b_iter);
983  while (fit) {
984  this_count++;
985  ++fit;
986  any = true;
987  }
988  counts.push_back(this_count);
989  if (this_count == 0) {
990  all = false;
991  } else {
992  num_with ++;
993  }
994  if (common_num == -1) {
995  common_num = this_count;
996  } else if (common_num != this_count) {
997  same_num = false;
998  }
999  }
1000  string summary = "";
1001  if (any) {
1002  if (all && same_num) {
1003  summary = "All " + NStr::NumericToString(num_with) + " sequences have " + NStr::NumericToString(common_num) + " features.";
1004  } else {
1005  vector<int>::iterator cit = counts.begin();
1006  while (cit != counts.end()) {
1007  int this_num = 1;
1008  vector<int>::iterator cit2 = cit;
1009  cit2++;
1010  while (cit2 != counts.end()) {
1011  if (*cit == *cit2) {
1012  this_num++;
1013  cit2 = counts.erase(cit2);
1014  } else {
1015  cit2++;
1016  }
1017  }
1018  summary += NStr::NumericToString(this_num) + " sequence";
1019  if (this_num == 1) {
1020  summary += " has ";
1021  } else {
1022  summary += "s have ";
1023  }
1024  summary += NStr::NumericToString(*cit) + " feature";
1025  if (*cit != 1) {
1026  summary += "s";
1027  }
1028  summary += ".\n";
1029  cit++;
1030  }
1031  }
1032  } else {
1033  summary = "No features found.";
1034  }
1035  if (!all) {
1036  is_ok = false;
1037  }
1038  return summary;
1039 }
1040 
// Returns the two canonical choices for a boolean value: "true", and the
// empty string standing for false.
vector<string> GetTrueFalseList()
{
    vector<string> choices(2);   // both entries start out empty
    choices[0] = "true";
    return choices;
}
1048 
1049 bool IsSynonymForTrue(const string& val)
1050 {
1051  if (NStr::EqualNocase(val, "true")
1052  || NStr::EqualNocase(val, "yes")
1053  || NStr::EqualNocase(val, "T")) {
1054  return true;
1055  } else {
1056  return false;
1057  }
1058 }
1059 
1060 
1061 bool IsSynonymForFalse(const string& val)
1062 {
1063  if (NStr::EqualNocase(val, "false")
1064  || NStr::EqualNocase(val, "no")
1065  || NStr::EqualNocase(val, "F")
1066  || NStr::IsBlank(val)) {
1067  return true;
1068  } else {
1069  return false;
1070  }
1071 }
1072 
1073 
1074 bool IsTrueFalseList(const vector<string>& choices)
1075 {
1076  if (choices.size() != 2) {
1077  return false;
1078  }
1079  if (IsSynonymForTrue(choices[0]) && IsSynonymForFalse(choices[1])) {
1080  return true;
1081  } else if (IsSynonymForTrue(choices[1]) && IsSynonymForFalse(choices[0])) {
1082  return true;
1083  } else {
1084  return false;
1085  }
1086 }
1087 
1088 
// Appends one row describing feature f to table and returns that row's index.
//
// The row index is the number of ids already stored in the first column
// (0 when that column has no data yet). A copy of the feature's location id
// is pushed onto the first column; the biological start and stop positions
// (each incremented by one), the local integer feature id (when present),
// the comment, the exception text and every qualifier that has both a name
// and a value are written through AddValueToTable(). For a qualifier named
// "satellite" the code rewrites the name to "<satellite type> name" and strips
// the type prefix (plus a leading ':') from the value.
// Finally Num_rows is set to row + 1.
//
// NOTE(review): GetColumns().front() is used unconditionally, so the table
// must already contain at least one column.
// NOTE(review): the bodies of the partial-start / partial-stop branches and
// the test that selects the satellite type are not visible in this listing.
1089 int AddFeatureToSeqTable (const objects::CSeq_feat& f, CRef<objects::CSeq_table> table)
1090 {
1091  int row = 0;
1092  if (table->GetColumns().front()->IsSetData()) {
1093  row = static_cast<int>(table->GetColumns().front()->GetData().GetId().size());
1094  }
1095 
1096  CRef<objects::CSeq_id> id(new objects::CSeq_id());
1097  id->Assign(*f.GetLocation().GetId());
1098  table->SetColumns()[0]->SetData().SetId().push_back(id);
1099  AddValueToTable(table, kStartColLabel, f.GetLocation().GetStart(objects::eExtreme_Biological) + 1, row);
1100  AddValueToTable(table, kStopColLabel, f.GetLocation().GetStop(objects::eExtreme_Biological) + 1, row);
1101  if (f.IsSetId() && f.GetId().IsLocal() && f.GetId().GetLocal().IsId()) {
1102  AddValueToTable(table, kFeatureID, NStr::NumericToString(f.GetId().GetLocal().GetId()), row);
1103  }
1104 
1105  if (f.GetLocation().IsPartialStart(objects::eExtreme_Biological)) {
1107  }
1108  if (f.GetLocation().IsPartialStop(objects::eExtreme_Biological)) {
1110  }
1111  if (f.IsSetComment()) {
1112  AddValueToTable(table, "Comment", f.GetComment(), row);
1113  }
1114  if (f.IsSetExcept() && f.IsSetExcept_text()) {
1115  AddValueToTable(table, "Exception", f.GetExcept_text(), row);
1116  }
1117  if (f.IsSetQual()) {
1118  ITERATE(objects::CSeq_feat::TQual, it, f.GetQual()) {
1119  if ((*it)->IsSetQual() && (*it)->IsSetVal()) {
1120  string qual = (*it)->GetQual();
1121  string val = (*it)->GetVal();
1122  // special case
1123  if (NStr::EqualNocase(qual, "satellite")) {
1124  for (int i = 0; i < kNumSatelliteTypes; i++) {
1126  qual = kSatelliteTypes[i] + " name";
1127  val = val.substr(kSatelliteTypes[i].length());
1128  if (NStr::StartsWith(val, ":")) {
1129  val = val.substr(1);
1130  }
1132  break;
1133  }
1134  }
1135  }
1136  AddValueToTable(table, qual, val, row);
1137  }
1138  }
1139  }
1140  table->SetNum_rows(row + 1);
1141  return row;
1142 }
1143 
1144 
1145 static bool s_OkToAddFeature (const objects::CSeq_feat& f1, const objects::CSeq_feat& f2, const TFeatureSeqTableColumnList& reqs)
1146 {
1147  bool ok = true;
1148 
1149  ITERATE(TFeatureSeqTableColumnList, it, reqs) {
1150  if (!(*it)->RelaxedMatch(f1, f2)) {
1151  ok = false;
1152  break;
1153  }
1154  }
1155  return ok;
1156 }
1157 
1158 
// Builds a new Seq-table from the features found in entry.
//
// The table starts with an id column (field id eField_id_location_id, titled
// kSequenceIdColLabel). A feature iterator restricted to the subtype of feat
// then walks entry, and every feature for which s_OkToAddFeature(feat, ...,
// reqs) holds is appended as a row through AddFeatureToSeqTable().
//
// NOTE(review): the statements between the id-column setup and the feature
// iterator are not visible in this listing.
1159 CRef<objects::CSeq_table> GetFeaturesFromSeqEntry(objects::CSeq_entry_Handle entry, const objects::CSeq_feat& feat, const TFeatureSeqTableColumnList& reqs)
1160 {
1161  CRef<objects::CSeq_table> table(new objects::CSeq_table());
1162  CRef<objects::CSeqTable_column> id_col(new objects::CSeqTable_column());
1163  id_col->SetHeader().SetField_id(objects::CSeqTable_column_info::eField_id_location_id);
1164  id_col->SetHeader().SetTitle(kSequenceIdColLabel);
1165  table->SetColumns().push_back(id_col);
1169  objects::CFeat_CI fi(entry, objects::SAnnotSelector(feat.GetData().GetSubtype()));
1170  while (fi) {
1171  if (s_OkToAddFeature(feat, *(fi->GetSeq_feat()), reqs)) {
1172  AddFeatureToSeqTable (*(fi->GetSeq_feat()), table);
1173  }
1174  ++fi;
1175  }
1176 
1177  return table;
1178 }
1179 
1180 
1181 void
1184  objects::CSeq_entry_Handle entry,
1185  objects::CSeqFeatData::ESubtype subtype,
1186  unsigned int row,
1187  TFeatureSeqTableColumnList & vecColEditFactories,
1189 {
1190  CRef<objects::CSeq_feat> feat(new objects::CSeq_feat());
1191 
1196 
1197  if (featid_col && featid_col->IsSetData() && featid_col->GetData().GetSize() > row) {
1198  string feature_id = featid_col->GetData().GetString()[row];
1199  if (!NStr::IsBlank(feature_id)) {
1200  try {
1201  int id = NStr::StringToInt(feature_id);
1202  CRef<objects::CFeat_id> feat_id(new objects::CFeat_id());
1203 
1204  objects::CSeq_feat_Handle orig_feat = entry.GetTopLevelEntry().GetTSE_Handle().GetFeatureWithId(objects::CSeqFeatData::e_not_set,
1205  id);
1206  if (orig_feat) {
1207  feat->Assign(*(orig_feat.GetOriginalSeq_feat()));
1208  feat->ResetQual();
1209  } else {
1210  feat->SetId().SetLocal().SetId(id);
1211  }
1212  } catch (exception &) {
1213  }
1214  }
1215  }
1216 
1217  feat->SetLocation().SetInt().SetId().Assign(*(id_col->GetData().GetId()[row]));
1218  int start = start_col->GetData().GetInt()[row];
1219  int stop = stop_col->GetData().GetInt()[row];
1220  if (stop < start) {
1221  feat->SetLocation().SetInt().SetFrom(stop - 1);
1222  feat->SetLocation().SetInt().SetTo(start - 1);
1223  feat->SetLocation().SetInt().SetStrand(objects::eNa_strand_minus);
1224  } else {
1225  feat->SetLocation().SetInt().SetFrom(start - 1);
1226  feat->SetLocation().SetInt().SetTo(stop - 1);
1227  feat->SetLocation().SetInt().SetStrand(objects::eNa_strand_plus);
1228  }
1229 
1230  for (unsigned int i = kSkipColumns; i < table->GetColumns().size(); i++) {
1231  CConstRef<objects::CSeqTable_column> col = table->GetColumns()[i];
1232  if (vecColEditFactories[i - kSkipColumns] != NULL
1233  && col->GetData().GetSize() > row) {
1234  if (col->GetData().IsString()) {
1235  vecColEditFactories[i - kSkipColumns]->AddToFeature(*feat, col->GetData().GetString()[row], edit::eExistingText_replace_old);
1236  } else if (col->GetData().IsInt()) {
1237  vecColEditFactories[i - kSkipColumns]->AddToFeature(*feat, NStr::NumericToString(col->GetData().GetInt()[row]), edit::eExistingText_replace_old);
1238  } else {
1239  wxMessageBox(wxT("Unrecognized table column type"), wxT("Error"),
1240  wxOK | wxICON_ERROR, NULL);
1241  }
1242  }
1243  }
1244 
1245  objects::CSeqFeatData::E_Choice feature_type = objects::CSeqFeatData::GetTypeFromSubtype(subtype);
1246  switch (feature_type) {
1247  case objects::CSeqFeatData::e_Imp:
1248  {
1249  const objects::CFeatList& feats(*objects::CSeqFeatData::GetFeatList());
1250  string key = feats.GetStoragekey(subtype);
1251  feat->SetData().SetImp().SetKey(key);
1252  }
1253  break;
1254  case objects::CSeqFeatData::e_Cdregion:
1255  feat->SetData().SetCdregion();
1256  break;
1257  default:
1258  break;
1259  }
1260 
1261  bool change_existing = false;
1262  if (feat->IsSetId() && feat->GetId().IsLocal() && feat->GetId().GetLocal().IsId()) {
1263  objects::CSeq_feat_Handle orig_feat = entry.GetTopLevelEntry().GetTSE_Handle().GetFeatureWithId(objects::CSeqFeatData::e_not_set,
1264  feat->GetId().GetLocal().GetId());
1265  if (orig_feat) {
1266  cmd->AddCommand(*CRef<CCmdChangeSeq_feat>(new CCmdChangeSeq_feat(orig_feat, *feat)));
1267  change_existing = true;
1268  }
1269  }
1270  if (!change_existing) {
1271  objects::CBioseq_Handle bh = entry.GetScope().GetBioseqHandle(*(id_col->GetData().GetId()[row]));
1272  objects::CSeq_entry_Handle seh = bh.GetSeq_entry_Handle();
1273  cmd->AddCommand(*CRef<CCmdCreateFeat>(new CCmdCreateFeat(seh, *feat)));
1274  }
1275 }
1276 
1277 
1278 CRef<CCmdComposite> AddFeatureSeqTableToSeqEntry(CRef<objects::CSeq_table> table, objects::CSeq_entry_Handle entry, objects::CSeqFeatData::ESubtype subtype, const TFeatureSeqTableColumnList& reqs, const TFeatureSeqTableColumnList& opts)
1279 {
1280  CRef<CCmdComposite> cmd(new CCmdComposite("Add Features"));
1281 
1282  TFeatureSeqTableColumnList vecColEditFactories;
1283 
1284  // the first kSkipColumns columns are not associated with column handlers
1285  for (size_t i = kSkipColumns; i < table->GetColumns().size(); i++) {
1286  vecColEditFactories.push_back(
1287  GetColumnRuleForFeatureSeqTable(table->GetColumns()[i], reqs, opts));
1288  }
1289 
1290  for (size_t c = 0; c < table->GetColumns().front()->GetData().GetSize(); c++) {
1291  AddFeatureSeqTableRowToSeqEntry (table, entry, subtype, static_cast<int>(c), vecColEditFactories, cmd);
1292  }
1293  return cmd;
1294 }
1295 
1296 
1297 void MergeStringVectors (vector<string>& problems, vector<string> add)
1298 {
1299  if (add.empty()) {
1300  return;
1301  } else if (problems.empty()) {
1302  problems.assign(add.begin(), add.end());
1303  } else {
1304  size_t pos = 0;
1305  while (pos < problems.size() && pos < add.size()) {
1306  if (!NStr::IsBlank(add[pos])) {
1307  if (!NStr::IsBlank(problems[pos])) {
1308  problems[pos] += ", ";
1309  }
1310  problems[pos] += add[pos];
1311  }
1312  pos++;
1313  }
1314  while (pos < add.size()) {
1315  problems.push_back(add[pos]);
1316  pos++;
1317  }
1318  }
1319 }
1320 
1321 
1325  vector<string>& problems)
1326 {
1327  vector<string> add_values;
1328  for (size_t j = 0; j < val_col->GetData().GetSize(); j++) {
1329  add_values.push_back(val_col->GetData().GetString()[j]);
1330  }
1331  vector<string> add_problems = rule_col->IsValid(add_values);
1332  MergeStringVectors(problems, add_problems);
1333 }
1334 
1335 
1337 {
1338  auto num_table_columns = values_table->GetColumns().size();
1339  if (num_table_columns > 1) {
1340  CRef<objects::CSeqTable_column> last_col = values_table->GetColumns()[num_table_columns - 1];
1341  if (!last_col->IsSetHeader()
1342  || !last_col->GetHeader().IsSetTitle()
1343  || NStr::Equal(last_col->GetHeader().GetTitle(), kProblems)) {
1344  values_table->SetColumns().pop_back();
1345  }
1346  }
1347 }
1348 
1349 
1351 {
1352  ITERATE (TFeatureSeqTableColumnList, col_it, reqs) {
1353  if (MatchColumnName((*col_it)->GetLabel(), col->GetHeader().GetTitle())) {
1354  return *col_it;
1355  }
1356  }
1357  ITERATE (TFeatureSeqTableColumnList, col_it, opts) {
1358  if (MatchColumnName((*col_it)->GetLabel(), col->GetHeader().GetTitle())) {
1359  return *col_it;
1360  }
1361  }
1362  CRef<CFeatureSeqTableColumnBase> rule_col = CFeatureSeqTableColumnBaseFactory::Create(col->GetHeader().GetTitle());
1363  return rule_col;
1364 }
1365 
1366 
1368 {
1369  vector<string> problems;
1370 
1371  // the first kSkipColumns columns are not associated with column handlers
1372  for (size_t i = kSkipColumns; i < table->GetColumns().size(); i++) {
1373  if (table->GetColumns()[i]->IsSetData()
1374  && (table->GetColumns()[i]->GetData().IsString())) {
1375  CRef<CFeatureSeqTableColumnBase> col = GetColumnRuleForFeatureSeqTable(table->GetColumns()[i], reqs, opts);
1376  if (col) {
1377  AddProblemsToColumn(table->GetColumns()[i], col, problems);
1378  }
1379  }
1380  }
1381 
1383  CRef<objects::CSeqTable_column> problems_col(new objects::CSeqTable_column());
1384  problems_col->SetHeader().SetTitle(kProblems);
1385  problems_col->SetData().SetString();
1386  table->SetColumns().push_back(problems_col);
1387  ITERATE(vector<string>, it, problems) {
1388  problems_col->SetData().SetString().push_back(*it);
1389  }
1390 }
1391 
1392 
1393 bool HasLocalIntFeatureId (const objects::CSeq_feat& feat)
1394 {
1395  if (feat.IsSetId() && feat.GetId().IsLocal() && feat.GetId().GetLocal().IsId()) {
1396  return true;
1397  } else {
1398  return false;
1399  }
1400 }
1401 
1402 
1403 CRef<CCmdComposite> RemoveMatchingFeaturesWithoutLocalIntFeatureIdsFromSeqEntry(objects::CSeq_entry_Handle entry, const objects::CSeq_feat &feat, const TFeatureSeqTableColumnList& reqs)
1404 {
1405  CRef<CCmdComposite> cmd (new CCmdComposite("Remove Features"));
1406 
1407  objects::CFeat_CI fi(entry, objects::SAnnotSelector(feat.GetData().GetSubtype()));
1408  while (fi) {
1409  const objects::CSeq_feat& this_feat= *(fi->GetSeq_feat());
1410  if (!HasLocalIntFeatureId(this_feat) && s_OkToAddFeature(this_feat, feat, reqs)) {
1411  cmd->AddCommand(*CRef<CCmdDelSeq_feat>(new CCmdDelSeq_feat(fi->GetSeq_feat_Handle())));
1412  }
1413  ++fi;
1414  }
1415  return cmd;
1416 }
1417 
1418 
1420 {
1421  vector<wxString> exts;
1422  exts.push_back(wxT("asn"));
1423  exts.push_back(wxT("sqn"));
1424  wxString filter;
1425  vector<wxString>::const_iterator it;
1426  for (it = exts.begin(); it != exts.end(); it++) {
1427  if (it != exts.begin())
1428  filter += wxT(";");
1429 
1430 #ifdef NCBI_OS_MSWIN
1431  filter += wxT("*.") + *it;
1432 #else
1433  if (*it == "*")
1434  filter += *it;
1435  else
1436  filter += wxT("*.") + *it;
1437 #endif
1438  }
1439  return filter;
1440 }
1441 
1442 
1443 bool IsWizardObject(const objects::CUser_object& user)
1444 {
1445  if (!user.IsSetType() || !user.GetType().IsStr() || !NStr::EqualNocase(user.GetType().GetStr(), kWizardLabel)) {
1446  return false;
1447  } else {
1448  return true;
1449  }
1450 }
1451 
1452 
1454 {
1455  CRef<objects::CUser_object> user(new objects::CUser_object());
1456  user->SetType().SetStr(kWizardLabel);
1457  return user;
1458 }
1459 
1460 
// Sets the string field field_name to value in the wizard user object of
// entry, editing the entry directly through its edit handle (no command
// object is created).
//
// The user descriptors reported by the descriptor iterator are scanned; for
// the first one that IsWizardObject() accepts, a copy with the field set is
// added and the original descriptor is removed. When no wizard object is
// found, a new descriptor carrying the field is appended to the entry's
// descriptor list.
//
// NOTE(review): the statement that creates new_user in the not-found branch
// is not visible in this listing.
1461 void SetWizardFieldInSeqEntryNoUndo(objects::CSeq_entry_Handle entry, string field_name, string value)
1462 {
1463  bool found = false;
1464 
1465  objects::CSeq_entry_EditHandle eh = entry.GetEditHandle();
1466 
1467  for (objects::CSeqdesc_CI desc_ci( entry, objects::CSeqdesc::e_User);
1468  desc_ci;
1469  ++desc_ci) {
1470  const objects::CUser_object& u = desc_ci->GetUser();
1471  if (IsWizardObject (u)) {
1472  CRef<objects::CSeqdesc> new_desc( new objects::CSeqdesc );
1473  CRef<objects::CUser_object> new_user (new objects::CUser_object());
1474  new_user->Assign (u);
1475  new_desc->SetUser(*new_user);
1476  objects::CUser_field& new_field = new_user->SetField(field_name);
1477  new_field.SetData().SetStr(value);
1478  eh.RemoveSeqdesc(*desc_ci);
1479  eh.AddSeqdesc(*new_desc);
1480  found = true;
1481  break;
1482  }
1483  }
1484  if (!found) {
1485  CRef<objects::CSeqdesc> new_desc( new objects::CSeqdesc );
1487  new_desc->SetUser(*new_user);
1488  objects::CUser_field& new_field = new_user->SetField(field_name);
1489  new_field.SetData().SetStr(value);
1490  entry.GetEditHandle().SetDescr().Set().push_back(new_desc);
1491  }
1492 }
1493 
1494 
1495 const string kBankitSubmissionLabel = "Submission";
1496 const string kBankItAltEmailPrefix = "ALT EMAIL:";
1497 
1498 bool IsBankItSubmissionObject(const objects::CUser_object& user)
1499 {
1500  if (!user.IsSetType() || !user.GetType().IsStr() || !NStr::EqualNocase(user.GetType().GetStr(), kBankitSubmissionLabel)) {
1501  return false;
1502  } else {
1503  return true;
1504  }
1505 }
1506 
1507 
1509 {
1510  CRef<objects::CUser_object> user(new objects::CUser_object());
1511  user->SetType().SetStr(kBankitSubmissionLabel);
1512  return user;
1513 }
1514 
1515 
1516 string GetFieldFromWizardObject (const objects::CUser_object& user, string field_name)
1517 {
1518  string value = "";
1519  ITERATE(objects::CUser_object::TData, fit, user.GetData()) {
1520  if ((*fit)->IsSetLabel()
1521  && (*fit)->GetLabel().IsStr()
1522  && NStr::EqualNocase((*fit)->GetLabel().GetStr(), field_name)
1523  && (*fit)->IsSetData()
1524  && (*fit)->GetData().IsStr()) {
1525  value = (*fit)->GetData().GetStr();
1526  break;
1527  }
1528  }
1529  return value;
1530 }
1531 
1532 
1533 const string kSMARTComment = "SmartComment";
1534 
1535 string GetDescAlternateEmailAddress(const objects::CSeqdesc& desc)
1536 {
1537  string alt_email = "";
1538  if (desc.IsUser() && IsBankItSubmissionObject(desc.GetUser())) {
1539  string str = GetFieldFromWizardObject(desc.GetUser(), kSMARTComment);
1540  size_t pos = NStr::Find(str, kBankItAltEmailPrefix);
1541  if (pos != string::npos) {
1542  alt_email = str.substr(pos + kBankItAltEmailPrefix.length());
1543  }
1544  }
1545  return alt_email;
1546 }
1547 
1548 
1549 string GetAlternateEmailAddress(objects::CSeq_entry_Handle entry)
1550 {
1551  string alt_email = "";
1552  objects::CSeqdesc_CI it (entry, objects::CSeqdesc::e_User);
1553 
1554  while (it && NStr::IsBlank(alt_email)) {
1555  alt_email = GetDescAlternateEmailAddress(*it);
1556  ++it;
1557  }
1558  return alt_email;
1559 }
1560 
1561 
// Stores alt_email as the string value of the kSMARTComment field of u.
// Under a condition that guards the first statement, kBankItAltEmailPrefix is
// prepended to alt_email before it is stored.
//
// NOTE(review): the line holding that condition is not visible in this
// listing.
1562 void SetUserAlternateEmailAddress(objects::CUser_object& u, string alt_email)
1563 {
1565  alt_email = kBankItAltEmailPrefix + alt_email;
1566  }
1567  objects::CUser_field& new_field = u.SetField(kSMARTComment);
1568  new_field.SetData().SetStr(alt_email);
1569 }
1570 
1571 
// Records alt_email in the BankIt submission user object of entry, editing
// the entry directly through its edit handle.
//
// The user descriptors reported by the descriptor iterator are scanned; for
// the first one that IsBankItSubmissionObject() accepts, a copy updated via
// SetUserAlternateEmailAddress() is added and the original descriptor is
// removed. When none is found, a new descriptor is appended to the entry's
// descriptor list.
//
// NOTE(review): the statement that creates new_user in the not-found branch
// is not visible in this listing.
1572 void SetAlternateEmailAddress(objects::CSeq_entry_Handle entry, string alt_email)
1573 {
1574  bool found = false;
1575  objects::CSeq_entry_EditHandle eh = entry.GetEditHandle();
1576  for (objects::CSeqdesc_CI desc_ci( entry, objects::CSeqdesc::e_User);
1577  desc_ci;
1578  ++desc_ci) {
1579  const objects::CUser_object& u = desc_ci->GetUser();
1580  if (IsBankItSubmissionObject (u)) {
1581  CRef<objects::CSeqdesc> new_desc( new objects::CSeqdesc );
1582  CRef<objects::CUser_object> new_user (new objects::CUser_object());
1583  new_user->Assign (u);
1584  new_desc->SetUser(*new_user);
1585  SetUserAlternateEmailAddress(*new_user, alt_email);
1586  eh.RemoveSeqdesc(*desc_ci);
1587  eh.AddSeqdesc(*new_desc);
1588  found = true;
1589  break;
1590  }
1591  }
1592  if (!found) {
1593  CRef<objects::CSeqdesc> new_desc( new objects::CSeqdesc );
1595  new_desc->SetUser(*new_user);
1596  SetUserAlternateEmailAddress(*new_user, alt_email);
1597  entry.GetEditHandle().SetDescr().Set().push_back(new_desc);
1598  }
1599 }
1600 
1601 
/// Performs a deliberately loose sanity check on an e-mail address.
///
/// @param email  candidate address
/// @return true when email contains an '@' that is followed, somewhere later
///         in the string, by a '.'; false otherwise (which includes empty
///         and whitespace-only input).
///
/// No other syntax rule is enforced; for instance "@." is accepted.
bool IsValidEmail(std::string email)
{
    // A blank string cannot contain '@', so it is rejected by this lookup
    // and needs no separate test.
    const std::string::size_type at_pos = email.find('@');
    if (at_pos == std::string::npos) {
        return false;
    }
    // Search in place from the '@' onward instead of copying the tail.
    return email.find('.', at_pos) != std::string::npos;
}
1619 
1620 
1622 (const objects::CSeq_entry& entry,
1627  int& pos,
1628  size_t& row)
1629 {
1630  if (entry.IsSeq()) {
1631  string np = "nuc";
1632  if (entry.GetSeq().IsAa()) {
1633  np = "prot";
1634  }
1635  string title = "";
1636  if (entry.GetSeq().IsSetDescr()) {
1637  ITERATE (objects::CBioseq::TDescr::Tdata, desc_it, entry.GetSeq().GetDescr().Get()) {
1638  if ((*desc_it)->IsTitle()) {
1639  title = (*desc_it)->GetTitle();
1640  break;
1641  }
1642  }
1643  }
1644  ITERATE(objects::CBioseq::TId, id_it, entry.GetSeq().GetId()) {
1645  CRef<objects::CSeq_id> id(new objects::CSeq_id());
1646  id->Assign(**id_it);
1647  id_col->SetData().SetId().push_back(id);
1648  pos_col->SetData().SetInt().push_back(pos);
1649  np_col->SetData().SetString().push_back(np);
1650  title_col->SetData().SetString().push_back(title);
1651  row++;
1652  }
1653  pos++;
1654  } else if (entry.IsSet() && entry.GetSet().IsSetSeq_set()) {
1655  ITERATE(objects::CBioseq_set::TSeq_set, seq_it, entry.GetSet().GetSeq_set()) {
1656  AddIdsFromSeqEntryToTable(**seq_it, id_col, pos_col, np_col, title_col, pos, row);
1657  }
1658  }
1659 }
1660 
1661 
1663 {
1664  CRef<objects::CSeqTable_column> id_col(new objects::CSeqTable_column());
1665  id_col->SetHeader().SetTitle(kSequenceIdColLabel);
1666  id_col->SetData().SetId();
1667  return id_col;
1668 }
1669 
1670 
1672 {
1673  CRef<objects::CSeqTable_column> pos_col(new objects::CSeqTable_column());
1674  pos_col->SetHeader().SetTitle("Pos");
1675  pos_col->SetData().SetInt();
1676  return pos_col;
1677 }
1678 
1679 
1681 {
1682  CRef<objects::CSeqTable_column> np_col(new objects::CSeqTable_column());
1683  np_col->SetHeader().SetTitle("NucOrProt");
1684  np_col->SetData().SetString();
1685  return np_col;
1686 }
1687 
1688 
1690 {
1691  CRef<objects::CSeqTable_column> title_col(new objects::CSeqTable_column());
1692  title_col->SetHeader().SetTitle("Title");
1693  title_col->SetData().SetString();
1694  return title_col;
1695 }
1696 
1697 
1699 (const objects::CSeq_entry& entry, CRef<objects::CSeq_table> table)
1700 {
1702  if (!id_col) {
1703  id_col = s_MakeIdTableIdCol();
1704  table->SetColumns().push_back(id_col);
1705  }
1707  if (!pos_col) {
1708  pos_col = s_MakePosCol();
1709  table->SetColumns().push_back(pos_col);
1710  }
1712  if (!np_col) {
1713  np_col = s_MakeNpCol();
1714  table->SetColumns().push_back(np_col);
1715  }
1717  if (!title_col) {
1718  title_col = s_MakeTitleCol();
1719  table->SetColumns().push_back(title_col);
1720  }
1721  size_t row = table->GetNum_rows();
1722  int pos = 0;
1723  if (row > 0) {
1724  pos = pos_col->GetData().GetInt()[row - 1] + 1;
1725  }
1726  AddIdsFromSeqEntryToTable (entry, id_col, pos_col, np_col, title_col, pos, row);
1727  table->SetNum_rows(static_cast<CSeq_table::TNum_rows>(row));
1728 }
1729 
1730 
// Builds a new Seq-table listing the ids found in entry.
//
// Four columns (id_col, pos_col, np_col, title_col) are pushed onto a fresh
// table, AddIdsFromSeqEntryToTable() fills them starting from position 1 and
// row 0, and Num_rows is set to the row count that call leaves in row.
//
// NOTE(review): the statements that create the four columns are not visible
// in this listing.
1731 CRef<objects::CSeq_table> GetIdsFromSeqEntry(const objects::CSeq_entry& entry)
1732 {
1733  CRef<objects::CSeq_table> table(new objects::CSeq_table());
1735  table->SetColumns().push_back(id_col);
1737  table->SetColumns().push_back(pos_col);
1739  table->SetColumns().push_back(np_col);
1741  table->SetColumns().push_back(title_col);
1742 
1743  int pos = 1;
1744  size_t row = 0;
1745  AddIdsFromSeqEntryToTable(entry, id_col, pos_col, np_col, title_col, pos, row);
1746  table->SetNum_rows(static_cast<CSeq_table::TNum_rows>(row));
1747  return table;
1748 }
1749 
1750 
1751 const string kIdTooLong = "ID is too long";
1752 const string kIdDuplicate = "duplicate";
1753 const string kIdMissing = "Missing ID";
1754 
1755 
1757 {
1758  string str = "";
1759 
1760  if (id_col->GetData().IsString()) {
1761  str = id_col->GetData().GetString()[pos];
1762  } else {
1763  id_col->GetData().GetId()[pos]->GetLabel(&str, objects::CSeq_id::eContent);
1764  }
1765  return str;
1766 }
1767 
1768 
1770 {
1771  CRef<objects::CSeqTable_column> problems(new objects::CSeqTable_column());
1772  problems->SetHeader().SetTitle("Problems");
1773  problems->SetData().SetString();
1774  vector<string> id_strings;
1775  bool any_problems = false;
1776  bool any_nuc = false;
1777 
1780  if (!id_col) {
1781  id_col = orig_id_col;
1782  }
1783  CRef<objects::CSeqTable_column> np_col = FindSeqTableColumnByName(new_ids, "NucOrProt");
1784 
1785  // check length on new_ids only
1786  for (int i = 0; i < new_ids->GetNum_rows(); i++) {
1787  string str = GetIdValueFromColumn(id_col, i);
1788  id_strings.push_back(str);
1789  if (NStr::EqualNocase(np_col->GetData().GetString()[i], "nuc")) {
1790  any_nuc = true;
1791  }
1792  if (NStr::IsBlank(str)) {
1793  problems->SetData().SetString().push_back(kIdMissing);
1794  any_problems = true;
1795  } else if (str.length() > max_len) {
1796  problems->SetData().SetString().push_back(kIdTooLong);
1797  any_problems = true;
1798  } else {
1799  problems->SetData().SetString().push_back("");
1800  }
1801  }
1802 
1803  // only care about duplicates if there are nucs present
1804  if (any_nuc) {
1805  if (old_ids) {
1806  id_col = FindSeqTableColumnByName(old_ids, kSequenceIdColLabel);
1807  for (unsigned int i = 0; i < id_col->GetData().GetSize(); i++) {
1808  string str;
1809  id_col->GetData().GetId()[i]->GetLabel(&str, objects::CSeq_id::eContent);
1810  id_strings.push_back(str);
1811  }
1812  }
1813  vector<string> dup_ids = FindNonUniqueStrings(id_strings);
1814  if (dup_ids.size() > 0) {
1815  for (int i = 0; i < new_ids->GetNum_rows(); i++) {
1816  bool found = false;
1817  ITERATE (vector<string>, s_it, dup_ids) {
1818  if (NStr::EqualNocase(id_strings[i], *s_it)) {
1819  found = true;
1820  break;
1821  }
1822  }
1823  if (found) {
1824  string val = problems->GetData().GetString()[i];
1825  if (!NStr::IsBlank(val)) {
1826  val += ", ";
1827  }
1828  val += kIdDuplicate;
1829  problems->SetData().SetString()[i] = val;
1830  any_problems = true;
1831  }
1832  }
1833  }
1834  }
1835  if (!any_problems) {
1836  problems.Reset(NULL);
1837  }
1838 
1839  return problems;
1840 }
1841 
1842 
1844 {
1845  int num_duplicates = 0;
1846  int num_too_long = 0;
1847 
1848  if (problems) {
1849  ITERATE(objects::CSeqTable_column::TData::TString, s_it, problems->GetData().GetString()) {
1850  if (NStr::FindNoCase(*s_it, kIdDuplicate) != string::npos) {
1851  num_duplicates++;
1852  }
1853  if (NStr::FindNoCase(*s_it, kIdTooLong) != string::npos) {
1854  num_too_long++;
1855  }
1856  }
1857  }
1858  string rval = "";
1859  if (num_duplicates > 0) {
1860  rval = NStr::NumericToString(num_duplicates) + " duplicate IDs";
1861  if (num_too_long > 0) {
1862  rval += ", ";
1863  }
1864  }
1865  if (num_too_long > 0) {
1866  rval += NStr::NumericToString(num_too_long) + " IDs are too long";
1867  }
1868 
1869  return rval;
1870 }
1871 
1872 
1874 {
1875  bool any_changes = false;
1877  CRef<objects::CSeqTable_column> np_col = FindSeqTableColumnByName(new_ids, "NucOrProt");
1879 
1880  CRef<objects::CSeqTable_column> new_str(new objects::CSeqTable_column());
1881  new_str->SetHeader().SetTitle("new_id");
1882  new_str->SetData().SetString();
1883 
1884  size_t num_seq = pos_col->GetData().GetInt().back();
1885  CRef<objects::CSeqTable_column> old_pos_col = FindSeqTableColumnByName(old_ids, "Pos");
1886  if (old_pos_col && old_pos_col->GetData().GetInt().size() > 0) {
1887  num_seq += old_pos_col->GetData().GetInt().back();
1888  }
1889 
1890  int reserve_char = ceil(log ((double)num_seq)) + 1;
1891 
1892  vector<string> id_strings;
1893 
1894  // check length on new_ids only
1895  for (int i = 0; i < new_ids->GetNum_rows(); i++) {
1896  string str;
1897  id_col->GetData().GetId()[i]->GetLabel(&str, objects::CSeq_id::eContent);
1898  if (str.length() > max_len) {
1899  str = str.substr(0, max_len - reserve_char);
1900  new_str->SetData().SetString().push_back(str);
1901  any_changes = true;
1902  } else {
1903  new_str->SetData().SetString().push_back("");
1904  }
1905  id_strings.push_back(str);
1906  }
1907 
1908  if (old_ids) {
1909  id_col = FindSeqTableColumnByName(old_ids, kSequenceIdColLabel);
1910  for (unsigned int i = 0; i < id_col->GetData().GetSize(); i++) {
1911  string str;
1912  id_col->GetData().GetId()[i]->GetLabel(&str, objects::CSeq_id::eContent);
1913  id_strings.push_back(str);
1914  }
1915  }
1916  vector<string> dup_ids = FindNonUniqueStrings(id_strings);
1917  if (dup_ids.size() > 0) {
1918  vector<int> high_offset;
1919  ITERATE (vector<string>, s_it, dup_ids) {
1920  high_offset.push_back(0);
1921  }
1922  ITERATE(vector<string>, i_it, id_strings) {
1923  int dup_pos = 0;
1924  ITERATE (vector<string>, s_it, dup_ids) {
1925  if (NStr::StartsWith(*i_it, *s_it + "_")) {
1926  try {
1927  int offset = NStr::StringToInt((*i_it).substr((*s_it).length() + 1));
1928  if (offset > high_offset[dup_pos]) {
1929  high_offset[dup_pos] = offset;
1930  }
1931  } catch (exception &) {
1932  // not an integer suffix, ignore
1933  }
1934  break;
1935  }
1936  dup_pos++;
1937  }
1938  }
1939  for (int i = 0; i < new_ids->GetNum_rows(); i++) {
1940  int dup_pos = 0;
1941  ITERATE (vector<string>, s_it, dup_ids) {
1942  if (NStr::EqualNocase(id_strings[i], *s_it)) {
1943  high_offset[dup_pos]++;
1944  new_str->SetData().SetString()[i] = id_strings[i] + "_" + NStr::NumericToString(high_offset[dup_pos]);
1945  any_changes = true;
1946  break;
1947  }
1948  dup_pos++;
1949  }
1950  }
1951  }
1952  if (!any_changes) {
1953  new_str.Reset(NULL);
1954  }
1955  return new_str;
1956 }
1957 
1958 
1959 static void TransformSeqAnnot( objects::CSeq_annot& annot,
1960  CRef<objects::CSeq_id> seq_id,
1961  CRef<objects::CSeq_id> new_seq_id )
1962 {
1963  for (CTypeIterator<objects::CSeq_id> seq_id_it(Begin(annot));
1964  seq_id_it; ++seq_id_it)
1965  {
1966  if( seq_id_it->Equals(*seq_id) ) {
1967  seq_id_it->Assign(*new_seq_id);
1968  }
1969  }
1970 }
1971 
1972 
1973 static void TransformSeqAnnots( objects::CBioseq & bioseq,
1974  CRef<objects::CSeq_id> seq_id,
1975  CRef<objects::CSeq_id> new_seq_id )
1976 {
1977  // fix annots directly on the sequence
1978  NON_CONST_ITERATE(objects::CBioseq::TAnnot, annot_it, bioseq.SetAnnot()) {
1979  TransformSeqAnnot(**annot_it, seq_id, new_seq_id);
1980  }
1981 
1982  // then, climb up, changing it within higher-up bioseq-set annots
1983  objects::CSeq_entry *pEntry = bioseq.GetParentEntry();
1984  for( ; pEntry; pEntry = pEntry->GetParentEntry() ) {
1985  if( ! pEntry->IsSet() ) {
1986  continue;
1987  }
1988  objects::CBioseq_set & bioseq_set = pEntry->SetSet();
1989  if( ! bioseq_set.IsSetAnnot() ) {
1990  continue;
1991  }
1992  NON_CONST_ITERATE(objects::CBioseq_set::TAnnot, annot_it, bioseq_set.SetAnnot() ) {
1993  TransformSeqAnnot(**annot_it, seq_id, new_seq_id);
1994  }
1995  }
1996 }
1997 
1998 
// Applies replacement ids to entry, recursing into the members of a set.
//
// For a sequence, each of its Seq-ids is compared against the ids in id_col;
// on the first match (Compare() == e_YES) the id is overwritten with a local
// string id taken from the same position of new_id_col, the annotations are
// patched through TransformSeqAnnots(), and the used pair is erased from
// both columns. Both columns are therefore modified (consumed) by this call.
//
// NOTE(review): the statements that obtain id_col and new_id_col (presumably
// from table) are not visible in this listing.
1999 void ApplyReplacementIds(objects::CSeq_entry& entry, CRef<objects::CSeq_table> table)
2000 {
2003 
2004  if (entry.IsSeq()) {
2005  NON_CONST_ITERATE (objects::CBioseq::TId, id_it, entry.SetSeq().SetId()) {
2006  objects::CSeqTable_column::TData::TId::iterator orig_it = id_col->SetData().SetId().begin();
2007  objects::CSeqTable_column::TData::TString::iterator new_it = new_id_col->SetData().SetString().begin();
2008  while(orig_it != id_col->SetData().SetId().end() && new_it != new_id_col->SetData().SetString().end()) {
2009  if ((*id_it)->Compare(**orig_it) == objects::CSeq_id::e_YES) {
2010  CRef<objects::CSeq_id> new_id(new objects::CSeq_id());
2011  new_id->SetLocal().SetStr(*new_it);
2012  (*id_it)->Assign(*new_id);
2013  TransformSeqAnnots (entry.SetSeq(), *orig_it, new_id);
2014  // remove entry from list, so that we only make this replacement once
2015  orig_it = id_col->SetData().SetId().erase(orig_it);
2016  new_it = new_id_col->SetData().SetString().erase(new_it);
2017  break;
2018  } else {
2019  orig_it++;
2020  new_it++;
2021  }
2022  }
2023  }
2024  } else if (entry.IsSet()) {
2025  NON_CONST_ITERATE(objects::CBioseq_set::TSeq_set, it, entry.SetSet().SetSeq_set()) {
2026  ApplyReplacementIds(**it, table);
2027  }
2028  }
2029 
2030 }
2031 
2032 
2033 bool AlreadyHasFeature(objects::CBioseq_Handle bh, string key, string comment)
2034 {
2035  bool already_got_one = false;
2036  objects::CFeat_CI f(bh, objects::SAnnotSelector(objects::CSeqFeatData::e_Imp));
2037  while (f) {
2038  if (f->GetData().GetImp().IsSetKey()
2039  && NStr::EqualNocase(key, f->GetData().GetImp().GetKey())
2040  && ((NStr::IsBlank(comment) && !f->IsSetComment())
2041  || (f->IsSetComment() && NStr::Equal(f->GetComment(), comment)))) {
2042  already_got_one = true;
2043  break;
2044  }
2045  ++f;
2046  }
2047  return already_got_one;
2048 }
2049 
2050 
2051 bool AlreadyHasFeature(objects::CBioseq_Handle bh, objects::CSeqFeatData::ESubtype subtype)
2052 {
2053  bool already_got_one = false;
2054  objects::CFeat_CI f(bh, objects::SAnnotSelector(subtype));
2055  if (f) {
2056  already_got_one = true;
2057  }
2058  return already_got_one;
2059 }
2060 
2061 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
void Field(const string &value)
static CRef< CFeatureSeqTableColumnBase > Create(const string &sTitle, objects::CSeqFeatData::ESubtype subtype=objects::CSeqFeatData::eSubtype_any)
virtual vector< string > IsValid(const vector< string > &values)
static wxString GetDialogFilter(EFileType fileType)
Output filtering parameters.
Definition: prosplign.hpp:156
@ ePassThrough
all zeroes - no filtering
Definition: prosplign.hpp:162
spliced protein to genomic alignment
Definition: prosplign.hpp:299
CRef< objects::CSeq_align > FindAlignment(objects::CScope &scope, const objects::CSeq_id &protein, const objects::CSeq_loc &genomic, CProSplignOutputOptions output_options=CProSplignOutputOptions())
Aligns protein to a region on genomic sequence.
Definition: prosplign.hpp:326
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
void SetMolinfoForProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
CRef< objects::CSeq_feat > AddProteinFeatureToProtein(CRef< objects::CSeq_entry > protein, bool partial5, bool partial3)
std::ofstream out("events_result.xml")
main entry point for tests
const string kProblems
vector< CRef< CFeatureSeqTableColumnBase > > TFeatureSeqTableColumnList
const string kFeatureID
static const int kNumSatelliteTypes
const string kPartialStart
static const string kSatelliteTypes[]
const string kPartialStop
const string kStopColLabel
const string kStartColLabel
const char * kNewId
const char * kSequenceIdColLabel
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
int offset
Definition: replacements.h:160
static FILE * f
Definition: readconf.c:23
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define NULL
Definition: ncbistd.hpp:225
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:1004
@ eContent
Definition: feature.hpp:87
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
Definition: ncbistre.hpp:500
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2984
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
Definition: ncbistr.cpp:3192
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5347
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5378
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
static const char label[]
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
@ e_not_set
int i
int len
#define wxT(x)
Definition: muParser.cpp:41
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
mdb_mode_t mode
Definition: lmdb++.h:38
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
#define count
static const int kSkipColumns
bool MatchColumnName(string name1, string name2)
CRef< CSeqTable_column > FindSeqTableColumnByName(CRef< objects::CSeq_table > values_table, string column_name)
CRef< CSeqTable_column > AddIntColumnToTable(CRef< CSeq_table > table, string label)
CRef< CSeqTable_column > AddStringColumnToTable(CRef< CSeq_table > table, string label)
void AddValueToTable(CRef< CSeq_table > table, string subtype_name, string value, size_t row, edit::EExistingText existing_text=edit::eExistingText_replace_old)
bool RelaxedMatch(CRef< objects::CSeq_id > id1, CRef< objects::CSeq_id > id2)
#define row(bind, expected)
Definition: string_bind.c:73
@ eExistingText_replace_old
Definition: smp.h:26
int CombineTables(CRef< objects::CSeq_table > dst, const objects::CSeq_table &src)
bool AreAnyColumnValuesMissing(CRef< objects::CSeqTable_column > col)
int CountColumnRowConflicts(CRef< objects::CSeqTable_column > dst_id, CRef< objects::CSeqTable_column > dst_col, CRef< objects::CSeqTable_column > src_id, CRef< objects::CSeqTable_column > src_col)
const string kIdTooLong
int AddFeatureToSeqTable(const objects::CSeq_feat &f, CRef< objects::CSeq_table > table)
static CRef< objects::CSeqTable_column > s_MakeTitleCol()
static CRef< objects::CSeqTable_column > s_MakePosCol()
bool AddSeqEntryToSeqEntry(const objects::CSeq_entry *entry, objects::CSeq_entry_Handle seh, CRef< CCmdComposite > cmd, bool &has_nuc, bool &has_prot, bool create_general_only)
bool SaveTableFile(wxWindow *parent, wxString &save_file_dir, wxString &save_file_name, CRef< objects::CSeq_table > values_table)
CSourceRequirements::EWizardType GetWizardTypeFromName(string wizard_name)
int CountTableColumnConflicts(CRef< objects::CSeq_table > dst, const objects::CSeq_table &src)
CRef< objects::CUser_object > MakeWizardObject()
CRef< objects::CUser_object > MakeBankItSubmissionObject()
bool IsValidEmail(string email)
CRef< objects::CSeq_table > GetIdsFromSeqEntry(const objects::CSeq_entry &entry)
const string kSMARTComment
bool IsWizardObject(const objects::CUser_object &user)
static TWizardName s_WizardNames[]
bool IsBankItSubmissionObject(const objects::CUser_object &user)
CRef< CCmdComposite > AddFeatureSeqTableToSeqEntry(CRef< objects::CSeq_table > table, objects::CSeq_entry_Handle entry, objects::CSeqFeatData::ESubtype subtype, const TFeatureSeqTableColumnList &reqs, const TFeatureSeqTableColumnList &opts)
CRef< CCmdComposite > RemoveMatchingFeaturesWithoutLocalIntFeatureIdsFromSeqEntry(objects::CSeq_entry_Handle entry, const objects::CSeq_feat &feat, const TFeatureSeqTableColumnList &reqs)
static void AddProblemsToColumn(CRef< objects::CSeqTable_column > val_col, CRef< CFeatureSeqTableColumnBase > rule_col, vector< string > &problems)
void ParseTitlesToNewSeqEntries(CRef< objects::CSeq_entry > entry)
void AddProblemsColumnToFeatureSeqTable(CRef< objects::CSeq_table > table, const TFeatureSeqTableColumnList &reqs, const TFeatureSeqTableColumnList &opts)
bool IsSynonymForFalse(const string &val)
static CRef< objects::CSeqTable_column > s_MakeIdTableIdCol()
bool AreAnyColumnValuesPresent(CRef< objects::CSeqTable_column > col, string default_val)
static CRef< objects::CSeqTable_column > s_MakeNpCol()
void AddFeatureSeqTableRowToSeqEntry(CRef< objects::CSeq_table > table, objects::CSeq_entry_Handle entry, objects::CSeqFeatData::ESubtype subtype, unsigned int row, TFeatureSeqTableColumnList &vecColEditFactories, CRef< CCmdComposite > cmd)
void SetWizardFieldInSeqEntryNoUndo(objects::CSeq_entry_Handle entry, string field_name, string value)
string GetDescAlternateEmailAddress(const objects::CSeqdesc &desc)
wxString GetAsnSqnExtensions()
static void TransformSeqAnnot(objects::CSeq_annot &annot, CRef< objects::CSeq_id > seq_id, CRef< objects::CSeq_id > new_seq_id)
int FindRowForSeqId(CRef< objects::CSeqTable_column > id_col, CRef< objects::CSeq_id > id)
static void AddIdsFromSeqEntryToTable(const objects::CSeq_entry &entry, CRef< objects::CSeqTable_column > id_col, CRef< objects::CSeqTable_column > pos_col, CRef< objects::CSeqTable_column > np_col, CRef< objects::CSeqTable_column > title_col, int &pos, size_t &row)
CRef< CFeatureSeqTableColumnBase > GetColumnRuleForFeatureSeqTable(CRef< objects::CSeqTable_column > col, const TFeatureSeqTableColumnList &reqs, const TFeatureSeqTableColumnList &opts)
const string kBankItAltEmailPrefix
CRef< objects::CSeqTable_column > FindSeqIDColumn(const objects::CSeq_table &table)
bool IsSynonymForTrue(const string &val)
static void s_ReportMixError()
string SummarizeIdProblems(CRef< objects::CSeqTable_column > problems)
void DeleteTableRow(CRef< objects::CSeq_table > table, int row)
static void TransformSeqAnnots(objects::CBioseq &bioseq, CRef< objects::CSeq_id > seq_id, CRef< objects::CSeq_id > new_seq_id)
string GetAlternateEmailAddress(objects::CSeq_entry_Handle entry)
static int k_NumWizardNames
static bool OneRowOk(CRef< objects::CSeq_id > id, CRef< objects::CSeqTable_column > id_col, objects::CSeq_entry_Handle seh)
bool AlreadyHasFeature(objects::CBioseq_Handle bh, string key, string comment)
void RemoveProblemsColumn(CRef< objects::CSeq_table > values_table)
void SetAlternateEmailAddress(objects::CSeq_entry_Handle entry, string alt_email)
bool DoesColumnHaveValue(CRef< objects::CSeqTable_column > col, string val)
static void s_ExtendIntervalToEnd(objects::CSeq_interval &ival, objects::CBioseq_Handle bsh)
string GetFieldFromWizardObject(const objects::CUser_object &user, string field_name)
bool AreAllColumnValuesTheSame(CRef< objects::CSeqTable_column > col, string default_val)
void ApplyReplacementIds(objects::CSeq_entry &entry, CRef< objects::CSeq_table > table)
const string kIdDuplicate
vector< string > GetTrueFalseList()
const string kIdMissing
string CheckFeatureAnnotation(objects::CSeq_entry_Handle entry, bool &is_ok)
bool AddProteinToSeqEntry(const objects::CSeq_entry *protein, objects::CSeq_entry_Handle seh, CRef< CCmdComposite > cmd, bool create_general_only)
void FixTableAfterImport(CRef< objects::CSeq_table > input_table)
const string kBankitSubmissionLabel
string FindBadRows(CRef< objects::CSeq_table > src, CRef< objects::CSeq_table > dst, objects::CSeq_entry_Handle seh)
static void CombineColumns(CRef< objects::CSeqTable_column > dst_id, CRef< objects::CSeqTable_column > dst_col, CRef< objects::CSeqTable_column > src_id, CRef< objects::CSeqTable_column > src_col)
CRef< objects::CSeq_table > GetFeaturesFromSeqEntry(objects::CSeq_entry_Handle entry, const objects::CSeq_feat &feat, const TFeatureSeqTableColumnList &reqs)
bool IsTrueFalseList(const vector< string > &choices)
static bool s_OkToAddFeature(const objects::CSeq_feat &f1, const objects::CSeq_feat &f2, const TFeatureSeqTableColumnList &reqs)
void SetUserAlternateEmailAddress(objects::CUser_object &u, string alt_email)
CRef< objects::CSeqTable_column > GetSeqIdProblems(CRef< objects::CSeq_table > new_ids, CRef< objects::CSeq_table > old_ids, size_t max_len)
bool HasLocalIntFeatureId(const objects::CSeq_feat &feat)
static string GetIdValueFromColumn(CRef< objects::CSeqTable_column > id_col, int pos)
CRef< objects::CSeqTable_column > GetReplacementSeqIds(CRef< objects::CSeq_table > new_ids, CRef< objects::CSeq_table > old_ids, size_t max_len)
void MergeStringVectors(vector< string > &problems, vector< string > add)
TWizardNameList GetWizardNameList()
vector< TWizardName > TWizardNameList
vector< string > FindNonUniqueStrings(const vector< string > &values)
pair< CSourceRequirements::EWizardType, string > TWizardName
const string kWizardLabel
wxString ToWxString(const string &s)
Definition: wx_utils.hpp:173
Modified on Fri Sep 20 14:58:12 2024 by modify_doxy.py rev. 669887