NCBI C++ ToolKit
molinfoedit_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: molinfoedit_util.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Colleen Bollin
27  */
28 
29 
30 #include <ncbi_pch.hpp>
31 
35 #include <objects/seq/Bioseq.hpp>
36 #include <objects/seq/Seq_inst.hpp>
43 #include <objmgr/seqdesc_ci.hpp>
44 #include <objmgr/bioseq_ci.hpp>
45 #include <objmgr/feat_ci.hpp>
46 
60 
65 
67 
68 
69 objects::CSeq_inst::ETopology s_TopologyFromName (string name)
70 {
71  objects::CSeq_inst::ETopology topology = objects::CSeq_inst::eTopology_not_set;
72  if (NStr::EqualNocase(name, "circular")) {
73  topology = objects::CSeq_inst::eTopology_circular;
74  } else if (NStr::EqualNocase(name, "linear")) {
75  topology = objects::CSeq_inst::eTopology_linear;
76  } else if (NStr::EqualNocase(name, "other")) {
77  topology = objects::CSeq_inst::eTopology_other;
78  } else if (NStr::EqualNocase(name, "tandem")) {
79  topology = objects::CSeq_inst::eTopology_tandem;
80  }
81  return topology;
82 }
83 
84 
85 string s_GetBiomolName (objects::CSeq_inst::EMol mol, objects::CMolInfo::TBiomol biomol)
86 {
87  string name = "";
88  switch (biomol) {
89  case objects::CMolInfo::eBiomol_genomic:
90  case objects::CMolInfo::eBiomol_unknown:
91  if (mol == objects::CSeq_inst::eMol_rna) {
92  name = "Genomic RNA";
93  } else if (mol == objects::CSeq_inst::eMol_dna) {
94  name = kDefaultMoleculeType;
95  }
96  break;
97  case objects::CMolInfo::eBiomol_pre_RNA:
98  name = "Precursor RNA";
99  break;
100  case objects::CMolInfo::eBiomol_mRNA:
101  name = "mRNA";
102  break;
103  case objects::CMolInfo::eBiomol_rRNA:
104  name = "Ribosomal RNA";
105  break;
106  case objects::CMolInfo::eBiomol_tRNA:
107  name = "Transfer RNA";
108  break;
109  case objects::CMolInfo::eBiomol_peptide:
110  name = "Peptide";
111  break;
112  case objects::CMolInfo::eBiomol_other_genetic:
113  name = "Other-Genetic";
114  break;
115  case objects::CMolInfo::eBiomol_genomic_mRNA:
116  name = "Genomic-mRNA";
117  break;
118  case objects::CMolInfo::eBiomol_cRNA:
119  name = "cRNA";
120  break;
121  case objects::CMolInfo::eBiomol_transcribed_RNA:
122  name = "Transcribed RNA";
123  break;
124  case objects::CMolInfo::eBiomol_ncRNA:
125  case objects::CMolInfo::eBiomol_scRNA:
126  case objects::CMolInfo::eBiomol_snoRNA:
127  case objects::CMolInfo::eBiomol_snRNA:
128  case objects::CMolInfo::eBiomol_tmRNA:
129  name = "Non-coding RNA";
130  break;
131  case objects::CMolInfo::eBiomol_other:
132  name = "Other";
133  break;
134  }
135  return name;
136 }
137 
138 
139 static void s_GetBiomolValuesFromName (string name, objects::CSeq_inst::EMol &mol, objects::CMolInfo::EBiomol &biomol)
140 {
141  mol = objects::CSeq_inst::eMol_na;
142  biomol = objects::CMolInfo::eBiomol_unknown;
143 
144  if (NStr::EqualNocase(name, "Genomic RNA")) {
145  biomol = objects::CMolInfo::eBiomol_genomic;
146  mol = objects::CSeq_inst::eMol_rna;
147  } else if (NStr::EqualNocase(name, kDefaultMoleculeType)) {
148  biomol = objects::CMolInfo::eBiomol_genomic;
149  mol = objects::CSeq_inst::eMol_dna;
150  } else if (NStr::EqualNocase(name, "Precursor RNA")
151  || NStr::EqualNocase(name, "preRNA")
152  || NStr::EqualNocase(name, "pre-RNA")
153  || NStr::EqualNocase(name, "pre RNA")) {
154  biomol = objects::CMolInfo::eBiomol_pre_RNA;
155  mol = objects::CSeq_inst::eMol_rna;
156  } else if (NStr::EqualNocase(name, "mRNA [cDNA]")
157  || NStr::EqualNocase(name, "cDNA")
158  || NStr::EqualNocase(name, "mRNA")) {
159  biomol = objects::CMolInfo::eBiomol_mRNA;
160  mol = objects::CSeq_inst::eMol_rna;
161  } else if (NStr::EqualNocase(name, "Ribosomal RNA")
162  || NStr::EqualNocase(name, "rRNA")) {
163  biomol = objects::CMolInfo::eBiomol_rRNA;
164  mol = objects::CSeq_inst::eMol_rna;
165  } else if (NStr::EqualNocase(name, "Transfer RNA")
166  || NStr::EqualNocase(name, "tRNA")) {
167  biomol = objects::CMolInfo::eBiomol_tRNA;
168  mol = objects::CSeq_inst::eMol_rna;
169  } else if (NStr::EqualNocase(name, "Peptide")
170  || NStr::EqualNocase(name, "Amino Acid")) {
171  biomol = objects::CMolInfo::eBiomol_peptide;
172  mol = objects::CSeq_inst::eMol_aa;
173  } else if (NStr::EqualNocase(name, "Other-Genetic")) {
174  biomol = objects::CMolInfo::eBiomol_other_genetic;
175  mol = objects::CSeq_inst::eMol_na;
176  } else if (NStr::EqualNocase(name, "Genomic-mRNA")
177  || NStr::EqualNocase(name, "Genomic mRNA")) {
178  biomol = objects::CMolInfo::eBiomol_genomic_mRNA;
179  mol = objects::CSeq_inst::eMol_rna;
180  } else if (NStr::EqualNocase(name, "cRNA")) {
181  biomol = objects::CMolInfo::eBiomol_cRNA;
182  mol = objects::CSeq_inst::eMol_rna;
183  } else if (NStr::EqualNocase(name, "Transcribed RNA")) {
184  biomol = objects::CMolInfo::eBiomol_transcribed_RNA;
185  mol = objects::CSeq_inst::eMol_rna;
186  } else if (NStr::EqualNocase(name, "Non-coding RNA")
187  || NStr::EqualNocase(name, "ncRNA")
188  || NStr::EqualNocase(name, "scRNA")
189  || NStr::EqualNocase(name, "snoRNA")
190  || NStr::EqualNocase(name, "snRNA")
191  || NStr::EqualNocase(name, "tmRNA")) {
192  biomol = objects::CMolInfo::eBiomol_ncRNA;
193  mol = objects::CSeq_inst::eMol_rna;
194  } else if (NStr::EqualNocase(name, "Other")) {
195  biomol = objects::CMolInfo::eBiomol_other;
196  mol = objects::CSeq_inst::eMol_na;
197  }
198 }
199 
200 
202 {
203  vector<string> mol_options;
204  mol_options.clear();
205  mol_options.push_back(kDefaultMoleculeType);
206  switch (wizard_type) {
208  mol_options.push_back("genomic RNA");
209  mol_options.push_back("cRNA");
210  mol_options.push_back("mRNA");
211  break;
213  mol_options.push_back("mRNA");
214  break;
216  break;
218  default:
219  mol_options.push_back("genomic RNA");
220  mol_options.push_back("cRNA");
221  mol_options.push_back("mRNA");
222  mol_options.push_back("Precursor RNA");
223  mol_options.push_back("Ribosomal RNA");
224  mol_options.push_back("Transfer RNA");
225  mol_options.push_back("Other-Genetic");
226  mol_options.push_back("Transcribed RNA");
227  mol_options.push_back("Transfer-messenger RNA");
228  mol_options.push_back("ncRNA");
229  break;
230  }
231  return mol_options;
232 }
233 
234 
236 {
237  vector<string> genome_options;
238  genome_options.clear();
240  genome_options.push_back("genomic");
241  genome_options.push_back("mitochondrion");
242  genome_options.push_back("chloroplast");
243  genome_options.push_back("plastid");
244  } else {
245  genome_options.push_back("genomic");
247  || wizard_type == CSourceRequirements::eWizardType_igs
248  || wizard_type == CSourceRequirements::eWizardType_standard) {
249  genome_options.push_back("mitochondrion");
253  genome_options.push_back("chloroplast");
254  genome_options.push_back("chromoplast");
255  genome_options.push_back("kinetoplast");
256  genome_options.push_back("plastid");
257  genome_options.push_back("macronuclear");
258  genome_options.push_back("cyanelle");
259  genome_options.push_back("nucleomorph");
260  genome_options.push_back("apicoplast");
261  genome_options.push_back("leucoplast");
262  genome_options.push_back("proplastid");
263  genome_options.push_back("hydrogenosome");
264  genome_options.push_back("chromatophore");
265  genome_options.push_back("extrachromosomal");
266  genome_options.push_back("plasmid");
267  genome_options.push_back("proviral");
268  genome_options.push_back("virion");
269  genome_options.push_back("endogenous-virus");
270  }
271  }
272  }
273  return genome_options;
274 }
275 
276 
278 {
279  CRef<objects::CSeq_table> table(new objects::CSeq_table());
280  CRef<objects::CSeqTable_column> id_col(new objects::CSeqTable_column());
281  id_col->SetHeader().SetField_id(objects::CSeqTable_column_info::eField_id_location_id);
282  id_col->SetHeader().SetTitle(kSequenceIdColLabel);
283  table->SetColumns().push_back(id_col);
284 
286  CRef<objects::CSeqTable_column> topology_col;
287  /* TODO: different columns for different wizards */
288 
289  moltype_col = new objects::CSeqTable_column();
290  moltype_col->SetHeader().SetTitle(kMoleculeType);
291  moltype_col->SetHeader().SetField_name("biomol");
292  moltype_col->SetData().SetString();
293  table->SetColumns().push_back(moltype_col);
294  topology_col = new objects::CSeqTable_column();
295  topology_col->SetHeader().SetTitle("Topology");
296  topology_col->SetHeader().SetField_name("topology");
297  topology_col->SetData().SetString();
298  table->SetColumns().push_back(topology_col);
299 
300  size_t row = 0;
301  objects::CBioseq_CI b_iter(seh, objects::CSeq_inst::eMol_na);
302  for ( ; b_iter ; ++b_iter ) {
303  objects::CSeqdesc_CI it (*b_iter, objects::CSeqdesc::e_Molinfo);
304  CRef<objects::CSeq_id> id(new objects::CSeq_id());
305  id->Assign (*(b_iter->GetSeqId()));
306  id_col->SetData().SetId().push_back(id);
307  if (topology_col) {
308  objects::CSeq_inst::ETopology topology = objects::CSeq_inst::eTopology_not_set;
309  if (b_iter->IsSetInst_Topology()) {
310  topology = b_iter->GetInst_Topology();
311  }
312  topology_col->SetData().SetString().push_back(CMolInfoFieldType::GetTopologyLabel(topology));
313  }
314  if (moltype_col) {
315  objects::CSeq_inst::EMol mol = objects::CSeq_inst::eMol_na;
316  if (b_iter->IsSetInst_Mol()) {
317  mol = b_iter->GetInst_Mol();
318  }
319  objects::CMolInfo::TBiomol biomol = objects::CMolInfo::eBiomol_unknown;
320  if (it && it->GetMolinfo().IsSetBiomol()) {
321  biomol = it->GetMolinfo().GetBiomol();
322  }
323  moltype_col->SetData().SetString().push_back(s_GetBiomolName(mol, biomol));
324  }
325  row++;
326  }
327 
328  table->SetNum_rows(static_cast<CSeq_table::TNum_rows>(row));
329  return table;
330 }
331 
332 
// Build a composite command ("Bulk Molecule Type Edit") that applies the
// topology and molecule-type cells of values_table to the sequences in seh.
//
// For each row (bounded by both the table's row count and the size of the
// id column) the bioseq is looked up by id, a copy of its Seq-inst and of
// its MolInfo descriptor is edited, and a change/create command is added
// only for the parts that actually differ.
//
// values_table   table with an id column plus "Topology" and molecule-type
//                columns
// seh            entry whose bioseqs are edited
// add_confirmed  when true, a "<kMoleculeType> Confirmed" = "Yes" wizard
//                field command is added for every row that changed
//
// Returns the composite command; it is empty when the id column is missing
// or the table has no columns.
//
// NOTE(review): the declarations of id_col and moltype_col are not visible
// in this listing (file lines 337 and 342 are missing from the extraction).
333 CRef<CCmdComposite> ApplyMolInfoValuesTableToSeqEntry (CRef<objects::CSeq_table>values_table, objects::CSeq_entry_Handle seh, bool add_confirmed)
334 {
335  CRef<CCmdComposite> cmd( new CCmdComposite("Bulk Molecule Type Edit") );
336 
338  if (!id_col) {
339  return cmd;
340  }
341  CRef<objects::CSeqTable_column> topology_col = FindSeqTableColumnByName (values_table, "Topology");
343 
344  if( ! values_table->IsSetColumns() || values_table->GetColumns().empty() ) {
345  return cmd;
346  }
347 
348  for (int row = 0; row < values_table->GetNum_rows() && (unsigned int) row < id_col->GetData().GetSize(); row++) {
  // NOTE(review): bsh is used below without checking that the id resolved
  // to a valid handle - confirm the ids always come from seh.
349  objects::CBioseq_Handle bsh = seh.GetBioseqHandle(*(id_col->GetData().GetId()[row]));
350 
  // Work on a copy of the Seq-inst so the original is only replaced through
  // the undoable command added further down.
351  CRef<objects::CSeq_inst> inst (new objects::CSeq_inst());
352  inst->Assign(bsh.GetInst());
353  bool inst_changed = false;
354 
355  if (topology_col) {
  // NOTE(review): the pointer returned by GetStringPtr(row) is dereferenced
  // without a null check - confirm it cannot be null when the column has
  // fewer values than the id column.
356  objects::CSeq_inst::ETopology new_topology = s_TopologyFromName(*topology_col->GetStringPtr(row) );
357  if (inst->IsSetTopology()) {
358  if (new_topology != inst->GetTopology()) {
359  inst->SetTopology(new_topology);
360  inst_changed = true;
361  }
362  } else {
  // Topology was unset: only set it when the cell names a real topology.
363  if (new_topology != objects::CSeq_inst::eTopology_not_set) {
364  inst->SetTopology(new_topology);
365  inst_changed = true;
366  }
367  }
368  }
369 
370  CRef<objects::CSeqdesc> new_molinfo_desc( new objects::CSeqdesc );
371  objects::CMolInfo & molinfo = new_molinfo_desc->SetMolinfo();
372  bool molinfo_changed = false;
373  objects::CSeqdesc_CI desc_ci( bsh, objects::CSeqdesc::e_Molinfo);
374  if (desc_ci) {
375  molinfo.Assign(desc_ci->GetMolinfo());
376  } else {
  // No MolInfo descriptor yet: one is always created for this row, even if
  // no biomol value ends up being set on it.
377  molinfo_changed = true;
378  }
379 
380  if (moltype_col) {
381  objects::CSeq_inst::EMol new_mol;
382  objects::CMolInfo::EBiomol new_biomol;
  // s_GetBiomolValuesFromName yields eMol_na / eBiomol_unknown for text it
  // does not recognize, so an unrecognized cell replaces an existing, more
  // specific mol/biomol with those fallback values.
  // NOTE(review): same unchecked GetStringPtr(row) dereference as above.
383  s_GetBiomolValuesFromName ( *moltype_col->GetStringPtr(row), new_mol, new_biomol);
384  if (inst->IsSetMol()) {
385  if (new_mol != inst->GetMol()) {
386  inst->SetMol(new_mol);
387  inst_changed = true;
388  }
389  } else {
  // s_GetBiomolValuesFromName never produces eMol_not_set, so this test is
  // always true when reached.
390  if (new_mol != objects::CSeq_inst::eMol_not_set) {
391  inst->SetMol(new_mol);
392  inst_changed = true;
393  }
394  }
395 
396  if (molinfo.IsSetBiomol()) {
397  if (new_biomol != molinfo.GetBiomol()) {
398  molinfo.SetBiomol(new_biomol);
399  molinfo_changed = true;
400  }
401  } else {
402  if (new_biomol != objects::CMolInfo::eBiomol_unknown) {
403  molinfo.SetBiomol(new_biomol);
404  molinfo_changed = true;
405  }
406  }
407  }
408 
409  if (inst_changed) {
410  CRef<CCmdChangeBioseqInst> scmd (new CCmdChangeBioseqInst(bsh, *inst));
411  cmd->AddCommand(*scmd);
412  }
413  if (molinfo_changed) {
414  if (desc_ci) {
  // Replace the existing descriptor in the entry that owns it.
415  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc_ci.GetSeq_entry_Handle(), *desc_ci, *new_molinfo_desc));
416  cmd->AddCommand (*ecmd);
417  } else {
418  cmd->AddCommand( *CRef<CCmdCreateDesc>(new CCmdCreateDesc(bsh.GetParentEntry(), *new_molinfo_desc)) );
419  }
420  }
  // This runs inside the row loop, so the same wizard-field command is
  // added once per changed row rather than once per call.
421  if (add_confirmed && (inst_changed || molinfo_changed)) {
422  cmd->AddCommand(*CSubPrep_panel::SetWizardFieldInSeqEntry(seh, string(kMoleculeType) + " Confirmed", "Yes"));
423  }
424  }
425 
426  // send composite command
427  return cmd;
428 }
429 
430 
432 {
433  CRef<objects::CSeq_table> table(new objects::CSeq_table());
434  CRef<objects::CSeqTable_column> id_col(new objects::CSeqTable_column());
435  id_col->SetHeader().SetField_id(objects::CSeqTable_column_info::eField_id_location_id);
436  id_col->SetHeader().SetTitle(kSequenceIdColLabel);
437  table->SetColumns().push_back(id_col);
438 
439  CRef<objects::CSeqTable_column> comment_col(new objects::CSeqTable_column());
440  comment_col->SetHeader().SetTitle(label);
441  comment_col->SetHeader().SetField_name("comment");
442  comment_col->SetData().SetString();
443  table->SetColumns().push_back(comment_col);
444 
445  size_t row = 0;
446  objects::CBioseq_CI b_iter(seh, objects::CSeq_inst::eMol_na);
447  for ( ; b_iter ; ++b_iter ) {
448  objects::CSeqdesc_CI it (*b_iter, objects::CSeqdesc::e_Comment);
449  CRef<objects::CSeq_id> id(new objects::CSeq_id());
450  id->Assign (*(b_iter->GetSeqId()));
451  id_col->SetData().SetId().push_back(id);
452  string comment = "";
453  if (it) {
454  comment = it->GetComment();
455  }
456  comment_col->SetData().SetString().push_back(comment);
457  row++;
458  }
459 
460  table->SetNum_rows(static_cast<CSeq_table::TNum_rows>(row));
461  return table;
462 }
463 
464 
// Build a composite command ("Bulk Comment Edit") that applies the comment
// column titled `label` of values_table to the sequences in seh.
//
// For each row (bounded by both the table's row count and the size of the
// id column) the new text is compared with the first comment descriptor
// found for the bioseq. When they differ, the existing descriptor is
// replaced, or a new one is created if there was none. An empty cell
// therefore replaces an existing comment with an empty comment; it does
// not remove the descriptor.
//
// Returns the composite command; it is empty when the id column is missing
// or the table has no columns.
//
// NOTE(review): the declaration of id_col is not visible in this listing
// (file line 469 is missing from the extraction).
465 CRef<CCmdComposite> ApplyCommentValuesTableToSeqEntry (CRef<objects::CSeq_table>values_table, objects::CSeq_entry_Handle seh, const string& label)
466 {
467  CRef<CCmdComposite> cmd( new CCmdComposite("Bulk Comment Edit") );
468 
470  if (!id_col) {
471  return cmd;
472  }
  // NOTE(review): comment_col is dereferenced in the loop without a null
  // check - confirm FindSeqTableColumnByName cannot return an empty CRef
  // when no column is titled `label`.
473  CRef<objects::CSeqTable_column> comment_col = FindSeqTableColumnByName (values_table, label);
474 
475  if( ! values_table->IsSetColumns() || values_table->GetColumns().empty() ) {
476  return cmd;
477  }
478 
479  for (int row = 0; row < values_table->GetNum_rows() && (unsigned int) row < id_col->GetData().GetSize(); row++) {
480  objects::CBioseq_Handle bsh = seh.GetBioseqHandle(*(id_col->GetData().GetId()[row]));
481  string new_comment = "";
  // Rows beyond the end of the comment column are treated as empty text.
482  if ((unsigned int) row < comment_col->GetData().GetSize()) {
483  new_comment = comment_col->GetData().GetString()[row];
484  }
485  string old_comment = "";
  // Only the first comment descriptor found by the iterator is compared
  // and edited.
486  objects::CSeqdesc_CI desc_ci( bsh, objects::CSeqdesc::e_Comment);
487  if (desc_ci) {
488  old_comment = desc_ci->GetComment();
489  }
490  if (!NStr::Equal(new_comment, old_comment)) {
491  CRef<objects::CSeqdesc> new_desc( new objects::CSeqdesc );
492  new_desc->SetComment(new_comment);
493  if (desc_ci) {
494  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc_ci.GetSeq_entry_Handle(), *desc_ci, *new_desc));
495  cmd->AddCommand (*ecmd);
496  } else {
497  cmd->AddCommand( *CRef<CCmdCreateDesc>(new CCmdCreateDesc(bsh.GetParentEntry(), *new_desc)) );
498  }
499  }
500  }
501 
502  // send composite command
503  return cmd;
504 }
505 
506 
// Fixed leading text of a chimera-screening comment. Code in this file
// recognizes such comments by this prefix and treats whatever follows it
// as the user-supplied value.
507 const string kChimeraCommentStart = "Sequences were screened for chimeras by the submitter using ";
508 
509 
// Decide whether a sequence with the given source should get a chimera
// comment row.
//
// Checks run in order and the first applicable one decides:
//   1. a mitochondrial genome always qualifies;
//   2. otherwise, if a lineage is set, it qualifies only when the lineage
//      contains "bacteria" or "archaea" (case-insensitive substring match);
//      src_type is not consulted in that case;
//   3. otherwise the wizard source type decides.
//
// NOTE(review): the rest of the src_type condition is not visible in this
// listing (file line 523 is missing from the extraction).
510 static bool s_WantChimeraForSource (const objects::CBioSource& src, CSourceRequirements::EWizardSrcType src_type)
511 {
512  if (src.IsSetGenome() && src.GetGenome() == objects::CBioSource::eGenome_mitochondrion) {
513  return true;
514  } else if (src.IsSetOrg() && src.GetOrg().IsSetOrgname() && src.GetOrg().GetOrgname().IsSetLineage()) {
515  const string& lineage = src.GetOrg().GetOrgname().GetLineage();
516  if (NStr::FindNoCase(lineage, "bacteria") != string::npos
517  || NStr::FindNoCase(lineage, "archaea") != string::npos) {
518  return true;
519  } else {
520  return false;
521  }
522  } else if (src_type == CSourceRequirements::eWizardSrcType_any
524  return true;
525  } else {
526  return false;
527  }
528 }
529 
530 
532 {
533  CRef<objects::CSeq_table> table(new objects::CSeq_table());
534  CRef<objects::CSeqTable_column> id_col(new objects::CSeqTable_column());
535  id_col->SetHeader().SetField_id(objects::CSeqTable_column_info::eField_id_location_id);
536  id_col->SetHeader().SetTitle(kSequenceIdColLabel);
537  table->SetColumns().push_back(id_col);
538 
539  CRef<objects::CSeqTable_column> comment_col(new objects::CSeqTable_column());
540  comment_col->SetHeader().SetTitle(label);
541  comment_col->SetHeader().SetField_name("comment");
542  comment_col->SetData().SetString();
543  table->SetColumns().push_back(comment_col);
544 
545  size_t row = 0;
546  objects::CBioseq_CI b_iter(seh, objects::CSeq_inst::eMol_na);
547  for ( ; b_iter ; ++b_iter ) {
548  objects::CSeqdesc_CI desc (*b_iter, objects::CSeqdesc::e_Source);
549  if (desc && !s_WantChimeraForSource(desc->GetSource(), src_type)) {
550  continue;
551  }
552 
553  int this_count = 0;
554  bool any = false;
555  objects::CFeat_CI fit (*b_iter);
556  while (fit && this_count < 2) {
557  this_count++;
558  if (fit->IsSetData() && fit->GetData().IsRna()
559  && fit->GetData().GetRna().IsSetType()
560  && fit->GetData().GetRna().GetType() == objects::CRNA_ref::eType_rRNA
561  && fit->GetData().GetRna().IsSetExt()
562  && fit->GetData().GetRna().GetExt().IsName()
563  && NStr::Equal(fit->GetData().GetRna().GetExt().GetName(), "16S ribosomal RNA")) {
564  any = true;
565  } else {
566  break;
567  }
568  ++fit;
569  }
570  if (this_count == 1 && any) {
571  CRef<objects::CSeq_id> id(new objects::CSeq_id());
572  id->Assign (*(b_iter->GetSeqId()));
573  id_col->SetData().SetId().push_back(id);
574  objects::CSeqdesc_CI it (*b_iter, objects::CSeqdesc::e_Comment);
575  while (it && !NStr::StartsWith(it->GetComment(), kChimeraCommentStart)) {
576  ++it;
577  }
578  string prog_ver = "";
579  if (it) {
580  prog_ver = it->GetComment().substr(kChimeraCommentStart.length());
581  }
582  comment_col->SetData().SetString().push_back(prog_ver);
583  row++;
584  }
585  }
586 
587  if (row == 0) {
588  table->Reset();
589  } else {
590  table->SetNum_rows(static_cast<CSeq_table::TNum_rows>(row));
591  }
592  return table;
593 }
594 
595 
// Build a composite command ("Bulk Chimera Comment Edit") that applies the
// column titled `label` of values_table as chimera-screening comments.
//
// For each row (bounded by both the table's row count and the size of the
// id column) a non-blank cell is prefixed with kChimeraCommentStart and
// compared with the first existing comment descriptor that starts with
// that prefix. When the text differs, an existing chimera comment is
// replaced; when none exists, a new descriptor is created only if the new
// text is non-blank. A blank cell therefore replaces an existing chimera
// comment with a blank comment; it does not remove the descriptor.
//
// Returns the composite command; it is empty when the id column is missing
// or the table has no columns.
//
// NOTE(review): the declaration of id_col is not visible in this listing
// (file line 600 is missing from the extraction).
596 CRef<CCmdComposite> ApplyChimeraValuesTableToSeqEntry (CRef<objects::CSeq_table>values_table, objects::CSeq_entry_Handle seh, const string& label)
597 {
598  CRef<CCmdComposite> cmd( new CCmdComposite("Bulk Chimera Comment Edit") );
599 
601  if (!id_col) {
602  return cmd;
603  }
  // NOTE(review): comment_col is dereferenced in the loop without a null
  // check - confirm FindSeqTableColumnByName cannot return an empty CRef
  // when no column is titled `label`.
604  CRef<objects::CSeqTable_column> comment_col = FindSeqTableColumnByName (values_table, label);
605 
606  if( ! values_table->IsSetColumns() || values_table->GetColumns().empty() ) {
607  return cmd;
608  }
609 
610  for (int row = 0; row < values_table->GetNum_rows() && (unsigned int) row < id_col->GetData().GetSize(); row++) {
611  objects::CBioseq_Handle bsh = seh.GetBioseqHandle(*(id_col->GetData().GetId()[row]));
  // NOTE(review): row is bounded by the id column only; the comment column
  // is indexed without checking its own size - confirm both columns always
  // have the same length.
612  string new_comment = comment_col->GetData().GetString()[row];
613  if (!NStr::IsBlank(new_comment)) {
614  new_comment = kChimeraCommentStart + new_comment;
615  }
616  string old_comment = "";
617  objects::CSeqdesc_CI desc_ci( bsh, objects::CSeqdesc::e_Comment);
  // Skip comment descriptors that are not chimera comments.
618  while (desc_ci && !NStr::StartsWith(desc_ci->GetComment(), kChimeraCommentStart)) {
619  ++ desc_ci;
620  }
621  if (desc_ci) {
622  old_comment = desc_ci->GetComment();
623  }
624  if (!NStr::Equal(new_comment, old_comment)) {
625  CRef<objects::CSeqdesc> new_desc( new objects::CSeqdesc );
626  new_desc->SetComment(new_comment);
627  if (desc_ci) {
628  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc_ci.GetSeq_entry_Handle(), *desc_ci, *new_desc));
629  cmd->AddCommand (*ecmd);
630  } else {
631  if (!NStr::IsBlank (new_comment)) {
632  cmd->AddCommand( *CRef<CCmdCreateDesc>(new CCmdCreateDesc(bsh.GetParentEntry(), *new_desc)) );
633  }
634  }
635  }
636  }
637 
638  // send composite command
639  return cmd;
640 }
641 
642 
644 {
645  CRef<objects::CSeq_table> table(new objects::CSeq_table());
646  CRef<objects::CSeqTable_column> id_col(new objects::CSeqTable_column());
647  id_col->SetHeader().SetField_id(objects::CSeqTable_column_info::eField_id_location_id);
648  id_col->SetHeader().SetTitle(kSequenceIdColLabel);
649  table->SetColumns().push_back(id_col);
650 
651 
652 
653  size_t row = 0;
654  objects::CBioseq_CI b_iter(seh, objects::CSeq_inst::eMol_na);
655  for ( ; b_iter ; ++b_iter ) {
656  CRef<objects::CSeq_id> id(new objects::CSeq_id());
657  id->Assign (*(b_iter->GetSeqId()));
658  id_col->SetData().SetId().push_back(id);
659 
660  objects::CSeqdesc_CI it (*b_iter, objects::CSeqdesc::e_User);
661  while (it) {
662  if (it->GetUser().IsSetType()
663  && it->GetUser().GetType().IsStr()
664  && NStr::EqualNocase(it->GetUser().GetType().GetStr(), "DBLink")) {
665  ITERATE (objects::CUser_object::TData, field_it, it->GetUser().GetData()) {
666  if ((*field_it)->IsSetLabel()
667  && (*field_it)->GetLabel().IsStr()
668  && (*field_it)->IsSetData()) {
669  string existing_val = "";
670  if ((*field_it)->GetData().IsStr()) {
671  existing_val = (*field_it)->GetData().GetStr();
672  } else if ((*field_it)->GetData().IsStrs()) {
673  for (vector<CStringUTF8>::const_iterator str_it = (*field_it)->GetData().GetStrs().begin();
674  str_it != (*field_it)->GetData().GetStrs().end();
675  ++str_it) {
676  existing_val += ", " + *str_it;
677  }
678  while (NStr::StartsWith(existing_val, ", ")) {
679  existing_val = existing_val.substr(2);
680  }
681  }
682  AddValueToTable (table, (*field_it)->GetLabel().GetStr(), existing_val, row);
683  }
684  }
685  }
686  ++it;
687  }
688 
689  row++;
690  }
691 
692  table->SetNum_rows(static_cast<CSeq_table::TNum_rows>(row));
693  return table;
694 }
695 
696 
698 {
699  CRef<CCmdComposite> cmd( new CCmdComposite("Bulk DBLink Edit") );
700 
702  if (!id_col) {
703  return cmd;
704  }
705 
706  const objects::CSeq_table::TColumns & columns = values_table->GetColumns();
707  size_t num_cols = columns.size();
708 
709  for (int row = 0; row < values_table->GetNum_rows() && (size_t) row < id_col->GetData().GetSize(); row++) {
710  objects::CBioseq_Handle bsh = seh.GetBioseqHandle(*(id_col->GetData().GetId()[row]));
711  CRef<objects::CSeqdesc> new_desc( new objects::CSeqdesc );
712 
713  objects::CSeqdesc_CI desc_ci( bsh, objects::CSeqdesc::e_User);
714  bool found = false;
715  while (desc_ci && !found) {
716  if (desc_ci->GetUser().IsSetType()
717  && desc_ci->GetUser().GetType().IsStr()
718  && NStr::EqualNocase(desc_ci->GetUser().GetType().GetStr(), "DBLink")) {
719  found = true;
720  new_desc->SetUser().Assign(desc_ci->GetUser());
721  } else {
722  ++desc_ci;
723  }
724  }
725 
726  bool any_change = false;
727  for (size_t i = 1; i < num_cols; i++) {
728  string col_name = values_table->GetColumns()[i]->GetHeader().GetTitle();
729 
730  objects::CUser_field& field = new_desc->SetUser().SetField(col_name);
731  if (!field.IsSetLabel() || !field.GetLabel().IsStr() || NStr::IsBlank(field.GetLabel().GetStr())) {
732  field.SetLabel().SetStr(col_name);
733  }
734  string new_val = "";
735  if ((size_t) row < values_table->GetColumns()[i]->GetData().GetSize()) {
736  new_val = values_table->GetColumns()[i]->GetData().GetString()[row];
737  }
738  vector<string> values;
739  NStr::Split(new_val, ", ", values);
740  field.ResetData();
741  for (vector<string>::iterator str_it = values.begin(); str_it != values.end(); str_it++) {
742  field.SetData().SetStrs().push_back(*str_it);
743  }
744  if (values.size() == 0) {
745  field.SetData().SetStrs().push_back(" ");
746  }
747  any_change = true;
748  }
749 
750  if (any_change) {
751  if (desc_ci) {
752  CRef<CCmdChangeSeqdesc> ecmd(new CCmdChangeSeqdesc(desc_ci.GetSeq_entry_Handle(), *desc_ci, *new_desc));
753  cmd->AddCommand (*ecmd);
754  } else {
755  new_desc->SetUser().SetType().SetStr("DBLink");
756  cmd->AddCommand( *CRef<CCmdCreateDesc>(new CCmdCreateDesc(bsh.GetParentEntry(), *new_desc)) );
757  }
758  }
759  }
760 
761  // send composite command
762  return cmd;
763 }
764 
765 
766 ////// CMolInfoField
767 
768 string CMolInfoField::GetVal(const CObject& object)
769 {
770  string rval = "";
771 
772  const CSeqdesc* desc = dynamic_cast<const CSeqdesc*>(&object);
773  const CBioseq* b = dynamic_cast<const CBioseq*>(&object);
774  const CSeq_inst* inst = dynamic_cast<const CSeq_inst*>(&object);
775  if (b) {
776  inst = &(b->GetInst());
777  }
778 
779  if (desc && desc->IsMolinfo()) {
780  switch (m_FieldType) {
782  if (desc->GetMolinfo().IsSetBiomol()) {
784  }
785  break;
787  if (desc->GetMolinfo().IsSetTech()) {
788  CMolInfo::TTech tech = desc->GetMolinfo().GetTech();
789  if (tech == CMolInfo::eTech_other) {
790  if (desc->GetMolinfo().IsSetTechexp()) {
791  rval = desc->GetMolinfo().GetTechexp();
792  } else {
793  rval = "other";
794  }
795  } else {
797  }
798  }
799  break;
801  if (desc->GetMolinfo().IsSetCompleteness()) {
803  }
804  break;
805  default:
806  break;
807  }
808  }
809  if (inst) {
810  switch (m_FieldType) {
812  if (inst->IsSetMol()) {
813  rval = CMolInfoFieldType::GetMolLabel(inst->GetMol());
814  }
815  break;
817  if (inst->IsSetTopology()) {
819  }
820  break;
822  if (inst->IsSetStrand()) {
824  }
825  break;
826  default:
827  break;
828  }
829  }
830  return rval;
831 }
832 
833 
834 vector<string> CMolInfoField::GetVals(const CObject& object)
835 {
836  vector<string> rvals;
837  rvals.push_back(GetVal(object));
838  return rvals;
839 }
840 
841 
842 bool CMolInfoField::IsEmpty(const CObject& object) const
843 {
844  return false;
845 }
846 
847 
849 {
850  CSeqdesc* desc = dynamic_cast<CSeqdesc*>(&object);
851  CBioseq* bioseq = dynamic_cast<CBioseq*>(&object);
852  CSeq_inst* inst = dynamic_cast<CSeq_inst*>(&object);
853  if (bioseq) {
854  inst = &(bioseq->SetInst());
855  }
856  switch (m_FieldType) {
858  if (desc && desc->IsMolinfo()) {
859  desc->SetMolinfo().ResetBiomol();
860  }
861  break;
863  if (desc && desc->IsMolinfo()) {
864  desc->SetMolinfo().ResetTech();
865  desc->SetMolinfo().ResetTechexp();
866  }
867  break;
869  if (desc && desc->IsMolinfo()) {
870  desc->SetMolinfo().ResetCompleteness();
871  }
872  break;
873 
875  if (inst) {
876  inst->ResetMol();
877  }
878  break;
880  if (inst) {
881  inst->ResetTopology();
882  }
883  break;
885  if (inst) {
886  inst->ResetStrand();
887  }
888  default:
889  break;
890  }
891 }
892 
893 
894 bool CMolInfoField::SetVal(CObject& object, const string& val, edit::EExistingText existing_text)
895 {
896  bool rval = false;
897  CSeqdesc* desc = dynamic_cast<CSeqdesc*>(&object);
898  CBioseq* bioseq = dynamic_cast<CBioseq*>(&object);
899  CSeq_inst* inst = dynamic_cast<CSeq_inst*>(&object);
900  if (bioseq) {
901  inst = &(bioseq->SetInst());
902  }
903  switch (m_FieldType) {
905  if (desc && desc->IsMolinfo()) {
907  rval = true;
908  }
909  break;
911  if (desc && desc->IsMolinfo()) {
913  desc->SetMolinfo().SetTech(tech);
914  if (tech == CMolInfo::eTech_other) {
915  desc->SetMolinfo().SetTechexp(val);
916  } else {
917  desc->SetMolinfo().ResetTechexp();
918  }
919  rval = true;
920  }
921  break;
923  if (desc && desc->IsMolinfo()) {
925  rval = true;
926  }
927 
928  break;
930  if (inst) {
932  rval = true;
933  }
934  break;
936  if (inst) {
938  rval = true;
939  }
940  break;
942  if (inst) {
944  rval = true;
945  }
946  break;
947  default:
948  break;
949  }
950  return rval;
951 }
952 
953 
954 void CMolInfoField::SetConstraint(const string& field, CConstRef<edit::CStringConstraint> string_constraint)
955 {
957  if (m_ConstraintFieldType == CMolInfoFieldType::e_Unknown || !string_constraint) {
959  } else {
960  m_StringConstraint = new edit::CStringConstraint(" ");
961  m_StringConstraint->Assign(*string_constraint);
962  }
963 }
964 
965 vector<CConstRef<CObject> > CMolInfoField::GetObjects(CBioseq_Handle bsh)
966 {
967  vector<CConstRef<CObject> > objs;
968 
969  switch (m_FieldType) {
974  break;
978  objs.push_back(CConstRef<CObject>(bsh.GetCompleteBioseq().GetPointer()));
979  break;
980  default:
981  break;
982  }
983  return objs;
984 }
985 
986 
987 vector<CRef<edit::CApplyObject> > CMolInfoField::GetApplyObjects(CBioseq_Handle bsh)
988 {
989  vector<CRef<edit::CApplyObject> > objs;
990 
991  switch (m_FieldType) {
995  {{
996  // add existing descriptors
997  CSeqdesc_CI desc_ci(bsh, m_Subtype);
998  while (desc_ci) {
999  CRef<CSeqdesc> new_desc(new CSeqdesc());
1000  new_desc->Assign(*desc_ci);
1001  CRef<edit::CApplyObject> new_obj(new edit::CApplyObject(bsh.GetParentEntry(),
1002  CConstRef<CObject>(&(*desc_ci)),
1003  CRef<CObject>(new_desc.GetPointer())));
1004  objs.push_back(new_obj);
1005  ++desc_ci;
1006  }
1007 
1008  if (objs.empty()) {
1009  CRef<CSeqdesc> new_desc(new CSeqdesc());
1010  new_desc->SetMolinfo();
1011  CRef<edit::CApplyObject> new_obj(new edit::CApplyObject(bsh.GetParentEntry(),
1013  CRef<CObject>(new_desc.GetPointer())));
1014  objs.push_back(new_obj);
1015  }
1016  }}
1017  break;
1021  {{
1022  CRef<CSeq_inst> inst(new CSeq_inst());
1023  if (bsh.IsSetInst()) {
1024  inst->Assign(bsh.GetInst());
1025  }
1026  CRef<edit::CApplyObject> new_obj(new edit::CApplyObject(bsh.GetParentEntry(),
1028  CRef<CObject>(inst.GetPointer())));
1029  objs.push_back(new_obj);
1030  }}
1031  break;
1032  default:
1033  break;
1034  }
1035  return objs;
1036 }
1037 
1038 
1039 
1040 
1041 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CBioseq_Handle –.
CConstRef –.
Definition: ncbiobj.hpp:1266
static string GetBiomolLabel(objects::CMolInfo::TBiomol biomol)
static objects::CSeq_inst::TStrand GetStrandFromLabel(const string &val)
static objects::CSeq_inst::TMol GetMolFromLabel(const string &val)
static string GetTechLabel(objects::CMolInfo::TTech tech)
static objects::CSeq_inst::TTopology GetTopologyFromLabel(const string &val)
static EMolInfoFieldType GetFieldType(const string &field_name)
static objects::CMolInfo::TTech GetTechFromLabel(const string &val)
static string GetStrandLabel(objects::CSeq_inst::TStrand val)
static string GetMolLabel(objects::CSeq_inst::TMol val)
static string GetTopologyLabel(objects::CSeq_inst::TTopology tech)
static objects::CMolInfo::TCompleteness GetCompletenessFromLabel(const string &val)
static objects::CMolInfo::TBiomol GetBiomolFromLabel(const string &val)
static string GetCompletenessLabel(objects::CMolInfo::TCompleteness tech)
virtual void SetConstraint(const string &field, CConstRef< objects::edit::CStringConstraint > string_constraint)
virtual bool SetVal(CObject &object, const string &val, objects::edit::EExistingText existing_text)
CMolInfoFieldType::EMolInfoFieldType m_FieldType
virtual vector< string > GetVals(const CObject &object)
virtual string GetVal(const CObject &object)
CMolInfoFieldType::EMolInfoFieldType m_ConstraintFieldType
virtual vector< CConstRef< CObject > > GetObjects(CBioseq_Handle bsh)
virtual vector< CRef< objects::edit::CApplyObject > > GetApplyObjects(CBioseq_Handle bsh)
virtual bool IsEmpty(const CObject &object) const
virtual void ClearVal(CObject &object)
CRef< objects::edit::CStringConstraint > m_StringConstraint
CObject –.
Definition: ncbiobj.hpp:180
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static CRef< CCmdComposite > SetWizardFieldInSeqEntry(objects::CSeq_entry_Handle entry, string field_name, string value)
objects::CSeqdesc::E_Choice m_Subtype
virtual vector< CConstRef< CObject > > GetObjects(objects::CBioseq_Handle bsh)
const char * kMoleculeType
const char * kDefaultMoleculeType
const char * kSequenceIdColLabel
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static const column_t columns[]
Definition: utf8_2.c:22
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
bool IsSetInst(void) const
const TInst & GetInst(void) const
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1684
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static const char label[]
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
Definition: MolInfo_.hpp:600
void ResetStrand(void)
Reset Strand data member.
Definition: Seq_inst_.hpp:770
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
Definition: MolInfo_.hpp:569
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
Definition: Seqdesc_.hpp:1196
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_inst_.hpp:777
void ResetMol(void)
Reset Mol data member.
Definition: Seq_inst_.hpp:605
TTopology GetTopology(void) const
Get the Topology member data.
Definition: Seq_inst_.hpp:733
bool IsSetTechexp(void) const
explanation if tech not enough
Definition: MolInfo_.hpp:522
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
Definition: Seq_inst_.hpp:593
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Definition: Seq_inst_.hpp:758
void ResetCompleteness(void)
Reset Completeness data member.
Definition: MolInfo_.hpp:581
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:422
TTech GetTech(void) const
Get the Tech member data.
Definition: MolInfo_.hpp:497
void ResetTechexp(void)
Reset Techexp data member.
Definition: MolInfo_.cpp:123
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
const TTechexp & GetTechexp(void) const
Get the Techexp member data.
Definition: MolInfo_.hpp:534
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void ResetTech(void)
Reset Tech data member.
Definition: MolInfo_.hpp:484
void ResetTopology(void)
Reset Topology data member.
Definition: Seq_inst_.hpp:720
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:447
void SetTopology(TTopology value)
Assign a value to Topology data member.
Definition: Seq_inst_.hpp:739
void ResetBiomol(void)
Reset Biomol data member.
Definition: MolInfo_.hpp:434
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:453
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
Definition: MolInfo_.hpp:472
void SetTechexp(const TTechexp &value)
Assign a value to Techexp data member.
Definition: MolInfo_.hpp:543
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
Definition: MolInfo_.hpp:594
void SetStrand(TStrand value)
Assign a value to Strand data member.
Definition: Seq_inst_.hpp:786
void SetTech(TTech value)
Assign a value to Tech data member.
Definition: MolInfo_.hpp:503
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
bool IsSetTopology(void) const
Check if a value has been assigned to Topology data member.
Definition: Seq_inst_.hpp:708
@ eTech_other
use Source.techexp
Definition: MolInfo_.hpp:148
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
CRef< CCmdComposite > ApplyCommentValuesTableToSeqEntry(CRef< objects::CSeq_table >values_table, objects::CSeq_entry_Handle seh, const string &label)
CRef< objects::CSeq_table > BuildCommentDescriptorValuesTableFromSeqEntry(objects::CSeq_entry_Handle seh, const string &label)
objects::CSeq_inst::ETopology s_TopologyFromName(string name)
static void s_GetBiomolValuesFromName(string name, objects::CSeq_inst::EMol &mol, objects::CMolInfo::EBiomol &biomol)
CRef< CCmdComposite > ApplyMolInfoValuesTableToSeqEntry(CRef< objects::CSeq_table >values_table, objects::CSeq_entry_Handle seh, bool add_confirmed)
vector< string > GetSrcGenomeOptions(CSourceRequirements::EWizardType wizard_type, CSourceRequirements::EWizardSrcType src_type)
vector< string > GetMoleculeTypeOptions(CSourceRequirements::EWizardType wizard_type)
string s_GetBiomolName(objects::CSeq_inst::EMol mol, objects::CMolInfo::TBiomol biomol)
CRef< objects::CSeq_table > BuildMolInfoValuesTableFromSeqEntry(objects::CSeq_entry_Handle seh, CSourceRequirements::EWizardType wizard_type)
CRef< objects::CSeq_table > BuildChimeraValuesTableFromSeqEntry(objects::CSeq_entry_Handle seh, const string &label, CSourceRequirements::EWizardSrcType src_type)
static bool s_WantChimeraForSource(const objects::CBioSource &src, CSourceRequirements::EWizardSrcType src_type)
CRef< CCmdComposite > ApplyDBLinkValuesTableToSeqEntry(CRef< objects::CSeq_table >values_table, objects::CSeq_entry_Handle seh)
CRef< CCmdComposite > ApplyChimeraValuesTableToSeqEntry(CRef< objects::CSeq_table >values_table, objects::CSeq_entry_Handle seh, const string &label)
CRef< objects::CSeq_table > BuildDBLinkValuesTableFromSeqEntry(objects::CSeq_entry_Handle seh)
const string kChimeraCommentStart
CRef< CSeqTable_column > FindSeqTableColumnByName(CRef< objects::CSeq_table > values_table, string column_name)
void AddValueToTable(CRef< CSeq_table > table, string subtype_name, string value, size_t row, edit::EExistingText existing_text=edit::eExistingText_replace_old)
#define row(bind, expected)
Definition: string_bind.c:73
EExistingText
Modified on Wed May 01 14:25:18 2024 by modify_doxy.py rev. 669887