NCBI C++ ToolKit
convert_cds_with_gaps_to_misc_feat.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: convert_cds_with_gaps_to_misc_feat.cpp 41973 2018-11-26 17:21:37Z kachalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
28 
29 
30 #include <ncbi_pch.hpp>
31 
32 ////@begin includes
33 ////@end includes
34 #include <objects/seq/Seq_ext.hpp>
38 #include <objmgr/bioseq_handle.hpp>
42 #include <objmgr/util/sequence.hpp>
49 #include <objmgr/seq_map.hpp>
50 #include <objmgr/seq_map_ci.hpp>
51 #include <objmgr/seqdesc_ci.hpp>
55 
56 #include <wx/stattext.h>
57 #include <wx/msgdlg.h>
58 
59 
60 
61 ////@begin XPM images
62 ////@end XPM images
63 
66 
67 /*!
68  * CConvertCdsWithGapsToMiscFeat type definition
69  */
70 
71 IMPLEMENT_DYNAMIC_CLASS( CConvertCdsWithGapsToMiscFeat, CBulkCmdDlg )
72 
73 
74 /*!
75  * CConvertCdsWithGapsToMiscFeat event table definition
76  */
77 
78 BEGIN_EVENT_TABLE( CConvertCdsWithGapsToMiscFeat, CBulkCmdDlg )
79 
80 ////@begin CConvertCdsWithGapsToMiscFeat event table entries
81 ////@end CConvertCdsWithGapsToMiscFeat event table entries
83 
84 
85 /*!
86  * CConvertCdsWithGapsToMiscFeat constructors
87  */
88 
90 {
    // Body of a constructor whose signature line is missing from this listing
    // (presumably the default constructor - confirm against the header).
    // It only runs Init(); no window is created here.
91  Init();
92 }
93 
// Constructs the dialog and creates its window in one step.
// wb is forwarded to the CBulkCmdDlg base and m_Converter starts out as NULL;
// Init() is then run before Create() receives the remaining arguments.
94 CConvertCdsWithGapsToMiscFeat::CConvertCdsWithGapsToMiscFeat( wxWindow* parent, IWorkbench* wb, wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
95  : CBulkCmdDlg(wb), m_Converter(NULL)
96 {
97  Init();
98  Create(parent, id, caption, pos, size, style);
99 }
100 
101 
102 /*!
103  * CConvertCdsWithGapsToMiscFeat creator
104  */
105 
// Creates the dialog window via CBulkCmdDlg::Create(), builds the child
// controls, applies the sizer's size hints and centres the dialog.
// Always returns true: the result of CBulkCmdDlg::Create() is not checked.
// The code between the ////@begin / ////@end markers is a generated region.
106 bool CConvertCdsWithGapsToMiscFeat::Create( wxWindow* parent, wxWindowID id, const wxString& caption, const wxPoint& pos, const wxSize& size, long style )
107 {
108 ////@begin CConvertCdsWithGapsToMiscFeat creation
109  SetExtraStyle(wxWS_EX_BLOCK_EVENTS);
110  CBulkCmdDlg::Create( parent, id, caption, pos, size, style );
111 
112  CreateControls();
    // Size hints are applied only if a sizer is installed on the dialog.
113  if (GetSizer())
114  {
115  GetSizer()->SetSizeHints(this);
116  }
117  Centre();
118 ////@end CConvertCdsWithGapsToMiscFeat creation
119  return true;
120 }
121 
122 
123 /*!
124  * CConvertCdsWithGapsToMiscFeat destructor
125  */
126 
128 {
    // Destructor body (signature line missing from this listing).
    // The generated destruction region is empty, so nothing is released here.
129 ////@begin CConvertCdsWithGapsToMiscFeat destruction
130 ////@end CConvertCdsWithGapsToMiscFeat destruction
131 }
132 
133 
134 /*!
135  * Member initialisation
136  */
137 
139 {
    // Member initialisation. Several statements of this body (listing lines
    // 141, 143, 145 and 147) are missing from this listing, so only the
    // visible effects are described.
140 ////@begin CConvertCdsWithGapsToMiscFeat member initialisation
142  m_OkCancel = NULL;
144 ////@end CConvertCdsWithGapsToMiscFeat member initialisation
146 
    // NOTE(review): `opt` is declared on a missing line; it is presumably the
    // converter's "remove transcript id" option - confirm. When it is
    // non-null, its boolean value is switched on.
148  if (opt) {
149  opt->SetBool()->SetVal(true);
150  }
151 
152 }
153 
154 
155 /*!
156  * Control creation for CConvertCdsWithGapsToMiscFeat
157  */
158 
160 {
    // Builds the dialog contents in a single vertical box sizer, top to bottom:
    // two radio buttons selecting the conversion mode, a "Where feature text"
    // label, the string-constraint panel and the OK/Cancel panel.
    // (The signature line is missing from this listing.)
161 ////@begin CConvertCdsWithGapsToMiscFeat content construction
162  CConvertCdsWithGapsToMiscFeat* itemCBulkCmdDlg1 = this;
163 
164  wxBoxSizer* itemBoxSizer2 = new wxBoxSizer(wxVERTICAL);
165  itemCBulkCmdDlg1->SetSizer(itemBoxSizer2);
166 
    // First radio button starts a new group (wxRB_GROUP) and is selected by
    // default; its state is kept in m_RadioButton.
167  m_RadioButton = new wxRadioButton( itemCBulkCmdDlg1, wxID_ANY, _("Convert only when internal gap covers 50% or more of the coding region"), wxDefaultPosition, wxDefaultSize, wxRB_GROUP );
168  m_RadioButton->SetValue(true);
169  itemBoxSizer2->Add(m_RadioButton, 0, wxALIGN_LEFT|wxALL, 5);
170 
    // Second radio button is not stored in a member.
171  wxRadioButton* itemRadioButton9 = new wxRadioButton( itemCBulkCmdDlg1, wxID_ANY, _("Convert all coding regions with gaps (both terminal and internal)"));
172  itemRadioButton9->SetValue(false);
173  itemBoxSizer2->Add(itemRadioButton9, 0, wxALIGN_LEFT|wxALL, 5);
174 
175  wxStaticText* itemStaticText1 = new wxStaticText( itemCBulkCmdDlg1, wxID_STATIC, _("Where feature text"), wxDefaultPosition, wxDefaultSize, 0 );
176  itemBoxSizer2->Add(itemStaticText1, 0, wxALIGN_LEFT|wxALL, 5);
177 
178  m_StringConstraintPanel = new CStringConstraintPanel( itemCBulkCmdDlg1, false, wxID_ANY, wxDefaultPosition, wxDefaultSize, 0 );
179  itemBoxSizer2->Add(m_StringConstraintPanel, 0, wxALIGN_LEFT|wxALL|wxFIXED_MINSIZE, 0);
180 
181  m_OkCancel = new COkCancelPanel( itemCBulkCmdDlg1, wxID_ANY, wxDefaultPosition, wxSize(100, 100), 0 );
182  itemBoxSizer2->Add(m_OkCancel, 0, wxALIGN_CENTER_HORIZONTAL|wxALL, 5);
183 
184 ////@end CConvertCdsWithGapsToMiscFeat content construction
185 }
186 
187 
188 /*!
189  * Should we show tooltips?
190  */
191 
193 {
    // Tooltips are unconditionally enabled for this dialog.
194  return true;
195 }
196 
197 /*!
198  * Get bitmap resources
199  */
200 
// Bitmap lookup hook. This dialog defines no bitmaps, so the name is ignored
// and wxNullBitmap is returned for every request.
201 wxBitmap CConvertCdsWithGapsToMiscFeat::GetBitmapResource( const wxString& name )
202 {
203  // Bitmap retrieval
204 ////@begin CConvertCdsWithGapsToMiscFeat bitmap retrieval
205  wxUnusedVar(name);
206  return wxNullBitmap;
207 ////@end CConvertCdsWithGapsToMiscFeat bitmap retrieval
208 }
209 
210 /*!
211  * Get icon resources
212  */
213 
215 {
    // Icon lookup hook (signature line missing from this listing). The name is
    // ignored and wxNullIcon is returned for every request.
216  // Icon retrieval
217 ////@begin CConvertCdsWithGapsToMiscFeat icon retrieval
218  wxUnusedVar(name);
219  return wxNullIcon;
220 ////@end CConvertCdsWithGapsToMiscFeat icon retrieval
221 }
222 
223 
225 {
    // Builds one composite command that converts every qualifying coding
    // region under m_TopSeqEntry. A coding region qualifies when all hold:
    //   * its resolved sequence map contains at least one gap segment,
    //   * its flat-file text matches the string constraint (if one is set),
    //   * the first radio button is unselected, or
    //     CSplitCDSwithTooManyXs::TooManyXs() accepts the feature.
    // Returns a reset (null) command and sets m_ErrorMessage when nothing
    // qualified or when no conversion produced a sub-command.
    // NOTE(review): the signature line and listing lines 228 and 238 are
    // missing here; `string_constraint` and `total_loc` are declared on them.
226  CRef<CCmdComposite> cmd(new CCmdComposite("Convert CDS with gaps to misc_feat"));
227 
229 
230 
    // any_present: at least one feature passed all filters.
    // any_change:  at least one of those yielded a conversion sub-command.
231  bool any_change = false;
232  bool any_present = false;
233 
234  for (objects::CFeat_CI feat_it(m_TopSeqEntry, CSeqFeatData::eSubtype_cdregion); feat_it; ++feat_it)
235  {
236  objects::CSeq_feat_Handle fh = feat_it->GetSeq_feat_Handle();
237  const objects::CSeq_loc& feat_loc = feat_it->GetLocation();
239 
240  CConstRef<objects::CSeqMap> seq_map = objects::CSeqMap::GetSeqMapForSeq_loc(*total_loc, &m_TopSeqEntry.GetScope());
241 
    // Iterate over the resolved map, asking for gap segments (fFindGap),
    // across the feature's length on the feature's strand.
242  objects::CSeqMap_CI seq_map_ci = seq_map->ResolvedRangeIterator(&m_TopSeqEntry.GetScope(),
243  0,
244  objects::sequence::GetLength(feat_loc,&m_TopSeqEntry.GetScope()),
245  feat_loc.GetStrand(),
246  size_t(-1),
247  objects::CSeqMap::fFindGap);
248  //objects::CSeqMap_CI seq_map_ci = seq_map->BeginResolved(&m_TopSeqEntry.GetScope(), objects::SSeqMapSelector(objects::CSeqMap::fFindGap));
249 
    // NOTE(review): the scan keeps going after the first gap is found; only
    // the boolean result is used, so an early break looks possible.
250  bool gap_present(false);
251  for (; seq_map_ci; ++seq_map_ci)
252  {
253  if (seq_map_ci.GetType() == objects::CSeqMap::eSeqGap)
254  {
255  //TSeqPos start = seq_map_ci.GetPosition();
256  //TSeqPos length = seq_map_ci.GetLength();
257  gap_present = true;
258  }
259  }
    // Without a constraint every feature matches. Otherwise the feature's
    // flat-file text (CDS translation suppressed) is tested.
260  bool match_constraint(true);
261  if (string_constraint)
262  {
263  CFlatFileConfig cfg;
264  cfg.SetNeverTranslateCDS();
265  string str = CFlatFileGenerator::GetSeqFeatText(*feat_it, m_TopSeqEntry.GetScope(), cfg);
266  match_constraint = string_constraint->DoesTextMatch(str);
267  }
268 
    // The TooManyXs filter is applied only while the first radio button is
    // selected; with the other option this filter passes every feature.
269  bool too_many_x(true);
270  if (m_RadioButton->GetValue())
271  {
272  too_many_x = CSplitCDSwithTooManyXs::TooManyXs(fh);
273  }
274 
275  if (gap_present && match_constraint && too_many_x)
276  {
277  CRef<CCmdComposite> subcmd = m_Converter->Convert(*fh.GetOriginalSeq_feat(), false, m_TopSeqEntry.GetScope());
278  if (subcmd)
279  {
280  cmd->AddCommand(*subcmd);
281  any_change = true;
282  }
283  any_present = true;
284  }
285  }
286  if (!any_present)
287  {
288  m_ErrorMessage = "No features found!";
289  cmd.Reset(NULL);
290  }
291  else if (!any_change)
292  {
293  m_ErrorMessage = "All conversions failed.";
294  cmd.Reset(NULL);
295  }
296  return cmd;
297 }
298 
299 
300 
302 {
    // Returns m_ErrorMessage (signature line missing from this listing; the
    // cross-reference residue does not show it, so the name is inferred).
303  return m_ErrorMessage;
304 }
305 
306 
307 
// Builds a composite command converting "bad" coding regions and RNA features
// in seh to misc_feature, and reports what was converted.
//
// A CDS or RNA feature is converted when any of these holds:
//   * it is a CDS and one of the validator checks fires (stop in protein,
//     internal stop, bad start codon, no stop, bad protein start),
//   * getShortRRNA() flags it,
//   * getCdsRnaOverlap() flags it.
// For the last two reasons the associated gene, if found, is also deleted.
//
// Returns a reset (null) command when nothing was converted or when more than
// half of the examined features would be converted; in the latter case a
// message box is shown. Otherwise a CGenericReportDlg listing the conversions
// is shown and the command is returned.
//
// NOTE(review): listing lines 316, 318, 338, 362 and 365 are missing here;
// `converter`, `opt`, `gene_feat` and the CDS branch of the gene lookup are
// defined on them.
308 CRef<CCmdComposite> CConvertBadCdsAndRnaToMiscFeat::apply(wxWindow *parent, objects::CSeq_entry_Handle seh)
309 {
310  CRef<CCmdComposite> cmd(new CCmdComposite("Convert bad CDS and mRNA to misc_feat"));
311  CScope &scope = seh.GetScope();
    // modified: number of features converted; total: CDS + RNA features seen.
312  int modified = 0;
313  int total = 0;
314  string msg;
315 
317 
319  if (opt) {
320  opt->SetBool()->SetVal(true);
321  }
322  for (objects::CFeat_CI feat_it(seh); feat_it; ++feat_it)
323  {
324  objects::CSeq_feat_Handle fh = feat_it->GetSeq_feat_Handle();
325  if (fh.GetFeatType() == CSeqFeatData::e_Cdregion || fh.GetFeatType() == CSeqFeatData::e_Rna)
326  {
327  total++;
328  const CSeq_feat &feat = *fh.GetOriginalSeq_feat();
329  CBioseq_Handle bsh = scope.GetBioseqHandle(feat.GetLocation());
    // The validator checks run only for coding regions (short-circuit on type).
330  bool cds_validation = fh.GetFeatType() == CSeqFeatData::e_Cdregion && (validator::HasStopInProtein(feat, scope) ||
331  validator::HasInternalStop(feat, scope, false) ||
332  validator::HasBadStartCodon(feat, scope, false) ||
333  validator::HasNoStop(feat, &scope) ||
334  validator::HasBadProteinStart(feat, scope));
    // Both discrepancy checks are evaluated for every CDS/RNA feature, even
    // when cds_validation is already true, because their results are needed
    // again below to decide on gene removal.
335  bool disc_short_rrna = getShortRRNA(feat, scope);
336  bool disc_rna_cds_overlap = getCdsRnaOverlap(feat, bsh);
337  if (cds_validation ||
339  disc_short_rrna ||
340  disc_rna_cds_overlap
341  )
342  {
343  CRef<CCmdComposite> subcmd = converter->Convert(*fh.GetOriginalSeq_feat(), false, scope);
344  if (subcmd)
345  {
346  cmd->AddCommand(*subcmd);
347  modified++;
348  msg += edit::GetTextObjectDescription(feat, scope) + " converted to misc_feature\n";
349  //cout << NDiscrepancy::CReportObj::GetTextObjectDescription(feat, scope);
350  //cout << " " << validator::HasStopInProtein(feat, scope);
351  //cout << " " << validator::HasInternalStop(feat, scope, false);
352  //cout << " " << validator::HasBadStartCodon(feat, scope, false);
353  //cout << " " << validator::HasNoStop(feat, &scope);
354  //cout << " " << validator::HasBadProteinStart(feat, scope);
355  //cout << " " << getBacterialPartialNonextendable(feat, bsh);
356  //cout << " " << disc_short_rrna;
357  //cout << " " << disc_rna_cds_overlap << endl;
    // Gene removal is tied to the two discrepancy reasons only, not to the
    // validator failures.
358  if (disc_short_rrna || disc_rna_cds_overlap)
359  {
360  CSeq_feat_Handle gene;
361  if (fh.GetFeatType() == CSeqFeatData::e_Cdregion)
363  else
364  {
366  if (gene_feat)
367  gene = scope.GetSeq_featHandle(*gene_feat, CScope::eMissing_Null);
368  }
369  if (gene)
370  cmd->AddCommand(*CRef<CCmdDelSeq_feat>(new CCmdDelSeq_feat(gene)));
371  }
372  }
373  }
374  }
375  }
376 
    // Nothing converted, or more than half of all CDS/RNA features converted:
    // drop the command. Only the "more than half" case notifies the user.
377  if (modified == 0 || modified * 2 > total)
378  {
379  cmd.Reset();
380  if (modified * 2 > total)
381  wxMessageBox(_("More than 50% of coding regions and RNA features are bad"), wxT("Info"), wxOK | wxICON_INFORMATION);
382  }
383  else
384  {
    // The report dialog is shown with Show() and is not deleted here.
385  CGenericReportDlg* report = new CGenericReportDlg(parent);
386  report->SetTitle(wxT("Feature Conversions"));
387  report->SetText(msg);
388  report->Show();
389  }
390 
391  return cmd;
392 }
393 
394 // BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS
395 
397 {
    // Decides whether position `left` lies fewer than 3 bases from something
    // it could be extended to, and reports that distance in extend_len.
    //   * left < 3: true, with extend_len = left.
    //   * otherwise, for a sequence with a delta extension: walk the delta
    //     segments, remembering where the most recent gap literal ended; true
    //     when `left` is fewer than 3 bases past that point.
    // extend_len is written only when the result is true.
    // NOTE(review): the signature line and listing line 403 (part of the
    // else-if condition) are missing from this listing.
398  bool rval = false;
399  if (left < 3) {
400  rval = true;
401  extend_len = left;
402  } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
404  seq.GetInst().IsSetExt() &&
405  seq.GetInst().GetExt().IsDelta()) {
406  TSeqPos offset = 0;
407  TSeqPos last_gap_stop = 0;
408  ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
409  if ((*it)->IsLiteral()) {
    // offset is advanced first, so it marks the END of this literal.
    // A literal counts as a gap when it has no seq-data or its seq-data
    // is a gap.
410  offset += (*it)->GetLiteral().GetLength();
411  if (!(*it)->GetLiteral().IsSetSeq_data()) {
412  last_gap_stop = offset;
413  } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
414  last_gap_stop = offset;
415  }
416  } else if ((*it)->IsLoc()) {
417  offset += sequence::GetLength((*it)->GetLoc(), scope);
418  }
    // Stop once the walk has passed `left`.
419  if (offset > left) {
420  break;
421  }
422  }
    // If the last recorded gap ends beyond `left`, the unsigned subtraction
    // wraps to a large value and the test below is false.
423  if (left - last_gap_stop < 3) {
424  rval = true;
425  extend_len = left - last_gap_stop;
426  }
427  }
428  return rval;
429 }
430 
431 
433 {
    // Mirror of the left-hand check for the right end.
    //   * right > length - 4: true, with extend_len = length - right - 1.
    //   * otherwise, for a sequence with a delta extension: walk the delta
    //     segments, remembering where the most recently seen gap literal
    //     starts; true when that start is after `right` and fewer than 3
    //     bases away.
    // extend_len is written only when the result is true.
    // NOTE(review): the signature line and listing line 439 (part of the
    // else-if condition) are missing from this listing.
434  bool rval = false;
    // NOTE(review): TSeqPos is unsigned, so `seq.GetLength() - 4` wraps to a
    // very large value for sequences shorter than 4 and this branch is then
    // never taken. `right + 4 > seq.GetLength()` would avoid the wrap -
    // confirm intended behaviour for very short sequences.
435  if (right > seq.GetLength() - 4) {
436  rval = true;
437  extend_len = seq.GetLength() - right - 1;
438  } else if (seq.IsSetInst() && seq.GetInst().IsSetRepr() &&
440  seq.GetInst().IsSetExt() &&
441  seq.GetInst().GetExt().IsDelta()) {
442  TSeqPos offset = 0;
443  TSeqPos next_gap_start = 0;
444  ITERATE(CDelta_ext::Tdata, it, seq.GetInst().GetExt().GetDelta().Get()) {
445  if ((*it)->IsLiteral()) {
    // The gap start is recorded BEFORE offset is advanced, so it marks
    // the beginning of this literal.
446  if (!(*it)->GetLiteral().IsSetSeq_data()) {
447  next_gap_start = offset;
448  } else if ((*it)->GetLiteral().GetSeq_data().IsGap()) {
449  next_gap_start = offset;
450  }
451  offset += (*it)->GetLiteral().GetLength();
452  } else if ((*it)->IsLoc()) {
453  offset += sequence::GetLength((*it)->GetLoc(), scope);
454  }
    // Anything starting more than 3 bases past `right` cannot matter.
455  if (offset > right + 3) {
456  break;
457  }
458  }
459  if (next_gap_start > right && next_gap_start - right < 3) {
460  rval = true;
461  extend_len = next_gap_start - right;
462  }
463  }
464  return rval;
465 }
466 
467 
469 {
    // Returns true when an end of `loc` that does not already sit at the
    // sequence boundary cannot be extended (per IsExtendableLeft /
    // IsExtendableRight). The extend_len outputs of those helpers are discarded.
    // NOTE(review): the signature line and listing line 471 are missing. Line
    // 471 opens the block closed at listing line 479; by symmetry with the
    // stop check below it presumably tests loc.IsPartialStart(eExtreme_Positional)
    // - confirm against the original file.
470  bool rval = false;
472  TSeqPos start = loc.GetStart(eExtreme_Positional);
    // A start at position 0 is already at the sequence edge.
473  if (start > 0) {
474  TSeqPos extend_len = 0;
475  if (!IsExtendableLeft(start, seq, scope, extend_len)) {
476  rval = true;
477  }
478  }
479  }
    // The stop end is examined only if the start did not already decide.
480  if (!rval && loc.IsPartialStop(eExtreme_Positional)) {
481  TSeqPos stop = loc.GetStop(eExtreme_Positional);
    // NOTE(review): unsigned `seq.GetLength() - 1` wraps if the length is 0.
482  if (stop < seq.GetLength() - 1) {
483  TSeqPos extend_len = 0;
484  if (!IsExtendableRight(stop, seq, scope, extend_len)) {
485  rval = true;
486  }
487  }
488  }
489  return rval;
490 }
491 
492 
494 {
    // Reports whether `feat` is a coding region with a non-extendable end on
    // a non-eukaryotic nucleotide sequence. Returns false early for:
    //   * protein sequences,
    //   * any source descriptor whose lineage contains "Eukaryota"
    //     (case-insensitive),
    //   * features that are not coding regions,
    //   * features whose exception text already contains
    //     "unextendable partial coding region" (case-insensitive).
    // Otherwise the answer comes from IsNonExtendable().
    // (The signature line is missing from this listing.)
495  if (bsh.IsAa())
496  return false;
497 
498  for (CSeqdesc_CI source_ci(bsh, CSeqdesc::e_Source); source_ci; ++source_ci)
499  {
500  const CBioSource &biosource = source_ci->GetSource();
501  if (biosource.IsSetLineage() && NStr::FindNoCase(biosource.GetLineage(), "Eukaryota") != string::npos)
502  return false;
503  }
504 
505  //only examine coding regions
506  if (!feat.IsSetData() || !feat.GetData().IsCdregion()) {
507  return false;
508  }
509  //ignore if feature already has exception
510  if (feat.IsSetExcept_text() && NStr::FindNoCase(feat.GetExcept_text(), "unextendable partial coding region") != string::npos) {
511  return false;
512  }
513  CConstRef<CBioseq> seq = bsh.GetBioseqCore();
514 
515  return IsNonExtendable(feat.GetLocation(), *seq, &(bsh.GetScope()));
516 }
517 
518 
519 // DISC_SHORT_RRNA
// Length rule for one rRNA name fragment: (length threshold, flag).
// In the checks that read `it->second` in this file, a feature is flagged
// when its length is below the threshold and either the flag is false or the
// feature is marked partial.
520 typedef pair<size_t, bool> TRNALength;
522 
// Table of rRNA name fragments and their length rules.
// NOTE(review): the declaration lines (listing 521 and 523) are missing here;
// the cross-reference residue gives the type as
// map<const char*, TRNALength> named kTrnaLengthMap.
// NOTE(review): "28S" is listed twice (1000 and 3300). With const char* keys
// the map orders by pointer value, not by string content, so whether both
// entries survive depends on whether the two literals share an address -
// confirm which threshold is intended.
524  { "16S", { 1000, false } },
525  { "18S", { 1000, false } },
526  { "23S", { 2000, false } },
527  { "25S", { 1000, false } },
528  { "26S", { 1000, false } },
529  { "28S", { 1000, false } },
530  { "28S", { 3300, false } },
531  { "small", { 1000, false } },
532  { "large", { 1000, false } },
533  { "5.8S", { 130, true } },
534  { "5S", { 90, true } }
535 };
536 
537 
539 {
    // Returns true when `f` is an rRNA feature whose location is shorter than
    // the threshold of a matching length rule. Non-rRNA features yield false.
    // NOTE(review): the signature line and the loop header (listing line 550)
    // are missing; the loop presumably iterates the rRNA length table with
    // iterator `it` - confirm.
540  if (f.GetData().GetSubtype() != CSeqFeatData::eSubtype_rRNA) {
541  return false;
542  }
543 
544  bool is_bad = false;
545 
546  size_t len = sequence::GetLength(f.GetLocation(), scope);
547 
548  string rrna_name = f.GetData().GetRna().GetRnaProductName();
549 
    // A rule matches when the product name contains its key
    // (case-insensitive), the length is below its threshold, and either its
    // flag is false or the feature is marked partial. First match wins.
551  if (NStr::FindNoCase(rrna_name, it->first) != string::npos &&
552  len < it->second.first &&
553  (!it->second.second || (f.IsSetPartial() && f.GetPartial())) ) {
554  is_bad = true;
555  break;
556  }
557  }
558 
559  return is_bad;
560 }
561 
562 
564 {
    // True when the feature has no Partial field set and IsShortrRNA() flags
    // it. (The signature line is missing from this listing.)
    // NOTE(review): IsShortrRNA() accepts the flagged rules (5.8S, 5S) only
    // for features with Partial set and true, which this guard excludes, so
    // those rules can never fire through this function - confirm intent.
565  return !feat.IsSetPartial() && IsShortrRNA(feat, &scope);
566 }
567 
568 
569 // DISC_RNA_CDS_OVERLAP
570 
572 {
    // Returns true when the coding region `cds` overlaps any eligible RNA
    // feature on `bsh`. RNA features are skipped when they are:
    //   * tRNA on a sequence classified as eukaryotic below,
    //   * mRNA or ncRNA,
    //   * rRNA that is flagged by the rRNA length rules.
    // NOTE(review): the signature line and listing lines 580-581 (start of the
    // is_eukaryotic condition) and 612 (loop header over the length rules) are
    // missing from this listing.
573  CScope &scope = bsh.GetScope();
574 
    // A source descriptor marks the sequence as eukaryotic when its genome is
    // not one of the excluded organelle values and its lineage contains
    // "Eukaryota" (case-insensitive). Part of this condition is on the
    // missing lines.
575  bool is_eukaryotic = false;
576  for (CSeqdesc_CI source_ci(bsh, CSeqdesc::e_Source); source_ci; ++source_ci)
577  {
578  const CBioSource &biosource = source_ci->GetSource();
579  CBioSource::EGenome genome = (CBioSource::EGenome) biosource.GetGenome();
582  && genome != CBioSource::eGenome_plastid
583  && genome != CBioSource::eGenome_apicoplast
584  && (biosource.IsSetLineage() && NStr::FindNoCase(biosource.GetLineage(), "Eukaryota") != string::npos) )
585  is_eukaryotic = true;;
586  }
587 
588 
589  for (objects::CFeat_CI feat_it(bsh, CSeqFeatData::e_Rna); feat_it; ++feat_it)
590  {
591  objects::CSeq_feat_Handle fh = feat_it->GetSeq_feat_Handle();
592 
593  const CSeq_loc& loc_i = fh.GetLocation();
594  CSeqFeatData::ESubtype subtype = fh.GetFeatSubtype();
595  if (subtype == CSeqFeatData::eSubtype_tRNA)
596  {
597 
598  if (is_eukaryotic)
599  {
600  continue;
601  }
602  }
603  else if (subtype == CSeqFeatData::eSubtype_mRNA || subtype == CSeqFeatData::eSubtype_ncRNA)
604  {
605  continue;
606  }
607  else if (subtype == CSeqFeatData::eSubtype_rRNA)
608  {
    // Same length-rule test as in IsShortrRNA, applied to the feature handle.
609  size_t len = sequence::GetLength(loc_i, &scope);
610  string rrna_name = fh.GetData().GetRna().GetRnaProductName();
611  bool is_bad = false;
613  {
614  if (NStr::FindNoCase(rrna_name, it->first) != string::npos && len < it->second.first && (!it->second.second || (fh.IsSetPartial() && fh.GetPartial())) )
615  {
616  is_bad = true;
617  break;
618  }
619  }
620  if (is_bad)
621  {
622  continue;
623  }
624  }
625 
626 
627 
628 
    // Cheap pre-check on total ranges first; the full location comparison is
    // run only when the ranges intersect.
629  const CSeq_loc& loc_j = cds.GetLocation();
630  sequence::ECompare compare;
631  {
632  CSeq_loc::TRange r1 = loc_j.GetTotalRange();
633  CSeq_loc::TRange r2 = loc_i.GetTotalRange();
634  if (r1.GetFrom() >= r2.GetToOpen() || r2.GetFrom() >= r1.GetToOpen())
635  {
636  compare = sequence::eNoOverlap;
637  }
638  else
639  {
640  compare = sequence::Compare(loc_j, loc_i, &scope, sequence::fCompareOverlapping);
641  }
642  }
643 
644 
    // NOTE(review): every branch below returns true, so the whole chain is
    // equivalent to `if (compare != sequence::eNoOverlap) return true;`.
    // The per-case structure is kept as written.
645  if (compare == sequence::eSame)
646  {
647  return true;
648  }
649  else if (compare == sequence::eContained)
650  {
651  return true; // no Fatal();
652  }
653  else if (compare == sequence::eContains)
654  {
655  if (fh.GetFeatSubtype() == CSeqFeatData::eSubtype_tRNA)
656  {
657  return true;
658  }
659  else
660  {
661  return true;
662  }
663  }
664  else if (compare != sequence::eNoOverlap)
665  {
666  ENa_strand cds_strand = loc_j.GetStrand();
667  ENa_strand rna_strand = loc_i.GetStrand();
668  if (cds_strand == eNa_strand_minus && rna_strand != eNa_strand_minus)
669  {
670  return true; // no Fatal();
671  }
672  else if (cds_strand != eNa_strand_minus && rna_strand == eNa_strand_minus)
673  {
674  return true; // no Fatal();
675  }
676  else
677  {
678  return true; // no Fatal();
679  }
680  }
681  }
682  return false;
683 }
684 
686 {
    // Returns true when the RNA feature `rna` overlaps any coding region on
    // `bsh`. Returns false up front when `rna` is:
    //   * tRNA on a sequence classified as eukaryotic below,
    //   * mRNA or ncRNA,
    //   * rRNA that is flagged by the rRNA length rules.
    // NOTE(review): the signature line and listing lines 694-695 (start of the
    // is_eukaryotic condition) and 723 (loop header over the length rules) are
    // missing from this listing.
687  CScope &scope = bsh.GetScope();
688 
    // Same eukaryote classification as in the CDS-side check; part of the
    // condition is on the missing lines.
689  bool is_eukaryotic = false;
690  for (CSeqdesc_CI source_ci(bsh, CSeqdesc::e_Source); source_ci; ++source_ci)
691  {
692  const CBioSource &biosource = source_ci->GetSource();
693  CBioSource::EGenome genome = (CBioSource::EGenome) biosource.GetGenome();
696  && genome != CBioSource::eGenome_plastid
697  && genome != CBioSource::eGenome_apicoplast
698  && (biosource.IsSetLineage() && NStr::FindNoCase(biosource.GetLineage(), "Eukaryota") != string::npos) )
699  is_eukaryotic = true;;
700  }
701 
702 
703 
704  const CSeq_loc& loc_i = rna.GetLocation();
705  CSeqFeatData::ESubtype subtype = rna.GetData().GetSubtype();
706  if (subtype == CSeqFeatData::eSubtype_tRNA)
707  {
708 
709  if (is_eukaryotic)
710  {
711  return false;
712  }
713  }
714  else if (subtype == CSeqFeatData::eSubtype_mRNA || subtype == CSeqFeatData::eSubtype_ncRNA)
715  {
716  return false;
717  }
718  else if (subtype == CSeqFeatData::eSubtype_rRNA)
719  {
    // Same length-rule test as in IsShortrRNA.
720  size_t len = sequence::GetLength(loc_i, &scope);
721  string rrna_name = rna.GetData().GetRna().GetRnaProductName();
722  bool is_bad = false;
724  {
725  if (NStr::FindNoCase(rrna_name, it->first) != string::npos && len < it->second.first && (!it->second.second || (rna.IsSetPartial() && rna.GetPartial())) )
726  {
727  is_bad = true;
728  break;
729  }
730  }
731  if (is_bad)
732  {
733  return false;
734  }
735  }
736 
737 
738  for (objects::CFeat_CI feat_it(bsh, CSeqFeatData::e_Cdregion); feat_it; ++feat_it)
739  {
740  objects::CSeq_feat_Handle fh = feat_it->GetSeq_feat_Handle();
741 
    // Cheap pre-check on total ranges first; the full location comparison is
    // run only when the ranges intersect.
742  const CSeq_loc& loc_j = fh.GetLocation();
743  sequence::ECompare compare;
744  {
745  CSeq_loc::TRange r1 = loc_j.GetTotalRange();
746  CSeq_loc::TRange r2 = loc_i.GetTotalRange();
747  if (r1.GetFrom() >= r2.GetToOpen() || r2.GetFrom() >= r1.GetToOpen())
748  {
749  compare = sequence::eNoOverlap;
750  }
751  else
752  {
753  compare = sequence::Compare(loc_j, loc_i, &scope, sequence::fCompareOverlapping);
754  }
755  }
756 
757 
    // NOTE(review): every branch below returns true, so the whole chain is
    // equivalent to `if (compare != sequence::eNoOverlap) return true;`.
    // Also, `fh` here iterates coding regions, so the tRNA subtype test in
    // the eContains branch looks like a leftover from the CDS-side copy.
758  if (compare == sequence::eSame)
759  {
760  return true;
761  }
762  else if (compare == sequence::eContained)
763  {
764  return true; // no Fatal();
765  }
766  else if (compare == sequence::eContains)
767  {
768  if (fh.GetFeatSubtype() == CSeqFeatData::eSubtype_tRNA)
769  {
770  return true;
771  }
772  else
773  {
774  return true;
775  }
776  }
777  else if (compare != sequence::eNoOverlap)
778  {
779  ENa_strand cds_strand = loc_j.GetStrand();
780  ENa_strand rna_strand = loc_i.GetStrand();
781  if (cds_strand == eNa_strand_minus && rna_strand != eNa_strand_minus)
782  {
783  return true; // no Fatal();
784  }
785  else if (cds_strand != eNa_strand_minus && rna_strand == eNa_strand_minus)
786  {
787  return true; // no Fatal();
788  }
789  else
790  {
791  return true; // no Fatal();
792  }
793  }
794  }
795  return false;
796 }
797 
799 {
    // Dispatches on the feature type: coding regions go to getCdsOverlap(),
    // RNA features go to getRnaOverlap(); anything else (or a feature without
    // data) yields false. (The signature line is missing from this listing.)
800  if (feat.IsSetData() && feat.GetData().IsCdregion())
801  return getCdsOverlap(feat, bsh);
802  if (feat.IsSetData() && feat.GetData().IsRna())
803  return getRnaOverlap(feat, bsh);
804  return false;
805 }
806 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
const string & GetLineage(void) const
Definition: BioSource.cpp:360
bool IsSetLineage(void) const
Definition: BioSource.cpp:355
CBioseq_Handle –.
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
objects::CSeq_entry_Handle m_TopSeqEntry
bool Create(wxWindow *parent, wxWindowID id, const wxString &title, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxDefaultSize, long style=wxDEFAULT_FRAME_STYLE, const wxString &name=wxFrameNameStr)
string m_ErrorMessage
static bool IsExtendableRight(TSeqPos right, const CBioseq &seq, CScope *scope, TSeqPos &extend_len)
static bool IsNonExtendable(const CSeq_loc &loc, const CBioseq &seq, CScope *scope)
static bool getRnaOverlap(const CSeq_feat &rna, CBioseq_Handle bsh)
static bool IsShortrRNA(const CSeq_feat &f, CScope *scope)
static CRef< CCmdComposite > apply(wxWindow *parent, objects::CSeq_entry_Handle seh)
static bool getBacterialPartialNonextendable(const CSeq_feat &feat, CBioseq_Handle bsh)
static bool getShortRRNA(const CSeq_feat &feat, CScope &scope)
static bool getCdsRnaOverlap(const CSeq_feat &feat, CBioseq_Handle bsh)
static bool getCdsOverlap(const CSeq_feat &cds, CBioseq_Handle bsh)
static bool IsExtendableLeft(TSeqPos left, const CBioseq &seq, CScope *scope, TSeqPos &extend_len)
virtual CRef< CCmdComposite > Convert(const objects::CSeq_feat &orig, bool keep_orig, objects::CScope &scope)
static bool ShowToolTips()
Should we show tooltips?
void Init()
Initialises member variables.
wxBitmap GetBitmapResource(const wxString &name)
Retrieves bitmap resources.
wxIcon GetIconResource(const wxString &name)
Retrieves icon resources.
bool Create(wxWindow *parent, wxWindowID id=wxID_ANY, const wxString &caption=_("Convert Coding Regions With Gaps to Misc Features"), const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxCAPTION|wxRESIZE_BORDER|wxSYSTEM_MENU|wxCLOSE_BOX|wxTAB_TRAVERSAL)
Creation.
void CreateControls()
Creates the controls and sizers.
CRef< CConversionOption > FindRemoveTranscriptId()
CFlatFileConfig & SetNeverTranslateCDS(bool val=true)
static string GetSeqFeatText(const CMappedFeat &feat, CScope &scope, const CFlatFileConfig &cfg, CRef< feature::CFeatTree > ftree=null)
void SetText(const wxString &text)
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static bool TooManyXs(objects::CSeq_feat_Handle fh)
CRef< edit::CStringConstraint > GetStringConstraint()
IWorkbench is the central interface in the application framework.
Definition: workbench.hpp:113
Definition: map.hpp:338
USING_SCOPE(objects)
map< const char *, TRNALength > TRNALengthMap
static const TRNALengthMap kTrnaLengthMap
pair< size_t, bool > TRNALength
#define _(proto)
Definition: ct_nlmzip_i.h:78
#define wxFIXED_MINSIZE
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static void Init(void)
Definition: cursor6.c:76
static const char * str(char *buf, int n)
Definition: stats.c:84
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ fMerge_SingleRange
Definition: Seq_loc.hpp:332
CMappedFeat GetBestGeneForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0, CFeatTree::EBestGeneType lookup_type=CFeatTree::eBestGene_TreeOnly)
Definition: feature.cpp:3321
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
ECompare
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eNoOverlap
CSeq_locs do not overlap or abut.
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)
Definition: sequence.cpp:1366
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:200
@ eMissing_Null
Definition: scope.hpp:157
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
bool IsAa(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
position_type GetToOpen(void) const
Definition: range.hpp:138
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:422
EGenome
biological context
Definition: BioSource_.hpp:97
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:913
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1405
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1393
bool IsRna(void) const
Check if variant Rna is selected.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
Definition: Seq_inst_.hpp:546
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
Definition: Seq_inst_.hpp:826
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
bool IsDelta(void) const
Check if variant Delta is selected.
Definition: Seq_ext_.hpp:336
const TExt & GetExt(void) const
Get the Ext member data.
Definition: Seq_inst_.hpp:838
const TDelta & GetDelta(void) const
Get the variant data.
Definition: Seq_ext_.cpp:180
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
@ eRepr_delta
sequence made by changes (delta) to others
Definition: Seq_inst_.hpp:100
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
END_EVENT_TABLE()
int len
#define wxT(x)
Definition: muParser.cpp:41
const struct ncbi::grid::netcache::search::fields::SIZE size
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
bool HasNoStop(const CSeq_feat &feat, CScope *scope)
Definition: utilities.cpp:2673
bool HasBadProteinStart(const CSeqVector &sv)
Definition: utilities.cpp:2467
bool HasInternalStop(const CSeq_feat &feat, CScope &scope, bool ignore_exceptions)
Definition: utilities.cpp:2416
bool HasStopInProtein(const CSeq_feat &feat, CScope &scope)
Definition: utilities.cpp:2519
bool HasBadStartCodon(const CSeq_feat &feat, CScope &scope, bool ignore_exceptions)
Definition: utilities.cpp:2362
static static static wxID_ANY
string GetTextObjectDescription(const CSeq_feat &sf, CScope &scope)
Modified on Tue May 21 10:54:51 2024 by modify_doxy.py rev. 669887