/* $Id: create_gene_model_tool_manager.cpp 47080 2022-07-22 18:11:54Z asztalos $
2  * ===========================================================================
3  *
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio
27  *
28  * File Description:
29  *
30  */
32 #include <ncbi_pch.hpp>
36 #include <gui/core/document.hpp>
43 #include <gui/objutils/label.hpp>
51 #include <gui/objutils/label.hpp>
53 #include <objmgr/util/sequence.hpp>
/// Constructor initializer list: hands the tool's descriptive string
/// literals to the CAlgoToolManagerBase constructor and initializes
/// m_ParamsPanel to NULL. The constructor body itself is empty.
63 : CAlgoToolManagerBase("Create Gene Model",
64  "",
65  "Create a gene model from an alignment",
66  "Create a new gene model based on one or more "
67  "alignments of related transcripts to a genome",
68  "",
69  "Alignment Creation"),
70  m_ParamsPanel(NULL)
71 {
72 }
76 {
 /// Returns the constant string "create_gene_model_tool_manager".
 // NOTE(review): presumably this is the extension identifier of the tool
 // manager -- confirm against the class declaration.
77  return "create_gene_model_tool_manager";
78 }
82 {
 /// Returns the constant string "Create Gene Model Tool".
 // NOTE(review): presumably the human-readable label paired with the
 // identifier string returned above -- confirm against the class declaration.
83  return "Create Gene Model Tool";
84 }
88 {
92 }
96 {
100 }
104 {
 /// Sets up the parameters panel when none exists yet and always returns true.
 // The guarded branch runs only while m_ParamsPanel is still NULL.
105  if(m_ParamsPanel == NULL) {
109  m_ParamsPanel->Hide(); // to reduce flicker
 // the panel's registry path is this manager's registry path plus ".ParamsPanel"
113  m_ParamsPanel->SetRegistryPath(m_RegPath + ".ParamsPanel");
115  }
116  return true;
117 }
121 {
 /// Validates the current parameters.
 /// Returns false when no alignments are present in m_Params.m_Alignments
 /// (an error message is prepared in that case) and true otherwise.
122  string err;
123  if(m_Params.m_Alignments.size() == 0) {
124  err = "Please select at least one set of alignments!";
125  }
 // a non-empty message means validation failed
126  if( ! err.empty()) {
128  return false;
129  }
130  return true;
131 }
135 {
 /// Rebuilds m_Alignments from m_InputObjects.
 /// Both m_Params.m_Alignments and m_Alignments are emptied first, so any
 /// previous selection is discarded.
136  m_Params.m_Alignments.clear();
137  m_Alignments.clear();
 // first pass: keep every input whose object really is a CSeq_align
138  ITERATE(vector<TConstScopedObjects>, it, m_InputObjects) {
139  ITERATE(TConstScopedObjects, it2, *it) {
140  const CSeq_align* align = dynamic_cast<const CSeq_align*>(it2->object.GetPointerOrNull());
141  if (align) m_Alignments.push_back(*it2);
142  }
143  }
 // nothing matched directly: fall back to x_ConvertInputObjects, asking it
 // for CSeq_align objects
145  if (m_Alignments.empty()) {
146  x_ConvertInputObjects(CSeq_align::GetTypeInfo(), m_Alignments);
147  }
150 }
153 {
 /// Removes duplicate entries from m_Alignments, keeping the first
 /// occurrence of each entry and the original relative order.
 // Each entry is linearly searched for in the entries already kept, so the
 // cost grows quadratically with the number of alignments.
154  TConstScopedObjects temp;
155  for (auto& it: m_Alignments) {
156  if (find(temp.begin(), temp.end(), it) == temp.end()) {
157  temp.push_back(it);
158  }
159  }
 // replace the member with the de-duplicated list
161  m_Alignments.assign(temp.begin(), temp.end());
162 }
166 {
 /// Returns the parameters panel pointer held in m_ParamsPanel
 /// (NULL if no panel has been set).
167  return m_ParamsPanel;
168 }
172 {
 /// Returns the address of the m_Params member; the object stays owned
 /// by this instance.
173  return &m_Params;
174 }
178 {
 /// Always returns 0.
 // NOTE(review): presumably a null pointer meaning "nothing to provide" --
 // confirm against the declared return type.
179  return 0;
180 }
183 {
 /// Creates the task that performs gene model creation and returns it.
184  if (m_ProjectSelPanel)
186  CSelectProjectOptions options;
 // the task receives the description string, the service, the project
 // options and a copy or reference of the current parameters
190  CRef<CCreateGeneModelTask> task(new CCreateGeneModelTask("Creating gene models", srv, options, m_Params));
 // Release() detaches the object from the CRef so a plain pointer can be returned
191  return task.Release();
192 }
194 //////////////////////////////////////////////////////////////
195 ///SGeneCdsmRnaFeats
198 {
 /// Determines the Seq-id of the sequence this gene/CDS/mRNA triple is
 /// located on. The gene location is consulted first, then the CDS, then
 /// the mRNA; the first one that yields a non-empty id handle wins.
 /// Returns a null reference when none of them yields an id.
199  CConstRef<CSeq_id> seq_id;
200  if (gene) {
201  CSeq_id_Handle gene_idh = sequence::GetIdHandle(gene->GetLocation(), scope);
202  if (gene_idh) {
203  seq_id = gene_idh.GetSeqId();
204  }
205  }
 // the CDS is only looked at when the gene did not provide an id
206  if (!seq_id && cds) {
207  CSeq_id_Handle cds_idh = sequence::GetIdHandle(cds->GetLocation(), scope);
208  if (cds_idh) {
209  seq_id = cds_idh.GetSeqId();
 // NOTE(review): seq_id was assigned from cds_idh on the previous line,
 // so this compares the id with itself; the mismatch exception looks
 // unreachable as written -- confirm the intended check.
210  if (!cds_idh.GetSeqId()->Match(*seq_id)) {
211  NCBI_USER_THROW("CDS and Gene Seq_ids are not matching!");
212  }
213  }
214  }
 // the mRNA is only looked at when neither gene nor CDS provided an id
215  if (!seq_id && mRNA) {
216  CSeq_id_Handle mRNA_idh = sequence::GetIdHandle(mRNA->GetLocation(), scope);
217  if (mRNA_idh) {
218  seq_id = mRNA_idh.GetSeqId();
 // NOTE(review): same pattern as for the CDS above -- the id is compared
 // with the value it was just assigned to.
219  if (!mRNA_idh.GetSeqId()->Match(*seq_id)) {
220  NCBI_USER_THROW("mRNA and Gene Seq_ids are not matching!");
221  }
222  }
223  }
224  return seq_id;
225 }
228 {
 /// Links the gene, mRNA and CDS of this triple to each other.
 /// For every pair that is present, feature cross-references are created
 /// in both directions. For the pairs involving the gene,
 /// s_CreateDBXref(gene, other, dbname) is called as well.
 // gene <-> mRNA
230  if (gene && mRNA) {
231  s_CreateDBXref(*(gene), *(mRNA), dbname);
232  s_CreateXRefLink(*(mRNA), *(gene));
233  s_CreateXRefLink(*(gene), *(mRNA));
234  }
 // gene <-> CDS
235  if (gene && cds) {
236  s_CreateDBXref(*(gene), *(cds), dbname);
237  s_CreateXRefLink(*(cds), *(gene));
238  s_CreateXRefLink(*(gene), *(cds));
239  }
 // CDS <-> mRNA (no dbxref is copied for this pair)
240  if (cds && mRNA) {
241  s_CreateXRefLink(*(cds), *(mRNA));
242  s_CreateXRefLink(*(mRNA), *(cds));
243  }
244 }
247 {
 /// Adds a cross-reference to from_feat that points at to_feat:
 /// the xref receives to_feat's feature id and is appended to
 /// from_feat's xref list.
 // NOTE(review): to_feat.SetId() is the non-const accessor -- presumably it
 // creates an empty id when none is set; confirm ids are assigned beforehand.
249  xref->SetId(to_feat.SetId());
250  from_feat.SetXref().push_back(xref);
251 }
/// Adds a database cross-reference named dbname to to_feat, taking the
/// value from `tag`.
/// @param from_feat  feature that must already carry dbxrefs
/// @param to_feat    feature that receives the new dbxref
/// @param dbname     database name; a blank name makes the call a no-op
/// Nothing is done when dbname is blank, when from_feat has no dbxrefs, or
/// when the tag has no value set.
// NOTE(review): `tag` is presumably from_feat's dbxref for dbname -- confirm.
253 void SGeneCdsmRnaFeats::s_CreateDBXref(const CSeq_feat& from_feat, CSeq_feat& to_feat, const string& dbname)
254 {
255  if (NStr::IsBlank(dbname) || !from_feat.IsSetDbxref())
256  return;
 // the tag value is either an integer id or a string; copy whichever is set
259  if (tag && tag->IsSetTag()) {
260  if (tag->GetTag().IsId()) {
261  CObject_id::TId id = tag->GetTag().GetId();
262  to_feat.AddDbxref(dbname, id);
263  } else if (tag->GetTag().IsStr()) {
264  CObject_id::TStr str = tag->GetTag().GetStr();
265  to_feat.AddDbxref(dbname, str);
266  }
267  }
268 }
271 ///////////////////////////////////////////////////////////////
272 /// CCreateGeneModelTask
/// Name of the dbxref database ("GeneID") that x_UpdateGeneOnSequence
/// looks up on gene, mRNA and CDS features.
274 const string CCreateGeneModelTask::sGeneDbName("GeneID");
277 {
278 }
281 {
 /// Executes the prepared command and reports the task outcome.
 /// Returns eFailed when there is no command, or when the workspace,
 /// document or command processor cannot be obtained in merge mode.
283  if (cmd) {
284  if (m_Params.m_MergeResults) {
 // merge mode: run the command through the undo manager of the document
 // that owns m_Scope
287  _ASSERT(m_Scope);
289  CRef<CGBWorkspace> ws = m_Service->GetGBWorkspace();
290  if (!ws) return eFailed;
292  CGBDocument* doc = dynamic_cast<CGBDocument*>(ws->GetProjectFromScope(*m_Scope));
293  if (!doc) return eFailed;
 // NOTE(review): the pointer is the address of GetUndoManager()'s result;
 // if that is a reference the null test below can never fire -- confirm.
295  ICommandProccessor* cmdProcessor = &doc->GetUndoManager();
296  if (!cmdProcessor) return eFailed;
298  cmdProcessor->Execute(cmd);
299  return eCompleted;
301  } else {
 // non-merge mode: the outcome depends on the created project items
302  if (!m_Items.empty()) {
307  return eCanceled;
309  }
310  return eCompleted;
311  }
312  }
314  return eFailed;
315 }
/// Prepares an exception message for the error box shown when gene model
/// creation fails.
/// @param msg  raw message text
/// @return msg unchanged when it is blank; otherwise the message with a
///         leading "CreateGeneModelFromAlign()" removed (if present) and
///         its first character upper-cased when that is a lowercase letter
317 static string s_RetrieveMessage(const string& msg)
318 {
319  if (NStr::IsBlank(msg))
320  return msg;
322  string error(msg);
 // drop the function-name prefix, if the message starts with it
323  string msg_gmodel("CreateGeneModelFromAlign()");
324  if (NStr::StartsWith(error, msg_gmodel)) {
325  error = error.substr(msg_gmodel.length());
327  }
 // capitalize the first letter; the casts to unsigned char keep
 // isalpha/islower well-defined for characters with the high bit set
329  if (!NStr::IsBlank(error)
330  && isalpha((unsigned char)error[0])
331  && islower((unsigned char)error[0])) {
332  error[0] = Upcase(error[0]);
333  }
334  return error;
335 }
338 {
 /// Builds the composite command that creates gene model annotations from
 /// the given alignments. Returns an empty CRef when feature generation
 /// fails for any alignment or when no features are produced.
340  ///
341  /// assure we're all in one scope
342  ///
343  CRef<CScope> scope;
344  {{
345  ITERATE( TConstScopedObjects, iter, aligns ){
346  if( !scope ){
347  scope.Reset( const_cast<CScope*>(&*iter->scope) );
348  } else if( scope != &*iter->scope ){
349  scope.Reset();
351  "All alignments must be within the same project"
352  );
353  }
354  }
355  }}
 // remember the common scope for later use by the task
357  m_Scope = scope;
359  ///
360  /// meat goes here
361  ///
 // the user parameters below select the feature generator options (opts)
363  if( m_Params.m_CreateGene ){
365  }
366  if( m_Params.m_CreateMrna ){
368  }
369  if( m_Params.m_CreateCds ){
371  }
374  }
377  }
378  if( m_Params.m_TranslateCds ){
380  }
383  }
 // grouping by gene id is only honoured when gene creation is enabled in opts
385  bool f_group_by_gene_id = m_Params.m_GroupByGeneId;
386  f_group_by_gene_id &= (opts & CGeneModel::fCreateGene) != 0;
389  list< CRef<CSeq_annot> > annots;
390  CRef<CSeq_entry> entry;
 // convert every alignment into an annotation (plus translated proteins);
 // the first failure aborts the whole operation
392  TGeneCDSmRNAList generatedFeats;
393  ITERATE( TConstScopedObjects, iter, aligns ){
394  const CSeq_align& align = dynamic_cast<const CSeq_align&>(*iter->object);
395  try
396  {
397  CRef<CSeq_annot> annot( new CSeq_annot() );
398  CRef<CBioseq_set> translated_proteins( new CBioseq_set() );
399  CFeatureGenerator FeatureGenerator(*m_Scope);
401  FeatureGenerator.SetFlags(opts);
402  CConstRef<CSeq_align> clean_align = FeatureGenerator.CleanAlignment(align);
403  FeatureGenerator.ConvertAlignToAnnot(*clean_align, *annot, *translated_proteins);
 // keep only annotations that actually received data
405  if( annot->IsSetData() ){
406  annots.push_back( annot );
407  if( translated_proteins->IsSetSeq_set() ){
408  if( !entry ){
409  entry.Reset( new CSeq_entry() );
410  }
411  CRef<CSeq_entry> proteins( new CSeq_entry() );
412  proteins->SetSet( *translated_proteins );
413  entry->SetSet().SetSeq_set().push_back( proteins );
414  }
415  x_StoreGeneratedFeats(*annot, generatedFeats);
416  }
417  } catch( CException& e ){
 // log the raw message, show a cleaned-up version to the user, give up
418  LOG_POST( Error << "CreateGeneModel tool failed: " << e.GetMsg() );
419  string msg = s_RetrieveMessage(e.GetMsg());
420  NcbiErrorBox(msg) ;
421  return CRef<CCmdComposite>();
422  }
423  }
425  if (annots.empty())
426  return CRef<CCmdComposite>();
 // index the generated gene/CDS/mRNA triples by the sequence they are on
429  ITERATE(TGeneCDSmRNAList, it, generatedFeats) {
430  CConstRef<CSeq_id> id = (*it).GetID(scope);
432  m_GeneModelMap[idh].push_back(*it);
433  }
434  ///
435  /// first, separate by seq-id
436  /// we plan to create a single annotation for each placed sequence
437  ///
438  typedef map<CSeq_id_Handle, list< CRef<CSeq_feat> > > TFeatMap;
439  TFeatMap feats;
 // gb_scope is created on first use only (see below)
441  CRef<CScope> gb_scope;
442  NON_CONST_ITERATE (list< CRef<CSeq_annot> >, iter, annots) {
443  NON_CONST_ITERATE (CSeq_annot::TData::TFtable, i, (*iter)->SetData().SetFtable()) {
444  CSeq_feat& feat = **i;
445  CSeq_id_Handle idh = sequence::GetIdHandle(feat.GetLocation(), scope);
446  if (idh) { // check for an empty handle (e.g., when the location contains more than one id)
 // single-pass do/while(false): `break` skips the rest of the conversion
448  do { // convert eligible product to GIs
450  || !feat.CanGetProduct()
451  || !feat.GetProduct().IsWhole()
452  || feat.GetProduct().GetWhole().IsGi())
453  break;
455  CBioseq_Handle bsh = scope->GetBioseqHandle(feat.GetProduct().GetWhole());
456  if (!bsh)
457  break;
458  if (bsh.GetTSE_Handle().CanBeEdited()) // was edited with OM methods
459  break;
460  // Available in SC-15
461  //if (bsh.GetTSE_Handle().GetDataLoader() == 0) // was added to the scope by AddBioseq etc.
462  // break;
 // the GI lookup goes through a separate scope with default data loaders
464  if (!gb_scope) {
466  gb_scope.Reset(new CScope(*obj_mgr));
467  gb_scope->AddDefaults();
468  }
469  TGi gi = sequence::GetGiForId(feat.GetProduct().GetWhole(), *gb_scope);
470  if (gi > ZERO_GI)
471  feat.SetProduct().SetWhole().SetGi(gi);
472  } while (false);
475  // set all seq-id elements to be represented as GI numbers (CR0001)
476  const CSeq_id* seq_id = feat.GetLocation().GetId();
477  if (seq_id && !seq_id->IsGi()) {
478  TGi loc_gi = sequence::GetGiForId(*seq_id, *scope);
479  if (loc_gi > ZERO_GI) {
480  CRef<CSeq_id> new_id(new CSeq_id);
481  new_id->SetGi(loc_gi);
482  feat.SetLocation().SetId(*new_id);
483  }
484  }
486  if (feat.GetData().IsCdregion()) {
487  CCdregion& cds = feat.SetData().SetCdregion();
488  // set CDS genetic code id to one by default (CR0022)
489  if (!cds.IsSetCode()) {
490  int id = 1;
492  code->SetId(id);
493  cds.SetCode(*code);
494  }
495  // set CDS frame to one by default (CR0019)
496  if (!cds.IsSetFrame()) {
498  }
499  }
 // file the feature under the id handle computed before any id rewriting
501  feats[idh].push_back(CRef<CSeq_feat>(&feat));
502  }
503  }
504  }
 // the per-alignment annotations are no longer needed; the list is reused
 // below for the per-sequence annotations
505  annots.clear();
507  _ASSERT(m_GeneModelMap.size() == feats.size());
509  CTime time(CTime::eCurrent);
510  if (feats.empty())
511  return CRef<CCmdComposite>();
 // one annotation is produced per sequence id
514  CRef<CCmdComposite> cmd(new CCmdComposite("Create New Annotation from Alignment"));
515  NON_CONST_ITERATE( TFeatMap, iter, feats ){
516  list< CRef<CSeq_feat> >::const_iterator feat_it = iter->second.begin();
517  if( feat_it == iter->second.end() ){
518  continue;
519  }
 // total range covered by all features on this sequence (used in the title)
521  TSeqRange range = (*feat_it)->GetLocation().GetTotalRange();
522  for( ++feat_it; feat_it != iter->second.end(); ++feat_it ){
523  range += (*feat_it)->GetLocation().GetTotalRange();
524  }
526  CSeq_id_Handle idh = iter->first;
527  CRef<CSeq_annot> annot(new CSeq_annot());
 // name, title and creation date are only set for stand-alone annotations
528  if (!m_Params.m_MergeResults) {
529  string label;
530  CSeq_id_Handle idhbest = sequence::GetId( idh, *scope, sequence::eGetId_Best );
531  idhbest.GetSeqId()->GetLabel( &label, CSeq_id::eContent );
 // positions are shown 1-based
532  label += " (";
533  label += NStr::IntToString( range.GetFrom() + 1, NStr::fWithCommas );
534  label += "..";
535  label += NStr::IntToString( range.GetTo() + 1, NStr::fWithCommas );
536  label += ") ";
537  label += "Gene Models from Alignments";
538  annot->SetNameDesc( "Gene Models from Alignments" );
539  annot->SetTitleDesc( label );
540  annot->SetCreateDate( time );
541  }
542  annot->SetData().SetFtable().insert(
543  annot->SetData().SetFtable().end(),
544  iter->second.begin(), iter->second.end());
546  ///
547  /// make sure we deal with single gene requirements
548  ///
549  if( f_group_by_gene_id ){
550  x_Group_By_GeneID(*annot, model_iter->first);
551  }
553  annots.push_back( annot );
 // merge mode: add commands that attach the annotation to the sequence's entry
555  if (m_Params.m_MergeResults) {
556  CBioseq_Handle bsh = scope->GetBioseqHandle(idh);
557  if (bsh) {
 // with new genes the features get fresh ids and cross-links; without
 // them, existing transcripts and genes are updated instead
560  if ( m_Params.m_CreateGene ) {
561  CObject_id::TId max_id = s_FindHighestFeatId(seh);
562  x_AssignIDsAndCrossLinkFeats(*annot, idh, max_id);
563  } else {
564  CRef<CCmdComposite> upd_genes = x_AssignIDsAndUpdateGenes(*annot, idh, seh);
565  if (upd_genes) {
566  cmd->AddCommand(*upd_genes);
567  }
568  }
569  cmd->AddCommand(*CRef<CCmdCreateSeq_annot>(new CCmdCreateSeq_annot(seh, annot.GetObject())));
570  }
571  }
573  ///
574  /// add as appropriate
575  ///
576  if( !m_Params.m_MergeResults ){
577  NON_CONST_ITERATE( list< CRef<CSeq_annot> >, iter, annots ){
578  /// now create a Project Item for the data
 // feature ids start from 1 here because max_id starts at 0
579  CObject_id::TId max_id = 0;
580  x_AssignIDsAndCrossLinkFeats(**iter, idh, max_id);
581  CRef<CProjectItem> item(new CProjectItem());
582  item->SetItem().SetAnnot(**iter);
584  string name;
585  CLabel::GetLabel(**iter, &name, CLabel::eDefault, scope.GetPointer());
586  item->SetLabel(name);
588  m_Items.push_back(item);
589  }
591  } else {
592  // We do not need any message like "No items created".
593  }
 // model_iter is advanced in step with the feats map iteration
595  ++model_iter;
596  }
597  return cmd;
598 }
602 {
 /// Returns the integer gene id stored in a dbxref of the feature,
 /// or 0 when no suitable dbxref is found.
603  CObject_id::TId gene_id = 0;
 // the "LocusLink" dbxref is the fallback when the first lookup found nothing
605  if( !tag ){
606  tag = feat.GetNamedDbxref("LocusLink");
607  }
 // NOTE(review): GetId() is called without checking that the tag holds an
 // integer rather than a string -- confirm this cannot happen here.
608  if( tag ){
609  gene_id = tag->GetTag().GetId();
610  }
611  return gene_id;
612 }
615 {
 /// Scans all features reachable from `entry` and returns the largest
 /// local integer feature id found, or 0 when there is none.
616  CObject_id::TId id = 0;
617  for (CFeat_CI feat_it(entry); feat_it; ++feat_it) {
618  if (feat_it->IsSetId()) {
619  const CFeat_id& feat_id = feat_it->GetId();
 // only local ids in integer form take part in the comparison
620  if (feat_id.IsLocal() && feat_id.GetLocal().IsId() && feat_id.GetLocal().GetId() > id) {
621  id = feat_id.GetLocal().GetId();
622  }
623  }
624  }
625  return id;
626 }
629 {
 /// Returns true as soon as one feature of the annotation carries a
 /// local feature id, false when none does.
630  FOR_EACH_SEQFEAT_ON_SEQANNOT(feat_it, annot) {
631  const CSeq_feat& feat = **feat_it;
632  if (feat.IsSetId() && feat.GetId().IsLocal()) {
633  return true;
634  }
635  }
636  return false;
637 }
640 {
 /// Returns true as soon as one feature of the annotation has a
 /// cross-reference with a local feature id, false when none does.
641  FOR_EACH_SEQFEAT_ON_SEQANNOT(feat_it, annot) {
642  const CSeq_feat& feat = **feat_it;
643  if (feat.IsSetXref()) {
 // `it` refers to one cross-reference of the current feature
645  if ((*it)->IsSetId() && (*it)->GetId().IsLocal())
646  return true;
647  }
648  }
649  }
650  return false;
651 }
654 {
 /// Collects the gene, CDS and mRNA features of `annot` into one
 /// SGeneCdsmRnaFeats record and appends that record to gene_cds_rna.
 /// A record is appended even when none of the three kinds was found.
655  SGeneCdsmRnaFeats feats;
656  CSeq_annot::C_Data::TFtable::const_iterator it = annot.GetData().GetFtable().begin();
657  for ( ; it != annot.GetData().GetFtable().end(); ++it) {
658  if ((*it)->IsSetData()) {
659  const CSeqFeatData& data = (*it)->GetData();
 // if a kind occurs more than once, the last occurrence wins
660  if (data.IsGene())
661  feats.gene = *it;
662  else if (data.IsCdregion()) {
663  feats.cds = *it;
664  } else if (data.GetSubtype() == CSeqFeatData::eSubtype_mRNA) {
665  feats.mRNA = *it;
666  }
667  }
668  }
669  gene_cds_rna.push_back(feats);
670 }
673 {
 /// Collapses the gene features of the annotation so that each gene id
 /// is represented by a single gene feature spanning all genes with that
 /// id. Features without a gene id are passed through unchanged.
674  ///
675  /// now, find the gene for each list of features
676  /// when we're done, create an annotation for each
677  ///
678  typedef map<int, list< CRef<CSeq_feat> > > TGeneFeats;
679  TGeneFeats feats_by_gene;
 // bucket the features by gene id (0 means "no gene id found")
682  int gene_id = s_GetGeneID(**it);
683  feats_by_gene[gene_id].push_back(*it);
684  }
686  list< CRef<CSeq_feat> > feats;
687  NON_CONST_ITERATE( TGeneFeats, it, feats_by_gene ){
688  if( !it->first ){
689  /// failed to find gene id - pass through
690  feats.insert( feats.end(), it->second.begin(), it->second.end() );
692  } else {
693  /// scan for gene feature
694  CRef<CSeq_feat> gene;
695  TSeqRange longest_range;
696  list< CRef<CSeq_feat> >::iterator i = it->second.begin();
697  while( i != it->second.end() ){
698  CSeq_feat& feat = **i;
699  if( !feat.GetData().IsGene() ){
700  ++i;
701  } else {
 // the first gene is kept; later genes only extend the combined range
702  if( !gene ){
703  gene = *i;
704  longest_range = gene->GetLocation().GetTotalRange();
705  } else {
706  TSeqRange range = (*i)->GetLocation().GetTotalRange();
707  longest_range += range;
708  }
 // every gene is removed from the bucket; erase() yields the next element
709  i = it->second.erase(i);
710  }
711  }
713  if( gene ){
 // stretch the surviving gene over the combined range and put it first
714  gene->SetLocation().SetInt().SetFrom( longest_range.GetFrom() );
715  gene->SetLocation().SetInt().SetTo( longest_range.GetTo() );
716  feats.push_back( gene );
717  x_GroupGenes(idh, gene);
718  }
 // the remaining (non-gene) features of this gene id follow the gene
720  feats.insert( feats.end(), it->second.begin(), it->second.end() );
721  }
722  }
 // replace the annotation's feature table with the regrouped list
724  annot.SetData().SetFtable().swap( feats );
725 }
728 {
 /// Replaces the gene of every stored record that has the same gene id
 /// as `gene` with `gene` itself. Does nothing when `gene` has no gene id.
729  CObject_id::TId gene_id = s_GetGeneID(*gene);
730  if (!gene_id) return;
733  SGeneCdsmRnaFeats& feats = *it;
 // records without a gene are left untouched
734  if (feats.gene) {
735  CObject_id::TId this_id = s_GetGeneID(*(feats.gene));
736  if (this_id == gene_id)
737  feats.gene = gene;
738  }
739  }
740 }
743 {
 /// Assigns feature ids to the annotation's features and cross-links the
 /// stored gene/CDS/mRNA records. Nothing happens when the annotation
 /// already contains local feature ids or local-id cross-references.
744  if (!s_FeaturesHaveIDs(annot) && !s_FeaturesHaveXrefs(annot)) {
 // new ids continue after max_id
745  s_AssignFeatureIds(annot, max_id);
748  SGeneCdsmRnaFeats& feats = *it;
749  feats.CrossLinkTriple();
750  }
751  }
752 }
755 {
 /// Gives every feature of the annotation a local integer feature id.
 /// Features that already have a local id keep it; features with no id or
 /// a non-local id receive the next value after max_id. max_id is
 /// incremented for each id handed out.
756  EDIT_EACH_SEQFEAT_ON_SEQANNOT(feat_it, annot) {
757  CSeq_feat& feat = **feat_it;
758  if (!feat.IsSetId()) {
759  feat.SetId().SetLocal().SetId(++max_id);
760  } else if (!feat.GetId().IsLocal()){
 // a non-local id is discarded before the local one is set
761  feat.ResetId();
762  feat.SetId().SetLocal().SetId(++max_id);
763  }
764  }
765 }
768 {
 /// Finds the coding region that belongs to the given mRNA, preferring
 /// feature-id cross-references and falling back to a location-based
 /// search. Returns `cds`, which may still be unset when nothing matches.
771  bool has_xref = false;
772  if (mrna.IsSetXref()) {
773  /// using FeatID from feature cross-references:
774  ///if mRNA refers to a CDS by feature ID, use that feature
775  CBioseq_Handle bsh = scope.GetBioseqHandle(mrna.GetLocation());
776  CTSE_Handle tse = bsh.GetTSE_Handle();
778  if ((*it)->IsSetId() && (*it)->GetId().IsLocal() && (*it)->GetId().GetLocal().IsId()) {
779  CSeq_feat_Handle cdsh = tse.GetFeatureWithId(CSeqFeatData::eSubtype_cdregion, (*it)->GetId().GetLocal().GetId());
780  if (cdsh) {
781  cds = cdsh.GetSeq_feat();
782  }
 // any local integer xref suppresses the location-based fallback, even
 // when it did not resolve to a CDS
783  has_xref = true;
784  }
785  }
786  }
787  if (!has_xref) {
788  /// using location to find CDS:
789  /// mRNA must include the CDS location and the internal interval boundaries need to be identical
793  scope);
794  }
795  return cds;
796 }
799 {
 /// Builds a command that replaces older versions of transcripts already
 /// present in `seh` by the newly generated ones and updates the related
 /// genes. Returns an empty CRef when the annotation already carries local
 /// feature ids or local-id cross-references.
800  if (s_FeaturesHaveIDs(annot) || s_FeaturesHaveXrefs(annot)) {
801  return CRef<CCmdComposite>();
802  }
804  CRef<CCmdComposite> cmd(new CCmdComposite("Replace older version of transcripts and update related genes"));
 // records without an mRNA are skipped
807  if (!it->mRNA)
808  continue;
809  const CSeq_id* it_id = it->mRNA->GetProduct().GetId();
811  if (NStr::IsBlank(new_id)) {
812  NCBI_USER_THROW("Accession for new mRNA transcript_id is not found");
813  }
 // look for an existing mRNA with the same accession but an older version
815  CSeq_feat_Handle fh;
816  for (CFeat_CI mrna_it(seh, SAnnotSelector(CSeqFeatData::eSubtype_mRNA)); mrna_it; ++mrna_it) {
817  CConstRef<CSeq_id> prod_id = mrna_it->GetProductId().GetSeqId();
819  if (NStr::IsBlank(old_id)) {
820  NCBI_USER_THROW("Accession for mRNA transcript_id is not found");
821  }
 // the ids match when their common prefix ends right after the first '.'
 // of old_id, i.e. they differ only in what follows the dot
 // NOTE(review): if old_id contains no '.', the search result plus one
 // presumably wraps to 0 and would match ids with no common prefix -- confirm.
822  SIZE_TYPE pos = NStr::CommonPrefixSize(old_id, new_id);
823  if (pos == NStr::FindNoCase(old_id, ".") + 1) {
824  string old_version = old_id.substr(pos, NPOS);
825  string new_version = new_id.substr(pos, NPOS);
 // only a strictly newer version replaces the existing feature
826  if (NStr::StringToInt(new_version) > NStr::StringToInt(old_version)) {
827  fh = *mrna_it;
828  }
 // the scan stops at the first accession match, newer or not
829  break;
830  }
831  }
833  if (!fh)
834  continue;
836  // update the new mRNA with old mRNA's feat-id
837  if (fh.IsSetId()) {
838  CRef<CFeat_id> mrna_id(new CFeat_id);
839  mrna_id->Assign(fh.GetId());
840  it->mRNA->SetId(*mrna_id);
841  }
843  // delete the old mRNA
844  const CSeq_annot_Handle& annot_handle = fh.GetAnnot();
845  CSeq_entry_EditHandle eh = annot_handle.GetParentEntry().GetEditHandle();
846  CRef<CCmdDelSeq_feat> del_mrna(new CCmdDelSeq_feat(fh));
847  cmd->AddCommand(*del_mrna);
849  // update the coding region
851  if (!cds || !it->cds)
852  continue;
854  // update the new CDS with old CDS's feat-id
855  if (cds->IsSetId()) {
856  CRef<CFeat_id> cds_id(new CFeat_id);
857  cds_id->Assign(cds->GetId());
858  it->cds->SetId(*cds_id);
859  }
861  // delete the old CDS
862  CSeq_feat_Handle cdsh = m_Scope->GetSeq_featHandle(*cds);
863  CRef<CCmdDelSeq_feat> del_cds(new CCmdDelSeq_feat(cdsh));
864  cmd->AddCommand(*del_cds);
866  }
867  }
 // features that did not inherit an id get fresh ones above the highest
 // id currently used in the entry
869  CObject_id::TId max_id = s_FindHighestFeatId(seh);
870  s_AssignFeatureIds(annot, max_id);
 // finally append the gene update commands
872  x_GetUpdatedGeneCommand(seh, idh, cmd.GetPointer());
873  return cmd;
874 }
877 {
 /// Finds, for every generated record with a CDS, the existing gene it
 /// belongs to and adds to `cmd` a change command for each gene that
 /// x_UpdateGeneOnSequence reports as modified.
878  typedef map<CConstRef<CSeq_feat>, TGeneCDSmRNAList> TMapGeneFeats;
879  TMapGeneFeats gene_feats_map;
881  feature::CFeatTree feat_tree;
882  SAnnotSelector sel;
886  CFeat_CI feat_iter(seh, sel);
 // seed the feature tree with the features selected from the entry
888  feat_tree.AddFeatures(feat_iter);
891  const SGeneCdsmRnaFeats& feats = *it;
 // only records with a CDS can be associated with a gene here
892  if (feats.cds) {
893  CBioseq_Handle proth = seh.GetScope().GetBioseqHandle(feats.cds->GetProduct());
894  const CMappedFeat& mapped_cds = sequence::GetMappedCDSForProduct(proth);
895  feat_tree.AddGenesForCds(mapped_cds);
897  CSeq_feat_Handle fh = feat_tree.GetBestGene(mapped_cds);
898  CConstRef<CSeq_feat> gene_feat;
899  if (fh) {
900  CSeq_id_Handle found_idh = fh.GetLocationId();
 // the best gene lies on a different sequence: look for a gene with the
 // same locus (case-sensitive comparison) within this entry instead
901  if (!seh.GetScope().IsSameBioseq(idh, found_idh, CScope::eGetBioseq_All)) {
902  const CSeq_feat& found_feat = *fh.GetOriginalSeq_feat();
903  if (found_feat.GetData().IsGene()) {
904  const CGene_ref& gene_ref = found_feat.GetData().GetGene();
905  if (gene_ref.IsSetLocus()) {
906  for (CFeat_CI feat_it(seh, CSeqFeatData::eSubtype_gene); feat_it; ++feat_it) {
907  if (feat_it->GetData().GetGene().IsSetLocus()
908  && NStr::EqualCase(feat_it->GetData().GetGene().GetLocus(), gene_ref.GetLocus())) {
909  gene_feat = feat_it->GetOriginalSeq_feat();
910  break;
911  }
912  }
913  }
914  }
915  }
916  else {
 // the best gene is on the same sequence: use it directly
917  gene_feat = fh.GetOriginalSeq_feat();
918  }
919  }
 // group the records by the gene they were matched to
920  if (gene_feat) {
921  gene_feats_map[gene_feat].push_back(feats);
922  }
923  }
924  }
926  NON_CONST_ITERATE(TMapGeneFeats, it, gene_feats_map) {
927  if (it->first) {
 // work on a copy so the original gene stays intact until the change
 // command is executed
928  const CSeq_feat& gene = *(it->first);
929  CRef<CSeq_feat> new_gene(new CSeq_feat);
930  new_gene->Assign(gene);
932  bool modified = x_UpdateGeneOnSequence(it->second, new_gene);
933  if (modified) {
934  CSeq_feat_Handle fh = m_Scope->GetSeq_featHandle(gene);
935  CIRef<IEditCommand> chg_feat(new CCmdChangeSeq_feat(fh, *new_gene));
936  cmd->AddCommand(*chg_feat);
937  }
938  }
939  }
940 }
943 {
 /// Links the given records to `gene` and widens the gene's location so
 /// that it covers all of their mRNA (or, when a record has no mRNA, CDS)
 /// locations. Returns true when the gene's start or stop was changed,
 /// false otherwise (including when `gene` is null).
 /// Throws a user exception when the gene has no sGeneDbName dbxref.
944  if (!gene) return false;
946  // link related features via feature id Xrefs:
947  NON_CONST_ITERATE(TGeneCDSmRNAList, it, gene_cds_rna) {
948  SGeneCdsmRnaFeats& feat = *it;
949  feat.gene.Reset(&(*gene));
950  feat.CrossLinkTriple();
951  }
953  // update the gene range, and set the partialness of the updated gene
954  // gene range is updated to the union of all of its child features' location
955  CConstRef<CDbtag> gene_tag = gene->GetNamedDbxref(sGeneDbName);
956  if (!gene_tag) {
957  NCBI_USER_THROW("Could not update gene range, as no GeneID dbxref was found");
958  }
 // *_upd track the widened extent; the partial_* flags record the partialness
 // of whichever child last pushed the corresponding end outwards
960  TSeqPos gene_start = gene->GetLocation().GetStart(eExtreme_Positional);
961  TSeqPos gene_stop = gene->GetLocation().GetStop(eExtreme_Positional);
962  TSeqPos gene_start_upd = gene_start, gene_stop_upd = gene_stop;
963  bool partial_start = false, partial_stop = false;
964  ITERATE (TGeneCDSmRNAList, it, gene_cds_rna) {
965  const SGeneCdsmRnaFeats& feat = *it;
966  if (feat.mRNA) {
967  _ASSERT(gene->GetLocation().GetStrand() == feat.mRNA->GetLocation().GetStrand());
968  _ASSERT(feat.mRNA->GetNamedDbxref(sGeneDbName)->Match(*gene_tag));
970  TSeqPos mrna_start = feat.mRNA->GetLocation().GetStart(eExtreme_Positional);
971  TSeqPos mrna_stop = feat.mRNA->GetLocation().GetStop(eExtreme_Positional);
972  if (mrna_start < gene_start_upd) {
973  gene_start_upd = mrna_start;
974  partial_start = feat.mRNA->GetLocation().IsPartialStart(eExtreme_Positional);
975  }
976  if (mrna_stop > gene_stop_upd) {
977  gene_stop_upd = mrna_stop;
978  partial_stop = feat.mRNA->GetLocation().IsPartialStop(eExtreme_Positional);
979  }
 // the CDS is only consulted for records that have no mRNA
981  } else if (feat.cds) {
982  _ASSERT(gene->GetLocation().GetStrand() == feat.cds->GetLocation().GetStrand());
983  _ASSERT(feat.cds->GetNamedDbxref(sGeneDbName)->Match(*gene_tag));
985  TSeqPos cds_start = feat.cds->GetLocation().GetStart(eExtreme_Positional);
986  TSeqPos cds_stop = feat.cds->GetLocation().GetStop(eExtreme_Positional);
987  if (cds_start < gene_start_upd) {
988  gene_start_upd = cds_start;
989  partial_start = feat.cds->GetLocation().IsPartialStart(eExtreme_Positional);
990  }
991  if (cds_stop > gene_stop_upd) {
992  gene_stop_upd = cds_stop;
993  partial_stop = feat.cds->GetLocation().IsPartialStop(eExtreme_Positional);
994  }
995  }
996  }
998  // for one gene end:
999  // if gene is complete and its child is partial, that end becomes partial
1000  // if gene is partial and its child is complete, that end becomes complete
1001  bool modified = false;
 // start end of the gene
1002  if (gene_start_upd < gene_start) {
1003  gene->SetLocation().SetInt().SetFrom(gene_start_upd);
1004  modified = true;
1006  if (partial_start) {
1007  // do nothing
1008  } else {
1009  gene->SetLocation().SetPartialStart(false, eExtreme_Positional);
1010  }
1012  } else {
1013  if (partial_start) {
1014  gene->SetLocation().SetPartialStart(true, eExtreme_Positional);
1015  } else {
1016  // do nothing
1017  }
1018  }
1019  }
 // stop end of the gene, handled the same way
1022  if (gene_stop_upd > gene_stop) {
1023  gene->SetLocation().SetInt().SetTo(gene_stop_upd);
1024  modified = true;
1026  if (partial_stop) {
1027  // do nothing
1028  } else {
1029  gene->SetLocation().SetPartialStop(false, eExtreme_Positional);
1030  }
1031  } else {
1032  if (partial_stop) {
1033  gene->SetLocation().SetPartialStop(true, eExtreme_Positional);
1034  } else {
1035  // do nothing
1036  }
1037  }
1038  }
1040  // set the partial flag if at least one end of the gene is partial
1041  // unset the flag, if both ends are complete
1044  gene->SetPartial(true);
1045  } else {
1046  gene->ResetPartial();
1047  }
1050  return modified;
1051 }
