65 "Create a gene model from an alignment",
66 "Create a new gene model based on one or more "
67 "alignments of related transcripts to a genome",
68 "https://www.ncbi.nlm.nih.gov/tools/gbench/",
69 "Alignment Creation"),
77 return "create_gene_model_tool_manager";
83 return "Create Gene Model Tool";
124 err =
"Please select at least one set of alignments!";
156 if (find(temp.begin(), temp.end(), it) == temp.end()) {
191 return task.Release();
206 if (!seq_id &&
cds) {
215 if (!seq_id &&
mRNA) {
249 xref->SetId(to_feat.
SetId());
250 from_feat.
SetXref().push_back(xref);
259 if (
tag &&
tag->IsSetTag()) {
260 if (
tag->GetTag().IsId()) {
263 }
else if (
tag->GetTag().IsStr()) {
296 if (!cmdProcessor)
return eFailed;
323 string msg_gmodel(
"CreateGeneModelFromAlign()");
348 }
else if( scope != &*iter->scope ){
351 "All alignments must be within the same project"
389 list< CRef<CSeq_annot> > annots;
406 annots.push_back( annot );
412 proteins->
SetSet( *translated_proteins );
477 if (seq_id && !seq_id->
IsGi()) {
481 new_id->
SetGi(loc_gi);
516 list< CRef<CSeq_feat> >::const_iterator feat_it = iter->second.begin();
517 if( feat_it == iter->second.end() ){
522 for( ++feat_it; feat_it != iter->second.end(); ++feat_it ){
523 range += (*feat_it)->GetLocation().GetTotalRange();
537 label +=
"Gene Models from Alignments";
538 annot->
SetNameDesc(
"Gene Models from Alignments" );
542 annot->
SetData().SetFtable().insert(
543 annot->
SetData().SetFtable().end(),
544 iter->second.begin(), iter->second.end());
549 if( f_group_by_gene_id ){
553 annots.push_back( annot );
566 cmd->AddCommand(*upd_genes);
582 item->
SetItem().SetAnnot(**iter);
609 gene_id =
tag->GetTag().GetId();
617 for (
CFeat_CI feat_it(entry); feat_it; ++feat_it) {
618 if (feat_it->IsSetId()) {
619 const CFeat_id& feat_id = feat_it->GetId();
645 if ((*it)->IsSetId() && (*it)->GetId().IsLocal())
656 CSeq_annot::C_Data::TFtable::const_iterator it = annot.
GetData().
GetFtable().begin();
657 for ( ; it != annot.
GetData().GetFtable().end(); ++it) {
658 if ((*it)->IsSetData()) {
662 else if (
data.IsCdregion()) {
669 gene_cds_rna.push_back(feats);
679 TGeneFeats feats_by_gene;
683 feats_by_gene[gene_id].push_back(*it);
686 list< CRef<CSeq_feat> > feats;
690 feats.insert( feats.end(), it->second.begin(), it->second.end() );
696 list< CRef<CSeq_feat> >::iterator
i = it->second.begin();
697 while(
i != it->second.end() ){
707 longest_range +=
range;
709 i = it->second.erase(
i);
716 feats.push_back( gene );
720 feats.insert( feats.end(), it->second.begin(), it->second.end() );
724 annot.
SetData().SetFtable().swap( feats );
730 if (!gene_id)
return;
736 if (this_id == gene_id)
759 feat.
SetId().SetLocal().SetId(++max_id);
762 feat.
SetId().SetLocal().SetId(++max_id);
771 bool has_xref =
false;
778 if ((*it)->IsSetId() && (*it)->GetId().IsLocal() && (*it)->GetId().GetLocal().IsId()) {
809 const CSeq_id* it_id = it->mRNA->GetProduct().GetId();
824 string old_version = old_id.substr(pos,
NPOS);
825 string new_version = new_id.substr(pos,
NPOS);
840 it->mRNA->SetId(*mrna_id);
847 cmd->AddCommand(*del_mrna);
851 if (!cds || !it->cds)
858 it->cds->SetId(*cds_id);
864 cmd->AddCommand(*del_cds);
879 TMapGeneFeats gene_feats_map;
881 feature::CFeatTree feat_tree;
888 feat_tree.AddFeatures(feat_iter);
895 feat_tree.AddGenesForCds(mapped_cds);
907 if (feat_it->GetData().GetGene().IsSetLocus()
909 gene_feat = feat_it->GetOriginalSeq_feat();
921 gene_feats_map[gene_feat].push_back(feats);
936 cmd->AddCommand(*chg_feat);
944 if (!gene)
return false;
957 NCBI_USER_THROW(
"Could not update gene range, as no GeneID dbxref was found");
962 TSeqPos gene_start_upd = gene_start, gene_stop_upd = gene_stop;
963 bool partial_start =
false, partial_stop =
false;
972 if (mrna_start < gene_start_upd) {
973 gene_start_upd = mrna_start;
976 if (mrna_stop > gene_stop_upd) {
977 gene_stop_upd = mrna_stop;
981 }
else if (feat.
cds) {
987 if (cds_start < gene_start_upd) {
988 gene_start_upd = cds_start;
991 if (cds_stop > gene_stop_upd) {
992 gene_stop_upd = cds_stop;
1001 bool modified =
false;
1002 if (gene_start_upd < gene_start) {
1003 gene->
SetLocation().SetInt().SetFrom(gene_start_upd);
1006 if (partial_start) {
1013 if (partial_start) {
1022 if (gene_stop_upd > gene_stop) {
1023 gene->
SetLocation().SetInt().SetTo(gene_stop_upd);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
bool Create(wxWindow *parent, wxWindowID id=ID_GENEMODELPARAMSPANEL, const wxPoint &pos=wxDefaultPosition, const wxSize &size=wxSize(400, 300), long style=wxTAB_TRAVERSAL)
virtual void SetRegistryPath(const string &path)
CAlgoToolManagerParamsPanel.
virtual void LoadSettings()
void SetParams(SCreateGeneModelParams *params, TConstScopedObjects *objects)
CDataLoadingAppJob - a base class for Jobs loading data into projects.
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
void SetFlags(TFeatureGeneratorFlags)
void SetAllowedUnaligned(TSeqPos)
static const TSeqPos kDefaultAllowedUnaligned
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
int TFeatureGeneratorFlags
CUndoManager & GetUndoManager()
static void PrefetchSeqDescr(IServiceLocator *serviceLocator, const vector< CRef< objects::CProjectItem > > &items)
void GetParams(SProjectSelectorParams &params) const
CProjectService - a service providing API for operations with Workspaces and Projects.
CProjectSelectOptions - describes how new Project Items shall be added to a workspace.
bool AddItemsToWorkspace(CProjectService *service, const TData &data)
void SetNameDesc(const string &name)
void SetCreateDate(const CTime &dt)
void SetTitleDesc(const string &title)
namespace ncbi::objects::
CConstRef< CDbtag > GetNamedDbxref(const CTempString &db) const
Return a specified DB xref.
void AddDbxref(const string &db_name, const string &db_key)
add a DB xref to this feature
CSeq_feat_Handle GetFeatureWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
bool CanBeEdited(void) const
Return true if this TSE handle is local to scope and can be edited.
Undo/Redo interface for editing operations.
virtual void Execute(IEditCommand *command, wxWindow *window=0)=0
IRegSettings An interface for objects that save / restore settings using CGuiRegistry.
bool m_PropagateNcrnaFeats
TConstScopedObjects m_Alignments
container_type::iterator iterator
const_iterator begin() const
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
CIRef< T > GetServiceByType()
retrieves a typed reference to a service, the name of C++ type is used as the name of the service.
ETaskState
List of task states defining the task management FSM.
@ eCanceled
canceled by Task Manager
@ eFailed
failed during execution
@ eCompleted
successfully finished
TConstScopedObjects m_Alignments
virtual string GetExtensionLabel() const
returns a displayable label for this extension ( please capitalize the key words - "My Extension" )
CRef< CCmdComposite > x_AssignIDsAndUpdateGenes(objects::CSeq_annot &annot, const objects::CSeq_id_Handle &idh, objects::CSeq_entry_Handle seh)
void x_StoreGeneratedFeats(const objects::CSeq_annot &annot, TGeneCDSmRNAList &gene_cds_rna) const
void x_GroupGenes(const objects::CSeq_id_Handle &idh, CRef< objects::CSeq_feat > gene)
SCreateGeneModelParams m_Params
void x_AssignIDsAndCrossLinkFeats(objects::CSeq_annot &annot, const objects::CSeq_id_Handle &idh, objects::CObject_id::TId max_id)
virtual bool x_ValidateParams()
validates user input in Parameters panel, report errors if any
static bool s_FeaturesHaveXrefs(const objects::CSeq_annot &annot)
CRef< objects::CScope > m_Scope
void x_Group_By_GeneID(objects::CSeq_annot &annot, const objects::CSeq_id_Handle &idh)
static const string sGeneDbName
CCreateGeneModelTask.
CCreateGeneModelToolManager()
virtual string GetExtensionIdentifier() const
returns the unique human-readable identifier for the extension the id should use lowercase letters se...
CRef< CCmdComposite > x_GetCommand()
static objects::CObject_id::TId s_FindHighestFeatId(const objects::CSeq_entry_Handle seh)
list< SGeneCdsmRnaFeats > TGeneCDSmRNAList
a loader might be tied with a specific project item.
virtual IRegSettings * x_GetParamsAsRegSetting()
return a pointer to Parameters object as IRegSettings interface
virtual CAlgoToolManagerParamsPanel * x_GetParamsPanel()
returns a pointer to the parameters panel, override in derived classes
void x_GetUpdatedGeneCommand(objects::CSeq_entry_Handle seh, const objects::CSeq_id_Handle &idh, CCmdComposite *cmd)
static void s_CreateXRefLink(objects::CSeq_feat &from_feat, objects::CSeq_feat &to_feat)
virtual ~CCreateGeneModelTask()
CConstRef< objects::CSeq_id > GetID(objects::CScope *scope) const
SGeneCdsmRnaFeats.
virtual void CleanUI()
override this function in a derived class and clean extra members
virtual ETaskState x_Run()
override this function in derived classes
static bool s_FeaturesHaveIDs(const objects::CSeq_annot &annot)
virtual CDataLoadingAppJob * x_CreateLoadingJob()
factory method for creating the job that executes the tool algorithm override in derived classes
static void s_AssignFeatureIds(objects::CSeq_annot &annot, objects::CObject_id::TId &max_id)
CRef< objects::CSeq_feat > cds
void x_RemoveDuplicates()
virtual bool x_CreateParamsPanelIfNeeded()
returns / creates Parameters panel, override in derived classes see cpp file for example
CRef< objects::CSeq_feat > gene
virtual IAppTask * GetTask()
Once parameters are gathered and validated this function is called to produce the final Task object t...
static objects::CObject_id::TId s_GetGeneID(const objects::CSeq_feat &feat)
virtual void InitUI()
override this function in a derived class and initialize extra members
CCreateGeneModelParamsPanel * m_ParamsPanel
TGeneModelMap m_GeneModelMap
CSelectProjectOptions m_Options
CSelectProjectOptions::TItems m_Items
CIRef< CProjectService > m_Service
static void s_CreateDBXref(const objects::CSeq_feat &from_feat, objects::CSeq_feat &to_feat, const string &dbname)
void x_SelectCompatibleInputObjects()
SCreateGeneModelParams m_Params
bool x_UpdateGeneOnSequence(TGeneCDSmRNAList &gene_cds_rna, CRef< objects::CSeq_feat > gene)
CRef< objects::CSeq_feat > mRNA
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
void NcbiErrorBox(const string &message, const string &title="Error")
specialized Message Box function for reporting critical errors
virtual const string & GetLabel() const
vector< SConstScopedObject > TConstScopedObjects
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eContent
Untagged human-readable accession or the like.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
string GetAccessionForId(const objects::CSeq_id &id, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession string for a Seq-id.
TGi GetGiForId(const objects::CSeq_id &id, CScope &scope, EGetIdType flags=0)
Given a Seq-id retrieve the corresponding GI.
CMappedFeat GetMappedCDSForProduct(const CBioseq_Handle &product)
@ eWithAccessionVersion
accession.version (when possible)
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
bool IsSameBioseq(const CSeq_id_Handle &id1, const CSeq_id_Handle &id2, EGetBioseqFlag get_flag)
Check if two seq-ids are resolved to the same Bioseq.
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
virtual CSeq_id_Handle GetLocationId(void) const
const CFeat_id & GetId(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
void Reset(void)
Reset reference object.
TObjectType & GetObject(void)
Get object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
static SIZE_TYPE CommonPrefixSize(const CTempString s1, const CTempString s2)
Determine the common prefix of two strings.
@ fSS_Reject
Reject specified characters, allow all other.
@ fSS_punct
Check on ispunct()
@ fWithCommas
Use commas as thousands separator.
@ eCurrent
Use current time. See also CCurrentTime.
static const char label[]
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetLabel(const TLabel &value)
Assign a value to Label data member.
void SetItem(TItem &value)
Assign a value to Item data member.
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
const TLocus & GetLocus(void) const
Get the Locus member data.
bool IsId(void) const
Check if variant Id is selected.
TId GetId(void) const
Get the variant data.
TXref & SetXref(void)
Assign a value to Xref data member.
void ResetPartial(void)
Reset Partial data member.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetPartial(TPartial value)
Assign a value to Partial data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TId & GetId(void) const
Get the Id member data.
const TLocal & GetLocal(void) const
Get the variant data.
void SetCode(TCode &value)
Assign a value to Code data member.
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsLocal(void) const
Check if variant Local is selected.
bool IsGene(void) const
Check if variant Gene is selected.
void ResetId(void)
Reset Id data member.
const TData & GetData(void) const
Get the Data member data.
void SetId(TId &value)
Assign a value to Id data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
const TProduct & GetProduct(void) const
Get the Product member data.
const TGene & GetGene(void) const
Get the variant data.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
void SetFrame(TFrame value)
Assign a value to Frame data member.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
const TWhole & GetWhole(void) const
Get the variant data.
TGi & SetGi(void)
Select the variant.
bool IsGi(void) const
Check if variant Gi is selected.
bool IsWhole(void) const
Check if variant Whole is selected.
TSet & SetSet(void)
Select the variant.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
const TFtable & GetFtable(void) const
Get the variant data.
list< CRef< CSeq_feat > > TFtable
const TData & GetData(void) const
Get the Data member data.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
range(_Ty, _Ty) -> range< _Ty >
#define EDIT_EACH_SEQFEAT_ON_SEQANNOT(Itr, Var)
#define FOR_EACH_SEQFEAT_ON_SEQANNOT(Itr, Var)
FOR_EACH_SEQFEAT_ON_SEQANNOT EDIT_EACH_SEQFEAT_ON_SEQANNOT.
#define FOR_EACH_SEQFEATXREF_ON_SEQFEAT(Itr, Var)
FOR_EACH_SEQFEATXREF_ON_SEQFEAT EDIT_EACH_SEQFEATXREF_ON_SEQFEAT.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
void ToLoadingOptions(CSelectProjectOptions &options)