48 using namespace sequence;
55 m_Filtering_gene_xref(filtering_gene_xref),
59 const CSeq_id *seq_id = loc_iter.GetRangeAsSeq_loc()->GetId();
60 if( seq_id !=
NULL ) {
78 unique_ptr<CFeat_CI> &feat_ci,
86 if ( m_BioseqHandle ) {
97 new_loc->
SetInt().SetFrom( 0 );
101 otherHalfOfRange->
SetInt().SetFrom(
range.GetFrom() );
104 new_loc->
Add( *otherHalfOfRange );
107 new_loc->
SetId( *loc.GetId() );
109 feat_ci.reset(
new CFeat_CI(scope, *new_loc, sel) );
112 bool loc_change_needed =
false;
113 ITERATE( CSeq_loc, loc_iter, loc ) {
114 if( ! m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) {
115 loc_change_needed =
true;
119 if( loc_change_needed ) {
121 ITERATE( CSeq_loc, loc_iter, loc ) {
122 if( m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) {
123 new_loc->
Add( *loc_iter.GetRangeAsSeq_loc() );
126 feat_ci.reset(
new CFeat_CI(scope, *new_loc, sel) );
128 feat_ci.reset(
new CFeat_CI(scope, loc, sel) );
132 feat_ci.reset(
new CFeat_CI(scope, loc, sel) );
145 bool &shouldContinueToNextIteration,
149 bool &revert_locations_this_iteration,
156 if( m_Filtering_gene_xref !=
NULL &&
161 shouldContinueToNextIteration =
true;
170 const bool candidate_feat_is_mixed =
IsMixedStrand( m_BioseqHandle, *candidate_feat_loc );
178 circular_length, norm_opt ) ;
182 cleaned_loc_this_iteration, circular_length, norm_opt );
185 if( ( candidate_feat_bad_order || candidate_feat_is_mixed ) &&
190 revert_locations_this_iteration =
true;
194 if( (candidate_feat_bad_order || candidate_feat_is_mixed) &&
198 shouldContinueToNextIteration =
true;
200 CSeq_loc_CI candidate_feat_loc_iter( feat.
GetLocation() );
201 for( ; candidate_feat_loc_iter; ++candidate_feat_loc_iter ) {
204 *candidate_feat_loc_iter.GetRangeAsSeq_loc(),
205 *cleaned_loc_this_iteration,
209 if( x_StrandsMatch( m_Loc_original_strand, candidate_feat_loc_iter.GetStrand() ) ) {
211 shouldContinueToNextIteration =
false;
217 if( x_StrandsMatch( m_Loc_original_strand, candidate_feat_original_strand ) ) {
219 shouldContinueToNextIteration =
false;
222 if( ! x_StrandsMatch( m_Loc_original_strand, candidate_feat_original_strand ) ) {
224 shouldContinueToNextIteration =
true;
247 if( (start > stop) && (circular_length > 0) &&
250 cur_diff = circular_length -
abs( start - stop );
252 cur_diff =
abs( start - stop );
260 return ( candidate_feat_original_strand == feat_strand
291 if( out_suppression_check_gene_ref &&
300 if( resolved_feat ) {
301 out_s_feat = resolved_feat;
313 if( xref_label.empty() ) {
317 bool also_look_at_parent_CDS =
false;
329 also_look_at_parent_CDS =
true;
338 if( also_look_at_parent_CDS &&
339 pParentDecidingGeneRef &&
342 out_g_ref = pParentDecidingGeneRef;
348 if( also_look_at_parent_CDS &&
NULL != xref_g_ref ) {
349 out_g_ref = xref_g_ref;
379 if( also_look_at_parent_CDS ) {
382 bool ownGeneIsOkay =
false;
384 const CSeq_loc &gene_loc = out_s_feat->
GetLocation();
386 ownGeneIsOkay =
true;
395 if( pParentDecidingGeneRef ) {
399 out_g_ref = pParentDecidingGeneRef;
402 }
else if( ownGeneIsOkay ) {
405 if( in_parent_feat ) {
423 if ( in_feat &&
NULL == xref_g_ref ) {
449 out_g_ref = &other_ref;
455 if(
NULL == out_g_ref ) {
456 out_g_ref = xref_g_ref;
468 enum ERGX_MatchQuality {
470 eRGX_MatchedTagForLocus,
471 eRGX_MatchedLocusForTag,
476 if( xref_g_ref ==
NULL ) {
480 if( top_level_seq_entry ) {
484 ERGX_MatchQuality match_quality = eRGX_NoMatch;
485 bool found_near_match =
false;
487 !found_near_match && bs;
491 if (tried.
find(tse_handle) != tried.
end()) {
496 ERGX_MatchQuality new_quality = eRGX_NoMatch;
500 if ( !new_possibilities.empty() ) {
501 new_quality = eRGX_MatchedAsIs;
503 if (new_quality == eRGX_NoMatch
504 && match_quality <= eRGX_MatchedLocusForTag
508 if ( !new_possibilities.empty() ) {
509 new_quality = eRGX_MatchedLocusForTag;
512 if (new_quality == eRGX_NoMatch
513 && match_quality <= eRGX_MatchedTagForLocus
517 if ( !new_possibilities.empty() ) {
518 new_quality = eRGX_MatchedTagForLocus;
522 if (new_quality > match_quality) {
523 possible_feats = new_possibilities;
524 match_quality = new_quality;
525 }
else if (new_quality == match_quality) {
526 possible_feats.insert(possible_feats.end(),
527 new_possibilities.begin(),
528 new_possibilities.end());
530 _ASSERT(new_quality == eRGX_NoMatch);
533 if (match_quality != eRGX_NoMatch
535 found_near_match =
true;
541 if (match_quality != eRGX_NoMatch) {
542 int best_score = INT_MAX;
546 if( this_feats_score < best_score ) {
547 feat = a_possible_feat;
548 best_score = this_feats_score;
597 if(
ctx.IsSegmented() ||
ctx.IsEMBL() ||
ctx.IsDDBJ() ) {
601 if(
ctx.CanGetMaster() ) {
602 const bool isSegmented = (
ctx.GetMaster().GetNumParts() > 1);
610 string::size_type length_before_decimal_point =
ctx.GetAccession().find(
'.' );
611 if( length_before_decimal_point == string::npos ) {
613 length_before_decimal_point =
ctx.GetAccession().length();
615 if( length_before_decimal_point == 6 ) {
647 cleaned_location->
SetStrand( first_strand_to_try );
692 bool needToAddGbLoaderBack =
false;
693 if( scope && (
ctx.IsEMBL() ||
ctx.IsDDBJ() ) &&
699 needToAddGbLoaderBack =
true;
717 filtering_gene_xref );
721 filtering_gene_xref );
724 if( needToAddGbLoaderBack ) {
767 int recursion_depth )
772 if( recursion_depth > 10 ) {
789 CFeat_CI feat_ci(
ctx.GetHandle().GetTSE_Handle(), sel, obj_id );
814 if(
NULL == other_ref ||
NULL == xref ) {
846 copy( gene_syns_list.begin(), gene_syns_list.end(),
847 inserter(gene_syns, gene_syns.
begin()) );
851 if( gene_syns.
find(*syn_iter) == gene_syns.
end() ) {
864 CSeq_loc_CI previous_loc;
867 if( ! previous_loc ) {
868 previous_loc = loc_iter;
871 if ( previous_loc.GetSeq_id().Equals( loc_iter.GetSeq_id() ) ) {
872 const int prev_to = previous_loc.GetRange().GetTo();
873 const int this_to = loc_iter.GetRange().GetTo();
875 if ( prev_to < this_to) {
879 if (prev_to > this_to) {
884 previous_loc = loc_iter;
902 for( ; loc_iter; ++loc_iter ) {
903 const CSeq_id& loc_id = loc_iter.GetSeq_id();
904 if( top_bioseq_handle.
IsSynonym(loc_id) ) {
906 new_part->
Assign( *loc_iter.GetRangeAsSeq_loc() );
907 new_loc_parts.push_back( new_part );
919 for( ; loc_iter; ++loc_iter ) {
924 const CSeq_id& loc_id = loc_iter.GetSeq_id();
925 if( top_bioseq_handle.
IsSynonym(loc_id) ) {
928 original_strand = loc_iter.GetStrand();
934 original_strand = loc_iter.GetStrand();
939 new_part->
Assign( *loc_iter.GetRangeAsSeq_loc() );
940 new_loc_parts.push_back( new_part );
947 if( ! top_bioseq_handle ) {
951 return original_strand;
958 bool plus_seen =
false;
959 bool minus_seen =
false;
961 ITERATE( CSeq_loc, loc_iter, loc ) {
962 if( loc_iter.IsEmpty() ) {
966 if( bioseq_handle ) {
967 const CSeq_id& loc_id = loc_iter.GetSeq_id();
968 if( ! bioseq_handle.
IsSynonym(loc_id) ) {
972 switch( loc_iter.GetStrand() ) {
985 return ( plus_seen && minus_seen );
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
bool x_StrandsMatch(ENa_strand feat_strand, ENa_strand candidate_feat_original_strand)
void processSAnnotSelector(SAnnotSelector &sel)
CGeneSearchPlugin(const CSeq_loc &location, CScope &scope, const CGene_ref *filtering_gene_xref)
void setUpFeatureIterator(CBioseq_Handle &ignored_bioseq_handle, unique_ptr< CFeat_CI > &feat_ci, TSeqPos circular_length, CRange< TSeqPos > &range, const CSeq_loc &loc, SAnnotSelector &sel, CScope &scope, ENa_strand &strand)
void processMainLoop(bool &shouldContinueToNextIteration, CRef< CSeq_loc > &cleaned_loc_this_iteration, CRef< CSeq_loc > &candidate_feat_loc, sequence::EOverlapType &overlap_type_this_iteration, bool &revert_locations_this_iteration, CBioseq_Handle &ignored_bioseq_handle, const CMappedFeat &feat, TSeqPos circular_length, SAnnotSelector::EOverlapType annot_overlap_type)
CBioseq_Handle m_BioseqHandle
void postProcessDiffAmount(Int8 &cur_diff, CRef< CSeq_loc > &cleaned_loc, CRef< CSeq_loc > &candidate_feat_loc, CScope &scope, SAnnotSelector &sel, TSeqPos circular_length)
void processLoc(CBioseq_Handle &ignored_bioseq_handle, CRef< CSeq_loc > &loc, TSeqPos circular_length)
static ENa_strand GeneSearchNormalizeLoc(CBioseq_Handle top_bioseq_handle, CRef< CSeq_loc > &loc, const TSeqPos circular_length, TGeneSearchLocOpt opt=0)
static CConstRef< CSeq_feat > GetFeatViaSubsetThenExtremesIfPossible_Helper_subset(CBioseqContext &ctx, CScope *scope, const CSeq_loc &location, CSeqFeatData::E_Choice sought_type, const CGene_ref *filtering_gene_xref)
static CConstRef< CGene_ref > GetSuppressionCheckGeneRef(const CSeq_feat_Handle &feat)
static CSeq_feat_Handle ResolveGeneXref(const CGene_ref *xref_g_ref, const CSeq_entry_Handle &top_level_seq_entry)
This does plain, simple resolution of a CGene_ref to its gene.
static CConstRef< CSeq_feat > ResolveGeneObjectId(CBioseqContext &ctx, const CSeq_feat_Handle &feat, int recursion_depth=0)
static CConstRef< CSeq_feat > GetFeatViaSubsetThenExtremesIfPossible_Helper(CBioseqContext &ctx, CScope *scope, const CSeq_loc &location, CSeqFeatData::E_Choice sought_type, const CGene_ref *filtering_gene_xref)
static bool GeneMatchesXref(const CGene_ref *other_ref, const CGene_ref *xref)
static CConstRef< CSeq_feat > GetFeatViaSubsetThenExtremesIfPossible_Helper_extremes(CBioseqContext &ctx, CScope *scope, const CSeq_loc &location, CSeqFeatData::E_Choice sought_type, const CGene_ref *filtering_gene_xref)
static CConstRef< CSeq_feat > GetFeatViaSubsetThenExtremesIfPossible(CBioseqContext &ctx, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, const CSeq_loc &location, CSeqFeatData::E_Choice sought_type, const CGene_ref *filtering_gene_xref)
static void GetAssociatedGeneInfo(const CSeq_feat_Handle &in_feat, CBioseqContext &ctx, const CConstRef< CSeq_loc > &feat_loc, CConstRef< CGene_ref > &out_suppression_check_gene_ref, const CGene_ref *&out_g_ref, CConstRef< CSeq_feat > &out_s_feat, const CSeq_feat_Handle &in_parent_feat)
Find the gene associated with the given feature.
static bool CanUseExtremesToFindGene(CBioseqContext &ctx, const CSeq_loc &location)
static bool BadSeqLocSortOrderCStyle(CBioseq_Handle &bioseq_handle, const CSeq_loc &location)
static bool IsMixedStrand(CBioseq_Handle bioseq_handle, const CSeq_loc &loc)
@ fGeneSearchLocOpt_RemoveFar
void GetLabel(string *label) const
bool IsSuppressed(void) const
ESubtype GetSubtype(void) const
@ eSubtype_transit_peptide_aa
@ eSubtype_sig_peptide_aa
@ eSubtype_mat_peptide_aa
namespace ncbi::objects::
TSeq_feat_Handles GetGenesWithLocus(const string &locus, bool tag) const
vector< CSeq_feat_Handle > TSeq_feat_Handles
TSeq_feat_Handles GetGenesByRef(const CGene_ref &ref) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
static const char location[]
SAFE_CONST_STATIC_STRING(kGbLoader, "GBLOADER")
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
@ eOrder_Biological
Iterate sub-locations in biological order.
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_Contained
2nd contained within 1st extremes
@ eSame
CSeq_locs contain each other.
@ eNoOverlap
CSeq_locs do not overlap or abut.
CMappedFeat GetMappedCDSForProduct(const CBioseq_Handle &product)
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CDataLoader * FindDataLoader(const string &loader_name) const
Try to find a registered data loader by name.
CObjectManager & GetObjectManager(void)
Get object manager controlling this scope.
void RemoveDataLoader(const string &loader_name, EActionIfLocked action=eThrowIfLocked)
Revoke data loader from the scope.
const CSeq_feat::TXref & GetXref(void) const
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
const CSeqFeatData & GetData(void) const
bool IsSetXref(void) const
const CTSE_Handle & GetTSE_Handle(void) const
bool IsSetExcept_text(void) const
virtual const CSeq_loc & GetLocation(void) const
const string & GetExcept_text(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
CSeqFeatData::E_Choice GetFeatType(void) const
const CGene_ref * GetGeneXref(void) const
get gene (if present) from Seq-feat.xref list
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetIgnoreFarLocationsForSorting(const CBioseq_Handle &handle)
Set handle used for determining what locations are "near".
EOverlapType GetOverlapType(void) const
Get the selected overlap type.
EOverlapType
Flag to indicate location overlapping method.
SAnnotSelector & SetLimitTSE(const CTSE_Handle &limit)
Limit annotations to those from the TSE only.
SAnnotSelector & SetIgnoreStrand(bool value=true)
Ignore strand when testing for range overlap.
@ eOverlap_Intervals
default - overlapping of individual intervals
@ eOverlap_TotalRange
overlapping of total ranges only
TObjectType * ReleaseOrNull(void)
Release a reference to the object and return a pointer to the object.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
static const SAutoMax kMax_Auto
Generic stand-in for type-specific kMax_* constants from ncbi_limits.h, useful in any context with ex...
int64_t Int8
8-byte (64-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
bool IsSetSyn(void) const
Synonyms for locus. Check if a value has been assigned to Syn data member.
const TSyn & GetSyn(void) const
Get the Syn member data.
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data member.
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TLocal & GetLocal(void) const
Get the variant data.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsLocal(void) const
Check if variant Local is selected.
const TId & GetId(void) const
Get the Id member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
const TGene & GetGene(void) const
Get the variant data.
bool IsSetId(void) const
The feature copied. Check if a value has been assigned to Id data member.
vector< CRef< CSeqFeatXref > > TXref
ENa_strand
strand of nucleic acid
list< CRef< CSeq_loc > > Tdata
@ eNa_strand_both
in forward orientation
unsigned int
A callback function used to compare two keys in a database.
range(_Ty, _Ty) -> range< _Ty >
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Selector used in CSeqMap methods returning iterators.