44 const auto& cds = context.FeatCDS();
45 const auto& trnas = context.FeatTRNAs();
47 bool increase_count =
false;
48 static size_t bs_count = 0;
49 string report_item_str =
"[n] coding region[s] [has] overlapping tRNAs[*" + to_string(++bs_count) +
"*]";
51 for (
size_t i = 0;
i < cds.size();
i++) {
52 const CSeq_loc& loc_i = cds[
i]->GetLocation();
53 bool has_overlap =
false;
54 string cur_report_cds_trna_pair_str =
"Coding region overlaps tRNAs[*" + to_string(
i) +
"*]";
56 for (
size_t j = 0; j < trnas.size(); j++) {
57 const CSeq_loc& loc_j = trnas[j]->GetLocation();
61 if (need_to_compare) {
62 ovlp = context.Compare(loc_i, loc_j);
65 increase_count =
true;
71 subitem.
Ext().
Add(*context.SeqFeatObjRef(*cds[
i]));
73 subitem.
Ext().
Add(*context.SeqFeatObjRef(*trnas[j]));
83 static const string kCdsTrnaOverlapComment =
"TAA stop codon is completed by the addition of 3' A residues to the mRNA";
100 const CSeq_loc& loc_t =
f->GetLocation();
133 if (!br_loc->
GetId()) {
140 br_loc->
SetInt().SetId(*seq_id);
145 br_loc->
SetInt().SetId(*seq_id);
146 br_loc->
SetInt().SetFrom(rr.
GetTo() - 2 + ovlp_len);
151 code_break->
SetLoc().Assign(*br_loc);
152 code_break->
SetAa().SetNcbieaa(
'*');
158 if (comment.length()) {
164 new_cds->
SetData().SetCdregion().SetCode_break().push_back(code_break);
165 context.ReplaceSeq_feat(*obj, cds, *new_cds);
188 {
"16S", { 1000,
false } },
189 {
"18S", { 1000,
false } },
190 {
"23S", { 2000,
false } },
191 {
"25S", { 1000,
false } },
192 {
"26S", { 1000,
false } },
193 {
"28S", { 3300,
false } },
194 {
"small", { 1000,
false } },
195 {
"large", { 1000,
false } },
196 {
"5.8S", { 130,
true } },
197 {
"5S", { 90,
true } }
210 string rrna_name =
f.GetData().GetRna().GetRnaProductName();
213 if (pos !=
NPOS &&
len < it.second.first && !(it.second.second &&
f.IsSetPartial() &&
f.GetPartial()) ) {
223 const string kCDSRNAExactMatch =
"[n/2] coding region location[s] exactly match an RNA location";
234 const CSeqdesc* biosrc = context.GetBiosource();
235 bool is_eukariotic = context.IsEukaryotic(biosrc ? &biosrc->
GetSource() :
nullptr);
237 const auto& cds = context.FeatCDS();
238 const auto& rnas = context.Feat_RNAs();
239 for (
size_t i = 0;
i < rnas.size();
i++) {
240 const CSeq_loc& loc_i = rnas[
i]->GetLocation();
247 string rrna_name = rnas[
i]->GetData().GetRna().GetRnaProductName();
250 if (
NStr::FindNoCase(rrna_name, it.first) !=
NPOS &&
len < it.second.first && (!it.second.second || (rnas[
i]->IsSetPartial() && rnas[
i]->GetPartial())) ) {
259 for (
size_t j = 0; j < cds.size(); j++) {
260 const CSeq_loc& loc_j = cds[j]->GetLocation();
277 ENa_strand cds_strand = cds[j]->GetLocation().GetStrand();
278 ENa_strand rna_strand = rnas[
i]->GetLocation().GetStrand();
296 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
302 const auto& rrnas = context.FeatRRNAs();
303 for (
size_t i = 0;
i < rrnas.size();
i++) {
304 const CSeq_loc& loc_i = rrnas[
i]->GetLocation();
305 for (
size_t j =
i + 1; j < rrnas.size(); j++) {
306 const CSeq_loc& loc_j = rrnas[j]->GetLocation();
308 m_Objs[
"[n] rRNA feature[s] overlap[S] another rRNA feature."].
Add(*context.SeqFeatObjRef(*rrnas[
i])).Add(*context.SeqFeatObjRef(*rrnas[j])).Fatal();
319 const auto& genes = context.FeatGenes();
320 for (
size_t i = 0;
i < genes.size();
i++) {
321 const CSeq_loc& loc_i = genes[
i]->GetLocation();
323 for (
size_t j =
i + 1; j < genes.size(); j++) {
324 const CSeq_loc& loc_j = genes[j]->GetLocation();
326 m_Objs[
"[n] gene[s] overlap[S] another gene on the same strand."].
Add(*context.SeqFeatObjRef(*genes[
i])).Add(*context.SeqFeatObjRef(*genes[j]));
337 const auto& genes = context.FeatGenes();
338 for (
size_t i = 0;
i < genes.size();
i++) {
339 const CSeq_loc& loc_i = genes[
i]->GetLocation();
342 for (
size_t j =
i + 1; j < genes.size(); j++) {
343 const CSeq_loc& loc_j = genes[j]->GetLocation();
346 if (strand_i == strand_j) {
350 m_Objs[
"[n] gene[s] completely overlapped by other genes"].Add(*context.SeqFeatObjRef(*genes[
i]));
353 m_Objs[
"[n] gene[s] completely overlapped by other genes"].Add(*context.SeqFeatObjRef(*genes[j]));
365 const auto& genes = context.FeatGenes();
366 for (
size_t i = 0;
i < genes.size();
i++) {
367 const CSeq_loc& loc_i = genes[
i]->GetLocation();
369 for (
size_t j =
i + 1; j < genes.size(); j++) {
370 const CSeq_loc& loc_j = genes[j]->GetLocation();
376 m_Objs[
"[n] genes match other genes in the same location, but on the opposite strand"].Add(*context.SeqFeatObjRef(*genes[
i])).Add(*context.SeqFeatObjRef(*genes[j]));
387 const auto& pseudo = context.FeatPseudo();
388 const auto& mrnas = context.FeatMRNAs();
389 for (
size_t i = 0;
i < mrnas.size();
i++) {
390 const CSeq_loc& loc_i = mrnas[
i]->GetLocation();
391 for (
size_t j = 0; j < pseudo.size(); j++) {
392 const CSeq_loc& loc_j = pseudo[j]->GetLocation();
427 static const string kIntronExon =
"[n] introns and exons are incorrectly positioned";
432 sort(vint.begin(), vint.end(),
less);
433 auto Iex = vex.cbegin();
434 auto Iint = vint.cbegin();
435 while (Iex != vex.cend() && Iint != vint.cend()) {
458 const auto& genes = context.FeatGenes();
459 const auto& exons = context.FeatExons();
460 const auto& introns = context.FeatIntrons();
461 if (exons.empty() || introns.empty()) {
465 vector<const CSeq_feat*> vex;
466 vector<const CSeq_feat*> vint;
467 vex.insert(vex.end(), exons.cbegin(), exons.cend());
468 vint.insert(vint.end(), introns.cbegin(), introns.cend());
473 if (gg->CanGetExcept_text() && gg->GetExcept_text() ==
"trans-splicing") {
478 vector<const CSeq_feat*> vex;
479 vector<const CSeq_feat*> vint;
498 static const string kGeneMisc =
"[n] gene[s] overlap[S] with IGS misc features";
502 for (
const CSeq_feat* gene : context.FeatGenes()) {
503 if (gene->IsSetLocation() && gene->IsSetData() && gene->GetData().GetGene().IsSetLocus() &&
506 const CSeq_loc& loc_gene = gene->GetLocation();
507 bool gene_added =
false;
509 for (
const CSeq_feat* misc : context.FeatMisc()) {
510 if (misc->IsSetLocation() && misc->IsSetComment() &&
NStr::FindNoCase(misc->GetComment(),
"intergenic spacer") !=
NPOS) {
511 const CSeq_loc& loc_misc = misc->GetLocation();
514 m_Objs[
kGeneMisc].
Add(*context.SeqFeatObjRef(*gene)).Incr();
517 m_Objs[
kGeneMisc].Add(*context.SeqFeatObjRef(*misc));
530 const auto& genes = context.FeatGenes();
531 const auto& cds = context.FeatCDS();
532 const auto& mrnas = context.FeatMRNAs();
534 const CGene_ref& gref = gene->GetData().GetGene();
540 if (context.GetGeneForFeature(*feat) == &*gene) {
547 if (context.GetGeneForFeature(*feat) == &*gene) {
554 m_Objs[
"[n] gene[s] missing locus"].Add(*context.SeqFeatObjRef(*gene, gene));
566 new_feat->
SetData().SetGene().ResetDesc();
567 context.ReplaceSeq_feat(*obj, *sf, *new_feat);
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
CRef< CDiscrepancyObject > SeqFeatObjRef(const CSeq_feat &feat, EFixType fix=eFixNone, const CObject *more=nullptr)
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
CReportNode & Ext(bool b=true)
namespace ncbi::objects::
#define DISCREPANCY_AUTOFIX(name)
#define DISCREPANCY_CASE(name, type, group, descr)
#define DISCREPANCY_SUMMARIZE(name)
int GetSubtype(CFieldNamePanel *field_name_panel, string &ncRNA_class)
std::ofstream out("events_result.xml")
main entry point for tests
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
void SetPartialStop(bool val, ESeqLocExtremes ext)
TSeqPos GetStop(ESeqLocExtremes ext) const
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eNoOverlap
CSeq_locs do not overlap or abut.
CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Finds gene for feature, but obeys SeqFeatXref directives.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void Remove(void) const
Remove the feature from Seq-annot.
CScope & GetScope(void) const
Get scope this handle belongs to.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
void Reset(void)
Reset reference object.
position_type GetToOpen(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool CanGetLocus(void) const
Check if it is safe to call GetLocus method.
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
const TLocus & GetLocus(void) const
Get the Locus member data.
void SetAa(TAa &value)
Assign a value to Aa data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
const TLocation & GetLocation(void) const
Get the Location member data.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
const TComment & GetComment(void) const
Get the Comment member data.
const TGene & GetGene(void) const
Get the variant data.
bool CanGetComment(void) const
Check if it is safe to call GetComment method.
ENa_strand
strand of nucleic acid
const TSource & GetSource(void) const
Get the variant data.
constexpr auto sort(_Init &&init)
double f(double x_, const double &y_)
const string kCDSRNAContainstRNA
const string kCDSRNAOverlapNoContainSameStrand
const string kCDSRNAOverlapNoContain
const string kCDSRNAExactMatch
static const string kGeneMisc
static const string kCdsTrnaOverlapComment
static const TRNALengthMap kTrnaLengthMap
static void CollectExonsIntrons(CReportNode &out, CDiscrepancyContext &context, vector< const CSeq_feat * > &vex, vector< const CSeq_feat * > &vint)
static bool less(const CSeq_feat *A, const CSeq_feat *B)
const string kCDSRNAOverlapNoContainOppStrand
const string kCDSRNAContains
bool IsShortrRNA(const CSeq_feat &f, CScope *scope)
const string kCDSRNAContainedIn
static const string kIntronExon
map< string, TRNALength > TRNALengthMap
pair< size_t, bool > TRNALength
static const string kCDSoverlapTRNA
const string kCDSRNAAnyOverlap
float g0(Seg_Nsm *spn, Thd_Cxe *cxe)
float Overlap(iterator1 iter1, iterator1 end1, iterator2 iter2, iterator2 end2)
Overlap measure.