// Segment classification helpers. Each macro takes two flag words and tests
// the CAlnMap::fSeq bit in each of them.
// NOTE(review): sf/tf presumably stand for "source flags"/"target flags" of
// the two alignment rows being compared - confirm against the call sites.

// True when the fSeq bit is set in sf but clear in tf.
61 #define INSERTION(sf, tf) ( ((sf) & CAlnMap::fSeq) && !((tf) & CAlnMap::fSeq) )
// True when the fSeq bit is clear in sf but set in tf.
62 #define DELETION(sf, tf) ( !((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
// True when the fSeq bit is set in both sf and tf.
63 #define MATCH(sf, tf) ( ((sf) & CAlnMap::fSeq) && ((tf) & CAlnMap::fSeq) )
72 static const vector<string> supportedScores{
73 "Gap",
"ambiguous_orientation",
"consensus_splices",
74 "pct_coverage",
"pct_identity_gap",
"pct_identity_ungap",
77 static const vector<string> coreScores{
81 if (!score.IsSetId() || !score.GetId().IsStr()) {
84 string key = score.GetId().GetStr();
85 if (seqId == mCurrentIdForAttributes &&
86 std::find(coreScores.begin(), coreScores.end(),
key) == coreScores.end()) {
89 if (std::find(supportedScores.begin(), supportedScores.end(),
key)
90 == supportedScores.end()) {
102 const auto& seqId = record.
StrSeqId();
119 for (
auto cit = tags.begin();
120 taxonIdStr.empty() && cit != tags.end(); ++cit) {
121 const auto&
tag = **cit;
122 if (!
tag.IsSetDb() ||
tag.GetDb() !=
"taxon") {
125 const auto& objid =
tag.GetTag();
126 switch (objid.Which()) {
130 if (!objid.GetStr().empty()) {
131 taxonIdStr = objid.GetStr();
140 if (!taxonIdStr.empty()) {
154 const auto& seqId = record.
StrSeqId();
163 auto defline = sequence::CDeflineGenerator().GenerateDefline(bsh);
173 if (!m_bHeaderWritten) {
174 m_Os <<
"##gff-version 3" <<
'\n';
175 m_Os <<
"#!gff-spec-version 1.20" <<
'\n';
176 m_Os <<
"##!gff-variant flybase" <<
'\n';
177 m_Os <<
"# This variant of GFF3 interprets ambiguities in the" <<
'\n';
178 m_Os <<
"# GFF3 specifications in accordance with the views of Flybase." <<
'\n';
179 m_Os <<
"# This impacts the feature tag set, and meaning of the phase." <<
'\n';
180 m_Os <<
"#!processor NCBI annotwriter" <<
'\n';
181 m_bHeaderWritten =
true;
195 m_Os <<
"###" <<
'\n';
206 bool isProteinProd = xSplicedSegHasProteinProd(spliced);
208 const unsigned int tgtWidth = isProteinProd ? 3 : 1;
261 string seqStrand =
"+";
267 string target = genomicLabel;
268 target +=
" " + seqStart;
269 target +=
" " + seqStop;
270 target +=
" " + seqStrand;
299 typedef list<CRef<CScore> > SCORES;
302 for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
304 const CScore& score = **cit;
330 typedef vector<CRef<CScore> > SCORES;
332 const SCORES& scores = align.
GetScore();
333 for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
335 const CScore& score = **cit;
364 catch(std::exception&) {};
389 catch(std::exception&) {};
401 while (start2 < 0 && start_seg < numSegs) {
402 start2 = alnMap.
GetStart(srcRow, start_seg++);
406 int stop_seg = numSegs-1;
407 while (stop2 < 0 && stop_seg >= 0) {
408 stop2 = alnMap.
GetStart(srcRow, stop_seg--);
413 stop2 += alnMap.
GetLen(start_seg-1)-1;
416 stop2 += alnMap.
GetLen(stop_seg+1)-1;
453 typedef vector<CRef<CScore> > SCORES;
458 const SCORES& scores = denseSeg.
GetScores();
459 for (SCORES::const_iterator cit = scores.begin(); cit != scores.end();
461 const CScore& score = **cit;
480 bool isProteinProd = xSplicedSegHasProteinProd(spliced);
481 const unsigned int tgtWidth = isProteinProd ? 3 : 1;
483 typedef list<CRef<CSpliced_exon_chunk> > CHUNKS;
485 const CHUNKS& chunks = exon.
GetParts();
486 for (CHUNKS::const_iterator cit = chunks.begin(); cit != chunks.end(); ++cit) {
488 switch (chunk.
Which()) {
503 if (del_length > 0) {
507 const unsigned int forward_shift = chunk.
GetGenomic_ins()%tgtWidth;
508 if (forward_shift > 0) {
516 const unsigned int insert_length = chunk.
GetProduct_ins()/tgtWidth;
517 if (insert_length > 0) {
521 const unsigned int reverse_shift = chunk.
GetProduct_ins()%tgtWidth;
522 if (reverse_shift > 0) {
548 if (!align1 && align2) {
552 if ((align1 && !align2) ||
553 (!align1 && !align2) ) {
557 string subject_accession1;
563 string subject_accession2;
570 auto make_key = [](
const pair<CConstRef<CSeq_align>,
string>& p,
CScope& scope) {
572 const string alignId = p.second;
574 string subject_accession;
580 string target_accession;
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
const CSeq_id & GetSeqId(TNumrow row) const
TSeqPos GetLen(TNumseg seg, int offset=0) const
const CDense_seg & GetDenseg(void) const
TSeqPos GetSeqStop(TNumrow row) const
int StrandSign(TNumrow row) const
TNumseg GetNumSegs(void) const
TSeqPos GetSeqStart(TNumrow row) const
virtual bool xAssignAlignmentDensegSeqId(CGffAlignRecord &, const CAlnMap &, unsigned int) override
virtual bool xAssignAlignmentSplicedScores(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &) override
bool xAssignTaxid(CBioseq_Handle, CGffAlignRecord &)
virtual bool xAssignAlignmentDensegLocation(CGffAlignRecord &, const CAlnMap &, unsigned int) override
virtual void x_SortAlignments(TAlignCache &alignCache, CScope &scope) override
virtual bool WriteHeader() override
bool xAssignDefline(CBioseq_Handle, CGffAlignRecord &)
virtual bool xAssignAlignmentSplicedTarget(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &) override
map< string, string > mDeflineMap
map< string, string > mTaxidMap
string mCurrentIdForAttributes
virtual bool xAssignAlignmentSplicedSeqId(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &) override
virtual bool xAssignAlignmentSplicedLocation(CGffAlignRecord &, const CSpliced_seg &, const CSpliced_exon &) override
virtual bool xAssignAlignmentSplicedGap(CGffAlignRecord &record, const CSpliced_seg &spliced, const CSpliced_exon &exon) override
virtual bool xWriteAlignDisc(const CSeq_align &, const string &="") override
bool xIsNeededScore(const std::string &, const CScore &) const
virtual bool xAssignAlignmentScores(CGffAlignRecord &, const CSeq_align &) override
virtual bool xAssignAlignmentDensegScores(CGffAlignRecord &, const CAlnMap &, unsigned int) override
virtual bool xAssignAlignmentDensegTarget(CGffAlignRecord &, const CAlnMap &, unsigned int) override
virtual bool xWriteAlignDisc(const CSeq_align &, const string &="")
void AddInsertion(unsigned int)
void AddMatch(unsigned int)
void AddReverseShift(unsigned int)
void AddDeletion(unsigned int)
void AddForwardShift(unsigned int)
void SetPhase(unsigned int)
void SetSeqId(const string &)
void SetLocation(unsigned int, unsigned int, ENa_strand=objects::eNa_strand_unknown)
bool SetAttribute(const string &, const string &)
void SetScore(const CScore &)
virtual string StrSeqId() const
CProt_pos_Base::TFrame GetFrame() const
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
const_iterator end() const
const_iterator find(const key_type &key) const
API (CDeflineGenerator) for computing sequences' titles ("definitions").
unsigned int TSeqPos
Type for sequence locations and lengths.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eContent
Untagged human-readable accession or the like.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
string GetAccessionForId(const objects::CSeq_id &id, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession string for a Seq-id.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
@ eGetId_ForceAcc
return only an accession based seq-id
const CSeq_id_Handle & GetSeq_id_Handle(void) const
Get handle of id used to obtain this bioseq handle.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
bool IsSetDb(void) const
ids in taxonomic or culture dbases. Check if a value has been assigned to Db data member.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
const TProtpos & GetProtpos(void) const
Get the variant data.
bool IsSetFrame(void) const
position within codon (1-based); 0 = not set (meaning 1). Check if a value has been assigned to Frame data member.
bool IsProtpos(void) const
Check if variant Protpos is selected.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
bool IsSetScores(void) const
score for each seg. Check if a value has been assigned to Scores data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
TDiag GetDiag(void) const
Get the variant data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TScores & GetScores(void) const
Get the Scores member data.
bool IsSetScore(void) const
for whole alignment. Check if a value has been assigned to Score data member.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
const Tdata & Get(void) const
Get the member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
const TScore & GetScore(void) const
Get the Score member data.
const TScores & GetScores(void) const
Get the Scores member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
bool IsSetScores(void) const
scores for this exon. Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
ENa_strand
strand of nucleic acid
@ e_Source
source of materials, includes Org-ref
const struct ncbi::grid::netcache::search::fields::KEY key
SFlybaseCompareAlignments(CScope &scope)
bool operator()(const pair< CConstRef< CSeq_align >, string > &p1, const pair< CConstRef< CSeq_align >, string > &p2)