43 #include "../core/jumper.h"
57 : m_Queries(query_factory),
58 m_LocalDbAdapter(blastdb),
59 m_Options(&options->SetOptions()),
60 m_BtopSpliceSignals(
true)
63 if (!
env.Get(
"BTOP_NO_SPLICE_SIGNALS").empty()) {
118 vector< CConstRef<objects::CSeq_id> > seqid_vec;
119 vector< CRef<CBlastAncillaryData> > ancill_vec;
122 unsigned int num_subjects = 0;
129 if(subject_infosrc !=
NULL) {
130 num_subjects = subject_infosrc->
Size();
134 for (index=0; index<local_query_data->
GetNumQueries(); index++)
137 local_query_data->
GetSeq_loc(index)->GetId());
141 msg_vec.push_back(q_msg);
142 seqid_vec.push_back(query_id);
144 sa_vec.push_back(tmp_align);
145 pair<double, double> tmp_pair(-1.0, -1.0);
150 ancill_vec.push_back(tmp_ancillary_data);
152 for(
unsigned int i=1;
i < num_subjects;
i++) {
154 msg_vec.push_back(msg);
155 seqid_vec.push_back(query_id);
157 sa_vec.push_back(tmp_align);
160 ancill_vec.push_back(tmp_ancillary_data);
202 "Missing database or subject sequences");
212 bool btop_splice_signals)
216 const Uint1 kGap = 15;
218 int num_identical = 0;
229 if (subject_gap > 0 &&
236 if (btop_splice_signals) {
237 string acceptor(2u,
' ');
239 (
int)((
prev->map_info->right_edge >> 2) & 3)]);
241 (
int)(
prev->map_info->right_edge & 3)]);
246 string donor(2u,
' ');
258 else if (subject_gap > 0) {
270 else if (query_gap < 0) {
279 query_pos += num_matches;
282 num_identical += num_matches;
283 if (num_matches > 0) {
294 if (buff[1] !=
'-') {
296 buff[0] ==
'-' && num_matches == 0) {
298 md_tag += (
string)(&buff[1]);
302 if (buff[0] ==
'-') {
305 md_tag += (
string)(&buff[1]);
310 md_matches += num_matches;
318 num_matches = hsp->
query.
end - query_pos;
319 num_identical += num_matches;
320 if (num_matches > 0) {
322 md_matches += num_matches;
325 if (md_matches > 0) {
328 len += num_identical;
330 perc_id = (double)(num_identical * 100) / (double)
len;
338 bool btop_splice_signals)
354 subject_id, &subj_length);
358 query_length, chain);
366 user_object->
SetType().SetStr(
"Mapper Info");
367 align->
SetExt().push_back(user_object);
379 btop_splice_signals);
380 user_object->
AddField(
"btop", btop);
381 user_object->
AddField(
"md_tag", md_tag);
398 bool btop_splice_signals)
403 for (
const HSPChain* chain = chains; chain; chain = chain->
next) {
407 if (chain->pair && chain->context > chain->pair->context) {
423 btop_splice_signals));
425 seqinfo_src, query_info,
426 btop_splice_signals));
430 btop_splice_signals);
433 seq_aligns->
Set().push_back(align);
458 for (
int index=0;index < results->
num_queries;index++) {
465 for (
auto it: seq_aligns->
Get()) {
466 retval->
Set().push_back(it);
476 return a->GetSegs().IsDisc() && !
b->GetSegs().IsDisc();
497 for (
int index=0;index < results->
num_queries;index++) {
528 for (
auto it: mate_aligns->
Get()) {
529 aligns->
Set().push_back(it);
537 &query_masks[index + 1],
548 retval->push_back(res);
562 : m_QueryId(query_id),
567 x_SetInfo(query_length, query_mask, mate_length, mate_mask);
575 : m_QueryId(query_id),
588 if (
a->GetSegs().IsDisc() &&
b->GetSegs().IsDisc()) {
589 const CSeq_align& a_first = *
a->GetSegs().GetDisc().Get().front();
590 const CSeq_align& a_second = *
a->GetSegs().GetDisc().Get().back();
591 const CSeq_align& b_first = *
b->GetSegs().GetDisc().Get().front();
592 const CSeq_align& b_second = *
b->GetSegs().GetDisc().Get().back();
607 return (
a->GetSegs().IsDisc() && !
b->GetSegs().IsDisc());
617 if (
a->GetSegs().IsDisc() &&
b->GetSegs().IsDisc()) {
618 const CSeq_align& a_first = *
a->GetSegs().GetDisc().Get().front();
619 const CSeq_align& a_second = *
a->GetSegs().GetDisc().Get().back();
620 const CSeq_align& b_first = *
b->GetSegs().GetDisc().Get().front();
621 const CSeq_align& b_second = *
b->GetSegs().GetDisc().Get().back();
636 return (
a->GetSegs().IsDisc() && !
b->GetSegs().IsDisc());
660 bool first_aligned =
false;
661 bool last_aligned =
false;
670 if (it->GetSegs().IsDisc()) {
671 first_aligned =
true;
675 it->GetSegs().GetDisc().Get();
700 else if (it->GetSeq_id(0).Match(*
m_QueryId)) {
701 first_aligned =
true;
703 else if (it->GetSeq_id(0).Match(*
m_MateId)) {
709 if (!first_aligned) {
717 if (first_mask && !first_mask->empty()) {
718 TSeqRange first_range(*first_mask->front());
724 if (last_mask && !last_mask->empty()) {
725 TSeqRange last_range(*last_mask->front());
736 for (
auto result: *
this) {
738 if (no_discordant &&
result->IsPaired() && !
result->IsConcordant()) {
742 for (
auto it:
result->GetSeqAlign()->Get()) {
743 retval->
Set().push_back(it);
Auxiliary functions for BLAST.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
void BlastHSPStreamMappingClose(BlastHSPStream *hsp_stream, BlastMappingResults *results)
Closes BlastHSPStream structure for mapping and produces BlastMappingResults.
@ eFirstSegment
The first sequence of a pair with both sequences read and accepted.
Definition of classes which constitute the results of running a BLAST search.
Utility function to convert internal BLAST result structures into objects::CSeq_align_set objects.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
Class used to return ancillary data from a blast search, i.e.
Defines BLAST error codes (user errors included)
Search class to perform the preliminary stage of the BLAST search.
Index wrapper exceptions.
Results of Magic-BLAST mapping.
Magic-BLAST results for a single query/read or a pair of reads.
@ eScore_PercentIdentity_Gapped
void SetNamedScore(const string &id, int score)
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
Abstract base class to encapsulate retrieval of sequence identifiers.
Collection of masked regions for a single query sequence.
Class for the messages for an individual query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0
Get the Seq_loc for the sequence indicated by index.
int CheckInternalData()
Checks that internal data is valid.
size_t GetNumberOfThreads(void) const
Accessor for the number of threads to use.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
CRef< CLocalDbAdapter > m_LocalDbAdapter
Reference to a BLAST subject/database object.
bool m_Concordant
True if results are concordant pair.
CRef< IQueryFactory > m_Queries
Queries.
bool m_Paired
True if results are for paired reads.
CRef< SInternalData > m_InternalData
Internal data strctures.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
CRef< SInternalData > Run()
Borrow the internal data and results results.
CRef< CMagicBlastResultSet > RunEx(void)
void x_SetInfo(int first_length, const TMaskedQueryRegions *first_masks, int last_length=0, const TMaskedQueryRegions *last_masks=NULL)
CStructWrapper< TData > * WrapStruct(TData *obj, TData *(*del)(TData *))
Auxiliary function to create a CStructWrapper for a pointer to an object.
void SortAlignments(EOrdering order)
Sort alignments by selected criteria (pair configuration)
CRef< CSeq_align_set > x_BuildSeqAlignSet(const BlastMappingResults *results)
const char BLASTNA_TO_IUPACNA[]
Translates between blastna and iupacna.
vector< CConstRef< objects::CSeq_id > > TQueryIdVector
List of query ids.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
bool IsBlastDb() const
Returns true if this object represents a BLAST database.
virtual void SetNumberOfThreads(size_t nthreads)
@inheritDoc
void x_Validate(void)
Perform sanity checks on input arguments.
static CRef< CSeq_align > s_CreateSeqAlign(const HSPChain *chain, CRef< ILocalQueryData > &qdata, CRef< IBlastSeqInfoSrc > &seqinfo_src, const BlastQueryInfo *query_info, bool btop_splice_signals)
static CRef< CSeq_align_set > x_CreateSeqAlignSet(const HSPChain *results, CRef< ILocalQueryData > qdata, CRef< IBlastSeqInfoSrc > seqinfo_src, const BlastQueryInfo *query_info, bool btop_splice_signals)
Create results.
CRef< CSeq_align_set > GetFlatResults(bool no_discordant=false)
Get all results as a single Seq-align-set object.
CConstRef< CSeq_id > m_QueryId
Query id.
void Combine(const TSearchMessages &other_msgs)
Combine another set of search messages with this one.
bool m_BtopSpliceSignals
Should BTOP strings be formatted with splice signals.
EOrdering
Ordering of alignments.
IBlastSeqInfoSrc * MakeSeqInfoSrc()
Retrieves or constructs the IBlastSeqInfoSrc.
BlastQueryInfo * m_QueryInfo
The query information structure.
bool operator()(const CRef< CSeq_align > &a, const CRef< CSeq_align > &b)
CConstRef< CSeq_id > m_MateId
Mate id if results are for paired reads.
virtual size_t GetNumQueries()=0
Get the number of queries.
CRef< CSeq_align_set > CreateEmptySeq_align_set()
Constructs an empty Seq-align-set containing an empty discontinuous seq-align, and appends it to a pr...
TResultsInfo m_FirstInfo
Alignment flags for the query.
virtual size_t Size() const =0
Returns the size of the underlying container of sequences.
void GetQueryMessages(size_t index, TQueryMessages &qmsgs)
Retrieve error/warning messages for a specific query.
CRef< CSeq_align_set > m_Aligns
Alignments for a single or a pair of reads.
void MakeSplicedSeg(CSpliced_seg &spliced_seg, CRef< CSeq_id > product_id, CRef< CSeq_id > genomic_id, int product_length, const HSPChain *chain)
Convert a spliced alignmeny in BlastHSPChain into Spliced_seg.
CMagicBlast(CRef< IQueryFactory > query_factory, CRef< CLocalDbAdapter > blastdb, CRef< CMagicBlastOptionsHandle > options)
Constructor to map short reads as queries to a genome as BLAST database.
static void s_ComputeBtopAndIdentity(const HSPChain *chain, string &btop, string &md_tag, double &perc_id, bool btop_splice_signals)
CRef< CBlastPrelimSearch > m_PrelimSearch
Object that runs BLAST search.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
CRef< CBlastOptions > m_Options
Options to configure the search.
TSearchMessages GetSearchMessages() const
Retrieve any error/warning messages that occurred during the search.
const TSeqLocInfoVector & GetQueryMasks(void) const
Return query masks.
CRef< CSeq_align_set > Run(void)
Run the RNA-Seq mapping.
CRef< TBlastHSPStream > m_HspStream
HSP output of the preliminary stage goes here.
CMagicBlastResults(CConstRef< CSeq_id > query_id, CConstRef< CSeq_id > mate_id, CRef< CSeq_align_set > aligns, const TMaskedQueryRegions *query_mask=NULL, const TMaskedQueryRegions *mate_mask=NULL, int query_length=0, int mate_length=0)
Constructor for a pair.
void GetSequenceLengthAndId(const IBlastSeqInfoSrc *seqinfo_src, int oid, CRef< objects::CSeq_id > &seqid, TSeqPos *length)
Retrieves subject sequence Seq-id and length.
CRef< CMagicBlastResultSet > x_BuildResultSet(const BlastMappingResults *results)
TResultsInfo m_LastInfo
Alignment flags for the mate.
bool IsDbScanMode() const
Returns true if this is not a database but is database scanning mode.
@ eCoreBlastError
FIXME: need to interpret CORE errors.
@ fUnaligned
Read is unaligned.
@ fFiltered
Read did not pass quality filtering.
unsigned int TSeqPos
Type for sequence locations and lengths.
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
static int BlastRank(const CRef< CSeq_id > &id)
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
position_type GetLength(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
void SetType(TType &value)
Assign a value to Type data member.
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetDim(TDim value)
Assign a value to Dim data member.
void SetType(TType value)
Assign a value to Type data member.
TExt & SetExt(void)
Assign a value to Ext data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
@ eType_partial
mapping pieces together
ENa_strand
strand of nucleic acid
unsigned int
A callback function used to compare two keys in a database.
#define MAPPER_SPLICE_SIGNAL
Declares CMagicBlast, the C++ API for the BLAST RNA-Seq mapping engine.
#define ASSERT
macro for assert.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
BlastMappingResults * Blast_MappingResultsFree(BlastMappingResults *results)
Free BlastMappingResults structure.
BlastMappingResults * Blast_MappingResultsNew(void)
Initialize BlastMappingResults structure.
Int4 query_length
Length of this query, strand or frame.
Int4 segment_flags
Flags describing segments for paired reads.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
Structure holding all information about an HSP.
BlastSeg query
Query sequence info.
BlastSeg subject
Subject sequence info.
BlastHSPMappingInfo * map_info
Structure that contains BLAST mapping results.
The query related information.
BlastContextInfo * contexts
Information per context.
A chain of HSPs: spliced alignment.
Int4 score
Alignment score for the chain.
Int4 count
Number of placements for the read.
HSPContainer * hsps
A list of HSPs that belong to this chain.
Int4 context
Contex number of query sequence.
struct HSPChain * next
Pointer to the next chain in a list.
struct HSPContainer * next
Uint1 query_base
Query base at this position.
Uint1 subject_base
Subject base at this position.
Int4 query_pos
Query position.
Alignment edit script for gapped alignment.