79 frame = (start - 1) % 3 + 1;
81 frame = -((seq_length - end) % 3 + 1);
99 const ncbi::TMaskedQueryRegions & mask_info,
100 int master_gentic_code,
int slave_genetic_code)
108 bool query_is_na, subject_is_na;
109 int query_length, subject_length;
111 int score, num_ident;
115 list<TGi> use_this_gi;
116 CBlastFormatUtil::GetAlnScores(kAlign, score, bit_score, evalue, sum_n,
117 num_ident, use_this_gi);
121 sprintf(
tmp,
"%.*g", 6, bit_score );
122 bit_score = atof(
tmp);
123 sprintf(
tmp,
"%.*g", 6, evalue );
126 xhsp->SetBit_score(bit_score);
127 xhsp->SetScore(score);
128 xhsp->SetEvalue(evalue);
134 query_is_na = kQueryBioseqHandle.
IsNa();
138 subject_is_na = kSubjBioseqHandle.
IsNa();
144 xhsp->SetQuery_from(0);
145 xhsp->SetQuery_to(0);
146 xhsp->SetHit_from(0);
148 xhsp->SetIdentity(num_ident);
152 xhsp_list.push_back(xhsp);
166 if (query_is_na && subject_is_na)
169 final_aln = densegAln;
187 string masked_query_seq;
194 const bool kIsBlastn =
195 (query_is_na && subject_is_na && !kTranslated);
206 reversed_ds->
Assign(kDenseg);
208 ds_pt = &(*reversed_ds);
211 int q_start, q_end, s_start, s_end, q_frame=0, s_frame=0;
213 unsigned int num_gaps = 0;
214 int align_length = 0;
237 if (!kTranslated && query_is_na && subject_is_na) {
238 xhsp->SetQuery_strand(
"Plus");
239 xhsp->SetHit_strand(
"Plus");
241 xhsp->SetHit_strand(
"Minus");
247 }
else if (kTranslated) {
253 q_start, q_end, query_length);
254 xhsp->SetQuery_frame(q_frame);
258 s_start, s_end, subject_length);
259 xhsp->SetHit_frame(s_frame);
263 xhsp->SetQuery_from(q_start);
264 xhsp->SetQuery_to(q_end);
265 xhsp->SetHit_from(s_start);
266 xhsp->SetHit_to(s_end);
268 if (mask_info.empty())
293 int num_positives = 0;
294 middle_seq = query_seq;
300 const unsigned int kMaxOffset =
min(query_seq.size(),
302 for (
unsigned int i = 0;
i < kMaxOffset; ++
i) {
303 if (query_seq[
i] == subject_seq[
i]) {
309 (*matrix)(query_seq[
i], subject_seq[
i]) > 0 &&
312 middle_seq[
i] = kIsBlastn ?
' ' :
'+';
318 xhsp->SetIdentity(num_ident);
319 xhsp->SetGaps(num_gaps);
320 xhsp->SetAlign_len(align_length);
322 if (mask_info.empty())
323 xhsp->SetQseq(query_seq);
325 xhsp->SetQseq(masked_query_seq);
326 xhsp->SetHseq(subject_seq);
327 xhsp->SetMidline(middle_seq);
328 if(!(query_is_na && subject_is_na && !kTranslated) )
329 xhsp->SetPositive(num_positives);
331 xhsp_list.push_back(xhsp);
350 const ncbi::TMaskedQueryRegions & mask_info,
351 bool ungapped,
int master_gentice_code,
352 int slave_genetic_code,
bool hasTaxDB)
357 const CSeq_id& kSeqId = kAlignSet.
Get().front()->GetSeq_id(1);
363 list <CRef<blastxml2::CHitDescr> > & descr_list = hit->SetDescription();
375 hit_exp->SetAccession(accession);
378 hit_exp->SetTitle(defline.
GetTitle());
382 hit_exp->SetTaxid(tax_id);
389 descr_list.push_back(hit_exp);
394 list<CRef<objects::CSeq_id> > ids;
402 hit_exp->SetAccession(accession);
405 hit_exp->SetTitle(sequence::CDeflineGenerator().GenerateDefline(subj_handle));
406 descr_list.push_back(hit_exp);
415 hit->SetDescription().push_back(hit_exp);
429 matrix, mask_info, master_gentice_code, slave_genetic_code);
432 mask_info, master_gentice_code, slave_genetic_code);
465 CSeq_align_set::Tdata::const_iterator iter = alnset->Get().begin();
469 const ncbi::TMaskedQueryRegions & mask_info =
data->GetMaskLocations();
470 bool ungapped = !(
data->IsGappedSearch());
471 int master_gentice_code =
data->GetQueryGeneticCode();
472 int slave_genetic_code =
data->GetDbGeneticCode();
473 bool hasTaxDB =
data->CanGetTaxInfo();
476 while (iter != alnset->Get().end()) {
478 new_hit->SetNum(index);
485 if ((*iter)->GetSegs().IsDisc()) {
487 ungapped, master_gentice_code, slave_genetic_code, hasTaxDB);
492 for ( ; iter != alnset->Get().end(); ++iter) {
494 if (!current_id->
Match(*next_id)) {
497 one_subject_alnset.
Set().push_back(*iter);
500 disc_align_wrap.
SetSegs().SetDisc(one_subject_alnset);
502 mask_info, ungapped, master_gentice_code, slave_genetic_code, hasTaxDB);
505 hits.push_back(new_hit);
517 string matrix_name =
data->GetMatrixName();
519 params.SetMatrix(matrix_name);
521 params.SetExpect(
data->GetEvalueThreshold());
525 if ((
val =
data->GetMatchReward()) != 0)
526 params.SetSc_match(
val);
528 if ((
val =
data->GetMismatchPenalty()) != 0)
529 params.SetSc_mismatch(
val);
531 if(
data->IsGappedSearch()) {
532 params.SetGap_open(
data->GetGapOpeningCost());
533 params.SetGap_extend(
data->GetGapExtensionCost());
536 params.SetPattern(
str);
539 params.SetFilter(
str);
542 params.SetBl2seq_mode(
str);
544 if((
val =
data->GetCompositionBasedStats()) != 0)
548 params.SetEntrez_query(
str);
550 if((
val =
data->GetQueryGeneticCode()) != 0)
551 params.SetQuery_gencode(
val);
553 if((
val =
data->GetDbGeneticCode()) != 0)
554 params.SetDb_gencode(
val);
565 if(!
data->IsBl2seq()) {
570 stats.SetHsp_len(
data->GetLengthAdjustment(num));
571 stats.SetEff_space(
data->GetEffectiveSearchSpace(num));
583 const CSeq_id * q_id = q_loc->GetId();
590 string q_title = sequence::CDeflineGenerator().GenerateDefline(bh);
592 search.SetQuery_title(q_title);
599 if(!
data->GetMaskLocations().empty()) {
600 list<CRef< blastxml2::CRange> > & masks = search.SetQuery_masking();
609 masks.push_back(rng);
613 blastxml2::CStatistics &
stats = search.SetStat();
616 string msg =
data->GetMessages(num);
618 if (
data->GetAlignmentSet(num).Empty()) {
620 msg += CBlastFormatUtil::kNoHitsFound;
621 search.SetMessage(msg);
626 search.SetMessage(msg);
628 list<CRef<blastxml2::CHit> > & hit_list = search.SetHits();
662 blastxml2::CReport & report = bxmlout.SetReport();
663 string program_name =
data->GetBlastProgramName();
664 report.SetProgram(program_name);
668 if(!
data->GetSubjectIds().empty()) {
669 report.SetSearch_target().SetSubjects() =
data->GetSubjectIds();
672 report.SetSearch_target().SetDb(
data->GetDatabaseName());
675 blastxml2::CParameters & params = report.SetParams();
678 blastxml2::CResults & results = report.SetResults();
679 if(
data->IsBl2seq()) {
680 list<CRef<blastxml2::CSearch> > & bl2seq = results.SetBl2seq();
681 for(
int i=0;
i <
data->GetNumOfSearchResults();
i++ ) {
684 bl2seq.push_back(search);
688 else if(
data->IsIterativeSearch()) {
689 list<CRef<blastxml2::CIteration> > & iterations = results.SetIterations();
690 for(
int i=0;
i <
data->GetNumOfSearchResults();
i++ ) {
692 itr->SetIter_num(
i+1);
693 blastxml2::CSearch & search = itr->SetSearch();
695 iterations.push_back(itr);
699 blastxml2::CSearch & search = results.SetSearch();
719 TTypeInfo typeInfo = bxmlout.GetThisTypeInfo();
723 xml_out->SetEnforcedStdXml();
724 xml_out->Write(&bxmlout, typeInfo );
731 TTypeInfo typeInfo = bxmlout.GetThisTypeInfo();
736 xml_out->SetReferenceSchema();
737 xml_out->SetUseSchemaLocation(
true);
738 xml_out->SetEnforcedStdXml();
739 xml_out->SetDTDFilePrefix(
"http://www.ncbi.nlm.nih.gov/data_specs/schema_alt/");
740 xml_out->SetDefaultSchemaNamespace(
"http://www.ncbi.nlm.nih.gov");
741 xml_out->Write(&bxmlout, typeInfo );
752 blastxml2::CBlastOutput2 bxmlout;
770 blastxml2::CBlastOutput2 bxmlout;
773 if(!out_stream.is_open())
787 xml_out->SetReferenceSchema();
788 xml_out->SetUseSchemaLocation(
true);
789 xml_out->SetEnforcedStdXml();
790 xml_out->SetDTDFilePrefix(
"http://www.ncbi.nlm.nih.gov/data_specs/schema_alt/");
791 xml_out->SetDefaultSchemaNamespace(
"http://www.ncbi.nlm.nih.gov");
793 blastxml2::CBlastXML2 xml2;
794 TTypeInfo typeInfo = xml2.GetThisTypeInfo();
795 xml_out->Write(&xml2, typeInfo);
798 string::size_type end_pos = out_str.find(
"</BlastXML2>");
799 out_str.erase(end_pos);
801 *out_stream << out_str;
808 blastxml2::CBlastOutput2 bxmlout;
809 bxmlout.SetError().SetCode(exit_code);
811 bxmlout.SetError().SetMessage(err_msg);
836 *out_stream <<
"{\n\"BlastOutput2\": [\n";
842 TTypeInfo typeInfo = bxmlout.GetThisTypeInfo();
847 json_out->Write(&bxmlout, typeInfo );
854 TTypeInfo typeInfo = bxmlout.GetThisTypeInfo();
859 json_out->Write(&bxmlout, typeInfo );
866 blastxml2::CBlastOutput2 bxmlout;
869 if(!out_stream.is_open())
879 blastxml2::CBlastOutput2 bxmlout;
User-defined methods of the data storage class.
Declares singleton objects to store the version and reference for the BLAST engine.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ ePHIBlastn
Nucleotide PHI BLAST.
@ ePHIBlastp
Protein PHI BLAST.
@ eMegablast
Nucl-Nucl (traditional megablast)
@ eDeltaBlast
Delta Blast.
virtual void WriteFileHeader(TTypeInfo type)
virtual ~CBlastOStreamJson(void)
virtual void EndOfWrite(void)
CBlastOStreamJson(CNcbiOstream &stream, EOwnership deleteOut)
CBlastOStreamXml(CNcbiOstream &stream, EOwnership deleteOut)
virtual ~CBlastOStreamXml(void)
virtual void WriteFileHeader(TTypeInfo type)
void Reverse(void)
Reverse the segments' orientation.
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
static CRef< objects::CSeq_align_set > PrepareBlastUngappedSeqalign(const objects::CSeq_align_set &alnset)
Static functions. Need to call this if the seqalign is stdseg or dendiag for ungapped blast alignment ...
SeqLocCharOption
character used to display seqloc, such as masked sequence
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
objects::ENa_strand GetStrand() const
Convert the frame to a strand.
const objects::CSeq_interval & GetInterval() const
CRef< CSeq_align > CreateTranslatedDensegFromNADenseg(void) const
Create a Dense-seg with widths from Dense-seg of nucleotides Used by AlnMgr to handle translated nucl...
CRef< CSeq_align > CreateDensegFromStdseg(SSeqIdChooser *SeqIdChooser=0) const
---------------------------------------------------------------------------- PRE : the Seq-align has ...
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row an alignment; all gaps by default.
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
static void GetSeqIdList(const objects::CBioseq_Handle &bh, list< CRef< objects::CSeq_id > > &ids)
Converts a Bioseq handle's sequence id type into a list of objects::CSeq_id references,...
static string GetSeqIdListString(const list< CRef< objects::CSeq_id > > &id, bool show_gi)
Creates a '|' delimited string, corresponding to a list of Seq-ids.
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Interface for filling the top layer of the XML report.
Collection of masked regions for a single query sequence.
string GetSeqIdString(const CSeq_id &id)
std::ofstream out("events_result.xml")
main entry point for tests
static const char * str(char *buf, int n)
static string GetString(EPublication pub)
Reference for requested publication.
EPublication
Enumerates the various BLAST publications.
@ eMaxPublications
Used as sentinel value.
@ ePhiBlast
1998 NAR paper
@ eCompBasedStats
2001 NAR paper
@ eGappedBlast
1997 NAR paper
@ eMegaBlast
2000 J Comput Biol paper
@ eDeltaBlast
2012 Biology Direct on DeltaBLAST
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
@ eNoOwnership
No ownership is assumed.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
@ eSerialVerifyData_No
do not verify
const string AsFastaString(void) const
CConstRef< CSeq_id > GetSeqId(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
virtual void EndOfWrite(void)
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
void IncIndentLevel(size_t step=2)
void PutEol(bool indent=true)
void DecIndentLevel(size_t step=2)
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
enum ENcbiOwnership EOwnership
Ownership relations between objects.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
const TSeqid & GetSeqid(void) const
Get the Seqid member data.
bool IsSetTitle(void) const
Simple title. Check if a value has been assigned to Title data member.
TTaxid GetTaxid(void) const
Get the Taxid member data.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSetTaxid(void) const
Check if a value has been assigned to Taxid data member.
const Tdata & Get(void) const
Get the member data.
const TTitle & GetTitle(void) const
Get the Title member data.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsDendiag(void) const
Check if variant Dendiag is selected.
const TStd & GetStd(void) const
Get the variant data.
const TDendiag & GetDendiag(void) const
Get the variant data.
bool IsStd(void) const
Check if variant Std is selected.
bool IsDisc(void) const
Check if variant Disc is selected.
const TIds & GetIds(void) const
Get the Ids member data.
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
const TStrands & GetStrands(void) const
Get the Strands member data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
bool IsLocal(void) const
Check if variant Local is selected.
static CRef< CSeq_align > CreateDensegFromDendiag(CSeq_align const &aln)
Defines BLAST database access classes.
string scientific_name
Scientific name, such as "Aotus vociferans".