59 ncbi::TMaskedQueryRegions
mask;
74 bool add_reference_dtdi =
false,
75 bool add_xml_versioni =
false );
91 frame = (start - 1) % 3 + 1;
93 frame = -((seq_length - end) % 3 + 1);
111 const ncbi::TMaskedQueryRegions* mask_info,
112 int master_gentic_code,
int slave_genetic_code)
120 bool query_is_na, subject_is_na;
121 int query_length, subject_length;
123 int score, num_ident;
127 list<TGi> use_this_gi;
128 CBlastFormatUtil::GetAlnScores(kAlign, score, bit_score, evalue, sum_n,
129 num_ident, use_this_gi);
133 sprintf(
tmp,
"%.*g", 6, bit_score );
134 bit_score = atof(
tmp);
135 sprintf(
tmp,
"%.*g", 6, evalue );
138 xhsp->SetBit_score(bit_score);
139 xhsp->SetScore(score);
140 xhsp->SetEvalue(evalue);
146 query_is_na = kQueryBioseqHandle.
IsNa();
150 subject_is_na = kSubjBioseqHandle.
IsNa();
156 xhsp->SetQuery_from(0);
157 xhsp->SetQuery_to(0);
158 xhsp->SetHit_from(0);
160 xhsp->SetIdentity(num_ident);
164 xhsp_list.push_back(xhsp);
178 if (query_is_na && subject_is_na)
181 final_aln = densegAln;
199 string masked_query_seq;
206 const bool kIsBlastn =
207 (query_is_na && subject_is_na && !kTranslated);
218 reversed_ds->
Assign(kDenseg);
220 ds_pt = &(*reversed_ds);
223 int q_start, q_end, s_start, s_end, q_frame=0, s_frame=0;
225 unsigned int num_gaps = 0;
226 int align_length = 0;
249 if (!kTranslated && query_is_na && subject_is_na) {
250 q_frame = s_frame = 1;
259 }
else if (kTranslated) {
265 q_start, q_end, query_length);
268 s_start, s_end, subject_length);
271 xhsp->SetQuery_frame(q_frame);
272 xhsp->SetHit_frame(s_frame);
274 xhsp->SetQuery_from(q_start);
275 xhsp->SetQuery_to(q_end);
276 xhsp->SetHit_from(s_start);
277 xhsp->SetHit_to(s_end);
306 int num_positives = 0;
307 middle_seq = query_seq;
313 const unsigned int kMaxOffset =
static_cast<unsigned int>(
min(query_seq.size(),
314 subject_seq.size()));
315 for (
unsigned int i = 0;
i < kMaxOffset; ++
i) {
316 if (query_seq[
i] == subject_seq[
i]) {
322 (*matrix)(query_seq[
i], subject_seq[
i]) > 0 &&
325 middle_seq[
i] = kIsBlastn ?
' ' :
'+';
331 xhsp->SetIdentity(num_ident);
332 xhsp->SetGaps(num_gaps);
333 xhsp->SetAlign_len(align_length);
336 xhsp->SetQseq(masked_query_seq);
338 xhsp->SetQseq(query_seq);
339 xhsp->SetHseq(subject_seq);
340 xhsp->SetMidline(middle_seq);
341 xhsp->SetPositive(num_positives);
344 xhsp_list.push_back(xhsp);
359 const ncbi::TMaskedQueryRegions* mask_info,
360 bool ungapped,
int master_gentice_code,
int slave_genetic_code)
366 if (kAlignSet.
Get().empty())
372 const CSeq_id& kSeqId = kAlignSet.
Get().front()->GetSeq_id(1);
378 list<TGi> use_this_gi;
387 defline =
"No definition line";
390 hit->SetDef(defline);
396 hit->SetAccession(accession);
405 hit->SetDef(
"Unknown");
406 hit->SetAccession(
"Unknown");
420 matrix, mask_info, master_gentice_code, slave_genetic_code);
423 mask_info, master_gentice_code, slave_genetic_code);
455 const ncbi::TMaskedQueryRegions* mask_info,
456 bool ungapped,
int master_gentice_code,
int slave_genetic_code,
460 if (alnset.
Get().empty())
463 CSeq_align_set::Tdata::const_iterator iter = alnset.
Get().begin();
466 bool incremental_output = (
bool)out_stream;
467 while (iter != alnset.
Get().end()) {
474 if ((*iter)->GetSegs().IsDisc()) {
476 ungapped, master_gentice_code, slave_genetic_code);
481 for ( ; iter != alnset.
Get().end(); ++iter) {
483 if (!current_id->
Match(*next_id)) {
486 one_subject_alnset.
Set().push_back(*iter);
489 disc_align_wrap.
SetSegs().SetDisc(one_subject_alnset);
491 mask_info, ungapped, master_gentice_code, slave_genetic_code);
495 new_hit->SetNum(index);
497 if( !incremental_output ) hits.push_back(new_hit);
503 xml_one_hit_os->SetReferenceDTD(
false);
504 xml_one_hit_os->Write( &(*new_hit), new_hit->GetThisTypeInfo() );
507 string::size_type start_xml_pos = out_str.find(
"<?xml");
508 if( start_xml_pos != string::npos ) {
509 string::size_type end_xml_pos = out_str.find_first_of(
"\n\r");
510 out_str.erase(0,end_xml_pos+1);
512 *out_stream << out_str ;
535 const ncbi::TMaskedQueryRegions* mask_info,
536 int index,
int iteration,
CStatistics& stat,
bool is_ungapped,
537 int master_gentice_code,
int slave_genetic_code,
538 const vector<string>& messages,
541 bool incremental_output = (
bool) out_stream;
542 list<CRef<CIteration> >& iterations = bxmlout.
SetIterations();
546 one_query_iter->SetIter_num(iteration);
557 one_query_iter->SetQuery_ID(
559 query_def = sequence::CDeflineGenerator().GenerateDefline(bh);
565 query_def =
"No definition line";
566 one_query_iter->SetQuery_def(query_def);
569 one_query_iter->SetStat(stat);
570 if (messages.size() > 0 && !messages[index].empty())
571 one_query_iter->SetMessage(messages[index]);
573 string serial_xml_start, serial_xml_end;
574 if( incremental_output) {
577 serial_xml_start, serial_xml_end);
578 *out_stream << serial_xml_start <<
"\n<Iteration_hits>\n";
584 scope, matrix, mask_info, is_ungapped,
585 master_gentice_code, slave_genetic_code,
589 if( incremental_output ) *out_stream <<
"</Iteration_hits>" << serial_xml_end;
591 iterations.push_back(one_query_iter);
602 string matrix_name =
data->GetMatrixName();
610 if ((
val =
data->GetMatchReward()) != 0)
613 if ((
val =
data->GetMismatchPenalty()) != 0)
632 int db_numseq =
data->GetDbNumSeqs();
633 Int8 db_length =
data->GetDbLength();
635 for (
unsigned int index = 0; index <
data->GetNumQueries(); ++index) {
644 stat_vec.push_back(stat);
681 bool incremental_output = (
bool)out_stream;
682 string program_name =
data->GetBlastProgramName();
705 query_def = sequence::CDeflineGenerator().GenerateDefline(bh);
711 query_def =
"No definition line";
719 unique_ptr< CBlastFormattingMatrix > matrix(
data->GetMatrix());
721 vector<CRef<CStatistics> > stat_vec;
724 string serial_xml_start, serial_xml_end;
725 if( incremental_output && incremental_struct->
m_IterationNum == 0) {
726 bool add_dtd_reference =
true, add_xml_version =
true;
728 serial_xml_start, serial_xml_end,
729 add_dtd_reference, add_xml_version );
731 *out_stream << serial_xml_start <<
"\n<BlastOutput_iterations>" ;
732 incremental_struct->
m_SerialXmlEnd =
"\n</BlastOutput_iterations>" + serial_xml_end;
735 for (
unsigned int index = 0; index <
data->GetNumQueries(); ++index) {
743 if (incremental_struct)
749 data->GetScope(index), matrix.get(),
750 data->GetMaskLocations(index),
752 *stat_vec[index], !
data->GetGappedMode(),
753 data->GetMasterGeneticCode(),
data->GetSlaveGeneticCode(),
762 ERR_POST(
Error <<
"Failed s_BlastXMLAddIteration " << query_label );
775 bool add_reference_dtd,
776 bool add_xml_version )
778 bool res_code =
false;
779 TTypeInfo typeInfo =
object.GetThisTypeInfo();
780 string breake_by_tag =
tag;
781 start_part=
"<NOT SET>";
782 end_part=
"</NOT SET>";
788 xml_one_iter_os->SetReferenceDTD(add_reference_dtd);
789 xml_one_iter_os->SetDefaultDTDFilePrefix(
"http://www.ncbi.nlm.nih.gov/dtd/");
790 if( add_xml_version )
791 xml_one_iter_os->Write(&
object, typeInfo );
793 xml_one_iter_os->WriteObject(&
object, typeInfo );
796 string::size_type iterations_insert_point = out_str.find( breake_by_tag );
797 if( iterations_insert_point != string::npos ){
798 iterations_insert_point += breake_by_tag.length();
799 start_part = out_str.substr(0,iterations_insert_point);
800 end_part = out_str.substr(iterations_insert_point);
804 start_part = out_str;
User-defined methods of the data storage class.
Declares singleton objects to store the version and reference for the BLAST engine.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ ePHIBlastn
Nucleotide PHI BLAST.
@ ePHIBlastp
Protein PHI BLAST.
@ eMapper
Jumper alignment for mapping.
@ eMegablast
Nucl-Nucl (traditional megablast)
@ eDeltaBlast
Delta Blast.
void Reverse(void)
Reverse the segments' orientation.
void Assign(const CSerialObject &obj, ESerialRecursionMode how=eRecursive)
overloaded Assign()
static CRef< objects::CSeq_align_set > PrepareBlastUngappedSeqalign(const objects::CSeq_align_set &alnset)
Static function. Call this if the seqalign is stdseg or dendiag for an ungapped BLAST alignment ...
SeqLocCharOption
character used to display seqloc, such as masked sequence
A generalized representation of a pairwise alignment.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CRef< CSeq_align > CreateTranslatedDensegFromNADenseg(void) const
Create a Dense-seg with widths from a Dense-seg of nucleotides. Used by AlnMgr to handle translated nucl...
CRef< CSeq_align > CreateDensegFromStdseg(SSeqIdChooser *SeqIdChooser=0) const
---------------------------------------------------------------------------- PRE : the Seq-align has ...
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row of an alignment; all gaps by default.
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
Base class for all serializable objects.
static void GetBioseqHandleDeflineAndId(const objects::CBioseq_Handle &handle, list< TGi > &use_this_gi, string &seqid, string &defline, bool show_gi=true, TGi this_gi_first=INVALID_GI)
Returns sequence id and a BLAST defline as strings, given a Bioseq handle and a list of gis.
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Interface for filling the top layer of the XML report.
string GetSeqIdString(const CSeq_id &id)
static const char * str(char *buf, int n)
static string GetString(EPublication pub)
Reference for requested publication.
EPublication
Enumerates the various BLAST publications.
@ eMaxPublications
Used as sentinel value.
@ ePhiBlast
1998 NAR paper
@ eCompBasedStats
2001 NAR paper
@ eGappedBlast
1997 NAR paper
@ eMegaBlast
2000 J Comput Biol paper
@ eDeltaBlast
2012 Biology Direct on DeltaBLAST
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
@ eNoOwnership
No ownership is assumed.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
@ eSerialVerifyData_No
do not verify
const string AsFastaString(void) const
CConstRef< CSeq_id > GetSeqId(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
void Reset(void)
Reset reference object.
int64_t Int8
8-byte (64-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
void SetLambda(TLambda value)
Assign a value to Lambda data member.
void SetExpect(TExpect value)
Assign a value to Expect data member.
void SetGap_open(TGap_open value)
Assign a value to Gap_open data member.
void SetEntropy(TEntropy value)
Assign a value to Entropy data member.
void SetDb_num(TDb_num value)
Assign a value to Db_num data member.
void SetQuery_len(TQuery_len value)
Assign a value to Query_len data member.
void SetHsp_len(THsp_len value)
Assign a value to Hsp_len data member.
void SetSc_match(TSc_match value)
Assign a value to Sc_match data member.
void SetMatrix(const TMatrix &value)
Assign a value to Matrix data member.
void SetProgram(const TProgram &value)
Assign a value to Program data member.
void SetGap_extend(TGap_extend value)
Assign a value to Gap_extend data member.
void SetQuery_ID(const TQuery_ID &value)
Assign a value to Query_ID data member.
void SetVersion(const TVersion &value)
Assign a value to Version data member.
void SetKappa(TKappa value)
Assign a value to Kappa data member.
void SetFilter(const TFilter &value)
Assign a value to Filter data member.
TIterations & SetIterations(void)
Assign a value to Iterations data member.
void SetSc_mismatch(TSc_mismatch value)
Assign a value to Sc_mismatch data member.
void SetDb(const TDb &value)
Assign a value to Db data member.
void SetQuery_def(const TQuery_def &value)
Assign a value to Query_def data member.
void SetParam(TParam &value)
Assign a value to Param data member.
void SetPattern(const TPattern &value)
Assign a value to Pattern data member.
void SetReference(const TReference &value)
Assign a value to Reference data member.
void SetDb_len(TDb_len value)
Assign a value to Db_len data member.
void SetEff_space(TEff_space value)
Assign a value to Eff_space data member.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsDendiag(void) const
Check if variant Dendiag is selected.
const TStd & GetStd(void) const
Get the variant data.
const TDendiag & GetDendiag(void) const
Get the variant data.
bool IsStd(void) const
Check if variant Std is selected.
bool IsDisc(void) const
Check if variant Disc is selected.
const TIds & GetIds(void) const
Get the Ids member data.
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
const TStrands & GetStrands(void) const
Get the Strands member data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
static CRef< CSeq_align > CreateDensegFromDendiag(CSeq_align const &aln)
Structure to hold data for incremental XML formatting.