59 #ifndef SKIP_DOXYGEN_PROCESSING
69 blast::CLocalDbAdapter& db_adapter,
75 const char *matrix_name ,
80 bool use_sum_statistics ,
81 bool is_remote_search ,
82 int dbfilt_algorithm ,
83 const string& custom_output_format ,
86 const blast::CIgBlastOptions *ig_opts ,
87 const blast::CLocalDbAdapter* domain_db_adapter ,
88 const string & cmdline ,
89 const string& subjectTag )
90 : m_FormatType(format_type), m_IsHTML(is_html),
91 m_DbIsAA(db_adapter.IsProtein()), m_BelieveQuery(believe_query),
92 m_Outfile(
outfile), m_NumSummary(num_summary),
93 m_NumAlignments(num_alignments), m_HitlistSize(options.GetHitlistSize()),
96 m_QueryGenCode(qgencode), m_DbGenCode(dbgencode),
97 m_ShowGi(show_gi), m_ShowLinkedSetSize(
false),
98 m_IsUngappedSearch(!options.GetGappedMode()),
99 m_MatrixName(matrix_name),
103 m_SubjectTag(subjectTag),
104 m_IsRemoteSearch(is_remote_search),
105 m_QueriesFormatted(0),
106 m_Megablast(is_megablast),
107 m_IndexedMegablast(is_indexed),
108 m_CustomOutputFormatSpec(custom_output_format),
109 m_IgOptions(ig_opts),
112 m_IsIterative(
false),
116 m_OrigExceptionMask(
outfile.exceptions()),
120 m_DbName = db_adapter.GetDatabaseName();
132 if (!is_remote_search)
141 int filteringAlgorithmId = db_adapter.GetFilteringAlgorithm();
142 if(filteringAlgorithmId == -1) {
144 if (db_Info && db_Info.
NotEmpty()) {
153 dbfilt_algorithm, is_remote_search);
169 options.GetMatchReward() == 0 &&
170 options.GetMismatchPenalty() == 0 )
189 if (domain_db_adapter) {
191 domain_db_adapter->GetDatabaseName(),
192 true, -1, is_remote_search);
211 const vector< CBlastFormatUtil::SDbInfo >& dbinfo_list,
219 bool is_remote_search,
220 const string& custom_output_format,
222 const string & cmdline)
223 : m_FormatType(format_type),
226 m_BelieveQuery(believe_query),
228 m_NumSummary(num_summary),
229 m_NumAlignments(num_alignments),
230 m_HitlistSize(opts.GetHitlistSize()),
233 m_QueryGenCode(opts.GetQueryGeneticCode()),
234 m_DbGenCode(opts.GetDbGeneticCode()),
236 m_ShowLinkedSetSize(
false),
237 m_IsUngappedSearch(!opts.GetGappedMode()),
238 m_MatrixName(opts.GetMatrixName()),
242 m_IsRemoteSearch(is_remote_search),
243 m_QueriesFormatted(0),
244 m_Megablast(opts.GetProgram() ==
eMegablast ||
246 m_IndexedMegablast(opts.GetMBIndexLoaded()),
247 m_CustomOutputFormatSpec(custom_output_format),
250 m_IsIterative(
false),
254 m_OrigExceptionMask(
outfile.exceptions()),
258 m_DbInfo.assign(dbinfo_list.begin(), dbinfo_list.end());
259 vector< CBlastFormatUtil::SDbInfo >::const_iterator itInfo;
285 opts.GetMatchReward() == 0 &&
286 opts.GetMismatchPenalty() == 0 )
328 "<HEAD><TITLE>BLAST Search Results</TITLE></HEAD>\n"
329 "<BODY BGCOLOR=\"#FFFFFF\" LINK=\"#0000FF\" VLINK=\"#660099\" ALINK=\"#660099\">\n"
372 m_Outfile <<
"Reference: Robert M. Hubley, Arian Smit\n";
373 m_Outfile <<
"RMBlast - RepeatMasker Search Engine\n";
374 m_Outfile <<
"2010 <http://www.repeatmasker.org>";
421 m_Outfile <<
"\n\n" <<
"Conserved Domain ";
443 ? summary.GetPsiUngappedKarlinBlk()
444 : summary.GetUngappedKarlinBlk();
448 CBlastFormatUtil::PrintKAParameters(kbp_ungap->
Lambda,
449 kbp_ungap->
K, kbp_ungap->
H,
456 ? summary.GetPsiGappedKarlinBlk()
457 : summary.GetGappedKarlinBlk();
460 CBlastFormatUtil::PrintKAParameters(kbp_gap->
Lambda,
461 kbp_gap->
K, kbp_gap->
H,
467 m_Outfile <<
"Effective search space used: " <<
468 summary.GetSearchSpace() <<
"\n";
489 int skip_from,
int skip_to,
int index,
490 int num_descriptions_to_show )
503 if (num_descriptions_to_show == 0)
523 blast::CPsiBlastIterationState::TSeqIds& prev_seqids)
526 _ASSERT( !prev_seqids.empty() );
531 unsigned int count = 0;
535 if (prev_seqids.find(subj_id) != prev_seqids.end()) {
537 repeated_seqs.
Set().push_back(*alignment);
540 new_seqs.
Set().push_back(*alignment);
551 bool kIsGlobal = (seqalign_set->IsSet() && seqalign_set->CanGet() &&
552 seqalign_set->Get().front()->CanGetType() &&
561 unsigned int itr_num,
562 blast::CPsiBlastIterationState::TSeqIds& prev_seqids,
569 !prev_seqids.empty()) {
578 repeated_seqs.
Size());
609 bool html,
bool showgi,
bool isbl2seq,
bool disableKAStats)
640 if (program ==
"tblastx") {
658 const string& db_title)
const
672 db_title =
m_DbInfo.front().definition;
705 if (query_id->
Match(*(*itr)->GetQueryId())) {
711 objects::CBlastOutput xml_output;
750 unsigned int itr_num)
778 if (ncbi::NStr::ToLower(
m_Program) ==
string(
"blastn"))
782 string strProgVersion =
800 CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set,
m_HitlistSize);
807 ncbi::NStr::ToLower(
m_Program) ==
string(
"blastn"))
838 clone_info.
seqid = seqid.substr(0, 45);
844 const vector<CIgBlastTabularInfo::SIgDomain*>& domains = tabinfo.
GetIgDomains();
847 for (
unsigned int i=0;
i<domains.size(); ++
i) {
848 if (domains[
i]->length > 0) {
849 length += domains[
i]->length;
850 num_match += domains[
i]->num_match;
854 clone_info.
identity = ((double)num_match)/length;
893 bool fill_clone_info)
917 string strProgVersion =
926 CSeq_align_set::Tdata::const_iterator itr = aln_set->
Get().begin();
932 if (fill_clone_info) {
940 aln_set, subject_bioseq);
943 for (; itr != aln_set->
Get().end(); ++itr) {
966 bool fill_clone_info,
967 bool print_airr_format_header)
983 string strProgVersion =
989 annots =
results.GetIgAnnotation();
991 if (fill_clone_info) {
1006 print_airr_format_header,
1019 static Uint4 subj_index = 0;
1021 list< CRef<CSeq_id> > ids =
m_SeqInfoSrc->GetId(subj_index++);
1040 if (archive.
Empty()) {
1044 if (outfmt.empty()) {
1055 blast::CBlastOptionsHandle& options_handle,
1057 unsigned int num_iters,
1069 seq_loc->SetWhole(*
id);
1071 query_vector->AddQuery(search_query);
1080 if(num_iters != 0) {
1094 for (
unsigned int i=0;
i <
m_DbInfo.size();
i++) {
1101 if(
msg.size() > 0) {
1102 archive->SetMessages() =
msg;
1109 blast::CBlastOptionsHandle& options_handle,
1111 unsigned int num_iters,
1116 if(
msg.size() > 0) {
1117 archive->SetMessages() =
msg;
1126 int delineFormatOption = 0;
1142 vector <CShowBlastDefline::SDeflineFormattingInfo *> sdlFortInfoVec = deflines.
GetFormattingInfo();
1147 for(
size_t i = 0;
i < sdlFortInfoVec.size();
i++) {
1150 obj.
insert(
"dfln_url",sdlFortInfoVec[
i]->dfln_url);
1151 obj.
insert(
"dfln_rid",sdlFortInfoVec[
i]->dfln_rid);
1152 obj.
insert(
"dfln_gi",sdlFortInfoVec[
i]->dfln_gi);
1153 obj.
insert(
"dfln_seqid",sdlFortInfoVec[
i]->dfln_seqid);
1154 obj.
insert(
"full_dfln_defline",sdlFortInfoVec[
i]->full_dfln_defline);
1155 obj.
insert(
"dfln_defline",sdlFortInfoVec[
i]->dfln_defline);
1156 obj.
insert(
"dfln_id",sdlFortInfoVec[
i]->dfln_id);
1157 obj.
insert(
"dflnFrm_id",sdlFortInfoVec[
i]->dflnFrm_id);
1158 obj.
insert(
"dflnFASTA_id",sdlFortInfoVec[
i]->dflnFASTA_id);
1159 obj.
insert(
"dflnAccs",sdlFortInfoVec[
i]->dflnAccs);
1161 obj.
insert(
"score_info",sdlFortInfoVec[
i]->score_info);
1162 obj.
insert(
"dfln_hspnum",sdlFortInfoVec[
i]->dfln_hspnum);
1163 obj.
insert(
"dfln_alnLen",sdlFortInfoVec[
i]->dfln_alnLen);
1164 obj.
insert(
"dfln_blast_rank",sdlFortInfoVec[
i]->dfln_blast_rank);
1165 obj.
insert(
"total_bit_string",sdlFortInfoVec[
i]->total_bit_string);
1166 obj.
insert(
"percent_coverage",sdlFortInfoVec[
i]->percent_coverage);
1167 obj.
insert(
"evalue_string",sdlFortInfoVec[
i]->evalue_string);
1168 obj.
insert(
"percent_identity",sdlFortInfoVec[
i]->percent_identity);
1179 int delineFormatOption = 0;
1201 results.GetMaskedQueryRegions(masklocs);
1204 CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set,
m_NumAlignments);
1243 string blastAlignParamsTemplData = reg.
Get(
"Templates",
"BLAST_ALIGN_PARAMS");
1244 string blastAlignParamsTag = (
m_Program ==
"blastn") ?
"ALIGN_PARAMS_NUC" :
"ALIGN_PARAMS_PROT";
1245 string blastAlignProtParamsTable = reg.
Get(
"Templates", blastAlignParamsTag);
1264 int AlignOption = 0;
1301 string molTypeString;
1305 molTypeString =
"cdna";
1308 molTypeString =
"dna";
1311 molTypeString =
"rna";
1314 molTypeString =
"amino acid";
1317 molTypeString =
"nucleic acid";
1320 molTypeString =
"Unknown";
1322 return molTypeString;
1337 queryID->
GetLabel(&seqID,labelType);
1340 string seqDescr = CBlastFormatUtil::GetSeqDescrString(*bioseq);
1341 seqDescr = seqDescr.empty() ?
"None" : seqDescr;
1352 obj.
insert(
"Query",seqID);
1353 obj.
insert(
"Query_descr",seqDescr);
1356 obj.
insert(
"Moltype",molType);
1365 obj.
insert(
"Database_descr",dbTitle);
1400 unsigned int itr_num
1402 blast::CPsiBlastIterationState::TSeqIds prev_seqids
1404 bool is_deltablast_domain_result )
1448 string reportCaption =
"Tax BLAST report";
1454 const bool kIsTabularOutput =
false;
1456 if (is_deltablast_domain_result) {
1457 m_Outfile <<
"Results from domain search" <<
"\n";
1461 m_Outfile <<
"Results from round " << itr_num <<
"\n";
1469 string message =
"Failed to resolve SeqId: "+
results.GetSeqId()->AsFastaString();
1486 const bool kBelieveSubject =
false;
1488 CBlastFormatUtil::AcknowledgeBlastSubject(*subject_bioseq,
1495 if ( !
results.HasAlignments() ) {
1497 <<
"***** " << CBlastFormatUtil::kNoHitsFound <<
" *****" <<
"\n"
1511 aln_set = CBlastFormatUtil::SortSeqalignForSortableFormat(
1513 (
m_Program ==
"tblastx") ?
true :
false,
1534 results.GetMaskedQueryRegions(masklocs);
1537 CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set,
m_NumAlignments);
1546 int kAlignToShow=2000000000;
1569 results.GetSubjectMasks(subj_masks);
1582 bool fill_clone_info,
1583 bool print_airr_format_header,
1623 if (
results.GetIgAnnotation()->m_MinusStrand) {
1650 m_Outfile <<
"The AIRR format is only available for nucleotide sequence search" << endl;
1656 string reportCaption =
"Tax BLAST report";
1663 const bool kIsTabularOutput =
false;
1683 if ( !
results.HasAlignments() ) {
1685 <<
"***** " << CBlastFormatUtil::kNoHitsFound <<
" *****" <<
"\n"
1718 if (
results.HasAlignments()) {
1720 CSeq_align_set::Tdata::const_iterator itr = aln_set->
Get().begin();
1726 if (fill_clone_info) {
1729 m_Outfile <<
"Domain classification requested: " <<
m_IgOptions->m_DomainSystem << endl << endl;
1738 results.GetMaskedQueryRegions(masklocs);
1755 list < CRef<CDisplaySeqalign::DomainInfo> > domain;
1757 string kabat_domain_name[] = {
"FR1",
"CDR1",
"FR2",
"CDR2",
"FR3",
"CDR3",
"FR4",
"C region"};
1758 string imgt_domain_name[] = {
"FR1-IMGT",
"CDR1-IMGT",
"FR2-IMGT",
"CDR2-IMGT",
"FR3-IMGT",
"CDR3-IMGT",
"FR4-IMGT",
"C region"};
1759 int domain_name_length = 8;
1760 vector<string> domain_name;
1762 for (
int i = 0;
i < domain_name_length;
i ++) {
1763 domain_name.push_back(kabat_domain_name[
i]);
1766 for (
int i = 0;
i < domain_name_length;
i ++) {
1767 domain_name.push_back(imgt_domain_name[
i]);
1773 for (
int i=0;
i<9;
i =
i + 2) {
1788 temp->is_subject_start_valid = subject_start > 0 ?
true:
false;
1789 temp->is_subject_stop_valid = subject_stop > 0 ?
true:
false;
1790 temp->domain_name = domain_name[
i/2];
1791 domain.push_back(temp);
1800 int subject_start = -1;
1802 int subject_stop = -1;
1808 temp->subject_seqloc =
new CSeq_loc(*id_holder,
1811 temp->is_subject_start_valid = subject_start > 0 ?
true:
false;
1812 temp->is_subject_stop_valid = subject_stop > 0 ?
true:
false;
1813 temp->domain_name = domain_name[5];
1814 domain.push_back(temp);
1820 int subject_start = -1;
1822 int subject_stop = -1;
1828 temp->subject_seqloc =
new CSeq_loc(*id_holder,
1831 temp->is_subject_start_valid = subject_start > 0 ?
true:
false;
1832 temp->is_subject_stop_valid = subject_stop > 0 ?
true:
false;
1833 temp->domain_name = domain_name[6];
1834 domain.push_back(temp);
1843 int subject_start = -1;
1845 int subject_stop = -1;
1851 temp->subject_seqloc =
new CSeq_loc(*id_holder,
1854 temp->is_subject_start_valid = subject_start > 0 ?
true:
false;
1855 temp->is_subject_stop_valid = subject_stop > 0 ?
true:
false;
1856 temp->domain_name = domain_name[7];
1857 domain.push_back(temp);
1862 int num_align_to_show =
results.m_NumActualV +
results.m_NumActualD +
1883 vector<string> chain_type_list;
1888 chain_type_list.push_back(*iter);
1904 results.GetSubjectMasks(subj_masks);
1909 m_Outfile <<
"\n<CENTER><b><FONT color=\"green\">Alignments</FONT></b></CENTER>"
1929 if (!
results.HasAlignments()){
1943 string title = sequence::CDeflineGenerator().GenerateDefline(q_bh);
1947 des->SetTitle(
"reversed|" + title);
1949 des->SetTitle(title);
1963 align_set->
Set().push_back(new_align);
1965 results.SetSeqAlign().Reset(&*align_set);
1969 for (
int i=0;
i<6;
i+=2) {
1977 for (
int i=0;
i<12; ++
i) {
1984 for (
int i=0;
i<3; ++
i) {
1995 unsigned int itr_num
1997 blast::CPsiBlastIterationState::TSeqIds prev_seqids
2026 if ((**result).HasErrors()) {
2027 m_Outfile <<
"\n" << (**result).GetErrorStrings() <<
"\n";
2030 if ((**result).HasWarnings()) {
2031 m_Outfile <<
"\n" << (**result).GetWarningStrings() <<
"\n";
2044 string reportCaption =
"Tax BLAST report";
2056 m_Outfile <<
"Results from round " << itr_num <<
"\n";
2072 string reportCaption =
"Tax BLAST report";
2086 for (
int index=0; index<phi_query_info->
num_patterns; index++)
2089 CBlastFormatUtil::PrintPhiInfo(phi_query_info->
num_patterns,
2090 string(phi_query_info->
pattern),
2098 <<
"***** " << CBlastFormatUtil::kNoHitsFound <<
" *****" <<
"\n"
2125 for (index=0; index<phi_query_info->
num_patterns; index++)
2127 list <CDisplaySeqalign::FeatureInfo*> phiblast_pattern;
2132 (
TSeqPos) (occurrences[index].
offset + occurrences[index].length - 1));
2135 phiblast_pattern.push_back(feature_info);
2137 m_Outfile <<
"\nSignificant alignments for pattern occurrence " << index+1
2138 <<
" at position " << 1+occurrences[index].
offset <<
"\n\n";
2141 result_set[index].GetMaskedQueryRegions(masklocs);
2144 CBlastFormatUtil::PruneSeqalign(*aln_set, copy_aln_set,
m_NumAlignments);
2175 NON_CONST_ITERATE(list<CDisplaySeqalign::FeatureInfo*>, itr, phiblast_pattern) {
2247 m_Outfile <<
"\n\nMatrix: " <<
"blastn matrix " <<
2248 options.GetMatchReward() <<
" " <<
2249 options.GetMismatchPenalty() <<
"\n";
2252 m_Outfile <<
"\n\nMatrix: " << options.GetMatrixName() <<
"\n";
2255 if (options.GetGappedMode() ==
true) {
2256 double gap_extension = (double) options.GetGapExtensionCost();
2257 if ((
m_Program ==
"megablast" ||
m_Program ==
"blastn") && options.GetGapExtensionCost() == 0)
2259 gap_extension = -2*options.GetMismatchPenalty() + options.GetMatchReward();
2260 gap_extension /= 2.0;
2262 m_Outfile <<
"Gap Penalties: Existence: "
2263 << options.GetGapOpeningCost() <<
", Extension: "
2264 << gap_extension <<
"\n";
2266 if (options.GetWordThreshold()) {
2267 m_Outfile <<
"Neighboring words threshold: " <<
2268 options.GetWordThreshold() <<
"\n";
2270 if (options.GetWindowSize()) {
2271 m_Outfile <<
"Window for multiple hits: " <<
2272 options.GetWindowSize() <<
"\n";
2354 if (query_id->
Match(*(*itr)->GetQueryId())) {
2382 if (query_id->
Match(*(*itr)->GetQueryId())) {
2394 if (query_id->
Match(*(*itr)->GetQueryId())) {
2411 m_Outfile <<
"<?xml version=\"1.0\"?>\n<BlastXML2\n"
2412 "xmlns=\"http://www.ncbi.nlm.nih.gov\"\n"
2413 "xmlns:xi=\"http://www.w3.org/2003/XInclude\"\n"
2414 "xmlns:xs=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
2415 "xs:schemaLocation=\"http://www.ncbi.nlm.nih.gov http://www.ncbi.nlm.nih.gov/data_specs/schema_alt/NCBI_BlastOutput2.xsd\">\n";
2489 int total_subj_length = 0;
2501 db_name =
m_DbName.substr(dir.length());
2504 if (db_name.size() > 500) {
2505 db_name.resize(500);
2515 if (
l->GetNumGis()) {
2518 if (
l->GetNumSis()){
2521 if (
l->GetNumTaxIds()){
2524 if (
l->GetNumPigs()) {
2530 if (
l->GetNumGis()) {
2533 if (
l->GetNumSis()){
2536 if (
l->GetNumTaxIds()){
2539 if (
l->GetNumPigs()) {
static CRef< CScope > m_Scope
ESubjectMaskingType
Define the possible subject masking types.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Int4 BlastSeqSrcGetNumSeqs(const BlastSeqSrc *seq_src)
Get the number of sequences contained in the sequence source.
Int8 BlastSeqSrcGetTotLen(const BlastSeqSrc *seq_src)
Get the total length of all sequences in the sequence source.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
@ eBlastp
Protein-Protein.
@ eTblastn
Protein-Translated nucl.
@ eMegablast
Nucl-Nucl (traditional megablast)
@ eDeltaBlast
Delta Blast.
@ ePSITblastn
PSI Tblastn.
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
@ eBlastx
Translated nucl-Protein.
build_archive declarations
Class containing information needed for tabular formatting of BLAST results.
void SetQueryGeneticCode(int q_gc)
void SetParseSubjectDefline(bool val)
Should subject defline be parsed for id or not?
void PrintNumProcessed(int num_queries)
Prints number of queries processed.
EFieldDelimiter
What delimiter to use between fields in each row of the tabular output.
void SetCustomDelim(string customDelim)
void SetQueryRange(TSeqRange &q_range)
Set query range.
void SetParseLocalIds(bool val)
Should local IDs be parsed or not?
virtual void Print(void)
Print one line of tabular output.
int SetFields(const objects::CSeq_align &sal, objects::CScope &scope, CNcbiMatrix< int > *matrix=0)
Set all member fields, given a Seq-align.
void PrintHeader(const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &rid=kEmptyStr, unsigned int iteration=numeric_limits< unsigned int >::max(), const objects::CSeq_align_set *align_set=0, CConstRef< objects::CBioseq > subj_bioseq=CConstRef< objects::CBioseq >())
Print the tabular output header.
void SetNoFetch(bool nofetch)
Avoid fetching sequence (if possible) If the sequence is needed (e.g., will be formatted,...
void SetDbGeneticCode(int db_gc)
void AddParam(EUsageParams p, int val)
Strategy class to gather the data for generating BLAST XML output.
Strategy class to gather the data for generating BLAST XML output.
Class for computing sequences' titles ("definitions").
@ eShowSequencePropertyLabel
@ eTranslateNucToNucAlignment
@ eShowInfoOnMouseOverSeqid
@ eShowTranslationForLocalSeq
@ eShowAlignStatsForMultiAlignView
void SetLineLen(size_t len)
number of bases or amino acids per line
TranslatedFrameForLocalSeq
void SetQueryNumber(int number)
for linking to mapviewer
void SetSeqLocColor(SeqLocColorOption option=eBlack)
color for seqloc display such as masked region
void SetMasterDomain(list< CRef< DomainInfo > > *domain)
void SetMiddleLineStyle(MiddleLineStyle option=eBar)
set middle line style
void SetSeqLocChar(SeqLocCharOption option=eX)
character style for seqloc display such as masked region
void SetAlignOption(int option)
Set functions.
void SetTranslatedFrameForLocalSeq(TranslatedFrameForLocalSeq frame)
void DisplaySeqalign(CNcbiOstream &out)
call this to display seqalign
void SetDbType(bool is_na)
database type.
void SetResultPositionIndex(int index)
void SetDbName(string name)
set blast database name
void SetSequencePropertyLabel(const vector< string > *SequencePropertyLabel, EOwnership ownership=eNoOwnership)
void SetAlignType(AlignType type)
Needed only if you want to display positives and strand.
static CRef< objects::CSeq_align_set > PrepareBlastUngappedSeqalign(const objects::CSeq_align_set &alnset)
static functions Need to call this if the seqalign is stdseg or dendiag for ungapped blast alignment ...
void SetMasterGeneticCode(int code)
Set genetic code for master sequence.
void SetSlaveGeneticCode(int code)
Set genetic code for slaves.
void SetNumAlignToShow(int num)
Display top num seqalign Note this only limit the number of seqalign regardless of the seqids.
void SetSubjectMasks(const TSeqLocInfoVector &masks)
Sets the masks and the masking algorithm used for the subject sequences.
void UseLongSequenceIds(void)
Sets usage of long sequence ids (database|accession)
void SetAlignTemplates(SAlignTemplates *alignTemplates)
Class containing information needed for tabular formatting of BLAST results.
void PrintMasterAlign(const CConstRef< blast::CIgBlastOptions > &ig_opts, const string &header="# ") const
Print domain information.
int SetFields(const objects::CSeq_align &align, objects::CScope &scope, const string &chain_type, const string &master_chain_type_to_show, CNcbiMatrix< int > *matrix=0)
Set fields for all other alignments.
const vector< SIgDomain * > & GetIgDomains() const
Get Ig domain.
void PrintHeader(const CConstRef< blast::CIgBlastOptions > &ig_opts, const string &program, const objects::CBioseq &bioseq, const string &dbname, const string &domain_sys, const string &rid=kEmptyStr, unsigned int iteration=numeric_limits< unsigned int >::max(), const objects::CSeq_align_set *align_set=0, CConstRef< objects::CBioseq > subj_bioseq=CConstRef< objects::CBioseq >())
virtual void Print(void)
Override the print method.
void SetIgAnnotation(const CRef< blast::CIgAnnotation > &annot, const CConstRef< blast::CIgBlastOptions > &ig_opts, CConstRef< CSeq_align_set > &align_result, CScope &scope)
One method to set all annotation information.
int SetMasterFields(const objects::CSeq_align &align, objects::CScope &scope, const string &chain_type, const string &master_chain_type_to_show, CNcbiMatrix< int > *matrix=0)
Set fields for master alignment.
void PrintHtmlSummary(const CConstRef< blast::CIgBlastOptions > &ig_opts) const
Print Html style summary.
void SetAirrFormatData(CScope &scope, const CRef< blast::CIgAnnotation > &annot, const CBioseq_Handle &query_handle, CConstRef< CSeq_align_set > align_result, const CConstRef< blast::CIgBlastOptions > &ig_opts)
void GetIgInfo(string &v, string &d, string &j, string &c, string &master_chain_to_show, string &cdr3_nuc, string &cdr3_aa, string &productive) const
Getter.
void PrintAirrRearrangement(CScope &scope, const CRef< blast::CIgAnnotation > &annot, const string &program_version, const CBioseq &query_bioseq, const string &dbname, const string &domain_sys, const string &rid, unsigned int iteration, const CSeq_align_set *align_set, CConstRef< CBioseq > subj_bioseq, CNcbiMatrix< int > *matrix, bool print_airr_format_header, const CConstRef< blast::CIgBlastOptions > &ig_opts)
CJson_Object push_back_object(void)
Add object type element to the end of the array.
bool Write(std::ostream &out, TJson_Write_Flags flags=fJson_Write_IndentWithSpace, unsigned int indent_char_count=4) const
Write JSON data into a stream.
CJson_Object SetObject(void)
Get JSON object contents of the node.
CJson_Array insert_array(const CJson_Node::TKeyType &name)
Insert array type element into the object.
void insert(const CJson_Node::TKeyType &name)
Insert null element into the object.
static CNcbiApplication * Instance(void)
Singleton method.
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Search Results for All Queries.
Search Results for One Query.
string GetTitle() const
Returns the database title.
Tdata::size_type Size() const
This class displays the defline for BLAST result.
void SetDeflineTemplates(SDeflineTemplates *deflineTemplates)
Set this if defline templates are used. Param deflineTemplates: struct containing defline templates info...
void SetDbType(bool is_na)
Set this for constructing seqid url.
void DisplayBlastDefline(CNcbiOstream &out)
Display defline.
void SetResultPosIndex(int index)
set and add result position index to <name=seqid> in score quick link for multiple result case
void SetOption(int option)
options per DisplayOption
void SetupPsiblast(TIdString2SeqStatus *seq_status=NULL, PsiblastStatus status=eFirstPass)
Set psiblast specific options.
void Init(void)
Initialize defline params.
void SetQueryNumber(int number)
Set this for linking to mapviewer.
vector< CShowBlastDefline::SDeflineFormattingInfo * > GetFormattingInfo(void)
Get deflines formatting info.
void SetSkipRange(int from, int to)
Skip certain seqaligns (only used in Igblast)
void Display(CNcbiOstream &out)
Display defline.
@ eNewPass
Sequences are newly found in current pass.
@ eRepeatPass
Sequences were found in previous pass.
void SetDbName(string database)
Set this for constructing seqid url.
Collection of masked regions for a single query sequence.
void Print(const CCompactSAMApplication::AlignInfo &ai)
API (CDeflineGenerator) for computing sequences' titles ("definitions").
std::ofstream out("events_result.xml")
main entry point for tests
static void des(const char *src, const char *out)
CRef< objects::CBlast4_archive > BlastBuildArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const CSearchResultSet &results, CRef< CSearchDatabase > search_db, unsigned int num_iters=0)
Returns a blast archive object.
vector< string > m_ChainType
string GetRID() const
Returns the RID for these results (if applicable), otherwise returns an empty string.
CRef< CBlastAncillaryData > GetAncillaryData() const
Accessor for the query's search ancillary.
bool IsLocalId(const objects::CSeq_id *seqid)
Returns true if the CSeq_id is a local id.
const SPHIQueryInfo * GetPhiQueryInfo() const
Retrieves PHI-BLAST information about pattern on query.
set< objects::CSeq_id_Handle > TSeqIds
List of CSeq_ids.
ESubjectMaskingType GetMaskType() const
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
CConstRef< objects::CSeq_id > GetSeqId() const
Accessor for the query's sequence identifier.
EProgram ProgramNameToEnum(const std::string &program_name)
Map a string into an element of the ncbi::blast::EProgram enumeration (except eBlastProgramMax).
void SetFilteringAlgorithm(int filt_algorithm_id)
Temporary fix for backwards compatibility with other 6.0 SCs.
EMoleculeType
Molecule of the BLAST database.
bool HasAlignments() const
Return true if there are any alignments for this query.
@ eCompBasedStats
2001 NAR paper
@ eIndexedMegablast
2008 Bioinformatics on indexed megablast
@ eMegaBlast
2000 J Comput Biol paper
@ eDeltaBlast
2012 Biology Direct on DeltaBLAST
@ eCompAdjustedMatrices
submitted for publication
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
const string & Get(const string &name, bool *found=NULL) const
Get environment value by name.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
static void SplitPath(const string &path, string *dir=0, string *base=0, string *ext=0)
Split a path string into its basic components.
#define MSerial_AsnBinary
#define MSerial_AsnText
I/O stream manipulators –.
string m_Name
PN - program name.
const string AsFastaString(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static int WorstRank(const CRef< CSeq_id > &id)
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
static int BestRank(const CRef< CSeq_id > &id)
ELabelType
return the label for a given string
@ eContent
Untagged human-readable accession or the like.
@ eDefault
default is to show type + content
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
string GenerateDefline(const CBioseq_Handle &bsh, TUserFlags flags=0)
Main method.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
CBioseq_EditHandle GetBioseqEditHandle(const CBioseq &bioseq)
Get edit handle for the specified object Throw an exception if object is not found,...
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
void SetDescr(TDescr &v) const
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
TMol GetBioseqMolType(void) const
Get some values from core:
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
@ eTrunc_End
Truncate trailing whitespace only.
@ eNocase
Case insensitive compare.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsLocal(void) const
Check if variant Local is selected.
const TInst & GetInst(void) const
Get the Inst member data.
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
const TId & GetId(void) const
Get the Id member data.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
TLength GetLength(void) const
Get the Length member data.
@ eMol_not_set
> cdna = rna
@ eMol_na
just a nucleic acid
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
Useful/utility classes and methods.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static PCRE2_SIZE * offsets
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
Defines BLAST database access classes.
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition of SSeqLoc structure.
Complete type definition of Blast Sequence Source ADT.
Structure to hold the Gumbel parameters (for FSC).
Structure to hold the Karlin-Altschul parameters.
double K
K value used in statistics.
double Lambda
Lambda value used in statistics.
double H
H value used in statistics.
structure for showing domains on the master sequence
structure for store feature display info
CConstRef< objects::CSeq_loc > seqloc
string alignRowTmpl
Template for displaying the actual pairwise alignment - BLAST_ALIGN_ROWS.
string alnTitlesTmpl
Template for displaying multiple defline titles.
string alignFeatureLinkTmpl
Template for displaying align features link -ALN_FEATURES_LINK.
string alnSeqInfoTmpl
Template for displaying sequence link in defline.
string alnTitlesLinkTmpl
Template for displaying link for more defline titles.
string alignFeatureTmpl
Template for displaying align features -ALN_FEATURES.
string sortInfoTmpl
Template for displaying Sort by header - SORT_ALIGNS_SEQ.
string alnDefLineTmpl
Template for displaying one defline ALN_DEFLINE_ROW.
string alignInfoTmpl
Template for displaying single align params - BLAST_ALIGN_PARAMS_NUC, BLAST_ALIGN_PARAMS_PROT.
string alignHeaderTmpl
Template for displaying header,deflines and gene info - BLAST_ALIGN_HEADER.
string alignRowTmplLast
Template for displaying the actual last pairwise alignment - BLAST_ALIGN_ROWS_LST.
string scoreInfoTmpl
score info template
string seqInfoTmpl
sequence info template
string defLineTmpl
whole defline template
Structure to hold data for incremental XML formatting.
Information about a single pattern occurrence in the query.
Int4 offset
Starting offset of this pattern occurrence.
In PHI BLAST, structure containing information about all pattern occurrences in query.
char * pattern
Pattern used, saved here for formatting purposes.
double probability
Estimated probability of the pattern.
Int4 num_patterns
Number of pattern occurrences in query.
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.