NCBI C++ ToolKit
|
Functions for finding pattern matches in sequence (PHI-BLAST). More...
#include <algo/blast/core/ncbi_std.h>
#include <algo/blast/core/blast_export.h>
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_query_info.h>
Go to the source code of this file.
Go to the SVN repository for this file.
Classes | |
struct | SDNAShortPatternItems |
Structure containing auxiliary items needed for a DNA search with a pattern that fits in a single word. More... | |
struct | SShortPatternItems |
Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word. More... | |
struct | SExtraLongPatternItems |
Auxiliary items needed for a PHI BLAST search with pattern that contains pieces longer than a word. More... | |
struct | SDNALongPatternItems |
Auxiliary items needed for a DNA pattern search with pattern containing multiple words. More... | |
struct | SLongPatternItems |
Auxiliary items needed for a PHI BLAST search with pattern containing multiple words. More... | |
struct | SPHIPatternSearchBlk |
Structure containing all auxiliary information needed in a pattern search. More... | |
Macros | |
#define | PHI_BUF_SIZE 100 |
Default size for buffers. More... | |
#define | PHI_ASCII_SIZE 256 |
Size of ASCII alphabet. More... | |
#define | PHI_BITS_PACKED_PER_WORD 30 |
Number of bits packed in a word. More... | |
#define | PHI_MAX_WORD_SIZE 11 |
Maximal word size. More... | |
#define | PHI_MAX_PATTERN_LENGTH (PHI_BITS_PACKED_PER_WORD * PHI_MAX_WORD_SIZE) |
Threshold pattern length. More... | |
#define | PHI_MAX_WORDS_IN_PATTERN 100 |
Maximal number of words in pattern. More... | |
#define | PHI_MAX_HIT 20000 |
Maximal size of an array of pattern hits. More... | |
Typedefs | |
typedef enum EPatternProgram | EPatternProgram |
Options for running the pattern search. More... | |
typedef enum EPatternType | EPatternType |
Type of pattern: fits in single word, several words, or is very long. More... | |
typedef struct SDNAShortPatternItems | SDNAShortPatternItems |
Structure containing auxiliary items needed for a DNA search with a pattern that fits in a single word. More... | |
typedef struct SShortPatternItems | SShortPatternItems |
Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word. More... | |
typedef struct SExtraLongPatternItems | SExtraLongPatternItems |
Auxiliary items needed for a PHI BLAST search with pattern that contains pieces longer than a word. More... | |
typedef struct SDNALongPatternItems | SDNALongPatternItems |
Auxiliary items needed for a DNA pattern search with pattern containing multiple words. More... | |
typedef struct SLongPatternItems | SLongPatternItems |
Auxiliary items needed for a PHI BLAST search with pattern containing multiple words. More... | |
typedef struct SPHIPatternSearchBlk | SPHIPatternSearchBlk |
Structure containing all auxiliary information needed in a pattern search. More... | |
Enumerations | |
enum | EPatternProgram { eSeed = 1 , ePattern , ePatSeed , ePatMatch } |
Options for running the pattern search. More... | |
enum | EPatternType { eOneWord = 0 , eMultiWord , eVeryLong } |
Type of pattern: fits in single word, several words, or is very long. More... | |
Functions | |
Int4 | FindPatternHits (Int4 *hitArray, const Uint1 *seq, Int4 len, Boolean is_dna, const SPHIPatternSearchBlk *patternSearch) |
Find the places where the pattern matches seq; 3 different methods are used depending on the length of the pattern. More... | |
SPHIQueryInfo * | SPHIQueryInfoNew (void) |
Allocates the pattern occurrences structure. More... | |
SPHIQueryInfo * | SPHIQueryInfoFree (SPHIQueryInfo *pat_info) |
Frees the pattern information structure. More... | |
SPHIQueryInfo * | SPHIQueryInfoCopy (const SPHIQueryInfo *pat_info) |
Copies the SPHIQueryInfo structure. More... | |
Int4 | PHIGetPatternOccurrences (const SPHIPatternSearchBlk *pattern_blk, const BLAST_SequenceBlk *query, const BlastSeqLoc *location, Boolean is_dna, BlastQueryInfo *query_info) |
Finds all pattern hits in a given query and saves them in the previously allocated SPHIQueryInfo structure. More... | |
Functions for finding pattern matches in sequence (PHI-BLAST).
Definition in file pattern.h.
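#define PHI_BITS_PACKED_PER_WORD 30
Number of bits packed in a word.
#define PHI_MAX_HIT 20000
Maximal size of an array of pattern hits.
#define PHI_MAX_PATTERN_LENGTH (PHI_BITS_PACKED_PER_WORD * PHI_MAX_WORD_SIZE)
Threshold pattern length.
#define PHI_MAX_WORDS_IN_PATTERN 100
Maximal number of words in pattern.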
typedef enum EPatternProgram EPatternProgram |
Options for running the pattern search.
typedef enum EPatternType EPatternType |
Type of pattern: fits in single word, several words, or is very long.
typedef struct SDNALongPatternItems SDNALongPatternItems |
Auxiliary items needed for a DNA pattern search with pattern containing multiple words.
typedef struct SDNAShortPatternItems SDNAShortPatternItems |
Structure containing auxiliary items needed for a DNA search with a pattern that fits in a single word.
typedef struct SExtraLongPatternItems SExtraLongPatternItems |
Auxiliary items needed for a PHI BLAST search with pattern that contains pieces longer than a word.
typedef struct SLongPatternItems SLongPatternItems |
Auxiliary items needed for a PHI BLAST search with pattern containing multiple words.
typedef struct SPHIPatternSearchBlk SPHIPatternSearchBlk |
Structure containing all auxiliary information needed in a pattern search.
typedef struct SShortPatternItems SShortPatternItems |
Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word.
enum EPatternProgram |
Options for running the pattern search.
enum EPatternType |
Type of pattern: fits in single word, several words, or is very long.
Int4 FindPatternHits (Int4 *hitArray, const Uint1 *seq, Int4 len, Boolean is_dna, const SPHIPatternSearchBlk *patternSearch)
Find the places where the pattern matches seq; 3 different methods are used depending on the length of the pattern.
hitArray | Stores the results as pairs of positions in consecutive entries [out] |
seq | Sequence [in] |
len | Length of the sequence [in] |
is_dna | Indicates whether seq is made of DNA or protein letters [in] |
patternSearch | Pattern information [in] |
Definition at line 468 of file pattern.c.
References eMultiWord, eOneWord, SPHIPatternSearchBlk::flagPatternLength, len, s_FindHitsLong(), s_FindHitsShortHead(), and s_FindHitsVeryLong().
Referenced by PHIBlastScanSubject(), PHIGetPatternOccurrences(), and CMultiAligner::x_FindPatternHits().
Int4 PHIGetPatternOccurrences (const SPHIPatternSearchBlk *pattern_blk, const BLAST_SequenceBlk *query, const BlastSeqLoc *location, Boolean is_dna, BlastQueryInfo *query_info)
Finds all pattern hits in a given query and saves them in the previously allocated SPHIQueryInfo structure.
pattern_blk | Structure containing the auxiliary information needed for the pattern search. [in] |
query | Query sequence(s) [in] |
location | Segments of the query sequence in which to look for the pattern [in] |
is_dna | Is this a nucleotide sequence? [in] |
query_info | Used to store pattern occurrences and get length of query (for error checking) [out] |
Definition at line 553 of file pattern.c.
References ASSERT, BlastQueryInfoGetQueryLength(), calloc(), eBlastTypePhiBlastn, eBlastTypePhiBlastp, FindPatternHits(), i, INT4_MAX, SSeqRange::left, location, BlastSeqLoc::next, SPHIQueryInfo::num_patterns, BlastQueryInfo::pattern_info, query, SSeqRange::right, s_PHIBlastAddPatternHit(), sfree, and BlastSeqLoc::ssr.
Referenced by Blast_SetPHIPatternInfo().
SPHIQueryInfo* SPHIQueryInfoCopy (const SPHIQueryInfo *pat_info)
Copies the SPHIQueryInfo structure.
pat_info | Structure to copy [in] |
Definition at line 507 of file pattern.c.
References BlastMemDup(), NULL, SPHIQueryInfo::num_patterns, SPHIQueryInfo::occurrences, and SPHIQueryInfo::pattern.
Referenced by BlastQueryInfoDup(), and CSearchResults::CSearchResults().
SPHIQueryInfo* SPHIQueryInfoFree (SPHIQueryInfo *pat_info)
Frees the pattern information structure.
pat_info | Structure to free. [in] |
Definition at line 496 of file pattern.c.
References NULL, SPHIQueryInfo::occurrences, SPHIQueryInfo::pattern, and sfree.
Referenced by BlastQueryInfoFree(), BOOST_AUTO_TEST_CASE(), and CSearchResults::~CSearchResults().
SPHIQueryInfo* SPHIQueryInfoNew (void)
Allocates the pattern occurrences structure.
Definition at line 478 of file pattern.c.
References SPHIQueryInfo::allocated_size, calloc(), NULL, and SPHIQueryInfo::occurrences.
Referenced by Blast_SetPHIPatternInfo(), and CPhiblastTestFixture::x_SetupPatternInfo().