NCBI C++ ToolKit
phi.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: phi.cpp
29 
30 Author: Jason Papadopoulos
31 
32 Contents: Match PHI patterns against a list of sequences
33 
34 ******************************************************************************/
35 
36 #include <ncbi_pch.hpp>
40 #include <algo/cobalt/cobalt.hpp>
41 
42 /// @file phi.cpp
43 /// Match PHI patterns against a list of sequences
44 
46 BEGIN_SCOPE(cobalt)
47 
48 /// Intermediate representation of a pattern hit
/// Pairs the identifier of a query sequence with the offset range on that
/// sequence where a pattern matched.
/// NOTE(review): this listing omits phi.cpp lines 57 and 59. The symbol index
/// at the end of the page gives line 57 as `SPatternHit(int seq, TRange range)`;
/// line 59 presumably closes the typedef (`} SPatternHit;`) -- confirm against
/// the repository copy before compiling.
49 typedef struct SPatternHit {
50  int query_idx; ///< query sequence containing the hit
51  TRange hit; ///< range on query sequence where hit occurs
52 
53  /// constructor
54  /// @param seq Identifier of the query sequence; stored in query_idx
55  /// @param range Offset range of hit; stored in hit
56  ///
58  : query_idx(seq), hit(range) {}
60 
/// Find PROSITE pattern hits on selected input sequences.
///
/// For every configured pattern, each query sequence is scanned for matches,
/// and every pair of matches that lie on two different queries is appended to
/// m_PatternHits as a CHit. Returns immediately if there are no patterns.
///
/// @param queries Sequences to scan; must have the same number of elements
///                as indices (checked with _ASSERT)
/// @param indices indices[j] is the identifier recorded for hits found on
///                queries[j]
///
/// NOTE(review): this listing is missing phi.cpp lines 65, 72, 80, 82,
/// 141-142 and 168 (see the gaps in the embedded line numbers). Comments
/// below mark each gap; restore those lines from the repository copy.
61 void
62 CMultiAligner::x_FindPatternHits(const vector<const CSequence*>& queries,
63  const vector<int>& indices)
64 {
    // (line 65 missing from this listing -- presumably sets
    // m_ProgressMonitor.stage to ePatternHitsSearch; confirm)
66 
67  _ASSERT(queries.size() == indices.size());
68 
69  size_t num_queries = queries.size();
70 
71  const vector<CMultiAlignerOptions::CPattern>& patterns
    // (line 72, the initializer of `patterns`, is missing -- presumably
    // m_Options->GetCddPatterns(); confirm)
73 
    // nothing to do when no patterns are configured
74  if (patterns.size() == 0) {
75  return;
76  }
77 
78  // empty out existing list
    // (line 80 missing -- presumably m_PatternHits.PurgeAllHits(); confirm)
79 
81 
    // (line 82 missing -- it must declare `sbp`, which is passed to
    // SPHIPatternSearchBlkNew and released with BlastScoreBlkFree below;
    // presumably created with BlastScoreBlkNew -- confirm)
83  SPHIPatternSearchBlk *phi_pattern;
    // scratch buffer that ::FindPatternHits fills with hit offsets
84  Int4 hit_offsets[PHI_MAX_HIT];
85 
86  // for each pattern
87 
88  for (size_t i=0;i < patterns.size();i++) {
89 
    // hits of pattern i over all queries; rebuilt for each pattern
90  vector<SPatternHit> phi_hits;
91 
92  // precompile the pattern in preparation for running
93  // all sequences through it
94 
95  char* pattern = (char*)patterns[i].AsPointer();
96  _ASSERT(pattern);
97 
    // NOTE(review): the status returned by SPHIPatternSearchBlkNew is
    // ignored and no error message is requested (last argument NULL);
    // the _ASSERT below is the only check that compilation succeeded.
98  SPHIPatternSearchBlkNew(pattern, FALSE, sbp, &phi_pattern, NULL);
99  _ASSERT(phi_pattern != NULL);
100 
101  // for each sequence
102 
103  for (size_t j = 0; j < num_queries; j++) {
104 
105  // scan the sequence through the compiled pattern,
106  // saving any hits found
107 
108  Int4 twice_num_hits = ::FindPatternHits(hit_offsets,
109  (const Uint1 *)(queries[j]->GetSequence()),
110  queries[j]->GetLength(),
111  FALSE, phi_pattern);
    // offsets are consumed two at a time; the second element of
    // each pair is passed as the first TRange argument (presumably
    // the buffer holds (end, start) pairs -- confirm in pattern.c)
112  for (size_t k = 0; k < (size_t)twice_num_hits; k += 2) {
113  phi_hits.push_back(SPatternHit(indices[j],
114  TRange(hit_offsets[k+1],
115  hit_offsets[k])));
116  }
117  }
118 
119  // for each hit to the same pattern by different sequences,
120  // create a pairwise alignment. Temporarily hijack the score
121  // of the alignment to store the identity of the pattern
122 
    // visits every unordered pair (j, k) with j < k; pairs on the same
    // query are skipped
123  for (int j = 0; j < (int)phi_hits.size() - 1; j++) {
124  for (int k = j + 1; k < (int)phi_hits.size(); k++) {
125  if (phi_hits[j].query_idx != phi_hits[k].query_idx) {
126 
    // the pattern index i is passed in the score position
127  m_PatternHits.AddToHitList(new CHit(phi_hits[j].query_idx,
128  phi_hits[k].query_idx,
129  phi_hits[j].hit,
130  phi_hits[k].hit,
131  i, CEditScript()));
132  }
133  }
134  }
135 
136  // clean up the current pattern
137 
138  phi_pattern = SPHIPatternSearchBlkFree(phi_pattern);
139 
140  // check for interrupt
    // (lines 141-142 missing -- presumably test m_Interrupt and open an
    // NCBI_THROW with error code eInterrupt; the line below is its
    // message argument. Confirm.)
    // NOTE(review): if this check throws, the BlastScoreBlkFree(sbp)
    // call after the loop is not reached -- check for a leak.
143  "Alignment Interrupted");
144  }
145  }
146 
147  sbp = BlastScoreBlkFree(sbp);
148 
149  //------------------------------------------------
    // verbose mode: print every stored hit; m_Score still holds the
    // pattern index at this point, hence the "pattern %d" column
150  if (m_Options->GetVerbose()) {
151  printf("\n\nPHI Pattern Hits:\n");
152  for (int i = 0; i < m_PatternHits.Size(); i++) {
153  CHit *hit = m_PatternHits.GetHit(i);
154  printf("query %3d %4d - %4d query %3d %4d - %4d pattern %d\n",
155  hit->m_SeqIndex1,
156  hit->m_SeqRange1.GetFrom(),
157  hit->m_SeqRange1.GetTo(),
158  hit->m_SeqIndex2,
159  hit->m_SeqRange2.GetFrom(),
160  hit->m_SeqRange2.GetTo(),
161  hit->m_Score);
162  }
163  printf("\n\n");
164  }
165  //------------------------------------------------
166 
    // (line 168, the body of this loop, is missing -- presumably it
    // overwrites the hijacked m_Score of each hit; confirm)
167  for (int i = 0; i < m_PatternHits.Size(); i++) {
169  }
170 }
171 
172 END_SCOPE(cobalt)
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
Definition: base.hpp:115
Declares the BLAST exception class.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definition: blast_stat.c:884
Interface for the traceback from blast hits.
Definition: traceback.hpp:55
int Size() const
Retrieve number of hits in list.
Definition: hitlist.hpp:75
void PurgeAllHits()
Delete all hits unconditionally.
Definition: hitlist.hpp:148
CHit * GetHit(int index)
Retrieve a hit from the hitlist.
Definition: hitlist.hpp:93
void AddToHitList(CHit *hit)
Append a hit to the hitlist.
Definition: hitlist.hpp:84
A generalized representation of a pairwise alignment.
Definition: hit.hpp:86
int m_Score
Score of alignment.
Definition: hit.hpp:104
int m_SeqIndex1
Numerical identifier for first sequence in alignment.
Definition: hit.hpp:97
int m_SeqIndex2
Numerical identifier for second sequence in alignment.
Definition: hit.hpp:101
TRange m_SeqRange1
The range of offsets on the first sequence.
Definition: hit.hpp:107
TRange m_SeqRange2
The range of offsets on the second sequence.
Definition: hit.hpp:110
const vector< CPattern > & GetCddPatterns(void) const
Get regular expression patterns for identification of conserved domains.
Definition: options.hpp:395
bool GetVerbose(void) const
Get verbose mode.
Definition: options.hpp:691
SProgress m_ProgressMonitor
Definition: cobalt.hpp:737
@ eInterrupt
Alignment interrupted through callback function.
Definition: cobalt.hpp:83
CConstRef< CMultiAlignerOptions > m_Options
Definition: cobalt.hpp:686
CHitList m_PatternHits
Definition: cobalt.hpp:719
void x_FindPatternHits(const vector< const CSequence * > &queries, const vector< int > &indices)
Find PROSITE pattern hits on selected input sequences.
Definition: phi.cpp:62
@ ePatternHitsSearch
Definition: cobalt.hpp:95
FInterruptFn m_Interrupt
Definition: cobalt.hpp:736
Interface for CMultiAligner.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
range(_Ty, _Ty) -> range< _Ty >
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
Functions for finding pattern matches in sequence (PHI-BLAST).
Int4 FindPatternHits(Int4 *hitArray, const Uint1 *seq, Int4 len, Boolean is_dna, const SPHIPatternSearchBlk *patternSearch)
Find the places where the pattern matches seq; 3 different methods are used depending on the length o...
Definition: pattern.c:468
#define PHI_MAX_HIT
Maximal size of an array of pattern hits.
Definition: pattern.h:57
static patstr * patterns
Definition: pcregrep.c:259
struct SPatternHit SPatternHit
Intermediate representation of a pattern hit.
Pseudo lookup table structure and database scanning functions used in PHI-BLAST.
SPHIPatternSearchBlk * SPHIPatternSearchBlkFree(SPHIPatternSearchBlk *pattern_blk)
Deallocate memory for the PHI BLAST lookup table.
Definition: phi_lookup.c:690
Int2 SPHIPatternSearchBlkNew(char *pattern, Boolean is_dna, BlastScoreBlk *sbp, SPHIPatternSearchBlk **pattern_blk, Blast_Message **error_msg)
Initialize the pattern items structure, serving as a "pseudo" lookup table in a PHI BLAST search.
Definition: phi_lookup.c:388
Structure used for scoring calculations.
Definition: blast_stat.h:177
EAlignmentStage stage
Definition: cobalt.hpp:103
Structure containing all auxiliary information needed in a pattern search.
Definition: pattern.h:155
Intermediate representation of a pattern hit.
Definition: phi.cpp:49
SPatternHit(int seq, TRange range)
constructor
Definition: phi.cpp:57
TRange hit
range on query sequence where hit occurs
Definition: phi.cpp:51
int query_idx
query sequence containing the hit
Definition: phi.cpp:50
#define _ASSERT
Modified on Sun Apr 14 05:26:40 2024 by modify_doxy.py rev. 669887