NCBI C++ ToolKit
subj_ranges_set.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: subj_ranges_set.cpp 40923 2009-02-03 18:13:26Z camacho $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Kevin Bealer
27  *
28  */
29 
30 /// @file subj_ranges_set.cpp
31 /// Defines classes to maintain lists of subject offset ranges in sequence
32 /// data for targeted retrieval during the traceback stage.
33 
34 #include <ncbi_pch.hpp>
37 
40 BEGIN_SCOPE(blast)
41 
42 // The code attempts to fetch sequence data that is near 'used'
43 // areas, and to join segments that are near each other. Each
44 // sub-range of a sequence will be expanded on each side by at
45 // least CSubjectRangesSet::m_ExpandHSP letters (or to the beginning or end of
46 // the sequence). If more than one offset range is specified for a subject
47 // OID, adjacent ranges will be merged if the distance
48 // between them is less than CSubjectRangesSet::m_MinGap.
49 
50 void CSubjectRanges::AddRange(int query_oid,
51  int begin,
52  int end,
53  int min_gap)
54 {
55  m_QueryOIDs.insert(query_oid);
56 
57  bool done = false;
58 
59  // Loop until done - in the process we absorb any competing
60  // elements, and then insert the combined range. There is a
61  // special case where the new element fits into an existing one
62  // where we can just exit.
63 
64  pair<int,int> range(begin, end);
65  pair<int,int> range2(end+1, end+2); // first 'uninteresting' range.
66 
67  while(! done) {
68  TRangeList::iterator lhs = m_Offsets.lower_bound(range);
69  TRangeList::iterator rhs = m_Offsets.upper_bound(range2);
70 
71  // Before starting, we need to 'back up' the start range
72  // iterator in case it overlaps the range we want. If this is
73  // not done, a range with an earlier start than us, but
74  // overlapping data, could be missed.
75 
76  if (lhs != m_Offsets.begin()) {
77  -- lhs;
78  }
79 
80  done = true;
81 
82  // if true, need to redo the lhs/rhs
83  bool recompute = false;
84 
85  while((! recompute) && (lhs != rhs)) {
86  if (lhs->first <= (end + min_gap) &&
87  lhs->second >= (begin - min_gap)) {
88 
89  if (lhs->first <= begin && lhs->second >= end) {
90  // special case: nothing to do.
91  return;
92  }
93 
94  // Absorb this range into begin/end, and remove the
95  // element that we are absorbing; which means lhs/rhs
96  // should be recomputed.
97 
98  x_Absorb(*lhs, range);
99  m_Offsets.erase(lhs);
100 
101  begin = range.first;
102  end = range.second;
103 
104  recompute = true;
105  done = false;
106  } else {
107  // Ranges do not match, try the next one.
108  ++ lhs;
109  }
110  }
111  }
112 
113  // Add the range.
114 
115  m_Offsets.insert(range);
116 }
117 
118 void CSubjectRangesSet::AddRange(int q_oid, int s_oid, int begin, int end)
119 {
121 
122  if (R.Empty()) {
123  R.Reset(new CSubjectRanges);
124  }
125 
126  if (m_ExpandHSP) {
127  x_ExpandHspRange(begin, end);
128  }
129 
130  R->AddRange(q_oid, begin, end, m_MinGap);
131 }
132 
134 {
135  m_SubjRanges.erase(s_oid);
136 }
137 
138 void CSubjectRangesSet::x_ExpandHspRange(int & begin, int & end)
139 {
140  // Expand by at least min_exp (letters). It may be a good idea to
141  // expand each area by a factor of the total length as well, but
142  // the total length of an area is not known until all merging is
143  // done; each individual HSP is potentially part of one or more
144  // alignments that each include any number and combination of
145  // HSPs; this will not be known until after the traceback is
146  // completed.
147 
148  begin = (begin > m_ExpandHSP) ? (begin - m_ExpandHSP) : 0;
149 
150  // end must be adjusted to a max of subject length at data
151  // fetch time.
152 
153  end += m_ExpandHSP;
154 }
155 
157 {
158  static const bool kKeepExistingRanges = true;
160  int subject_oid = subj->first;
161  const CSubjectRanges & subj_list = *(subj->second);
162  bool cache_data = subj_list.IsUsedByMultipleQueries();
163  seqdb.SetOffsetRanges(subject_oid,
164  subj_list.GetRanges(),
165  kKeepExistingRanges,
166  cache_data);
167  }
168 }
169 
170 END_SCOPE(blast)
CSeqDB.
Definition: seqdb.hpp:161
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data)
Apply a range of offsets to a database sequence.
Definition: seqdb.cpp:1295
void ApplyRanges(CSeqDB &db) const
Apply existing ranges to a database.
void AddRange(int q_oid, int s_oid, int begin, int end)
Add new offset range for subject HSP.
int m_MinGap
Minimum gap between sequences to avoid merging.
TSubjOid2RangesMap m_SubjRanges
Set of query ids and ranges for an OID.
void x_ExpandHspRange(int &begin, int &end)
Add, expand, and merge new range.
int m_ExpandHSP
Expansion amount for each HSP range.
void RemoveSubject(int s_oid)
Remove a given subject OID from the set.
Set of ranges of subject sequences to fetch during the traceback stage.
const TRangeList & GetRanges() const
Returns the set of ranges accumulated thus far.
bool IsUsedByMultipleQueries() const
Returns true if the ranges associated with this sequence are aligned to multiple query sequences.
void erase(iterator pos)
Definition: map.hpp:167
parent_type::iterator iterator
Definition: set.hpp:80
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
void Reset(void)
Reset random number generator to initial startup condition (LFG only)
Definition: random_gen.cpp:234
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
range(_Ty, _Ty) -> range< _Ty >
Defines BLAST database access classes.
USING_SCOPE(objects)
Declares classes to maintain lists of subject offset ranges in sequence data for targeted retrieval ...
done
Definition: token1.c:1
Modified on Wed Apr 17 13:08:34 2024 by modify_doxy.py rev. 669887