NCBI C++ ToolKit
cuCdUpdater.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuCdUpdater.hpp 102008 2024-03-18 16:00:56Z gaudaensj $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Charlie Liu
27  *
28  * File Description:
29  *
30  * Update CDs
31  *
32  * ===========================================================================
33  */
34 
35 #ifndef CU_CDUPDATER_HPP
36 #define CU_CDUPDATER_HPP
37 
49 
52 BEGIN_SCOPE(cd_utils)
53 
55 {
56 public:
57  CDRefresher (CCdCore* cd);
58 
59  //return the gi that's replaced; return -1 if none is replaced
60  TGi refresh(CRef< CSeq_align> seqAlign, CRef< CSeq_entry > seqEntry);
61  bool hasOlderVersion(CRef< CBioseq > bioseq);
62 
63 private:
67 
68  void addSequences(CSeq_entry& seqEntry);
69  void addSequence(CRef< CBioseq > bioseq);
70 };
71 
73 {
75  vector<TGi> envSeq;
76  vector<TGi> fragmented;
77  vector<TGi> overlap;
78  vector<TGi> noSeq;
79  vector<TGi> badAlign;
80  vector<TGi> redundant;
82  typedef pair<TGi, TGi> OldNewGiPair;
83  vector<OldNewGiPair> oldNewPairs;
86 
87 public:
88  CDUpdateStats();
89  string toString(bool detailed=true);
90 private:
91  string toString(vector<TGi>& gis, string type);
92  string toString(vector<TGi>& gis);
93  string toString(vector<OldNewGiPair>& giPairs, string type);
94 };
95 
97 {
98 public:
99  virtual ~UpdaterInterface() {};
100  virtual int submitBlast(bool wait=false, int row = 0) = 0;
101  virtual bool getBlastHits() = 0;
102  virtual bool processBlastHits() = 0; //true: new sequences recruited.
103  virtual void getCds(vector<CCdCore*>&) = 0;
104  virtual bool hasCd(CCdCore*) =0;
105 
106  // manipulate the updater store
107  static void addUpdater(UpdaterInterface* updater);
108  static bool IsEmpty();
109  static int checkAllBlasts(vector< UpdaterInterface* >& blasted);
110  static void removeUpdaters(const vector<CCdCore*>& cds);
111  static void removeUpdaters(const vector<UpdaterInterface*>& updaters);
112 
113 private:
114  static list<UpdaterInterface*> m_updaterList;
115 };
116 
117 class GroupUpdater;
118 
120 {
121 public:
123  //CDUpdater(const string& rid);
124  virtual ~CDUpdater();
125 
126  //UpdaterInterface
127  int submitBlast(bool wait = false, int row = 0);
128  bool getBlastHits();
129  bool processBlastHits();
130  void getCds(vector<CCdCore*>&);
131  bool hasCd(CCdCore*);
132 
133  // submit a remote blast query
134  // returns false if the submission failed or any exception was encountered (call getLastError to see the message)
135  bool blast(bool wait = false, int row = 0);
136 
137  const string getRid() {return m_rid;}
138  const string getLastError() {return m_lastError;}
139  void setLastError(const string& lastError) { m_lastError = lastError;}
140  bool getHits(CRef<CSeq_align_set> & hits);
141  bool checkDone();
142  CCdCore* getCd() {return m_cd;}
144  const CRef<CSeq_align_set>& GetAlignments() const {return m_hits;}
145  void SetAlignments(CRef<CSeq_align_set>& hits) { m_hits.Reset(hits); }
146 
147  //drive update
148  bool checkBlastAndUpdate();
149  void setHitsNeeded(int num) {m_hitsNeeded = num;}
150  bool update(CCdCore* cd, CSeq_align_set& alignments);
151 
152  //for making a new CD
153  void requireProcessPending(int threshold) {m_processPendingThreshold = threshold;};
154  //return the number of pending rows filtered out
155  static int processPendingToNormal(int overlap, CCdCore* cd);
156  static int mergePending(CCdCore* cd, int threshold, bool remaster);
157 
158  bool isFragmentedSeq(CCdCore* cd, CRef< CSeq_align > seqAlign,
159  CRef< CSeq_entry > seqEntry);
160 
162  CDUpdateStats& getStats() {return m_stats;}
163  static int pickBioseq(CDRefresher* refresher, CRef< CSeq_align > seqAlignRef,
164  vector< CRef< CBioseq > >& bioseqVec);
165  static int GetAllIdsFromSeqEntry(CRef< CSeq_entry > seqEntry,
166  vector< CRef< CSeq_id > >& slaveIds, bool pdbOnly=false);
167  static bool GetOneBioseqFromSeqEntry(CRef< CSeq_entry > seqEntry,
168  CRef< CBioseq >& bioseq, const CSeq_id* seqId=0);
169  static TGi getGi(CRef< CSeq_entry > seqEntry);
170  static TGi getGi(CRef<CBioseq> bioseq);
171  static bool SeqEntryHasSeqId(CRef< CSeq_entry > seqEntry, const CSeq_id& seqId);
172  static bool BioseqHasSeqId(const CBioseq& bioseq, const CSeq_id& seqId);
173 
174  //get org-ref from seqEntry if bioseq does not have one
175  //remove all unnecessary fields
176  //replace ftable with mmdb-id
177  static bool reformatBioseq(CRef< CBioseq > bioseq, CRef< CSeq_entry > seqEntry, CEntrez2Client& client);
178 
179  //copied from objtools/alnmgr/util/showalign.cpp
180  static CRef<CBlast_def_line_set> GetBlastDefline (const CBioseq& handle);
181  static void RemoveBlastDefline (CBioseq& handle);
182  static int SplitBioseqByBlastDefline (CRef< CBioseq > handle, vector< CRef<CBioseq> >& bioseqs);
183  static void reformatBioseqByBlastDefline(CRef<CBioseq> bioseq, CRef< CBlast_def_line > blastDefline, int order);
184 private:
185  bool passedFilters(CCdCore* cd, CRef< CSeq_align > seqAlign,
186  CRef< CSeq_entry > seqEntry);
187 
188  // Ignore overlaps and return 'false' when overlap <= CDUpdateStats::allowedOverlapWithCDRow, or ignore
189  // *all* overlaps when CDUpdateStats::allowedOverlapWithCDRow < 0.
190  bool overlapWithCDRow(CCdCore* cd,CRef< CSeq_align > seqAlign);
191  bool modifySeqAlignSeqEntry(CCdCore* cd, CRef< CSeq_align >& seqAlign,
192  CRef< CSeq_entry > seqEntry);
193  bool findRowsWithOldSeq(CCdCore* cd, CBioseq& bioseq);
194  void retrieveAllSequences(CSeq_align_set& alignments, vector< CRef< CBioseq > >& bioseqs);
195  bool findSeq(CRef<CSeq_id> seqID, vector< CRef< CBioseq > >& bioseqs, CRef<CSeq_entry>& seqEntry);
196 
197  double ComputePercentIdentity(const CRef< CSeq_align >& alignment, const string& queryString, const string& subjectString);
198 
199  void getSequencesFromGB(vector< CRef<CSeq_id> > seqids, vector< CRef< CBioseq > >& bioseqs);
202  string m_rid;
204  string m_lastError;
205  cd_utils::BlockModelPair* m_guideAlignment; //consensus::master
206  string m_consensus;
208  int m_processPendingThreshold; //<0, don't do it
213 
214  static void OssToDefline(const CUser_field::TData::TOss & oss, CBlast_def_line_set& bdls);
215 };
216 
218 {
219 public:
220  GroupUpdater(vector<CCdCore*>& cds, CdUpdateParameters& config);
221  virtual ~GroupUpdater(); //delete all in m_cdUpdaters
222 
223  //UpdaterInterface
224  int submitBlast(bool wait=false, int row=0);
225  bool getBlastHits();
226  bool processBlastHits();
227  void getCds(vector<CCdCore*>&);
228  bool hasCd(CCdCore*);
229 
230 private:
231  vector<CDUpdater*> m_cdUpdaters;
232 
233 };
234 
235 END_SCOPE(cd_utils)
237 
238 #endif
User-defined methods of the data storage class.
Declares the CBlastProteinOptionsHandle class.
AccessionBioseqMap m_accSeqMap
Definition: cuCdUpdater.hpp:66
map< string, CRef< CBioseq > > AccessionBioseqMap
Definition: cuCdUpdater.hpp:65
CCdCore * m_cd
Definition: cuCdUpdater.hpp:64
CdUpdateParameters & getUpdateParameters()
void requireProcessPending(int threshold)
string m_rid
CRef< CSeq_align_set > getAlignments()
void setHitsNeeded(int num)
cd_utils::BlockModelPair * m_guideAlignment
const string getLastError()
void setLastError(const string &lastError)
string m_consensus
const string getRid()
CEntrez2Client m_client
CDUpdateStats & getStats()
void SetAlignments(CRef< CSeq_align_set > &hits)
CRef< CSeq_id > m_masterPdb
CDUpdateStats m_stats
const CRef< CSeq_align_set > & GetAlignments() const
CdUpdateParameters m_config
CCdCore * m_cd
int m_processPendingThreshold
int m_blastQueryRow
CRef< CSeq_align_set > m_hits
CCdCore * getCd()
int m_hitsNeeded
string m_lastError
Definition: Seq_entry.hpp:56
vector< CDUpdater * > m_cdUpdaters
virtual int submitBlast(bool wait=false, int row=0)=0
virtual void getCds(vector< CCdCore * > &)=0
virtual bool processBlastHits()=0
static list< UpdaterInterface * > m_updaterList
virtual bool getBlastHits()=0
virtual bool hasCd(CCdCore *)=0
virtual ~UpdaterInterface()
Definition: cuCdUpdater.hpp:99
Definition: map.hpp:338
USING_SCOPE(objects)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
vector< vector< char > * > TOss
static std::string toString(boost::unit_test::const_string bstr)
Declares the CRemoteBlast class.
static CNamedPipeClient * client
#define row(bind, expected)
Definition: string_bind.c:73
int numFilteredByOverlap
Definition: cuCdUpdater.hpp:85
pair< TGi, TGi > OldNewGiPair
Definition: cuCdUpdater.hpp:82
vector< OldNewGiPair > oldNewPairs
Definition: cuCdUpdater.hpp:83
vector< TGi > overlap
Definition: cuCdUpdater.hpp:77
vector< TGi > envSeq
Definition: cuCdUpdater.hpp:75
vector< TGi > redundant
Definition: cuCdUpdater.hpp:80
vector< TGi > fragmented
Definition: cuCdUpdater.hpp:76
vector< TGi > badAlign
Definition: cuCdUpdater.hpp:79
vector< TGi > noSeq
Definition: cuCdUpdater.hpp:78
Definition: type.c:6
Modified on Sat May 04 13:11:58 2024 by modify_doxy.py rev. 669887