NCBI C++ ToolKit
cuCdUpdater.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuCdUpdater.hpp 102839 2024-07-29 14:20:54Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Charlie Liu
27  *
28  * File Description:
29  *
30  * Update CDs
31  *
32  * ===========================================================================
33  */
34 
35 #ifndef CU_CDUPDATER_HPP
36 #define CU_CDUPDATER_HPP
37 
50 
53 BEGIN_SCOPE(cd_utils)
54 
56 {
    // Body of class CDRefresher (name taken from the constructor declared
    // below; the class-head line is not part of this listing).
57 public:
    // Constructs a refresher bound to the given CD object.
58  CDRefresher (CCdCore* cd);
59 
60  //return the gi that's replaced; return -1 if none is replaced
61  TGi refresh(CRef< CSeq_align> seqAlign, CRef< CSeq_entry > seqEntry);
    // NOTE(review): by its name, reports whether an older version of `bioseq`
    // is already known to this refresher -- confirm against the implementation.
62  bool hasOlderVersion(CRef< CBioseq > bioseq);
63 
64 private:
    // Listing lines 65-67 are not shown here. The cross-reference index at the
    // end of this page lists them as `CCdCore * m_cd` (65), the typedef
    // `map< string, CRef< CBioseq > > AccessionBioseqMap` (66) and
    // `AccessionBioseqMap m_accSeqMap` (67).
68 
    // NOTE(review): presumably these helpers register the Bioseq(s) of a
    // Seq-entry / a single Bioseq in m_accSeqMap -- confirm in the .cpp.
69  void addSequences(CSeq_entry& seqEntry);
70  void addSequence(CRef< CBioseq > bioseq);
71 };
72 
74 {
    // Body of CDUpdateStats (name taken from the constructor declared below;
    // the class-head line and listing lines 75, 82, 85 and 86 are not shown
    // here). The cross-reference index at the end of this page lists
    // `int numFilteredByOverlap` as defined at line 86.
    //
    // One list of GIs per category. NOTE(review): the meaning of each category
    // is implied only by the member name -- confirm against the implementation.
76  vector<TGi> envSeq;
77  vector<TGi> fragmented;
78  vector<TGi> overlap;
79  vector<TGi> noSeq;
80  vector<TGi> badAlign;
81  vector<TGi> redundant;
    // Pair of GIs and the list of such pairs. NOTE(review): the typedef name
    // suggests (old GI, new GI) ordering -- confirm where pairs are inserted.
83  typedef pair<TGi, TGi> OldNewGiPair;
84  vector<OldNewGiPair> oldNewPairs;
87 
88 public:
89  CDUpdateStats();
    // Returns the statistics rendered as a string; `detailed` defaults to true.
    // NOTE(review): the exact effect of `detailed` is not visible in this header.
90  string toString(bool detailed=true);
91 private:
    // Private overloads of toString for a GI list (with or without a `type`
    // label) and for a list of GI pairs with a `type` label.
92  string toString(vector<TGi>& gis, string type);
93  string toString(vector<TGi>& gis);
94  string toString(vector<OldNewGiPair>& giPairs, string type);
95 };
96 
98 {
    // Body of UpdaterInterface (name taken from the destructor declared below;
    // the class-head line is not part of this listing). All non-static member
    // functions other than the destructor are pure virtual; the same five
    // signatures are re-declared under a "//UpdaterInterface" heading in the
    // CDUpdater and GroupUpdater bodies later in this listing.
99 public:
    // Virtual destructor with an empty body.
100  virtual ~UpdaterInterface() {};
    // Both parameters have defaults (wait=false, row=0). Default arguments of
    // a virtual function are selected by the static type of the call, so the
    // re-declarations in this listing repeat the same defaults.
101  virtual int submitBlast(bool wait=false, int row = 0) = 0;
102  virtual bool getBlastHits() = 0;
103  virtual bool processBlastHits() = 0; //true: new sequences recruited.
    // NOTE(review): presumably appends the CD(s) handled by this updater to the
    // vector argument -- confirm in the implementations.
104  virtual void getCds(vector<CCdCore*>&) = 0;
105  virtual bool hasCd(CCdCore*) =0;
106 
107  // manipulate the updater store
108  static void addUpdater(UpdaterInterface* updater);
109  static bool IsEmpty();
110  static int checkAllBlasts(vector< UpdaterInterface* >& blasted);
111  static void removeUpdaters(const vector<CCdCore*>& cds);
112  static void removeUpdaters(const vector<UpdaterInterface*>& updaters);
113 
114 private:
    // Listing line 115 is not shown here. The cross-reference index at the end
    // of this page lists a static member
    // `CSafeStatic< list< UpdaterInterface * > > m_updaterList`, which is
    // presumably the "updater store" the static functions above operate on.
116 };
117 
118 class GroupUpdater;
119 
121 {
    // Body of CDUpdater (name taken from the destructor declared below; the
    // class-head line is not part of this listing). Listing lines 123, 144,
    // 162, 201-202, 204, 208 and 210-213 are not shown here. The
    // cross-reference index at the end of this page lists these additional
    // names without a visible declaration in this body:
    // getUpdateParameters(), getAlignments(), m_client, m_masterPdb, m_stats,
    // m_config, m_cd, m_blastQueryRow, m_hits, m_hitsNeeded.
122 public:
124  //CDUpdater(const string& rid);
125  virtual ~CDUpdater();
126 
127  //UpdaterInterface
128  int submitBlast(bool wait = false, int row = 0);
129  bool getBlastHits();
130  bool processBlastHits();
131  void getCds(vector<CCdCore*>&);
132  bool hasCd(CCdCore*);
133 
134  // submit a remote blast query
135  // if failed or any exception was encountered, returns false (call getLastError to see message)
136  bool blast(bool wait = false, int row = 0);
137 
    // Both getters return a copy of the member string (return type is
    // `const string`, by value).
138  const string getRid() {return m_rid;}
139  const string getLastError() {return m_lastError;}
    // Overwrites the stored error message.
140  void setLastError(const string& lastError) { m_lastError = lastError;}
141  bool getHits(CRef<CSeq_align_set> & hits);
142  bool checkDone();
    // Returns the stored m_cd pointer.
143  CCdCore* getCd() {return m_cd;}
    // Read access to m_hits by const reference; SetAlignments re-points m_hits
    // at `hits` via CRef::Reset.
145  const CRef<CSeq_align_set>& GetAlignments() const {return m_hits;}
146  void SetAlignments(CRef<CSeq_align_set>& hits) { m_hits.Reset(hits); }
147 
148  //drive update
149  bool checkBlastAndUpdate();
    // Stores `num` in m_hitsNeeded.
150  void setHitsNeeded(int num) {m_hitsNeeded = num;}
151  bool update(CCdCore* cd, CSeq_align_set& alignments);
152 
153  //for making a new CD
    // Stores `threshold` in m_processPendingThreshold (see the note on that
    // member below: a negative value means "don't do it").
154  void requireProcessPending(int threshold) {m_processPendingThreshold = threshold;};
155  //return the number of pending rows filtered out
156  static int processPendingToNormal(int overlap, CCdCore* cd);
157  static int mergePending(CCdCore* cd, int threshold, bool remaster);
158 
159  bool isFragmentedSeq(CCdCore* cd, CRef< CSeq_align > seqAlign,
160  CRef< CSeq_entry > seqEntry);
161 
    // Returns a mutable reference to m_stats.
163  CDUpdateStats& getStats() {return m_stats;}
    // Static helpers for picking Bioseqs / Seq-ids / GIs out of Seq-entries.
    // `pdbOnly` defaults to false and `seqId` defaults to a null pointer.
    // NOTE(review): return-value conventions of the int-returning helpers are
    // not visible in this header -- confirm in the .cpp.
164  static int pickBioseq(CDRefresher* refresher, CRef< CSeq_align > seqAlignRef,
165  vector< CRef< CBioseq > >& bioseqVec);
166  static int GetAllIdsFromSeqEntry(CRef< CSeq_entry > seqEntry,
167  vector< CRef< CSeq_id > >& slaveIds, bool pdbOnly=false);
168  static bool GetOneBioseqFromSeqEntry(CRef< CSeq_entry > seqEntry,
169  CRef< CBioseq >& bioseq, const CSeq_id* seqId=0);
170  static TGi getGi(CRef< CSeq_entry > seqEntry);
171  static TGi getGi(CRef<CBioseq> bioseq);
172  static bool SeqEntryHasSeqId(CRef< CSeq_entry > seqEntry, const CSeq_id& seqId);
173  static bool BioseqHasSeqId(const CBioseq& bioseq, const CSeq_id& seqId);
174 
175  //get org-ref from seqEntry if bioseq does not have one
176  //remove all unnecessary fields
177  //replace ftable with mmdb-id
178  static bool reformatBioseq(CRef< CBioseq > bioseq, CRef< CSeq_entry > seqEntry, CEntrez2Client& client);
179 
180  //copied from objtools/alnmgr/util/showalign.cpp
181  static CRef<CBlast_def_line_set> GetBlastDefline (const CBioseq& handle);
182  static void RemoveBlastDefline (CBioseq& handle);
183  static int SplitBioseqByBlastDefline (CRef< CBioseq > handle, vector< CRef<CBioseq> >& bioseqs);
184  static void reformatBioseqByBlastDefline(CRef<CBioseq> bioseq, CRef< CBlast_def_line > blastDefline, int order);
185 private:
186  bool passedFilters(CCdCore* cd, CRef< CSeq_align > seqAlign,
187  CRef< CSeq_entry > seqEntry);
188 
189  // Ignore overlaps and return 'false' when overlap <= CDUpdateStats::allowedOverlapWithCDRow, or ignore
190  // *all* overlaps when CDUpdateStats::allowedOverlapWithCDRow < 0.
    // NOTE(review): no `allowedOverlapWithCDRow` member appears in the
    // CDUpdateStats body shown in this listing or in the page's index; the
    // comment above may refer to a setting that lives elsewhere -- confirm.
191  bool overlapWithCDRow(CCdCore* cd,CRef< CSeq_align > seqAlign);
    // Takes `seqAlign` by non-const reference, so the caller's CRef may be
    // re-pointed by this call.
192  bool modifySeqAlignSeqEntry(CCdCore* cd, CRef< CSeq_align >& seqAlign,
193  CRef< CSeq_entry > seqEntry);
194  bool findRowsWithOldSeq(CCdCore* cd, CBioseq& bioseq);
195  void retrieveAllSequences(CSeq_align_set& alignments, vector< CRef< CBioseq > >& bioseqs);
196  bool findSeq(CRef<CSeq_id> seqID, vector< CRef< CBioseq > >& bioseqs, CRef<CSeq_entry>& seqEntry);
197 
198  double ComputePercentIdentity(const CRef< CSeq_align >& alignment, const string& queryString, const string& subjectString);
199 
    // Note: `seqids` is passed by value, so the vector of CRefs is copied on
    // each call.
200  void getSequencesFromGB(vector< CRef<CSeq_id> > seqids, vector< CRef< CBioseq > >& bioseqs);
    // Data members visible in this listing (others are only in the index):
    // m_rid and m_lastError back getRid() / getLastError() above.
203  string m_rid;
205  string m_lastError;
206  cd_utils::BlockModelPair* m_guideAlignment; //consensus::master
207  string m_consensus;
209  int m_processPendingThreshold; //<0, don't do it
214 
215  static void OssToDefline(const CUser_field::TData::TOss & oss, CBlast_def_line_set& bdls);
216 };
217 
219 {
    // Body of GroupUpdater (name taken from the constructor declared below;
    // the class-head line is not part of this listing). It holds a list of
    // CDUpdater pointers and re-declares the five UpdaterInterface operations.
220 public:
    // NOTE(review): presumably creates one CDUpdater per entry of `cds` using
    // `config` -- the constructor body is not visible here; confirm in the .cpp.
221  GroupUpdater(vector<CCdCore*>& cds, CdUpdateParameters& config);
222  virtual ~GroupUpdater(); //delete all in m_cdUpdaters
223 
224  //UpdaterInterface
225  int submitBlast(bool wait=false, int row=0);
226  bool getBlastHits();
227  bool processBlastHits();
228  void getCds(vector<CCdCore*>&);
229  bool hasCd(CCdCore*);
230 
231 private:
    // Raw pointers; per the destructor comment above, they are deleted when
    // this object is destroyed.
232  vector<CDUpdater*> m_cdUpdaters;
233 
234 };
235 
236 END_SCOPE(cd_utils)
238 
239 #endif
User-defined methods of the data storage class.
Declares the CBlastProteinOptionsHandle class.
AccessionBioseqMap m_accSeqMap
Definition: cuCdUpdater.hpp:67
map< string, CRef< CBioseq > > AccessionBioseqMap
Definition: cuCdUpdater.hpp:66
CCdCore * m_cd
Definition: cuCdUpdater.hpp:65
CdUpdateParameters & getUpdateParameters()
void requireProcessPending(int threshold)
string m_rid
CRef< CSeq_align_set > getAlignments()
void setHitsNeeded(int num)
cd_utils::BlockModelPair * m_guideAlignment
const string getLastError()
void setLastError(const string &lastError)
string m_consensus
const string getRid()
CEntrez2Client m_client
CDUpdateStats & getStats()
void SetAlignments(CRef< CSeq_align_set > &hits)
CRef< CSeq_id > m_masterPdb
CDUpdateStats m_stats
const CRef< CSeq_align_set > & GetAlignments() const
CdUpdateParameters m_config
CCdCore * m_cd
int m_processPendingThreshold
int m_blastQueryRow
CRef< CSeq_align_set > m_hits
CCdCore * getCd()
int m_hitsNeeded
string m_lastError
CSafeStatic<>::
Definition: Seq_entry.hpp:56
vector< CDUpdater * > m_cdUpdaters
static CSafeStatic< list< UpdaterInterface * > > m_updaterList
virtual int submitBlast(bool wait=false, int row=0)=0
virtual void getCds(vector< CCdCore * > &)=0
virtual bool processBlastHits()=0
virtual bool getBlastHits()=0
virtual bool hasCd(CCdCore *)=0
virtual ~UpdaterInterface()
Definition: map.hpp:338
USING_SCOPE(objects)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
vector< vector< char > * > TOss
static std::string toString(boost::unit_test::const_string bstr)
Static variables safety - create on demand, destroy on application termination.
Declares the CRemoteBlast class.
static CNamedPipeClient * client
#define row(bind, expected)
Definition: string_bind.c:73
int numFilteredByOverlap
Definition: cuCdUpdater.hpp:86
pair< TGi, TGi > OldNewGiPair
Definition: cuCdUpdater.hpp:83
vector< OldNewGiPair > oldNewPairs
Definition: cuCdUpdater.hpp:84
vector< TGi > overlap
Definition: cuCdUpdater.hpp:78
vector< TGi > envSeq
Definition: cuCdUpdater.hpp:76
vector< TGi > redundant
Definition: cuCdUpdater.hpp:81
vector< TGi > fragmented
Definition: cuCdUpdater.hpp:77
vector< TGi > badAlign
Definition: cuCdUpdater.hpp:80
vector< TGi > noSeq
Definition: cuCdUpdater.hpp:79
Definition: type.c:6
Modified on Wed Sep 04 14:58:53 2024 by modify_doxy.py rev. 669887