52 BEGIN_SCOPE(cd_utils)
55 {
56 public:
// Constructs a refresher for the CD passed in `cd`.
// NOTE(review): the raw pointer does not express ownership; presumably
// non-owning -- confirm against the implementation.
57  CDRefresher (CCdCore* cd);
59  // Returns the GI that was replaced, or -1 if nothing was replaced.
60  TGi refresh(CRef< CSeq_align> seqAlign, CRef< CSeq_entry > seqEntry);
// NOTE(review): presumably reports whether an older version of `bioseq`
// is already known to this refresher -- confirm in the .cpp.
61  bool hasOlderVersion(CRef< CBioseq > bioseq);
63 private:
// Private helpers for registering sequence data with the refresher.
// NOTE(review): presumably addSequences() visits the Bioseqs in `seqEntry`
// and hands each one to addSequence() -- confirm in the .cpp.
68  void addSequences(CSeq_entry& seqEntry);
69  void addSequence(CRef< CBioseq > bioseq);
70 };
73 {
// Lists of GIs, one list per category. Only the category names are
// visible here; NOTE(review): the exact criterion that puts a GI into
// each list is decided by the updater implementation -- confirm there.
75  vector<TGi> envSeq;
76  vector<TGi> fragmented;
77  vector<TGi> overlap;
78  vector<TGi> noSeq;
79  vector<TGi> badAlign;
80  vector<TGi> redundant;
// A pair of GIs. NOTE(review): going by the typedef name the order is
// (old, new) -- TODO confirm against the code that fills oldNewPairs.
82  typedef pair<TGi, TGi> OldNewGiPair;
83  vector<OldNewGiPair> oldNewPairs;
87 public:
// Default constructor.
88  CDUpdateStats();
// Returns a string representation of the statistics. `detailed` defaults
// to true. NOTE(review): presumably `detailed` controls whether the
// individual GIs are listed -- confirm in the .cpp.
89  string toString(bool detailed=true);
90 private:
// Private formatting overloads used to build the public toString() result.
// `gis` / `giPairs` are taken by non-const reference and `type` by value.
// NOTE(review): `type` is presumably a label for the category being
// printed -- confirm in the .cpp.
91  string toString(vector<TGi>& gis, string type);
92  string toString(vector<TGi>& gis);
93  string toString(vector<OldNewGiPair>& giPairs, string type);
94 };
97 {
98 public:
// Virtual destructor so that implementations can be destroyed through an
// UpdaterInterface pointer.
99  virtual ~UpdaterInterface() {};
// Pure virtual operations that every concrete updater must implement.
// submitBlast: `wait` defaults to false and `row` to 0.
// NOTE(review): the meaning of the int return value is not documented
// in this header -- confirm with the implementations.
100  virtual int submitBlast(bool wait=false, int row = 0) = 0;
101  virtual bool getBlastHits() = 0;
102  virtual bool processBlastHits() = 0; // returns true when new sequences were recruited.
// Output goes through the vector reference.
// NOTE(review): presumably fills it with the CDs handled by this updater.
103  virtual void getCds(vector<CCdCore*>&) = 0;
// NOTE(review): presumably true when this updater handles the given CD.
104  virtual bool hasCd(CCdCore*) =0;
106  // Manipulate the updater store (static functions shared by all updaters).
// NOTE(review): ownership of the pointers passed to addUpdater() and
// whether removeUpdaters() deletes them is not visible here -- confirm.
107  static void addUpdater(UpdaterInterface* updater);
108  static bool IsEmpty();
// `blasted` is an output vector. NOTE(review): the meaning of the int
// return value is not documented in this header -- confirm in the .cpp.
109  static int checkAllBlasts(vector< UpdaterInterface* >& blasted);
// Two overloads: select the updaters to remove either by CD or by
// updater pointer.
110  static void removeUpdaters(const vector<CCdCore*>& cds);
111  static void removeUpdaters(const vector<UpdaterInterface*>& updaters);
113 private:
// Static list of updater pointers.
// NOTE(review): presumably the store manipulated by addUpdater(),
// removeUpdaters(), IsEmpty() and checkAllBlasts() -- confirm in the .cpp.
114  static list<UpdaterInterface*> m_updaterList;
115 };
117 class GroupUpdater;
120 {
121 public:
// Disabled constructor taking a RID string; kept commented out.
123  //CDUpdater(const string& rid);
// Virtual destructor.
124  virtual ~CDUpdater();
126  // UpdaterInterface operations. The signatures and default arguments
// mirror the pure virtuals declared in UpdaterInterface.
127  int submitBlast(bool wait = false, int row = 0);
128  bool getBlastHits();
129  bool processBlastHits();
130  void getCds(vector<CCdCore*>&);
131  bool hasCd(CCdCore*);
133  // Submits a remote BLAST query. `wait` defaults to false and `row` to 0.
134  // Returns false if the submission failed or any exception was encountered;
// call getLastError() to see the message.
135  bool blast(bool wait = false, int row = 0);
// Accessors for the RID string (m_rid) and the most recent error text
// (m_lastError). Both getters return a copy of the member and are
// const-qualified so they can be called on a const CDUpdater, in line
// with GetAlignments() const.
137  const string getRid() const {return m_rid;}
138  const string getLastError() const {return m_lastError;}
// Overwrites the stored error message with `lastError`.
139  void setLastError(const string& lastError) { m_lastError = lastError;}
// `hits` is an output reference. NOTE(review): presumably receives the
// BLAST hits and the return value reports success -- confirm in the .cpp.
140  bool getHits(CRef<CSeq_align_set> & hits);
// NOTE(review): presumably true once the submitted BLAST has finished.
141  bool checkDone();
// Returns the m_cd pointer held by this updater.
142  CCdCore* getCd() {return m_cd;}
// Read-only access to the alignment set held in m_hits.
144  const CRef<CSeq_align_set>& GetAlignments() const {return m_hits;}
// Resets m_hits from `hits`.
145  void SetAlignments(CRef<CSeq_align_set>& hits) { m_hits.Reset(hits); }
147  // Drive the update.
// NOTE(review): presumably checks whether BLAST is done and, if so, runs
// the update; the meaning of the bool return is not documented here.
148  bool checkBlastAndUpdate();
// Stores `num` in m_hitsNeeded.
149  void setHitsNeeded(int num) {m_hitsNeeded = num;}
// NOTE(review): presumably applies `alignments` to `cd`; confirm what the
// bool return reports.
150  bool update(CCdCore* cd, CSeq_align_set& alignments);
152  // For making a new CD.
// Stores `threshold` in m_processPendingThreshold; per that member's
// comment, a negative value means the step is not performed.
153  void requireProcessPending(int threshold) {m_processPendingThreshold = threshold;};
154  // Returns the number of pending rows filtered out.
155  static int processPendingToNormal(int overlap, CCdCore* cd);
// NOTE(review): the return value and the exact effect of `threshold` and
// `remaster` are not documented in this header -- confirm in the .cpp.
156  static int mergePending(CCdCore* cd, int threshold, bool remaster);
// NOTE(review): presumably true when the sequence aligned by `seqAlign`
// (looked up in `seqEntry`) is considered a fragment relative to `cd`;
// the criterion is defined in the .cpp -- confirm there.
158  bool isFragmentedSeq(CCdCore* cd, CRef< CSeq_align > seqAlign,
159  CRef< CSeq_entry > seqEntry);
// Returns a mutable reference to the m_stats member.
162  CDUpdateStats& getStats() {return m_stats;}
// Static helpers for working with Bioseqs, Seq-entries and Seq-ids.
// NOTE(review): the int return values of pickBioseq() and
// GetAllIdsFromSeqEntry() are not documented in this header (an index or
// a count, presumably) -- confirm in the .cpp.
163  static int pickBioseq(CDRefresher* refresher, CRef< CSeq_align > seqAlignRef,
164  vector< CRef< CBioseq > >& bioseqVec);
// `slaveIds` is an output vector; `pdbOnly` defaults to false.
165  static int GetAllIdsFromSeqEntry(CRef< CSeq_entry > seqEntry,
166  vector< CRef< CSeq_id > >& slaveIds, bool pdbOnly=false);
// `bioseq` is an output reference; `seqId` defaults to a null pointer (0).
// NOTE(review): how a null `seqId` is treated is not visible here.
167  static bool GetOneBioseqFromSeqEntry(CRef< CSeq_entry > seqEntry,
168  CRef< CBioseq >& bioseq, const CSeq_id* seqId=0);
// Two overloads returning a GI for a Seq-entry or for a Bioseq.
169  static TGi getGi(CRef< CSeq_entry > seqEntry);
170  static TGi getGi(CRef<CBioseq> bioseq);
// Membership tests for a Seq-id in a Seq-entry or in a single Bioseq.
171  static bool SeqEntryHasSeqId(CRef< CSeq_entry > seqEntry, const CSeq_id& seqId);
172  static bool BioseqHasSeqId(const CBioseq& bioseq, const CSeq_id& seqId);
174  // Reformats `bioseq`: gets the org-ref from seqEntry if the bioseq does not have one,
175  // removes all unnecessary fields,
176  // and replaces the ftable with the mmdb-id.
// NOTE(review): the meaning of the bool return and how `client` is used
// are not documented in this header -- confirm in the .cpp.
177  static bool reformatBioseq(CRef< CBioseq > bioseq, CRef< CSeq_entry > seqEntry, CEntrez2Client& client);
179  // BLAST defline helpers; copied from objtools/alnmgr/util/showalign.cpp
// Returns a Blast-def-line-set obtained from the given Bioseq.
180  static CRef<CBlast_def_line_set> GetBlastDefline (const CBioseq& handle);
// Takes the Bioseq by non-const reference, i.e. it may modify it.
181  static void RemoveBlastDefline (CBioseq& handle);
// `bioseqs` is an output vector. NOTE(review): the int return is presumably
// the number of Bioseqs produced -- confirm in the .cpp.
182  static int SplitBioseqByBlastDefline (CRef< CBioseq > handle, vector< CRef<CBioseq> >& bioseqs);
// NOTE(review): the role of `order` is not documented here -- confirm.
183  static void reformatBioseqByBlastDefline(CRef<CBioseq> bioseq, CRef< CBlast_def_line > blastDefline, int order);
184 private:
// NOTE(review): presumably true when the hit described by `seqAlign` /
// `seqEntry` is acceptable for `cd`; which filters are applied is
// defined in the .cpp.
185  bool passedFilters(CCdCore* cd, CRef< CSeq_align > seqAlign,
186  CRef< CSeq_entry > seqEntry);
188  // Ignore overlaps and return 'false' when overlap <= CDUpdateStats::allowedOverlapWithCDRow, or ignore
189  // *all* overlaps when CDUpdateStats::allowedOverlapWithCDRow < 0.
// NOTE(review): allowedOverlapWithCDRow is not among the CDUpdateStats
// members visible in this header -- confirm where it is declared.
190  bool overlapWithCDRow(CCdCore* cd,CRef< CSeq_align > seqAlign);
// `seqAlign` is taken by non-const reference, so the CRef itself may be
// changed by the call.
191  bool modifySeqAlignSeqEntry(CCdCore* cd, CRef< CSeq_align >& seqAlign,
192  CRef< CSeq_entry > seqEntry);
193  bool findRowsWithOldSeq(CCdCore* cd, CBioseq& bioseq);
// `bioseqs` is an output vector.
194  void retrieveAllSequences(CSeq_align_set& alignments, vector< CRef< CBioseq > >& bioseqs);
// `seqEntry` is an output reference. NOTE(review): presumably looks up
// `seqID` among `bioseqs` -- confirm in the .cpp.
195  bool findSeq(CRef<CSeq_id> seqID, vector< CRef< CBioseq > >& bioseqs, CRef<CSeq_entry>& seqEntry);
// NOTE(review): the scale of the result (fraction vs. percent) is not
// documented in this header -- confirm in the .cpp.
197  double ComputePercentIdentity(const CRef< CSeq_align >& alignment, const string& queryString, const string& subjectString);
// `seqids` is passed by value (copied on each call); `bioseqs` is an
// output vector.
199  void getSequencesFromGB(vector< CRef<CSeq_id> > seqids, vector< CRef< CBioseq > >& bioseqs);
// RID string returned by getRid().
202  string m_rid;
// Most recent error message; read and written via getLastError() / setLastError().
204  string m_lastError;
// NOTE(review): raw pointer -- ownership is not visible in this header.
205  cd_utils::BlockModelPair* m_guideAlignment; //consensus::master
206  string m_consensus;
// Set by requireProcessPending().
208  int m_processPendingThreshold; // < 0: don't do it
// `bdls` is taken by non-const reference.
// NOTE(review): presumably filled from the octet-string data in `oss`.
214  static void OssToDefline(const CUser_field::TData::TOss & oss, CBlast_def_line_set& bdls);
215 };
218 {
219 public:
// Constructs a group updater for the CDs in `cds` using `config`.
// NOTE(review): presumably creates one CDUpdater per CD and stores it in
// m_cdUpdaters -- confirm in the .cpp.
220  GroupUpdater(vector<CCdCore*>& cds, CdUpdateParameters& config);
221  virtual ~GroupUpdater(); // deletes all updaters in m_cdUpdaters
223  // UpdaterInterface operations. The signatures and default arguments
// mirror the pure virtuals declared in UpdaterInterface.
224  int submitBlast(bool wait=false, int row=0);
225  bool getBlastHits();
226  bool processBlastHits();
227  void getCds(vector<CCdCore*>&);
228  bool hasCd(CCdCore*);
230 private:
// CDUpdater pointers held by this group; per the destructor's comment,
// all of them are deleted when the GroupUpdater is destroyed.
231  vector<CDUpdater*> m_cdUpdaters;
233 };
235 END_SCOPE(cd_utils)
238 #endif
