NCBI C++ ToolKit
cuCdUpdater.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuCdUpdater.cpp 102007 2024-03-18 15:49:08Z gaudaensj $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Charlie Liu
27  *
28  * File Description:
29  *
30  * Update CDs
31  *
32  * ===========================================================================
33  */
34 
35 #include <ncbi_pch.hpp>
60 
62 BEGIN_SCOPE(cd_utils)
63 
64 CDUpdateStats::CDUpdateStats() : numBlastHits(0), numRedundant(0), numObsolete(0),
65  numFilteredByOverlap(0){}
66 
// Builds a human-readable summary of the statistics gathered during an update.
//
// The "added" figure is numBlastHits minus every rejected or replaced category
// (environmental, fragmented, overlapping, no-sequence, bad-alignment,
// redundant, and old/new replacement pairs).
//
// detailed  when false, only the summary sentence(s) are returned; when true,
//           the per-category GI lists produced by the other toString()
//           overloads are appended as well.
//
// NOTE(review): original lines 76, 81, 92 and 100 are absent from this
// listing; they presumably append the numeric value that follows each label --
// confirm against the real file before editing this function.
67 string CDUpdateStats::toString(bool detailed)
68 {
69  int added = numBlastHits - envSeq.size() - fragmented.size() - overlap.size()
70  - noSeq.size() - badAlign.size() - numRedundant - oldNewPairs.size();
71  string result = "Total number of sequences added to the pending list of the Cd:" + NStr::IntToString(added)
72  +". ";
73  if (numFilteredByOverlap > 0)
74  {
75  result += "Total number of pending sequences that are not moved to normal alignment because of insufficient overlapping:"
77  }
 // Short form requested: stop after the summary.
78  if (!detailed)
79  return result;
80  result += "Number of Blast Hits = ";
82  result += ". ";
83 
 // One section per rejection category; each overload returns "" for an empty list.
84  result += toString(envSeq, "Environmental Sequences");
85  result += toString(fragmented, "Sequence Fragments");
86  result += toString(overlap, "Alignments overlapping a row already in CD");
87  result += toString(noSeq, "Alignments with no sequence data");
88  result += toString(badAlign, "Alignments that are corrupted or do not match with the CD");
89  if (numRedundant > 0)
90  {
91  result += "Alignments removed due to redundancy:";
93  result += ". ";
94  }
95  result += toString(oldNewPairs, "New sequences that can replace old sequences (in parentheses) already in CD");
96  result += ". ";
97  if (numObsolete > 0)
98  {
99  result += "Numer of obsolete sequences removed:";
101  result += ". ";
102  }
103  return result;
104 }
105 
106 string CDUpdateStats::toString(vector<TGi>& gis, string type)
107 {
108  if (gis.size() <= 0)
109  return "";
110  string result = "Number of " + type + " =";
111  result += NStr::UIntToString((unsigned)gis.size());
112  result += ":\n";
113  result += toString(gis);
114  result += "\n";
115  return result;
116 }
117 
118 string CDUpdateStats::toString(vector<TGi>& gis)
119 {
120  string result;
121  for(unsigned int i = 0; i < gis.size(); i++)
122  {
123  result += NStr::NumericToString<TGi>( gis[i] );
124  result += ",";
125  }
126  return result;
127 }
128 
129 string CDUpdateStats::toString(vector<OldNewGiPair>& giPairs, string type)
130 {
131  if (giPairs.size() <= 0)
132  return "";
133  string result = "Number of " + type + " =";
134  result += NStr::UIntToString((unsigned)giPairs.size());
135  result += ":\n";
136  for (unsigned int i = 0 ; i < giPairs.size(); i++)
137  {
138  result += NStr::NumericToString<TGi>(giPairs[i].second);
139  result += "(";
140  result += NStr::NumericToString<TGi>(giPairs[i].first);
141  result += ")";
142  result += ",";
143  }
144  result += "\n";
145  return result;
146 }
147 
148 
149 /*---------------------------------------------------------------------------------*/
150 /* UpdaterInterface */
151 /*---------------------------------------------------------------------------------*/
152 
// Definition of the static registry of updaters. The routines in this section
// append to it, query whether it is empty, walk it, and erase (and delete)
// entries from it.
153 list<UpdaterInterface*> UpdaterInterface::m_updaterList;
154 
// Registers `updater` by appending it to the static m_updaterList.
// NOTE(review): the signature line (original line 155) is absent from this
// listing; only the body is visible here.
156 {
157  m_updaterList.push_back(updater);
158 }
159 
// Returns true when the static m_updaterList holds no updaters.
// NOTE(review): the signature line (original line 160) is absent from this
// listing; only the body is visible here.
161 {
162  return m_updaterList.empty();
163 }
164 
165 void UpdaterInterface::removeUpdaters(const vector<CCdCore*>& cds)
166 {
167  for (unsigned int i = 0; i < cds.size(); i++)
168  {
169  for (list<UpdaterInterface*>::iterator lit = m_updaterList.begin();
170  lit != m_updaterList.end(); lit++)
171  {
172  if ((*lit)->hasCd(cds[i]))
173  {
174  UpdaterInterface* cup = *lit;
175  m_updaterList.erase(lit);
176  if (cup)
177  {
178  //cup->getCd()->ClearUpdateInfo();
179  delete cup;
180  }
181  break;
182  }
183  }
184  }
185 }
186 
187 void UpdaterInterface::removeUpdaters(const vector<UpdaterInterface*>& updaters)
188 {
189  for (unsigned int i = 0; i < updaters.size(); i++)
190  {
191  for (list<UpdaterInterface*>::iterator lit = m_updaterList.begin();
192  lit != m_updaterList.end(); lit++)
193  {
194  if ((*lit) == updaters[i])
195  {
196  UpdaterInterface* cup = *lit;
197  m_updaterList.erase(lit);
198  if (cup)
199  {
200  //cup->getCd()->ClearUpdateInfo();
201  delete cup;
202  }
203  break;
204  }
205  }
206  }
207 }
208 
209 int UpdaterInterface::checkAllBlasts(vector<UpdaterInterface*>& blasted)
210 {
211  list<UpdaterInterface*>::iterator lit = m_updaterList.begin();
212  while(lit != m_updaterList.end())
213  {
214  UpdaterInterface* updater = *lit;
215  if(updater->getBlastHits())
216  {
217  blasted.push_back(updater);
218  }
219  lit++;
220  }
221  return blasted.size();
222 }
223 
224 /*---------------------------------------------------------------------------------*/
225 /* GroupUpdater */
226 /*---------------------------------------------------------------------------------*/
227 
// Creates one heap-allocated CDUpdater per entry of `cds`, each constructed
// with the same `config`, and stores them in m_cdUpdaters. The destructor
// deletes them.
// NOTE(review): the constructor signature (original line 228) is absent from
// this listing; only the body is visible here.
229 {
230  for (unsigned int i = 0; i < cds.size(); i++)
231  m_cdUpdaters.push_back(new CDUpdater(cds[i], config));
232 }
233 
234 GroupUpdater::~GroupUpdater() //delete all in m_cdUpdaters
235 {
236  for (unsigned int i = 0; i < m_cdUpdaters.size(); i++)
237  delete (m_cdUpdaters[i]);
238 }
239 
240  //UpdaterInterface
241 int GroupUpdater::submitBlast(bool wait, int row)
242 {
243  int count = 0;
244  for (unsigned int i = 0; i < m_cdUpdaters.size(); i++)
245  {
246  if (!(m_cdUpdaters[i])->submitBlast(wait,row)) {
247  return 0; // return 0 if one fails so legacy "if" statements still work.
248  }
249  else {
250  count++;
251  }
252  }
253  return count;
254 }
255 
// Asks every member updater for its BLAST hits and returns true only when all
// of them returned true. There is no early exit: each member is polled even
// after one has already reported false.
// NOTE(review): the signature line (original line 256) is absent from this
// listing; only the body is visible here.
257 {
258  bool allDone = true;
259  for (unsigned int i = 0; i < m_cdUpdaters.size(); i++)
260  {
261  if (!(m_cdUpdaters[i]->getBlastHits()))
262  allDone = false;
263  }
264  return allDone;
265 }
266 
// Processes the BLAST hits of the whole group.
//
// Returns false (after logging) unless getBlastHits() reports that every
// member has its hits. Otherwise each member's alignments are handed to a
// HitDistributor, distribute() is called once, and then processBlastHits() is
// invoked on every member; the result is true only when all of those calls
// returned true.
// NOTE(review): the signature line (original line 267) is absent from this
// listing; only the body is visible here.
268 {
269  if (!getBlastHits())
270  {
271  LOG_POST("Not all BLASTs on the group are done. Thus updating this group can't be done at this time.\n");
272  return false;
273  }
274  //distribute
275  HitDistributor dist;
276  for (unsigned int i = 0; i < m_cdUpdaters.size(); i++)
277  {
278  dist.addBatch(m_cdUpdaters[i]->getAlignments());
279  }
280  //dist.dump("DistTab.txt");
281  dist.distribute();
282  //update individual CDs
 // No early exit: every member is processed even if an earlier one failed.
283  bool allDone = true;
284  for (unsigned int i = 0; i < m_cdUpdaters.size(); i++)
285  {
286  if (!(m_cdUpdaters[i]->processBlastHits()))
287  allDone = false;
288  }
289  return allDone;
290 }
291 
292 void GroupUpdater::getCds(vector<CCdCore*>& cds)
293 {
294  for (unsigned int i = 0; i < m_cdUpdaters.size(); i++)
295  {
296  m_cdUpdaters[i]->getCds(cds);
297  }
298 }
299 
// Returns true as soon as any member updater reports hasCd(cd); false when
// none does.
// NOTE(review): the signature line (original line 300) is absent from this
// listing; only the body is visible here.
301 {
302  for (unsigned int i = 0; i < m_cdUpdaters.size(); i++)
303  {
304  if (m_cdUpdaters[i]->hasCd(cd))
305  return true;
306  }
307  return false;;
308 }
309 
310 /*---------------------------------------------------------------------------------*/
311 /* CDUpdater */
312 /*---------------------------------------------------------------------------------*/
313 
// Constructor initializer list: stores the configuration and the CD pointer,
// starts with no guide alignment (null), sets both m_processPendingThreshold
// and m_hitsNeeded to -1, and uses row 0 as the BLAST query row.
// NOTE(review): the constructor signature (original line 314) is absent from
// this listing.
315 : m_config(config), m_cd(cd), m_guideAlignment(0), m_processPendingThreshold(-1), m_hitsNeeded(-1),
316  m_blastQueryRow(0)
317 {
318 }
319 
320 
// Deletes m_guideAlignment when one was set. No other member is released in
// the visible body.
// NOTE(review): the signature line (original line 321) is absent from this
// listing; only the body is visible here.
322 {
323  if (m_guideAlignment)
324  delete m_guideAlignment;
325 }
326 
// Runs blast(wait, row) and converts any exception into a recorded error.
//
// wait  forwarded to blast().
// row   forwarded to blast(); also included in the error text.
//
// Returns 1 when blast() reported success, 0 otherwise. On an exception the
// description is stored in m_lastError; in both outcomes a message naming the
// CD accession is logged.
// NOTE(review): original line 329 is absent from this listing.
327 int CDUpdater::submitBlast(bool wait, int row)
328 {
330  bool blasted = false;
331  try {
332  blasted = blast(wait, row);
333  }
 // Handlers are ordered from the most specific exception type to catch-all.
334  catch (blast::CBlastException& be) {
335  blasted = false;
336  m_lastError = "Blast exception in submitBlast for row " + NStr::IntToString(row) + ":\n";
337  m_lastError += be.ReportAll();
338  }
339  catch (CException& e) {
340  blasted = false;
341  m_lastError = "NCBI C++ Toolkit exception in submitBlast for row " + NStr::IntToString(row) + ":\n";
342  m_lastError += e.ReportAll();
343  }
344  catch (...) {
345  blasted = false;
346  m_lastError = "Unknown exception in submitBlast for row " + NStr::IntToString(row) + "\n";
347  }
348 
349  if (blasted)
350  {
351  LOG_POST("RID of Blast for the update of " << m_cd->GetAccession() << " is " << getRid());
352  }
353  else
354  {
355  LOG_POST("Update of " << m_cd->GetAccession() << " failed due to error\n" << getLastError());
356  }
357 
358  return(blasted ? 1 : 0);
359 }
360 
// Returns true immediately when m_hits is already populated; otherwise
// delegates to getHits(m_hits) and returns its result.
// NOTE(review): the signature line (original line 361) is absent from this
// listing; only the body is visible here.
362 {
363  if (!m_hits.Empty())
364  return true;
365  else
366  return getHits(m_hits);
367 }
368 
// Applies the hits already stored in m_hits to m_cd via update().
//
// Returns true when m_hits was non-empty (the return value of update() itself
// is not inspected), false when there were no hits to process. When the
// statistics report alignments that could not be used (badAlign), their count
// is logged.
// NOTE(review): the signature line (original line 369) is absent from this
// listing; only the body is visible here.
370 {
371  bool updated = false;
372 
373  //m_Data.cacheCD(m_cd->GetAccession().c_str(),"Updated by Blast",EDITTYPE_ALIGNMENT);
374  if (!m_hits.Empty())
375  {
376  updated = true;
377  update(m_cd, *m_hits);
378  //LOG_POST("Stats of Updating "<<m_cd->GetAccession()<< ":\n"<<getStats().toString());
379  unsigned numNoAlignment = getStats().badAlign.size();
380  if (numNoAlignment > 0)
381  LOG_POST("There are "<<numNoAlignment
382  <<" hits whose alignments do not overlap with the CD. This may indicate there are long insert to the CD alignment. You can find the GIs for those hits in the log.");
383  }
384  else
385  {
386  LOG_POST("Found no BLAST hits to process for CD " << m_cd->GetAccession() << ".\n");
387  }
388  return updated;
389 }
390 
391 void CDUpdater::getCds(vector<CCdCore*>& cds)
392 {
393  cds.push_back(m_cd);
394 }
395 
// Returns true when `cd` is the very CD object this updater works on
// (pointer identity, not content comparison).
// NOTE(review): the signature line (original line 396) is absent from this
// listing; only the body is visible here.
397 {
398  return m_cd == cd;
399 }
400 
// Configures and submits a remote BLAST (or PSI-BLAST) search for this CD.
//
// wait  when true, SubmitSync() is called and getBlastHits() is invoked before
//       returning.
// row   CD row used as the query in the plain-BLAST branch; also used in error
//       messages.
//
// Returns the last submit status. Exceptions raised while submitting are
// caught, described in m_lastError, and reported through the return value.
// m_rid is set from the CRemoteBlast object before it is deleted.
//
// NOTE(review): original lines 426, 429, 431, 442, 481 and 487 are absent from
// this listing (among them the declarations of `bioseqs` and `config`, and the
// statement that sets m_guideAlignment) -- consult the real file before
// changing this function.
401 bool CDUpdater::blast(bool wait, int row)
402 {
403  blast::CRemoteBlast* rblast;
404  blast::CBlastProteinOptionsHandle* blastopt;
405  CPSIBlastOptionsHandle * psiopt = NULL;
 // Pick the options handle by search type; psiopt stays NULL for plain BLAST.
406  if (m_config.blastType == eBLAST)
407  {
408  blastopt = new blast::CBlastProteinOptionsHandle(blast::CBlastOptions::eRemote);
409  rblast = new blast::CRemoteBlast(blastopt);
410  }
411  else
412  {
413  psiopt = new blast::CPSIBlastOptionsHandle(blast::CBlastOptions::eRemote);
414  //psiopt->SetCompositionBasedStats(eNoCompositionBasedStats);
415  rblast = new blast::CRemoteBlast(psiopt);
416  blastopt = psiopt;
417  }
418  //rblast-> SetVerbose();
 // Options shared by both search types; non-positive config values keep the defaults.
419  blastopt->SetSegFiltering(false);
420  if (m_config.numHits > 0)
421  blastopt->SetHitlistSize(m_config.numHits);
422  if (m_config.evalue > 0)
423  blastopt->SetEvalueThreshold(m_config.evalue);
424  if (m_config.identityThreshold > 0) //does not seem to do anything with RemoteBlast
425  blastopt->SetPercentIdentity((double)m_config.identityThreshold);
427 
428  string entrezQuery;
430  {
432  entrezQuery += "[Organism]";
433  }
434  if (m_config.entrezQuery.size() > 0)
435  entrezQuery += m_config.entrezQuery;
436  if (!entrezQuery.empty())
437  rblast->SetEntrezQuery(entrezQuery.c_str());
438 
439  //set PSSM here
440  if (m_config.blastType == eBLAST)
441  {
443  list< CRef< CSeq_entry > >& seqList = bioseqs->SetSeq_set();
444  CRef< CSeq_entry > seqOld;
445 
 // Early exit: rblast is deleted here because the final delete is not reached.
446  if (!m_cd->GetSeqEntryForRow(row, seqOld)) {
447  delete rblast;
448  return false;
449  }
450 
451  seqList.push_back(seqOld);
452  CRef< CSeq_id > seqId = seqOld->SetSeq().SetId().front();
 // Mask the parts of the query outside [lo, hi], the row's bounds in the CD.
453  TMaskedQueryRegions masks;
454  int lo = m_cd->GetLowerBound(row);
455  int hi = m_cd->GetUpperBound(row);
456  int len = m_cd->GetSequenceStringByRow(row).length();
457  if (lo > 0)
458  masks.push_back(CRef<CSeqLocInfo>( new CSeqLocInfo(new CSeq_interval(*seqId, 0,lo-1),0)));
459  if (hi < (len-1))
460  masks.push_back(CRef<CSeqLocInfo> ((new CSeqLocInfo(new CSeq_interval(*seqId, hi + 1, len - 1),0))));
461  if (masks.size() > 0)
462  {
463  TSeqLocInfoVector masking_locations;
464  masking_locations.push_back(masks);
465  rblast->SetQueries(bioseqs,masking_locations);
466  }
467  else
468  rblast->SetQueries(bioseqs);
469  //debug
470  /*
471  string err;
472  if (!WriteASNToFile("blast_query", *bioseqs, false,&err))
473  LOG_POST("Failed to write to blast_query");
474  */
475  //end of debug
476  }
477  else //psi-blast
478  {
479  bool useConsensus = true;
480  PssmMaker pm(m_cd, useConsensus, true);
482  config.unalignedSegThreshold = 35;
483  config.requestFrequencyRatios = true;
484  pm.setOptions(config);
485  CRef<CPssmWithParameters> pssm = pm.make();
486 
488  m_guideAlignment->degap();
489  m_guideAlignment->reverse(); //keep con::master
490  m_consensus = pm.getConsensus();
491 
492  psiopt->SetPseudoCount(pm.getPseudoCount());
493 
494  rblast->SetQueries(pssm);
495  }
496 
497 
498  // Submit and, if requested, wait for blast results.
499  // Trap any exceptions and return 'false' in all such cases.
500  bool blasted = false;
501  try {
502  if (wait) {
503  blasted = rblast-> SubmitSync();
504  m_rid = rblast->GetRID();
505  //LOG_POST("RID="<<m_rid);
506  getBlastHits();
507  }
508 
 // NOTE(review): as listed there is no `else`, so Submit() also runs after the
 // SubmitSync() branch above and overwrites `blasted` -- confirm this is intended.
509  blasted = rblast->Submit();
510 
511  if (!blasted) {
512  m_lastError = rblast->GetErrors();
513  }
514 
515  } catch (CRemoteBlastException& e) {
516  m_lastError = "RemoteBlast exception in CDUpdater::blast() for row " + NStr::IntToString(row) + ":\n";
517  m_lastError += e.ReportAll();
518 // string err = e.GetErrCodeString();
519 // LOG_POST("RemoteBlast exception in CDUpdater::blast() for row " << NStr::IntToString(row) << ": error code = " << err);
520  }
521  catch (CException& e) {
522  m_lastError = "NCBI C++ Toolkit exception in CDUpdater::blast() for row " + NStr::IntToString(row) + ":\n";
523  m_lastError += e.ReportAll();
524  }
525  catch (...) {
526  m_lastError = "Unknown exception in CDUpdater::blast() for row " + NStr::IntToString(row) + "\n";
527  }
528 
 // Remember the request id so later calls can poll for the results.
529  m_rid = rblast->GetRID();
530  delete rblast;
531  return blasted;
532 }
533 
// Polls the remote BLAST identified by getRid() and, when it is done, stores
// its alignments in `hits`.
//
// Returns the value reported by CheckDone(). Any exception is logged and
// swallowed; note that if GetAlignments() throws after CheckDone() returned
// true, the function still returns true with `hits` left unassigned.
// NOTE(review): the signature line (original line 534) is absent from this
// listing; only the body is visible here.
535 {
536  bool done = false;
537  blast::CRemoteBlast rblast(getRid());
538  try {
539  //LOG_POST("Calling RemoteBlast::CheckDone().\n");
540  done = rblast.CheckDone();
541  //LOG_POST("Returned from RemoteBlast::CheckDone().\n");
542  if (done)
543  {
544  hits = rblast.GetAlignments();
545  }
546  } catch (...) {
547  LOG_POST("Exception while getting BLAST hits of CD " << m_cd->GetAccession() << " for RID " << getRid());
548  }
549  return done;
550 }
551 
// Polls the remote BLAST identified by getRid() without fetching results.
// Returns the value reported by CheckDone(), or false when that call throws
// (the exception is logged and swallowed).
// NOTE(review): the signature line (original line 552) is absent from this
// listing; only the body is visible here.
553 {
554  bool done = false;
555  blast::CRemoteBlast rblast(getRid());
556  try {
557  //LOG_POST("Calling RemoteBlast::CheckDone().\n");
558  done = rblast.CheckDone();
559  //LOG_POST("Returned from RemoteBlast::CheckDone().\n");
560 
561  //CCdCore::UpdateInfo ui = m_cd->GetUpdateInfo();
562  //ui.status = CCdCore::BLAST_DONE;
563  //m_cd->SetUpdateInfo(ui);
564 
565  } catch (...) {
566  LOG_POST("Exception during CheckDone for CD " << m_cd->GetAccession() << ", RID " << getRid());
567  }
568  return done;
569 }
570 
// Fetches the BLAST hits into a local alignment set and, when any were
// returned, applies them to m_cd with update().
//
// Returns false when getHits() reports the search is not done, true otherwise.
// NOTE(review): both inner branches end up returning true, including the one
// whose log message says the retrieval will be retried -- confirm intent.
// NOTE(review): the signature line (original line 571) and original line 579
// are absent from this listing.
572 {
573  CRef<CSeq_align_set> seqAligns;
574  if(getHits(seqAligns))
575  {
576  if(!seqAligns.Empty())
577  {
578  update(m_cd, *seqAligns);
580  //LOG_POST("Stats of Updating "<<m_cd->GetAccession()<<" are "<<getStats().toString());
581  unsigned numNoAlignment = getStats().badAlign.size();
582  if (numNoAlignment > 0)
583  LOG_POST("There are hits whose alignments do not overlap with the CD. This may indicate there are long insert to the CD alignment. You find the GIs for those hits in the log\n");
584  }
585  else
586  {
587  LOG_POST("Got no alignment for BLAST hits for CD " << m_cd->GetAccession() << ". will try again to retrieve the hits.\n");
588  return true;
589  }
590  return true;
591  }
592  else
593  return false;
594 }
595 
596 double CDUpdater::ComputePercentIdentity(const CRef< CSeq_align >& alignment, const string& queryString, const string& subjectString)
597 {
598  double result = 0.0;
599  unsigned int nIdent = 0;
600  unsigned int qLen = queryString.length(), sLen = subjectString.length();
601  unsigned int i, j, qStart, qStop, sStart, sStop;
602 
603  if (alignment.Empty() || qLen == 0 || sLen == 0) return result;
604 
605  // Note that is is only %id in the aligned region, and doesn't factor in any non-identities
606  // implicit in any parts of the query N- and/or C-terminal to the alignment.
607  const CSeq_align::C_Segs::TDenseg& denseg = alignment->GetSegs().GetDenseg();
608  double denom = (denseg.GetSeqStop(0) - denseg.GetSeqStart(0) + 1);
609 
610  CDense_seg::TStarts starts = denseg.GetStarts();
611  CDense_seg::TLens lens = denseg.GetLens();
612 
613  for (i = 0; i < lens.size(); ++i) {
614  // Do this check before implicit cast to unsigned int when assigning to qStart, sStart.
615  if (starts[2*i] < 0 || starts[2*i + 1] < 0) continue; // gap
616  qStart = starts[2*i];
617  sStart = starts[2*i + 1];
618 
619  qStop = qStart + lens[i] - 1;
620  sStop = sStart + lens[i] - 1;
621  if (qStop >= qLen || sStop >= sLen) continue; // string index out of range
622 
623  for (j = 0; j < lens[i]; ++j) {
624  if (queryString[qStart + j] == subjectString[sStart + j]) ++nIdent;
625  }
626  }
627  result = 100.0*nIdent/denom;
628 // LOG_POST(nIdent << " identities found (" << result << "%)\nquery: " << queryString << "\nsubject: " << subjectString);
629 
630  return result;
631 }
632 
// Applies a set of BLAST alignments to `cd`.
//
// Visible flow: fetch the hit sequences (retrieveAllSequences), index them in a
// SequenceTable, then walk the alignments in order. For each hit the subject
// Bioseq is looked up, the identity threshold is optionally enforced, the
// alignment is converted/remastered, filters are applied, and the hit is
// either recorded as a replacement for an older sequence or added to the CD as
// a pending alignment plus sequence. Rejections are tallied in m_stats.
//
// Returns false when `cd` is null or `alignments` is not set; true otherwise.
//
// NOTE(review): the signature line (original line 633) and original lines 647,
// 778, 794, 823 and 825 are absent from this listing; several of them are the
// conditions guarding the statements that follow them here -- consult the real
// file before changing this function.
634 {
635  if ( !cd || (!alignments.IsSet()))
636  return false;
637 
638  double pidScore = 0.0;
639  list< CRef< CSeq_align > >& seqAligns = alignments.Set();
640  m_stats.numBlastHits = seqAligns.size();
641  vector< CRef< CBioseq > > bioseqs;
642  LOG_POST("Got "<<m_stats.numBlastHits<<" blast hits for CD " << cd->GetAccession() << ".");
643  retrieveAllSequences(alignments, bioseqs);
644 
645  SequenceTable seqTable;
 // refresher is a raw owning pointer; it is deleted just before the final return.
646  CDRefresher* refresher = 0;
648  refresher = new CDRefresher(cd);
649  vector< CRef< CBioseq > > bioseqVec;
650  for (unsigned int i = 0; i < bioseqs.size(); i++)
651  {
652  bioseqVec.clear();
653  SplitBioseqByBlastDefline (bioseqs[i], bioseqVec);
654  seqTable.addSequences(bioseqVec, true); //as a group
655  }
656 
657  // debugging *** ***
658  //string err;
659  //WriteASNToFile("alignments.txt", alignments, false, &err);
660 
661  // debugging *** ***
662  //seqTable.dump("seqTable.txt");
663  //LOG_POST("Retrieved "<<bioseqs.size()<<" Bioseqs for blast hits\n");
664  //LOG_POST("Process BLAST Hits and add them to "<<cd->GetAccession());
665  int completed = 0;
666  list< CRef< CSeq_align > >::iterator it = seqAligns.begin();
667 
668  CRef< CSeq_id > seqID, querySeqID;
669  CRef<CSeq_entry> seqEntry;
670  CRef< CBioseq > queryBioseq(new CBioseq);
671  string queryString, subjectString;
672 
673  // Get bioseq corresponding to the query.
674  if (it != seqAligns.end()) {
675  querySeqID = (*it)->SetSegs().SetDenseg().GetIds()[0];
676  if (!cd->CopyBioseqForSeqId(querySeqID, queryBioseq)) {
677  queryBioseq.Reset();
678 
679  // This message isn't relevant when the query is a PSSM.
680  if (m_config.blastType == eBLAST)
681  LOG_POST("No bioseq found in CD " << cd->GetAccession() << " for update query.");
682  } else {
683  queryString = GetRawSequenceString(*queryBioseq);
684  }
685  }
686 
687 
688  //for BLAST, if the master is PDB, the master seqid (gi from BLAST) needs to be changed
689  if (m_config.blastType == eBLAST && it != seqAligns.end())
690  {
691  CSeq_align::C_Segs& oldSegs = (*it)->SetSegs();
692  CRef< CSeq_align::C_Segs::TDenseg> denseg( &(oldSegs.SetDenseg()) );
693  vector< CRef< CSeq_id > >& seqIds= denseg->SetIds();
694  CRef< CSeq_entry > masterSeq;
695  cd->GetSeqEntryForRow(m_blastQueryRow, masterSeq);
696  vector< CRef< CSeq_id > > pdbIds;
697  GetAllIdsFromSeqEntry(masterSeq, pdbIds, true);
698  if ((pdbIds.size() > 0) && SeqEntryHasSeqId(masterSeq, *seqIds[0]) && (!seqIds[0] ->IsPdb()))
699  m_masterPdb = pdbIds[0];
700  }
 // Main loop over the hits. The `break` statements below abandon all remaining hits.
701  for(; it != seqAligns.end(); it++)
702  {
703  pidScore = 0.0;
704  CRef< CSeq_align > seqAlignRef = *it;
705  //seqAlign from BLAST is in Denseg
706  CSeq_align::C_Segs::TDenseg& denseg = seqAlignRef->SetSegs().SetDenseg();
707 
708  // 9/25/08: CRemoteBlast dropped the identity count from the scores, and is a
709  // pending issue (JIRA ticket http://jira.be-md.ncbi.nlm.nih.gov/browse/SB-114).
710  // Workaround is to compute the % identity directly, as done below in
711  // the function ComputePercentIdentity.
 // The `false &&` makes this branch dead code; it is retained for reference only.
712  if (false && m_config.identityThreshold > 0)
713  {
714 
715  // Note: if the type isn't in the seq-align, pidScore remains 0.0 and the
716  // scan of seqAligns will be aborted.
717  seqAlignRef->GetNamedScore(CSeq_align::eScore_IdentityCount, pidScore);
718 
719  int start = denseg.GetSeqStart(0);
720  int stop = denseg.GetSeqStop(0);
721  pidScore = 100*pidScore/(stop - start + 1);
722  if ((int)pidScore < m_config.identityThreshold)
723  break; //stop
724  }
725  //the second is slave
726  if (denseg.GetDim() > 1)
727  seqID = denseg.GetIds()[1];
728  else
729  break; //should not be here
730 
731  TGi gi = seqID->GetGi();
732  vector< CRef< CBioseq > > bioseqVec;
733  if(seqTable.findSequencesInTheGroup(seqID, bioseqVec) > 0)
734  {
735  //one SeqAlign returned from BLAST may represent the hits on several
736  //different Bioseq that all have the same seq_data
737  //pick the Seq_id from the most useful Bioseq
738  if (bioseqVec.size() > 1)
739  {
740  int index = pickBioseq(refresher, seqAlignRef, bioseqVec);
741  seqEntry = new CSeq_entry;
742  seqEntry->SetSeq(*bioseqVec[index]);
 // pickBioseq() may have replaced the subject id in the alignment, so re-read it.
743  seqID = denseg.GetIds()[1];
744  gi = seqID->GetGi();
745  subjectString = GetRawSequenceString(*bioseqVec[index]);
746  }
747  else
748  {
749  seqEntry = new CSeq_entry;
750  seqEntry->SetSeq(*bioseqVec[0]);
751  subjectString = GetRawSequenceString(*bioseqVec[0]);
752  }
753 
754  // when there's no identity count in the seq-align, compute it directly
755  if (m_config.identityThreshold > 0)
756  {
757  pidScore = ComputePercentIdentity(seqAlignRef, queryString, subjectString);
758  if ((int)pidScore < m_config.identityThreshold)
759  break; //stop
760  }
761 
762  //change seqAlign from denseg to dendiag
763  //use pdb_id if available
764  if(!modifySeqAlignSeqEntry(cd, *it, seqEntry))
765  {
766  m_stats.badAlign.push_back(gi);
767  continue;
768  }
769 
770  bool passed = true;
771  if (!m_config.noFilter)
772  passed = passedFilters(cd, *it, seqEntry);
773  if(passed) //not fragmented; not environmental seq
774  {
775  // add merge fragment later here
776 
777  //remaster the seqAlign from consensus to CD.master
779  {
780  BlockModelPair bmp(*it);
781  bmp.remaster(*m_guideAlignment);
782  CRef<CSeq_align> saRef = bmp.toSeqAlign();
783  if (saRef.Empty())
784  {
785  m_stats.badAlign.push_back(gi);
786  //LOG_POST("No valid alignment after remastering to the CD for maste. gi|%d is ignored", gi);
787  continue;
788  }
789  else
790  (*it) = saRef;
791  }
792  //check to see if it is necessary to replace old sequences
793  TGi replacedGi = INVALID_GI;
795  replacedGi = refresher->refresh(seqAlignRef, seqEntry);
796  if (replacedGi > ZERO_GI)
797  {
798  m_stats.oldNewPairs.push_back(CDUpdateStats::OldNewGiPair(replacedGi, gi));
799  }
800  else
801  {
802  cd->AddPendingSeqAlign(*(it));
803  //just add sequence now. redundancy will be removed later
804  cd->AddSequence(seqEntry);
805  }
806  }
807  }
808  else
809  m_stats.noSeq.push_back(gi);
 // `completed` is not incremented for hits skipped with `continue` above.
810  completed++;
811  if (m_hitsNeeded > 0)
812  {
813  if (completed >= m_hitsNeeded)
814  break;
815  }
816  if ((completed % 500) == 0)
817  LOG_POST("Processed "<<completed<<" of "<<m_stats.numBlastHits<<" hits.");
818  }
819 
820  // always keep normal rows w/ automatic NR
821  LOG_POST("Finishing processing hits for "<<cd->GetAccession());
822 
824  {
826  }
827  if (refresher)
828  delete refresher;
829  return true;
830 }
831 
// Moves pending alignments into the normal alignment via
// processPendingToNormal(threshold, cd) and returns that call's result.
//
// remaster  when true and GetAllIdsFromSeqEntry() yields no ids for row 0,
//           the rows are scanned for the first one whose Seq-id is a PDB id.
//
// NOTE(review): original line 857 is absent from this listing; it is
// presumably the statement that acts on the row found (only the declaration of
// `err` is visible) -- confirm against the real file.
832 int CDUpdater::mergePending(CCdCore* cd, int threshold, bool remaster)
833 {
834  int excluded = processPendingToNormal(threshold, cd);
835  if (remaster)
836  {//check and remaster if necessary
837  CRef< CSeq_entry > seqEntry;
838  cd->GetSeqEntryForRow(0,seqEntry);
839  vector< CRef< CSeq_id > > seqIds;
840  GetAllIdsFromSeqEntry(seqEntry,seqIds, true);
841  if (seqIds.size() == 0)
842  {
843  int nRows = cd->GetNumRows();
844  int i = 1;
 // `i` is declared outside the loop so the `i < nRows` test below can tell whether a row was found.
845  for (; i < nRows; i++)
846  {
847  CRef< CSeq_id > SeqID;
848  if (cd->GetSeqIDForRow(i-1,1, SeqID))
849  {
850  if (SeqID->IsPdb())
851  break;
852  }
853  }
854  if (i < nRows)
855  {
856  string err;
858  }
859  }
860  }
861  return excluded;
862 }
863 
864 
// Chooses which Bioseq in `bioseqVec` should represent a BLAST hit and returns
// its index. The subject Seq-id stored in the alignment (ids[1]) may be
// replaced as a side effect.
//
// Order of preference, as implemented below:
//   1. the Bioseq that carries the alignment's current subject id, if it also
//      has a PDB id (alignment left unchanged);
//   2. any other Bioseq that has a PDB id (the alignment's subject id becomes
//      the GI id found on that Bioseq);
//   3. when `refresher` is non-null, a Bioseq for which
//      refresher->hasOlderVersion() is true and that has a GI id;
//   4. otherwise the Bioseq matching the current subject id.
//
// NOTE(review): in case 2 `giId` stays empty when the Bioseq has a PDB id but
// no GI id, and that empty reference is then written into the alignment --
// confirm this cannot happen with real data.
// NOTE(review): the first signature line (original line 865) is absent from
// this listing.
866  vector< CRef< CBioseq > >& bioseqVec)
867 {
868  CSeq_align::C_Segs::TDenseg& denseg = seqAlignRef->SetSegs().SetDenseg();
869  //the second is slave
870  vector< CRef< CSeq_id > >& seqIdVec = denseg.SetIds();
871  CRef< CSeq_id > seqID;
872  assert(denseg.GetDim() > 1);
873  seqID = seqIdVec[1];
874  int index = -1;
875  //if the current seqId's bioseq has a PDB , no change
876  for (int i = 0; i < (int) bioseqVec.size(); i++)
877  {
878  if(BioseqHasSeqId(*(bioseqVec[i]), *seqID))
879  {
880  index = i;
881  const CBioseq::TId& ids = bioseqVec[i]->GetId();
882  CBioseq::TId::const_iterator it = ids.begin(), itend = ids.end();
883  for (; it != itend; ++it)
884  {
885  if ((*it)->IsPdb())
886  {
887  return index;
888  }
889  }
890  }
891  }
 // Only checked in builds where assert() is active; otherwise -1 can reach the final return.
892  assert(index >= 0);
893  //use other PDB if there is one.
894  for (int i = 0; i < (int) bioseqVec.size(); i++)
895  {
896  if (i==index)
897  continue;
898  const CBioseq::TId& ids = bioseqVec[i]->GetId();
899  CBioseq::TId::const_iterator it = ids.begin(), itend = ids.end();
900  CRef< CSeq_id > giId;
901  bool foundPDB =false;
902  for (; it != itend; ++it)
903  {
904  if ((*it)->IsPdb())
905  foundPDB = 1;
906  else if ((*it)->IsGi())
907  giId = *it;
908  }
909  if (foundPDB)
910  {
911  seqIdVec[1] = giId; //replace id in SeqAlign
912  return i;
913  }
914  }
915 
916  //use the one whose older version is already in CD
917  //this can be used to replace the old one later
918  if (refresher)
919  {
920  for (int i = 0; i < (int) bioseqVec.size(); i++)
921  {
922  if (refresher->hasOlderVersion(bioseqVec[i]))
923  {
924  const CBioseq::TId& ids = bioseqVec[i]->GetId();
925  CBioseq::TId::const_iterator it = ids.begin(), itend = ids.end();
926  for (; it != itend; ++it)
927  {
928  if ((*it)->IsGi())
929  {
930  seqIdVec[1] = (*it); //replace
931  return i;
932  }
933  }
934  }
935  }
936  }
937  return index;
938 }
939 
// Looks for rows of `cd` whose sequence has the same accession as `bioseq` but
// a different GI, and records each such (old GI, new GI) pair in
// m_stats.oldNewPairs.
//
// Returns true when at least one such row was found; false when none was, or
// when no accession/version could be obtained for `bioseq`.
//
// NOTE(review): `rows` is filled but not read anywhere in the visible body,
// and the versions (nVer, oVer) are fetched but not compared.
// NOTE(review): the signature line (original line 940) is absent from this
// listing; only the body is visible here.
941 {
942  CRef< CBioseq > bioseqRef(&bioseq);
943  TGi giNew = getGi(bioseqRef);
944  int num = cd->GetNumRows();
945  vector<int> rows;
946  string nAcc;
947  int nVer=0;
948  CRef< CSeq_id > seqId;
949  if (!GetAccAndVersion(bioseqRef, nAcc, nVer, seqId))
950  return false;
951  bool foundOldSeq = false;
952  for (int i = 0; i < num; i++)
953  {
 // bioseqRef is reused here: from now on it refers to the row's Bioseq, not the input.
954  cd->GetBioseqForRow(i, bioseqRef);
955  string oAcc;
956  int oVer=0;
957  if (GetAccAndVersion(bioseqRef, oAcc, oVer,seqId))
958  {
959  TGi giOld = getGi(bioseqRef);
960  if ((oAcc == nAcc) && (giNew != giOld))
961  {
962  rows.push_back(i);
963  m_stats.oldNewPairs.push_back(CDUpdateStats::OldNewGiPair(giOld, giNew));
964  foundOldSeq = true;
965  }
966  }
967  }
968  return foundOldSeq;
969 }
970 
// Decides whether a hit may be added to the CD. Each rejection is recorded in
// the matching m_stats list under the hit's GI.
//
// Checks, in order:
//   - a Bioseq must be obtainable from `seqEntry`       (else noSeq);
//   - when missingResidueThreshold > 0, the hit must not
//     be a fragment per isFragmentedSeq()               (else fragmented);
//   - when allowedOverlapWithCDRow >= 0, the hit must not
//     overlap an existing row per overlapWithCDRow()    (else overlap).
//
// Returns true when every enabled check passes.
//
// NOTE(review): despite the first inline comment, no environmental-sequence
// test is visible in this body.
// NOTE(review): the first signature line (original line 971) is absent from
// this listing.
972  CRef< CSeq_entry > seqEntry)
973 {
974  //filter environmental seq
975  CRef< CBioseq > bioseq;
976  TGi gi = getGi(seqEntry);
977  if (!GetOneBioseqFromSeqEntry(seqEntry, bioseq))
978  {
979  m_stats.noSeq.push_back(gi);
980  return false; //no seq is not acceptable
981  }
982 
983  //filter fragmented
984  if (m_config.missingResidueThreshold > 0 && isFragmentedSeq(cd, seqAlign, seqEntry))
985  {
986  m_stats.fragmented.push_back(gi);
987  return false;
988  }
989  // filter overlapping updates unless disabled
990  if (m_config.allowedOverlapWithCDRow >= 0 && overlapWithCDRow(cd, seqAlign))
991  {
992  m_stats.overlap.push_back(gi);
993  return false;
994  }
995  return true;
996 }
997 
// Tests whether the aligned range of a proposed update overlaps, by more than
// the configured allowance, a row of `cd` that has the same Seq-id.
//
// The allowance is m_config.allowedOverlapWithCDRow; a negative value disables
// the check and the function returns false. Rows are scanned until the first
// disallowed overlap, which is logged; the scan does not stop at the first
// Seq-id match so that repeats of the same sequence are all examined.
//
// Returns true when a disallowed overlap was found.
// NOTE(review): the signature line (original line 998) is absent from this
// listing; only the body is visible here.
999 {
1000  // Ignore overlaps by disabling this check when requested.
1001  int overlap = m_config.allowedOverlapWithCDRow;
1002  if (overlap < 0) return false;
1003 
1004  bool result = false;
1005  BlockModel bm(seqAlign);
1006  int lo, hi;
1007  int lastPos = bm.getLastAlignedPosition();
1008  int firstPos = bm.getFirstAlignedPosition();
1009  CRef< CSeq_id > seqId = bm.getSeqId();
1010  CRef< CSeq_id > seqIdRow;
1011 
1012  // Scan until the first overlap of significant size found.
1013  // Do not return 'false' after first seq-id match in case there are repeats.
1014  for(int i = 0; !result && i < cd->GetNumRows(); i++)
1015  {
1016  if(cd->GetSeqIDFromAlignment(i, seqIdRow))
1017  {
1018  if (SeqIdsMatch(seqId, seqIdRow))
1019  {
1020  lo = cd->GetLowerBound(i);
1021  hi = cd->GetUpperBound(i);
 // [lo, hi] is the row's range; it is narrowed by `overlap` at the end facing the update.
1022  if (lo + overlap <= firstPos)
1023  result = (hi - overlap >= firstPos);
1024  else
1025  result = (lo + overlap <lastPos);
1026 
1027  if (result) {
1028  if (overlap > 0) {
1029  LOG_POST("CD sequence " << i << " [" << lo << ", " << hi << "] and proposed update with range [" << firstPos << ", " << lastPos << "] exceed maximum allowed overlap = " << overlap);
1030  } else {
1031  LOG_POST("Disallowed overlap of CD sequence " << i << " [" << lo << ", " << hi << "] and proposed update with range [" << firstPos << ", " << lastPos << "]");
1032  }
1033  }
1034 // if (lo <= firstPos)
1035 // return hi >=firstPos;
1036 // else
1037 // return lo <lastPos;
1038  }
1039  }
1040  }
1041  return result;
1042 }
1043 
// Decides whether a hit looks like a sequence fragment relative to the
// consensus.
//
// A hit whose aligned residue count is at least the consensus length is never
// a fragment. Otherwise the unaligned N- and C-terminal gaps of the master
// (measured against m_consensus.size()) are compared with those of the hit
// (measured against its own sequence length); the hit is a fragment when the
// master's gap exceeds the hit's gap by more than
// m_config.missingResidueThreshold at either end. A hit with no retrievable
// Bioseq is treated as a fragment.
//
// NOTE(review): the first signature line (original line 1044) is absent from
// this listing.
1045  CRef< CSeq_entry > seqEntry)
1046 {
1047  int pssmLen = m_consensus.size(); //equal to PSSM length
1048  int lenAligned = GetNumAlignedResidues(seqAlign);
1049  if (lenAligned >= pssmLen)
1050  return false;
1051  BlockModel master(seqAlign, false);
1052  int mGapToN = master.getGapToNTerminal(0);
1053 
1054  //master is consensus at this point
1055  int mGapToC = master.getGapToCTerminal(master.getBlocks().size() -1, m_consensus.size());
1056  BlockModel slave(seqAlign);
1057  CRef< CBioseq > bioseq;
1058  if (!GetOneBioseqFromSeqEntry(seqEntry, bioseq))
1059  return true; //no seq is a fragmented seq
1060 
1061  int seqLen = GetSeqLength(*bioseq);
1062  int sGapToN = slave.getGapToNTerminal(0);
1063  int sGapToC = slave.getGapToCTerminal(slave.getBlocks().size() - 1, seqLen);
1064  int allowed = m_config.missingResidueThreshold;
1065  if ( ( mGapToN - sGapToN > allowed) ||
1066  (mGapToC - sGapToC > allowed))
1067  return true;
1068  else
1069  return false;
1070 }
1071 
1072 bool CDUpdater::findSeq(CRef<CSeq_id> seqID, vector< CRef< CBioseq > >& bioseqs, CRef<CSeq_entry>& seqEntry)
1073 {
1074  for (unsigned int i = 0; i < bioseqs.size(); i++)
1075  {
1076  const CBioseq::TId& ids = bioseqs[i]->SetId();
1077  CBioseq::TId::const_iterator it = ids.begin(), itend = ids.end();
1078  for (; it != itend; ++it) {
1079  if (SeqIdsMatch(seqID, *it))
1080  {
1081  seqEntry = new CSeq_entry;
1082  seqEntry->SetSeq(*bioseqs[i]);
1083  return true;
1084  }
1085  }
1086  }
1087  return false;
1088 }
1089 
// Fetches a Bioseq for each Seq-id in `seqids` through an object-manager
// CScope and appends the results to `bioseqs`. Ids for which no Bioseq handle
// can be obtained are logged and skipped, so `bioseqs` may end up with fewer
// entries than `seqids`.
//
// NOTE(review): `seqids` is taken by value, so the whole vector is copied on
// every call.
// NOTE(review): original lines 1091-1092 are absent from this listing; they
// presumably declare `objmgr` and `loader` -- confirm against the real file.
1090 void CDUpdater::getSequencesFromGB(vector< CRef<CSeq_id> > seqids, vector< CRef< CBioseq > >& bioseqs){
1093  string loaderName = loader->GetName();
1094  CScope scope(*objmgr);
1095  scope.AddDataLoader(loaderName);
1096  scope.AddDefaults();
1097  for( CRef< CSeq_id > seqid : seqids ){
1098  // Create a bioseq handle for this seqid.
1099  CBioseq_Handle handle = scope.GetBioseqHandle(*seqid);
1100  if ( !handle ) {
1101  LOG_POST("Failed to get bioseq handle for seqid " << seqid->GetSeqIdString() << ".");
1102  continue;
1103  }
 // GetBioseqCore() yields a const object; constness is cast away to store it in the CRef<CBioseq> output.
1104  CConstRef<CBioseq> const_bioseq = handle.GetBioseqCore();
1105  CRef<CBioseq> bioseq(const_cast<CBioseq*>(const_bioseq.GetPointer()));
1106  // bioseq.Reset(const_cast<CBioseq&>(*const_bioseq));
1107  bioseqs.push_back(bioseq);
1108  }
1109 }
1110 
// Body of retrieveAllSequences(CSeq_align_set& alignments,
//                              vector< CRef< CBioseq > >& bioseqs).
//
// Collects the second Seq-id of every alignment's dense-seg, fetches the
// corresponding Bioseqs in batches and appends whatever comes back to bioseqs.
//
// NOTE(review): in this listing the definition's header line and two
// statements are not visible: the one that defines dbName, and the `if` that
// chooses between getSequencesFromGB() and CBlastServices::GetSequences()
// (only its `}else{` can be seen). Confirm both against the repository copy.
1112 {
1113  vector< CRef<CSeq_id> > seqids;
// A retrieval is attempted once this many ids are queued ...
1114  unsigned int batchSize = 500;
// ... and a queue that has grown past this size after failed attempts is dropped.
1115  unsigned int maxBatchSize = 2000;
1116 
1118 
1119  list< CRef< CSeq_align > >& seqAligns = alignments.Set();
1120  list< CRef< CSeq_align > >::iterator lit = seqAligns.begin();
1121  for (; lit != seqAligns.end(); lit++)
1122  {
// Ids()[1] is the second row of the pairwise alignment.
1123  seqids.push_back((*lit)->SetSegs().SetDenseg().GetIds()[1]);
1124  list< CRef< CSeq_align > >::iterator next = lit;
1125  next++;
1126  //the batch is full or reach the end
1127  if (seqids.size() >= batchSize || (next == (seqAligns.end())) )
1128  {
1129  string errors, warnings;
1130  vector< CRef< CBioseq > > bioseqBatch;
1131  try {
1132  //LOG_POST("Calling CBlastServices::GetSequences().\n");
1133  // For Blast v5 databases, not all members of an identical protein group are in the database
1134  // which may cause GetSequences to not find the exact sequence specified from such a group
1135  // if it wasn't one of the representatives (5 as of early 2020).
1137  getSequencesFromGB(seqids, bioseqBatch);
1138  LOG_POST("Returned from object manager with a batch of " << bioseqBatch.size() << " sequences.");
1139  }else{
1140  CBlastServices::GetSequences(seqids, dbName, 'p', bioseqBatch, errors,warnings);
1141  LOG_POST("Returned from CBlastServices::GetSequences('" << dbName << "') with a batch of " << bioseqBatch.size() << " sequences.");
1142  }
1143  }
// On a BLAST exception the `continue` below skips the seqids.clear() further
// down, so the queued ids stay in place and the retrieval is attempted again
// on the next alignment with one more id added. Only a queue larger than
// maxBatchSize is abandoned. If the failure happens on the last alignment the
// loop ends and the queued ids are not retried.
1144  catch (blast::CBlastException& be)
1145  {
1146  if (seqids.size() > maxBatchSize)
1147  {
1148  seqids.clear(); //give up on retrieving sequence on these hits.
1149  //LOG_POST("Retrieving sequences from RemoteBlast failed after repeated tries. Giving up on these %d blast hits");
1150  }
1151  else
1152  LOG_POST("Retrieving sequences from RemoteBlast failed with an exception of "<<be.GetErrCodeString());
1153  continue;
// Any other exception is logged and skipped the same way; here too the queued
// ids are left in seqids, and this path has no maxBatchSize cut-off.
1154  } catch (...)
1155  {
1156  LOG_POST("Unspecified exception during CBlastServices::GetSequences(). Skipping to next Seq-align.\n");
1157  continue;
1158  }
1159 
// A short batch is only reported; the sequences that did arrive are still kept.
1160  if (seqids.size()!= bioseqBatch.size())
1161  {
1162  LOG_POST("Ask for "<< seqids.size()<<" sequences. Got "<<bioseqBatch.size()<<" back\n");
1163  LOG_POST("Error="<<errors<<"\nWarnings="<<warnings);
1164  }
1165  seqids.clear();
1166  for (unsigned int i = 0 ; i < bioseqBatch.size(); i++)
1167  {
1168  bioseqs.push_back(bioseqBatch[i]);
1169  }
1170  }
1171  }
1172 }
1173 
1175 {
1176  vector< CRef< CSeq_id > > seqIds;
1177  GetAllIdsFromSeqEntry(seqEntry, seqIds);
1178  for (unsigned int i = 0; i < seqIds.size(); i++)
1179  if (seqIds[i]->IsGi())
1180  return seqIds[i]->GetGi();
1181  return ZERO_GI;
1182 }
1183 
1185 {
1186  const list< CRef< CSeq_id > >& seqIds = bioseq->GetId();
1187  list< CRef< CSeq_id > >::const_iterator cit = seqIds.begin();
1188  for (; cit != seqIds.end(); cit++)
1189  if ((*cit)->IsGi())
1190  return (*cit)->GetGi();
1191  return ZERO_GI;
1192 }
1193 
1194 //change seqAlign from denseg to dendiag
1195 //remaster back to the master.
1196 //use pdb_id if available
1198  CRef< CSeq_entry > seqEntry)
1199 {
1200  CSeq_align::C_Segs& oldSegs = seqAlign->SetSegs();
1201  CRef< CSeq_align::C_Segs::TDenseg> denseg( &(oldSegs.SetDenseg()) );
1202  vector< CRef< CSeq_id > >& seqIds= denseg->SetIds();
1203  if(seqIds.size() <= 1)
1204  return false;
1205 
1206  if (!m_masterPdb.Empty())
1207  {
1208  seqIds[0].Reset(m_masterPdb.GetPointer());
1209  }
1210 
1211  //if slave has a pdb-id use it in seqAlign
1212  vector< CRef< CSeq_id > > slaveIds;
1213  GetAllIdsFromSeqEntry(seqEntry, slaveIds, true); //pdb only
1214  if (slaveIds.size() > 0)
1215  seqIds[1].Reset( (slaveIds[0]).GetPointer() );
1216 
1217  if (seqEntry->IsSet())
1218  {
1219  //pick the right BioSeq from the Set
1220  CRef< CBioseq > bioseq;
1221  if (GetOneBioseqFromSeqEntry(seqEntry, bioseq, seqIds[1].GetPointer()))
1222  {
1223  if (!reformatBioseq(bioseq, seqEntry, m_client))
1224  return false;
1225  seqEntry->SetSeq(*bioseq);
1226  }
1227  else
1228  return false;
1229  }
1230  else
1231  {
1232  CRef< CBioseq > bioseq(&seqEntry->SetSeq());
1233  if (!reformatBioseq(bioseq, seqEntry, m_client))
1234  return false;
1235  }
1236 
1237  CSeq_align::C_Segs::TDendiag& dendiag = seqAlign->SetSegs().SetDendiag();
1238  Denseg2DenseDiagList(*denseg, dendiag);
1239  /*
1240  BlockModelPair bmp(seqAlign);
1241  bmp.remaster(*m_guideAlignment);
1242  seqAlign = bmp.toSeqAlign();*/
1243 
1244  return true;
1245 }
1246 
1247 //get org-ref from seqEntry if bioseq does not have one
1248 //remove all unnecessary fields
1249 //replace ftable with mmdb-id
1251 {
1252  //get BioSource if there is none in bioseq
1253  CSeq_descr& seqDescr = bioseq->SetDescr();
1254  bool hasSource = false;
1255  bool hasTitle = false;
1256  //reset all fields except the source field
1257 
1258  //need trim even if bioseq is not a Set
1259  if (seqDescr.IsSet())
1260  {
1261  list< CRef< CSeqdesc > >& descrList = seqDescr.Set();
1262  list< CRef< CSeqdesc > >::iterator cit = descrList.begin();
1263  while (cit != descrList.end())
1264  {
1265  if ((*cit)->IsSource() && (!hasSource)) //only keep one source field
1266  {
1267  hasSource = true;
1268  cit++;
1269  }
1270  else if ( (*cit)->IsTitle())
1271  {
1272  cit++;
1273  hasTitle = true;
1274  }
1275  //extract taxid/taxname from "TaxNamesData" field
1276  //blastdb uses it to send tax info
1277  else if ((*cit)->IsUser() && (!hasSource))
1278  {
1279  if ((*cit)->SetUser().SetType().SetStr() == "TaxNamesData")
1280  {
1281  vector< CRef< CUser_field > >& fields = (*cit)->SetUser().SetData();
1282  if ( fields.size() > 0)
1283  {
1284  CRef< CUser_field > field = fields[0];
1285  TTaxId taxid = TAX_ID_FROM(CObject_id::TId, field->GetLabel().GetId());
1286  string taxname = field->GetData().GetStrs()[0];
1287  //create a source seedsc and add it
1289  COrg_ref& orgRef = source->SetSource().SetOrg();
1290  orgRef.SetTaxId(taxid);
1291  orgRef.SetTaxname(taxname);
1292  descrList.push_back(source);
1293  hasSource = true;
1294  }
1295  }
1296  cit = descrList.erase(cit);
1297  }
1298  else
1299  cit = descrList.erase(cit);
1300  }
1301  }
1302  if (!hasSource)
1303  {
1304  //get source or org-ref from seqEntry
1305  if (seqEntry->IsSet())
1306  {
1307  const list< CRef< CSeqdesc > >& descrList = seqEntry->GetSet().GetDescr().Get();
1308  list< CRef< CSeqdesc > >::const_iterator cit = descrList.begin();
1309  for (; cit != descrList.end(); cit++)
1310  {
1311  if ((*cit)->IsSource())
1312  {
1313  seqDescr.Set().push_back(*cit);
1314  break;
1315  }
1316  }
1317  }
1318  }
1319  // if bioSeq is pdb
1320  //replace annot field with mmdb-id
1321  //otherwise reset annot field
1322  bioseq->ResetAnnot();
1323  const list< CRef< CSeq_id > >& seqIds = bioseq->GetId();
1324  list< CRef< CSeq_id > >::const_iterator cit = seqIds.begin();
1325  bool isPdb = false;
1326  for (; cit != seqIds.end(); cit++)
1327  {
1328  if ((*cit)->IsPdb())
1329  {
1330  isPdb = true;
1331  break;
1332  }
1333  }
1334  if (isPdb)
1335  {
1336  //CEntrez2Client client;
1337  vector<TIntId> uids;
1338  string pdb = (*cit)->GetPdb().GetMol().Get();
1339  pdb += "[ACCN]";
1340  try {
1341  client.Query(pdb, "structure", uids);
1342  } catch (CException& e)
1343  {
1344  LOG_POST("\nFailed to retrieve mmdb-id for "<<pdb<<" because the error:\n "<<e.ReportAll());
1345  return false;
1346  }
1347  int mmdbId = 0;
1348  if (uids.size() > 0)
1349  {
1350  mmdbId = uids[0];
1351  CRef<CSeq_id> mmdbTag (new CSeq_id);
1352  CSeq_id::TGeneral& generalId = mmdbTag->SetGeneral();
1353  generalId.SetDb("mmdb");
1354  generalId.SetTag().SetId(mmdbId);
1355  CRef< CSeq_annot> seqAnnot (new CSeq_annot);
1356  seqAnnot->SetData().SetIds().push_back(mmdbTag);
1357  bioseq->SetAnnot().push_back(seqAnnot);
1358  }
1359  if (!hasTitle)
1360  {
1361  CRef< CPDB_block > pdbBlock;
1362  if (GetPDBBlockFromSeqEntry(seqEntry, pdbBlock))
1363  {
1364  CRef< CSeqdesc > seqDesc(new CSeqdesc);
1365  if (pdbBlock->CanGetCompound())
1366  {
1367  const list< string >& compounds = pdbBlock->GetCompound();
1368  if (compounds.size() != 0)
1369  seqDesc->SetTitle(*(compounds.begin()));
1370  seqDescr.Set().push_back(seqDesc);
1371  }
1372  }
1373  }
1374  }
1375  return true;
1376 
1377 }
1378 
1380  vector< CRef< CSeq_id > >& slaveIds, bool pdbOnly)
1381 {
1382  if (seqEntry->IsSeq())
1383  {
1384  const list< CRef< CSeq_id > >& seqIdList = seqEntry->GetSeq().GetId();
1385  list< CRef< CSeq_id > >::const_iterator lsii;
1386  for (lsii = seqIdList.begin(); lsii != seqIdList.end(); ++lsii)
1387  {
1388  if (pdbOnly)
1389  {
1390  if ((*lsii)->IsPdb())
1391  slaveIds.push_back(*lsii);
1392  }
1393  else
1394  slaveIds.push_back(*lsii);
1395  }
1396  return slaveIds.size();
1397  }
1398  else
1399  {
1400  list< CRef< CSeq_entry > >::const_iterator lsei;
1401  const list< CRef< CSeq_entry > >& seqEntryList = seqEntry->GetSet().GetSeq_set();
1402  for (lsei = seqEntryList.begin(); lsei != seqEntryList.end(); ++lsei)
1403  {
1404  GetAllIdsFromSeqEntry(*lsei, slaveIds, pdbOnly); // RECURSIVE!!
1405  }
1406  return slaveIds.size();
1407  }
1408 }
1409 //get only protein
1411  CRef< CBioseq >& bioseq,const CSeq_id* seqId)
1412 {
1413  if (seqEntry->IsSeq())
1414  {
1415  if (seqEntry->GetSeq().IsAa())
1416  {
1417  if (seqId)
1418  {
1419  if (SeqEntryHasSeqId(seqEntry, *seqId))
1420  {
1421  bioseq.Reset(&seqEntry->SetSeq());
1422  return true;
1423  }
1424  else
1425  return false;
1426  }
1427  else
1428  {
1429  bioseq.Reset(&seqEntry->SetSeq());
1430  return true;
1431  }
1432  }
1433  else
1434  return false;
1435 
1436  }
1437  else
1438  {
1439  list< CRef< CSeq_entry > >::const_iterator lsei;
1440  const list< CRef< CSeq_entry > >& seqEntryList = seqEntry->GetSet().GetSeq_set();
1441  for (lsei = seqEntryList.begin(); lsei != seqEntryList.end(); ++lsei)
1442  {
1443  if (GetOneBioseqFromSeqEntry(*lsei, bioseq, seqId)) // RECURSIVE!!
1444  return true;
1445  }
1446  return false;
1447  }
1448 }
1449 
1451 {
1452  vector< CRef< CSeq_id > > seqIds;
1453  GetAllIdsFromSeqEntry(seqEntry, seqIds,false);
1454  for (unsigned int i = 0; i < seqIds.size(); i++)
1455  {
1456  if (seqIds[i]->Match(seqId))
1457  return true;
1458  }
1459  return false;
1460 }
1461 
1462 bool CDUpdater::BioseqHasSeqId(const CBioseq& bioseq, const CSeq_id& seqId)
1463 {
1464  const CBioseq::TId& ids = bioseq.GetId();
1465  CBioseq::TId::const_iterator it = ids.begin(), itend = ids.end();
1466  for (; it != itend; ++it)
1467  {
1468  if ((*it)->Match(seqId))
1469  {
1470  return true;
1471  }
1472  }
1473  return false;
1474 }
1475 
1476 
1478 {
1481  list< CRef< CBlast_def_line > >& deflines = blastDefLine->Set();
1482  //most cases
1483  if (deflines.size() <= 1)
1484  {
1485  bioseqs.push_back(orig);
1486  return 1;
1487  }
1488  //PDBs likely
1489  int order = 0;
1490  for (list< CRef< CBlast_def_line > >::iterator iter = deflines.begin();
1491  iter != deflines.end(); iter++)
1492  {
1493  CRef<CBioseq> splitBioseq(new CBioseq);
1494  splitBioseq->Assign(*orig);
1495  reformatBioseqByBlastDefline(splitBioseq, *iter, order);
1496  bioseqs.push_back(splitBioseq);
1497  order++;
1498  }
1499  return deflines.size();
1500 }
1501 
1503 {
1504  CSeq_descr& seqDescr = bioseq->SetDescr();
1505  int sourceOrder = 0;
1506  if (seqDescr.IsSet())
1507  {
1508  list< CRef< CSeqdesc > >& descrList = seqDescr.Set();
1509  list< CRef< CSeqdesc > >::iterator cit = descrList.begin();
1510  while (cit != descrList.end())
1511  {
1512  if ((*cit)->IsSource()) //only keep one source field
1513  {
1514  if (sourceOrder == order)
1515  cit++; //keep
1516  else
1517  cit = descrList.erase(cit);
1518 
1519  // Do this for both cases; if sourceOrder == order must increment
1520  // otherwise will keep all sources *after* order.
1521  sourceOrder++;
1522  }
1523  else if ( (*cit)->IsTitle())
1524  cit = descrList.erase(cit);
1525  }
1526  //add the title from the defLine
1527  CRef< CSeqdesc > title(new CSeqdesc);
1528  title->SetTitle(blastDefline->GetTitle());
1529  descrList.push_back(title);
1530  }
1531 
1532  //add seq_ids from the defline
1533  bioseq->SetId().assign(blastDefline->GetSeqid().begin(), blastDefline->GetSeqid().end());
1534 }
1535 
1536 // IMPORTANT: This code was forked from src/objtools/align_format/align_format_util.cpp.
1537 // Check for changes in original source if this forked version misbehaves in the future.
1538 /// Efficiently decode a Blast-def-line-set from binary ASN.1.
1539 /// @param oss Octet string sequence of binary ASN.1 data.
1540 /// @param bdls Blast def line set decoded from oss.
1542 {
1543  typedef const CUser_field::TData::TOss TOss;
1544 
1545  const char * data = NULL;
1546  size_t size = 0;
1547  string temp;
1548 
1549  if (oss.size() == 1) {
1550  // In the single-element case, no copies are needed.
1551 
1552  const vector<char> & v = *oss.front();
1553  data = & v[0];
1554  size = v.size();
1555  } else {
1556  // Determine the octet string length and do one allocation.
1557 
1558  ITERATE (TOss, iter1, oss) {
1559  size += (**iter1).size();
1560  }
1561 
1562  temp.reserve(size);
1563 
1564  ITERATE (TOss, iter3, oss) {
1565  // 23.2.4[1] "The elements of a vector are stored contiguously".
1566  temp.append(& (**iter3)[0], (*iter3)->size());
1567  }
1568 
1569  data = & temp[0];
1570  }
1571 
1573  inpstr >> bdls;
1574 }
1575 
1576 
1577 
1578 // IMPORTANT: This code was forked from src/objtools/align_format/align_format_util.cpp.
1579 // That method uses the object manager, however, so we're not calling the function directly.
1580 // Check for changes in original source if this forked version misbehaves in the future.
1582 {
1583  static const string asnDeflineObjLabel = "ASN1_BlastDefLine";
1584 
1586  if(bioseq.IsSetDescr()){
1587  const CSeq_descr& desc = bioseq.GetDescr();
1588  const list< CRef< CSeqdesc > >& descList = desc.Get();
1589  for (list<CRef< CSeqdesc > >::const_iterator iter = descList.begin(); iter != descList.end(); iter++){
1590 
1591  if((*iter)->IsUser()){
1592  const CUser_object& uobj = (*iter)->GetUser();
1593  const CObject_id& uobjid = uobj.GetType();
1594  if(uobjid.IsStr()){
1595 
1596  const string& label = uobjid.GetStr();
1597  if (label == asnDeflineObjLabel){
1598  const vector< CRef< CUser_field > >& usf = uobj.GetData();
1599 
1600  if(usf.front()->GetData().IsOss()){ //only one user field
1601  typedef const CUser_field::TData::TOss TOss;
1602  const TOss& oss = usf.front()->GetData().GetOss();
1603  OssToDefline(oss, *bdls);
1604  }
1605  }
1606  }
1607  }
1608  }
1609  }
1610  return bdls;
1611 }
1612 
1614 {
1615  static const string asnDeflineObjLabel = "ASN1_BlastDefLine";
1616  if(handle.IsSetDescr())
1617  {
1618  CSeq_descr& desc = handle.SetDescr();
1619  list< CRef< CSeqdesc > >& descList = desc.Set();
1620  for (list<CRef< CSeqdesc > >::iterator iter = descList.begin(); iter != descList.end(); iter++)
1621  {
1622  if((*iter)->IsUser())
1623  {
1624  const CUser_object& uobj = (*iter)->GetUser();
1625  const CObject_id& uobjid = uobj.GetType();
1626  if(uobjid.IsStr())
1627  {
1628  const string& label = uobjid.GetStr();
1629  if (label == asnDeflineObjLabel)
1630  {
1631  descList.erase(iter);
1632  return;
1633  }
1634  }
1635  }
1636  }
1637  }
1638 }
1639 
1641 {
1642  AlignmentCollection ac(cd); //default:pending only
1643  int num = ac.GetNumRows();
1644  int seqlen = cd->GetSequenceStringByRow(0).size();
1645  if (seqlen <= 0)
1646  return num;
1647  vector< CRef< CSeq_align > > seqAlignVec;
1648  for (int i = 0; i < num; i++)
1649  seqAlignVec.push_back(ac.getSeqAlign(i));
1650  cd_utils::BlockFormater bf(seqAlignVec, seqlen);
1651  list< CRef< CSeq_align > >& seqAlignList = cd->GetSeqAligns();
1652  if (seqAlignList.size() > 0)
1653  {
1654  BlockModelPair bmp(*seqAlignList.begin());
1655  if (bmp.getMaster() == bmp.getSlave()) //aligned to self; used as a seed
1656  seqAlignList.erase(seqAlignList.begin());
1657  if (seqAlignList.size() > 0)
1658  bf.setReferenceSeqAlign(*seqAlignList.begin());
1659  }
1660  int numGood = bf.findIntersectingBlocks(overlap);
1661  bf.formatBlocksForQualifiedRows(seqAlignList);
1662  set<int> goodRows;
1663  vector<int> rows;
1664  bf.getQualifiedRows(rows);
1665  for (unsigned int r = 0; r < rows.size(); r++)
1666  goodRows.insert(rows[r]);
1667  cd->ErasePendingRows(goodRows);
1668  return num - numGood;
1669 }
1670 
1671 
1673 : m_cd(cd)
1674 {
1675  addSequences(cd->SetSequences());
1676 }
1677 
1679 {
1680  if (seqEntry.IsSet())
1681  {
1682  list< CRef< CSeq_entry > >& seqSet = seqEntry.SetSet().SetSeq_set();
1683  list< CRef< CSeq_entry > >::iterator it = seqSet.begin();
1684  for (; it != seqSet.end(); it++)
1685  addSequences(*(*it));
1686  }
1687  else
1688  {
1689  CRef< CBioseq > bioseq(&(seqEntry.SetSeq()));
1690  addSequence(bioseq);
1691  }
1692 }
1693 
1695 {
1696  string acc;
1697  int ver=0;
1698  CRef< CSeq_id > textId;
1699  if (GetAccAndVersion(bioseq, acc, ver, textId))
1701 }
1702 
1704 {
1705  string nAcc;
1706  int nVer=0;
1707  CRef< CSeq_id > textId;
1708  if (!GetAccAndVersion(bioseq, nAcc, nVer, textId))
1709  return false;
1711  if (it != m_accSeqMap.end())
1712  {
1713  TGi newgi = CDUpdater::getGi(bioseq);
1714  TGi oldgi = CDUpdater::getGi(it->second);
1715  return newgi != oldgi;
1716  }
1717  else
1718  return false;
1719 }
1720 
1721  //return the gi that's replaced; return -1 if none is replaced
1723 {
1724  CRef< CBioseq > bioseqRef;
1725  if (!CDUpdater::GetOneBioseqFromSeqEntry(seqEntry, bioseqRef))
1726  return INVALID_GI;
1727  if (!hasOlderVersion(bioseqRef))
1728  return INVALID_GI;
1729  string nAcc;
1730  int nVer=0;
1731  CRef< CSeq_id > textId;
1732  if (!GetAccAndVersion(bioseqRef, nAcc, nVer, textId))
1733  return INVALID_GI;
1734  CRef< CBioseq > oldBioseq = m_accSeqMap[nAcc];
1735  string newStr, oldStr;
1736  GetNcbieaaString(*bioseqRef, newStr);
1737  GetNcbieaaString(*oldBioseq, oldStr);
1738  if (newStr.size() != oldStr.size())
1739  return INVALID_GI;
1740  //proceed to do the placement
1741  vector< CRef< CSeq_id > > newIds;
1742  CDUpdater::GetAllIdsFromSeqEntry(seqEntry, newIds);
1743  CRef< CSeq_id > giId, pdbId;
1744  for (unsigned int i = 0; i < newIds.size(); i++)
1745  {
1746  if (newIds[i]->IsGi())
1747  giId = newIds[i];
1748  else if (newIds[i]->IsPdb())
1749  pdbId = newIds[i];
1750  }
1751  list< CRef< CSeq_align > >& seqAlignList = m_cd->GetSeqAligns();
1752  bool replaced = false;
1753  for (list< CRef< CSeq_align > >::iterator lit = seqAlignList.begin(); lit != seqAlignList.end(); lit++)
1754  {
1755  CRef< CSeq_align >& seqAlign = *lit;
1756  CRef< CSeq_id > idInAlign;
1757  GetSeqID(seqAlign, idInAlign, true);
1758  if (CDUpdater::BioseqHasSeqId(*oldBioseq, *idInAlign))
1759  {
1760  BlockModelPair bmp(seqAlign);
1761 
1762  if (pdbId.NotNull())
1763  {
1764  bmp.getSlave().setSeqId(pdbId);
1765  seqAlign = bmp.toSeqAlign();
1766  replaced =true;
1767  }
1768  else if (giId.NotNull())
1769  {
1770  bmp.getSlave().setSeqId(giId);
1771  seqAlign = bmp.toSeqAlign();
1772  replaced = true;
1773  }
1774  }
1775  }
1776  if (replaced)
1777  {
1778  m_cd->AddSequence(seqEntry);
1779  return CDUpdater::getGi(oldBioseq);
1780  }
1781  else
1782  return INVALID_GI;
1783 }
1784 
1785 END_SCOPE(cd_utils)
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the BLAST exception class.
Declares the CBlastServices class.
CRef< CSeq_align > getSeqAlign(int row) const
int getGapToNTerminal(int bn) const
Definition: cuBlock.cpp:618
int getGapToCTerminal(int bn, int len=-1) const
Definition: cuBlock.cpp:632
vector< Block > & getBlocks()
Definition: cuBlock.hpp:97
CBioseq_Handle –.
bool IsAa(void) const
Definition: Bioseq.cpp:350
int GetNumRows() const
Definition: cuCdCore.cpp:215
int GetUpperBound(int Row) const
Definition: cuCdCore.cpp:490
string GetSequenceStringByRow(int rowId)
Definition: cuCdCore.cpp:578
bool AddSequence(CRef< CSeq_entry > seqAntry)
Definition: cuCdCore.cpp:1153
bool GetSeqIDFromAlignment(int RowIndex, CRef< CSeq_id > &SeqID) const
Definition: cuCdCore.cpp:815
bool AddPendingSeqAlign(CRef< CSeq_align > seqAlign)
Definition: cuCdCore.cpp:1112
bool GetSeqEntryForRow(int rowId, CRef< CSeq_entry > &seqEntry) const
Definition: cuCdCore.cpp:529
bool CopyBioseqForSeqId(const CRef< CSeq_id > &seqId, CRef< CBioseq > &bioseq) const
Definition: cuCdCore.cpp:1767
void ErasePendingRows(set< int > &rows)
Definition: cuCdCore.cpp:1127
int GetLowerBound(int Row) const
Definition: cuCdCore.cpp:471
const list< CRef< CSeq_align > > & GetSeqAligns() const
Definition: cuCdCore.cpp:1398
bool GetBioseqForRow(int rowId, CRef< CBioseq > &bioseq)
Definition: cuCdCore.cpp:561
string GetAccession(int &Version) const
Definition: cuCdCore.cpp:81
bool GetSeqIDForRow(int Pair, int DenDiagRow, CRef< CSeq_id > &SeqID) const
Definition: cuCdCore.cpp:787
AccessionBioseqMap m_accSeqMap
Definition: cuCdUpdater.hpp:66
bool hasOlderVersion(CRef< CBioseq > bioseq)
void addSequence(CRef< CBioseq > bioseq)
TGi refresh(CRef< CSeq_align > seqAlign, CRef< CSeq_entry > seqEntry)
CCdCore * m_cd
Definition: cuCdUpdater.hpp:64
CDRefresher(CCdCore *cd)
void addSequences(CSeq_entry &seqEntry)
bool checkDone()
int submitBlast(bool wait=false, int row=0)
bool findSeq(CRef< CSeq_id > seqID, vector< CRef< CBioseq > > &bioseqs, CRef< CSeq_entry > &seqEntry)
static int mergePending(CCdCore *cd, int threshold, bool remaster)
bool isFragmentedSeq(CCdCore *cd, CRef< CSeq_align > seqAlign, CRef< CSeq_entry > seqEntry)
double ComputePercentIdentity(const CRef< CSeq_align > &alignment, const string &queryString, const string &subjectString)
static void reformatBioseqByBlastDefline(CRef< CBioseq > bioseq, CRef< CBlast_def_line > blastDefline, int order)
static CRef< CBlast_def_line_set > GetBlastDefline(const CBioseq &handle)
bool getBlastHits()
bool overlapWithCDRow(CCdCore *cd, CRef< CSeq_align > seqAlign)
string m_rid
static int GetAllIdsFromSeqEntry(CRef< CSeq_entry > seqEntry, vector< CRef< CSeq_id > > &slaveIds, bool pdbOnly=false)
bool processBlastHits()
bool blast(bool wait=false, int row=0)
cd_utils::BlockModelPair * m_guideAlignment
CDUpdater(CCdCore *cd, CdUpdateParameters &config)
const string getLastError()
void retrieveAllSequences(CSeq_align_set &alignments, vector< CRef< CBioseq > > &bioseqs)
static bool reformatBioseq(CRef< CBioseq > bioseq, CRef< CSeq_entry > seqEntry, CEntrez2Client &client)
string m_consensus
static void RemoveBlastDefline(CBioseq &handle)
virtual ~CDUpdater()
void getCds(vector< CCdCore * > &)
bool hasCd(CCdCore *)
static int SplitBioseqByBlastDefline(CRef< CBioseq > handle, vector< CRef< CBioseq > > &bioseqs)
const string getRid()
CEntrez2Client m_client
CDUpdateStats & getStats()
static bool SeqEntryHasSeqId(CRef< CSeq_entry > seqEntry, const CSeq_id &seqId)
CRef< CSeq_id > m_masterPdb
CDUpdateStats m_stats
static int pickBioseq(CDRefresher *refresher, CRef< CSeq_align > seqAlignRef, vector< CRef< CBioseq > > &bioseqVec)
bool getHits(CRef< CSeq_align_set > &hits)
void getSequencesFromGB(vector< CRef< CSeq_id > > seqids, vector< CRef< CBioseq > > &bioseqs)
CdUpdateParameters m_config
static bool GetOneBioseqFromSeqEntry(CRef< CSeq_entry > seqEntry, CRef< CBioseq > &bioseq, const CSeq_id *seqId=0)
CCdCore * m_cd
int m_processPendingThreshold
bool modifySeqAlignSeqEntry(CCdCore *cd, CRef< CSeq_align > &seqAlign, CRef< CSeq_entry > seqEntry)
static void OssToDefline(const CUser_field::TData::TOss &oss, CBlast_def_line_set &bdls)
Efficiently decode a Blast-def-line-set from binary ASN.1.
bool passedFilters(CCdCore *cd, CRef< CSeq_align > seqAlign, CRef< CSeq_entry > seqEntry)
bool checkBlastAndUpdate()
static int processPendingToNormal(int overlap, CCdCore *cd)
int m_blastQueryRow
bool findRowsWithOldSeq(CCdCore *cd, CBioseq &bioseq)
static bool BioseqHasSeqId(const CBioseq &bioseq, const CSeq_id &seqId)
CRef< CSeq_align_set > m_hits
static TGi getGi(CRef< CSeq_entry > seqEntry)
int m_hitsNeeded
bool update(CCdCore *cd, CSeq_align_set &alignments)
string m_lastError
Definition: Dbtag.hpp:53
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CObjectIStreamAsnBinary –.
Definition: objistrasnb.hpp:59
TTaxId SetTaxId(TTaxId tax_id)
Definition: Org_ref.cpp:93
Handle to the protein-protein options to the BLAST algorithm.
Exception class for the CRemoteBlast class.
CScope –.
Definition: scope.hpp:92
structure for seqloc info
Definition: seqlocinfo.hpp:48
@ eScore_IdentityCount
Definition: Seq_align.hpp:145
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
Definition: Seq_entry.hpp:56
static string getOrganismName(Organism org)
void getCds(vector< CCdCore * > &)
virtual ~GroupUpdater()
GroupUpdater(vector< CCdCore * > &cds, CdUpdateParameters &config)
bool getBlastHits()
int submitBlast(bool wait=false, int row=0)
vector< CDUpdater * > m_cdUpdaters
bool hasCd(CCdCore *)
bool processBlastHits()
void addBatch(CRef< CSeq_align_set > seqAlignSet)
const string & getConsensus()
void setOptions(const PssmMakerOptions &option)
short getPseudoCount()
CRef< CPssmWithParameters > make()
const BlockModelPair & getGuideAlignment()
unsigned findSequencesInTheGroup(CRef< CSeq_id > seqId, vector< CRef< CBioseq > > &bioseqVec) const
void addSequences(vector< CRef< CBioseq > > &bioseqVec, bool grouped=false)
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
static void addUpdater(UpdaterInterface *updater)
static bool IsEmpty()
static list< UpdaterInterface * > m_updaterList
static int checkAllBlasts(vector< UpdaterInterface * > &blasted)
virtual bool getBlastHits()=0
static void removeUpdaters(const vector< CCdCore * > &cds)
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
static int nRows
Definition: cn3d_png.cpp:115
struct config config
CRef< CSeq_align > Denseg2DenseDiagList(const CRef< CSeq_align > &denseSegSeqAlign)
Definition: cuAlign.cpp:703
bool GetSeqID(const CRef< CSeq_align > &seqAlign, CRef< CSeq_id > &SeqID, bool getSlave=true)
Definition: cuAlign.cpp:55
int GetNumAlignedResidues(const CRef< CSeq_align > &align)
Definition: cuAlign.cpp:238
bool ReMasterCdWithoutUnifiedBlocks(CCdCore *cd, int Row, bool resetFields=true)
Definition: cuCD.cpp:757
bool SetUpdateDate(CCdCore *cd)
Definition: cuCD.cpp:234
@ eAll_organisms
bool SeqIdsMatch(const CRef< CSeq_id > &id1, const CRef< CSeq_id > &id2)
Definition: cuSequence.cpp:70
int GetSeqLength(const CBioseq &bioseq)
Definition: cuSequence.cpp:216
bool GetPDBBlockFromSeqEntry(CRef< CSeq_entry > seqEntry, CRef< CPDB_block > &pdbBlock)
Definition: cuSequence.cpp:446
string GetRawSequenceString(const CBioseq &bioseq)
Definition: cuSequence.cpp:349
bool GetNcbieaaString(const CBioseq &bioseq, string &str)
Definition: cuSequence.cpp:298
bool GetAccAndVersion(const CRef< CBioseq > bioseq, string &acc, int &version, CRef< CSeq_id > &seqId)
Definition: cuSequence.cpp:420
Declares auxiliary class to calculate the effective search space.
thread_local unique_ptr< FtaMsgPost > bmp
Definition: ftaerr.cpp:120
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
char data[12]
Definition: iconv.c:80
static void GetSequences(TSeqIdVector &seqids, const string &database, char seqtype, TBioseqVector &bioseqs, string &errors, string &warnings, bool verbose=false, bool target_only=false)
Get a set of Bioseqs given an input set of Seq-ids.
void SetPseudoCount(int p)
Sets PseudoCount.
void SetSegFiltering(bool val)
Enable SEG filtering.
#define INVALID_GI
Definition: ncbimisc.hpp:1089
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define TAX_ID_FROM(T, value)
Definition: ncbimisc.hpp:1111
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
Definition: ncbiexpt.cpp:370
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
TLoader * GetLoader(void) const
Get pointer to the loader.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:1684
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
Definition: ncbiobj.hpp:744
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5109
static const char label[]
Tdata & Set(void)
Assign a value to data member.
void SetSequences(TSequences &value)
Assign a value to Sequences data member.
Definition: Cdd_.cpp:134
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
const TStrs & GetStrs(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
vector< vector< char > * > TOss
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
Definition: Dbtag_.hpp:229
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
Definition: Org_ref_.hpp:381
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
Tdata & Set(void)
Assign a value to data member.
TDenseg & SetDenseg(void)
Select the variant.
Definition: Seq_align_.cpp:159
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CDense_diag > > TDendiag
Definition: Seq_align_.hpp:194
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
TGeneral & SetGeneral(void)
Select the variant.
Definition: Seq_id_.cpp:375
bool IsPdb(void) const
Check if variant Pdb is selected.
Definition: Seq_id_.hpp:922
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
void ResetAnnot(void)
Reset Annot data member.
Definition: Bioseq_.cpp:91
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
bool IsSetDescr(void) const
Descriptors: check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:303
bool IsSet(void) const
Check if a value has been assigned to data member.
Definition: Seq_descr_.hpp:154
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
where both are integers</td>\n<td></td>\n</tr>\n<tr>\n<td>tse</td>\n<td>optional</td>\n<td>String</td>\n<td class=\"description\">TSE option controls what blob is orig
int i
int len
#include<zmmintrin.h>
Definition: bm.h:78
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
Definition: pointer.h:1149
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Declares the CPSIBlastOptionsHandle class.
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
static CNamedPipeClient * client
#define assert(x)
Definition: srv_diag.hpp:58
#define row(bind, expected)
Definition: string_bind.c:73
int numFilteredByOverlap
Definition: cuCdUpdater.hpp:85
string toString(bool detailed=true)
Definition: cuCdUpdater.cpp:67
pair< TGi, TGi > OldNewGiPair
Definition: cuCdUpdater.hpp:82
vector< OldNewGiPair > oldNewPairs
Definition: cuCdUpdater.hpp:83
vector< TGi > overlap
Definition: cuCdUpdater.hpp:77
vector< TGi > envSeq
Definition: cuCdUpdater.hpp:75
vector< TGi > fragmented
Definition: cuCdUpdater.hpp:76
vector< TGi > badAlign
Definition: cuCdUpdater.hpp:79
vector< TGi > noSeq
Definition: cuCdUpdater.hpp:78
Definition: type.c:6
done
Definition: token1.c:1
else result
Definition: token2.c:20
Modified on Thu Apr 25 08:18:51 2024 by modify_doxy.py rev. 669887