NCBI C++ ToolKit
cn3d_blast.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cn3d_blast.cpp 92483 2021-01-26 18:35:06Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * module for aligning with BLAST and related algorithms
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbistr.hpp>
37 
41 
42 #include <objects/seq/Bioseq.hpp>
43 #include <objects/seq/Seq_inst.hpp>
44 #include <objects/seq/Seq_data.hpp>
48 
50 
51 #include "cn3d_blast.hpp"
53 #include "cn3d_pssm.hpp"
54 #include "sequence_set.hpp"
55 #include "cn3d_tools.hpp"
56 #include "structure_set.hpp"
57 #include "molecule_identifier.hpp"
58 #include "asn_reader.hpp"
59 
62 
63 
64 BEGIN_SCOPE(Cn3D)
65 
66 class TruncatedSequence : public CObject
67 {
68 public:
71  int fromIndex, toIndex;
72 };
73 
74 typedef vector < CRef < TruncatedSequence > > TruncatedSequences;
75 
// CreateTruncatedSequence(): builds a TruncatedSequence for one row of the pairwise
// alignment `pair`, copies the chosen residue range into a new protein Bioseq, and
// returns `ts`.
// NOTE(review): the first line of this definition (return type, function name and the
// leading `multiple` parameter) and the declarations of `ts` and `uaBlocks` are absent
// from this listing; the comments here describe only the statements that are visible.
//
//   pair       - pairwise alignment supplying the sequence and any alignFrom/To limits
//   alnNum     - integer stored as the local Seq-id of the new Bioseq
//   isMaster   - true: truncate pair->GetMaster(); false: truncate pair->GetSequenceOfRow(1)
//   extension  - residues added on each side of the aligned footprint (master branch only)
77  const BlockMultipleAlignment *pair, int alnNum, bool isMaster, int extension)
78 {
80 
81  // master sequence (only used for blast-two-sequences)
82  if (isMaster) {
83 
84  ts->originalFullSequence = pair->GetMaster();
86 
87  // use alignMasterTo/From if present and reasonable
 // "reasonable" here means: both bounds lie inside the sequence and from <= to
88  if (pair->alignMasterFrom >= 0 && pair->alignMasterFrom < (int)ts->originalFullSequence->Length() &&
89  pair->alignMasterTo >= 0 && pair->alignMasterTo < (int)ts->originalFullSequence->Length() &&
90  pair->alignMasterFrom <= pair->alignMasterTo)
91  {
92  ts->fromIndex = pair->alignMasterFrom;
93  ts->toIndex = pair->alignMasterTo;
94  }
95 
96  // use aligned footprint + extension if multiple has any aligned blocks
 // footprint = start of the first block to end of the last block on row 0, widened by
 // `extension` on both sides and then clamped to [0, Length() - 1]
97  else if (multiple && multiple->GetUngappedAlignedBlocks(&uaBlocks) > 0)
98  {
99  ts->fromIndex = uaBlocks.front()->GetRangeOfRow(0)->from - extension;
100  if (ts->fromIndex < 0)
101  ts->fromIndex = 0;
102  ts->toIndex = uaBlocks.back()->GetRangeOfRow(0)->to + extension;
103  if (ts->toIndex >= (int)ts->originalFullSequence->Length())
104  ts->toIndex = ts->originalFullSequence->Length() - 1;
105  }
106 
107  // otherwise, just use the whole sequence
108  else {
109  ts->fromIndex = 0;
110  ts->toIndex = ts->originalFullSequence->Length() - 1;
111  }
112  }
113 
114  // dependent sequence
 // (no footprint/extension logic in this branch: only the explicit limits or the whole sequence)
115  else {
116 
117  ts->originalFullSequence = pair->GetSequenceOfRow(1);
118 
119  // use alignDependentTo/From if present and reasonable
120  if (pair->alignDependentFrom >= 0 && pair->alignDependentFrom < (int)ts->originalFullSequence->Length() &&
121  pair->alignDependentTo >= 0 && pair->alignDependentTo < (int)ts->originalFullSequence->Length() &&
122  pair->alignDependentFrom <= pair->alignDependentTo)
123  {
124  ts->fromIndex = pair->alignDependentFrom;
125  ts->toIndex = pair->alignDependentTo;
126  }
127 
128  // otherwise, just use the whole sequence
129  else {
130  ts->fromIndex = 0;
131  ts->toIndex = ts->originalFullSequence->Length() - 1;
132  }
133  }
134 
135  // create new Bioseq (contained in a Seq-entry) with the truncated sequence
 // the Bioseq gets a single local integer id (alnNum), raw representation, amino-acid type
136  ts->truncatedSequence.Reset(new CSeq_entry);
137  CBioseq& bioseq = ts->truncatedSequence->SetSeq();
138  CRef < CSeq_id > id(new CSeq_id);
139  id->SetLocal().SetId(alnNum);
140  bioseq.SetId().push_back(id);
141  bioseq.SetInst().SetRepr(CSeq_inst::eRepr_raw);
142  bioseq.SetInst().SetMol(CSeq_inst::eMol_aa);
 // fromIndex..toIndex is inclusive, hence the + 1
143  bioseq.SetInst().SetLength(ts->toIndex - ts->fromIndex + 1);
 // the trace message reports the range one-based
144  TRACEMSG("truncated " << ts->originalFullSequence->identifier->ToString()
145  << " from " << (ts->fromIndex+1) << " to " << (ts->toIndex+1) << "; length " << bioseq.GetInst().GetLength());
 // copy the residues, converting each character with LookupNCBIStdaaNumberFromCharacter()
146  bioseq.SetInst().SetSeq_data().SetNcbistdaa().Set().resize(ts->toIndex - ts->fromIndex + 1);
147  for (int j=ts->fromIndex; j<=ts->toIndex; ++j)
148  bioseq.SetInst().SetSeq_data().SetNcbistdaa().Set()[j - ts->fromIndex] =
149  LookupNCBIStdaaNumberFromCharacter(ts->originalFullSequence->sequenceString[j]);
150 
151  return ts;
152 }
153 
154 static inline bool IsLocalID(const CSeq_id& sid, int localID)
155 {
156  return (sid.IsLocal() && (
157  (sid.GetLocal().IsStr() && sid.GetLocal().GetStr() == NStr::IntToString(localID)) ||
158  (sid.GetLocal().IsId() && sid.GetLocal().GetId() == localID)));
159 }
160 
161 /*
162 static inline bool GetLocalID(const CSeq_id& sid, int *localID)
163 {
164  *localID = kMin_Int;
165  if (!sid.IsLocal())
166  return false;
167  if (sid.GetLocal().IsId())
168  *localID = sid.GetLocal().GetId();
169  else try {
170  *localID = NStr::StringToInt(sid.GetLocal().GetStr());
171  } catch (...) {
172  return false;
173  }
174  return true;
175 }
176 */
177 
178 static inline bool SeqIdMatchesMaster(const CSeq_id& sid, bool usePSSM)
179 {
180  // if blast-sequence-vs-pssm, master will be consensus
181  if (usePSSM)
182  return (sid.IsLocal() && sid.GetLocal().IsStr() && sid.GetLocal().GetStr() == "consensus");
183 
184  // if blast-two-sequences, master will be local id -1
185  else
186  return IsLocalID(sid, -1);
187 }
188 
189 static void MapBlockFromConsensusToMaster(int consensusStart, int dependentStart, int length,
190  BlockMultipleAlignment *newAlignment, const BlockMultipleAlignment *multiple)
191 {
192  // get mapping of each position of consensus -> master on this block
193  vector < int > masterLoc(length);
194  int i;
195  for (i=0; i<length; ++i)
196  masterLoc[i] = multiple->GetPSSM().MapConsensusToMaster(consensusStart + i);
197 
198  UngappedAlignedBlock *subBlock = NULL;
199  for (i=0; i<length; ++i) {
200 
201  // is this the start of a sub-block?
202  if (!subBlock && masterLoc[i] >= 0) {
203  subBlock = new UngappedAlignedBlock(newAlignment);
204  subBlock->SetRangeOfRow(0, masterLoc[i], masterLoc[i]);
205  subBlock->SetRangeOfRow(1, dependentStart + i, dependentStart + i);
206  subBlock->width = 1;
207  }
208 
209  // continue existing sub-block
210  if (subBlock) {
211 
212  // is this the end of a sub-block?
213  if (i == length - 1 || // last position of block
214  masterLoc[i + 1] < 0 || // next position is unmapped
215  masterLoc[i + 1] != masterLoc[i] + 1) // next position is discontinuous
216  {
217  newAlignment->AddAlignedBlockAtEnd(subBlock);
218  subBlock = NULL;
219  }
220 
221  // extend block by one
222  else {
223  const Block::Range *range = subBlock->GetRangeOfRow(0);
224  subBlock->SetRangeOfRow(0, range->from, range->to + 1);
225  range = subBlock->GetRangeOfRow(1);
226  subBlock->SetRangeOfRow(1, range->from, range->to + 1);
227  ++(subBlock->width);
228  }
229  }
230  }
231 
232  if (subBlock)
233  ERRORMSG("MapBlockFromConsensusToMaster() - unterminated sub-block");
234 }
235 
// Revokes every data loader whose name is reported by om->GetRegisteredNames().
// NOTE(review): the declarations of `om` and `loader_names` (original lines 237-238)
// are missing from this listing; presumably `om` is the CObjectManager instance and
// `loader_names` a CObjectManager::TRegisteredNames - confirm against the real file.
236 static void RemoveAllDataLoaders() {
239  om->GetRegisteredNames(loader_names);
 // revoke each registered loader by name
240  ITERATE(CObjectManager::TRegisteredNames, itr, loader_names) {
241  om->RevokeDataLoader(*itr);
242  }
243 }
244 
245 static bool SimpleSeqLocFromBioseq(const CRef< CBioseq>& bs, CSeq_loc& seqLoc)
246 {
247  bool result = true;
248  CSeq_interval& seqInt = seqLoc.SetInt();
249  CSeq_id& seqId = seqInt.SetId();
250  seqInt.SetFrom(0);
251 
252  // Assign the first identifier from the bioseq
253  if (bs.NotEmpty() && bs->GetFirstId() != 0) {
254  seqInt.SetTo(bs->GetLength() - 1);
255  seqId.Assign(*(bs->GetFirstId()));
256  } else {
257  result = false;
258  }
259 
260  return result;
261 }
262 
// CreateNewPairwiseAlignmentsByBlast(): re-aligns each pairwise alignment in `toRealign`
// with BLAST and collects the resulting alignments in `newAlignments`.
// NOTE(review): about a dozen original lines are absent from this listing, including the
// first line of the signature (which carries the `multiple` parameter) and the declarations
// of objmgr, pssmQuery, pssmOptions, sequenceOptions, masterTS and seqs. The comments here
// describe only what the visible statements do.
//
//   toRealign      - two-row alignments to realign; all must share the same master
//   newAlignments  - output list; cleared on entry, then one entry is pushed per result
//                    that finalizes successfully (raw pointers released from a unique_ptr,
//                    so presumably the caller is responsible for deleting them)
//   usePSSM        - true: search each dependent against the PSSM of `multiple`;
//                    false: blast-two-sequences (only one entry in toRealign is accepted)
//
// Problems are reported through ERRORMSG/WARNINGMSG followed by an early return; anything
// thrown as `exception` inside the try block is caught and logged at the end.
264  const AlignmentList& toRealign, AlignmentList *newAlignments, bool usePSSM)
265 {
266  newAlignments->clear();
 // PSSM mode needs a multiple alignment with at least one aligned block
267  if (usePSSM && (!multiple || multiple->HasNoAlignedBlocks())) {
268  ERRORMSG("usePSSM true, but NULL or zero-aligned block multiple alignment");
269  return;
270  }
271  if (!usePSSM && toRealign.size() > 1) {
272  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - currently can only do single blast-2-sequences at a time");
273  return;
274  }
 // nothing to do for an empty input list
275  if (toRealign.size() == 0)
276  return;
277 
278  try {
 // master comes from the multiple if there is one, otherwise from the first pair (below)
279  const Sequence *master = (multiple ? multiple->GetMaster() : NULL);
280 
281  int extension = 0;
 // NOTE(review): the condition guarding this warning (original line 282) is missing here
283  WARNINGMSG("Can't get footprint residue extension from registry");
284 
285  // Make sure object manager loads only data from our alignment object.
288  CScope scope(*objmgr);
289  CRef< CBioseq > queryBioseq, subjectBioseq;
290  CRef<CSeq_loc> querySeqLoc(new CSeq_loc);
291  blast::CBlastQueryVector queryVector, subjectVector;
292  scope.ResetDataAndHistory();
293 
294  // collect subject(s) - second sequence of each realignment
 // localID counts the subjects from 0 and becomes each truncated Bioseq's local id;
 // the same counter is used again below to match results back to subjects
295  TruncatedSequences subjectTSs;
296  int localID = 0;
297  AlignmentList::const_iterator a, ae = toRealign.end();
298  for (a=toRealign.begin(); a!=ae; ++a, ++localID) {
299  if (!master)
300  master = (*a)->GetMaster();
301  if ((*a)->GetMaster() != master) {
302  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - all masters must be the same");
303  return;
304  }
305  if ((*a)->NRows() != 2) {
306  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - can only realign pairwise alignments");
307  return;
308  }
309  subjectTSs.push_back(CreateTruncatedSequence(multiple, *a, localID, false, extension));
310 
311  CRef< CSeq_loc > subjectSeqLoc(new CSeq_loc);
312  subjectBioseq = &(subjectTSs.back()->truncatedSequence->SetSeq());
313  scope.AddBioseq(*subjectBioseq);
314  // Set up the QueryFactory for the subject sequences
 // NOTE(review): if SimpleSeqLocFromBioseq() fails, the subject is silently left out of
 // subjectVector while subjectTSs still holds it; the result-count check further down
 // looks like the only guard against the two getting out of step - confirm
315  if (SimpleSeqLocFromBioseq(subjectBioseq, *subjectSeqLoc)) {
316  CRef< blast::CBlastSearchQuery > bsqSubject(new blast::CBlastSearchQuery(*subjectSeqLoc, scope));
317  subjectVector.AddQuery(bsqSubject);
318  }
319 
320  }
321  CRef < blast::IQueryFactory > sequenceSubjects(new blast::CObjMgr_QueryFactory(subjectVector));
322 
323  // main blast engine
324  CRef < blast::CPsiBl2Seq > blastEngine;
325 
326  // setup searches: blast-sequence-vs-pssm
 // the query is a copy of the multiple alignment's PSSM
329  if (usePSSM) {
330  pssmQuery.Reset(new CPssmWithParameters);
331  pssmQuery->Assign(multiple->GetPSSM().GetPSSM());
332  pssmOptions.Reset(new blast::CPSIBlastOptionsHandle);
333 
334  // NR stats at 3/21/2006
 // hard-coded database length / sequence count passed to the options handle
335  pssmOptions->SetDbLength(1196146007);
336  pssmOptions->SetDbSeqNum(3479934);
337  pssmOptions->SetHitlistSize(subjectTSs.size());
338  pssmOptions->SetMatrixName("BLOSUM62");
339  pssmOptions->SetCompositionBasedStats(eCompositionBasedStats);
340  pssmOptions->SetSegFiltering(false);
341 
 // NOTE(review): the final constructor argument and closing of this call (original
 // line 346) are missing from the listing
342  blastEngine.Reset(new
343  blast::CPsiBl2Seq(
344  pssmQuery,
345  sequenceSubjects,
347  }
348 
349  // setup searches: blast-two-sequences
 // the query is the truncated master sequence, registered under local id -1
351  CRef < blast::IQueryFactory > sequenceQuery;
353  if (!usePSSM) {
354  masterTS = CreateTruncatedSequence(multiple, toRealign.front(), -1, true, extension);
355 
356  // Set up a QueryFactory for the query sequence
357  queryBioseq = &(masterTS->truncatedSequence->SetSeq());
358  scope.AddBioseq(*queryBioseq);
359  if (SimpleSeqLocFromBioseq(queryBioseq, *querySeqLoc)) {
360  CRef< blast::CBlastSearchQuery> bsqQuery(new blast::CBlastSearchQuery(*querySeqLoc, scope));
361  queryVector.AddQuery(bsqQuery);
362  }
363  sequenceQuery.Reset(new blast::CObjMgr_QueryFactory(queryVector));
364 
365  sequenceOptions.Reset(new blast::CBlastProteinOptionsHandle);
366  sequenceOptions->SetMatrixName("BLOSUM62");
367  sequenceOptions->SetHitlistSize(subjectTSs.size());
 // NOTE(review): the final constructor argument and closing of this call (original
 // line 372) are missing from the listing
368  blastEngine.Reset(new
369  blast::CPsiBl2Seq(
370  sequenceQuery,
371  sequenceSubjects,
373  }
374 
375  // actually do the alignment(s)
376  CRef < blast::CSearchResultSet > results(blastEngine->Run());
377 
378  // parse the alignments
 // results are matched to subjects by position, so the counts must agree
379  if (results->size() != toRealign.size())
380  {
381  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - did not get one result alignment per input sequence");
382  return;
383  }
384 
385  localID = 0;
386  for (unsigned int i=0; i<results->size(); ++i, ++localID) {
387 
388 // string err;
389 // WriteASNToFile("Seq-align-set.txt", (*results)[i].GetSeqAlign().GetObject(), false, &err);
390 
391  // create new alignment structure
 // row 0 is the master, row 1 the full (untruncated) dependent sequence
393  (*seqs)[0] = master;
394  (*seqs)[1] = subjectTSs[localID]->originalFullSequence;
395  string dependentTitle = subjectTSs[localID]->originalFullSequence->identifier->ToString();
396  unique_ptr < BlockMultipleAlignment > newAlignment(
 // both row doubles start at kMax_Double and are only overwritten if an E-value is found
398  newAlignment->SetRowDouble(0, kMax_Double);
399  newAlignment->SetRowDouble(1, kMax_Double);
400 
401  // check for valid or empty alignment
 // no hit is only a warning: the alignment is still finalized below, without aligned blocks
402  if (!((*results)[i].HasAlignments())) {
403  WARNINGMSG("BLAST did not find a significant alignment for "
404  << dependentTitle << " with " << (usePSSM ? string("PSSM") : master->identifier->ToString()));
405  } else {
406 
407  // get Seq-align; use first one for this result, which assumes blast returns the highest scoring alignment first
408  const CSeq_align& sa = (*results)[i].GetSeqAlign()->Get().front().GetObject();
409 
410  if (!sa.IsSetDim() || sa.GetDim() != 2 || sa.GetType() != CSeq_align::eType_partial) {
411  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - returned alignment not in expected format (dim 2, partial)");
412  } else if (sa.GetSegs().IsDenseg()) {
413 
414  // unpack Dense-seg
 // expect two ids, one length per segment and two starts per segment
415  const CDense_seg& ds = sa.GetSegs().GetDenseg();
416  if (!ds.IsSetDim() || ds.GetDim() != 2 || ds.GetIds().size() != 2 ||
417  (int)ds.GetLens().size() != ds.GetNumseg() || (int)ds.GetStarts().size() != 2 * ds.GetNumseg()) {
418  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - returned alignment format error (denseg dims)");
 // first id must be the master/consensus, second id must be this subject's local id
419  } else if (!SeqIdMatchesMaster(ds.GetIds().front().GetObject(), usePSSM) ||
420  !IsLocalID(ds.GetIds().back().GetObject(), localID)) {
421  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - returned alignment format error (ids)");
422  } else {
423 
424  // unpack segs
 // starts are consumed in pairs (master/consensus start, then dependent start) per segment
425  CDense_seg::TStarts::const_iterator s = ds.GetStarts().begin();
426  CDense_seg::TLens::const_iterator l, le = ds.GetLens().end();
427  for (l=ds.GetLens().begin(); l!=le; ++l) {
428  int masterStart = *(s++), dependentStart = *(s++);
429  if (masterStart >= 0 && dependentStart >= 0) { // skip gaps
 // shift from truncated-sequence coordinates back to full-sequence coordinates
430  dependentStart += subjectTSs[localID]->fromIndex;
431 
432  if (usePSSM) {
 // masterStart is a consensus coordinate here; the helper maps it onto the master
433  MapBlockFromConsensusToMaster(masterStart, dependentStart, *l, newAlignment.get(), multiple);
434  } else {
435  masterStart += masterTS->fromIndex;
436  UngappedAlignedBlock *newBlock = new UngappedAlignedBlock(newAlignment.get());
437  newBlock->SetRangeOfRow(0, masterStart, masterStart + (*l) - 1);
438  newBlock->SetRangeOfRow(1, dependentStart, dependentStart + (*l) - 1);
439  newBlock->width = *l;
440  newAlignment->AddAlignedBlockAtEnd(newBlock);
441  }
442  }
443  }
444  }
445 
446  } else {
447  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() - returned alignment in unrecognized format");
448  }
449 
450  // unpack score
 // scores are read even if the segment parsing above reported an error
451  if (!sa.IsSetScore() || sa.GetScore().size() == 0) {
452  WARNINGMSG("BLAST did not return an alignment score for " << dependentTitle);
453  } else {
454  CNcbiOstrstream oss;
455  oss << "BLAST result scores for " << dependentTitle << " vs. "
456  << (usePSSM ? string("PSSM") : master->identifier->ToString()) << ':';
457 
458  bool haveE = false;
459  CSeq_align::TScore::const_iterator sc, sce = sa.GetScore().end();
460  for (sc=sa.GetScore().begin(); sc!=sce; ++sc) {
 // only scores with a string id are considered
461  if ((*sc)->IsSetId() && (*sc)->GetId().IsStr()) {
462 
463  // E-value (put in status line and double values)
464  if ((*sc)->GetValue().IsReal() && (*sc)->GetId().GetStr() == "e_value") {
465  haveE = true;
466  newAlignment->SetRowDouble(0, (*sc)->GetValue().GetReal());
467  newAlignment->SetRowDouble(1, (*sc)->GetValue().GetReal());
468  string status = string("E-value: ") + NStr::DoubleToString((*sc)->GetValue().GetReal());
469  newAlignment->SetRowStatusLine(0, status);
470  newAlignment->SetRowStatusLine(1, status);
471  oss << ' ' << status;
472  }
473 
474  // raw score
475  else if ((*sc)->GetValue().IsInt() && (*sc)->GetId().GetStr() == "score") {
476  oss << " raw: " << (*sc)->GetValue().GetInt();
477  }
478 
479  // bit score
480  else if ((*sc)->GetValue().IsReal() && (*sc)->GetId().GetStr() == "bit_score") {
481  oss << " bit score: " << (*sc)->GetValue().GetReal();
482  }
483  }
484  }
485 
486  INFOMSG((string) CNcbiOstrstreamToString(oss));
487  if (!haveE)
488  WARNINGMSG("BLAST did not return an E-value for " << dependentTitle);
489  }
490  }
491 
492  // finalize and add new alignment to list
 // on success the pointer is released into the output list; on failure the unique_ptr
 // destroys the alignment at the end of this iteration
493  if (newAlignment->AddUnalignedBlocks() && newAlignment->UpdateBlockMapAndColors(false))
494  newAlignments->push_back(newAlignment.release());
495  else
496  ERRORMSG("error finalizing alignment");
497  }
498 
499  } catch (exception& e) {
500  ERRORMSG("CreateNewPairwiseAlignmentsByBlast() failed with exception: " << e.what());
501  }
502 }
503 
// CalculateSelfHitScores(): for every row of `multiple`, BLASTs that row's aligned
// footprint against the alignment's own PSSM (through CreateNewPairwiseAlignmentsByBlast()
// with usePSSM = true), stores the value read from the result's GetRowDouble(1) as the
// row's double, sets a matching status line, and finally logs the percentage of rows
// whose value is <= 0.01.
// NOTE(review): the signature line (original line 504) and several statements are absent
// from this listing: the declarations of `uaBlocks`, `seqs` and `results`, the condition
// guarding the registry warning, and original lines 542/544/561. The listing index
// mentions DELETE_ALL_AND_CLEAR, so presumably those lines free `rowPairs`/`results` -
// confirm against the real file.
505 {
506  if (!multiple) {
507  ERRORMSG("NULL multiple alignment");
508  return;
509  }
510 
511  int extension = 0;
513  WARNINGMSG("Can't get footprint residue extension from registry");
514 
 // the footprint is defined by the first and last aligned block, so at least one is needed
516  multiple->GetUngappedAlignedBlocks(&uaBlocks);
517  if (uaBlocks.size() == 0) {
518  ERRORMSG("Can't calculate self-hits with no aligned blocks");
519  return;
520  }
521 
522  // do BLAST-vs-pssm on all rows, using footprint for each row
 // one temporary two-row alignment (master + row) is built per row, including row 0
523  AlignmentList rowPairs;
524  unsigned int row;
525  for (row=0; row<multiple->NRows(); ++row) {
527  (*seqs)[0] = multiple->GetMaster();
528  (*seqs)[1] = multiple->GetSequenceOfRow(row);
529  BlockMultipleAlignment *newAlignment = new BlockMultipleAlignment(seqs, multiple->GetMaster()->parentSet->alignmentManager);
 // dependent limits = footprint of this row widened by `extension`, clamped to the sequence
530  const Block::Range *range = uaBlocks.front()->GetRangeOfRow(row);
531  newAlignment->alignDependentFrom = range->from - extension;
532  if (newAlignment->alignDependentFrom < 0)
533  newAlignment->alignDependentFrom = 0;
534  range = uaBlocks.back()->GetRangeOfRow(row);
535  newAlignment->alignDependentTo = range->to + extension;
536  if (newAlignment->alignDependentTo >= (int)multiple->GetSequenceOfRow(row)->Length())
537  newAlignment->alignDependentTo = multiple->GetSequenceOfRow(row)->Length() - 1;
538  rowPairs.push_back(newAlignment);
539  }
541  CreateNewPairwiseAlignmentsByBlast(multiple, rowPairs, &results, true);
 // results are matched to rows by position, so there must be exactly one per row
543  if (results.size() != multiple->NRows()) {
545  ERRORMSG("CalculateSelfHitScores() - CreateNewPairwiseAlignmentsByBlast() didn't return right # alignments");
546  return;
547  }
548 
549  // extract scores, assumes E-value is in RowDouble
 // a value of kMax_Double (or a negative one) is reported as "no detectable self hit"
550  AlignmentList::const_iterator r = results.begin();
551  for (row=0; row<multiple->NRows(); ++row, ++r) {
552  double score = (*r)->GetRowDouble(1);
553  multiple->SetRowDouble(row, score);
554  string status;
555  if (score >= 0.0 && score < kMax_Double)
556  status = string("Self hit E-value: ") + NStr::DoubleToString(score);
557  else
558  status = "No detectable self hit";
559  multiple->SetRowStatusLine(row, status);
560  }
562 
563  // print out overall self-hit rate
 // a row counts as a self hit when its stored value lies in [0, threshold]
564  static const double threshold = 0.01;
565  unsigned int nSelfHits = 0;
566  for (row=0; row<multiple->NRows(); ++row) {
567  if (multiple->GetRowDouble(row) >= 0.0 && multiple->GetRowDouble(row) <= threshold)
568  ++nSelfHits;
569  }
 // precision is set to 3 for the message and then set to 6 at the end of the stream
570  INFOMSG("Self hits with E-value <= " << setprecision(3) << threshold << ": "
571  << (100.0*nSelfHits/multiple->NRows()) << "% ("
572  << nSelfHits << '/' << multiple->NRows() << ')' << setprecision(6));
573 }
574 
575 double GetStandardProbability(char ch)
576 {
577  typedef map < char, double > CharDoubleMap;
578  static CharDoubleMap standardProbabilities;
579 
580  if (standardProbabilities.size() == 0) { // initialize static stuff
581  if (BLASTAA_SIZE != 28) {
582  ERRORMSG("GetStandardProbability() - confused by BLASTAA_SIZE != 28");
583  return 0.0;
584  }
585  double *probs = BLAST_GetStandardAaProbabilities();
586  for (unsigned int i=0; i<28; ++i) {
587  standardProbabilities[LookupCharacterFromNCBIStdaaNumber(i)] = probs[i];
588 // TRACEMSG("standard probability " << LookupCharacterFromNCBIStdaaNumber(i) << " : " << probs[i]);
589  }
590  sfree(probs);
591  }
592 
593  CharDoubleMap::const_iterator f = standardProbabilities.find(toupper((unsigned char) ch));
594  if (f != standardProbabilities.end())
595  return f->second;
596  WARNINGMSG("GetStandardProbability() - unknown residue character " << ch);
597  return 0.0;
598 }
599 
600 END_SCOPE(Cn3D)
User-defined methods of the data storage class.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
double * BLAST_GetStandardAaProbabilities(void)
Get the standard amino acid probabilities.
Definition: blast_util.c:1323
std::list< BlockMultipleAlignment * > AlignmentList
Definition: cn3d_blast.hpp:53
void CalculateSelfHitScores(const BlockMultipleAlignment *multiple)
Definition: cn3d_blast.cpp:504
void CreateNewPairwiseAlignmentsByBlast(const BlockMultipleAlignment *multiple, const AlignmentList &toRealign, AlignmentList *newAlignments, bool usePSSM)
Definition: cn3d_blast.cpp:263
std::vector< const Sequence * > SequenceList
void SetRowDouble(unsigned int row, double value) const
const BLAST_Matrix * GetPSSM(void) const
void SetRowStatusLine(unsigned int row, const std::string &value) const
const Sequence * GetMaster(void) const
std::vector< const UngappedAlignedBlock * > UngappedAlignedBlockList
const Sequence * GetSequenceOfRow(unsigned int row) const
void GetUngappedAlignedBlocks(UngappedAlignedBlockList *blocks) const
double GetRowDouble(unsigned int row) const
bool AddAlignedBlockAtEnd(UngappedAlignedBlock *newBlock)
void SetRangeOfRow(unsigned int row, int from, int to)
const Range * GetRangeOfRow(int row) const
unsigned int width
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
TSeqPos GetLength(void) const
Definition: Bioseq.cpp:360
CConstRef –.
Definition: ncbiobj.hpp:1266
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CObject –.
Definition: ncbiobj.hpp:180
CScope –.
Definition: scope.hpp:92
Definition: Seq_entry.hpp:56
std::string ToString(void) const
const MoleculeIdentifier * identifier
unsigned int Length(void) const
StructureSet * parentSet
AlignmentManager * alignmentManager
const Sequence * originalFullSequence
Definition: cn3d_blast.cpp:69
CRef< CSeq_entry > truncatedSequence
Definition: cn3d_blast.cpp:70
USING_SCOPE(objects)
double GetStandardProbability(char ch)
Definition: cn3d_blast.cpp:575
static void RemoveAllDataLoaders()
Definition: cn3d_blast.cpp:236
static CRef< TruncatedSequence > CreateTruncatedSequence(const BlockMultipleAlignment *multiple, const BlockMultipleAlignment *pair, int alnNum, bool isMaster, int extension)
Definition: cn3d_blast.cpp:76
static bool SeqIdMatchesMaster(const CSeq_id &sid, bool usePSSM)
Definition: cn3d_blast.cpp:178
static void MapBlockFromConsensusToMaster(int consensusStart, int dependentStart, int length, BlockMultipleAlignment *newAlignment, const BlockMultipleAlignment *multiple)
Definition: cn3d_blast.cpp:189
static bool SimpleSeqLocFromBioseq(const CRef< CBioseq > &bs, CSeq_loc &seqLoc)
Definition: cn3d_blast.cpp:245
vector< CRef< TruncatedSequence > > TruncatedSequences
Definition: cn3d_blast.cpp:74
USING_NCBI_SCOPE
Definition: cn3d_blast.cpp:60
static bool IsLocalID(const CSeq_id &sid, int localID)
Definition: cn3d_blast.cpp:154
bool RegistryGetInteger(const string &section, const string &name, int *value)
Definition: cn3d_tools.cpp:228
static const std::string REG_FOOTPRINT_RES
Definition: cn3d_tools.hpp:197
#define TRACEMSG(stream)
Definition: cn3d_tools.hpp:83
#define INFOMSG(stream)
Definition: cn3d_tools.hpp:84
#define WARNINGMSG(stream)
Definition: cn3d_tools.hpp:85
static const std::string REG_ADVANCED_SECTION
Definition: cn3d_tools.hpp:191
#define ERRORMSG(stream)
Definition: cn3d_tools.hpp:86
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static FILE * f
Definition: readconf.c:23
#define BLASTAA_SIZE
Size of aminoacid alphabet.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
vector< string > TRegisteredNames
void ResetDataAndHistory(void)
Clear all information in the scope except added data loaders.
Definition: scope.cpp:331
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define kMax_Double
Definition: ncbi_limits.h:208
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5181
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
bool IsSetDim(void) const
dimensionality Check if a value has been assigned to Dim data member.
Definition: Dense_seg_.hpp:396
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
TDim GetDim(void) const
Get the Dim member data.
Definition: Seq_align_.hpp:856
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
TType GetType(void) const
Get the Type member data.
Definition: Seq_align_.hpp:809
bool IsSetDim(void) const
dimensionality Check if a value has been assigned to Dim data member.
Definition: Seq_align_.hpp:837
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
bool IsSetScore(void) const
for whole alignment Check if a value has been assigned to Score data member.
Definition: Seq_align_.hpp:884
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
const TScore & GetScore(void) const
Get the Score member data.
Definition: Seq_align_.hpp:896
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
int i
range(_Ty, _Ty) -> range< _Ty >
unsigned int a
Definition: ncbi_localip.c:102
int toupper(Uchar c)
Definition: ncbictype.hpp:73
bool le(T x_, T y_, T round_)
Definition: njn_approx.hpp:84
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
The Object manager core.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static int * results[]
Declares CPsiBl2Seq, the C++ API for the PSI-BLAST 2 Sequences engine.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CRef< objects::CObjectManager > om
#define row(bind, expected)
Definition: string_bind.c:73
#define DELETE_ALL_AND_CLEAR(container, ContainerType)
Definition: su_private.hpp:82
unsigned char LookupNCBIStdaaNumberFromCharacter(char r)
Definition: su_pssm.cpp:125
char LookupCharacterFromNCBIStdaaNumber(unsigned char n)
Definition: su_pssm.cpp:142
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:58:30 2024 by modify_doxy.py rev. 669887