NCBI C++ ToolKit
hyperclust.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: hyperclust.cpp
29 
30 Author: Jason Papadopoulos
31 
32 Contents: Distances and clustering of multiple alignment profiles
33 
34 ******************************************************************************/
35 
36 #include <ncbi_pch.hpp>
37 #include <corelib/ncbiapp.hpp>
38 #include <corelib/ncbifile.hpp>
41 #include <serial/iterator.hpp>
45 #include <algo/cobalt/cobalt.hpp>
46 #include "cobalt_app_util.hpp"
47 
50 USING_SCOPE(cobalt);
51 
53 {
54 private:
55  virtual void Init(void);
56  virtual int Run(void);
57  virtual void Exit(void);
58 
60 };
61 
62 void CMultiApplication::Init(void)
63 {
65  | fHideDryRun);
66 
67  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
68 
69  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
70  "Distances and clustering of multiple alignment"
71  " profiles");
72 
73  arg_desc->AddKey("i", "infile", "file containing names of alignment files",
75  arg_desc->AddOptionalKey("d", "defline_file",
76  "file containing deflines corrsponding to alignment files",
78  arg_desc->AddDefaultKey("first", "first",
79  "produce alignments from alignment number 'first' "
80  "to all succeeding alignments in the list",
82  arg_desc->AddDefaultKey("last", "index",
83  "Compare alignments from first to last against the rest "
84  "in the list. Indices start from 1 "
85  "(default indicates an all vs. all comparison).",
87  arg_desc->AddDefaultKey("g0", "penalty",
88  "gap open penalty for initial/terminal gaps",
90  arg_desc->AddDefaultKey("e0", "penalty",
91  "gap extend penalty for initial/terminal gaps",
93  arg_desc->AddDefaultKey("g1", "penalty",
94  "gap open penalty for middle gaps",
96  arg_desc->AddDefaultKey("e1", "penalty",
97  "gap extend penalty for middle gaps",
99  arg_desc->AddDefaultKey("matrix", "matrix",
100  "score matrix to use",
101  CArgDescriptions::eString, "BLOSUM62");
102  arg_desc->AddDefaultKey("v", "verbose",
103  "turn on verbose output",
105  arg_desc->AddDefaultKey("local", "local",
106  "reduce end gap penalties in profile-profile alignment",
108  arg_desc->AddFlag("pairs", "Report pairwise distances up to a cutoff "
109  "value instead of the distance matrix");
110  arg_desc->AddDefaultKey("cutoff", "distance",
111  "Maximum pairwise distance to report",
112  CArgDescriptions::eDouble, "0.75");
113  arg_desc->AddDefaultKey("out", "outfile", "Output file name",
115 
116  SetupArgDescriptions(arg_desc.release());
117 }
118 
121 {
122  blast::TSeqLocVector retval;
123 
124  CStreamLineReader line_reader(instream);
125  CFastaReader fr(line_reader, CFastaReader::fAssumeProt |
127  /*CFastaReader::fOneSeq |*/
129 
130  // read one query at a time, and use a separate seq_entry,
131  // scope, and lowercase mask for each query. This lets different
132  // query sequences have the same ID. Later code will distinguish
133  // between queries by using different elements of retval[]
134 
135  while (!line_reader.AtEOF()) {
136  CRef<CScope> scope(new CScope(objmgr));
137 
138  scope->AddDefaults();
139 
140  CRef<CSeq_entry> entry = fr.ReadOneSeq();
141 
142 
143  if (entry == 0) {
144  NCBI_THROW(CObjReaderException, eInvalid,
145  "Could not retrieve seq entry");
146  }
147 
148  scope->AddTopLevelSeqEntry(*entry);
150  CRef<CSeq_loc> seqloc(new CSeq_loc());
151  seqloc->SetWhole().Assign(*itr->GetId().front());
152  blast::SSeqLoc sl(seqloc, scope);
153  retval.push_back(sl);
154  }
155  return retval;
156 }
157 
158 void
159 x_SetScoreMatrix(const char *matrix_name,
160  CPSSMAligner& aligner)
161 {
162  if (strcmp(matrix_name, "BLOSUM62") == 0)
164  else if (strcmp(matrix_name, "BLOSUM45") == 0)
166  else if (strcmp(matrix_name, "BLOSUM80") == 0)
168  else if (strcmp(matrix_name, "PAM30") == 0)
169  aligner.SetScoreMatrix(&NCBISM_Pam30);
170  else if (strcmp(matrix_name, "PAM70") == 0)
171  aligner.SetScoreMatrix(&NCBISM_Pam70);
172  else if (strcmp(matrix_name, "PAM250") == 0)
173  aligner.SetScoreMatrix(&NCBISM_Pam250);
174 }
175 
176 static const int kScaleFactor = 100;
177 
178 static void
179 x_FillResidueFrequencies(double **freq_data,
180  vector<CSequence>& query_data)
181 {
182  int align_length = query_data[0].GetLength();
183  int num_seqs = (int)query_data.size();
184  for (int i = 0; i < align_length; i++) {
185  for (int j = 0; j < num_seqs; j++)
186  freq_data[i][query_data[j].GetLetter(i)]++;
187  }
188 }
189 
190 static void
191 x_NormalizeResidueFrequencies(double **freq_data,
192  int freq_size)
193 {
194  for (int i = 0; i < freq_size; i++) {
195  double sum = 0.0;
196 
197  // Compute the total weight of each row
198 
199  for (int j = 0; j < kAlphabetSize; j++) {
200  sum += freq_data[i][j];
201  }
202 
203  sum = 1.0 / sum;
204  for (int j = 0; j < kAlphabetSize; j++) {
205  freq_data[i][j] *= sum;
206  }
207  }
208 }
209 
210 static int
212  double **freq1, int len1,
213  double **freq2, int len2,
214  int end_gap_open,
215  int end_gap_extend)
216 {
217  const TNCBIScore (*sm) [NCBI_FSM_DIM] = aligner.GetMatrix().s;
218  const CNWAligner::TTranscript& transcript = aligner.GetTranscript(false);
219  int offset1 = -1;
220  int offset2 = -1;
221  int state1 = 0;
222  int state2 = 0;
223 
224  const size_t dim = transcript.size();
225  double dscore = 0.0;
226 
227  for(size_t i = 0; i < dim; ++i) {
228 
229  int wg1 = end_gap_open, ws1 = end_gap_extend;
230  int wg2 = end_gap_open, ws2 = end_gap_extend;
231 
232  if (offset1 >= 0 && offset1 < len1 - 1) {
233  wg1 = aligner.GetWg();
234  ws1 = aligner.GetWs();
235  }
236 
237  if (offset2 >= 0 && offset2 < len2 - 1) {
238  wg2 = aligner.GetWg();
239  ws2 = aligner.GetWs();
240  }
241 
242 
243  CNWAligner::ETranscriptSymbol ts = transcript[i];
244  switch(ts) {
245 
247  case CNWAligner::eTS_Match: {
248  state1 = state2 = 0;
249  ++offset1; ++offset2;
250  double accum = 0.0, sum = 0.0;
251  double diff_freq1[kAlphabetSize];
252  double diff_freq2[kAlphabetSize];
253 
254  for (int m = 1; m < kAlphabetSize; m++) {
255  if (freq1[offset1][m] < freq2[offset2][m]) {
256  accum += freq1[offset1][m] * (double)sm[m][m];
257  diff_freq1[m] = 0.0;
258  diff_freq2[m] = freq2[offset2][m] - freq1[offset1][m];
259  }
260  else {
261  accum += freq2[offset2][m] * (double)sm[m][m];
262  diff_freq1[m] = freq1[offset1][m] -
263  freq2[offset2][m];
264  diff_freq2[m] = 0.0;
265  }
266  }
267 
268  if (freq1[offset1][0] <= freq2[offset2][0]) {
269  for (int m = 1; m < kAlphabetSize; m++)
270  sum += diff_freq1[m];
271  } else {
272  for (int m = 1; m < kAlphabetSize; m++)
273  sum += diff_freq2[m];
274  }
275 
276  if (sum > 0) {
277  if (freq1[offset1][0] <= freq2[offset2][0]) {
278  for (int m = 1; m < kAlphabetSize; m++)
279  diff_freq1[m] /= sum;
280  } else {
281  for (int m = 1; m < kAlphabetSize; m++)
282  diff_freq2[m] /= sum;
283  }
284 
285  for (int m = 1; m < kAlphabetSize; m++) {
286  for (int n = 1; n < kAlphabetSize; n++) {
287  accum += diff_freq1[m] *
288  diff_freq2[n] *
289  (double)sm[m][n];
290  }
291  }
292  }
293  dscore += accum * kScaleFactor +
294  freq1[offset1][0] * aligner.GetWs() * (1-freq2[offset2][0]) +
295  freq2[offset2][0] * aligner.GetWs() * (1-freq1[offset1][0]);
296  }
297  break;
298 
299  case CNWAligner::eTS_Insert: {
300  ++offset2;
301  if(state1 != 1) dscore += wg1 * (1.0 - freq2[offset2][0]);
302  state1 = 1; state2 = 0;
303  dscore += ws1;
304  }
305  break;
306 
307  case CNWAligner::eTS_Delete: {
308  ++offset1;
309  if(state2 != 1) dscore += wg2 * (1.0 - freq1[offset1][0]);
310  state1 = 0; state2 = 1;
311  dscore += ws2;
312  }
313  break;
314 
315  default: {
316  break;
317  }
318  }
319  }
320 
321  return (int)(dscore + 0.5);
322 }
323 
324 static double **
325 x_GetProfile(vector<CSequence>& alignment)
326 {
327  double **freq_data;
328  int freq_size = alignment[0].GetLength();
329 
330  // build a set of residue frequencies for the
331  // sequences in the left subtree
332 
333  freq_data = new double* [freq_size];
334  freq_data[0] = new double[kAlphabetSize * freq_size];
335 
336  for (int i = 1; i < freq_size; i++)
337  freq_data[i] = freq_data[0] + kAlphabetSize * i;
338 
339  memset(freq_data[0], 0, kAlphabetSize * freq_size * sizeof(double));
340  x_FillResidueFrequencies(freq_data, alignment);
341  x_NormalizeResidueFrequencies(freq_data, freq_size);
342  return freq_data;
343 }
344 
345 /*
346 static void
347 x_FreeProfile(double **freq_data)
348 {
349  delete [] freq_data[0];
350  delete [] freq_data;
351 }
352 */
353 
354 int
355 x_AlignProfileProfile(double **freq1_data, int freq1_size,
356  double **freq2_data, int freq2_size,
357  CPSSMAligner& aligner,
358  bool local_alignment)
359 {
360  int score;
361  int end_gap_open = aligner.GetStartWg();
362  int end_gap_extend = aligner.GetStartWs();
363 
364  aligner.SetSequences((const double**)freq1_data, freq1_size,
365  (const double**)freq2_data, freq2_size,
366  kScaleFactor);
367  aligner.Run();
368  score = x_ScoreFromTranscriptCore(aligner,
369  freq1_data, freq1_size,
370  freq2_data, freq2_size,
371  local_alignment ? 0 : end_gap_open,
372  local_alignment ? 0 : end_gap_extend);
373  return score;
374 }
375 
376 static double
377 x_GetSelfScore(double **freq_data, int freq_size,
378  SNCBIFullScoreMatrix& matrix)
379 {
380  double score = 0.0;
381  for (int i = 0; i < freq_size; i++) {
382  for (size_t j = 1; j < kAlphabetSize; j++)
383  score += freq_data[i][j] * matrix.s[j][j];
384  }
385  return kScaleFactor * score;
386 }
387 
388 typedef struct {
389  char name[500];
390  vector<CSequence> align;
391  int length;
392  double self_score;
393  double **profile;
394 } SAlignEntry;
395 
396 static void
397 x_PrintTree(const TPhyTreeNode *tree, int level,
398  vector<SAlignEntry>& aligns)
399 {
400  int i, j;
401 
402  for (i = 0; i < level; i++)
403  printf(" ");
404 
405  printf("node: ");
406  if (tree->GetValue().GetId() >= 0)
407  printf("cluster %d (%s) ", tree->GetValue().GetId(),
408  aligns[tree->GetValue().GetId()].name);
409  if (tree->GetValue().IsSetDist())
410  printf("distance %lf", tree->GetValue().GetDist());
411  printf("\n");
412 
413  if (tree->IsLeaf())
414  return;
415 
416  TPhyTreeNode::TNodeList_CI child(tree->SubNodeBegin());
417 
418  j = 0;
419  while (child != tree->SubNodeEnd()) {
420  for (i = 0; i < level; i++)
421  printf(" ");
422 
423  printf("%d:\n", j);
424  x_PrintTree(*child, level + 1, aligns);
425 
426  j++;
427  child++;
428  }
429 }
430 
431 static void
433  CDistMethods::TMatrix& matrix)
434 {
435  if (node->IsLeaf())
436  return;
437 
438  vector<CTree::STreeLeaf> left_leaves;
439  vector<CTree::STreeLeaf> right_leaves;
440 
442  CTree::ListTreeLeaves(*child, left_leaves,
443  (*child)->GetValue().GetDist());
444  child++;
445  CTree::ListTreeLeaves(*child, right_leaves,
446  (*child)->GetValue().GetDist());
447 
448  for (size_t i = 0; i < left_leaves.size(); i++) {
449  for (size_t j = 0; j < right_leaves.size(); j++) {
450  int idx1 = left_leaves[i].query_idx;
451  double dist1 = left_leaves[i].distance;
452  int idx2 = right_leaves[j].query_idx;
453  double dist2 = right_leaves[j].distance;
454 
455  if (dist1 > 0)
456  dist1 = 1.0 / dist1;
457  if (dist2 > 0)
458  dist2 = 1.0 / dist2;
459  matrix(idx1, idx2) = matrix(idx2, idx1) = dist1 + dist2;
460  }
461  }
462  left_leaves.clear();
463  right_leaves.clear();
464 
465  child = node->SubNodeBegin();
466  while (child != node->SubNodeEnd()) {
467  x_FillNewDistanceMatrix(*child++, matrix);
468  }
469 }
470 
471 
472 int CMultiApplication::Run(void)
473 {
475 
476  // Process command line args
477  const CArgs& args = GetArgs();
478 
479  bool verbose = args["v"].AsBoolean();
480 
481  // set up the aligner
482  CPSSMAligner aligner;
483  CNWAligner::TScore Wg = -kScaleFactor * args["g1"].AsInteger();
484  CNWAligner::TScore Ws = -kScaleFactor * args["e1"].AsInteger();
485  CNWAligner::TScore EndWg = -kScaleFactor * args["g0"].AsInteger();
486  CNWAligner::TScore EndWs = -kScaleFactor * args["e0"].AsInteger();
487  x_SetScoreMatrix(args["matrix"].AsString().c_str(), aligner);
488  aligner.SetWg(Wg);
489  aligner.SetWs(Ws);
490  aligner.SetStartWg(EndWg);
491  aligner.SetStartWs(EndWs);
492  aligner.SetEndWg(EndWg);
493  aligner.SetEndWs(EndWs);
494 
495  // Set up data loaders
497 
498  // read all the alignments
499  CNcbiIstream& infile = args["i"].AsInputFile();
500  vector<SAlignEntry> align_list;
501  while (!infile.fail() && !infile.eof()) {
502  char buf[128];
503 
504  buf[0] = 0;
505  infile >> buf;
506  if (buf[0] == 0)
507  continue;
508 
509  CNcbiIfstream is((const char *)buf);
511 
512  align_list.push_back(SAlignEntry());
513  SAlignEntry& e = align_list.back();
514  for (size_t i = 0; i < align_read.size(); i++) {
515  e.align.push_back(CSequence(*align_read[i].seqloc, *align_read[i].scope));
516  }
517  e.length = e.align[0].GetLength();
518  e.profile = x_GetProfile(e.align);
520  aligner.GetMatrix());
521 
522  char *name_ptr = buf + strlen(buf) - 1;
523  while (name_ptr > buf &&
524  name_ptr[-1] != '/' &&
525  name_ptr[-1] != '\\') {
526  name_ptr--;
527  }
528  strcpy(e.name, name_ptr);
529  }
530 
531  int first_index = args["first"].AsInteger();
532  int last_index = args["last"].AsInteger();
533  if (last_index == 0) {
534  last_index = (int)align_list.size() - 1;
535  }
536  else {
537  // last_index starts from 1
538  last_index--;
539  }
540 
541  if (first_index != 0) {
542  NcbiCerr << "error: all-against-all alignment required" << endl;
543  return -1;
544  }
545 
546  CDistMethods::TMatrix distances(align_list.size(),
547  align_list.size());
548 
549  for (int i = first_index; i <= last_index; i++) {
550  for (size_t j = i + 1; j < align_list.size(); j++) {
551 
552  if (last_index < (int)align_list.size() - 1 && (int)j <= last_index) {
553  continue;
554  }
555 
556  int len1 = align_list[i].align[0].GetLength();
557  int len2 = align_list[j].align[0].GetLength();
558 
559  aligner.SetEndSpaceFree(false, false, false, false);
560  if (args["local"].AsBoolean() == true) {
561  if (len1 > 1.5 * len2 || len2 > 1.5 * len1) {
562  aligner.SetEndSpaceFree(true, true, true, true);
563  }
564  else if (len1 > 1.2 * len2 || len2 > 1.2 * len1) {
565  aligner.SetStartWs(EndWs / 2);
566  aligner.SetEndWs(EndWs / 2);
567  }
568  }
569 
570  int score = x_AlignProfileProfile(align_list[i].profile,
571  align_list[i].length,
572  align_list[j].profile,
573  align_list[j].length,
574  aligner,
575  args["local"].AsBoolean());
576 
577  if (verbose) {
578  printf("%s %s %.2f\n", align_list[i].name,
579  align_list[j].name, (double)score / 100);
580  }
581 
582  aligner.SetStartWs(EndWs);
583  aligner.SetEndWs(EndWs);
584 
585  distances(i, j) = distances(j, i) = 1.0 - (double)score *
586  (1.0 / align_list[i].self_score +
587  1.0 / align_list[j].self_score) / 2;
588  }
589  }
590  if (verbose) {
591  printf("\n\n");
592  }
593 
594  int num_clusters = (int)align_list.size();
595 
596  if (args["d"]) {
597  CNcbiIstream& dfile(args["d"].AsInputFile());
598 
599  while (!dfile.fail() && !dfile.eof()) {
600  char defline[500];
601  char name[128];
602 
603  dfile >> name;
604  dfile >> name;
605  dfile >> defline;
606 
607  defline[0] = 0;
608  dfile.getline(defline, sizeof(defline), '\n');
609  if (defline[0] == 0)
610  continue;
611 
612  for (int i = 0; i < num_clusters; i++) {
613  if (strstr(align_list[i].name, name)) {
614  snprintf(align_list[i].name, sizeof(align_list[i].name),
615  "%s %s", name, defline);
616  char *tmp = align_list[i].name;
617  while (*tmp) {
618  if (!isdigit(*tmp) && !isalpha(*tmp))
619  *tmp = '_';
620  tmp++;
621  }
622  }
623  }
624  }
625  }
626 
627  if (args["pairs"]) {
628  for (int i=0;i <= last_index; i++) {
629  for (int j=(i+1);j < num_clusters; j++) {
630 
631  if (last_index < (int)align_list.size() - 1 && j <= last_index) {
632  continue;
633  }
634 
635  if (distances(i, j) < args["cutoff"].AsDouble()) {
636  args["out"].AsOutputFile() << align_list[i].name << "\t"
637  << align_list[j].name << "\t"
638  << distances(i, j) << NcbiEndl;
639  }
640  }
641  }
642  }
643  else {
644  printf("%d\n", num_clusters);
645  for (int i = 0; i < num_clusters; i++) {
646  printf("%s\n", align_list[i].name);
647  for (int j = 0; j < num_clusters; j++) {
648  printf("%5.4f ", distances(i, j));
649  }
650  printf("\n");
651  }
652  }
653 
654  CTree tree(distances);
655  if (verbose)
656  x_PrintTree(tree.GetTree(), 0, align_list);
657 
658  CDistMethods::TMatrix new_distances(align_list.size(),
659  align_list.size());
660  x_FillNewDistanceMatrix(tree.GetTree(), new_distances);
661 
662  //--------------------------------
663  if (verbose) {
664  printf("\n\n");
665  printf("\n\nNew Distance matrix:\n ");
666  for (int i = (int)align_list.size() - 1; i > 0; i--)
667  printf("%5d ", i);
668  printf("\n");
669 
670  for (int i = 0; i < (int)align_list.size() - 1; i++) {
671  printf("%2d: ", i);
672  for (int j = (int)align_list.size() - 1; j > i; j--) {
673  printf("%5.3f ", new_distances(i, j));
674  }
675  printf("\n");
676  }
677  printf("\n\n");
678 
679  printf("\n\nPercent relative error:\n ");
680  for (int i = (int)align_list.size() - 1; i > 0; i--)
681  printf("%5d ", i);
682  printf("\n");
683 
684  for (int i = 0; i < (int)align_list.size() - 1; i++) {
685  printf("%2d: ", i);
686  for (int j = (int)align_list.size() - 1; j > i; j--) {
687  printf("%5.2f ", 100*fabs(new_distances(i, j) -
688  distances(i, j)) / distances(i, j));
689  }
690  printf("\n");
691  }
692  printf("\n\n");
693  }
694  //--------------------------------
695 
696  return 0;
697 }
698 
699 void CMultiApplication::Exit(void)
700 {
701  SetDiagStream(0);
702 }
703 
704 int main(int argc, const char* argv[])
705 {
706  return CMultiApplication().AppMain(argc, argv, 0, eDS_Default, 0);
707 }
static const int kAlphabetSize
The aligner internally works only with the ncbistdaa alphabet.
Definition: base.hpp:119
Data loader implementation that uses the blast databases.
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
virtual void Exit(void)
Cleanup on application exit.
Definition: cobalt_app.cpp:686
virtual void Init(void)
Initialize the application.
Definition: cobalt_app.cpp:118
virtual int Run(void)
Run the application.
Definition: cobalt_app.cpp:368
virtual void Exit(void)
Cleanup on application exit.
virtual void Init(void)
Initialize the application.
CRef< CObjectManager > m_ObjMgr
Definition: cobalt_app.cpp:88
virtual int Run(void)
Run the application.
CObjectManager –.
CScope –.
Definition: scope.hpp:92
Simple implementation of ILineReader for i(o)streams.
definition of a Culling tree
Definition: ncbi_tree.hpp:100
A wrapper for controlling access to the phylogenetic tree generated by CDistMethods.
Definition: tree.hpp:54
static void ListTreeLeaves(const TPhyTreeNode *node, vector< STreeLeaf > &node_list, double curr_dist=0)
Traverse a tree below a given starting point, listing all leaves encountered along the way.
Definition: tree.cpp:98
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Interface for CMultiAligner.
Operators to edit gaps in sequences.
static char tmp[3200]
Definition: utf8.c:42
void SetStartWg(TScore value)
TTranscript GetTranscript(bool reversed=true) const
Definition: nw_aligner.cpp:909
void SetEndWs(TScore value)
virtual CNWAligner::TScore Run(void)
TScore GetStartWs() const
void SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat)
SNCBIFullScoreMatrix & GetMatrix()
TScore GetWs(void) const
Definition: nw_aligner.hpp:167
TScore GetWg(void) const
Definition: nw_aligner.hpp:166
void SetEndWg(TScore value)
vector< ETranscriptSymbol > TTranscript
Definition: nw_aligner.hpp:199
void SetWs(TScore value)
void SetSequences(const char *seq1, size_t len1, const char *seq2, size_t len2, bool verify=true)
TScore GetStartWg() const
void SetWg(TScore value)
void SetEndSpaceFree(bool Left1, bool Right1, bool Left2, bool Right2)
Definition: nw_aligner.cpp:192
void SetStartWs(TScore value)
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1312
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
Definition: ncbiargs.hpp:590
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
Definition: ncbidiag.cpp:8083
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
Definition: fasta.cpp:312
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
@ fNoParseID
Generate an ID (whole defline -> title)
Definition: fasta.hpp:90
@ fForceType
Force specified type regardless of accession.
Definition: fasta.hpp:89
@ fParseRawID
Try to identify raw accessions.
Definition: fasta.hpp:97
@ fAssumeProt
Assume prots unless accns indicate otherwise.
Definition: fasta.hpp:88
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
#define NcbiEndl
Definition: ncbistre.hpp:548
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NcbiCerr
Definition: ncbistre.hpp:544
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
Definition: ncbi_tree.hpp:160
TNodeList::const_iterator TNodeList_CI
Definition: ncbi_tree.hpp:110
bool IsLeaf() const
Report whether this is a leaf node.
Definition: ncbi_tree.hpp:296
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
Definition: ncbi_tree.hpp:166
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
USING_SCOPE(objects)
static void x_NormalizeResidueFrequencies(double **freq_data, int freq_size)
Definition: hyperclust.cpp:191
static void x_FillResidueFrequencies(double **freq_data, vector< CSequence > &query_data)
Definition: hyperclust.cpp:179
static double x_GetSelfScore(double **freq_data, int freq_size, SNCBIFullScoreMatrix &matrix)
Definition: hyperclust.cpp:377
static void x_PrintTree(const TPhyTreeNode *tree, int level, vector< SAlignEntry > &aligns)
Definition: hyperclust.cpp:397
static int x_ScoreFromTranscriptCore(CPSSMAligner &aligner, double **freq1, int len1, double **freq2, int len2, int end_gap_open, int end_gap_extend)
Definition: hyperclust.cpp:211
int x_AlignProfileProfile(double **freq1_data, int freq1_size, double **freq2_data, int freq2_size, CPSSMAligner &aligner, bool local_alignment)
Definition: hyperclust.cpp:355
static blast::TSeqLocVector x_GetSeqLocFromStream(CNcbiIstream &instream, CObjectManager &objmgr)
Definition: hyperclust.cpp:120
void x_SetScoreMatrix(const char *matrix_name, CPSSMAligner &aligner)
Definition: hyperclust.cpp:159
static double ** x_GetProfile(vector< CSequence > &alignment)
Definition: hyperclust.cpp:325
int main(int argc, const char *argv[])
Definition: hyperclust.cpp:704
static void x_FillNewDistanceMatrix(const TPhyTreeNode *node, CDistMethods::TMatrix &matrix)
Definition: hyperclust.cpp:432
USING_NCBI_SCOPE
Definition: hyperclust.cpp:48
static const int kScaleFactor
Definition: hyperclust.cpp:176
char * buf
int i
yy_size_t n
CSequnceHelper< CObject > CSequence
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
#define fabs(v)
Definition: ncbi_dispd.c:46
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
The Object manager core.
true_type verbose
Definition: processing.cpp:890
const SNCBIPackedScoreMatrix NCBISM_Pam30
Definition: sm_pam30.c:92
const SNCBIPackedScoreMatrix NCBISM_Blosum62
Definition: sm_blosum62.c:92
const SNCBIPackedScoreMatrix NCBISM_Pam250
Definition: sm_pam250.c:92
#define NCBI_FSM_DIM
Recommended approach: unpack and index directly.
Definition: raw_scoremat.h:85
const SNCBIPackedScoreMatrix NCBISM_Blosum80
Definition: sm_blosum80.c:92
const SNCBIPackedScoreMatrix NCBISM_Pam70
Definition: sm_pam70.c:92
const SNCBIPackedScoreMatrix NCBISM_Blosum45
The standard matrices.
Definition: sm_blosum45.c:92
int TNCBIScore
data types
Definition: raw_scoremat.h:45
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
char name[500]
Definition: hyperclust.cpp:389
vector< CSequence > align
Definition: hyperclust.cpp:390
double self_score
Definition: hyperclust.cpp:392
double ** profile
Definition: hyperclust.cpp:393
TNCBIScore s[128][128]
Definition: raw_scoremat.h:87
Modified on Tue Apr 30 06:39:20 2024 by modify_doxy.py rev. 669887