NCBI C++ ToolKit
splign_formatter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: splign_formatter.cpp 100425 2023-07-31 13:44:51Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Yuri Kapustin
27  *
28  * ===========================================================================
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
36 
49 
53 
54 #include <objmgr/seq_vector.hpp>
55 #include <objmgr/util/sequence.hpp>
56 
57 #include <util/value_convert.hpp>
58 
59 #include <algorithm>
60 
61 #include "splign_util.hpp"
62 #include "messages.hpp"
63 
64 #include <algorithm>
65 
68 
69 
// Constructor fragment: the signature line is not present in this listing.
// Initializes the stored result set from the `results` argument, then lets
// x_Init() set up the default sequence ids.
71  m_splign_results(results)
72 {
73  x_Init();
74 }
75 
76 
// Constructor fragment: the signature line is not present in this listing.
// Initializes the stored result set from splign.GetResult(), then lets
// x_Init() set up the default sequence ids.
78  m_splign_results(splign.GetResult())
79 {
80  x_Init();
81 }
82 
83 
// Body of the initialization helper called by both constructors (its
// signature line is not present in this listing).
// Points both the query id and the subject id at one shared placeholder
// CSeq_id built from "lcl|ID_not_set".
85 {
86  const char kSeqId_not_set [] = "lcl|ID_not_set";
87  CConstRef<CSeq_id> seqid_not_set (new CSeq_id (kSeqId_not_set));
88  m_QueryId = m_SubjId = seqid_not_set;
89 }
90 
91 
// Body of the id setter (its signature line is not present in this listing).
// Stores id1 as the query id and id2 as the subject id used by the
// formatting methods.
94 {
95  m_QueryId = id1;
96  m_SubjId = id2;
97 }
98 
99 
// Tail of the AsExonTable() definition (the first signature line is not
// present in this listing).
// Produces a tab-separated text table with one line per segment of every
// compartment in `results`, plus an optional poly-A/T line per compartment,
// and returns it as a string.
101  const CSplign::TResults* results, int flags) const
102 {
// A null `results` means "use the results stored in this formatter".
103  if(results == 0) {
104  results = &m_splign_results;
105  }
106 
107  CNcbiOstrstream oss;
108  oss.precision(3);
109 
// eTF_NoExonScores suppresses the trailing score column;
// eTF_UseFastaStyleIds selects AsFastaString() over GetSeqIdString(true).
110  const bool print_exon_scores ((flags & eTF_NoExonScores)? false: true);
111  const bool use_fasta_style_ids (flags & eTF_UseFastaStyleIds);
112 
113 
114  const string querystr (use_fasta_style_ids?
115  m_QueryId->AsFastaString():
116  m_QueryId->GetSeqIdString(true));
117  const string subjstr (use_fasta_style_ids?
118  m_SubjId->AsFastaString():
119  m_SubjId->GetSeqIdString(true));
120 
121  ITERATE(CSplign::TResults, ii, *results) {
122 
123  for(size_t i (0), seg_dim (ii->m_Segments.size()); i < seg_dim; ++i) {
124 
125  const CSplign::TSegment& seg (ii->m_Segments[i]);
126 
// Leading columns: strand sign fused with the compartment id, then the
// query and subject ids.
127  oss << (ii->m_QueryStrand? '+': '-')
128  << ii->m_Id << '\t'
129  << querystr << '\t'
130  << subjstr << '\t';
// Identity is printed only for exons; gap segments get a dash.
131  if(seg.m_exon) {
132  oss << seg.m_idty << '\t';
133  }
134  else {
135  oss << "-\t";
136  }
137 
// Length and query coordinates; box values are printed incremented by one.
138  oss << seg.m_len << '\t'
139  << seg.m_box[0] + 1 << '\t' << seg.m_box[1] + 1 << '\t';
140 
// Subject coordinates exist only for exons.
141  if(seg.m_exon) {
142  oss << seg.m_box[2] + 1 << '\t'
143  << seg.m_box[3] + 1 << '\t';
144  }
145  else {
146  oss << "-\t-\t";
147  }
148 
149  if(seg.m_exon) {
150 
// NOTE(review): listing line 152 is absent here; presumably it writes the
// column that sits between the annotation and the score - confirm against
// the real source.
151  oss << seg.m_annot << '\t';
153  if(print_exon_scores) {
154  oss << '\t' << seg.m_score;
155  }
156  }
157  else {
158 
// Gap segments are labelled by position: first, last, or in between.
159  if(i == 0) {
160  oss << "<L-Gap>\t";
161  }
162  else if(i == seg_dim - 1) {
163  oss << "<R-Gap>\t";
164  }
165  else {
166  oss << "<M-Gap>\t";
167  }
168  oss << '-';
169  if(print_exon_scores) {
170  oss << "\t-";
171  }
172  }
173  oss << endl;
174  }
175 
176  // print poly-A/T, if any
// A tail is reported only when m_PolyA lies strictly inside (0, m_QueryLen).
177  const bool polya_present (ii->m_PolyA > 0 && ii->m_PolyA < ii->m_QueryLen);
178 
179  if(polya_present) {
180 
181  size_t polya_len;
182  size_t start, stop;
183  char c1, c2;
// Plus strand: the tail runs from m_PolyA to the end of the query and is
// labelled poly-A. Minus strand: it runs from m_PolyA down to position 1
// and is labelled poly-T.
184  if(ii->m_QueryStrand) {
185  polya_len = ii->m_QueryLen - ii->m_PolyA;
186  c1 = '+';
187  c2 = 'A';
188  start = 1 + ii->m_PolyA;
189  stop = ii->m_QueryLen;
190  }
191  else {
192  polya_len = 1 + ii->m_PolyA;
193  c1 = '-';
194  c2 = 'T';
195  start = polya_len;
196  stop = 1;
197  }
198 
// Same column layout as a segment line, with dashes in the columns that
// do not apply to a poly-A/T tail.
199  oss << c1 << ii->m_Id << '\t' << querystr << '\t' << subjstr
200  << "\t-\t" << polya_len << '\t';
201  oss << start << '\t' << stop
202  << "\t-\t-\t<poly-" << c2 << ">\t-";
203  if(print_exon_scores) {
204  oss << "\t-";
205  }
206  oss << endl;
207  }
208  }
209 
210  return CNcbiOstrstreamToString(oss);
211 }
212 
213 
214 void MakeLeftHeader(size_t x, string* ps)
215 {
216  string & s (*ps);
217  const string strx (NStr::SizetToString(x));
218  copy(strx.begin(), strx.end(), s.begin() + 9 - strx.size());
219 }
220 
221 
// Tail of the AsAlignmentText() definition (the first signature line is not
// present in this listing).
// Renders every compartment with status eStatus_Ok as plain text: a header
// line per compartment and, for each exon, a four-row alignment (protein
// translation, query, match marks, subject) wrapped at `line_width` columns.
// `segnum` == -1 prints all segments; otherwise only the segment whose index
// equals `segnum` is written.
223  CRef<objects::CScope> scope,
224  const CSplign::TResults* results,
225  size_t line_width,
226  int segnum) const
227 
228 {
// A null `results` means "use the results stored in this formatter".
229  if(results == 0) {
230  results = &m_splign_results;
231  }
232 
// Number of flanking subject residues shown on the inner sides of exons.
233  const size_t extra_chars = 5;
234 
235  const string kNotSet ("id_not_set");
236  const string querystr (m_QueryId.IsNull()? kNotSet:
237  m_QueryId->GetSeqIdString(true));
238  const string subjstr (m_SubjId.IsNull()? kNotSet:
239  m_SubjId->GetSeqIdString(true));
240 
241  // query seq-vector
// NOTE(review): listing line 243, which must declare `sv_query`, is absent.
242  CBioseq_Handle bh_query (scope->GetBioseqHandle(*m_QueryId));
244  string query_sequence_sense;
245  sv_query.GetSeqData(sv_query.begin(), sv_query.end(), query_sequence_sense);
246 
247  // subject seq-vector
// NOTE(review): listing line 249, which must declare `sv_subj`, is absent.
248  CBioseq_Handle bh_subj (scope->GetBioseqHandle(*m_SubjId));
250 
251  CNcbiOstrstream oss;
252  oss.precision(3);
253  const string kTenner (10, ' '); // heading spaces
254 
255  ITERATE(CSplign::TResults, ii, *results) {
256 
// Only successfully aligned compartments are rendered.
257  if(ii->m_Status != CSplign::SAlignedCompartment::eStatus_Ok) {
258  continue;
259  }
260 
261  const bool qstrand = ii->m_QueryStrand;
262  const bool sstrand = ii->m_SubjStrand;
263  const char qc = qstrand? '+': '-';
264  const char sc = sstrand? '+': '-';
265 
// Compartment header: '>' + strand sign + id, then both sequence ids with
// their strand signs.
266  oss << endl << '>' << qc << ii->m_Id << '\t'
267  << querystr << '(' << qc << ")\t"
268  << subjstr << '(' << sc << ')' << endl;
269 
// The exon total is needed to recognise the last exon, which gets no
// trailing flank.
270  size_t exons_total = 0;
271  ITERATE(CSplign::TSegments, jj, ii->m_Segments) {
272  if(jj->m_exon) {
273  ++exons_total;
274  }
275  }
276 
// Query as aligned: the sense sequence on the plus strand, otherwise the
// reversed sequence mapped through SCompliment.
277  string query_sequence;
278  if(qstrand) {
279  query_sequence = query_sequence_sense;
280  }
281  else {
282  query_sequence.resize(query_sequence_sense.size());
283  transform(query_sequence_sense.rbegin(), query_sequence_sense.rend(),
284  query_sequence.begin(),SCompliment());
285  }
286 
// Translate the CDS only when m_Cds_start < m_Cds_stop.
287  string cds_sequence, query_protein;
288  if(ii->m_Cds_start < ii->m_Cds_stop) {
289 
290  cds_sequence.resize(ii->m_Cds_stop - ii->m_Cds_start + 1);
291  copy(query_sequence.begin() + ii->m_Cds_start,
292  query_sequence.begin() + ii->m_Cds_stop + 1,
293  cds_sequence.begin());
294  CSeqTranslator::Translate(cds_sequence, query_protein);
295  }
296 
// qframe states: -3 = no translation row, -2 = CDS present but not reached
// yet, -1 = at CDS start, 0..2 = position inside the current codon.
297  size_t exon_count = 0, seg_count = 0;
298  int query_aa_idx (0);
299  int qframe(ii->m_Cds_start < ii->m_Cds_stop? -2: -3);
300  ITERATE(CSplign::TSegments, jj, ii->m_Segments) {
301 
302  const CSplign::TSegment & s (*jj);
303  if(s.m_exon) {
304 
305  size_t qbeg = s.m_box[0];
306  size_t qfin = s.m_box[1];
307  size_t sbeg = s.m_box[2];
308  size_t sfin = s.m_box[3];
309 
// Extend the subject range by extra_chars before every exon but the first;
// the direction depends on the subject strand.
310  if(exon_count > 0) {
311  if(sstrand) {
312  sbeg -= extra_chars;
313  }
314  else {
315  sbeg += extra_chars;
316  }
317  }
318 
// ... and after every exon but the last.
319  if(exon_count + 1 < exons_total) {
320  if(sstrand) {
321  sfin += extra_chars;
322  }
323  else {
324  sfin -= extra_chars;
325  }
326  }
327 
// Order each coordinate pair so that s0 <= s1 and q0 <= q1.
328  size_t s0, s1;
329  if(sbeg < sfin) {
330  s0 = sbeg;
331  s1 = sfin;
332  }
333  else {
334  s0 = sfin;
335  s1 = sbeg;
336  }
337 
338  size_t q0, q1;
339  if(qbeg < qfin) {
340  q0 = qbeg;
341  q1 = qfin;
342  }
343  else {
344  q0 = qfin;
345  q1 = qbeg;
346  }
347 
348  // Load seq data
// Subject residues for [s0, s1]; on the minus strand they are reversed and
// mapped through SCompliment.
349  string str;
350  sv_subj.GetSeqData(sv_subj.begin() + TSeqPos(s0),
351  sv_subj.begin() + TSeqPos(s1 + 1), str);
352  vector<char> subj (str.size());
353  if(sstrand) {
354  copy(str.begin(), str.end(), subj.begin());
355  }
356  else {
357  reverse(str.begin(), str.end());
358  transform(str.begin(), str.end(),
359  subj.begin(), SCompliment());
360  }
361 
// On the minus strand query_sequence is already reversed, so convert the
// interval into offsets within that reversed string.
362  if(!qstrand) {
363  const size_t Q0 (q0);
364  q0 = query_sequence.size() - q1 - 1;
365  q1 = query_sequence.size() - Q0 - 1;
366  }
367 
368  vector<char> query (q1 - q0 + 1);
369  copy(query_sequence.begin() + q0, query_sequence.begin() + q1 + 1,
370  query.begin());
371 
// The segment is still processed when do_print is false, so that qframe and
// query_aa_idx keep advancing; only the output is suppressed.
372  const bool do_print (segnum == -1 || int(seg_count) == segnum);
373  if(do_print) {
374 
375  oss << endl << " Exon " << (exon_count + 1) << " ("
376  << (1 + s.m_box[0]) << '-' << (1 + s.m_box[1]) << ','
377  << (1 + s.m_box[2]) << '-' << (1 + s.m_box[3]) << ") "
378  << "Len = " << s.m_len << ' '
379  << "Identity = " << s.m_idty << endl;
380  }
381 
// Four output rows: l0 protein, l1 query, l2 match marks, l3 subject.
// Each starts with a ten-blank margin; l1 and l3 carry a coordinate in it.
382  string l0 (kTenner);
383  string l1 (kTenner);
384  string l2 (kTenner);
385  string l3 (kTenner);
386 
387  MakeLeftHeader(qbeg + 1, &l1);
388  MakeLeftHeader(sbeg + 1, &l3);
389 
// Pad the transcript with '#' for the flanking subject residues added above.
390  string trans;
391  if(exon_count > 0) {
392  trans.assign(extra_chars, '#');
393  }
394  trans.append(s.m_details);
395  if(exon_count + 1 < exons_total) {
396  trans.append(extra_chars, '#');
397  }
398 
// iq / is index the next unread character of `query` / `subj`.
399  size_t lines = 0;
400  for(size_t t = 0, td = trans.size(), iq = 0, is = 0; t < td; ++t) {
401 
402  char c = trans[t], c1, c2, c3, c0, c4;
403 
// Arm translation when the CDS start is reached ...
404  if(qframe == -2 && q0 + iq == ii->m_Cds_start) {
405  qframe = -1;
406  }
407 
// ... and switch it off at or beyond the CDS stop.
408  if(qframe >= 0 && q0 + iq >= ii->m_Cds_stop) {
409  qframe = -3;
410  }
411 
// Map the transcript symbol to the query (c1), marker (c2) and subject (c3)
// characters: '#' flank, 'M' match, 'R' replacement, 'I' subject-only
// column, 'D' query-only column.
412  switch(c) {
413 
414  case '#':
415  c1 = '.';
416  c2 = ' ';
417  c3 = subj[is++];
418  break;
419 
420  case 'M':
421  c1 = query[iq++];
422  c2 = '|';
423  c3 = subj[is++];
424  break;
425 
426  case 'R':
427  c1 = query[iq++];
428  c2 = ' ';
429  c3 = subj[is++];
430  break;
431 
432  case 'I':
433  c1 = '-';
434  c2 = ' ';
435  c3 = subj[is++];
436  break;
437 
438  case 'D':
439  c1 = query[iq++];
440  c2 = ' ';
441  c3 = '-';
442  break;
443 
444  default:
445  NCBI_THROW(CAlgoAlignException,
446  eInternal,
447  g_msg_UnknownTranscriptSymbol + c);
448  }
449 
// Advance the codon phase on every column that consumes a query residue and
// print the amino acid when the phase equals 1.
450  c0 = c4 = ' ';
451  if(qframe >= -1 && (c == 'M' || c == 'R' || c == 'D')) {
452  qframe = (qframe + 1) % 3;
453  }
454  if(c != '#' && c != 'I' && qframe == 1) {
455  c0 = query_protein[query_aa_idx++];
456  }
457 
458  l0.push_back(c0);
459  l1.push_back(c1);
460  l2.push_back(c2);
461  l3.push_back(c3);
462 
// Flush the four rows once the line holds line_width alignment columns.
463  if(l1.size() == 10 + line_width) {
464 
465  if(do_print) {
466  oss << l0 << endl << l1 << endl
467  << l2 << endl << l3 << endl << endl;
468  }
469 
470  ++lines;
471  l0 = l1 = l2 = l3 = kTenner;
// These q0/s0 are new locals that shadow the outer q0/s0 inside this block;
// they hold the coordinates printed in the margins of the next line.
472  size_t q0, s0;
473  if(qstrand) {
474  q0 = qbeg + iq;
475  }
476  else {
477  q0 = qbeg - iq;
478  }
479 
480  if(sstrand) {
481  s0 = sbeg + is;
482  }
483  else {
484  s0 = sbeg - is;
485  }
486 
487  MakeLeftHeader(q0 + 1, &l1);
488  MakeLeftHeader(s0 + 1, &l3);
489  }
490  }
491 
// Emit the last, partially filled line of the exon.
492  if(l1.size() > 10) {
493 
494  if(do_print) {
495  oss << l0 << endl << l1 << endl << l2 << endl << l3 << endl;
496  }
497 
498  l0 = l1 = l2 = l3 = kTenner;
499  }
500 
501  ++exon_count;
502  }
503  else {
504  if(qframe >= 0) qframe = -3; // disable further translation
505  }
506 
507  ++seg_count;
508  }
509  }
510 
511  return CNcbiOstrstreamToString(oss);
512 }
513 
514 
515 
/// Compute the fraction of alignment columns that are matches.
///
/// @param transcript  alignment transcript, one symbol per column; 'M'
///                    denotes a match
/// @return number of 'M' symbols divided by the transcript length, or 0
///         for an empty transcript
double CalcIdentity(const std::string& transcript)
{
    // An empty transcript would otherwise evaluate 0/0 and yield NaN.
    if(transcript.empty()) {
        return 0.0;
    }

    const double matches (
        double(std::count(transcript.begin(), transcript.end(), 'M')));
    return matches / double(transcript.size());
}
526 
527 
528 CRef<CSeq_align> CSplignFormatter::x_Compartment2SeqAlign (
529  const vector<size_t>& boxes,
530  const vector<string>& transcripts,
531  const vector<float>& scores ) const
532 {
533  const size_t num_exons (boxes.size() / 4);
534 
535  CRef<CSeq_align> sa (new CSeq_align);
536 
537  sa->Reset();
538 
539  // this is a discontinuous alignment
540  sa->SetType(CSeq_align::eType_disc);
541  sa->SetDim(2);
542 
543  // create seq-align-set
544  CSeq_align_set& sas = sa->SetSegs().SetDisc();
545  list<CRef<CSeq_align> >& sas_data = sas.Set();
546 
547  for(size_t i = 0; i < num_exons; ++i) {
548 
549  CRef<CSeq_align> sa (new CSeq_align);
550  sa->Reset();
551  sa->SetDim(2);
552  sa->SetType(CSeq_align::eType_global);
553 
554  // add dynprog score
555  CRef<CScore> score (new CScore);
556  score->SetId().SetStr("splign");
557  score->SetValue().SetReal(scores[i]);
558  CSeq_align::TScore& scorelist = sa->SetScore();
559  scorelist.push_back(score);
560 
561  // add percent identity
562  CRef<CScore> idty (new CScore);
563  idty->SetId().SetStr("idty");
564  idty->SetValue().SetReal(CalcIdentity(transcripts[i]));
565  scorelist.push_back(idty);
566 
567  CDense_seg& ds = sa->SetSegs().SetDenseg();
568 
569  const size_t* box = &(*(boxes.begin() + i*4));
570  const TSeqPos query_start = Convert(box[0]);
571  ENa_strand query_strand = box[0] <= box[1]? eNa_strand_plus:
572  eNa_strand_minus;
573  const TSeqPos subj_start = Convert(box[2]);
574  ENa_strand subj_strand = box[2] <= box[3]? eNa_strand_plus:
575  eNa_strand_minus;
576  ds.FromTranscript(query_start, query_strand, subj_start, subj_strand,
577  transcripts[i]);
578  // don't include strands when both are positive
579  if(query_strand == eNa_strand_plus && subj_strand == eNa_strand_plus) {
580  ds.ResetStrands();
581  }
582 
583  vector< CRef< CSeq_id > > &ids = ds.SetIds();
584 
585  CRef<CSeq_id> id_query (new CSeq_id());
586  id_query->Assign(*m_QueryId);
587  ids.push_back(id_query);
588 
589  CRef<CSeq_id> id_subj (new CSeq_id());
590  id_subj->Assign(*m_SubjId);
591  ids.push_back(id_subj);
592 
593  sas_data.push_back(sa);
594  }
595 
596  return sa;
597 }
598 
599 
/// Predicate usable with standard algorithms: reports whether isspace()
/// classifies the character as whitespace.
bool PIsSpace(char c)
{
    const bool is_blank (isspace(c) != 0);
    return is_blank;
}
603 
604 
605 
// Body of CreateSplicedExonChunk() (its signature line is not present in
// this listing).
// Stores `count` in the chunk field selected by the transcript symbol `cur`
// and returns the chunk.
// NOTE(review): listing line 608, which must create `chunk`, is absent.
607 {
// 'M' -> match, 'R' -> mismatch, 'I' -> genomic insertion,
// 'D' -> product insertion.
609  switch(cur) {
610  case 'M': chunk->SetMatch() = count; break;
611  case 'R': chunk->SetMismatch() = count; break;
612  case 'I': chunk->SetGenomic_ins() = count; break;
613  case 'D': chunk->SetProduct_ins() = count; break;
614  default:
// NOTE(review): listing line 615 (the head of the throw statement) is
// absent. The message is the unknown-symbol text followed by the offending
// character.
616  eInternal,
617  string(g_msg_UnknownTranscriptSymbol) + cur);
618  }
619  return chunk;
620 }
621 
622 
// Tail of the AsSeqAlignSet() definition (the first signature line and
// several interior lines are not present in this listing).
// Converts every compartment with status eStatus_Ok into a seq-align and
// appends it to the returned set: a spliced-seg when bit 0x0001 of `flag`
// is set, otherwise a discontinuous set of dense-segs.
624  const CSplign::TResults * results,
625  int flag)
626 const
627 {
// Flag bits: 0x0001 = spliced-seg output, 0x0002 = add per-exon parts,
// eAF_NoVersion = omit the version user-object.
628  const bool spliced_seg (flag & 0x0001);
629  const bool with_parts (flag & 0x0002);
630  const bool no_version (flag & eAF_NoVersion);
631 
// A null `results` means "use the results stored in this formatter".
632  if(results == 0) {
633  results = &(m_splign_results);
634  }
635 
// NOTE(review): listing line 636, which must declare `rv`, is absent.
637  CSeq_align_set::Tdata& data (rv->Set());
638 
639  ITERATE(CSplign::TResults, ii, *results) {
640 
641  if(ii->m_Status != CSplign::SAlignedCompartment::eStatus_Ok) continue;
642 
643  if(spliced_seg) {
644 
// NOTE(review): listing line 646 is absent here.
645  CRef<CSeq_align> sa (new CSeq_align);
647  sa->SetDim(2);
648 
649  CSpliced_seg& sseg (sa->SetSegs().SetSpliced());
650 
// NOTE(review): listing lines 651, 653 and 657 are absent; the two
// const_cast expressions below are therefore cut off in this listing.
652  CSeq_id * pseqid_query (const_cast<CSeq_id*>(
654  sseg.SetProduct_id(*pseqid_query);
655 
656  CSeq_id * pseqid_subj (const_cast<CSeq_id*>(
658  sseg.SetGenomic_id(*pseqid_subj);
659 
// NOTE(review): the second branch of each conditional (listing lines 661
// and 663) is absent.
660  sseg.SetProduct_strand((*ii).m_QueryStrand? eNa_strand_plus:
662  sseg.SetGenomic_strand((*ii).m_SubjStrand? eNa_strand_plus:
664 
// Product length is set only when non-zero; poly-A only when it lies
// strictly inside (0, query_len).
665  TSeqPos query_len = Convert((*ii).m_QueryLen);
666  TSeqPos polya = Convert((*ii).m_PolyA);
667  if(query_len > 0) {
668  sseg.SetProduct_length(query_len);
669  }
670 
671  if(polya > 0 && polya < query_len) {
672  sseg.SetPoly_a(polya);
673  }
674 
675  CSpliced_seg::TExons & exons (sseg.SetExons());
676  for(size_t i (0), seg_dim ((*ii).m_Segments.size()); i < seg_dim; ++i) {
677 
678  const CSplign::TSegment & seg ((*ii).m_Segments[i]);
// Gap segments produce no exon of their own; they only mark their exon
// neighbours as partial (see below).
679  if(seg.m_exon) {
680 
// NOTE(review): listing line 681, which must declare `exon`, is absent.
682 
// Normalise both coordinate pairs so that min <= max regardless of strand.
683  TSeqPos qmin, qmax, smin, smax;
684  if(seg.m_box[0] <= seg.m_box[1]) {
685  qmin = Convert(seg.m_box[0]);
686  qmax = Convert(seg.m_box[1]);
687  }
688  else {
689  qmax = Convert(seg.m_box[0]);
690  qmin = Convert(seg.m_box[1]);
691  }
692 
693  if(seg.m_box[2] <= seg.m_box[3]) {
694  smin = Convert(seg.m_box[2]);
695  smax = Convert(seg.m_box[3]);
696  }
697  else {
698  smax = Convert(seg.m_box[2]);
699  smin = Convert(seg.m_box[3]);
700  }
701 
702  exon->SetProduct_start().SetNucpos(qmin);
703  exon->SetProduct_end().SetNucpos(qmax);
704 
705  exon->SetGenomic_start(smin);
706  exon->SetGenomic_end(smax);
707 
708  CSpliced_exon::TScores::Tdata & scores (exon->SetScores().Set());
709 
710  {{
711  // add dynprog score
712  CRef<CScore> score (new CScore);
713  score->SetId().SetStr("splign");
714  score->SetValue().SetReal(seg.m_score);
715  scores.push_back(score);
716  }}
717 
718  {{
719  // add percent identity
720  CRef<CScore> score (new CScore);
721  score->SetId().SetStr("idty");
722  score->SetValue().SetReal(seg.m_idty);
723  scores.push_back(score);
724  }}
725 
// An exon preceded by a gap segment is flagged partial.
726  if( i>0 && !(*ii).m_Segments[i-1].m_exon) {// 5` partial
727  exon->SetPartial(true);
728  }
729 
// An exon preceded by another exon gets its acceptor dinucleotide from the
// first two annotation characters, provided the third one is '<'.
730  const size_t adim (seg.m_annot.size());
731  if( i>0 && (*ii).m_Segments[i-1].m_exon) {
732  // add acceptor residues if available
733  if(adim > 2 && seg.m_annot[2] == '<') {
734  string acc;
735  acc.push_back(seg.m_annot[0]);
736  acc.push_back(seg.m_annot[1]);
737  exon->SetAcceptor_before_exon().SetBases(acc);
738  }
739  }
740 
// An exon followed by a gap segment is flagged partial.
741  if(i+1<seg_dim && !(*ii).m_Segments[i+1].m_exon) {//3` partial
742  exon->SetPartial(true);
743  }
744 
// An exon followed by another exon gets its donor dinucleotide from the
// last two annotation characters, provided the one before them is '>'.
745  if(i+1<seg_dim && (*ii).m_Segments[i+1].m_exon) {
746  // add donor residues if available
747  if(adim > 2 && seg.m_annot[adim - 3] == '>') {
748  string dnr;
749  dnr.push_back(seg.m_annot[adim - 2]);
750  dnr.push_back(seg.m_annot[adim - 1]);
751  exon->SetDonor_after_exon().SetBases(dnr);
752  }
753  }
754 
755  if(with_parts) {
756 
757  // add parts
758  CSpliced_exon::TParts & parts (exon->SetParts());
// NOTE(review): listing line 760 (the head of the throw statement) is
// absent.
759  if(seg.m_details.size() == 0) {
761  eInternal,
762  "Alignment details not available");
763  }
764 
// Run-length encode the transcript: one chunk per maximal run of equal
// symbols. `count` starts at 0 because the first iteration always matches
// `cur` and raises it to 1.
// The loop iterator is also named `ii` and shadows the outer compartment
// iterator inside this loop.
765  char cur (seg.m_details[0]);
766  unsigned count (0);
767  ITERATE(string, ii, seg.m_details) {
768 
769  if(cur != *ii) {
770 
771  parts.push_back(CreateSplicedExonChunk(cur, count));
772  count = 1;
773  cur = *ii;
774  }
775  else {
776  ++count;
777  }
778  }
// Emit the final run.
779  parts.push_back(CreateSplicedExonChunk(cur, count));
780  }
781 
782  exons.push_back(exon);
783  }
784  }
785 
// Unless suppressed, attach a user-object of type "origin" whose field
// labelled "algo" carries the Splign version string with all whitespace
// removed.
786  if(!no_version) {
787 
788  CSeq_align::TExt& ext (sa->SetExt());
// NOTE(review): listing lines 789 and 793, which must declare `uo` and
// `uf`, are absent.
790  ext.push_back(uo);
791 
792  uo->SetType().SetStr("origin");
794  uo->SetData().push_back(uf);
795 
796  CRef<CObject_id> oid (new CObject_id);
797  oid->SetStr("algo");
798  uf->SetLabel(*oid);
799  string verstr (CSplign::s_GetVersion().Print("Splign", CVersion::fVersionInfo));
800  verstr.erase(remove_if(verstr.begin(), verstr.end(), PIsSpace),
801  verstr.end());
802  uf->SetData().SetStr(CUtf8::AsUTF8(verstr, eEncoding_UTF8));
803  }
804 
805  data.push_back(sa);
806  }
807  else {
808 
809  // format into a dense-seg
810 
// Collect the exon segments into parallel arrays and delegate to
// x_Compartment2SeqAlign(); gap segments are skipped.
811  vector<size_t> boxes;
812  vector<string> transcripts;
813  vector<float> scores;
814 
815  for(size_t i (0), seg_dim (ii->m_Segments.size()); i < seg_dim; ++i) {
816  const CSplign::TSegment& seg (ii->m_Segments[i]);
817  if(seg.m_exon) {
818  copy(seg.m_box, seg.m_box + 4, back_inserter(boxes));
819  transcripts.push_back(seg.m_details);
820  scores.push_back(seg.m_score);
821  }
822  }
823 
824  CRef<CSeq_align> sa (x_Compartment2SeqAlign(boxes,transcripts,scores));
825  data.push_back(sa);
826  }
827  }
828 
829  return rv;
830 }
831 
832 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
Definition: chainer.hpp:69
static string s_RunLengthEncode(const string &in)
CBioseq_Handle –.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
Definition: Score.hpp:57
CSeqVector –.
Definition: seq_vector.hpp:65
CSpliced_exon_chunk –.
CRef< objects::CSeq_align_set > AsSeqAlignSet(const CSplign::TResults *results=0, int flags=eAF_SplicedSegWithParts) const
Format alignment as a seq-align-set.
CConstRef< objects::CSeq_id > m_QueryId
void SetSeqIds(CConstRef< objects::CSeq_id > id1, CConstRef< objects::CSeq_id > id2)
CRef< objects::CSeq_align > x_Compartment2SeqAlign(const vector< size_t > &boxes, const vector< string > &transcripts, const vector< float > &scores) const
CSplignFormatter(const CSplign::TResults &results)
string AsExonTable(const CSplign::TResults *results=0, int flags=eTF_None) const
CConstRef< objects::CSeq_id > m_SubjId
const CSplign::TResults m_splign_results
string AsAlignmentText(CRef< objects::CScope > scope, const CSplign::TResults *results=0, size_t line_width=80, int segnum=-1) const
Format alignment as plain text.
CSplign is the central library object for computing spliced cDNA-to-genomic alignments.
Definition: splign.hpp:74
static CVersionAPI & s_GetVersion(void)
Retrieve the library's version object.
Definition: splign.cpp:206
vector< SAlignedCompartment > TResults
Definition: splign.hpp:299
void Print(const CCompactSAMApplication::AlignInfo &ai)
static uch flags
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
const_iterator begin(void) const
Definition: seq_vector.hpp:298
const_iterator end(void) const
Definition: seq_vector.hpp:305
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:1654
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2751
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
Definition: ncbistr.hpp:3889
@ eEncoding_UTF8
Definition: ncbistr.hpp:201
@ fVersionInfo
Print version info.
TData & SetData(void)
Assign a value to Data data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CScore > > Tdata
Definition: Score_set_.hpp:90
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
TExons & SetExons(void)
Assign a value to Exons data member.
void SetProduct_strand(TProduct_strand value)
Assign a value to Product_strand data member.
void SetProduct_length(TProduct_length value)
Assign a value to Product_length data member.
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
list< CRef< CUser_object > > TExt
Definition: Seq_align_.hpp:402
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
TExt & SetExt(void)
Assign a value to Ext data member.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
list< CRef< CSpliced_exon > > TExons
void SetPoly_a(TPoly_a value)
Assign a value to Poly_a data member.
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
list< CRef< CSpliced_exon_chunk > > TParts
void SetGenomic_strand(TGenomic_strand value)
Assign a value to Genomic_strand data member.
list< CRef< CSeq_align > > Tdata
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
int i
EIPRangeType t
Definition: ncbi_localip.c:101
int isspace(Uchar c)
Definition: ncbictype.hpp:69
T positive(T x_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
USING_SCOPE(objects)
CRef< CSpliced_exon_chunk > CreateSplicedExonChunk(char cur, size_t count)
void MakeLeftHeader(size_t x, string *ps)
bool PIsSpace(char c)
const char g_msg_UnknownTranscriptSymbol[]
Definition: messages.hpp:35
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)
Modified on Wed Apr 17 13:08:07 2024 by modify_doxy.py rev. 669887