NCBI C++ ToolKit
tabular_fmt.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tabular_fmt.cpp 101530 2023-12-27 15:15:01Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio, Wratko Hlavina, Eyal Mozes
27  *
28  * File Description:
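29  * Column formatters that write individual Seq-align properties (sequence ids, start/end, strand, lengths, percent identity and coverage, gap and mismatch data, e-value and other scores) to an output stream for tabular display.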
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <util/xregexp/regexp.hpp>
36 #include <util/range_coll.hpp>
37 #include <objmgr/feat_ci.hpp>
38 #include <objmgr/seqdesc_ci.hpp>
46 
47 #include <objmgr/seqdesc_ci.hpp>
51 #include <objects/seq/MolInfo.hpp>
57 
59 
63 
64 #include <util/value_convert.hpp>
65 
66 #include <limits>
67 
70 
71 
72 /////////////////////////////////////////////////////////////////////////////
73 
75 : m_Row(row)
76 {
77 }
78 
80 {
81  if (m_Row == 0) {
82  ostr << "query";
83  } else if (m_Row == 1) {
84  ostr << "subject";
85  } else {
87  "only pairwise alignments are supported");
88  }
89 }
90 
92 {
93  ostr << "All ";
94  PrintHeader(ostr);
95  ostr << " Seq-id(s), separated by a ';'";
96 }
97 
99  const CSeq_align& align)
100 {
101  CSeq_id_Handle idh =
103  CScope::TIds ids = m_Scores->GetScope().GetIds(idh);
104  ITERATE (CScope::TIds, it, ids) {
105  ostr << *it;
106  CScope::TIds::const_iterator i = it;
107  ++i;
108  if (i != ids.end()) {
109  ostr << ';';
110  }
111  }
112 }
113 
114 /////////////////////////////////////////////////////////////////////////////
115 
117  sequence::EGetIdType id_type,
118  bool tag_only,
119  bool protein)
120 : m_Row(row)
121 , m_GetIdType(id_type)
122 , m_TagOnly(tag_only)
123 , m_Protein(protein)
124 {
125 }
126 
128 {
129  PrintHeader(ostr);
130  switch (m_GetIdType) {
132  ostr << " accession.version";
133  break;
134 
136  ostr << " GI";
137  break;
138 
140  ostr << " id as it appears in alignment";
141  break;
142 
143  default:
144  NCBI_THROW(CException, eUnknown, "Unimplemented seq-id type");
145  }
146  if (m_TagOnly) {
147  ostr << "; tag only for gnl seq-ids";
148  }
149 
150 }
151 
153 {
154  if (m_Protein) {
155  ostr << "protein";
156  } else if (m_Row == 0) {
157  ostr << "query";
158  } else if (m_Row == 1) {
159  ostr << "subject";
160  } else {
162  "only pairwise alignments are supported");
163  }
165  ostr << " gi";
166  }
167 }
168 
170  const CSeq_align& align)
171 {
172  CSeq_id_Handle idh =
174  if (m_Protein) {
175  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(idh);
176  if (!bsh) {
177  ostr << "NA";
178  return;
179  }
180  CFeat_CI feat_iter(bsh, CSeqFeatData::e_Cdregion);
181  if (!feat_iter) {
182  ostr << "NA";
183  return;
184  }
185  idh = CSeq_id_Handle::GetHandle(*feat_iter->GetProduct().GetId());
186  }
187  CSeq_id_Handle best =
188  sequence::GetId(idh, m_Scores->GetScope(), m_GetIdType);
189  if ( !best ) {
190  best = idh;
191  }
192  if (m_TagOnly) {
193  if (best.GetSeqId()->IsGeneral()) {
194  best.GetSeqId()->GetGeneral().GetTag().AsString(ostr);
195  } else {
196  string acc;
197  best.GetSeqId()->GetLabel(&acc, CSeq_id::eContent);
198  ostr << acc;
199  }
200  } else {
201  ostr << best;
202  }
203 }
204 
205 /////////////////////////////////////////////////////////////////////////////
206 
208 : m_Row(row), m_NoMinus(nominus)
209 {
210 }
211 
213 {
214  ostr << "Start of alignment in ";
215  if (m_Row == 0) {
216  ostr << "query";
217  } else if (m_Row == 1) {
218  ostr << "subject";
219  } else {
221  "only pairwise alignments are supported");
222  }
223 }
224 
226 {
227  if (m_Row == 0) {
228  ostr << "qstart";
229  } else if (m_Row == 1) {
230  ostr << "sstart";
231  } else {
233  "only pairwise alignments are supported");
234  }
235 }
236 
238  const CSeq_align& align)
239 {
240  // determine global flip status
241 
242  if(m_NoMinus) {
243  ostr << align.GetSeqStart(m_Row) + 1;
244  return;
245  }
246 
247  if (m_Row == 0) {
248  TSeqRange r = align.GetSeqRange(m_Row);
249  ostr << min(r.GetFrom(), r.GetTo()) + 1;
250  } else {
251  TSeqPos start = align.GetSeqStart(m_Row);
252  TSeqPos stop = align.GetSeqStop(m_Row);
253 
254  bool qneg = (align.GetSeqStrand(0) == eNa_strand_minus);
255  bool sneg = (align.GetSeqStrand(1) == eNa_strand_minus);
256 
257  if (qneg) {
258  sneg = !sneg;
259  }
260  if (sneg) {
261  std::swap(start, stop);
262  }
263 
264  ostr << start + 1;
265  }
266 }
267 
268 
269 /////////////////////////////////////////////////////////////////////////////
270 
272 : m_Row(row), m_NoMinus(nominus)
273 {
274 }
275 
277 {
278  ostr << "End of alignment in ";
279  if (m_Row == 0) {
280  ostr << "query";
281  } else if (m_Row == 1) {
282  ostr << "subject";
283  } else {
285  "only pairwise alignments are supported");
286  }
287 }
288 
290 {
291  if (m_Row == 0) {
292  ostr << "qend";
293  } else if (m_Row == 1) {
294  ostr << "send";
295  } else {
297  "only pairwise alignments are supported");
298  }
299 }
300 
302  const CSeq_align& align)
303 {
304  if(m_NoMinus) {
305  ostr << align.GetSeqStop(m_Row) + 1;
306  return;
307  }
308 
309  if (m_Row == 0) {
310  TSeqRange r = align.GetSeqRange(m_Row);
311  ostr << max(r.GetFrom(), r.GetTo()) + 1;
312  } else {
313  TSeqPos start = align.GetSeqStart(m_Row);
314  TSeqPos stop = align.GetSeqStop(m_Row);
315 
316  bool qneg = (align.GetSeqStrand(0) == eNa_strand_minus);
317  bool sneg = (align.GetSeqStrand(1) == eNa_strand_minus);
318 
319  if (qneg) {
320  sneg = !sneg;
321  }
322  if (sneg) {
323  std::swap(start, stop);
324  }
325 
326  ostr << stop + 1;
327  }
328 }
329 
330 /////////////////////////////////////////////////////////////////////////////
331 
333 : m_Row(row)
334 {
335 }
336 
338 {
339  ostr << "Strand of alignment in ";
340  if (m_Row == 0) {
341  ostr << "query";
342  } else if (m_Row == 1) {
343  ostr << "subject";
344  } else {
346  "only pairwise alignments are supported");
347  }
348 }
349 
351 {
352  if (m_Row == 0) {
353  ostr << "qstrand";
354  } else if (m_Row == 1) {
355  ostr << "sstrand";
356  } else {
358  "only pairwise alignments are supported");
359  }
360 }
361 
363  const CSeq_align& align)
364 {
365  switch (align.GetSeqStrand(m_Row)) {
366  case eNa_strand_plus:
367  ostr << '+';
368  break;
369 
370  case eNa_strand_minus:
371  ostr << '-';
372  break;
373 
374  case eNa_strand_both:
375  ostr << 'b';
376  break;
377 
378  default:
379  ostr << '?';
380  break;
381  }
382 }
383 
384 /////////////////////////////////////////////////////////////////////////////
385 
387 : m_Row(row)
388 {
389 }
390 
392 {
393  ostr << "Length of ";
394  if (m_Row == 0) {
395  ostr << "query";
396  } else if (m_Row == 1) {
397  ostr << "subject";
398  } else {
400  "only pairwise alignments are supported");
401  }
402  ostr << " sequence";
403 }
404 
406 {
407  if (m_Row == 0) {
408  ostr << "qlen";
409  } else if (m_Row == 1) {
410  ostr << "slen";
411  } else {
413  "only pairwise alignments are supported");
414  }
415 }
416 
418  const CSeq_align& align)
419 {
420  double score =
421  m_Scores->GetScore(align,
422  m_Row == 0 ? "query_length" : "subject_length");
423  if (score == numeric_limits<double>::quiet_NaN()) {
424  score = 0;
425  }
426  ostr << (int) score;
427 }
428 
429 
430 /////////////////////////////////////////////////////////////////////////////
431 
433 {
434  ostr << "Alignment length";
435 }
436 
438 {
439  ostr << "length";
440 }
441 
443  const CSeq_align& align)
444 {
445  ostr << (int)m_Scores->GetScore(align, "align_length");
446 }
447 
448 /////////////////////////////////////////////////////////////////////////////
449 
451 {
452  ostr << "Alignment length not counting gaps";
453 }
454 
456 {
457  ostr << "length_ungap";
458 }
459 
461  const CSeq_align& align)
462 {
463  ostr << (int)m_Scores->GetScore(align, "align_length_ungap");
464 }
465 
466 //////////////////////////////////////////////////////////////////////////////
467 
469 : m_Gapped(gapped)
470 {
471 }
472 
474 {
475  ostr << "Percentage of identical matches";
476  if (!m_Gapped) {
477  ostr << " excluding gaps on either row";
478  }
479 }
480 
482 {
483  ostr << "pident";
484  if (m_Gapped) {
485  ostr << "(gapped)";
486  } else {
487  ostr << "(ungapped)";
488  }
489 }
490 
492  const CSeq_align& align)
493 {
494  double pct_id = m_Scores->GetScore(align,
495  m_Gapped ? "pct_identity_gap"
496  : "pct_identity_ungap");
497  if (pct_id != 100) {
498  pct_id = min(pct_id, 99.99);
499  }
500  ostr << pct_id;
501 }
502 
503 /////////////////////////////////////////////////////////////////////////////
504 
506 {
507  ostr << (m_Row == 0 ? "Percent coverage of query in subject"
508  : "Percent coverage of subject in query");
509 }
510 
512 {
513  ostr << m_Header;
514 }
515 
517  const CSeq_align& align)
518 {
519  double pct_cov = m_Scores->GetScore(align, m_Row == 0
520  ? "pct_coverage" : "subject_coverage");
521  if (pct_cov != 100) {
522  pct_cov = min(pct_cov, 99.99);
523  }
524  ostr << pct_cov;
525 }
526 
527 /////////////////////////////////////////////////////////////////////////////
528 
530 {
531  ostr << "Number of gap openings";
532 }
533 
535 {
536  ostr << "gapopen";
537 }
539  const CSeq_align& align)
540 {
541  ostr << align.GetNumGapOpenings();
542 }
543 
544 /////////////////////////////////////////////////////////////////////////////
545 
547 {
548  ostr << "Number of identical matches";
549 }
551 {
552  ostr << "identities";
553 }
555  const CSeq_align& align)
556 {
557  ostr << (int)m_Scores->GetScore(align, "num_ident");
558 }
559 
560 /////////////////////////////////////////////////////////////////////////////
561 
563 {
564  ostr << "Number of mismatches";
565 }
567 {
568  ostr << "mismatch";
569 }
571  const CSeq_align& align)
572 {
573  ostr << (int)m_Scores->GetScore(align, "num_mismatch");
574 }
575 
576 /////////////////////////////////////////////////////////////////////////////
577 
578 void s_AlignToSeqRanges(const CSeq_align& align, int row, list<TSeqRange>& ranges)
579 {
580  // this should be added to CSeq_align as a list instead of RangeColl version of GetAlignedBases
581  switch (align.GetSegs().Which()) {
583  {{
584  const CDense_seg& ds = align.GetSegs().GetDenseg();
585  for (CDense_seg::TNumseg i = 0; i < ds.GetNumseg(); ++i) {
586  bool is_gapped = false;
587  for (CDense_seg::TDim j = 0; j < ds.GetDim(); ++j) {
588  if (ds.GetStarts()[i * ds.GetDim() + j] == -1)
589  {
590  is_gapped = true;
591  break;
592  }
593  }
594  if (!is_gapped) {
595  TSignedSeqPos start = ds.GetStarts()[i * ds.GetDim() + row];
597  range.SetFrom(start);
598  range.SetLength(ds.GetLens()[i]);
599  ranges.push_back(range);
600  }
601  }
602  }}
603  break;
605  {{
607  align.GetSegs().GetDisc().Get()) {
608  s_AlignToSeqRanges(*(*iter), row, ranges);
609  }
610  }}
611  break;
612  default:
613  NCBI_THROW(CSeqalignException, eUnsupported,
614  "smismatchpos and qmismatchpos currently do not handle "
615  "this type of alignment.");
616  }
617 }
618 
620  : m_Row(row)
621 {
622 }
623 
625 {
626  ostr << "Positions of aligned mismatches, comma seperated";
627 }
629 {
630  if(m_Row == 0) {
631  ostr << "qmismatchpos";
632  } else if(m_Row == 1) {
633  ostr << "smismatchpos";
634  } else {
636  "only pairwise alignments are supported");
637  }
638 }
640  const CSeq_align& align)
641 {
642  ENa_strand QStrand = align.GetSeqStrand(0);
643  ENa_strand SStrand = align.GetSeqStrand(1);
644  vector<TSeqPos> mm_pos;
645 
646  if (align.GetSegs().IsSpliced()) {
647  /// Special handling for Spliced-seg, since mismatch location is already in
648  /// the alignment
649  if (align.GetSegs().GetSpliced().GetProduct_type() ==
651  {
652  NCBI_THROW(CException, eUnknown, "smismatchpos and qmismatchpos not "
653  "supported for protein alignments");
654  }
655 
656  for (const CRef<CSpliced_exon> &exon : align.GetSegs().GetSpliced().GetExons()) {
657  if (!exon->IsSetParts()) {
658  continue;
659  }
660  ENa_strand exon_qstrand = exon->IsSetProduct_strand()
661  ? exon->GetProduct_strand() : QStrand;
662  ENa_strand exon_sstrand = exon->IsSetGenomic_strand()
663  ? exon->GetGenomic_strand() : SStrand;
664  ENa_strand strand = m_Row == 0 ? exon_qstrand : exon_sstrand;
665  TSeqPos qpos = exon_qstrand == eNa_strand_plus ? exon->GetProduct_start().GetNucpos()
666  : exon->GetProduct_end().GetNucpos();
667  TSeqPos spos = exon_sstrand == eNa_strand_plus ? exon->GetGenomic_start()
668  : exon->GetGenomic_end();
669  TSeqPos pos = m_Row == 0 ? qpos : spos;
670  int direction = strand == eNa_strand_plus ? 1 : -1;
671  for (const CRef<CSpliced_exon_chunk> &part : exon->GetParts()) {
672  switch (part->Which()) {
674  pos += direction * part->GetMatch();
675  break;
676 
678  for (unsigned i = 0; i < part->GetMismatch(); ++i) {
679  mm_pos.push_back(pos);
680  pos += direction;
681  }
682  break;
683 
685  if (m_Row == 0) {
686  pos += direction * part->GetProduct_ins();
687  }
688  break;
689 
691  if (m_Row == 1) {
692  pos += direction * part->GetGenomic_ins();
693  }
694  break;
695 
696  default:
697  NCBI_THROW(CException, eUnknown, "smismatchpos and qmismatchpos not "
698  "supported for alignments with diag");
699  }
700  }
701  }
702  } else {
703 
704  TSeqRange QAlignRange, SAlignRange;
705  QAlignRange = align.GetSeqRange(0);
706  SAlignRange = align.GetSeqRange(1);
707 
708  string QueryStr, SubjtStr;
709  {{
710  CBioseq_Handle QueryH, SubjtH;
711  QueryH = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(0));
712  SubjtH = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(1));
713 
714  CSeqVector QueryVec(QueryH, CBioseq_Handle::eCoding_Iupac, QStrand);
715  CSeqVector SubjtVec(SubjtH, CBioseq_Handle::eCoding_Iupac, SStrand);
716 
717  if(QStrand == eNa_strand_plus)
718  QueryVec.GetSeqData(QAlignRange.GetFrom(), QAlignRange.GetTo()+1, QueryStr);
719  else if(QStrand == eNa_strand_minus)
720  QueryVec.GetSeqData(QueryVec.size()-QAlignRange.GetTo()-1,
721  QueryVec.size()-QAlignRange.GetFrom(), QueryStr);
722 
723  if(SStrand == eNa_strand_plus)
724  SubjtVec.GetSeqData(SAlignRange.GetFrom(), SAlignRange.GetTo()+1, SubjtStr);
725  else if(SStrand == eNa_strand_minus)
726  SubjtVec.GetSeqData(SubjtVec.size()-SAlignRange.GetTo()-1,
727  SubjtVec.size()-SAlignRange.GetFrom(), SubjtStr);
728 
729  string QS = QueryStr.substr(0,50);
730  string SS = SubjtStr.substr(0,50);
731  }}
732 
733  list<TSeqRange> QSegRanges, SSegRanges;
734  s_AlignToSeqRanges(align, 0, QSegRanges);
735  s_AlignToSeqRanges(align, 1, SSegRanges);
736 
737 
738  // loop segments
739  list<TSeqRange>::const_iterator SSegIter = SSegRanges.begin();
740  ITERATE(list<TSeqRange>, QSegIter, QSegRanges) {
741  TSeqRange QuerySeg = *QSegIter;
742  TSeqRange SubjtSeg = *SSegIter;
743 
744  if(QuerySeg.GetLength() != SubjtSeg.GetLength()) {
745  NCBI_THROW(CException, eUnknown, "mismatched segment sizes?");
746  }
747 
748  size_t QOffset, SOffset;
749  {{
750  size_t QPOffset = QuerySeg.GetFrom()-QAlignRange.GetFrom();
751  size_t QMOffset = QAlignRange.GetTo()-QuerySeg.GetTo();
752  QOffset = (QStrand == eNa_strand_plus ? QPOffset : QMOffset);
753 
754  size_t SPOffset = SubjtSeg.GetFrom()-SAlignRange.GetFrom();
755  size_t SMOffset = SAlignRange.GetTo()-SubjtSeg.GetTo();
756  SOffset = (SStrand == eNa_strand_plus ? SPOffset : SMOffset);
757  }}
758 
759  // find locations
760  for(unsigned Loop = 0; Loop < QuerySeg.GetLength(); Loop++) {
761  size_t QLoop = QOffset+Loop;
762  size_t SLoop = SOffset+Loop;
763 
764  //if(Loop < 6 || Loop+6 > QuerySeg.GetLength())
765  // cerr << "L: " << Loop << " " << QLoop << " " << SLoop << " : "
766  // << QueryStr[QLoop] << " == " << SubjtStr[SLoop] << endl;
767 
768 
769  if(QueryStr[QLoop] == SubjtStr[SLoop]) {
770  ;
771  } else {
772  if(m_Row == 0)
773  mm_pos.push_back(QStrand == eNa_strand_plus
774  ? QuerySeg.GetFrom()+Loop : QuerySeg.GetTo()-Loop);
775  else if(m_Row == 1)
776  mm_pos.push_back(SStrand == eNa_strand_plus
777  ? SubjtSeg.GetFrom()+Loop : SubjtSeg.GetTo()-Loop);
778  }
779  }
780 
781  ++SSegIter;
782  }
783  }
784 
785  sort(mm_pos.begin(), mm_pos.end());
786  ITERATE(vector<TSeqPos>, it, mm_pos) {
787  if (it != mm_pos.begin()) {
788  ostr << ',';
789  }
790  ostr << *it +1;
791  }
792 }
793 
794 /////////////////////////////////////////////////////////////////////////////
795 
797  : m_Row(row)
798 {
799 }
800 
802 {
803  ostr << "Positions of gapped, unaligned, segments, comma seperated";
804 }
806 {
807  if(m_Row == 0) {
808  ostr << "qgapranges";
809  } else if(m_Row == 1) {
810  ostr << "sgapranges";
811  } else {
813  "only pairwise alignments are supported");
814  }
815 }
817  const CSeq_align& align)
818 {
820  CRangeCollection<TSeqPos> GappedRC;
821  GappedRC += align.GetSeqRange(m_Row);
822  GappedRC -= AlignedRC;
823 
824  //vector<TSeqPos> mm_pos;
825  //sort(mm_pos.begin(), mm_pos.end());
826  ITERATE(CRangeCollection<TSeqPos>, it, GappedRC) {
827  if (it != GappedRC.begin()) {
828  ostr << ',';
829  }
830  ostr << it->GetFrom()+1 << "-" << it->GetTo()+1;
831  }
832 }
833 
834 /////////////////////////////////////////////////////////////////////////////
835 
837 {
838  ostr << "Total number of gaps";
839 }
841 {
842  ostr << "gaps";
843 }
845  const CSeq_align& align)
846 {
847  ostr << align.GetTotalGapCount();
848 }
849 
850 /////////////////////////////////////////////////////////////////////////////
851 
853 {
854  ostr << "Expect value";
855 }
857 {
858  ostr << "evalue";
859 }
861  const CSeq_align& align)
862 {
863  double score = m_Scores->GetScore(align, "e_value");
864  if (score == numeric_limits<double>::infinity() ||
865  score == numeric_limits<double>::quiet_NaN()) {
866  score = 0;
867  }
868  if (score > 1e26) {
869  score = 0;
870  }
871  if (score < -1e26) {
872  score = 0;
873  }
874 
875  //get the current flags
876  ios_base::fmtflags cur_flags=ostr.flags();
877 
878  //print using scientific
879  ostr << scientific << score;
880 
881  //unset scientific
882  ostr.unsetf(ios_base::scientific);
883 
884  //reset to original flags
885  ostr << setiosflags(cur_flags);
886 }
887 
888 
889 /////////////////////////////////////////////////////////////////////////////
890 
892 {
893  ostr << "Expect value in mantissa format";
894 }
896 {
897  ostr << "evalue_mantissa";
898 }
900  const CSeq_align& align)
901 {
902  double score = 0;
903  if ( !align.GetNamedScore(CSeq_align::eScore_EValue, score) ) {
904  score = m_Scores->GetScore(align, "e_value");
905  }
906  if (score == numeric_limits<double>::infinity() ||
907  score == numeric_limits<double>::quiet_NaN()) {
908  score = 0;
909  }
910  if (score > 1e26) {
911  score = 0;
912  }
913  if (score < -1e26) {
914  score = 0;
915  }
916 
917  double mantissa = score;
918 
919  if(score > 0.0) {
920  while(mantissa >= 10.0) {
921  mantissa /= 10.0;
922  }
923  while(mantissa < 1.0) {
924  mantissa *= 10.0;
925  }
926  } else if(score < 0.0) {
927  while(mantissa <= -10.0) {
928  mantissa /= 10.0;
929  }
930  while(mantissa > -1.0) {
931  mantissa *= 10.0;
932  }
933  }
934 
935  ostr << mantissa;
936 }
937 
938 
939 /////////////////////////////////////////////////////////////////////////////
940 
942 {
943  ostr << "Expect value in exponent format";
944 }
946 {
947  ostr << "evalue_exponent";
948 }
950  const CSeq_align& align)
951 {
952  double score = 0;
953  if ( !align.GetNamedScore(CSeq_align::eScore_EValue, score) ) {
954  score = m_Scores->GetScore(align, "e_value");
955  }
956  if (score == numeric_limits<double>::infinity() ||
957  score == numeric_limits<double>::quiet_NaN()) {
958  score = 0;
959  }
960  if (score > 1e26) {
961  score = 0;
962  }
963  if (score < -1e26) {
964  score = 0;
965  }
966 
967  double mantissa = score;
968  int exponent = 0;
969 
970 
971  if(score > 0.0) {
972  while(mantissa >= 10.0) {
973  mantissa /= 10.0;
974  exponent++;
975  }
976  while(mantissa < 1.0) {
977  mantissa *= 10.0;
978  exponent--;
979  }
980  } else if(score < 0.0) {
981  while(mantissa <= -10.0) {
982  mantissa /= 10.0;
983  exponent--;
984  }
985  while(mantissa > -1.0) {
986  mantissa *= 10.0;
987  exponent++;
988  }
989  }
990 
991  ostr << exponent;
992 }
993 
994 
995 
996 
997 
998 /////////////////////////////////////////////////////////////////////////////
999 
1001 {
1002  ostr << "Bit score";
1003 }
1005 {
1006  ostr << "bitscore";
1007 }
1009  const CSeq_align& align)
1010 {
1011  double score = m_Scores->GetScore(align, "bit_score");
1012  ostr << score;
1013 }
1014 
1015 /////////////////////////////////////////////////////////////////////////////
1016 
1018 {
1019  ostr << "Raw score";
1020 }
1022 {
1023  ostr << "score";
1024 }
1026  const CSeq_align& align)
1027 {
1028  double score = m_Scores->GetScore(align, "score");
1029  ostr << score;
1030 }
1031 
1032 /////////////////////////////////////////////////////////////////////////////
1033 
1034 /// formatter for dumping any score in an alignment
1036  const string& col_name)
1037  : m_ScoreName(score_name)
1038  , m_ColName(col_name)
1039 {
1040 }
1041 
1042 
1044 {
1045  ostr << m_Scores->HelpText(m_ScoreName);
1046 }
1047 
1049 {
1050  ostr << m_ScoreName;
1051 }
1052 
1053 
1055  const CSeq_align& align)
1056 {
1057  double score_d=0.0;
1058  int score_i = 0;
1059  bool is_int = m_Scores->IsIntegerScore(align, m_ScoreName);
1060  try {
1061  if(is_int)
1062  score_i = (int)m_Scores->GetScore(align, m_ScoreName);
1063  else
1064  score_d = m_Scores->GetScore(align, m_ScoreName);
1065  } catch (CException &) {
1066  score_d = 0;
1067  score_i = 0;
1068  }
1069  if(is_int)
1070  ostr << score_i;
1071  else
1072  ostr << score_d;
1073 }
1074 
1075 
1076 /////////////////////////////////////////////////////////////////////////////
1077 
1079 {
1080  ostr << "Entropy value for the "
1081  << (m_Row == 0 ? "query " : "subject ")
1082  << "sequence";
1083 }
1084 
1085 
1087 {
1088  ostr
1089  << (m_Row == 0 ? "query_" : "subject_")
1090  << "entropy";
1091 }
1092 
1093 
1095  const objects::CSeq_align& align)
1096 {
1097  string score_name =
1098  (m_Row == 0 ? "query_" : "subject_") +
1099  string("entropy");
1100  double val = 0;
1101  if (m_Scores) {
1102  val = m_Scores->GetScore(align, score_name);
1103  }
1104  ostr << val;
1105 }
1106 
1107 
1108 /////////////////////////////////////////////////////////////////////////////
1109 
1111 {
1112  ostr << "Entropy value for the "
1113  << (m_Row == 0 ? "query " : "subject ")
1114  << "sequence";
1115 }
1116 
1117 
1119 {
1120  ostr
1121  << (m_Row == 0 ? "query_" : "subject_")
1122  << "seg_pct";
1123 }
1124 
1125 
1127  const objects::CSeq_align& align)
1128 {
1129  string score_name =
1130  (m_Row == 0 ? "query_" : "subject_") +
1131  string("seg_pct");
1132  double val = 0;
1133  if (m_Scores) {
1134  val = m_Scores->GetScore(align, score_name);
1135  }
1136  ostr << val;
1137 }
1138 
1139 
1140 /////////////////////////////////////////////////////////////////////////////
1141 
1143  : m_Row(row)
1144 {
1145 }
1146 
1147 
1149 {
1150  ostr << "Defline of the ";
1151  if (m_Row == 0) {
1152  ostr << "query";
1153  } else if (m_Row == 1) {
1154  ostr << "subject";
1155  } else {
1157  "only pairwise alignments are supported");
1158  }
1159  ostr << " sequence";
1160 }
1161 
1162 
1164 {
1165  if (m_Row == 0) {
1166  ostr << "qdefline";
1167  } else if (m_Row == 1) {
1168  ostr << "sdefline";
1169  } else {
1171  "only pairwise alignments are supported");
1172  }
1173 }
1174 
1175 
1177  const CSeq_align& align)
1178 {
1179  if (m_Row >= align.CheckNumRows()) {
1181  "indexing past the end of available "
1182  "sequences in an alignment");
1183  }
1184 
1186  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(idh);
1187  if (bsh) {
1188  ostr << generator.GenerateDefline(bsh);
1189  }
1190 }
1191 
1192 /////////////////////////////////////////////////////////////////////////////
1193 
1195 {
1196  ostr << "Alignment ids";
1197 }
1198 
1199 
1201 {
1202  ostr << "align_ids";
1203 }
1204 
1205 
1207  const CSeq_align& align)
1208 {
1209  if (align.IsSetId()) {
1210  bool first = true;
1211  ITERATE (CSeq_align::TId, it, align.GetId()) {
1212  if ( !first ) {
1213  ostr << ',';
1214  }
1215  if ((*it)->IsId()) {
1216  ostr << (*it)->GetId();
1217  }
1218  else if ((*it)->IsStr()) {
1219  ostr << (*it)->GetStr();
1220  }
1221  }
1222  }
1223 }
1224 
1225 /////////////////////////////////////////////////////////////////////////////
1226 
1228 {
1229  ostr << "best_placement group id";
1230 }
1231 
1233 {
1234  ostr << "best_placement_group";
1235 }
1236 
1237 
1239  const CSeq_align& align)
1240 {
1241  if (align.IsSetExt()) {
1242  ITERATE (CSeq_align::TExt, i, align.GetExt()) {
1243  const CUser_object& obj = **i;
1244  if (!obj.GetType().IsStr() ||
1245  obj.GetType().GetStr() != "placement_data") {
1246  continue;
1247  }
1248 
1249  CConstRef<CUser_field> f = obj.GetFieldRef("placement_id");
1250  if (f) {
1251  ostr << f->GetData().GetStr();
1252  break;
1253  }
1254  }
1255  }
1256 }
1257 
1258 /////////////////////////////////////////////////////////////////////////////
1259 
1261  : m_Row(row)
1262 {
1263 }
1264 
1265 
1267 {
1268  ostr << "Prot-ref of the ";
1269  if (m_Row == 0) {
1270  ostr << "query";
1271  } else if (m_Row == 1) {
1272  ostr << "subject";
1273  } else {
1275  "only pairwise alignments are supported");
1276  }
1277  ostr << " sequence";
1278 }
1279 
1280 
1282 {
1283  if (m_Row == 0) {
1284  ostr << "qprotref";
1285  } else if (m_Row == 1) {
1286  ostr << "sprotref";
1287  } else {
1289  "only pairwise alignments are supported");
1290  }
1291 }
1292 
1293 
1295  const CSeq_align& align)
1296 {
1297  if (m_Row >= align.CheckNumRows()) {
1299  "indexing past the end of available "
1300  "sequences in an alignment");
1301  }
1302 
1304  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(idh);
1305  if (bsh) {
1306  SAnnotSelector sel;
1307  sel.SetResolveTSE()
1309  CFeat_CI feat_iter(bsh, sel);
1310  if (feat_iter.GetSize() == 1) {
1311  const CProt_ref& ref = feat_iter->GetData().GetProt();
1312  string s;
1313  ref.GetLabel(&s);
1314  ostr << s;
1315  }
1316  }
1317 }
1318 
1319 /////////////////////////////////////////////////////////////////////////////
1320 
1321 
1323 {
1324  ostr << "Dump the ";
1325  switch (m_Interval) {
1326  case e_Exons:
1327  ostr << "exon";
1328  break;
1329 
1330  case e_Introns:
1331  ostr << (m_Sequence == 0 ? "unaligned segment" : "intron");
1332  break;
1333  }
1334 
1335  switch (m_Info) {
1336  case e_Range:
1337  ostr << " structure";
1338  break;
1339 
1340  case e_Length:
1341  ostr << " lengths";
1342  break;
1343  }
1344 
1345  if (m_Sequence == 0) {
1346  ostr << " for the query sequence";
1347  }
1348 
1349  ostr << " of a Spliced-seg alignment";
1350 }
1351 
1353 {
1354  if (m_Sequence == 0) {
1355  ostr << "query_";
1356  }
1357  switch (m_Interval) {
1358  case e_Exons:
1359  switch (m_Info) {
1360  case e_Range:
1361  ostr << "exons";
1362  break;
1363 
1364  case e_Length:
1365  ostr << "exon_len";
1366  break;
1367  }
1368  break;
1369 
1370  case e_Introns:
1371  switch (m_Info) {
1372  case e_Range:
1373  ostr << (m_Sequence == 0 ? "unaligned" : "introns");
1374  break;
1375 
1376  case e_Length:
1377  ostr << (m_Sequence == 0 ? "unaligned_len" : "intron_len");
1378  break;
1379  }
1380  break;
1381 
1382  }
1383 }
1384 
1386  const CSeq_align& align)
1387 {
1388  if (align.GetSegs().IsSpliced()) {
1389  bool is_protein = m_Sequence == 0 &&
1390  align.GetSegs().GetSpliced().GetProduct_type() ==
1392  if (is_protein && (m_Interval == e_Introns || m_Info == e_Length)) {
1393  CNcbiOstrstream column_name;
1394  PrintHeader(column_name);
1396  string(CNcbiOstrstreamToString(column_name))
1397  + " not supported for protein alignments");
1398  }
1399 
1400  typedef pair<const CProt_pos*, const CProt_pos*> TProteinExon;
1401  vector<TProteinExon> protein_exons;
1402  vector<TSeqRange> nuc_exons;
1403 
1404  CRangeCollection<TSeqPos> intron_ranges;
1405  if (m_Interval == e_Introns) {
1406  TSeqRange align_range = align.GetSeqRange(m_Sequence);
1407  align_range.SetFrom(align_range.GetFrom()+1);
1408  align_range.SetTo(align_range.GetTo()+1);
1409  intron_ranges += align_range;
1410  }
1411 
1413  align.GetSegs().GetSpliced().GetExons()) {
1414  const CSpliced_exon& exon = **it;
1415  TSeqRange exon_range;
1416  if (is_protein) {
1417  protein_exons.push_back(
1418  TProteinExon(
1419  &exon.GetProduct_start().GetProtpos(),
1420  &exon.GetProduct_end().GetProtpos()));
1421  } else if (m_Sequence == 1) {
1422  exon_range.SetFrom(exon.GetGenomic_start()+1);
1423  exon_range.SetTo(exon.GetGenomic_end()+1);
1424  } else {
1425  exon_range.SetFrom(exon.GetProduct_start().GetNucpos()+1);
1426  exon_range.SetTo(exon.GetProduct_end().GetNucpos()+1);
1427  }
1428  switch (m_Interval) {
1429  case e_Exons:
1430  nuc_exons.push_back(exon_range);
1431  break;
1432 
1433  case e_Introns:
1434  intron_ranges -= exon_range;
1435  break;
1436  }
1437  }
1438  list<TSeqRange> range_list;
1439  if (!nuc_exons.empty()) {
1440  range_list.insert(range_list.end(), nuc_exons.begin(),
1441  nuc_exons.end());
1442  } else if (!intron_ranges.Empty()) {
1443  range_list.insert(range_list.end(), intron_ranges.begin(),
1444  intron_ranges.end());
1445  if(m_Sequence == 1 &&
1446  (align.GetSeqStrand(0) == eNa_strand_minus ||
1447  align.GetSeqStrand(1) == eNa_strand_minus))
1448  {
1449  range_list.reverse();
1450  }
1451  }
1452  ostr << '[';
1453  if (is_protein) {
1454  ITERATE (vector<TProteinExon>, it, protein_exons) {
1455  if (it != protein_exons.begin()) {
1456  ostr << ',';
1457  }
1458 
1459  ostr << '(' << it->first->GetAmin()+1
1460  << '/' << it->first->GetFrame()
1461  << ".." << it->second->GetAmin()+1
1462  << '/' << it->second->GetFrame() << ')';
1463  }
1464  } else {
1465  ITERATE (list<TSeqRange>, it, range_list) {
1466  if (it != range_list.begin()) {
1467  ostr << ',';
1468  }
1469 
1470  switch (m_Info) {
1471  case e_Range:
1472  ostr << '('
1473  << it->GetFrom()
1474  << ".."
1475  << it->GetTo()
1476  << ')';
1477  break;
1478 
1479  case e_Length:
1480  ostr << it->GetLength();
1481  break;
1482  }
1483  }
1484  }
1485  ostr << ']';
1486  }
1487 }
1488 
1489 /////////////////////////////////////////////////////////////////////////////
1490 
1492  : m_Row(row)
1493 {
1494 }
1495 
1496 
1498 {
1499  ostr << "Taxid of the ";
1500  if (m_Row == 0) {
1501  ostr << "query";
1502  } else if (m_Row == 1) {
1503  ostr << "subject";
1504  } else {
1506  "only pairwise alignments are supported");
1507  }
1508  ostr << " sequence";
1509 }
1510 
1512 {
1513  if (m_Row == 0) {
1514  ostr << "qtaxid";
1515  } else if (m_Row == 1) {
1516  ostr << "staxid";
1517  } else {
1519  "only pairwise alignments are supported");
1520  }
1521 }
1522 
1524  const CSeq_align& align)
1525 {
1526  if (m_Row >= align.CheckNumRows()) {
1528  "indexing past the end of available "
1529  "sequences in an alignment");
1530  }
1531 
1532  ostr << (int)m_Scores->GetScore(align, m_Row == 0 ? "query_taxid"
1533  : "subject_taxid");
1534 }
1535 
1536 
1537 /////////////////////////////////////////////////////////////////////////////
1538 
1540  : m_Row(row)
1541  , m_Prefix(prefix)
1542 {
1543 }
1544 
1545 
1547 {
1548  ostr << m_Prefix << " of the ";
1549  if (m_Row == 0) {
1550  ostr << "query";
1551  } else if (m_Row == 1) {
1552  ostr << "subject";
1553  } else {
1555  "only pairwise alignments are supported");
1556  }
1557  ostr << " sequence";
1558 }
1559 
1561 {
1562  if (m_Row == 0) {
1563  ostr << "query ";
1564  } else if (m_Row == 1) {
1565  ostr << "subject ";
1566  } else {
1568  "only pairwise alignments are supported");
1569  }
1570  ostr << m_Prefix;
1571 }
1572 
1574  const CSeq_align& align)
1575 {
1576  if (m_Row >= align.CheckNumRows()) {
1578  "indexing past the end of available "
1579  "sequences in an alignment");
1580  }
1581 
1582  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(
1583  align.GetSeq_id(m_Row));
1584  if (!bsh) {
1585  ostr << "NA";
1586  return;
1587  }
1588  for (CSeqdesc_CI desc_iter(bsh, CSeqdesc::e_Comment);
1589  desc_iter; ++desc_iter)
1590  {
1591  if (NStr::StartsWith(desc_iter->GetComment(),
1592  m_Prefix + ": ", NStr::eNocase))
1593  {
1594  ostr << desc_iter->GetComment().substr(m_Prefix.size() + 2);
1595  return;
1596  }
1597  }
1598  ostr << "NA";
1599 }
1600 
1601 
1602 /////////////////////////////////////////////////////////////////////////////
1603 
1605  : m_Row(row)
1606  , m_Field(field)
1607 {
1608 }
1609 
1610 
1612 {
1613 }
1614 
1615 
1617 {
1618  switch (m_Field) {
1619  case eFullTaxName:
1620  ostr << "Full taxname of the ";
1621  break;
1622 
1623  case eSpecies:
1624  ostr << "Species name of the ";
1625  break;
1626 
1627  case eGenus:
1628  ostr << "Genus name of the ";
1629  break;
1630 
1631  case eKingdom:
1632  ostr << "Kingdom name of the ";
1633  break;
1634  }
1635 
1636  switch (m_Row) {
1637  case 0: ostr << "query"; break;
1638  case 1: ostr << "subject"; break;
1639  default:
1641  "only pairwise alignments are supported");
1642  }
1643  ostr << " sequence";
1644 }
1645 
1647 {
1648  switch (m_Row) {
1649  case 0: ostr << "q"; break;
1650  case 1: ostr << "s"; break;
1651  default:
1653  "only pairwise alignments are supported");
1654  }
1655 
1656  switch (m_Field) {
1657  case eFullTaxName: ostr << "taxname"; break;
1658  case eSpecies: ostr << "species"; break;
1659  case eGenus: ostr << "genus"; break;
1660  case eKingdom: ostr << "kingdom"; break;
1661  }
1662 }
1663 
1665  const CSeq_align& align)
1666 {
1667  if (m_Row >= align.CheckNumRows()) {
1669  "indexing past the end of available "
1670  "sequences in an alignment");
1671  }
1672 
1673  if (!m_Taxon1.get()) {
1674  m_Taxon1.reset(new CTaxon1);
1675  m_Taxon1->Init(100000);
1676  }
1677 
1678  TTaxId taxid = TAX_ID_FROM(int,
1679  (int)m_Scores->GetScore(align,
1680  m_Row == 0 ? "query_taxid"
1681  : "subject_taxid"));
1682 
1683  switch (m_Field) {
1684  case eSpecies:
1685  taxid = m_Taxon1->GetSpecies(taxid);
1686  break;
1687 
1688  case eGenus:
1689  taxid = m_Taxon1->GetGenus(taxid);
1690  break;
1691 
1692  case eKingdom:
1693  taxid = m_Taxon1->GetSuperkingdom(taxid);
1694  break;
1695 
1696  default:
1697  break;
1698  }
1699 
1700  bool is_species = false;
1701  bool is_uncultured = false;
1702  string blast_name;
1703  CConstRef<COrg_ref> org =
1704  m_Taxon1->GetOrgRef(taxid, is_species, is_uncultured, blast_name);
1705  if (org) {
1706  string label;
1707  org->GetLabel(&label);
1708  ostr << label;
1709  }
1710  else {
1711  ostr << "-";
1712  }
1713 }
1714 
1715 
1716 /////////////////////////////////////////////////////////////////////////////
1718 : m_Row(row)
1719 {
1720 }
1721 
1723 {
1724  ostr << "size of biggest gap";
1725 }
1727 {
1728  if(m_Row == e_All) {
1729  ostr << "biggestgap";
1730  } else if(m_Row == 0) {
1731  ostr << "qbiggestgap";
1732  } else if(m_Row == 1) {
1733  ostr << "sbiggestgap";
1734  } else {
1736  "only pairwise alignments are supported");
1737  }
1738 }
1740  const CSeq_align& align)
1741 {
1742  ostr << x_CalcBiggestGap(align);
1743 }
1744 
1746 {
1747  if(align.GetSegs().IsDisc()) {
1748  TSeqPos Biggest = 0;
1749  ITERATE(CSeq_align_set::Tdata, AlignIter, align.GetSegs().GetDisc().Get()) {
1750  Biggest = max(Biggest, x_CalcBiggestGap(**AlignIter));
1751  }
1752  return Biggest;
1753  } else if(align.GetSegs().IsDenseg()) {
1754  const CDense_seg& Denseg = align.GetSegs().GetDenseg();
1755  TSeqPos Biggest = 0;
1756  for(int Index = 0; Index < Denseg.GetNumseg(); Index++) {
1757  bool QGap = (Denseg.GetStarts()[2*Index] == -1);
1758  bool SGap = (Denseg.GetStarts()[(2*Index)+1] == -1);
1759  if(m_Row == e_All && (QGap || SGap)) {
1760  Biggest = max(Biggest, (TSeqPos)Denseg.GetLens()[Index]);
1761  } else if(m_Row == 0 && QGap) {
1762  Biggest = max(Biggest, (TSeqPos)Denseg.GetLens()[Index]);
1763  } else if(m_Row == 1 && SGap) {
1764  Biggest = max(Biggest, (TSeqPos)Denseg.GetLens()[Index]);
1765  }
1766  }
1767  return Biggest;
1768  } else {
1770  "biggestgap is only supported for Dense-sef and Disc alignments");
1771  }
1772 }
1773 
1774 /////////////////////////////////////////////////////////////////////////////
1776 : m_Row(row)
1777 {
1778 }
1779 
1781 {
1782  ostr << "If ";
1783  if (m_Row == 0) {
1784  ostr << "query";
1785  } else if (m_Row == 1) {
1786  ostr << "subject";
1787  } else {
1789  "only pairwise alignments are supported");
1790  }
1791  ostr << " has a chromosome, its name";
1792 }
1793 
1795 {
1796  if (m_Row == 0) {
1797  ostr << "qchrom";
1798  } else if (m_Row == 1) {
1799  ostr << "schrom";
1800  } else {
1802  "only pairwise alignments are supported");
1803  }
1804 }
1805 
1807  const CSeq_align& align)
1808 {
1809  CBioseq_Handle Handle = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
1810 
1811  string Chrom = "";
1812 
1814  while(Iter) {
1815  const CBioSource& BioSource = Iter->GetSource();
1816  if(BioSource.CanGetSubtype()) {
1817  ITERATE(CBioSource::TSubtype, SubIter, BioSource.GetSubtype()) {
1818  if( (*SubIter)->CanGetSubtype() &&
1819  (*SubIter)->GetSubtype() == CSubSource::eSubtype_chromosome &&
1820  (*SubIter)->CanGetName() ) {
1821  Chrom = (*SubIter)->GetName();
1822  }
1823  }
1824  }
1825  ++Iter;
1826  }
1827 
1828  ostr << Chrom;
1829 }
1830 
1831 /////////////////////////////////////////////////////////////////////////////
1833 : m_Row(row)
1834 {
1835 }
1836 
1838 {
1839  ostr << "If ";
1840  if (m_Row == 0) {
1841  ostr << "query";
1842  } else if (m_Row == 1) {
1843  ostr << "subject";
1844  } else {
1846  "only pairwise alignments are supported");
1847  }
1848  ostr << " has a clone, its name";
1849 }
1850 
1852 {
1853  if (m_Row == 0) {
1854  ostr << "qclone";
1855  } else if (m_Row == 1) {
1856  ostr << "sclone";
1857  } else {
1859  "only pairwise alignments are supported");
1860  }
1861 }
1862 
1864  const CSeq_align& align)
1865 {
1866  string Clone = "";
1867 
1868  try {
1869  CBioseq_Handle Handle = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
1871  while(Iter) {
1872  const CBioSource& BioSource = Iter->GetSource();
1873  if(BioSource.CanGetSubtype()) {
1874  ITERATE(CBioSource::TSubtype, SubIter, BioSource.GetSubtype()) {
1875  if( (*SubIter)->CanGetSubtype() &&
1876  (*SubIter)->GetSubtype() == CSubSource::eSubtype_clone &&
1877  (*SubIter)->CanGetName() ) {
1878  Clone = (*SubIter)->GetName();
1879  }
1880  }
1881  }
1882  ++Iter;
1883  }
1884  } catch(...) {
1885  Clone = "";
1886  }
1887 
1888  ostr << Clone;
1889 }
1890 
1891 
1892 /////////////////////////////////////////////////////////////////////////////
1894 : m_Row(row)
1895 {
1896 }
1897 
1899 {
1900  if (m_Row == 0) {
1901  ostr << "Query";
1902  } else if (m_Row == 1) {
1903  ostr << "Subject";
1904  } else {
1906  "only pairwise alignments are supported");
1907  }
1908  ostr << " sequence tech type";
1909 }
1910 
1912 {
1913  if (m_Row == 0) {
1914  ostr << "qtech";
1915  } else if (m_Row == 1) {
1916  ostr << "stech";
1917  } else {
1919  "only pairwise alignments are supported");
1920  }
1921 }
1922 
1924  const CSeq_align& align)
1925 {
1926  CBioseq_Handle Handle = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
1927 
1928  string TechStr = "(none)";
1929 
1931  while(Iter) {
1932  const CMolInfo& MolInfo = Iter->GetMolinfo();
1933  if(MolInfo.CanGetTech() && MolInfo.IsSetTech()) {
1934  const CEnumeratedTypeValues* tech_types = CMolInfo::GetTypeInfo_enum_ETech();
1935  TechStr = tech_types->FindName(MolInfo.GetTech(), false);
1936  }
1937  ++Iter;
1938  }
1939 
1940  ostr << TechStr;
1941 }
1942 
1943 //////////////////////////////////////////////////////////////////////////////
1945 : m_Row(row)
1946 {
1947 }
1948 
1950 {
1951  ostr << "Strand of alignment in ";
1952  if (m_Row == 0) {
1953  ostr << "query";
1954  } else if (m_Row == 1) {
1955  ostr << "subject";
1956  } else {
1958  "only pairwise alignments are supported");
1959  }
1960  ostr << ", 'b' if both in a Disc-seg alignment";
1961 }
1962 
1964 {
1965  if (m_Row == 0) {
1966  ostr << "qdiscstrand";
1967  } else if (m_Row == 1) {
1968  ostr << "sdiscstrand";
1969  } else {
1971  "only pairwise alignments are supported");
1972  }
1973 }
1974 
1976  const CSeq_align& align)
1977 {
1978  bool Plus=false, Minus=false;
1979  x_RecurseStrands(align, Plus, Minus);
1980  if(Plus && !Minus)
1981  ostr << '+';
1982  else if(Minus && !Plus)
1983  ostr << '-';
1984  else if(Plus && Minus)
1985  ostr << 'b';
1986 }
1987 
1989  bool& Plus, bool& Minus)
1990 {
1991  if(align.GetSegs().IsDisc()) {
1992  ITERATE(CSeq_align_set::Tdata, iter, align.GetSegs().GetDisc().Get()) {
1993  x_RecurseStrands(**iter, Plus, Minus);
1994  }
1995  return;
1996  }
1997 
1998  if(align.GetSeqStrand(m_Row) == eNa_strand_plus)
1999  Plus = true;
2000  else if(align.GetSeqStrand(m_Row) == eNa_strand_minus)
2001  Minus = true;
2002 }
2003 
2004 
2005 //////////////////////////////////////////////////////////////////////////////
2006 
2008  const string& text)
2009 : m_ColName(col_name)
2010 , m_Text(text)
2011 {
2012 }
2013 
2015 {
2016  ostr << "'" << m_Text << "' as fixed text";
2017 }
2018 
2020 {
2021  ostr << m_ColName;
2022 }
2023 
2025  const CSeq_align& align)
2026 {
2027  ostr << m_Text;
2028 }
2029 
2030 
2031 //////////////////////////////////////////////////////////////////////////////
2032 
2034 {
2035  ostr << "length_ungap / size of aligned query sequence range";
2036 }
2037 
2039 {
2040  ostr << "align_len_ratio";
2041 }
2042 
2044  const CSeq_align& align)
2045 {
2046  /// historical score:
2047  /// ungapped alignment length / length of range of query sequence
2048  TSeqPos align_length = align.GetAlignLength(false /*ungapped*/);
2049  TSeqPos align_range = align.GetSeqRange(0).GetLength();
2050  ostr << double(align_length) / double(align_range);
2051 }
2052 
2053 
2054 /////////////////////////////////////////////////////////////////////////////
2055 
2057 {
2058 }
2059 
2060 
2062 {
2063  ostr << "Alignment CIGAR string";
2064 }
2065 
2066 
2068 {
2069  ostr << "cigar";
2070 }
2071 
2072 
2074  const CSeq_align& align)
2075 {
2076  if(!align.CanGetSegs() || !align.GetSegs().IsDenseg()) {
2078  "cigar format only supports denseg alignments.");
2079  }
2080 
2081 
2082  int NumSeg = align.GetSegs().GetDenseg().GetNumseg();
2083  const CDense_seg::TStarts & Starts = align.GetSegs().GetDenseg().GetStarts();
2084  const CDense_seg::TLens & Lens = align.GetSegs().GetDenseg().GetLens();
2085 
2086  for(int Loop = 0; Loop < NumSeg; Loop++) {
2087  int Length = Lens[Loop];
2088  char Code = 0;
2089 
2090  if( Starts[ (Loop*2) ] == -1)
2091  Code = 'D';
2092  else if( Starts[ (Loop*2)+1 ] == -1)
2093  Code = 'I';
2094  else
2095  Code = 'M';
2096 
2097  ostr << Length << Code;
2098  }
2099 
2100 }
2101 
2102 
2103 //////////////////////////////////////////////////////////////////////////////
2104 
2107 : m_Row(row), m_Type(type), m_Info(info)
2108 {
2109 }
2110 
2112 {
2113  m_Gencoll = gencoll;
2114 }
2115 
2116 
2118 {
2119  switch (m_Info) {
2120  case eName:
2121  ostr << "Name of ";
2122  break;
2123  case eAccession:
2124  ostr << "Accession of ";
2125  break;
2126  case eChainId:
2127  ostr << "Chain id of ";
2128  break;
2129  case eChromosome:
2130  ostr << "Chromosome containing ";
2131  break;
2132  }
2133  if (m_Info != eChromosome) {
2134  ostr << (m_Type == eFull ? "full assembly" : "assembly unit") << " of ";
2135  }
2136  if (m_Row == 0) {
2137  ostr << "query";
2138  } else if (m_Row == 1) {
2139  ostr << "subject";
2140  } else {
2142  "only pairwise alignments are supported");
2143  }
2144  ostr << " sequence";
2145 }
2146 
2148 {
2149  if (m_Row == 0) {
2150  ostr << "q";
2151  } else if (m_Row == 1) {
2152  ostr << "s";
2153  } else {
2155  "only pairwise alignments are supported");
2156  }
2157  if (m_Info != eChromosome) {
2158  ostr << (m_Type == eFull ? "fullasm" : "asmunit");
2159  }
2160  switch (m_Info) {
2161  case eName:
2162  break;
2163  case eAccession:
2164  ostr << "acc";
2165  break;
2166  case eChainId:
2167  ostr << "chain";
2168  break;
2169  case eChromosome:
2170  ostr << "chromosome";
2171  break;
2172  }
2173 }
2174 
2176  const CSeq_align& align)
2177 {
2178  if (m_Row == 1 && align.IsSetExt())
2179  {
2180  /// For the subject sequence, the information may be stored in teh
2181  /// alignment as a User-obejct
2182  ITERATE (CSeq_align::TExt, i, align.GetExt()) {
2183  const CUser_object& obj = **i;
2184  if (!obj.GetType().IsStr() ||
2185  obj.GetType().GetStr() != "Assembly Info") {
2186  continue;
2187  }
2188 
2189  switch (m_Info) {
2190  case eName:
2191  ostr << obj.GetField(m_Type == eFull
2192  ? "Assembly Name" : "Assembly Unit Name")
2193  .GetData().GetStr();
2194  return;
2195 
2196  case eAccession:
2197  if (obj.HasField("Assembly Accession")) {
2198  ostr << obj.GetField(m_Type == eFull
2199  ? "Assembly Accession" : "Assembly Unit Accession")
2200  .GetData().GetStr();
2201  } else {
2202  ostr << "NA";
2203  }
2204  return;
2205 
2206  case eChainId:
2207  if (m_Type == eUnit) {
2208  if (obj.HasField("GenColl Chain")) {
2209  ostr << obj.GetField("GenColl Chain").GetData().GetInt();
2210  } else {
2211  ostr << "NA";
2212  }
2213  return;
2214  }
2215  break;
2216 
2217  case eChromosome:
2218  if (obj.HasField("Chromosome")) {
2219  ostr << obj.GetField("Chromosome").GetData().GetStr();
2220  } else {
2221  ostr << "NA";
2222  }
2223  return;
2224  }
2225  }
2226  }
2227 
2228  if(!m_Gencoll) {
2229  return;
2230  }
2231 
2234 
2235  if(!Seq) {
2236  return;
2237  }
2238 
2240  if (m_Info != eChromosome) {
2241  if (m_Type == eFull) {
2242  Assm = Seq->GetFullAssembly();
2243  } else {
2244  CConstRef<CGC_AssemblyUnit> Unit = Seq->GetAssemblyUnit();
2245  if (Unit) {
2246  CGC_Assembly *unit_assm = new CGC_Assembly();
2247  unit_assm->SetUnit(const_cast<CGC_AssemblyUnit &>(*Unit));
2248  Assm.Reset(unit_assm);
2249  }
2250  }
2251  if(!Assm) {
2252  return;
2253  }
2254  }
2255 
2256  switch (m_Info) {
2257  case eName:
2258  ostr << Assm->GetName();
2259  break;
2260 
2261  case eAccession:
2262  ostr << Assm->GetAccession();
2263  break;
2264 
2265  case eChainId:
2266  {{
2267  string accession = Assm->GetAccession();
2268  size_t chain_start = accession.find_first_of("123456789");
2269  size_t chain_end = accession.find('.');
2270  ostr << accession.substr(chain_start, chain_end-chain_start);
2271  }}
2272  break;
2273 
2274  case eChromosome:
2275  ostr << Seq->GetChrName();
2276  break;
2277  }
2278 }
2279 
2280 //////////////////////////////////////////////////////////////////////////////
2281 
2283 : m_Row(row), m_Gencoll(gencoll)
2284 {
2285 }
2286 
2288 {
2289  ostr << "Patch type, if any, of ";
2290  if (m_Row == 0) {
2291  ostr << "query";
2292  } else if (m_Row == 1) {
2293  ostr << "sequence";
2294  } else {
2296  "only pairwise alignments are supported");
2297  }
2298  ostr << " sequence";
2299 }
2300 
2302 {
2303  if (m_Row == 0) {
2304  ostr << "qpatchtype";
2305  } else if (m_Row == 1) {
2306  ostr << "spatchtype";
2307  } else {
2309  "only pairwise alignments are supported");
2310  }
2311 }
2312 
2314  const CSeq_align& align)
2315 {
2316  if(!m_Gencoll)
2317  return;
2318 
2320  Seq = m_Gencoll->Find(CSeq_id_Handle::GetHandle(align.GetSeq_id(m_Row)));
2321  if(!Seq)
2322  return;
2323 
2324  if(Seq->CanGetPatch_type()) {
2325  if(Seq->GetPatch_type() == CGC_Sequence::ePatch_type_fix)
2326  ostr << "FIX";
2327  else if(Seq->GetPatch_type() == CGC_Sequence::ePatch_type_novel)
2328  ostr << "NOVEL";
2329  }
2330 }
2331 
2332 //////////////////////////////////////////////////////////////////////////////
2333 
2335 : m_Row(row), m_Gencoll(gencoll)
2336 {
2337 }
2338 
2340 {
2341  ostr << "Nearest Gap, if any, or edge, of ";
2342  if (m_Row == 0) {
2343  ostr << "query";
2344  } else if (m_Row == 1) {
2345  ostr << "subject";
2346  } else {
2348  "only pairwise alignments are supported");
2349  }
2350  ostr << " sequence";
2351 }
2352 
2354 {
2355  if (m_Row == 0) {
2356  ostr << "qnearestgap";
2357  } else if (m_Row == 1) {
2358  ostr << "snearestgap";
2359  } else {
2361  "only pairwise alignments are supported");
2362  }
2363 }
2364 
2366  const CSeq_id& Id,
2367  const TSeqPos Offset,
2368  list<TSeqRange>& Gaps)
2369 {
2371 
2372  if(!Seq)
2373  return 0;
2374 
2375  if(!Seq->CanGetStructure())
2376  return 0;
2377 
2378  TSeqPos CurrStart = Offset;
2379  ITERATE(CDelta_ext::Tdata, DeltaIter, Seq->GetStructure().Get()) {
2380  if( (*DeltaIter)->IsLiteral()) {
2381  if (!(*DeltaIter)->GetLiteral().CanGetSeq_data() ||
2382  (*DeltaIter)->GetLiteral().GetSeq_data().IsGap()) {
2383  TSeqRange GapRange;
2384  GapRange.SetFrom(CurrStart);
2385  GapRange.SetLength((*DeltaIter)->GetLiteral().GetLength());
2386  Gaps.push_back(GapRange);
2387  }
2388  CurrStart += (*DeltaIter)->GetLiteral().GetLength();
2389  } else if( (*DeltaIter)->IsLoc()) {
2390  s_FindGaps(Assembly, *(*DeltaIter)->GetLoc().GetId(), CurrStart, Gaps);
2391  CurrStart += (*DeltaIter)->GetLoc().GetTotalRange().GetLength();
2392  }
2393  }
2394  return CurrStart;
2395 }
2396 
2398  const CSeq_align& align)
2399 {
2400  if(!m_Gencoll) {
2401  ostr << "*";
2402  return;
2403  }
2404 
2405 
2406  list<TSeqRange> Gaps;
2407  TSeqPos SeqLength = s_FindGaps(*m_Gencoll, align.GetSeq_id(m_Row), 0, Gaps);
2408 
2409  if(SeqLength == 0) {
2410  ostr << "*";
2411  return;
2412  }
2413 
2414  TSeqRange CompRange = align.GetSeqRange(m_Row);
2415  TSeqPos MinGapDist = numeric_limits<TSeqPos>::max();
2416  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetFrom()-0)));
2417  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetTo()-SeqLength)));
2418 
2419 
2420  ITERATE(list<TSeqRange>, GapIter, Gaps) {
2421  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetFrom()-GapIter->GetFrom())));
2422  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetTo()-GapIter->GetTo())));
2423  }
2424 
2425  ostr << MinGapDist;
2426 }
2427 
2428 
2429 /////////////////////////////////////////////////////////////////////////////
2430 
2432 {
2433 }
2434 
2435 
2437 {
2438  ostr << "Blast Traceback string";
2439 }
2440 
2442 {
2443  ostr << "btop";
2444 }
2445 
2446 
2448  const CSeq_align& align)
2449 {
2450  if(!align.CanGetSegs() || !align.GetSegs().IsDenseg()) {
2452  "btop format only supports denseg alignments.");
2453  }
2454 
2455  ostr << m_Scores->GetTraceback(m_Scores->GetScope(), align, 0);
2456 }
2457 
2458 
2459 /////////////////////////////////////////////////////////////////////////////
2460 
2462  int coordinate_row)
2463 : m_IndelType(indel_type)
2464 , m_CoordinateRow(coordinate_row)
2465 {
2466 }
2467 
2468 
2470 {
2471  switch (m_IndelType) {
2472  case e_Frameshifts:
2473  ostr << "List of frameshift indels";
2474  break;
2475 
2476  case e_NonFrameshifts:
2477  ostr << "List of non-frameshifting indels";
2478  break;
2479 
2480  default:
2481  ostr << "List of all indels wihin CDS";
2482  break;
2483  }
2484  if (m_CoordinateRow == 0) {
2485  ostr << ", coordinates on query sequence";
2486  }
2487 }
2488 
2490 {
2491  switch (m_IndelType) {
2492  case e_Frameshifts:
2493  ostr << "frameshifts";
2494  break;
2495 
2496  case e_NonFrameshifts:
2497  ostr << "non-frameshift indels";
2498  break;
2499 
2500  default:
2501  ostr << "indels in cds";
2502  break;
2503  }
2504  if (m_CoordinateRow == 0) {
2505  ostr << " on query";
2506  }
2507 }
2508 
2509 
2511  const CSeq_align& align)
2512 {
2513  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(0));
2514  if ( !bsh ) {
2516  "failed to retrieve sequence for " +
2517  align.GetSeq_id(0).AsFastaString());
2518  }
2519  if (bsh.GetBioseqMolType() != CSeq_inst::eMol_rna) {
2520  NCBI_THROW(CException, eUnknown, "Not RNA alignments");
2521  }
2522 
2523  /// Only display frameshifts within cdregion
2524  CFeat_CI feat_it(bsh,
2525  SAnnotSelector()
2526  .IncludeFeatType(CSeqFeatData::e_Cdregion));
2527  if (!feat_it) {
2528  return;
2529  }
2530 
2531  vector<CSeq_align::SIndel> indels;
2532  switch (m_IndelType) {
2533  case e_Frameshifts:
2534  indels = align.GetFrameshiftsWithinRange(feat_it->GetRange());
2535  break;
2536 
2537  case e_NonFrameshifts:
2538  indels = align.GetNonFrameshiftsWithinRange(feat_it->GetRange());
2539  break;
2540 
2541  default:
2542  indels = align.GetIndelsWithinRange(feat_it->GetRange());
2543  break;
2544  }
2545 
2546  bool first = true;
2547  for (const CSeq_align::SIndel &indel : indels) {
2548  if (!first) {
2549  ostr << ',';
2550  }
2551  ostr << indel.AsString(m_CoordinateRow);
2552  first = false;
2553  }
2554 }
2555 
2556 
2557 /////////////////////////////////////////////////////////////////////////////
2558 
2560 : m_Row(row)
2561 {
2562 }
2563 
2564 
2566 {
2567  ostr << "Gene symbol for " << (m_Row == 0 ? "query" : "subject");
2568 }
2569 
2571 {
2572  ostr << (m_Row == 0 ? "query" : "subject") << "_gene_symbol";
2573 }
2574 
2575 
2577  const CSeq_align& align)
2578 {
2579  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
2580  if ( !bsh ) {
2581  ostr << "NA";
2582  return;
2583  }
2584  CFeat_CI gene_it(bsh, CSeqFeatData::e_Gene);
2585  if (!gene_it || !gene_it->GetData().GetGene().IsSetLocus()) {
2586  ostr << "NA";
2587  return;
2588  }
2589 
2590  ostr << gene_it->GetData().GetGene().GetLocus();
2591 }
2592 
2593 
2594 /////////////////////////////////////////////////////////////////////////////
2595 
2596 static string s_CodonVariation(const CSeq_align &align, TSeqPos pos,
2597  CScope &scope, int row)
2598 {
2599  CRef<CSeq_id> query_id(new CSeq_id);
2600  query_id->Assign(align.GetSeq_id(0));
2601  CRef<CSeq_loc> query_loc(new CSeq_loc(*query_id, pos, pos+2));
2602  CSeqVector query_vec(*query_loc, scope, CBioseq_Handle::eCoding_Iupac);
2603  string query;
2604  query_vec.GetSeqData(0, 3, query);
2605 
2606  CSeq_loc_Mapper mapper(align, 1);
2607  CRef<CSeq_loc> subject_loc = mapper.Map(*query_loc);
2608  CSeqVector subject_vec(*subject_loc, scope, CBioseq_Handle::eCoding_Iupac);
2609  string subject;
2610  subject_vec.GetSeqData(0, subject_vec.size(), subject);
2611 
2612  string variation;
2613  if (query != subject) {
2614  ENa_strand strand = eNa_strand_plus;
2615  if (row == 1) {
2616  pos = subject_loc->GetStart(eExtreme_Biological);
2617  strand = subject_loc->GetStrand();
2618  }
2619  unsigned snp_count = 0, snp_pos = 0;
2620  if (query.size() == subject.size() + 1) {
2621  /// query is one longer; check if this is a one-base deletion
2622  for (unsigned deletion_pos = 0; deletion_pos < subject.size();
2623  ++deletion_pos)
2624  {
2625  string subject_with_del = subject;
2626  subject_with_del.insert(deletion_pos, 1, query[deletion_pos]);
2627  if (query == subject_with_del) {
2628  subject.insert(deletion_pos, 1, '-');
2629  break;
2630  }
2631  }
2632  }
2633  if (query.size() == subject.size()) {
2634  for (unsigned index = 0; index < query.size(); ++index) {
2635  if (query[index] != subject[index]) {
2636  ++snp_count;
2637  snp_pos = index;
2638  }
2639  }
2640  }
2641  if (snp_count == 1) {
2642  pos += (strand == eNa_strand_minus ? -1 : 1) * snp_pos;
2643  variation = NStr::NumericToString(pos) + query[snp_pos] + '>'
2644  + subject[snp_pos];
2645  } else {
2646  variation = NStr::NumericToString(pos) + query + '>' + subject;
2647  }
2648  }
2649  return variation;
2650 }
2651 
2653 : m_CoordinateRow(row)
2654 {
2655 }
2656 
2657 
2659 {
2660  ostr << "Mismatches or indels within start codon";
2661  if (m_CoordinateRow == 0) {
2662  ostr << ", coordinates on query sequence";
2663  }
2664 }
2665 
2667 {
2668  ostr << "Start codon changes";
2669  if (m_CoordinateRow == 0) {
2670  ostr << " on query";
2671  }
2672 }
2673 
2674 
2676  const CSeq_align& align)
2677 {
2678  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(0));
2679  if ( !bsh ) {
2681  "failed to retrieve sequence for " +
2682  align.GetSeq_id(0).AsFastaString());
2683  }
2684  if (bsh.GetBioseqMolType() != CSeq_inst::eMol_rna) {
2685  NCBI_THROW(CException, eUnknown, "Not RNA alignments");
2686  }
2687 
2688  CFeat_CI feat_it(bsh,
2689  SAnnotSelector()
2690  .IncludeFeatType(CSeqFeatData::e_Cdregion));
2691  if (feat_it) {
2692  string variation = s_CodonVariation(align, feat_it->GetRange().GetFrom(),
2693  m_Scores->GetScope(), m_CoordinateRow);
2694  if (!variation.empty()) {
2695  ostr << variation;
2696  }
2697  }
2698 }
2699 
2700 
2701 /////////////////////////////////////////////////////////////////////////////
2702 
2704 : m_CoordinateRow(row)
2705 {
2706 }
2707 
2708 
2710 {
2711  ostr << "Mismatches or indels within stop codon";
2712  if (m_CoordinateRow == 0) {
2713  ostr << ", coordinates on query sequence";
2714  }
2715 }
2716 
2718 {
2719  ostr << "Stop codon changes";
2720  if (m_CoordinateRow == 0) {
2721  ostr << " on query";
2722  }
2723 }
2724 
2725 
2727  const CSeq_align& align)
2728 {
2729  CFeatureGenerator generator(m_Scores->GetScope());
2732  generator.SetAllowedUnaligned(10);
2733 
2734  CConstRef<CSeq_align> clean_align = generator.CleanAlignment(align);
2735  CSeq_annot annot;
2736  CBioseq_set bset;
2737  generator.ConvertAlignToAnnot(*clean_align, annot, bset);
2738  if (bset.GetSeq_set().empty() ||
2739  !bset.GetSeq_set().front()->IsSetAnnot())
2740  {
2741  return;
2742  }
2743 
2744  CScope transcribed_mrna_scope(*CObjectManager::GetInstance());
2745  transcribed_mrna_scope.AddTopLevelSeqEntry(*bset.GetSeq_set().front());
2746  CRef<CSeq_feat> cds = bset.GetSeq_set().front()
2747  -> GetSeq().GetAnnot().front()
2748  -> GetData().GetFtable().front();
2749  cds->SetData().SetCdregion().ResetCode_break();
2750  string trans;
2751  CSeqTranslator::Translate(*cds, transcribed_mrna_scope, trans);
2752  bool missing_stop = false;
2754  if (NStr::EndsWith(trans, "*")) {
2755  trans.resize(trans.size() - 1);
2756  } else {
2757  missing_stop = true;
2758  }
2759  }
2760 
2761  for (size_t changed_codons_count = 0, internal_stop_pos = trans.find('*');
2762  internal_stop_pos != string::npos || missing_stop;
2763  internal_stop_pos = trans.find('*', internal_stop_pos+1))
2764  {
2765  if (internal_stop_pos == string::npos) {
2766  /// Processed all internal stops if any; process missing final stop
2767  internal_stop_pos = trans.size() - 1;
2768  missing_stop = false;
2769  }
2770  TSeqPos isp = Convert(internal_stop_pos);
2771  string variation = s_CodonVariation(align,
2773  + isp*3,
2774  m_Scores->GetScope(), m_CoordinateRow);
2775  if (!variation.empty()) {
2776  if (changed_codons_count++) {
2777  ostr << ',';
2778  }
2779  ostr << variation;
2780  }
2781  }
2782 }
2783 
2784 
2785 /////////////////////////////////////////////////////////////////////////////
2786 
2788  const string &unavailable_string)
2789 : m_Scores(&scores), m_Ostr(ostr), m_UnavailableString(unavailable_string)
2790 {
2791  s_RegisterStandardFields(*this);
2792 }
2793 
2795 {
2796  IFormatter *qseqid =
2798  formatter.RegisterField("qseqid", qseqid);
2799  formatter.RegisterField("qacc", qseqid);
2800  formatter.RegisterField("qaccver", qseqid);
2801  formatter.RegisterField("qtag", qseqid);
2802 
2803  IFormatter *qallseqid =
2805  formatter.RegisterField("qallseqid", qallseqid);
2806  formatter.RegisterField("qallacc", qallseqid);
2807 
2808  formatter.RegisterField("qgi",
2810  formatter.RegisterField("qbaregi",
2812  formatter.RegisterField("qexactseqid",
2814 
2815  formatter.RegisterField("qlen", new CTabularFormatter_SeqLength(0));
2816  formatter.RegisterField("qstrand", new CTabularFormatter_AlignStrand(0));
2817  formatter.RegisterField("qstart", new CTabularFormatter_AlignStart(0));
2818  formatter.RegisterField("qend", new CTabularFormatter_AlignEnd(0));
2819  formatter.RegisterField("qestart", new CTabularFormatter_AlignStart(0, true));
2820  formatter.RegisterField("qeend", new CTabularFormatter_AlignEnd(0, true));
2821 
2822 
2823  IFormatter *sseqid =
2825  formatter.RegisterField("sseqid", sseqid);
2826  formatter.RegisterField("sacc", sseqid);
2827  formatter.RegisterField("saccver", sseqid);
2828  formatter.RegisterField("stag", sseqid);
2829 
2830  IFormatter *prot_seqid =
2831  new CTabularFormatter_SeqId(0, sequence::eGetId_Best, true, true);
2832  formatter.RegisterField("prot_seqid", prot_seqid);
2833  formatter.RegisterField("prot_acc", prot_seqid);
2834  formatter.RegisterField("prot_accver", prot_seqid);
2835 
2836  IFormatter *sallseqid =
2838  formatter.RegisterField("sallseqid", sallseqid);
2839  formatter.RegisterField("sallacc", sallseqid);
2840 
2841  formatter.RegisterField("sgi",
2843  formatter.RegisterField("sbaregi",
2845  formatter.RegisterField("sexactseqid",
2847 
2848  formatter.RegisterField("slen", new CTabularFormatter_SeqLength(1));
2849  formatter.RegisterField("sstrand", new CTabularFormatter_AlignStrand(1));
2850  formatter.RegisterField("sstart", new CTabularFormatter_AlignStart(1));
2851  formatter.RegisterField("send", new CTabularFormatter_AlignEnd(1));
2852  formatter.RegisterField("sestart", new CTabularFormatter_AlignStart(1, true));
2853  formatter.RegisterField("seend", new CTabularFormatter_AlignEnd(1, true));
2854 
2855  formatter.RegisterField("evalue", new CTabularFormatter_EValue);
2856  formatter.RegisterField("evalue_mantissa", new CTabularFormatter_EValue_Mantissa);
2857  formatter.RegisterField("evalue_exponent", new CTabularFormatter_EValue_Exponent);
2858  formatter.RegisterField("bitscore", new CTabularFormatter_BitScore);
2859  formatter.RegisterField("score", new CTabularFormatter_Score);
2860 
2861  formatter.RegisterField("length", new CTabularFormatter_AlignLength);
2862  formatter.RegisterField("length_ungap", new CTabularFormatter_AlignLengthUngap);
2863  formatter.RegisterField("align_len_ratio", new CTabularFormatter_AlignLengthRatio);
2864 
2865  formatter.RegisterField("pident", new CTabularFormatter_PercentId(true));
2866  formatter.RegisterField("pident_ungapped", new CTabularFormatter_PercentId(false));
2867  formatter.RegisterField("pcov", new CTabularFormatter_PercentCoverage(0, "pcov"));
2868  formatter.RegisterField("qcov", new CTabularFormatter_PercentCoverage(0, "qcov"));
2869  formatter.RegisterField("scov", new CTabularFormatter_PercentCoverage(1, "scov"));
2870 
2871  formatter.RegisterField("gaps", new CTabularFormatter_GapBaseCount);
2872  formatter.RegisterField("gapopen", new CTabularFormatter_GapCount);
2873 
2874  formatter.RegisterField("nident", new CTabularFormatter_IdentityCount);
2875  formatter.RegisterField("mismatch", new CTabularFormatter_MismatchCount);
2876  formatter.RegisterField("qmismatchpos", new CTabularFormatter_MismatchPositions(0));
2877  formatter.RegisterField("smismatchpos", new CTabularFormatter_MismatchPositions(1));
2878 
2879  formatter.RegisterField("qgapranges", new CTabularFormatter_GapRanges(0));
2880  formatter.RegisterField("sgapranges", new CTabularFormatter_GapRanges(1));
2881 
2882 
2883  formatter.RegisterField("qdefline",
2884  new CTabularFormatter_Defline(0));
2885  formatter.RegisterField("sdefline",
2886  new CTabularFormatter_Defline(1));
2887  formatter.RegisterField("qprotref",
2888  new CTabularFormatter_ProtRef(0));
2889  formatter.RegisterField("sprotref",
2890  new CTabularFormatter_ProtRef(1));
2891  formatter.RegisterField("qtaxid",
2892  new CTabularFormatter_TaxId(0));
2893  formatter.RegisterField("staxid",
2894  new CTabularFormatter_TaxId(1));
2895  formatter.RegisterField("quniprot_source",
2896  new CTabularFormatter_Comment(0, "uniprot source"));
2897  formatter.RegisterField("suniprot_source",
2898  new CTabularFormatter_Comment(1, "uniprot source"));
2899 
2900  formatter.RegisterField("qtaxname",
2903  formatter.RegisterField("qspecies",
2906  formatter.RegisterField("qgenus",
2909  formatter.RegisterField("qkingdom",
2912 
2913  formatter.RegisterField("staxname",
2916  formatter.RegisterField("sspecies",
2919  formatter.RegisterField("sgenus",
2922  formatter.RegisterField("skingdom",
2925 
2926  formatter.RegisterField("align_id",
2928  formatter.RegisterField("best_placement_group",
2930 
2931  formatter.RegisterField("exons",
2935  formatter.RegisterField("exon_len",
2939 
2940  formatter.RegisterField("introns",
2944  formatter.RegisterField("intron_len",
2948  formatter.RegisterField("query_exons",
2952  formatter.RegisterField("query_exon_len",
2956 
2957  formatter.RegisterField("query_unaligned",
2961  formatter.RegisterField("query_unaligned_len",
2965 
2966  formatter.RegisterField("biggestgap",
2968  formatter.RegisterField("qbiggestgap",
2970  formatter.RegisterField("sbiggestgap",
2972  formatter.RegisterField("qchrom",
2974  formatter.RegisterField("schrom",
2976  formatter.RegisterField("qclone",
2978  formatter.RegisterField("sclone",
2980  formatter.RegisterField("qtech",
2981  new CTabularFormatter_Tech(0));
2982  formatter.RegisterField("stech",
2983  new CTabularFormatter_Tech(1));
2984  formatter.RegisterField("qdiscstrand",
2986  formatter.RegisterField("sdiscstrand",
2988  formatter.RegisterField("cigar",
2990  formatter.RegisterField("btop",
2992  formatter.RegisterField("frameshifts",
2994  formatter.RegisterField("nonframeshifts",
2996  formatter.RegisterField("cds_indels",
2998  formatter.RegisterField("frameshifts_on_query",
3000  formatter.RegisterField("nonframeshifts_on_query",
3002  formatter.RegisterField("cds_indels_on_query",
3004  formatter.RegisterField("start_codon_changes",
3006  formatter.RegisterField("stop_codon_changes",
3008  formatter.RegisterField("start_codon_changes_on_query",
3010  formatter.RegisterField("stop_codon_changes_on_query",
3012  formatter.RegisterField("gene_symbol",
3014  formatter.RegisterField("qasmunit", new CTabularFormatter_AssemblyInfo(0,
3017  formatter.RegisterField("sasmunit", new CTabularFormatter_AssemblyInfo(1,
3020  formatter.RegisterField("qfullasm", new CTabularFormatter_AssemblyInfo(0,
3023  formatter.RegisterField("sfullasm", new CTabularFormatter_AssemblyInfo(1,
3026  formatter.RegisterField("qasmunitacc", new CTabularFormatter_AssemblyInfo(0,
3029  formatter.RegisterField("sasmunitacc", new CTabularFormatter_AssemblyInfo(1,
3032  formatter.RegisterField("qfullasmacc", new CTabularFormatter_AssemblyInfo(0,
3035  formatter.RegisterField("sfullasmacc", new CTabularFormatter_AssemblyInfo(1,
3038  formatter.RegisterField("qasmunitchain", new CTabularFormatter_AssemblyInfo(0,
3041  formatter.RegisterField("sasmunitchain", new CTabularFormatter_AssemblyInfo(1,
3044  formatter.RegisterField("qfullasmchain", new CTabularFormatter_AssemblyInfo(0,
3047  formatter.RegisterField("sfullasmchain", new CTabularFormatter_AssemblyInfo(1,
3050  formatter.RegisterField("qchromosome", new CTabularFormatter_AssemblyInfo(0,
3053  formatter.RegisterField("schromosome", new CTabularFormatter_AssemblyInfo(1,
3056 
3057  formatter.RegisterField("query_entropy",
3058  new CTabularFormatter_Entropy(0));
3059  formatter.RegisterField("subject_entropy",
3060  new CTabularFormatter_Entropy(1));
3061  formatter.RegisterField("query_seg_pct",
3063  formatter.RegisterField("subject_seg_pct",
3065 }
3066 
3068 {
// Hands the assembly to formatters that are already registered, then
// registers the fields that are constructed directly from the assembly.
// NOTE(review): the loop header that defines formatter_it is not visible in
// this listing; presumably it walks m_FormatterMap -- confirm.
3070  formatter_it->second->SetGencoll(gencoll);
3071  }
// Row 0 backs the "q*" field names and row 1 the "s*" field names.
3072  RegisterField("qpatchtype", new CTabularFormatter_PatchType(0, gencoll));
3073  RegisterField("spatchtype", new CTabularFormatter_PatchType(1, gencoll));
3074  RegisterField("qnearestgap", new CTabularFormatter_NearestGap(0, gencoll));
3075  RegisterField("snearestgap", new CTabularFormatter_NearestGap(1, gencoll));
3076 }
3077 
3078 /// Split a string into tokens, ignoring separators that fall inside parentheses.
///
/// @param format      text to tokenize
/// @param separators  set of characters; any of them ends the current token
///                    when it is seen outside parentheses
/// @param toks        output; non-empty tokens are appended in order, and
///                    whatever the vector already holds is kept
///
/// Parentheses themselves stay in the tokens. Two unbalanced cases reach the
/// "Unbalanced parentheses" report: a ')' with no open '(' and a '(' that is
/// still open at the end of the input.
/// NOTE(review): the statement that opens each report is not visible in this
/// listing; presumably it throws -- confirm.
3079 static void s_Split(const string &format,
3080  const string &separators,
3081  vector<string> &toks)
3082 {
3083  unsigned int paren_level = 0;
3084  string next_tok;
3085  ITERATE (string, char_it, format) {
// Separator at nesting depth 0: flush the pending token. Runs of
// separators produce no empty tokens because empty ones are dropped.
3086  if (!paren_level && separators.find(*char_it) != string::npos) {
3087  if (!next_tok.empty()) {
3088  toks.push_back(next_tok);
3089  }
3090  next_tok.clear();
3091  continue;
3092  }
3093  if (*char_it == '(') {
3094  ++paren_level;
3095  } else if (*char_it == ')') {
// paren_level is unsigned, so the zero check comes before the decrement.
3096  if (!paren_level) {
3098  "Unbalanced parentheses: " + format);
3099  }
3100  --paren_level;
3101  }
// Every non-separator character, parentheses included, joins the token.
3102  next_tok += *char_it;
3103  }
// Flush the trailing token; the input need not end with a separator.
3104  if (!next_tok.empty()) {
3105  toks.push_back(next_tok);
3106  }
3107  if (paren_level) {
3109  "Unbalanced parentheses: " + format);
3110  }
3111 }
3112 
3114 {
// Builds m_Formatters from a format specification: the string is split on
// whitespace and commas (outside parentheses) and each token is resolved to
// one formatter, in the order the tokens appear.
//
// Patterns are tried in the order of the if/else chain below. The
// two-argument patterns come first because the one-argument patterns
// ("[^)]*") also accept a comma.
// NOTE(review): the patterns are not anchored and IsMatch() checks for a
// matching substring, so a token that merely contains "score(...)" or
// "text(...)" is accepted too -- confirm this is intended.
3115  CRegexp re1("score\\(([^,]*),([^)]*)\\)");
3116  CRegexp re2("score\\(([^)]*)\\)");
3117 
3118  CRegexp text_re1("text\\(([^,]*),([^)]*)\\)");
3119  CRegexp text_re2("text\\(([^)]*)\\)");
3120 
3121  vector<string> toks;
3122  s_Split(format, " \t\n\r,", toks);
3123 
3124  ITERATE (vector<string>, it, toks) {
// Lookup and matching use a lower-cased copy of the token; sub-matches are
// read from the original token (*it), presumably so that captured names
// keep their case.
3125  string s = *it;
3126  NStr::ToLower(s);
// Registered field names take precedence over the score()/text() forms.
3127  if (m_FormatterMap.count(s)) {
3128  m_Formatters.push_back(m_FormatterMap[s]);
3129  } else if (re1.IsMatch(s)) {
3130  string score_name = re1.GetSub(*it, 1);
3131  string col_name = re1.GetSub(*it, 2);
3132  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_AnyScore(score_name, col_name)));
3133 
3134  } else if (re2.IsMatch(s)) {
// score(name): the score name doubles as the column name.
3135  string score_name = re2.GetSub(*it, 1);
3136  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_AnyScore(score_name, score_name)));
3137 
3138  } else if (text_re1.IsMatch(s)) {
// text(a,b): CTabularFormatter_FixedText takes (col_name, text), so the
// first capture is the column name and the second is the text, despite
// the local variable names used here.
3139  string score_name = text_re1.GetSub(*it, 1);
3140  string col_name = text_re1.GetSub(*it, 2);
3141  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_FixedText(score_name, col_name)));
3142 
3143  } else if (text_re2.IsMatch(s)) {
// text(a): the single capture is used as both column name and text.
3144  string score_name = text_re2.GetSub(*it, 1);
3145  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_FixedText(score_name, score_name)));
3146 
3147  } else {
// Unknown token: reported through ERR_POST; no formatter is added for it.
3148  ERR_POST(Error << "unhandled field: " << s);
3149  }
3150  }
3151 
// NOTE(review): the loop header for the next statement is not visible in
// this listing; presumably every selected formatter receives m_Scores --
// confirm.
3153  (*it)->SetScoreLookup(m_Scores);
3154  }
3155 }
3156 
3157 
3159 {
// Writes the header row to m_Ostr: a leading '#', then each formatter's
// header separated by tabs, then a newline.
// NOTE(review): the loop header that defines `it` is not visible in this
// listing; presumably it iterates m_Formatters -- confirm.
3160  m_Ostr << '#';
3162  (*it)->PrintHeader(m_Ostr);
3163 
// Look one element ahead so the tab is written between columns only, never
// after the last one.
3164  list< CIRef<IFormatter> >::const_iterator i = it;
3165  ++i;
3166  if (i != m_Formatters.end()) {
3167  m_Ostr << '\t';
3168  }
3169  }
3170 
3171  m_Ostr << '\n';
3172 }
3173 
3174 
3176 {
// Writes one row for `align` to m_Ostr: each formatter prints its column,
// columns are separated by tabs, and the row ends with a newline.
// NOTE(review): the loop header that defines `it` is not visible in this
// listing; presumably it iterates m_Formatters -- confirm.
3178  try {
3179  (*it)->Print(m_Ostr, align);
3180  } catch (...) {
// With no placeholder configured, a failing column aborts the row by
// rethrowing the exception.
3181  if (m_UnavailableString.empty()) {
3182  throw;
3183  }
3184  /// User provided a string to mark unavailable fields instead of
3185  /// failing
// NOTE(review): the statement that belongs here is not visible in this
// listing; presumably it writes m_UnavailableString to m_Ostr -- confirm.
3187  }
3188 
// Look one element ahead so the tab is written between columns only, never
// after the last one.
3189  list< CIRef<IFormatter> >::const_iterator i = it;
3190  ++i;
3191  if (i != m_Formatters.end()) {
3192  m_Ostr << '\t';
3193  }
3194  }
// Runs after all columns of this alignment have been printed.
3195  m_Scores->UpdateState(align);
3196 
3197  m_Ostr << '\n';
3198 }
3199 
3200 END_SCOPE(ncbi)
3201 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
void SetFlags(TFeatureGeneratorFlags)
Definition: gene_model.cpp:195
void SetAllowedUnaligned(TSeqPos)
Definition: gene_model.cpp:215
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
Definition: gene_model.cpp:221
string GetAccession() const
Retrieve the accession for this assembly.
Definition: GC_Assembly.cpp:99
string GetName() const
Retrieve the name of this assembly.
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
Definition: ncbistre.hpp:802
ostream & AsString(ostream &s) const
Definition: Object_id.cpp:202
void GetLabel(string *label) const
Definition: Prot_ref.cpp:62
const_iterator end() const
Definition: range_coll.hpp:86
const_iterator begin() const
Definition: range_coll.hpp:82
bool Empty() const
Definition: range_coll.hpp:138
CRef –.
Definition: ncbiobj.hpp:618
CRegexp –.
Definition: regexp.hpp:70
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row of an alignment; all gaps by default.
Definition: Seq_align.cpp:1550
CRangeCollection< TSeqPos > GetAlignedBases(TDim row) const
Retrieves the locations of aligned bases in the given row, excluding gaps and discontinuities.
Definition: Seq_align.cpp:1796
vector< SIndel > GetNonFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1765
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
TDim CheckNumRows(void) const
Validators.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
vector< SIndel > GetFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1747
vector< SIndel > GetIndelsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1783
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
Definition: Seq_align.cpp:1993
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
Definition: Seq_align.cpp:1557
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
objects::CScoreLookup * m_Scores
Definition: tabular_fmt.hpp:73
CTabularFormatter_AlignEnd(int row, bool nominus=false)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
formatter for dumping alignment identifiers
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CTabularFormatter_AlignStart(int row, bool nominus=false)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
Definition: tabular_fmt.cpp:91
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
Definition: tabular_fmt.cpp:98
void PrintHeader(CNcbiOstream &ostr) const
Definition: tabular_fmt.cpp:79
formatter for dumping any score in an alignment
CTabularFormatter_AnyScore(const string &score_name, const string &col_name)
formatter for dumping any score in an alignment
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CTabularFormatter_AssemblyInfo(int row, EAssemblyType type, EInfo info)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
CConstRef< objects::CGC_Assembly > m_Gencoll
virtual void SetGencoll(CConstRef< objects::CGC_Assembly > gencoll)
formatter for dumping alignment identifiers
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
TSeqPos x_CalcBiggestGap(const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping cigar of alignments
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
formatter for dumping content of sequence comment descriptors
CTabularFormatter_Comment(int row, const string &prefix)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping sequence deflines
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
objects::sequence::CDeflineGenerator generator
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void x_RecurseStrands(const objects::CSeq_align &align, bool &Plus, bool &Minus)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
formatter for Shannon's entropy
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping exons
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CTabularFormatter_FixedText(const string &col_name, const string &text)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
CTabularFormatter_Indels(EIndelType indel_type, int coordinate_row)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CConstRef< objects::CGC_Assembly > m_Gencoll
void PrintHeader(CNcbiOstream &ostr) const
CTabularFormatter_NearestGap(int row, CConstRef< objects::CGC_Assembly > gencoll)
formatter for dumping organism names
std::unique_ptr< objects::CTaxon1 > m_Taxon1
CTabularFormatter_OrgName(int row, EField field=eFullTaxName)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CConstRef< objects::CGC_Assembly > m_Gencoll
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
CTabularFormatter_PatchType(int row, CConstRef< objects::CGC_Assembly > gencoll)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
CTabularFormatter_PercentId(bool gapped=false)
void PrintHeader(CNcbiOstream &ostr) const
formatter for dumping sequence Prot-refs (protein only)
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
formatter for BLAST seg %
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
CTabularFormatter_SeqId(int row, objects::sequence::EGetIdType id_type, bool tag_only=false, bool protein=false)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
objects::sequence::EGetIdType m_GetIdType
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping tax-ids
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
Simple tabular alignment formatter This is a replacement for the BLAST tabular formatter that support...
Definition: tabular_fmt.hpp:57
void SetGencoll(CConstRef< objects::CGC_Assembly > gencoll)
void RegisterField(const string &field_name, IFormatter *field_formatter)
Definition: tabular_fmt.hpp:84
objects::CScoreLookup * m_Scores
void Format(const objects::CSeq_align &align)
static void s_RegisterStandardFields(CTabularFormatter &formatter)
void SetFormat(const string &format)
string m_UnavailableString
CTabularFormatter(CNcbiOstream &ostr, objects::CScoreLookup &scores, const string &unavailable_string="")
CNcbiOstream & m_Ostr
list< CIRef< IFormatter > > m_Formatters
Definition: tabular_fmt.hpp:99
TFormatterMap m_FormatterMap
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
#define Code
string Offset()
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define TAX_ID_FROM(T, value)
Definition: ncbimisc.hpp:1111
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define Handle
Definition: ncbistd.hpp:119
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
Definition: enumerated.cpp:146
@ eUnknown
Definition: app_popup.hpp:72
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
int EGetIdType
Definition: sequence.hpp:126
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
Definition: sequence.hpp:101
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
@ eGetId_HandleDefault
returns the ID associated with a bioseq-handle
Definition: sequence.hpp:104
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
vector< CSeq_id_Handle > TIds
Definition: scope.hpp:143
const CSeqFeatData & GetData(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
TRange GetRange(void) const
Get range for mapped seq-feat's location.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
position_type GetLength(void) const
Definition: range.hpp:158
TThisType & SetLength(position_type length)
Definition: range.hpp:194
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
Definition: regexp.cpp:193
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
Definition: regexp.cpp:156
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
Definition: BioSource_.hpp:533
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to the Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TInt GetInt(void) const
Get the variant data.
const TType & GetType(void) const
Get the Type member data.
TUnit & SetUnit(void)
Select the variant.
const TProtpos & GetProtpos(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_align_.hpp:976
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool IsSetId(void) const
Alignment id. Check if a value has been assigned to the Id data member.
Definition: Seq_align_.hpp:964
bool IsSetExt(void) const
Extra info. Check if a value has been assigned to the Ext data member.
Definition: Seq_align_.hpp:989
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
list< CRef< CObject_id > > TId
Definition: Seq_align_.hpp:401
list< CRef< CUser_object > > TExt
Definition: Seq_align_.hpp:402
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
Definition: Seq_align_.hpp:915
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
const TExt & GetExt(void) const
Get the Ext member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
TTech GetTech(void) const
Get the Tech member data.
Definition: MolInfo_.hpp:497
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
Definition: MolInfo_.hpp:472
bool CanGetTech(void) const
Check if it is safe to call GetTech method.
Definition: MolInfo_.hpp:478
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
const int infinity
Definition: nucprot.cpp:52
int i
static void text(MDB_val *v)
Definition: mdb_dump.c:62
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
#define abs(a)
Definition: ncbi_heapmgr.c:130
T max(T x_, T y_)
T min(T x_, T y_)
static Format format
Definition: njn_ioutil.cpp:53
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static const char * prefix[]
Definition: pcregrep.c:405
#define row(bind, expected)
Definition: string_bind.c:73
SAnnotSelector.
static string subject
static string query
Definition: type.c:6
USING_SCOPE(objects)
static void s_Split(const string &format, const string &separators, vector< string > &toks)
Split a string, but ignore separators within parentheses.
TSeqPos s_FindGaps(const CGC_Assembly &Assembly, const CSeq_id &Id, const TSeqPos Offset, list< TSeqRange > &Gaps)
void s_AlignToSeqRanges(const CSeq_align &align, int row, list< TSeqRange > &ranges)
static string s_CodonVariation(const CSeq_align &align, TSeqPos pos, CScope &scope, int row)
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)
Modified on Thu May 23 12:25:01 2024 by modify_doxy.py rev. 669887