NCBI C++ ToolKit
tabular_fmt.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tabular_fmt.cpp 100426 2023-07-31 13:45:22Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio, Wratko Hlavina, Eyal Mozes
27  *
28  * File Description:
29  * Column formatters for tabular output of pairwise sequence alignments
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <util/xregexp/regexp.hpp>
36 #include <util/range_coll.hpp>
37 #include <objmgr/feat_ci.hpp>
38 #include <objmgr/seqdesc_ci.hpp>
46 
47 #include <objmgr/seqdesc_ci.hpp>
51 #include <objects/seq/MolInfo.hpp>
57 
59 
63 
64 #include <util/value_convert.hpp>
65 
66 #include <limits>
67 
70 
71 
72 /////////////////////////////////////////////////////////////////////////////
73 
75 : m_Row(row)
76 {
77 }
78 
80 {
81  if (m_Row == 0) {
82  ostr << "query";
83  } else if (m_Row == 1) {
84  ostr << "subject";
85  } else {
87  "only pairwise alignments are supported");
88  }
89 }
90 
92 {
93  ostr << "All ";
94  PrintHeader(ostr);
95  ostr << " Seq-id(s), separated by a ';'";
96 }
97 
99  const CSeq_align& align)
100 {
101  CSeq_id_Handle idh =
103  CScope::TIds ids = m_Scores->GetScope().GetIds(idh);
104  ITERATE (CScope::TIds, it, ids) {
105  ostr << *it;
106  CScope::TIds::const_iterator i = it;
107  ++i;
108  if (i != ids.end()) {
109  ostr << ';';
110  }
111  }
112 }
113 
114 /////////////////////////////////////////////////////////////////////////////
115 
117  sequence::EGetIdType id_type,
118  bool tag_only,
119  bool protein)
120 : m_Row(row)
121 , m_GetIdType(id_type)
122 , m_TagOnly(tag_only)
123 , m_Protein(protein)
124 {
125 }
126 
128 {
129  PrintHeader(ostr);
130  switch (m_GetIdType) {
132  ostr << " accession.version";
133  break;
134 
136  ostr << " GI";
137  break;
138 
140  ostr << " id as it appears in alignment";
141  break;
142 
143  default:
144  NCBI_THROW(CException, eUnknown, "Unimplemented seq-id type");
145  }
146  if (m_TagOnly) {
147  ostr << "; tag only for gnl seq-ids";
148  }
149 
150 }
151 
153 {
154  if (m_Protein) {
155  ostr << "protein";
156  } else if (m_Row == 0) {
157  ostr << "query";
158  } else if (m_Row == 1) {
159  ostr << "subject";
160  } else {
162  "only pairwise alignments are supported");
163  }
165  ostr << " gi";
166  }
167 }
168 
170  const CSeq_align& align)
171 {
172  CSeq_id_Handle idh =
174  if (m_Protein) {
175  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(idh);
176  if (!bsh) {
177  ostr << "NA";
178  return;
179  }
180  CFeat_CI feat_iter(bsh, CSeqFeatData::e_Cdregion);
181  if (!feat_iter) {
182  ostr << "NA";
183  return;
184  }
185  idh = CSeq_id_Handle::GetHandle(*feat_iter->GetProduct().GetId());
186  }
187  CSeq_id_Handle best =
188  sequence::GetId(idh, m_Scores->GetScope(), m_GetIdType);
189  if ( !best ) {
190  best = idh;
191  }
192  if (m_TagOnly) {
193  if (best.GetSeqId()->IsGeneral()) {
194  best.GetSeqId()->GetGeneral().GetTag().AsString(ostr);
195  } else {
196  string acc;
197  best.GetSeqId()->GetLabel(&acc, CSeq_id::eContent);
198  ostr << acc;
199  }
200  } else {
201  ostr << best;
202  }
203 }
204 
205 /////////////////////////////////////////////////////////////////////////////
206 
208 : m_Row(row), m_NoMinus(nominus)
209 {
210 }
211 
213 {
214  ostr << "Start of alignment in ";
215  if (m_Row == 0) {
216  ostr << "query";
217  } else if (m_Row == 1) {
218  ostr << "subject";
219  } else {
221  "only pairwise alignments are supported");
222  }
223 }
224 
226 {
227  if (m_Row == 0) {
228  ostr << "qstart";
229  } else if (m_Row == 1) {
230  ostr << "sstart";
231  } else {
233  "only pairwise alignments are supported");
234  }
235 }
236 
238  const CSeq_align& align)
239 {
240  // determine global flip status
241 
242  if(m_NoMinus) {
243  ostr << align.GetSeqStart(m_Row) + 1;
244  return;
245  }
246 
247  if (m_Row == 0) {
248  TSeqRange r = align.GetSeqRange(m_Row);
249  ostr << min(r.GetFrom(), r.GetTo()) + 1;
250  } else {
251  TSeqPos start = align.GetSeqStart(m_Row);
252  TSeqPos stop = align.GetSeqStop(m_Row);
253 
254  bool qneg = (align.GetSeqStrand(0) == eNa_strand_minus);
255  bool sneg = (align.GetSeqStrand(1) == eNa_strand_minus);
256 
257  if (qneg) {
258  sneg = !sneg;
259  }
260  if (sneg) {
261  std::swap(start, stop);
262  }
263 
264  ostr << start + 1;
265  }
266 }
267 
268 
269 /////////////////////////////////////////////////////////////////////////////
270 
272 : m_Row(row), m_NoMinus(nominus)
273 {
274 }
275 
277 {
278  ostr << "End of alignment in ";
279  if (m_Row == 0) {
280  ostr << "query";
281  } else if (m_Row == 1) {
282  ostr << "subject";
283  } else {
285  "only pairwise alignments are supported");
286  }
287 }
288 
290 {
291  if (m_Row == 0) {
292  ostr << "qend";
293  } else if (m_Row == 1) {
294  ostr << "send";
295  } else {
297  "only pairwise alignments are supported");
298  }
299 }
300 
302  const CSeq_align& align)
303 {
304  if(m_NoMinus) {
305  ostr << align.GetSeqStop(m_Row) + 1;
306  return;
307  }
308 
309  if (m_Row == 0) {
310  TSeqRange r = align.GetSeqRange(m_Row);
311  ostr << max(r.GetFrom(), r.GetTo()) + 1;
312  } else {
313  TSeqPos start = align.GetSeqStart(m_Row);
314  TSeqPos stop = align.GetSeqStop(m_Row);
315 
316  bool qneg = (align.GetSeqStrand(0) == eNa_strand_minus);
317  bool sneg = (align.GetSeqStrand(1) == eNa_strand_minus);
318 
319  if (qneg) {
320  sneg = !sneg;
321  }
322  if (sneg) {
323  std::swap(start, stop);
324  }
325 
326  ostr << stop + 1;
327  }
328 }
329 
330 /////////////////////////////////////////////////////////////////////////////
331 
333 : m_Row(row)
334 {
335 }
336 
338 {
339  ostr << "Strand of alignment in ";
340  if (m_Row == 0) {
341  ostr << "query";
342  } else if (m_Row == 1) {
343  ostr << "subject";
344  } else {
346  "only pairwise alignments are supported");
347  }
348 }
349 
351 {
352  if (m_Row == 0) {
353  ostr << "qstrand";
354  } else if (m_Row == 1) {
355  ostr << "sstrand";
356  } else {
358  "only pairwise alignments are supported");
359  }
360 }
361 
363  const CSeq_align& align)
364 {
365  switch (align.GetSeqStrand(m_Row)) {
366  case eNa_strand_plus:
367  ostr << '+';
368  break;
369 
370  case eNa_strand_minus:
371  ostr << '-';
372  break;
373 
374  case eNa_strand_both:
375  ostr << 'b';
376  break;
377 
378  default:
379  ostr << '?';
380  break;
381  }
382 }
383 
384 /////////////////////////////////////////////////////////////////////////////
385 
387 : m_Row(row)
388 {
389 }
390 
392 {
393  ostr << "Length of ";
394  if (m_Row == 0) {
395  ostr << "query";
396  } else if (m_Row == 1) {
397  ostr << "subject";
398  } else {
400  "only pairwise alignments are supported");
401  }
402  ostr << " sequence";
403 }
404 
406 {
407  if (m_Row == 0) {
408  ostr << "qlen";
409  } else if (m_Row == 1) {
410  ostr << "slen";
411  } else {
413  "only pairwise alignments are supported");
414  }
415 }
416 
418  const CSeq_align& align)
419 {
420  double score =
421  m_Scores->GetScore(align,
422  m_Row == 0 ? "query_length" : "subject_length");
423  if (score == numeric_limits<double>::quiet_NaN()) {
424  score = 0;
425  }
426  ostr << (int) score;
427 }
428 
429 
430 /////////////////////////////////////////////////////////////////////////////
431 
433 {
434  ostr << "Alignment length";
435 }
436 
438 {
439  ostr << "length";
440 }
441 
443  const CSeq_align& align)
444 {
445  ostr << (int)m_Scores->GetScore(align, "align_length");
446 }
447 
448 /////////////////////////////////////////////////////////////////////////////
449 
451 {
452  ostr << "Alignment length not counting gaps";
453 }
454 
456 {
457  ostr << "length_ungap";
458 }
459 
461  const CSeq_align& align)
462 {
463  ostr << (int)m_Scores->GetScore(align, "align_length_ungap");
464 }
465 
466 //////////////////////////////////////////////////////////////////////////////
467 
469 : m_Gapped(gapped)
470 {
471 }
472 
474 {
475  ostr << "Percentage of identical matches";
476  if (!m_Gapped) {
477  ostr << " excluding gaps on either row";
478  }
479 }
480 
482 {
483  ostr << "pident";
484  if (m_Gapped) {
485  ostr << "(gapped)";
486  } else {
487  ostr << "(ungapped)";
488  }
489 }
490 
492  const CSeq_align& align)
493 {
494  double pct_id = m_Scores->GetScore(align,
495  m_Gapped ? "pct_identity_gap"
496  : "pct_identity_ungap");
497  if (pct_id != 100) {
498  pct_id = min(pct_id, 99.99);
499  }
500  ostr << pct_id;
501 }
502 
503 /////////////////////////////////////////////////////////////////////////////
504 
506 {
507  ostr << (m_Row == 0 ? "Percent coverage of query in subject"
508  : "Percent coverage of subject in query");
509 }
510 
512 {
513  ostr << m_Header;
514 }
515 
517  const CSeq_align& align)
518 {
519  double pct_cov = m_Scores->GetScore(align, m_Row == 0
520  ? "pct_coverage" : "subject_coverage");
521  if (pct_cov != 100) {
522  pct_cov = min(pct_cov, 99.99);
523  }
524  ostr << pct_cov;
525 }
526 
527 /////////////////////////////////////////////////////////////////////////////
528 
530 {
531  ostr << "Number of gap openings";
532 }
533 
535 {
536  ostr << "gapopen";
537 }
539  const CSeq_align& align)
540 {
541  ostr << align.GetNumGapOpenings();
542 }
543 
544 /////////////////////////////////////////////////////////////////////////////
545 
547 {
548  ostr << "Number of identical matches";
549 }
551 {
552  ostr << "identities";
553 }
555  const CSeq_align& align)
556 {
557  ostr << (int)m_Scores->GetScore(align, "num_ident");
558 }
559 
560 /////////////////////////////////////////////////////////////////////////////
561 
563 {
564  ostr << "Number of mismatches";
565 }
567 {
568  ostr << "mismatch";
569 }
571  const CSeq_align& align)
572 {
573  ostr << (int)m_Scores->GetScore(align, "num_mismatch");
574 }
575 
576 /////////////////////////////////////////////////////////////////////////////
577 
/// Append to `ranges` the intervals that `align` covers on row `row`.
///
/// Dense-seg branch: one range is appended per segment in which no row has a
/// start of -1; the range starts at this row's start for the segment and has
/// the segment's length.  Segments with a -1 start in any row are skipped.
/// Disc branch: the function recurses into every sub-alignment.
/// Any other segment type throws CSeqalignException (eUnsupported).
///
/// `ranges` is only appended to, never cleared.
///
/// NOTE(review): the `case` labels, the declaration of `range` and the
/// iteration header of the disc loop are not visible in this listing; the
/// branch descriptions above are inferred from the GetDenseg()/GetDisc()
/// calls inside each branch -- confirm against the real source.
578 void s_AlignToSeqRanges(const CSeq_align& align, int row, list<TSeqRange>& ranges)
579 {
580  // this should be added to CSeq_align as a list instead of RangeColl version of GetAlignedBases
581  switch (align.GetSegs().Which()) {
583  {{
584  const CDense_seg& ds = align.GetSegs().GetDenseg();
585  for (CDense_seg::TNumseg i = 0; i < ds.GetNumseg(); ++i) {
 // A segment counts as gapped as soon as any row reports start == -1.
586  bool is_gapped = false;
 // Starts are indexed as segment * dimension + row.
587  for (CDense_seg::TDim j = 0; j < ds.GetDim(); ++j) {
588  if (ds.GetStarts()[i * ds.GetDim() + j] == -1)
589  {
590  is_gapped = true;
591  break;
592  }
593  }
 // Only segments without a gap in any row contribute a range.
594  if (!is_gapped) {
595  TSignedSeqPos start = ds.GetStarts()[i * ds.GetDim() + row];
597  range.SetFrom(start);
598  range.SetLength(ds.GetLens()[i]);
599  ranges.push_back(range);
600  }
601  }
602  }}
603  break;
605  {{
 // Each sub-alignment adds its own ranges to the same output list.
607  align.GetSegs().GetDisc().Get()) {
608  s_AlignToSeqRanges(*(*iter), row, ranges);
609  }
610  }}
611  break;
612  default:
 // Every remaining segment representation is rejected.
613  NCBI_THROW(CSeqalignException, eUnsupported,
614  "smismatchpos and qmismatchpos currently do not handle "
615  "this type of alignment.");
616  }
617 }
618 
620  : m_Row(row)
621 {
622 }
623 
625 {
626  ostr << "Positions of aligned mismatches, comma seperated";
627 }
629 {
630  if(m_Row == 0) {
631  ostr << "qmismatchpos";
632  } else if(m_Row == 1) {
633  ostr << "smismatchpos";
634  } else {
636  "only pairwise alignments are supported");
637  }
638 }
640  const CSeq_align& align)
641 {
642  ENa_strand QStrand = align.GetSeqStrand(0);
643  ENa_strand SStrand = align.GetSeqStrand(1);
644  vector<TSeqPos> mm_pos;
645 
646  if (align.GetSegs().IsSpliced()) {
647  /// Special handling for Spliced-seg, since mismatch location is already in
648  /// the alignment
649  if (align.GetSegs().GetSpliced().GetProduct_type() ==
651  {
652  NCBI_THROW(CException, eUnknown, "smismatchpos and qmismatchpos not "
653  "supported for protein alignments");
654  }
655 
656  for (const CRef<CSpliced_exon> &exon : align.GetSegs().GetSpliced().GetExons()) {
657  if (!exon->IsSetParts()) {
658  continue;
659  }
660  ENa_strand exon_qstrand = exon->IsSetProduct_strand()
661  ? exon->GetProduct_strand() : QStrand;
662  ENa_strand exon_sstrand = exon->IsSetGenomic_strand()
663  ? exon->GetGenomic_strand() : SStrand;
664  ENa_strand strand = m_Row == 0 ? exon_qstrand : exon_sstrand;
665  TSeqPos qpos = exon_qstrand == eNa_strand_plus ? exon->GetProduct_start().GetNucpos()
666  : exon->GetProduct_end().GetNucpos();
667  TSeqPos spos = exon_sstrand == eNa_strand_plus ? exon->GetGenomic_start()
668  : exon->GetGenomic_end();
669  TSeqPos pos = m_Row == 0 ? qpos : spos;
670  int direction = strand == eNa_strand_plus ? 1 : -1;
671  for (const CRef<CSpliced_exon_chunk> &part : exon->GetParts()) {
672  switch (part->Which()) {
674  pos += direction * part->GetMatch();
675  break;
676 
678  for (unsigned i = 0; i < part->GetMismatch(); ++i) {
679  mm_pos.push_back(pos);
680  pos += direction;
681  }
682  break;
683 
685  if (m_Row == 0) {
686  pos += direction * part->GetProduct_ins();
687  }
688  break;
689 
691  if (m_Row == 1) {
692  pos += direction * part->GetGenomic_ins();
693  }
694  break;
695 
696  default:
697  NCBI_THROW(CException, eUnknown, "smismatchpos and qmismatchpos not "
698  "supported for alignments with diag");
699  }
700  }
701  }
702  } else {
703 
704  TSeqRange QAlignRange, SAlignRange;
705  QAlignRange = align.GetSeqRange(0);
706  SAlignRange = align.GetSeqRange(1);
707 
708  string QueryStr, SubjtStr;
709  {{
710  CBioseq_Handle QueryH, SubjtH;
711  QueryH = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(0));
712  SubjtH = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(1));
713 
714  CSeqVector QueryVec(QueryH, CBioseq_Handle::eCoding_Iupac, QStrand);
715  CSeqVector SubjtVec(SubjtH, CBioseq_Handle::eCoding_Iupac, SStrand);
716 
717  if(QStrand == eNa_strand_plus)
718  QueryVec.GetSeqData(QAlignRange.GetFrom(), QAlignRange.GetTo()+1, QueryStr);
719  else if(QStrand == eNa_strand_minus)
720  QueryVec.GetSeqData(QueryVec.size()-QAlignRange.GetTo()-1,
721  QueryVec.size()-QAlignRange.GetFrom(), QueryStr);
722 
723  if(SStrand == eNa_strand_plus)
724  SubjtVec.GetSeqData(SAlignRange.GetFrom(), SAlignRange.GetTo()+1, SubjtStr);
725  else if(SStrand == eNa_strand_minus)
726  SubjtVec.GetSeqData(SubjtVec.size()-SAlignRange.GetTo()-1,
727  SubjtVec.size()-SAlignRange.GetFrom(), SubjtStr);
728 
729  string QS = QueryStr.substr(0,50);
730  string SS = SubjtStr.substr(0,50);
731  }}
732 
733  list<TSeqRange> QSegRanges, SSegRanges;
734  s_AlignToSeqRanges(align, 0, QSegRanges);
735  s_AlignToSeqRanges(align, 1, SSegRanges);
736 
737 
738  // loop segments
739  list<TSeqRange>::const_iterator SSegIter = SSegRanges.begin();
740  ITERATE(list<TSeqRange>, QSegIter, QSegRanges) {
741  TSeqRange QuerySeg = *QSegIter;
742  TSeqRange SubjtSeg = *SSegIter;
743 
744  if(QuerySeg.GetLength() != SubjtSeg.GetLength()) {
745  NCBI_THROW(CException, eUnknown, "mismatched segment sizes?");
746  }
747 
748  size_t QOffset, SOffset;
749  {{
750  size_t QPOffset = QuerySeg.GetFrom()-QAlignRange.GetFrom();
751  size_t QMOffset = QAlignRange.GetTo()-QuerySeg.GetTo();
752  QOffset = (QStrand == eNa_strand_plus ? QPOffset : QMOffset);
753 
754  size_t SPOffset = SubjtSeg.GetFrom()-SAlignRange.GetFrom();
755  size_t SMOffset = SAlignRange.GetTo()-SubjtSeg.GetTo();
756  SOffset = (SStrand == eNa_strand_plus ? SPOffset : SMOffset);
757  }}
758 
759  // find locations
760  for(unsigned Loop = 0; Loop < QuerySeg.GetLength(); Loop++) {
761  size_t QLoop = QOffset+Loop;
762  size_t SLoop = SOffset+Loop;
763 
764  //if(Loop < 6 || Loop+6 > QuerySeg.GetLength())
765  // cerr << "L: " << Loop << " " << QLoop << " " << SLoop << " : "
766  // << QueryStr[QLoop] << " == " << SubjtStr[SLoop] << endl;
767 
768 
769  if(QueryStr[QLoop] == SubjtStr[SLoop]) {
770  ;
771  } else {
772  if(m_Row == 0)
773  mm_pos.push_back(QStrand == eNa_strand_plus
774  ? QuerySeg.GetFrom()+Loop : QuerySeg.GetTo()-Loop);
775  else if(m_Row == 1)
776  mm_pos.push_back(SStrand == eNa_strand_plus
777  ? SubjtSeg.GetFrom()+Loop : SubjtSeg.GetTo()-Loop);
778  }
779  }
780 
781  ++SSegIter;
782  }
783  }
784 
785  sort(mm_pos.begin(), mm_pos.end());
786  ITERATE(vector<TSeqPos>, it, mm_pos) {
787  if (it != mm_pos.begin()) {
788  ostr << ',';
789  }
790  ostr << *it +1;
791  }
792 }
793 
794 /////////////////////////////////////////////////////////////////////////////
795 
797  : m_Row(row)
798 {
799 }
800 
802 {
803  ostr << "Positions of gapped, unaligned, segments, comma seperated";
804 }
806 {
807  if(m_Row == 0) {
808  ostr << "qgapranges";
809  } else if(m_Row == 1) {
810  ostr << "sgapranges";
811  } else {
813  "only pairwise alignments are supported");
814  }
815 }
817  const CSeq_align& align)
818 {
820  CRangeCollection<TSeqPos> GappedRC;
821  GappedRC += align.GetSeqRange(m_Row);
822  GappedRC -= AlignedRC;
823 
824  //vector<TSeqPos> mm_pos;
825  //sort(mm_pos.begin(), mm_pos.end());
826  ITERATE(CRangeCollection<TSeqPos>, it, GappedRC) {
827  if (it != GappedRC.begin()) {
828  ostr << ',';
829  }
830  ostr << it->GetFrom()+1 << "-" << it->GetTo()+1;
831  }
832 }
833 
834 /////////////////////////////////////////////////////////////////////////////
835 
837 {
838  ostr << "Total number of gaps";
839 }
841 {
842  ostr << "gaps";
843 }
845  const CSeq_align& align)
846 {
847  ostr << align.GetTotalGapCount();
848 }
849 
850 /////////////////////////////////////////////////////////////////////////////
851 
853 {
854  ostr << "Expect value";
855 }
857 {
858  ostr << "evalue";
859 }
861  const CSeq_align& align)
862 {
863  double score = m_Scores->GetScore(align, "e_value");
864  if (score == numeric_limits<double>::infinity() ||
865  score == numeric_limits<double>::quiet_NaN()) {
866  score = 0;
867  }
868  if (score > 1e26) {
869  score = 0;
870  }
871  if (score < -1e26) {
872  score = 0;
873  }
874 
875  //get the current flags
876  ios_base::fmtflags cur_flags=ostr.flags();
877 
878  //print using scientific
879  ostr << scientific << score;
880 
881  //unset scientific
882  ostr.unsetf(ios_base::scientific);
883 
884  //reset to original flags
885  ostr << setiosflags(cur_flags);
886 }
887 
888 
889 /////////////////////////////////////////////////////////////////////////////
890 
892 {
893  ostr << "Expect value in mantissa format";
894 }
896 {
897  ostr << "evalue_mantissa";
898 }
900  const CSeq_align& align)
901 {
902  double score = 0;
903  if ( !align.GetNamedScore(CSeq_align::eScore_EValue, score) ) {
904  score = m_Scores->GetScore(align, "e_value");
905  }
906  if (score == numeric_limits<double>::infinity() ||
907  score == numeric_limits<double>::quiet_NaN()) {
908  score = 0;
909  }
910  if (score > 1e26) {
911  score = 0;
912  }
913  if (score < -1e26) {
914  score = 0;
915  }
916 
917  double mantissa = score;
918  int exponent = 0;
919 
920  if(score > 0.0) {
921  while(mantissa >= 10.0) {
922  mantissa /= 10.0;
923  exponent++;
924  }
925  while(mantissa < 1.0) {
926  mantissa *= 10.0;
927  exponent--;
928  }
929  } else if(score < 0.0) {
930  while(mantissa <= -10.0) {
931  mantissa /= 10.0;
932  exponent--;
933  }
934  while(mantissa > -1.0) {
935  mantissa *= 10.0;
936  exponent++;
937  }
938  }
939 
940  ostr << mantissa;
941 }
942 
943 
944 /////////////////////////////////////////////////////////////////////////////
945 
947 {
948  ostr << "Expect value in exponent format";
949 }
951 {
952  ostr << "evalue_exponent";
953 }
955  const CSeq_align& align)
956 {
957  double score = 0;
958  if ( !align.GetNamedScore(CSeq_align::eScore_EValue, score) ) {
959  score = m_Scores->GetScore(align, "e_value");
960  }
961  if (score == numeric_limits<double>::infinity() ||
962  score == numeric_limits<double>::quiet_NaN()) {
963  score = 0;
964  }
965  if (score > 1e26) {
966  score = 0;
967  }
968  if (score < -1e26) {
969  score = 0;
970  }
971 
972  double mantissa = score;
973  int exponent = 0;
974 
975 
976  if(score > 0.0) {
977  while(mantissa >= 10.0) {
978  mantissa /= 10.0;
979  exponent++;
980  }
981  while(mantissa < 1.0) {
982  mantissa *= 10.0;
983  exponent--;
984  }
985  } else if(score < 0.0) {
986  while(mantissa <= -10.0) {
987  mantissa /= 10.0;
988  exponent--;
989  }
990  while(mantissa > -1.0) {
991  mantissa *= 10.0;
992  exponent++;
993  }
994  }
995 
996  ostr << exponent;
997 }
998 
999 
1000 
1001 
1002 
1003 /////////////////////////////////////////////////////////////////////////////
1004 
1006 {
1007  ostr << "Bit score";
1008 }
1010 {
1011  ostr << "bitscore";
1012 }
1014  const CSeq_align& align)
1015 {
1016  double score = m_Scores->GetScore(align, "bit_score");
1017  ostr << score;
1018 }
1019 
1020 /////////////////////////////////////////////////////////////////////////////
1021 
1023 {
1024  ostr << "Raw score";
1025 }
1027 {
1028  ostr << "score";
1029 }
1031  const CSeq_align& align)
1032 {
1033  double score = m_Scores->GetScore(align, "score");
1034  ostr << score;
1035 }
1036 
1037 /////////////////////////////////////////////////////////////////////////////
1038 
1039 /// formatter for dumping any score in an alignment
1041  const string& col_name)
1042  : m_ScoreName(score_name)
1043  , m_ColName(col_name)
1044 {
1045 }
1046 
1047 
1049 {
1050  ostr << m_Scores->HelpText(m_ScoreName);
1051 }
1052 
1054 {
1055  ostr << m_ScoreName;
1056 }
1057 
1058 
1060  const CSeq_align& align)
1061 {
1062  double score_d=0.0;
1063  int score_i = 0;
1064  bool is_int = m_Scores->IsIntegerScore(align, m_ScoreName);
1065  try {
1066  if(is_int)
1067  score_i = (int)m_Scores->GetScore(align, m_ScoreName);
1068  else
1069  score_d = m_Scores->GetScore(align, m_ScoreName);
1070  } catch (CException &) {
1071  score_d = 0;
1072  score_i = 0;
1073  }
1074  if(is_int)
1075  ostr << score_i;
1076  else
1077  ostr << score_d;
1078 }
1079 
1080 
1081 /////////////////////////////////////////////////////////////////////////////
1082 
1084 {
1085  ostr << "Entropy value for the "
1086  << (m_Row == 0 ? "query " : "subject ")
1087  << "sequence";
1088 }
1089 
1090 
1092 {
1093  ostr
1094  << (m_Row == 0 ? "query_" : "subject_")
1095  << "entropy";
1096 }
1097 
1098 
1100  const objects::CSeq_align& align)
1101 {
1102  string score_name =
1103  (m_Row == 0 ? "query_" : "subject_") +
1104  string("entropy");
1105  double val = 0;
1106  if (m_Scores) {
1107  val = m_Scores->GetScore(align, score_name);
1108  }
1109  ostr << val;
1110 }
1111 
1112 
1113 /////////////////////////////////////////////////////////////////////////////
1114 
// NOTE(review): this text is word for word the help text of the entropy
// section earlier in this file, while the other two functions of this
// section use the name "seg_pct".  Looks like a copy-paste leftover --
// confirm what "seg_pct" measures and reword the text accordingly.
1116 {
1117  ostr << "Entropy value for the "
1118  << (m_Row == 0 ? "query " : "subject ")
1119  << "sequence";
1120 }
1121 
1122 
1124 {
1125  ostr
1126  << (m_Row == 0 ? "query_" : "subject_")
1127  << "seg_pct";
1128 }
1129 
1130 
1132  const objects::CSeq_align& align)
1133 {
1134  string score_name =
1135  (m_Row == 0 ? "query_" : "subject_") +
1136  string("seg_pct");
1137  double val = 0;
1138  if (m_Scores) {
1139  val = m_Scores->GetScore(align, score_name);
1140  }
1141  ostr << val;
1142 }
1143 
1144 
1145 /////////////////////////////////////////////////////////////////////////////
1146 
1148  : m_Row(row)
1149 {
1150 }
1151 
1152 
1154 {
1155  ostr << "Defline of the ";
1156  if (m_Row == 0) {
1157  ostr << "query";
1158  } else if (m_Row == 1) {
1159  ostr << "subject";
1160  } else {
1162  "only pairwise alignments are supported");
1163  }
1164  ostr << " sequence";
1165 }
1166 
1167 
1169 {
1170  if (m_Row == 0) {
1171  ostr << "qdefline";
1172  } else if (m_Row == 1) {
1173  ostr << "sdefline";
1174  } else {
1176  "only pairwise alignments are supported");
1177  }
1178 }
1179 
1180 
1182  const CSeq_align& align)
1183 {
1184  if (m_Row >= align.CheckNumRows()) {
1186  "indexing past the end of available "
1187  "sequences in an alignment");
1188  }
1189 
1191  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(idh);
1192  if (bsh) {
1193  ostr << generator.GenerateDefline(bsh);
1194  }
1195 }
1196 
1197 /////////////////////////////////////////////////////////////////////////////
1198 
1200 {
1201  ostr << "Alignment ids";
1202 }
1203 
1204 
1206 {
1207  ostr << "align_ids";
1208 }
1209 
1210 
1212  const CSeq_align& align)
1213 {
1214  if (align.IsSetId()) {
1215  bool first = true;
1216  ITERATE (CSeq_align::TId, it, align.GetId()) {
1217  if ( !first ) {
1218  ostr << ',';
1219  }
1220  if ((*it)->IsId()) {
1221  ostr << (*it)->GetId();
1222  }
1223  else if ((*it)->IsStr()) {
1224  ostr << (*it)->GetStr();
1225  }
1226  }
1227  }
1228 }
1229 
1230 /////////////////////////////////////////////////////////////////////////////
1231 
1233 {
1234  ostr << "best_placement group id";
1235 }
1236 
1238 {
1239  ostr << "best_placement_group";
1240 }
1241 
1242 
1244  const CSeq_align& align)
1245 {
1246  if (align.IsSetExt()) {
1247  ITERATE (CSeq_align::TExt, i, align.GetExt()) {
1248  const CUser_object& obj = **i;
1249  if (!obj.GetType().IsStr() ||
1250  obj.GetType().GetStr() != "placement_data") {
1251  continue;
1252  }
1253 
1254  CConstRef<CUser_field> f = obj.GetFieldRef("placement_id");
1255  if (f) {
1256  ostr << f->GetData().GetStr();
1257  break;
1258  }
1259  }
1260  }
1261 }
1262 
1263 /////////////////////////////////////////////////////////////////////////////
1264 
1266  : m_Row(row)
1267 {
1268 }
1269 
1270 
1272 {
1273  ostr << "Prot-ref of the ";
1274  if (m_Row == 0) {
1275  ostr << "query";
1276  } else if (m_Row == 1) {
1277  ostr << "subject";
1278  } else {
1280  "only pairwise alignments are supported");
1281  }
1282  ostr << " sequence";
1283 }
1284 
1285 
1287 {
1288  if (m_Row == 0) {
1289  ostr << "qprotref";
1290  } else if (m_Row == 1) {
1291  ostr << "sprotref";
1292  } else {
1294  "only pairwise alignments are supported");
1295  }
1296 }
1297 
1298 
1300  const CSeq_align& align)
1301 {
1302  if (m_Row >= align.CheckNumRows()) {
1304  "indexing past the end of available "
1305  "sequences in an alignment");
1306  }
1307 
1309  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(idh);
1310  if (bsh) {
1311  SAnnotSelector sel;
1312  sel.SetResolveTSE()
1314  CFeat_CI feat_iter(bsh, sel);
1315  if (feat_iter.GetSize() == 1) {
1316  const CProt_ref& ref = feat_iter->GetData().GetProt();
1317  string s;
1318  ref.GetLabel(&s);
1319  ostr << s;
1320  }
1321  }
1322 }
1323 
1324 /////////////////////////////////////////////////////////////////////////////
1325 
1326 
1328 {
1329  ostr << "Dump the ";
1330  switch (m_Interval) {
1331  case e_Exons:
1332  ostr << "exon";
1333  break;
1334 
1335  case e_Introns:
1336  ostr << (m_Sequence == 0 ? "unaligned segment" : "intron");
1337  break;
1338  }
1339 
1340  switch (m_Info) {
1341  case e_Range:
1342  ostr << " structure";
1343  break;
1344 
1345  case e_Length:
1346  ostr << " lengths";
1347  break;
1348  }
1349 
1350  if (m_Sequence == 0) {
1351  ostr << " for the query sequence";
1352  }
1353 
1354  ostr << " of a Spliced-seg alignment";
1355 }
1356 
1358 {
1359  if (m_Sequence == 0) {
1360  ostr << "query_";
1361  }
1362  switch (m_Interval) {
1363  case e_Exons:
1364  switch (m_Info) {
1365  case e_Range:
1366  ostr << "exons";
1367  break;
1368 
1369  case e_Length:
1370  ostr << "exon_len";
1371  break;
1372  }
1373  break;
1374 
1375  case e_Introns:
1376  switch (m_Info) {
1377  case e_Range:
1378  ostr << (m_Sequence == 0 ? "unaligned" : "introns");
1379  break;
1380 
1381  case e_Length:
1382  ostr << (m_Sequence == 0 ? "unaligned_len" : "intron_len");
1383  break;
1384  }
1385  break;
1386 
1387  }
1388 }
1389 
1391  const CSeq_align& align)
1392 {
1393  if (align.GetSegs().IsSpliced()) {
1394  bool is_protein = m_Sequence == 0 &&
1395  align.GetSegs().GetSpliced().GetProduct_type() ==
1397  if (is_protein && (m_Interval == e_Introns || m_Info == e_Length)) {
1398  CNcbiOstrstream column_name;
1399  PrintHeader(column_name);
1401  string(CNcbiOstrstreamToString(column_name))
1402  + " not supported for protein alignments");
1403  }
1404 
1405  typedef pair<const CProt_pos*, const CProt_pos*> TProteinExon;
1406  vector<TProteinExon> protein_exons;
1407  vector<TSeqRange> nuc_exons;
1408 
1409  CRangeCollection<TSeqPos> intron_ranges;
1410  if (m_Interval == e_Introns) {
1411  TSeqRange align_range = align.GetSeqRange(m_Sequence);
1412  align_range.SetFrom(align_range.GetFrom()+1);
1413  align_range.SetTo(align_range.GetTo()+1);
1414  intron_ranges += align_range;
1415  }
1416 
1418  align.GetSegs().GetSpliced().GetExons()) {
1419  const CSpliced_exon& exon = **it;
1420  TSeqRange exon_range;
1421  if (is_protein) {
1422  protein_exons.push_back(
1423  TProteinExon(
1424  &exon.GetProduct_start().GetProtpos(),
1425  &exon.GetProduct_end().GetProtpos()));
1426  } else if (m_Sequence == 1) {
1427  exon_range.SetFrom(exon.GetGenomic_start()+1);
1428  exon_range.SetTo(exon.GetGenomic_end()+1);
1429  } else {
1430  exon_range.SetFrom(exon.GetProduct_start().GetNucpos()+1);
1431  exon_range.SetTo(exon.GetProduct_end().GetNucpos()+1);
1432  }
1433  switch (m_Interval) {
1434  case e_Exons:
1435  nuc_exons.push_back(exon_range);
1436  break;
1437 
1438  case e_Introns:
1439  intron_ranges -= exon_range;
1440  break;
1441  }
1442  }
1443  list<TSeqRange> range_list;
1444  if (!nuc_exons.empty()) {
1445  range_list.insert(range_list.end(), nuc_exons.begin(),
1446  nuc_exons.end());
1447  } else if (!intron_ranges.Empty()) {
1448  range_list.insert(range_list.end(), intron_ranges.begin(),
1449  intron_ranges.end());
1450  if(m_Sequence == 1 &&
1451  (align.GetSeqStrand(0) == eNa_strand_minus ||
1452  align.GetSeqStrand(1) == eNa_strand_minus))
1453  {
1454  range_list.reverse();
1455  }
1456  }
1457  ostr << '[';
1458  if (is_protein) {
1459  ITERATE (vector<TProteinExon>, it, protein_exons) {
1460  if (it != protein_exons.begin()) {
1461  ostr << ',';
1462  }
1463 
1464  ostr << '(' << it->first->GetAmin()+1
1465  << '/' << it->first->GetFrame()
1466  << ".." << it->second->GetAmin()+1
1467  << '/' << it->second->GetFrame() << ')';
1468  }
1469  } else {
1470  ITERATE (list<TSeqRange>, it, range_list) {
1471  if (it != range_list.begin()) {
1472  ostr << ',';
1473  }
1474 
1475  switch (m_Info) {
1476  case e_Range:
1477  ostr << '('
1478  << it->GetFrom()
1479  << ".."
1480  << it->GetTo()
1481  << ')';
1482  break;
1483 
1484  case e_Length:
1485  ostr << it->GetLength();
1486  break;
1487  }
1488  }
1489  }
1490  ostr << ']';
1491  }
1492 }
1493 
1494 /////////////////////////////////////////////////////////////////////////////
1495 
1497  : m_Row(row)
1498 {
1499 }
1500 
1501 
1503 {
1504  ostr << "Taxid of the ";
1505  if (m_Row == 0) {
1506  ostr << "query";
1507  } else if (m_Row == 1) {
1508  ostr << "subject";
1509  } else {
1511  "only pairwise alignments are supported");
1512  }
1513  ostr << " sequence";
1514 }
1515 
1517 {
1518  if (m_Row == 0) {
1519  ostr << "qtaxid";
1520  } else if (m_Row == 1) {
1521  ostr << "staxid";
1522  } else {
1524  "only pairwise alignments are supported");
1525  }
1526 }
1527 
1529  const CSeq_align& align)
1530 {
1531  if (m_Row >= align.CheckNumRows()) {
1533  "indexing past the end of available "
1534  "sequences in an alignment");
1535  }
1536 
1537  ostr << (int)m_Scores->GetScore(align, m_Row == 0 ? "query_taxid"
1538  : "subject_taxid");
1539 }
1540 
1541 
1542 /////////////////////////////////////////////////////////////////////////////
1543 
1545  : m_Row(row)
1546  , m_Prefix(prefix)
1547 {
1548 }
1549 
1550 
1552 {
1553  ostr << m_Prefix << " of the ";
1554  if (m_Row == 0) {
1555  ostr << "query";
1556  } else if (m_Row == 1) {
1557  ostr << "subject";
1558  } else {
1560  "only pairwise alignments are supported");
1561  }
1562  ostr << " sequence";
1563 }
1564 
1566 {
1567  if (m_Row == 0) {
1568  ostr << "query ";
1569  } else if (m_Row == 1) {
1570  ostr << "subject ";
1571  } else {
1573  "only pairwise alignments are supported");
1574  }
1575  ostr << m_Prefix;
1576 }
1577 
1579  const CSeq_align& align)
1580 {
1581  if (m_Row >= align.CheckNumRows()) {
1583  "indexing past the end of available "
1584  "sequences in an alignment");
1585  }
1586 
1587  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(
1588  align.GetSeq_id(m_Row));
1589  if (!bsh) {
1590  ostr << "NA";
1591  return;
1592  }
1593  for (CSeqdesc_CI desc_iter(bsh, CSeqdesc::e_Comment);
1594  desc_iter; ++desc_iter)
1595  {
1596  if (NStr::StartsWith(desc_iter->GetComment(),
1597  m_Prefix + ": ", NStr::eNocase))
1598  {
1599  ostr << desc_iter->GetComment().substr(m_Prefix.size() + 2);
1600  return;
1601  }
1602  }
1603  ostr << "NA";
1604 }
1605 
1606 
1607 /////////////////////////////////////////////////////////////////////////////
1608 
1610  : m_Row(row)
1611  , m_Field(field)
1612 {
1613 }
1614 
1615 
1617 {
1618 }
1619 
1620 
1622 {
1623  switch (m_Field) {
1624  case eFullTaxName:
1625  ostr << "Full taxname of the ";
1626  break;
1627 
1628  case eSpecies:
1629  ostr << "Species name of the ";
1630  break;
1631 
1632  case eGenus:
1633  ostr << "Genus name of the ";
1634  break;
1635 
1636  case eKingdom:
1637  ostr << "Kingdom name of the ";
1638  break;
1639  }
1640 
1641  switch (m_Row) {
1642  case 0: ostr << "query"; break;
1643  case 1: ostr << "subject"; break;
1644  default:
1646  "only pairwise alignments are supported");
1647  }
1648  ostr << " sequence";
1649 }
1650 
1652 {
1653  switch (m_Row) {
1654  case 0: ostr << "q"; break;
1655  case 1: ostr << "s"; break;
1656  default:
1658  "only pairwise alignments are supported");
1659  }
1660 
1661  switch (m_Field) {
1662  case eFullTaxName: ostr << "taxname"; break;
1663  case eSpecies: ostr << "species"; break;
1664  case eGenus: ostr << "genus"; break;
1665  case eKingdom: ostr << "kingdom"; break;
1666  }
1667 }
1668 
1670  const CSeq_align& align)
1671 {
1672  if (m_Row >= align.CheckNumRows()) {
1674  "indexing past the end of available "
1675  "sequences in an alignment");
1676  }
1677 
1678  if (!m_Taxon1.get()) {
1679  m_Taxon1.reset(new CTaxon1);
1680  m_Taxon1->Init(100000);
1681  }
1682 
1683  TTaxId taxid = TAX_ID_FROM(int,
1684  (int)m_Scores->GetScore(align,
1685  m_Row == 0 ? "query_taxid"
1686  : "subject_taxid"));
1687 
1688  switch (m_Field) {
1689  case eSpecies:
1690  taxid = m_Taxon1->GetSpecies(taxid);
1691  break;
1692 
1693  case eGenus:
1694  taxid = m_Taxon1->GetGenus(taxid);
1695  break;
1696 
1697  case eKingdom:
1698  taxid = m_Taxon1->GetSuperkingdom(taxid);
1699  break;
1700 
1701  default:
1702  break;
1703  }
1704 
1705  bool is_species = false;
1706  bool is_uncultured = false;
1707  string blast_name;
1708  CConstRef<COrg_ref> org =
1709  m_Taxon1->GetOrgRef(taxid, is_species, is_uncultured, blast_name);
1710  if (org) {
1711  string label;
1712  org->GetLabel(&label);
1713  ostr << label;
1714  }
1715  else {
1716  ostr << "-";
1717  }
1718 }
1719 
1720 
1721 /////////////////////////////////////////////////////////////////////////////
1723 : m_Row(row)
1724 {
1725 }
1726 
1728 {
1729  ostr << "size of biggest gap";
1730 }
1732 {
1733  if(m_Row == e_All) {
1734  ostr << "biggestgap";
1735  } else if(m_Row == 0) {
1736  ostr << "qbiggestgap";
1737  } else if(m_Row == 1) {
1738  ostr << "sbiggestgap";
1739  } else {
1741  "only pairwise alignments are supported");
1742  }
1743 }
1745  const CSeq_align& align)
1746 {
1747  ostr << x_CalcBiggestGap(align);
1748 }
1749 
1751 {
1752  if(align.GetSegs().IsDisc()) {
1753  TSeqPos Biggest = 0;
1754  ITERATE(CSeq_align_set::Tdata, AlignIter, align.GetSegs().GetDisc().Get()) {
1755  Biggest = max(Biggest, x_CalcBiggestGap(**AlignIter));
1756  }
1757  return Biggest;
1758  } else if(align.GetSegs().IsDenseg()) {
1759  const CDense_seg& Denseg = align.GetSegs().GetDenseg();
1760  TSeqPos Biggest = 0;
1761  for(int Index = 0; Index < Denseg.GetNumseg(); Index++) {
1762  bool QGap = (Denseg.GetStarts()[2*Index] == -1);
1763  bool SGap = (Denseg.GetStarts()[(2*Index)+1] == -1);
1764  if(m_Row == e_All && (QGap || SGap)) {
1765  Biggest = max(Biggest, (TSeqPos)Denseg.GetLens()[Index]);
1766  } else if(m_Row == 0 && QGap) {
1767  Biggest = max(Biggest, (TSeqPos)Denseg.GetLens()[Index]);
1768  } else if(m_Row == 1 && SGap) {
1769  Biggest = max(Biggest, (TSeqPos)Denseg.GetLens()[Index]);
1770  }
1771  }
1772  return Biggest;
1773  } else {
1775  "biggestgap is only supported for Dense-sef and Disc alignments");
1776  }
1777 }
1778 
1779 /////////////////////////////////////////////////////////////////////////////
1781 : m_Row(row)
1782 {
1783 }
1784 
1786 {
1787  ostr << "If ";
1788  if (m_Row == 0) {
1789  ostr << "query";
1790  } else if (m_Row == 1) {
1791  ostr << "subject";
1792  } else {
1794  "only pairwise alignments are supported");
1795  }
1796  ostr << " has a chromosome, its name";
1797 }
1798 
1800 {
1801  if (m_Row == 0) {
1802  ostr << "qchrom";
1803  } else if (m_Row == 1) {
1804  ostr << "schrom";
1805  } else {
1807  "only pairwise alignments are supported");
1808  }
1809 }
1810 
1812  const CSeq_align& align)
1813 {
1814  CBioseq_Handle Handle = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
1815 
1816  string Chrom = "";
1817 
1819  while(Iter) {
1820  const CBioSource& BioSource = Iter->GetSource();
1821  if(BioSource.CanGetSubtype()) {
1822  ITERATE(CBioSource::TSubtype, SubIter, BioSource.GetSubtype()) {
1823  if( (*SubIter)->CanGetSubtype() &&
1824  (*SubIter)->GetSubtype() == CSubSource::eSubtype_chromosome &&
1825  (*SubIter)->CanGetName() ) {
1826  Chrom = (*SubIter)->GetName();
1827  }
1828  }
1829  }
1830  ++Iter;
1831  }
1832 
1833  ostr << Chrom;
1834 }
1835 
1836 /////////////////////////////////////////////////////////////////////////////
1838 : m_Row(row)
1839 {
1840 }
1841 
1843 {
1844  ostr << "If ";
1845  if (m_Row == 0) {
1846  ostr << "query";
1847  } else if (m_Row == 1) {
1848  ostr << "subject";
1849  } else {
1851  "only pairwise alignments are supported");
1852  }
1853  ostr << " has a clone, its name";
1854 }
1855 
1857 {
1858  if (m_Row == 0) {
1859  ostr << "qclone";
1860  } else if (m_Row == 1) {
1861  ostr << "sclone";
1862  } else {
1864  "only pairwise alignments are supported");
1865  }
1866 }
1867 
1869  const CSeq_align& align)
1870 {
1871  string Clone = "";
1872 
1873  try {
1874  CBioseq_Handle Handle = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
1876  while(Iter) {
1877  const CBioSource& BioSource = Iter->GetSource();
1878  if(BioSource.CanGetSubtype()) {
1879  ITERATE(CBioSource::TSubtype, SubIter, BioSource.GetSubtype()) {
1880  if( (*SubIter)->CanGetSubtype() &&
1881  (*SubIter)->GetSubtype() == CSubSource::eSubtype_clone &&
1882  (*SubIter)->CanGetName() ) {
1883  Clone = (*SubIter)->GetName();
1884  }
1885  }
1886  }
1887  ++Iter;
1888  }
1889  } catch(...) {
1890  Clone = "";
1891  }
1892 
1893  ostr << Clone;
1894 }
1895 
1896 
1897 /////////////////////////////////////////////////////////////////////////////
1899 : m_Row(row)
1900 {
1901 }
1902 
1904 {
1905  if (m_Row == 0) {
1906  ostr << "Query";
1907  } else if (m_Row == 1) {
1908  ostr << "Subject";
1909  } else {
1911  "only pairwise alignments are supported");
1912  }
1913  ostr << " sequence tech type";
1914 }
1915 
1917 {
1918  if (m_Row == 0) {
1919  ostr << "qtech";
1920  } else if (m_Row == 1) {
1921  ostr << "stech";
1922  } else {
1924  "only pairwise alignments are supported");
1925  }
1926 }
1927 
1929  const CSeq_align& align)
1930 {
1931  CBioseq_Handle Handle = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
1932 
1933  string TechStr = "(none)";
1934 
1936  while(Iter) {
1937  const CMolInfo& MolInfo = Iter->GetMolinfo();
1938  if(MolInfo.CanGetTech() && MolInfo.IsSetTech()) {
1939  const CEnumeratedTypeValues* tech_types = CMolInfo::GetTypeInfo_enum_ETech();
1940  TechStr = tech_types->FindName(MolInfo.GetTech(), false);
1941  }
1942  ++Iter;
1943  }
1944 
1945  ostr << TechStr;
1946 }
1947 
1948 //////////////////////////////////////////////////////////////////////////////
1950 : m_Row(row)
1951 {
1952 }
1953 
1955 {
1956  ostr << "Strand of alignment in ";
1957  if (m_Row == 0) {
1958  ostr << "query";
1959  } else if (m_Row == 1) {
1960  ostr << "subject";
1961  } else {
1963  "only pairwise alignments are supported");
1964  }
1965  ostr << ", 'b' if both in a Disc-seg alignment";
1966 }
1967 
1969 {
1970  if (m_Row == 0) {
1971  ostr << "qdiscstrand";
1972  } else if (m_Row == 1) {
1973  ostr << "sdiscstrand";
1974  } else {
1976  "only pairwise alignments are supported");
1977  }
1978 }
1979 
1981  const CSeq_align& align)
1982 {
1983  bool Plus=false, Minus=false;
1984  x_RecurseStrands(align, Plus, Minus);
1985  if(Plus && !Minus)
1986  ostr << '+';
1987  else if(Minus && !Plus)
1988  ostr << '-';
1989  else if(Plus && Minus)
1990  ostr << 'b';
1991 }
1992 
1994  bool& Plus, bool& Minus)
1995 {
1996  if(align.GetSegs().IsDisc()) {
1997  ITERATE(CSeq_align_set::Tdata, iter, align.GetSegs().GetDisc().Get()) {
1998  x_RecurseStrands(**iter, Plus, Minus);
1999  }
2000  return;
2001  }
2002 
2003  if(align.GetSeqStrand(m_Row) == eNa_strand_plus)
2004  Plus = true;
2005  else if(align.GetSeqStrand(m_Row) == eNa_strand_minus)
2006  Minus = true;
2007 }
2008 
2009 
2010 //////////////////////////////////////////////////////////////////////////////
2011 
2013  const string& text)
2014 : m_ColName(col_name)
2015 , m_Text(text)
2016 {
2017 }
2018 
2020 {
2021  ostr << "'" << m_Text << "' as fixed text";
2022 }
2023 
2025 {
2026  ostr << m_ColName;
2027 }
2028 
2030  const CSeq_align& align)
2031 {
2032  ostr << m_Text;
2033 }
2034 
2035 
2036 //////////////////////////////////////////////////////////////////////////////
2037 
2039 {
2040  ostr << "length_ungap / size of aligned query sequence range";
2041 }
2042 
2044 {
2045  ostr << "align_len_ratio";
2046 }
2047 
2049  const CSeq_align& align)
2050 {
2051  /// historical score:
2052  /// ungapped alignment length / length of range of query sequence
2053  TSeqPos align_length = align.GetAlignLength(false /*ungapped*/);
2054  TSeqPos align_range = align.GetSeqRange(0).GetLength();
2055  ostr << double(align_length) / double(align_range);
2056 }
2057 
2058 
2059 /////////////////////////////////////////////////////////////////////////////
2060 
2062 {
2063 }
2064 
2065 
2067 {
2068  ostr << "Alignment CIGAR string";
2069 }
2070 
2071 
2073 {
2074  ostr << "cigar";
2075 }
2076 
2077 
2079  const CSeq_align& align)
2080 {
2081  if(!align.CanGetSegs() || !align.GetSegs().IsDenseg()) {
2083  "cigar format only supports denseg alignments.");
2084  }
2085 
2086 
2087  int NumSeg = align.GetSegs().GetDenseg().GetNumseg();
2088  const CDense_seg::TStarts & Starts = align.GetSegs().GetDenseg().GetStarts();
2089  const CDense_seg::TLens & Lens = align.GetSegs().GetDenseg().GetLens();
2090 
2091  for(int Loop = 0; Loop < NumSeg; Loop++) {
2092  int Length = Lens[Loop];
2093  char Code = 0;
2094 
2095  if( Starts[ (Loop*2) ] == -1)
2096  Code = 'D';
2097  else if( Starts[ (Loop*2)+1 ] == -1)
2098  Code = 'I';
2099  else
2100  Code = 'M';
2101 
2102  ostr << Length << Code;
2103  }
2104 
2105 }
2106 
2107 
2108 //////////////////////////////////////////////////////////////////////////////
2109 
2112 : m_Row(row), m_Type(type), m_Info(info)
2113 {
2114 }
2115 
2117 {
2118  m_Gencoll = gencoll;
2119 }
2120 
2121 
2123 {
2124  switch (m_Info) {
2125  case eName:
2126  ostr << "Name of ";
2127  break;
2128  case eAccession:
2129  ostr << "Accession of ";
2130  break;
2131  case eChainId:
2132  ostr << "Chain id of ";
2133  break;
2134  case eChromosome:
2135  ostr << "Chromosome containing ";
2136  break;
2137  }
2138  if (m_Info != eChromosome) {
2139  ostr << (m_Type == eFull ? "full assembly" : "assembly unit") << " of ";
2140  }
2141  if (m_Row == 0) {
2142  ostr << "query";
2143  } else if (m_Row == 1) {
2144  ostr << "subject";
2145  } else {
2147  "only pairwise alignments are supported");
2148  }
2149  ostr << " sequence";
2150 }
2151 
2153 {
2154  if (m_Row == 0) {
2155  ostr << "q";
2156  } else if (m_Row == 1) {
2157  ostr << "s";
2158  } else {
2160  "only pairwise alignments are supported");
2161  }
2162  if (m_Info != eChromosome) {
2163  ostr << (m_Type == eFull ? "fullasm" : "asmunit");
2164  }
2165  switch (m_Info) {
2166  case eName:
2167  break;
2168  case eAccession:
2169  ostr << "acc";
2170  break;
2171  case eChainId:
2172  ostr << "chain";
2173  break;
2174  case eChromosome:
2175  ostr << "chromosome";
2176  break;
2177  }
2178 }
2179 
2181  const CSeq_align& align)
2182 {
2183  if (m_Row == 1 && align.IsSetExt())
2184  {
2185  /// For the subject sequence, the information may be stored in teh
2186  /// alignment as a User-obejct
2187  ITERATE (CSeq_align::TExt, i, align.GetExt()) {
2188  const CUser_object& obj = **i;
2189  if (!obj.GetType().IsStr() ||
2190  obj.GetType().GetStr() != "Assembly Info") {
2191  continue;
2192  }
2193 
2194  switch (m_Info) {
2195  case eName:
2196  ostr << obj.GetField(m_Type == eFull
2197  ? "Assembly Name" : "Assembly Unit Name")
2198  .GetData().GetStr();
2199  return;
2200 
2201  case eAccession:
2202  if (obj.HasField("Assembly Accession")) {
2203  ostr << obj.GetField(m_Type == eFull
2204  ? "Assembly Accession" : "Assembly Unit Accession")
2205  .GetData().GetStr();
2206  } else {
2207  ostr << "NA";
2208  }
2209  return;
2210 
2211  case eChainId:
2212  if (m_Type == eUnit) {
2213  if (obj.HasField("GenColl Chain")) {
2214  ostr << obj.GetField("GenColl Chain").GetData().GetInt();
2215  } else {
2216  ostr << "NA";
2217  }
2218  return;
2219  }
2220  break;
2221 
2222  case eChromosome:
2223  if (obj.HasField("Chromosome")) {
2224  ostr << obj.GetField("Chromosome").GetData().GetStr();
2225  } else {
2226  ostr << "NA";
2227  }
2228  return;
2229  }
2230  }
2231  }
2232 
2233  if(!m_Gencoll) {
2234  return;
2235  }
2236 
2239 
2240  if(!Seq) {
2241  return;
2242  }
2243 
2245  if (m_Info != eChromosome) {
2246  if (m_Type == eFull) {
2247  Assm = Seq->GetFullAssembly();
2248  } else {
2249  CConstRef<CGC_AssemblyUnit> Unit = Seq->GetAssemblyUnit();
2250  if (Unit) {
2251  CGC_Assembly *unit_assm = new CGC_Assembly();
2252  unit_assm->SetUnit(const_cast<CGC_AssemblyUnit &>(*Unit));
2253  Assm.Reset(unit_assm);
2254  }
2255  }
2256  if(!Assm) {
2257  return;
2258  }
2259  }
2260 
2261  switch (m_Info) {
2262  case eName:
2263  ostr << Assm->GetName();
2264  break;
2265 
2266  case eAccession:
2267  ostr << Assm->GetAccession();
2268  break;
2269 
2270  case eChainId:
2271  {{
2272  string accession = Assm->GetAccession();
2273  size_t chain_start = accession.find_first_of("123456789");
2274  size_t chain_end = accession.find('.');
2275  ostr << accession.substr(chain_start, chain_end-chain_start);
2276  }}
2277  break;
2278 
2279  case eChromosome:
2280  ostr << Seq->GetChrName();
2281  break;
2282  }
2283 }
2284 
2285 //////////////////////////////////////////////////////////////////////////////
2286 
2288 : m_Row(row), m_Gencoll(gencoll)
2289 {
2290 }
2291 
2293 {
2294  ostr << "Patch type, if any, of ";
2295  if (m_Row == 0) {
2296  ostr << "query";
2297  } else if (m_Row == 1) {
2298  ostr << "sequence";
2299  } else {
2301  "only pairwise alignments are supported");
2302  }
2303  ostr << " sequence";
2304 }
2305 
2307 {
2308  if (m_Row == 0) {
2309  ostr << "qpatchtype";
2310  } else if (m_Row == 1) {
2311  ostr << "spatchtype";
2312  } else {
2314  "only pairwise alignments are supported");
2315  }
2316 }
2317 
2319  const CSeq_align& align)
2320 {
2321  if(!m_Gencoll)
2322  return;
2323 
2325  Seq = m_Gencoll->Find(CSeq_id_Handle::GetHandle(align.GetSeq_id(m_Row)));
2326  if(!Seq)
2327  return;
2328 
2329  if(Seq->CanGetPatch_type()) {
2330  if(Seq->GetPatch_type() == CGC_Sequence::ePatch_type_fix)
2331  ostr << "FIX";
2332  else if(Seq->GetPatch_type() == CGC_Sequence::ePatch_type_novel)
2333  ostr << "NOVEL";
2334  }
2335 }
2336 
2337 //////////////////////////////////////////////////////////////////////////////
2338 
2340 : m_Row(row), m_Gencoll(gencoll)
2341 {
2342 }
2343 
2345 {
2346  ostr << "Nearest Gap, if any, or edge, of ";
2347  if (m_Row == 0) {
2348  ostr << "query";
2349  } else if (m_Row == 1) {
2350  ostr << "subject";
2351  } else {
2353  "only pairwise alignments are supported");
2354  }
2355  ostr << " sequence";
2356 }
2357 
2359 {
2360  if (m_Row == 0) {
2361  ostr << "qnearestgap";
2362  } else if (m_Row == 1) {
2363  ostr << "snearestgap";
2364  } else {
2366  "only pairwise alignments are supported");
2367  }
2368 }
2369 
2371  const CSeq_id& Id,
2372  const TSeqPos Offset,
2373  list<TSeqRange>& Gaps)
2374 {
2376 
2377  if(!Seq)
2378  return 0;
2379 
2380  if(!Seq->CanGetStructure())
2381  return 0;
2382 
2383  TSeqPos CurrStart = Offset;
2384  ITERATE(CDelta_ext::Tdata, DeltaIter, Seq->GetStructure().Get()) {
2385  if( (*DeltaIter)->IsLiteral()) {
2386  if (!(*DeltaIter)->GetLiteral().CanGetSeq_data() ||
2387  (*DeltaIter)->GetLiteral().GetSeq_data().IsGap()) {
2388  TSeqRange GapRange;
2389  GapRange.SetFrom(CurrStart);
2390  GapRange.SetLength((*DeltaIter)->GetLiteral().GetLength());
2391  Gaps.push_back(GapRange);
2392  }
2393  CurrStart += (*DeltaIter)->GetLiteral().GetLength();
2394  } else if( (*DeltaIter)->IsLoc()) {
2395  s_FindGaps(Assembly, *(*DeltaIter)->GetLoc().GetId(), CurrStart, Gaps);
2396  CurrStart += (*DeltaIter)->GetLoc().GetTotalRange().GetLength();
2397  }
2398  }
2399  return CurrStart;
2400 }
2401 
2403  const CSeq_align& align)
2404 {
2405  if(!m_Gencoll) {
2406  ostr << "*";
2407  return;
2408  }
2409 
2410 
2411  list<TSeqRange> Gaps;
2412  TSeqPos SeqLength = s_FindGaps(*m_Gencoll, align.GetSeq_id(m_Row), 0, Gaps);
2413 
2414  if(SeqLength == 0) {
2415  ostr << "*";
2416  return;
2417  }
2418 
2419  TSeqRange CompRange = align.GetSeqRange(m_Row);
2420  TSeqPos MinGapDist = numeric_limits<TSeqPos>::max();
2421  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetFrom()-0)));
2422  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetTo()-SeqLength)));
2423 
2424 
2425  ITERATE(list<TSeqRange>, GapIter, Gaps) {
2426  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetFrom()-GapIter->GetFrom())));
2427  MinGapDist = min(MinGapDist, (TSeqPos)abs((TSignedSeqPos)(CompRange.GetTo()-GapIter->GetTo())));
2428  }
2429 
2430  ostr << MinGapDist;
2431 }
2432 
2433 
2434 /////////////////////////////////////////////////////////////////////////////
2435 
2437 {
2438 }
2439 
2440 
2442 {
2443  ostr << "Blast Traceback string";
2444 }
2445 
2447 {
2448  ostr << "btop";
2449 }
2450 
2451 
2453  const CSeq_align& align)
2454 {
2455  if(!align.CanGetSegs() || !align.GetSegs().IsDenseg()) {
2457  "btop format only supports denseg alignments.");
2458  }
2459 
2460  ostr << m_Scores->GetTraceback(m_Scores->GetScope(), align, 0);
2461 }
2462 
2463 
2464 /////////////////////////////////////////////////////////////////////////////
2465 
2467  int coordinate_row)
2468 : m_IndelType(indel_type)
2469 , m_CoordinateRow(coordinate_row)
2470 {
2471 }
2472 
2473 
2475 {
2476  switch (m_IndelType) {
2477  case e_Frameshifts:
2478  ostr << "List of frameshift indels";
2479  break;
2480 
2481  case e_NonFrameshifts:
2482  ostr << "List of non-frameshifting indels";
2483  break;
2484 
2485  default:
2486  ostr << "List of all indels wihin CDS";
2487  break;
2488  }
2489  if (m_CoordinateRow == 0) {
2490  ostr << ", coordinates on query sequence";
2491  }
2492 }
2493 
2495 {
2496  switch (m_IndelType) {
2497  case e_Frameshifts:
2498  ostr << "frameshifts";
2499  break;
2500 
2501  case e_NonFrameshifts:
2502  ostr << "non-frameshift indels";
2503  break;
2504 
2505  default:
2506  ostr << "indels in cds";
2507  break;
2508  }
2509  if (m_CoordinateRow == 0) {
2510  ostr << " on query";
2511  }
2512 }
2513 
2514 
2516  const CSeq_align& align)
2517 {
2518  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(0));
2519  if ( !bsh ) {
2521  "failed to retrieve sequence for " +
2522  align.GetSeq_id(0).AsFastaString());
2523  }
2524  if (bsh.GetBioseqMolType() != CSeq_inst::eMol_rna) {
2525  NCBI_THROW(CException, eUnknown, "Not RNA alignments");
2526  }
2527 
2528  /// Only display frameshifts within cdregion
2529  CFeat_CI feat_it(bsh,
2530  SAnnotSelector()
2531  .IncludeFeatType(CSeqFeatData::e_Cdregion));
2532  if (!feat_it) {
2533  return;
2534  }
2535 
2536  vector<CSeq_align::SIndel> indels;
2537  switch (m_IndelType) {
2538  case e_Frameshifts:
2539  indels = align.GetFrameshiftsWithinRange(feat_it->GetRange());
2540  break;
2541 
2542  case e_NonFrameshifts:
2543  indels = align.GetNonFrameshiftsWithinRange(feat_it->GetRange());
2544  break;
2545 
2546  default:
2547  indels = align.GetIndelsWithinRange(feat_it->GetRange());
2548  break;
2549  }
2550 
2551  bool first = true;
2552  for (const CSeq_align::SIndel &indel : indels) {
2553  if (!first) {
2554  ostr << ',';
2555  }
2556  ostr << indel.AsString(m_CoordinateRow);
2557  first = false;
2558  }
2559 }
2560 
2561 
2562 /////////////////////////////////////////////////////////////////////////////
2563 
2565 : m_Row(row)
2566 {
2567 }
2568 
2569 
2571 {
2572  ostr << "Gene symbol for " << (m_Row == 0 ? "query" : "subject");
2573 }
2574 
2576 {
2577  ostr << (m_Row == 0 ? "query" : "subject") << "_gene_symbol";
2578 }
2579 
2580 
2582  const CSeq_align& align)
2583 {
2584  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(m_Row));
2585  if ( !bsh ) {
2586  ostr << "NA";
2587  return;
2588  }
2589  CFeat_CI gene_it(bsh, CSeqFeatData::e_Gene);
2590  if (!gene_it || !gene_it->GetData().GetGene().IsSetLocus()) {
2591  ostr << "NA";
2592  return;
2593  }
2594 
2595  ostr << gene_it->GetData().GetGene().GetLocus();
2596 }
2597 
2598 
2599 /////////////////////////////////////////////////////////////////////////////
2600 
2601 static string s_CodonVariation(const CSeq_align &align, TSeqPos pos,
2602  CScope &scope, int row)
2603 {
2604  CRef<CSeq_id> query_id(new CSeq_id);
2605  query_id->Assign(align.GetSeq_id(0));
2606  CRef<CSeq_loc> query_loc(new CSeq_loc(*query_id, pos, pos+2));
2607  CSeqVector query_vec(*query_loc, scope, CBioseq_Handle::eCoding_Iupac);
2608  string query;
2609  query_vec.GetSeqData(0, 3, query);
2610 
2611  CSeq_loc_Mapper mapper(align, 1);
2612  CRef<CSeq_loc> subject_loc = mapper.Map(*query_loc);
2613  CSeqVector subject_vec(*subject_loc, scope, CBioseq_Handle::eCoding_Iupac);
2614  string subject;
2615  subject_vec.GetSeqData(0, subject_vec.size(), subject);
2616 
2617  string variation;
2618  if (query != subject) {
2619  ENa_strand strand = eNa_strand_plus;
2620  if (row == 1) {
2621  pos = subject_loc->GetStart(eExtreme_Biological);
2622  strand = subject_loc->GetStrand();
2623  }
2624  unsigned snp_count = 0, snp_pos = 0;
2625  if (query.size() == subject.size() + 1) {
2626  /// query is one longer; check if this is a one-base deletion
2627  for (unsigned deletion_pos = 0; deletion_pos < subject.size();
2628  ++deletion_pos)
2629  {
2630  string subject_with_del = subject;
2631  subject_with_del.insert(deletion_pos, 1, query[deletion_pos]);
2632  if (query == subject_with_del) {
2633  subject.insert(deletion_pos, 1, '-');
2634  break;
2635  }
2636  }
2637  }
2638  if (query.size() == subject.size()) {
2639  for (unsigned index = 0; index < query.size(); ++index) {
2640  if (query[index] != subject[index]) {
2641  ++snp_count;
2642  snp_pos = index;
2643  }
2644  }
2645  }
2646  if (snp_count == 1) {
2647  pos += (strand == eNa_strand_minus ? -1 : 1) * snp_pos;
2648  variation = NStr::NumericToString(pos) + query[snp_pos] + '>'
2649  + subject[snp_pos];
2650  } else {
2651  variation = NStr::NumericToString(pos) + query + '>' + subject;
2652  }
2653  }
2654  return variation;
2655 }
2656 
2658 : m_CoordinateRow(row)
2659 {
2660 }
2661 
2662 
2664 {
2665  ostr << "Mismatches or indels within start codon";
2666  if (m_CoordinateRow == 0) {
2667  ostr << ", coordinates on query sequence";
2668  }
2669 }
2670 
2672 {
2673  ostr << "Start codon changes";
2674  if (m_CoordinateRow == 0) {
2675  ostr << " on query";
2676  }
2677 }
2678 
2679 
2681  const CSeq_align& align)
2682 {
2683  CBioseq_Handle bsh = m_Scores->GetScope().GetBioseqHandle(align.GetSeq_id(0));
2684  if ( !bsh ) {
2686  "failed to retrieve sequence for " +
2687  align.GetSeq_id(0).AsFastaString());
2688  }
2689  if (bsh.GetBioseqMolType() != CSeq_inst::eMol_rna) {
2690  NCBI_THROW(CException, eUnknown, "Not RNA alignments");
2691  }
2692 
2693  CFeat_CI feat_it(bsh,
2694  SAnnotSelector()
2695  .IncludeFeatType(CSeqFeatData::e_Cdregion));
2696  if (feat_it) {
2697  string variation = s_CodonVariation(align, feat_it->GetRange().GetFrom(),
2698  m_Scores->GetScope(), m_CoordinateRow);
2699  if (!variation.empty()) {
2700  ostr << variation;
2701  }
2702  }
2703 }
2704 
2705 
2706 /////////////////////////////////////////////////////////////////////////////
2707 
2709 : m_CoordinateRow(row)
2710 {
2711 }
2712 
2713 
2715 {
2716  ostr << "Mismatches or indels within stop codon";
2717  if (m_CoordinateRow == 0) {
2718  ostr << ", coordinates on query sequence";
2719  }
2720 }
2721 
2723 {
2724  ostr << "Stop codon changes";
2725  if (m_CoordinateRow == 0) {
2726  ostr << " on query";
2727  }
2728 }
2729 
2730 
2732  const CSeq_align& align)
2733 {
2734  CFeatureGenerator generator(m_Scores->GetScope());
2737  generator.SetAllowedUnaligned(10);
2738 
2739  CConstRef<CSeq_align> clean_align = generator.CleanAlignment(align);
2740  CSeq_annot annot;
2741  CBioseq_set bset;
2742  generator.ConvertAlignToAnnot(*clean_align, annot, bset);
2743  if (bset.GetSeq_set().empty() ||
2744  !bset.GetSeq_set().front()->IsSetAnnot())
2745  {
2746  return;
2747  }
2748 
2749  CScope transcribed_mrna_scope(*CObjectManager::GetInstance());
2750  transcribed_mrna_scope.AddTopLevelSeqEntry(*bset.GetSeq_set().front());
2751  CRef<CSeq_feat> cds = bset.GetSeq_set().front()
2752  -> GetSeq().GetAnnot().front()
2753  -> GetData().GetFtable().front();
2754  cds->SetData().SetCdregion().ResetCode_break();
2755  string trans;
2756  CSeqTranslator::Translate(*cds, transcribed_mrna_scope, trans);
2757  bool missing_stop = false;
2759  if (NStr::EndsWith(trans, "*")) {
2760  trans.resize(trans.size() - 1);
2761  } else {
2762  missing_stop = true;
2763  }
2764  }
2765 
2766  for (size_t changed_codons_count = 0, internal_stop_pos = trans.find('*');
2767  internal_stop_pos != string::npos || missing_stop;
2768  internal_stop_pos = trans.find('*', internal_stop_pos+1))
2769  {
2770  if (internal_stop_pos == string::npos) {
2771  /// Processed all internal stops if any; process missing final stop
2772  internal_stop_pos = trans.size() - 1;
2773  missing_stop = false;
2774  }
2775  TSeqPos isp = Convert(internal_stop_pos);
2776  string variation = s_CodonVariation(align,
2778  + isp*3,
2779  m_Scores->GetScope(), m_CoordinateRow);
2780  if (!variation.empty()) {
2781  if (changed_codons_count++) {
2782  ostr << ',';
2783  }
2784  ostr << variation;
2785  }
2786  }
2787 }
2788 
2789 
2790 /////////////////////////////////////////////////////////////////////////////
2791 
2793  const string &unavailable_string)
2794 : m_Scores(&scores), m_Ostr(ostr), m_UnavailableString(unavailable_string)
2795 {
2796  s_RegisterStandardFields(*this);
2797 }
2798 
2800 {
2801  IFormatter *qseqid =
2803  formatter.RegisterField("qseqid", qseqid);
2804  formatter.RegisterField("qacc", qseqid);
2805  formatter.RegisterField("qaccver", qseqid);
2806  formatter.RegisterField("qtag", qseqid);
2807 
2808  IFormatter *qallseqid =
2810  formatter.RegisterField("qallseqid", qallseqid);
2811  formatter.RegisterField("qallacc", qallseqid);
2812 
2813  formatter.RegisterField("qgi",
2815  formatter.RegisterField("qbaregi",
2817  formatter.RegisterField("qexactseqid",
2819 
2820  formatter.RegisterField("qlen", new CTabularFormatter_SeqLength(0));
2821  formatter.RegisterField("qstrand", new CTabularFormatter_AlignStrand(0));
2822  formatter.RegisterField("qstart", new CTabularFormatter_AlignStart(0));
2823  formatter.RegisterField("qend", new CTabularFormatter_AlignEnd(0));
2824  formatter.RegisterField("qestart", new CTabularFormatter_AlignStart(0, true));
2825  formatter.RegisterField("qeend", new CTabularFormatter_AlignEnd(0, true));
2826 
2827 
2828  IFormatter *sseqid =
2830  formatter.RegisterField("sseqid", sseqid);
2831  formatter.RegisterField("sacc", sseqid);
2832  formatter.RegisterField("saccver", sseqid);
2833  formatter.RegisterField("stag", sseqid);
2834 
2835  IFormatter *prot_seqid =
2836  new CTabularFormatter_SeqId(0, sequence::eGetId_Best, true, true);
2837  formatter.RegisterField("prot_seqid", prot_seqid);
2838  formatter.RegisterField("prot_acc", prot_seqid);
2839  formatter.RegisterField("prot_accver", prot_seqid);
2840 
2841  IFormatter *sallseqid =
2843  formatter.RegisterField("sallseqid", sallseqid);
2844  formatter.RegisterField("sallacc", sallseqid);
2845 
2846  formatter.RegisterField("sgi",
2848  formatter.RegisterField("sbaregi",
2850  formatter.RegisterField("sexactseqid",
2852 
2853  formatter.RegisterField("slen", new CTabularFormatter_SeqLength(1));
2854  formatter.RegisterField("sstrand", new CTabularFormatter_AlignStrand(1));
2855  formatter.RegisterField("sstart", new CTabularFormatter_AlignStart(1));
2856  formatter.RegisterField("send", new CTabularFormatter_AlignEnd(1));
2857  formatter.RegisterField("sestart", new CTabularFormatter_AlignStart(1, true));
2858  formatter.RegisterField("seend", new CTabularFormatter_AlignEnd(1, true));
2859 
2860  formatter.RegisterField("evalue", new CTabularFormatter_EValue);
2861  formatter.RegisterField("evalue_mantissa", new CTabularFormatter_EValue_Mantissa);
2862  formatter.RegisterField("evalue_exponent", new CTabularFormatter_EValue_Exponent);
2863  formatter.RegisterField("bitscore", new CTabularFormatter_BitScore);
2864  formatter.RegisterField("score", new CTabularFormatter_Score);
2865 
2866  formatter.RegisterField("length", new CTabularFormatter_AlignLength);
2867  formatter.RegisterField("length_ungap", new CTabularFormatter_AlignLengthUngap);
2868  formatter.RegisterField("align_len_ratio", new CTabularFormatter_AlignLengthRatio);
2869 
2870  formatter.RegisterField("pident", new CTabularFormatter_PercentId(true));
2871  formatter.RegisterField("pident_ungapped", new CTabularFormatter_PercentId(false));
2872  formatter.RegisterField("pcov", new CTabularFormatter_PercentCoverage(0, "pcov"));
2873  formatter.RegisterField("qcov", new CTabularFormatter_PercentCoverage(0, "qcov"));
2874  formatter.RegisterField("scov", new CTabularFormatter_PercentCoverage(1, "scov"));
2875 
2876  formatter.RegisterField("gaps", new CTabularFormatter_GapBaseCount);
2877  formatter.RegisterField("gapopen", new CTabularFormatter_GapCount);
2878 
2879  formatter.RegisterField("nident", new CTabularFormatter_IdentityCount);
2880  formatter.RegisterField("mismatch", new CTabularFormatter_MismatchCount);
2881  formatter.RegisterField("qmismatchpos", new CTabularFormatter_MismatchPositions(0));
2882  formatter.RegisterField("smismatchpos", new CTabularFormatter_MismatchPositions(1));
2883 
2884  formatter.RegisterField("qgapranges", new CTabularFormatter_GapRanges(0));
2885  formatter.RegisterField("sgapranges", new CTabularFormatter_GapRanges(1));
2886 
2887 
2888  formatter.RegisterField("qdefline",
2889  new CTabularFormatter_Defline(0));
2890  formatter.RegisterField("sdefline",
2891  new CTabularFormatter_Defline(1));
2892  formatter.RegisterField("qprotref",
2893  new CTabularFormatter_ProtRef(0));
2894  formatter.RegisterField("sprotref",
2895  new CTabularFormatter_ProtRef(1));
2896  formatter.RegisterField("qtaxid",
2897  new CTabularFormatter_TaxId(0));
2898  formatter.RegisterField("staxid",
2899  new CTabularFormatter_TaxId(1));
2900  formatter.RegisterField("quniprot_source",
2901  new CTabularFormatter_Comment(0, "uniprot source"));
2902  formatter.RegisterField("suniprot_source",
2903  new CTabularFormatter_Comment(1, "uniprot source"));
2904 
2905  formatter.RegisterField("qtaxname",
2908  formatter.RegisterField("qspecies",
2911  formatter.RegisterField("qgenus",
2914  formatter.RegisterField("qkingdom",
2917 
2918  formatter.RegisterField("staxname",
2921  formatter.RegisterField("sspecies",
2924  formatter.RegisterField("sgenus",
2927  formatter.RegisterField("skingdom",
2930 
2931  formatter.RegisterField("align_id",
2933  formatter.RegisterField("best_placement_group",
2935 
2936  formatter.RegisterField("exons",
2940  formatter.RegisterField("exon_len",
2944 
2945  formatter.RegisterField("introns",
2949  formatter.RegisterField("intron_len",
2953  formatter.RegisterField("query_exons",
2957  formatter.RegisterField("query_exon_len",
2961 
2962  formatter.RegisterField("query_unaligned",
2966  formatter.RegisterField("query_unaligned_len",
2970 
2971  formatter.RegisterField("biggestgap",
2973  formatter.RegisterField("qbiggestgap",
2975  formatter.RegisterField("sbiggestgap",
2977  formatter.RegisterField("qchrom",
2979  formatter.RegisterField("schrom",
2981  formatter.RegisterField("qclone",
2983  formatter.RegisterField("sclone",
2985  formatter.RegisterField("qtech",
2986  new CTabularFormatter_Tech(0));
2987  formatter.RegisterField("stech",
2988  new CTabularFormatter_Tech(1));
2989  formatter.RegisterField("qdiscstrand",
2991  formatter.RegisterField("sdiscstrand",
2993  formatter.RegisterField("cigar",
2995  formatter.RegisterField("btop",
2997  formatter.RegisterField("frameshifts",
2999  formatter.RegisterField("nonframeshifts",
3001  formatter.RegisterField("cds_indels",
3003  formatter.RegisterField("frameshifts_on_query",
3005  formatter.RegisterField("nonframeshifts_on_query",
3007  formatter.RegisterField("cds_indels_on_query",
3009  formatter.RegisterField("start_codon_changes",
3011  formatter.RegisterField("stop_codon_changes",
3013  formatter.RegisterField("start_codon_changes_on_query",
3015  formatter.RegisterField("stop_codon_changes_on_query",
3017  formatter.RegisterField("gene_symbol",
3019  formatter.RegisterField("qasmunit", new CTabularFormatter_AssemblyInfo(0,
3022  formatter.RegisterField("sasmunit", new CTabularFormatter_AssemblyInfo(1,
3025  formatter.RegisterField("qfullasm", new CTabularFormatter_AssemblyInfo(0,
3028  formatter.RegisterField("sfullasm", new CTabularFormatter_AssemblyInfo(1,
3031  formatter.RegisterField("qasmunitacc", new CTabularFormatter_AssemblyInfo(0,
3034  formatter.RegisterField("sasmunitacc", new CTabularFormatter_AssemblyInfo(1,
3037  formatter.RegisterField("qfullasmacc", new CTabularFormatter_AssemblyInfo(0,
3040  formatter.RegisterField("sfullasmacc", new CTabularFormatter_AssemblyInfo(1,
3043  formatter.RegisterField("qasmunitchain", new CTabularFormatter_AssemblyInfo(0,
3046  formatter.RegisterField("sasmunitchain", new CTabularFormatter_AssemblyInfo(1,
3049  formatter.RegisterField("qfullasmchain", new CTabularFormatter_AssemblyInfo(0,
3052  formatter.RegisterField("sfullasmchain", new CTabularFormatter_AssemblyInfo(1,
3055  formatter.RegisterField("qchromosome", new CTabularFormatter_AssemblyInfo(0,
3058  formatter.RegisterField("schromosome", new CTabularFormatter_AssemblyInfo(1,
3061 
3062  formatter.RegisterField("query_entropy",
3063  new CTabularFormatter_Entropy(0));
3064  formatter.RegisterField("subject_entropy",
3065  new CTabularFormatter_Entropy(1));
3066  formatter.RegisterField("query_seg_pct",
3068  formatter.RegisterField("subject_seg_pct",
3070 }
3071 
3073 {
3075  formatter_it->second->SetGencoll(gencoll);
3076  }
3077  RegisterField("qpatchtype", new CTabularFormatter_PatchType(0, gencoll));
3078  RegisterField("spatchtype", new CTabularFormatter_PatchType(1, gencoll));
3079  RegisterField("qnearestgap", new CTabularFormatter_NearestGap(0, gencoll));
3080  RegisterField("snearestgap", new CTabularFormatter_NearestGap(1, gencoll));
3081 }
3082 
3083 /// Split a string, but ignore separators within parentheses
3084 static void s_Split(const string &format,
3085  const string &separators,
3086  vector<string> &toks)
3087 {
3088  unsigned int paren_level = 0;
3089  string next_tok;
3090  ITERATE (string, char_it, format) {
3091  if (!paren_level && separators.find(*char_it) != string::npos) {
3092  if (!next_tok.empty()) {
3093  toks.push_back(next_tok);
3094  }
3095  next_tok.clear();
3096  continue;
3097  }
3098  if (*char_it == '(') {
3099  ++paren_level;
3100  } else if (*char_it == ')') {
3101  if (!paren_level) {
3103  "Unbalanced parentheses: " + format);
3104  }
3105  --paren_level;
3106  }
3107  next_tok += *char_it;
3108  }
3109  if (!next_tok.empty()) {
3110  toks.push_back(next_tok);
3111  }
3112  if (paren_level) {
3114  "Unbalanced parentheses: " + format);
3115  }
3116 }
3117 
3119 {
3120  CRegexp re1("score\\(([^,]*),([^)]*)\\)");
3121  CRegexp re2("score\\(([^)]*)\\)");
3122 
3123  CRegexp text_re1("text\\(([^,]*),([^)]*)\\)");
3124  CRegexp text_re2("text\\(([^)]*)\\)");
3125 
3126  vector<string> toks;
3127  s_Split(format, " \t\n\r,", toks);
3128 
3129  ITERATE (vector<string>, it, toks) {
3130  string s = *it;
3131  NStr::ToLower(s);
3132  if (m_FormatterMap.count(s)) {
3133  m_Formatters.push_back(m_FormatterMap[s]);
3134  } else if (re1.IsMatch(s)) {
3135  string score_name = re1.GetSub(*it, 1);
3136  string col_name = re1.GetSub(*it, 2);
3137  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_AnyScore(score_name, col_name)));
3138 
3139  } else if (re2.IsMatch(s)) {
3140  string score_name = re2.GetSub(*it, 1);
3141  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_AnyScore(score_name, score_name)));
3142 
3143  } else if (text_re1.IsMatch(s)) {
3144  string score_name = text_re1.GetSub(*it, 1);
3145  string col_name = text_re1.GetSub(*it, 2);
3146  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_FixedText(score_name, col_name)));
3147 
3148  } else if (text_re2.IsMatch(s)) {
3149  string score_name = text_re2.GetSub(*it, 1);
3150  m_Formatters.push_back(CIRef<IFormatter>(new CTabularFormatter_FixedText(score_name, score_name)));
3151 
3152  } else {
3153  ERR_POST(Error << "unhandled field: " << s);
3154  }
3155  }
3156 
3158  (*it)->SetScoreLookup(m_Scores);
3159  }
3160 }
3161 
3162 
3164 {
3165  m_Ostr << '#';
3167  (*it)->PrintHeader(m_Ostr);
3168 
3169  list< CIRef<IFormatter> >::const_iterator i = it;
3170  ++i;
3171  if (i != m_Formatters.end()) {
3172  m_Ostr << '\t';
3173  }
3174  }
3175 
3176  m_Ostr << '\n';
3177 }
3178 
3179 
3181 {
3183  try {
3184  (*it)->Print(m_Ostr, align);
3185  } catch (...) {
3186  if (m_UnavailableString.empty()) {
3187  throw;
3188  }
3189  /// User provided a string to mark unavailable fields instead of
3190  /// failing
3192  }
3193 
3194  list< CIRef<IFormatter> >::const_iterator i = it;
3195  ++i;
3196  if (i != m_Formatters.end()) {
3197  m_Ostr << '\t';
3198  }
3199  }
3200  m_Scores->UpdateState(align);
3201 
3202  m_Ostr << '\n';
3203 }
3204 
3205 END_SCOPE(ncbi)
3206 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
size_t GetSize(void) const
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
void SetFlags(TFeatureGeneratorFlags)
Definition: gene_model.cpp:195
void SetAllowedUnaligned(TSeqPos)
Definition: gene_model.cpp:215
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
Definition: gene_model.cpp:221
string GetAccession() const
Retrieve the accession for this assembly.
Definition: GC_Assembly.cpp:99
string GetName() const
Retrieve the name of this assembly.
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
ostream & AsString(ostream &s) const
Definition: Object_id.cpp:202
void GetLabel(string *label) const
Definition: Prot_ref.cpp:62
const_iterator end() const
Definition: range_coll.hpp:86
const_iterator begin() const
Definition: range_coll.hpp:82
bool Empty() const
Definition: range_coll.hpp:138
CRef –.
Definition: ncbiobj.hpp:618
CRegexp –.
Definition: regexp.hpp:70
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row of an alignment; all gaps by default.
Definition: Seq_align.cpp:1550
CRangeCollection< TSeqPos > GetAlignedBases(TDim row) const
Retrieves the locations of aligned bases in the given row, excluding gaps and discontinuities.
Definition: Seq_align.cpp:1796
vector< SIndel > GetNonFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1765
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
TDim CheckNumRows(void) const
Validators.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
bool GetNamedScore(const string &id, int &score) const
Get score.
Definition: Seq_align.cpp:563
vector< SIndel > GetFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1747
vector< SIndel > GetIndelsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1783
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
Definition: Seq_align.cpp:1993
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
Definition: Seq_align.cpp:1557
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
objects::CScoreLookup * m_Scores
Definition: tabular_fmt.hpp:73
CTabularFormatter_AlignEnd(int row, bool nominus=false)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
formatter for dumping alignment identifiers
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CTabularFormatter_AlignStart(int row, bool nominus=false)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
Definition: tabular_fmt.cpp:91
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
Definition: tabular_fmt.cpp:98
void PrintHeader(CNcbiOstream &ostr) const
Definition: tabular_fmt.cpp:79
formatter for dumping any score in an alignment
CTabularFormatter_AnyScore(const string &score_name, const string &col_name)
formatter for dumping any score in an alignment
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CTabularFormatter_AssemblyInfo(int row, EAssemblyType type, EInfo info)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
CConstRef< objects::CGC_Assembly > m_Gencoll
virtual void SetGencoll(CConstRef< objects::CGC_Assembly > gencoll)
formatter for dumping alignment identifiers
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
TSeqPos x_CalcBiggestGap(const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping cigar of alignments
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
formatter for dumping content of sequence comment descriptors
CTabularFormatter_Comment(int row, const string &prefix)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping sequence deflines
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
objects::sequence::CDeflineGenerator generator
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void x_RecurseStrands(const objects::CSeq_align &align, bool &Plus, bool &Minus)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
formatter for Shannon's entropy
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping exons
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CTabularFormatter_FixedText(const string &col_name, const string &text)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
CTabularFormatter_Indels(EIndelType indel_type, int coordinate_row)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CConstRef< objects::CGC_Assembly > m_Gencoll
void PrintHeader(CNcbiOstream &ostr) const
CTabularFormatter_NearestGap(int row, CConstRef< objects::CGC_Assembly > gencoll)
formatter for dumping organism names
std::unique_ptr< objects::CTaxon1 > m_Taxon1
CTabularFormatter_OrgName(int row, EField field=eFullTaxName)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
CConstRef< objects::CGC_Assembly > m_Gencoll
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
CTabularFormatter_PatchType(int row, CConstRef< objects::CGC_Assembly > gencoll)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
CTabularFormatter_PercentId(bool gapped=false)
void PrintHeader(CNcbiOstream &ostr) const
formatter for dumping sequence Prot-refs (protein only)
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
formatter for BLAST seg %
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
CTabularFormatter_SeqId(int row, objects::sequence::EGetIdType id_type, bool tag_only=false, bool protein=false)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
objects::sequence::EGetIdType m_GetIdType
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHeader(CNcbiOstream &ostr) const
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
formatter for dumping tax-ids
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
void Print(CNcbiOstream &ostr, const objects::CSeq_align &align)
void PrintHelpText(CNcbiOstream &ostr) const
void PrintHeader(CNcbiOstream &ostr) const
Simple tabular alignment formatter This is a replacement for the BLAST tabular formatter that support...
Definition: tabular_fmt.hpp:57
void SetGencoll(CConstRef< objects::CGC_Assembly > gencoll)
void RegisterField(const string &field_name, IFormatter *field_formatter)
Definition: tabular_fmt.hpp:84
objects::CScoreLookup * m_Scores
void Format(const objects::CSeq_align &align)
static void s_RegisterStandardFields(CTabularFormatter &formatter)
void SetFormat(const string &format)
string m_UnavailableString
CTabularFormatter(CNcbiOstream &ostr, objects::CScoreLookup &scores, const string &unavailable_string="")
CNcbiOstream & m_Ostr
list< CIRef< IFormatter > > m_Formatters
Definition: tabular_fmt.hpp:99
TFormatterMap m_FormatterMap
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
#define Code
string Offset()
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define TAX_ID_FROM(T, value)
Definition: ncbimisc.hpp:1111
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define Handle
Definition: ncbistd.hpp:119
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
Definition: enumerated.cpp:146
@ eUnknown
Definition: app_popup.hpp:72
const string AsFastaString(void) const
Definition: Seq_id.cpp:2265
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2039
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:573
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:882
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
int EGetIdType
Definition: sequence.hpp:126
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
Definition: sequence.hpp:101
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
@ eGetId_HandleDefault
returns the ID associated with a bioseq-handle
Definition: sequence.hpp:104
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
vector< CSeq_id_Handle > TIds
Definition: scope.hpp:143
const CSeqFeatData & GetData(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
TRange GetRange(void) const
Get range for mapped seq-feat's location.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
position_type GetLength(void) const
Definition: range.hpp:158
TThisType & SetLength(position_type length)
Definition: range.hpp:194
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
Definition: regexp.cpp:193
CTempString GetSub(CTempString str, size_t idx=0) const
Get pattern/subpattern from previous GetMatch().
Definition: regexp.cpp:156
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
Definition: BioSource_.hpp:533
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:493
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:505
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TInt GetInt(void) const
Get the variant data.
const TType & GetType(void) const
Get the Type member data.
TUnit & SetUnit(void)
Select the variant.
const TProtpos & GetProtpos(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_align_.hpp:976
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool IsSetId(void) const
alignment id Check if a value has been assigned to Id data member.
Definition: Seq_align_.hpp:964
bool IsSetExt(void) const
extra info Check if a value has been assigned to Ext data member.
Definition: Seq_align_.hpp:989
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
list< CRef< CObject_id > > TId
Definition: Seq_align_.hpp:401
list< CRef< CUser_object > > TExt
Definition: Seq_align_.hpp:402
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
Definition: Seq_align_.hpp:915
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsDisc(void) const
Check if variant Disc is selected.
Definition: Seq_align_.hpp:772
const TExt & GetExt(void) const
Get the Ext member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
TNumseg GetNumseg(void) const
Get the Numseg member data.
Definition: Dense_seg_.hpp:465
list< CRef< CSeq_align > > Tdata
const TDisc & GetDisc(void) const
Get the variant data.
Definition: Seq_align_.cpp:197
TNucpos GetNucpos(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TGene & GetGene(void) const
Get the variant data.
const TProt & GetProt(void) const
Get the variant data.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
TTech GetTech(void) const
Get the Tech member data.
Definition: MolInfo_.hpp:497
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
Definition: MolInfo_.hpp:472
bool CanGetTech(void) const
Check if it is safe to call GetTech method.
Definition: MolInfo_.hpp:478
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
@ e_Comment
a more extensive comment
Definition: Seqdesc_.hpp:117
@ e_Molinfo
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
const int infinity
Definition: nucprot.cpp:52
int i
static void text(MDB_val *v)
Definition: mdb_dump.c:62
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
#define abs(a)
Definition: ncbi_heapmgr.c:130
T max(T x_, T y_)
T min(T x_, T y_)
static Format format
Definition: njn_ioutil.cpp:53
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static const char * prefix[]
Definition: pcregrep.c:405
SAnnotSelector –.
static string subject
static string query
Definition: type.c:6
USING_SCOPE(objects)
static void s_Split(const string &format, const string &separators, vector< string > &toks)
Split a string, but ignore separators within parentheses.
TSeqPos s_FindGaps(const CGC_Assembly &Assembly, const CSeq_id &Id, const TSeqPos Offset, list< TSeqRange > &Gaps)
void s_AlignToSeqRanges(const CSeq_align &align, int row, list< TSeqRange > &ranges)
static string s_CodonVariation(const CSeq_align &align, TSeqPos pos, CScope &scope, int row)
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)
Modified on Tue Dec 05 02:01:19 2023 by modify_doxy.py rev. 669887