NCBI C++ ToolKit
score_lookup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: score_lookup.cpp 102010 2024-03-18 17:26:51Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Eyal Mozes
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
37 #include <algo/sequence/util.hpp>
39 
59 
61 #include <objmgr/scope.hpp>
62 #include <objmgr/seq_vector.hpp>
63 #include <objmgr/util/sequence.hpp>
64 
68 
71 
72 
73 /////////////////////////////////////////////////////////////////////////////
74 
76 {
77 public:
78  CScore_AlignLength(bool include_gaps)
79  : m_Gaps(include_gaps)
80  {
81  }
82 
83  virtual EComplexity GetComplexity() const { return eEasy; };
84 
85  virtual bool IsInteger() const { return true; };
86 
87  virtual double Get(const CSeq_align& align, CScope*) const
88  {
89  return align.GetAlignLength(m_Gaps);
90  }
91 
92  virtual void PrintHelp(CNcbiOstream& ostr) const
93  {
94  if (m_Gaps) {
95  ostr << "Length of the aligned segments, including the length of all gap segments";
96  }
97  else {
98  ostr << "Length of the aligned segments, excluding all gap segments; thus, this is the length of all actually aligned (i.e., match or mismatch) bases";
99  }
100  }
101 
102 private:
103  bool m_Gaps;
104 };
105 
106 /////////////////////////////////////////////////////////////////////////////
107 
109 {
110 public:
111  CScore_GapCount(bool count_bases, int row = -1,
112  bool exon_specific = false)
113  : m_CountBases(count_bases), m_Row(row), m_ExonSpecific(exon_specific)
114  {
115  }
116 
117  virtual EComplexity GetComplexity() const { return eEasy; };
118 
119  virtual bool IsInteger() const { return true; };
120 
121  virtual double Get(const CSeq_align& align, CScope*) const
122  {
123  if (m_ExonSpecific && !align.GetSegs().IsSpliced()) {
124  NCBI_THROW(CSeqalignException, eUnsupported,
125  "'product_gap_length' and 'genomic_gap_length' scores "
126  "valid only for Spliced-seg alignments");
127  }
128  return m_CountBases ? align.GetTotalGapCount(m_Row)
129  : align.GetNumGapOpenings(m_Row);
130  }
131 
132  virtual void PrintHelp(CNcbiOstream& ostr) const
133  {
134  if (m_CountBases) {
135  ostr << "Total number of gap bases missing";
136  }
137  else {
138  ostr << "Number of gap openings";
139  }
140  if (m_ExonSpecific) {
141  if (m_Row == 0) {
142  ostr << " in product exons";
143  } else if(m_Row == 1) {
144  ostr << " in genomic exons";
145  }
146  } else {
147  if (m_Row == 0) {
148  ostr << " in query";
149  } else if(m_Row == 1) {
150  ostr << " in subject";
151  }
152  }
153  }
154 
155 private:
157  int m_Row;
159 };
160 
161 /////////////////////////////////////////////////////////////////////////////
162 
164 {
165 public:
166  CScore_FrameShifts(int row = -1, bool frameshifts = true)
167  : m_Row(row)
168  , m_Frameshifts(frameshifts)
169  {
170  }
171 
172  virtual EComplexity GetComplexity() const { return eEasy; };
173 
174  virtual bool IsInteger() const { return true; };
175 
176  virtual double Get(const CSeq_align& align, CScope *scope) const
177  {
178  int opposite_row = m_Row >= 0 ? 1 - m_Row : m_Row;
179  if (align.GetSegs().IsSpliced() &&
180  align.GetSegs().GetSpliced().GetProduct_type() ==
182  {
183  /// Protein alignment; just count frameshifts
184  return m_Frameshifts ? align.GetNumFrameshifts(m_Row)
185  : align.GetNumGapOpenings(opposite_row)
186  - align.GetNumFrameshifts(m_Row);
187  }
188 
189  CBioseq_Handle bsh = scope->GetBioseqHandle(align.GetSeq_id(0));
190  if ( !bsh ) {
192  "failed to retrieve sequence for " +
193  align.GetSeq_id(0).AsFastaString());
194  }
195  if (bsh.GetBioseqMolType() != CSeq_inst::eMol_rna) {
197  "Can't count frameshifts on a genomic alignment");
198  }
199 
200  /// Only count frameshifts within cdregion
201  CFeat_CI feat_it(bsh,
203  .IncludeFeatType(CSeqFeatData::e_Cdregion));
204  return !feat_it ? 0 : (m_Frameshifts
205  ? align.GetNumFrameshiftsWithinRange(feat_it->GetRange(), m_Row)
206  : align.GetNumGapOpeningsWithinRange(feat_it->GetRange(), opposite_row)
207  - align.GetNumFrameshiftsWithinRange(feat_it->GetRange(), m_Row));
208  }
209 
210  virtual void PrintHelp(CNcbiOstream& ostr) const
211  {
212  ostr << "Number of ";
213  if (!m_Frameshifts) {
214  ostr << "non-";
215  }
216  ostr << "frameshifting insertions";
217  if (m_Row == 0) {
218  ostr << " in the query";
219  } else if(m_Row == 1) {
220  ostr << " in the subject";
221  } else {
222  ostr << " or deletions";
223  }
224  }
225 
226 private:
227  int m_Row;
229 };
230 
231 /////////////////////////////////////////////////////////////////////////////
232 
234 {
235 public:
236  virtual void PrintHelp(CNcbiOstream& ostr) const
237  {
238  ostr << "Length of the longest gap observed in either query or subject";
239  }
240 
241  virtual EComplexity GetComplexity() const { return eEasy; };
242 
243  virtual bool IsInteger() const { return true; };
244 
245  virtual double Get(const CSeq_align& align, CScope*) const
246  {
247  return align.GapLengthRange().second;
248  }
249 };
250 
251 /////////////////////////////////////////////////////////////////////////////
252 
254 {
255 public:
256 
257  virtual void PrintHelp(CNcbiOstream& ostr) const
258  {
259  ostr << "Length of unaligned sequence 3' of alignment end";
260  }
261 
262  virtual EComplexity GetComplexity() const { return eEasy; };
263 
264  virtual bool IsInteger() const { return true; };
265 
266  virtual double Get(const CSeq_align& align, CScope* scope) const
267  {
268  double score_value = 0;
269  if (align.GetSegs().IsSpliced()) {
270  score_value = align.GetSegs().GetSpliced().GetProduct_length();
271  if (align.GetSegs().GetSpliced().IsSetPoly_a()) {
272  score_value = align.GetSegs().GetSpliced().GetPoly_a();
273  }
274  } else {
275  if (scope) {
276  CBioseq_Handle bsh = scope->GetBioseqHandle(align.GetSeq_id(0));
277  if (bsh) {
278  score_value = bsh.GetBioseqLength();
279  }
280  }
281  }
282  if (score_value) {
283  score_value -= align.GetSeqStop(0) + 1;
284  }
285  return score_value;
286  }
287 };
288 
289 /////////////////////////////////////////////////////////////////////////////
290 
292 {
293 public:
294 
295  virtual void PrintHelp(CNcbiOstream& ostr) const
296  {
297  ostr << "Length of polya tail";
298  }
299 
300  virtual EComplexity GetComplexity() const { return eEasy; };
301 
302  virtual bool IsInteger() const { return true; };
303 
304  virtual double Get(const CSeq_align& align, CScope* scope) const
305  {
306  if (!align.GetSegs().IsSpliced() ||
307  !align.GetSegs().GetSpliced().IsSetPoly_a())
308  {
309  return 0;
310  }
311  if (align.GetSegs().GetSpliced().IsSetProduct_strand() &&
313  {
314  /// Alignment on minus strand, so poly-a score represents the actual
315  /// length of the poly-t tail
316  return align.GetSegs().GetSpliced().GetPoly_a();
317  }
318  double product_length = 0;
319  if (align.GetSegs().GetSpliced().IsSetProduct_length()) {
320  product_length = align.GetSegs().GetSpliced().GetProduct_length();
321  } else if (scope) {
322  CBioseq_Handle bsh = scope->GetBioseqHandle(align.GetSeq_id(0));
323  if (bsh) {
324  product_length = bsh.GetBioseqLength();
325  }
326  }
327  if (product_length == 0) {
328  return 0;
329  }
330  return product_length - align.GetSegs().GetSpliced().GetPoly_a();
331  }
332 };
333 
334 /////////////////////////////////////////////////////////////////////////////
335 
337 {
338 public:
339 
340  virtual void PrintHelp(CNcbiOstream& ostr) const
341  {
342  ostr << "Length of unaligned sequence contained within the aligned "
343  "range. Note that this does not count gaps; rather, it computes "
344  "the length of all missing, unaligned sequence bounded by the "
345  "aligned range";
346  }
347 
348  virtual EComplexity GetComplexity() const { return eEasy; };
349 
350  virtual bool IsInteger() const { return true; };
351 
352  virtual double Get(const CSeq_align& align, CScope* ) const
353  {
354  double score_value = 0;
355  switch (align.GetSegs().Which()) {
357  {{
358  const CSpliced_seg& seg = align.GetSegs().GetSpliced();
359  if (seg.IsSetProduct_strand() &&
361  CSpliced_seg::TExons::const_reverse_iterator it =
362  seg.GetExons().rbegin();
363  CSpliced_seg::TExons::const_reverse_iterator prev =
364  seg.GetExons().rbegin();
365  CSpliced_seg::TExons::const_reverse_iterator end =
366  seg.GetExons().rend();
367  if (seg.GetProduct_type() ==
369  for (++it; it != end; ++it, ++prev) {
370  score_value += (*it)->GetProduct_start().GetNucpos() -
371  (*prev)->GetProduct_end().GetNucpos() - 1;
372  }
373  } else {
374  for (++it; it != end; ++it, ++prev) {
375  TSeqPos curr_nuc = (*it)->GetProduct_start().AsSeqPos();
376  TSeqPos last_nuc = (*prev)->GetProduct_end().AsSeqPos();
377  score_value += curr_nuc - last_nuc - 1;
378  }
379  }
380  }
381  else {
382  CSpliced_seg::TExons::const_iterator it =
383  seg.GetExons().begin();
384  CSpliced_seg::TExons::const_iterator prev =
385  seg.GetExons().begin();
386  CSpliced_seg::TExons::const_iterator end =
387  seg.GetExons().end();
388  if (seg.GetProduct_type() ==
390  for (++it; it != end; ++it, ++prev) {
391  score_value += (*it)->GetProduct_start().GetNucpos() -
392  (*prev)->GetProduct_end().GetNucpos() - 1;
393  }
394  } else {
395  for (++it; it != end; ++it, ++prev) {
396  TSeqPos curr_nuc = (*it)->GetProduct_start().AsSeqPos();
397  TSeqPos last_nuc = (*prev)->GetProduct_end().AsSeqPos();
398  score_value += curr_nuc - last_nuc - 1;
399  }
400  }
401  }
402  }}
403  break;
404 
405  default:
406  NCBI_THROW(CSeqalignException, eNotImplemented,
407  "internal_unaligned not implemented for this "
408  "type of alignment");
409  }
410  return score_value;
411  }
412 };
413 
414 /////////////////////////////////////////////////////////////////////////////
415 
417 {
418 public:
419  CScore_AlignStartStop(int row, bool start)
420  : m_Row(row)
421  , m_Start(start)
422  {
423  }
424 
425  virtual EComplexity GetComplexity() const { return eEasy; };
426 
427  virtual bool IsInteger() const { return true; };
428 
429  virtual void PrintHelp(CNcbiOstream& ostr) const
430  {
431  if (m_Start) {
432  if (m_Row == 0) {
433  ostr << "Start of query sequence (0-based coordinates)";
434  }
435  else if (m_Row == 1) {
436  ostr << "Start of subject sequence (0-based coordinates)";
437  }
438  }
439  else {
440  if (m_Row == 0) {
441  ostr << "End of query sequence (0-based coordinates)";
442  }
443  else if (m_Row == 1) {
444  ostr << "End of subject sequence (0-based coordinates)";
445  }
446  }
447  }
448 
449  virtual double Get(const CSeq_align& align, CScope*) const
450  {
451  if (m_Start) {
452  return align.GetSeqStart(m_Row);
453  } else {
454  return align.GetSeqStop(m_Row);
455  }
456  }
457 
458 private:
459  int m_Row;
460  bool m_Start;
461 };
462 
463 /////////////////////////////////////////////////////////////////////////////
464 
466 {
467 public:
468 
469  virtual void PrintHelp(CNcbiOstream& ostr) const
470  {
471  ostr << "Ratio of subject aligned range length to query aligned "
472  "range length";
473  }
474 
475  virtual EComplexity GetComplexity() const { return eEasy; };
476 
477  virtual double Get(const CSeq_align& align, CScope*) const
478  {
479  return align.AlignLengthRatio();
480  }
481 };
482 
483 //////////////////////////////////////////////////////////////////////////////
484 
486 {
487 public:
489  : m_Row(row)
490  {
491  }
492 
493  virtual void PrintHelp(CNcbiOstream& ostr) const
494  {
495  if (m_Row == 0) {
496  ostr << "Length of query sequence";
497  }
498  else if (m_Row == 1) {
499  ostr << "Length of subject sequence";
500  }
501  }
502 
503  virtual EComplexity GetComplexity() const { return eHard; };
504 
505  virtual bool IsInteger() const { return true; };
506 
507  virtual double Get(const CSeq_align& align, CScope* scope) const
508  {
509  if (m_Row == 0 && align.GetSegs().IsSpliced()) {
510  return align.GetSegs().GetSpliced().GetProduct_length();
511  } else {
512  if (scope) {
513  CBioseq_Handle bsh =
514  scope->GetBioseqHandle(align.GetSeq_id(m_Row));
515  if (bsh) {
516  return bsh.GetBioseqLength();
517  } else {
518  NCBI_THROW(CSeqalignException, eInvalidSeqId,
519  "Can't get length for sequence " +
520  align.GetSeq_id(m_Row).AsFastaString());
521  }
522  }
523  }
524  return 0;
525  }
526 
527 private:
528  int m_Row;
529 };
530 
531 //////////////////////////////////////////////////////////////////////////////
532 /// Get the sequence's length in nucleotides
534 {
535  TSeqPos len = bsh.GetBioseqLength();
536  if (bsh.CanGetInst_Mol() && bsh.GetInst_Mol() == CSeq_inst::eMol_aa) {
537  /// This is an amino-acid sequence, so multiply length by 3
538  len *= 3;
539  }
540  return len;
541 }
542 
543 
545 {
546 public:
547  enum EType {e_Min, e_Avg};
549  : m_Type(type)
550  {
551  }
552 
553  virtual void PrintHelp(CNcbiOstream& ostr) const
554  {
555  ostr <<
556  "Symmetric overlap, as a percent (0-100). This is similar to "
557  "coverage, except that it takes into account both query and "
558  "subject sequence lengths. Alignment length is divided by "
559  << (m_Type == e_Min ? "minimum" : "average")
560  << " of the two sequence lengths";
561  }
562 
563  virtual EComplexity GetComplexity() const { return eHard; };
564 
565  virtual double Get(const CSeq_align& align, CScope* scope) const
566  {
567  TSeqPos length = align.GetAlignLength(false);
568  double pct_overlap = length * 100;
569 
570  CBioseq_Handle q = scope->GetBioseqHandle(align.GetSeq_id(0));
571  if ( !q ) {
573  "failed to retrieve sequence for " +
574  align.GetSeq_id(0).AsFastaString());
575  }
576  CBioseq_Handle s = scope->GetBioseqHandle(align.GetSeq_id(1));
577  if ( !s ) {
579  "failed to retrieve sequence for " +
580  align.GetSeq_id(1).AsFastaString());
581  }
582  if (q.IsAa() && s.IsAa()) {
583  pct_overlap *= 3;
584  }
585 
586  switch (m_Type) {
587  case e_Min:
588  pct_overlap /= min(s_GetNaLength(q), s_GetNaLength(s));
589  break;
590 
591  case e_Avg:
592  pct_overlap /= (s_GetNaLength(q) + s_GetNaLength(s))/2;
593  break;
594  }
595  return pct_overlap;
596  }
597 
598 private:
600 };
601 
602 //////////////////////////////////////////////////////////////////////////////
603 
605 {
606 public:
607  virtual void PrintHelp(CNcbiOstream& ostr) const
608  {
609  ostr <<
610  "Length of the shortest exon. Note that this score has "
611  "meaning only for Spliced-seg alignments, as would be generated "
612  "by Splign or ProSplign.";
613  }
614 
615  virtual EComplexity GetComplexity() const { return eEasy; };
616 
617  virtual bool IsInteger() const { return true; };
618 
619  virtual double Get(const CSeq_align& align, CScope*) const
620  {
621  return align.ExonLengthRange().first;
622  }
623 };
624 
625 //////////////////////////////////////////////////////////////////////////////
626 
628 {
629 public:
630  virtual void PrintHelp(CNcbiOstream& ostr) const
631  {
632  ostr <<
633  "Length of the longest intron. Note that this score has "
634  "meaning only for Spliced-seg alignments, as would be generated "
635  "by Splign or ProSplign.";
636  }
637 
638  virtual EComplexity GetComplexity() const { return eEasy; };
639 
640  virtual bool IsInteger() const { return true; };
641 
642  virtual double Get(const CSeq_align& align, CScope*) const
643  {
644  return align.IntronLengthRange().second;
645  }
646 };
647 
648 //////////////////////////////////////////////////////////////////////////////
649 
651 {
652 public:
653  virtual void PrintHelp(CNcbiOstream& ostr) const
654  {
655  ostr <<
656  "Count of the number of exons. Note that this score has "
657  "meaning only for Spliced-seg alignments, as would be generated "
658  "by Splign or ProSplign.";
659  }
660 
661  virtual EComplexity GetComplexity() const { return eEasy; };
662 
663  virtual bool IsInteger() const { return true; };
664 
665  virtual double Get(const CSeq_align& align, CScope*) const
666  {
667  if (align.GetSegs().IsSpliced()) {
668  const CSpliced_seg& seg = align.GetSegs().GetSpliced();
669  if (seg.IsSetExons()) {
670  return seg.GetExons().size();
671  }
672  return 0;
673  }
674 
675  NCBI_THROW(CSeqalignException, eUnsupported,
676  "'exon_count' score is valid only for "
677  "Spliced-seg alignments");
678  }
679 };
680 
681 //////////////////////////////////////////////////////////////////////////////
682 
684 {
685 public:
686  virtual void PrintHelp(CNcbiOstream& ostr) const
687  {
688  ostr <<
689  "Minimum distance between an indel and a splice site. Note that "
690  "this score has meaning only for Spliced-seg alignments, as would "
691  "be generated by Splign or ProSplign.";
692  }
693 
694  virtual EComplexity GetComplexity() const { return eEasy; };
695 
696  virtual bool IsInteger() const { return true; };
697 
698  virtual double Get(const CSeq_align& align, CScope*) const
699  {
700  if (align.GetSegs().IsSpliced() &&
701  align.GetSegs().GetSpliced().IsSetExons())
702  {
703  const CSpliced_seg& seg = align.GetSegs().GetSpliced();
704  unsigned result = INT_MAX;
705  ITERATE (CSpliced_seg::TExons, exon_it, seg.GetExons()) {
706  const CSpliced_exon& exon = **exon_it;
707  if (!exon.IsSetParts()) {
708  continue;
709  }
710  unsigned distance_5prime = 0, distance_3prime = 0;
711  bool found_indel = false;
712  ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
713  const CSpliced_exon_chunk& part = **part_it;
714  switch (part.Which()) {
716  distance_5prime += part.GetMatch();
717  break;
719  distance_5prime += part.GetMismatch();
720  break;
722  distance_5prime += part.GetDiag();
723  break;
724  default:
725  found_indel = true;
726  break;
727  }
728  if (found_indel) {
729  break;
730  }
731  }
732  if (!exon.IsSetAcceptor_before_exon() ||
733  exon.GetAcceptor_before_exon().GetBases() == " " ||
734  !found_indel)
735  {
736  distance_5prime = INT_MAX;
737  }
738  found_indel = false;
739  REVERSE_ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
740  const CSpliced_exon_chunk& part = **part_it;
741  switch (part.Which()) {
743  distance_3prime += part.GetMatch();
744  break;
746  distance_3prime += part.GetMismatch();
747  break;
749  distance_3prime += part.GetDiag();
750  break;
751  default:
752  found_indel = true;
753  break;
754  }
755  if (found_indel) {
756  break;
757  }
758  }
759  if (!exon.IsSetDonor_after_exon() ||
760  exon.GetDonor_after_exon().GetBases() == " " ||
761  !found_indel)
762  {
763  distance_3prime = INT_MAX;
764  }
765  result = min(result, min(distance_5prime,distance_3prime));
766  }
767  if (result < INT_MAX) {
768  return result;
769  }
770  }
771 
773  "No indels found in exons with splice sites");
774  }
775 };
776 
777 //////////////////////////////////////////////////////////////////////////////
778 
779 static const CGenetic_code *s_GetGeneticCode(const CSeq_id& seq_id,
780  CScope* scope)
781 {
782  CRef<CGenetic_code> genetic_code;
783  try {
784  CBioseq_Handle bsh = scope->GetBioseqHandle(seq_id);
785  int gcode = sequence::GetOrg_ref(bsh).GetGcode();
787  ITERATE (CGenetic_code_table::Tdata, it, tbl.Get()) {
788  if ((*it)->GetId() == gcode) {
789  genetic_code = *it;
790  break;
791  }
792  }
793  }
794  catch (CException&) {
795  // use the default genetic code
796  }
797 
798  return genetic_code.GetPointer();
799 }
800 
802 {
803 public:
804  CScore_StartStopCodon(bool start_codon)
805  : m_StartCodon(start_codon)
806  {
807  }
808 
809  virtual void PrintHelp(CNcbiOstream& ostr) const
810  {
811  ostr << "1 if a " << (m_StartCodon ? "start" : "stop")
812  << " codon was found, 0 otherwise. Note that this score has "
813  "meaning only for Spliced-seg alignments, as would be generated "
814  "by Splign or ProSplign.";
815  }
816 
817  virtual EComplexity GetComplexity() const { return eEasy; };
818 
819  virtual bool IsInteger() const { return true; };
820 
821  virtual double Get(const CSeq_align& align, CScope* scope) const
822  {
823  bool is_protein = false;
824  TSeqPos product_length = 0;
825  if (align.GetSegs().IsSpliced()) {
826  bool score_precalculated=false;
827  const CSpliced_seg& seg = align.GetSegs().GetSpliced();
828  is_protein = seg.GetProduct_type() ==
830  if (seg.CanGetProduct_length()) {
831  product_length = seg.GetProduct_length();
832  }
834  if (m_StartCodon
835  ? (*it)->IsStart_codon_found()
836  : (*it)->IsStop_codon_found() ) {
837  score_precalculated=true;
838  if (m_StartCodon
839  ? (*it)->GetStart_codon_found()
840  : (*it)->GetStop_codon_found())
841  {
842  return 1;
843  }
844  }
845  }
846  if (score_precalculated) {
847  /// Found the modifier, but it was set to false
848  return 0;
849  }
850  }
851 
852  if (!product_length) {
853  CBioseq_Handle product_bsh =
854  scope->GetBioseqHandle(align.GetSeq_id(0));
855  if (!product_bsh) {
856  NCBI_THROW(CSeqalignException, eUnsupported,
857  "Can't get sequence " +
858  align.GetSeq_id(0).AsFastaString());
859  }
860  is_protein = product_bsh.IsAa();
861  product_length = product_bsh.GetBioseqLength();
862  }
863 
864  CRef<CSeq_loc> aligned_genomic;
865 
866  //
867  // generate the cleaned alignment
868  //
869 
870  CFeatureGenerator generator(*scope);
871  generator.SetAllowedUnaligned(10);
872  CConstRef<CSeq_align> clean_align = generator.CleanAlignment(align);
873 
874  // we can't call CFeatureGenerator because CFeatureGenerator depends on
875  // having certain fields set (such as Spliced-seg modifiers indicating
876  // (wait for it...) that the stop codon or start codon was found. This
877  // here function is to be called to verify that the start/stop are
878  // included, hence we have a circular logical relationship...
879  CSeq_id &query_id = const_cast<CSeq_id &>(clean_align->GetSeq_id(0));
880  CSeq_id &subject_id = const_cast<CSeq_id &>(clean_align->GetSeq_id(1));
881  CBioseq_Handle genomic_bsh = scope->GetBioseqHandle(subject_id);
882  if ( !genomic_bsh ) {
884  "failed to retrieve sequence for " +
885  subject_id.AsFastaString());
886  }
887  int genomic_len = genomic_bsh.GetBioseqLength();
888 
889  CSeq_loc_Mapper mapper(*clean_align, 1);
890 
891  CRef<CSeq_loc> cds_loc;
892  if (is_protein) {
893  CSeq_loc loc;
894  loc.SetWhole().Assign(query_id);
895  cds_loc = mapper.Map(loc);
896  }
897  else {
898  CBioseq_Handle bsh = scope->GetBioseqHandle(query_id);
899  if ( !bsh ) {
901  "failed to retrieve sequence for " +
902  query_id.AsFastaString());
903  }
904  CFeat_CI feat_it(bsh,
906  .IncludeFeatType(CSeqFeatData::e_Cdregion));
907 
908  CMappedFeat mf;
909  for ( ; feat_it; ++feat_it) {
910  mf = *feat_it;
911  break;
912  }
913 
914  if ( !mf ) {
915  // no CDS == no start or stop
916  return 0.0;
917  }
918 
919  const CSeq_loc &orig_loc = mf.GetLocation();
920  ENa_strand q_strand = sequence::GetStrand(orig_loc, scope);
921  TSeqRange total_q_range = orig_loc.GetTotalRange();
922  if (!orig_loc.IsPartialStop(eExtreme_Biological)) {
923  /// Remove stop codon
924  if (q_strand == eNa_strand_minus) {
925  total_q_range.SetFrom(total_q_range.GetFrom() + 3);
926  }
927  else {
928  total_q_range.SetTo(total_q_range.GetTo() - 3);
929  }
930  }
931 
932  /**
933  cerr << "orig loc: " << MSerial_AsnText << orig_loc;
934  cerr << "orig strand: " << s_strand << endl;
935  cerr << "orig range: " << total_s_range << endl;
936  **/
937 
938  if (mf.GetData().GetCdregion().IsSetFrame() &&
939  mf.GetData().GetCdregion().GetFrame() > 1)
940  {
941  TSeqPos offs = mf.GetData().GetCdregion().GetFrame() - 1;
942  if (q_strand == eNa_strand_minus) {
943  total_q_range.SetTo(total_q_range.GetTo() + offs);
944  }
945  else {
946  total_q_range.SetFrom(total_q_range.GetFrom() - offs);
947  }
948  }
949  CSeq_loc adjusted_loc(
950  query_id, total_q_range.GetFrom(),
951  total_q_range.GetTo(), q_strand);
952 
953  // map the mRNA locations to the genome
954  cds_loc = mapper.Map(adjusted_loc);
955 
956  /**
957  if (start_codon) {
958  cerr << "start codon: " << MSerial_AsnText << *start_codon;
959  }
960  if (stop_codon) {
961  cerr << "stop codon: " << MSerial_AsnText << *stop_codon;
962  }
963  **/
964  }
965 
966  ENa_strand s_strand = sequence::GetStrand(*cds_loc, scope);
967  int direction = s_strand == eNa_strand_minus ? -1 : 1;
968  int from =
970  : (int)cds_loc->GetStop(eExtreme_Biological) + direction;
971  int to = from + 2 * direction;
972  CRef<CSeq_loc> codon;
973  if (to >= 0 && to < genomic_len) {
974  /// codon is simple interval
975  codon.Reset(new CSeq_loc(subject_id, min(from,to), max(from,to),
976  s_strand));
977  } else if (genomic_bsh.GetInst_Topology() ==
979  {
980  /// this is a circular genomic sequence, and codon crosses origin
981  CRef<CSeq_interval> int1, int2;
982  if (s_strand == eNa_strand_minus) {
983  int1.Reset(new CSeq_interval(subject_id, 0, from,
985  int1->SetFuzz_from().SetLim(CInt_fuzz::eLim_circle);
986  int2.Reset(new CSeq_interval(subject_id, to + genomic_len,
987  genomic_len - 1, eNa_strand_minus));
988  int2->SetFuzz_to().SetLim(CInt_fuzz::eLim_circle);
989  } else {
990  int1.Reset(new CSeq_interval(subject_id, from,
991  genomic_len - 1, eNa_strand_plus));
992  int1->SetFuzz_to().SetLim(CInt_fuzz::eLim_circle);
993  int2.Reset(new CSeq_interval(subject_id, 0, to - genomic_len,
994  eNa_strand_plus));
995  int2->SetFuzz_from().SetLim(CInt_fuzz::eLim_circle);
996  }
997  codon.Reset(new CSeq_loc);
998  codon->SetPacked_int().Set().push_back(int1);
999  codon->SetPacked_int().Set().push_back(int2);
1000  }
1001 
1002  if ( !codon ) {
1003  return 0.0;
1004  }
1005 
1006  //
1007  // evaluate for start-stop codon as needed
1008  //
1009 
1010  int gcode = 11;
1011  const CGenetic_code* gc = s_GetGeneticCode(align.GetSeq_id(1), scope);
1012  if (gc) {
1013  gcode = gc->GetId();
1014  }
1015  const CTrans_table& tbl = CGen_code_table::GetTransTable(gcode);
1016 
1017  CSeqVector v(*codon, *scope, CBioseq_Handle::eCoding_Iupac);
1018 
1019  /**
1020  cerr << MSerial_AsnText << *start_codon;
1021  cerr << "gcode: " << gcode << endl;
1022  cerr << "bases: "
1023  << v[0] << v[1] << v[2] << endl;
1024  **/
1025 
1026  int state = tbl.SetCodonState(v[0], v[1], v[2]);
1027  if (m_StartCodon ? tbl.IsAnyStart(state) : tbl.IsOrfStop(state)) {
1028  return 1.0;
1029  }
1030 
1031  return 0.0;
1032  }
1033 
1034 
1035 private:
1037 };
1038 
1039 //////////////////////////////////////////////////////////////////////////////
1040 
1042 {
1043 public:
1044  virtual void PrintHelp(CNcbiOstream& ostr) const
1045  {
1046  ostr <<
1047  "Count of the number of internal stop codons encountered when "
1048  "translating the aligned coding region. Note that this has meaning "
1049  "only for Spliced-seg transcript alignments with a transcript that "
1050  "has an annotated cdregion, or for Spliced-seg protein alignments.";
1051  }
1052 
1053  virtual EComplexity GetComplexity() const { return eHard; };
1054 
1055  virtual bool IsInteger() const { return true; };
1056 
1057  virtual double Get(const CSeq_align& align, CScope* scope) const
1058  {
1059 
1060  if (align.GetSegs().IsSpliced()) {
1061  CInternalStopFinder stop_finder(*scope);
1062  return stop_finder.FindStops(align).size();
1063  }
1064 
1065  double score = 0;
1066 
1067  //
1068  // complicated
1069  //
1070 
1071  // first, generate a gene model
1072  CFeatureGenerator generator(*scope);
1075  generator.SetAllowedUnaligned(10);
1076 
1077  CConstRef<CSeq_align> clean_align = generator.CleanAlignment(align);
1078  CSeq_annot annot;
1079  CBioseq_set bset;
1080  generator.ConvertAlignToAnnot(*clean_align, annot, bset);
1081  if (bset.GetSeq_set().empty() ||
1082  !bset.GetSeq_set().front()->IsSetAnnot())
1083  {
1084  return score;
1085  }
1086 
1087  CScope transcribed_mrna_scope(*CObjectManager::GetInstance());
1088  transcribed_mrna_scope.AddTopLevelSeqEntry(*bset.GetSeq_set().front());
1089  CRef<CSeq_feat> cds = bset.GetSeq_set().front()
1090  -> GetSeq().GetAnnot().front()
1091  -> GetData().GetFtable().front();
1092 
1093  if (cds) {
1094  cds->SetData().SetCdregion().ResetCode_break();
1095  string trans;
1096  CSeqTranslator::Translate(*cds, transcribed_mrna_scope, trans);
1098  NStr::EndsWith(trans, "*"))
1099  {
1100  trans.resize(trans.size() - 1);
1101  }
1102 
1103  ITERATE (string, i, trans) {
1104  score += (*i == '*');
1105  }
1106 
1107  /**
1108  cerr << "align: "
1109  << CSeq_id_Handle::GetHandle(align.GetSeq_id(0))
1110  << " x "
1111  << CSeq_id_Handle::GetHandle(align.GetSeq_id(1))
1112  << endl;
1113 
1114  if (cds->IsSetProduct()) {
1115  string seq;
1116  CSeqVector v(cds->GetProduct(), *scope, CBioseq_Handle::eCoding_Iupac);
1117  v.GetSeqData(v.begin(), v.end(), seq);
1118  cerr << "product: " << seq << endl;
1119  }
1120  cerr << "xlate: " << trans << endl;
1121  cerr << "count: " << score << endl;
1122  **/
1123  }
1124 
1125  return score;
1126  }
1127 };
1128 
1129 /////////////////////////////////////////////////////////////////////////////
1130 
1132 {
1133 public:
1135 
1137  : m_ScoreType(type)
1138  {}
1139 
1140  virtual EComplexity GetComplexity() const { return eHard; };
1141 
1142  virtual bool IsInteger() const { return m_ScoreType >= eStart; };
1143 
1144  virtual void PrintHelp(CNcbiOstream& ostr) const
1145  {
1146  switch (m_ScoreType) {
1147  case ePercentIdentity:
1148  ostr <<
1149  "Percent-identity score confined to the coding region "
1150  "associated with the align transcipt. Not supported "
1151  "for standard-seg alignments.";
1152  break;
1153  case ePercentCoverage:
1154  ostr <<
1155  "Percent-coverage score confined to the coding region "
1156  "associated with the align transcipt.";
1157  break;
1158  case eStart:
1159  ostr << "Start position of product's coding region.";
1160  break;
1161  case eEnd:
1162  ostr << "End position of product's coding region.";
1163  break;
1164  }
1165  ostr << " Note that this has meaning only if product has a coding "
1166  "region annotation.";
1167  }
1168 
1169  virtual double Get(const CSeq_align& align, CScope* scope) const
1170  {
1171  double score = -1;
1172  if (align.GetSegs().IsStd()) {
1173  return score;
1174  }
1175 
1176  CBioseq_Handle product = scope->GetBioseqHandle(align.GetSeq_id(0));
1177  if ( !product ) {
1179  "failed to retrieve sequence for " +
1180  align.GetSeq_id(0).AsFastaString());
1181  }
1183 
1184  if (cds) {
1185  switch (m_ScoreType) {
1186  case eStart:
1187  score = cds->GetLocation().GetStart(eExtreme_Positional);
1188  break;
1189 
1190  case eEnd:
1191  score = cds->GetLocation().GetStop(eExtreme_Positional);
1192  break;
1193 
1194  default:
1195  {{
1196  CRangeCollection<TSeqPos> cds_ranges;
1197  for (CSeq_loc_CI it(cds->GetLocation()); it; ++it) {
1198  cds_ranges += it.GetRange();
1199  }
1200  score = m_ScoreType == ePercentIdentity
1201  ? CScoreBuilder().GetPercentIdentity(*scope, align,
1202  cds_ranges)
1203  : CScoreBuilder().GetPercentCoverage(*scope, align,
1204  cds_ranges);
1205  break;
1206  }}
1207  }
1208  }
1209  return score;
1210  }
1211 
1212 private:
1214 };
1215 
1216 //////////////////////////////////////////////////////////////////////////////
1217 
1219 {
1220 public:
1222  : m_Row(row)
1223  {}
1224 
1225  virtual EComplexity GetComplexity() const { return eEasy; };
1226 
1227  virtual void PrintHelp(CNcbiOstream& ostr) const
1228  {
1229  ostr << (m_Row == 0
1230  ? "Percentage of query sequence aligned to subject (0.0-100.0)"
1231  : "Percentage of subject sequence aligned to query (0.0-100.0)");
1232  }
1233 
1234 
1235  virtual double Get(const CSeq_align& align, CScope* scope) const
1236  {
1237  if (m_Row == 0) {
1238  return CScoreBuilder().GetPercentCoverage(*scope, align);
1239  }
1240 
1241  /// Calculate coverage on subject
1242  size_t covered_bases = align.GetAlignLength(false /* don't include gaps */);
1243  size_t seq_len = scope->GetSequenceLength(align.GetSeq_id(1));
1244  return covered_bases ? 100.0f * double(covered_bases) / double(seq_len)
1245  : 0.0;
1246  }
1247 
1248 private:
1249  int m_Row;
1250 };
1251 
1252 //////////////////////////////////////////////////////////////////////////////
1253 
1255 {
1256 public:
1257  CScore_Taxid(int row, const string &rank = "")
1258  : m_Row(row)
1259  , m_Rank(rank)
1260  {
1261  }
1262 
1263  virtual EComplexity GetComplexity() const { return eHard; };
1264 
1265  virtual bool IsInteger() const { return true; };
1266 
1267  virtual void PrintHelp(CNcbiOstream& ostr) const
1268  {
1269  if (m_Row == 0) {
1270  ostr << "Taxid of query sequence";
1271  }
1272  else if (m_Row == 1) {
1273  ostr << "Taxid of subject sequence";
1274  }
1275  }
1276 
1277  virtual double Get(const CSeq_align& align, CScope* scope) const
1278  {
1279  TTaxId taxid = scope->GetTaxId(align.GetSeq_id(m_Row));
1280  if (!m_Rank.empty()) {
1281  m_Taxon.Init();
1282  taxid = m_Taxon.GetAncestorByRank(taxid, m_Rank.c_str());
1283  }
1284  return TAX_ID_TO(double, taxid);
1285  }
1286 
1287 private:
1288  int m_Row;
1289  string m_Rank;
1290  mutable CTaxon1 m_Taxon;
1291 };
1292 
1293 //////////////////////////////////////////////////////////////////////////////
1294 
1296 {
1297 public:
1299  {
1300  }
1301 
1302  virtual void PrintHelp(CNcbiOstream& ostr) const
1303  {
1304  ostr <<
1305  "Position of last splice site. Note that this has meaning only "
1306  "for Spliced-seg transcript alignments, and only if the alignment "
1307  "has at least two exons.";
1308  }
1309 
1310  virtual EComplexity GetComplexity() const { return eEasy; };
1311 
1312  virtual bool IsInteger() const { return true; };
1313 
1314  virtual double Get(const CSeq_align& align, CScope* ) const
1315  {
1316  if (align.GetSegs().IsSpliced())
1317  {
1318  const CSpliced_seg &seg = align.GetSegs().GetSpliced();
1319  if (seg.CanGetExons() && seg.GetExons().size() > 1 &&
1320  seg.CanGetProduct_type() &&
1322  seg.CanGetProduct_strand() &&
1324  {
1325  const CSpliced_exon &last_spliced_exon =
1327  ? **++align.GetSegs().GetSpliced().GetExons().begin()
1328  : **++align.GetSegs().GetSpliced().GetExons().rbegin();
1329  if (last_spliced_exon.CanGetProduct_end()) {
1330  return last_spliced_exon.GetProduct_end().GetNucpos();
1331  }
1332  }
1333  }
1334  NCBI_THROW(CSeqalignException, eUnsupported,
1335  "last_splice_site score inapplicable");
1336  return 0;
1337  }
1338 };
1339 
1340 
1341 //////////////////////////////////////////////////////////////////////////////
1342 
1344 {
1345 public:
1346  CScore_Overlap(int row, bool include_gaps)
1347  : m_Row(row)
1348  , m_IncludeGaps(include_gaps)
1349  {
1350  }
1351 
1352  virtual void PrintHelp(CNcbiOstream& ostr) const
1353  {
1354  string row_name = m_Row == 0 ? "query" : "subject";
1355  string range_type = m_IncludeGaps ? "total aligned range" : "aligned bases";
1356  ostr <<
1357  "size of overlap of " + range_type + " with any alignments "
1358  "over the same " + row_name + " sequence that have previously "
1359  "passed this filter. Assumes that input alignments "
1360  "are collated by " + row_name + ", and then sorted by priority for "
1361  "inclusion in the output.";
1362  }
1363 
1364  virtual EComplexity GetComplexity() const { return eEasy; };
1365 
1366  virtual bool IsInteger() const { return true; };
1367 
1368  virtual double Get(const CSeq_align& align, CScope* ) const
1369  {
1370  CRangeCollection<TSeqPos> overlap;
1371  if (align.GetSeq_id(m_Row).Match(m_CurrentSeq)) {
1372  overlap = m_CoveredRanges;
1373  if (m_IncludeGaps) {
1374  overlap &= align.GetSeqRange(m_Row);
1375  } else {
1376  overlap &= align.GetAlignedBases(m_Row);
1377  }
1378  }
1379  return overlap.GetCoveredLength();
1380  }
1381 
1382  virtual void UpdateState(const objects::CSeq_align& align)
1383  {
1384  const CSeq_id &aligned_id = align.GetSeq_id(m_Row);
1385  if (!aligned_id.Match(m_CurrentSeq)) {
1386  m_CurrentSeq.Assign(aligned_id);
1388  }
1389  if (m_IncludeGaps) {
1390  m_CoveredRanges += align.GetSeqRange(m_Row);
1391  } else {
1392  m_CoveredRanges += align.GetAlignedBases(m_Row);
1393  }
1394  }
1395 
1396 private:
1397  int m_Row;
1401 };
1402 
1403 
1404 //////////////////////////////////////////////////////////////////////////////
1405 
1407 {
1408 public:
1409  CScore_OverlapBoth(int row, bool include_gaps)
1410  : m_Row(row)
1411  , m_IncludeGaps(include_gaps)
1412  {
1413  }
1414 
1415  virtual void PrintHelp(CNcbiOstream& ostr) const
1416  {
1417  string row_name = m_Row == 0 ? "query" : "subject";
1418  string range_type = m_IncludeGaps ? "total aligned range" : "aligned bases";
1419  ostr <<
1420  "size of overlap of " + range_type + " with any alignments "
1421  "over the same " + row_name + " sequence that have previously "
1422  "passed this filter. Assumes that input alignments "
1423  "are collated by " + row_name + ", and then sorted by priority for "
1424  "inclusion in the output.";
1425  }
1426 
1427  virtual EComplexity GetComplexity() const { return eEasy; };
1428 
1429  virtual bool IsInteger() const { return true; };
1430 
1431  virtual double Get(const CSeq_align& align, CScope* ) const
1432  {
1435 
1436  CRangeCollection<TSeqPos> overlap;
1438  m_CoveredRanges.find(make_pair(q, s));
1439 
1440  if (it != m_CoveredRanges.end()) {
1441  if (m_IncludeGaps) {
1442  overlap += align.GetSeqRange(m_Row);
1443  } else {
1444  overlap += align.GetAlignedBases(m_Row);
1445  }
1446 
1447  overlap &= it->second;
1448  }
1449  return overlap.GetCoveredLength();
1450  }
1451 
1452  virtual void UpdateState(const objects::CSeq_align& align)
1453  {
1454  CSeq_id_Handle q = CSeq_id_Handle::GetHandle(align.GetSeq_id(0));
1455  CSeq_id_Handle s = CSeq_id_Handle::GetHandle(align.GetSeq_id(1));
1456 
1457  TData::iterator it =
1459  (make_pair(q, s),
1460  CRangeCollection<TSeqPos>())).first;
1461 
1462  if (m_IncludeGaps) {
1463  it->second += align.GetSeqRange(m_Row);
1464  } else {
1465  it->second += align.GetAlignedBases(m_Row);
1466  }
1467  }
1468 
1469 private:
1470  int m_Row;
1472 
1475 };
1476 
1477 //////////////////////////////////////////////////////////////////////////////
1478 
1480 {
1481 public:
1483  : m_Row(row)
1484  {
1485  }
1486 
1487  virtual void PrintHelp(CNcbiOstream& ostr) const
1488  {
1489  ostr <<
1490  "restrict to the first N subjects seen for each query";
1491  }
1492 
1493  virtual EComplexity GetComplexity() const { return eEasy; };
1494 
1495  virtual bool IsInteger() const { return true; };
1496 
1497  virtual double Get(const CSeq_align& align, CScope* ) const
1498  {
1499  int index_row = m_Row;
1500  int alt_row = abs(index_row - 1);
1501  CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle(align.GetSeq_id(index_row));
1502  CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle(align.GetSeq_id(alt_row));
1503  TOrdinalPos& ranks = m_Ids[id1];
1504  TOrdinalPos::iterator it = ranks.find(id2);
1505  if (it == ranks.end()) {
1506  it = ranks.insert(TOrdinalPos::value_type(id2, ranks.size())).first;
1507 
1508  /**
1509  LOG_POST(Error << " q=" << qid
1510  << " s=" << id2
1511  << " ord=" << it->second);
1512  **/
1513  }
1514  return it->second;
1515  }
1516 
1517 private:
1520 
1521  int m_Row;
1522  mutable TIds m_Ids;
1523 };
1524 
1525 
1526 //////////////////////////////////////////////////////////////////////////////
1527 
1529 {
1530 public:
1533  {
1534  }
1535 
1536  virtual void PrintHelp(CNcbiOstream& ostr) const
1537  {
1538  ostr <<
1539  "Recompute a raw BLAST score for an arbitrary protein-to-DNA "
1540  "alignment, using a Spliced-seg as input. Computation is "
1541  "constrained to accept only protein-to-nucleotide Spliced-seg "
1542  "alignments and is slightly different than the raw BLAST score, "
1543  "in that gap computations differ due to the lack of true "
1544  "composition based statistics. These differences are minimal.";
1545  }
1546 
1547  virtual EComplexity GetComplexity() const { return eHard; };
1548 
1549  virtual bool IsInteger() const { return true; };
1550 
1551  virtual double Get(const CSeq_align& align, CScope* scope) const
1552  {
1553  // check assumptions:
1554  //
1555  if ( !align.GetSegs().IsSpliced() ) {
1556  NCBI_THROW(CSeqalignException, eUnsupported,
1557  "CScore_TblastnScore: "
1558  "valid only for spliced-seg alignments");
1559  }
1560 
1561  if ( align.GetSegs().GetSpliced().GetProduct_type() !=
1563  NCBI_THROW(CSeqalignException, eUnsupported,
1564  "CScore_TblastnScore: "
1565  "valid only for protein spliced-seg alignments");
1566  }
1567 
1568  int score = m_ScoreLookup.GetBlastScore(*scope, align);
1569 
1570  return score;
1571  }
1572 
1573 private:
1575 };
1576 
1577 
1578 //////////////////////////////////////////////////////////////////////////////
1579 
1581 {
1582 public:
1585  {
1586  }
1587 
1588  virtual void PrintHelp(CNcbiOstream& ostr) const
1589  {
1590  ostr <<
1591  "Adjusted protein score (ratio of actual score to perfect score)";
1592  }
1593 
1594  virtual EComplexity GetComplexity() const { return eHard; };
1595 
1596  virtual double Get(const CSeq_align& align, CScope* scope) const
1597  {
1599 
1600  //
1601  // compute the BLAST score
1602  //
1603  int score = m_ScoreLookup.GetBlastScore(*scope, align);
1604 
1605  //
1606  // compute the BLAST score for a degenerate perfect alignment for
1607  // the two sequences
1608  //
1609  double q_perfect = x_GetPerfectScore(*scope, idh);
1610  double s_perfect = x_GetPerfectScore
1611  (*scope, CSeq_id_Handle::GetHandle(align.GetSeq_id(1)));
1612 
1613  double perfect_score = max(q_perfect, s_perfect);
1614  return perfect_score ? score / perfect_score : 0;
1615  }
1616 
1617 private:
1619 
1620  double x_GetPerfectScore(CScope& scope, const CSeq_id_Handle& idh) const
1621  {
1622  CBioseq_Handle bsh = scope.GetBioseqHandle(idh);
1623  if ( !bsh ) {
1625  "failed to retrieve sequence for " +
1626  idh.AsString());
1627  }
1628 
1629  CSeq_align perfect_align;
1630  CDense_seg& seg = perfect_align.SetSegs().SetDenseg();
1631  CRef<CSeq_id> id(new CSeq_id);
1632  id->Assign(*idh.GetSeqId());
1633  seg.SetIds().push_back(id);
1634  seg.SetIds().push_back(id);
1635  seg.SetNumseg(1);
1636  seg.SetStarts().push_back(0);
1637  seg.SetStarts().push_back(0);
1638  seg.SetLens().push_back(bsh.GetBioseqLength());
1639 
1640  return m_ScoreLookup.GetBlastScore(scope, perfect_align);
1641  }
1642 };
1643 
1644 
1646 {
1647 public:
1649 
1651 
1653  : m_Edge(edge), m_InfoType(type)
1654  {}
1655 
1656  virtual void PrintHelp(CNcbiOstream& ostr) const
1657  {
1658  ostr << (m_InfoType == eLength ? "Length" : "Identity percentage")
1659  << " of the " << (m_Edge == e5Prime ? "5'" : "3'")
1660  << " exon. Note that this score has "
1661  "meaning only for Spliced-seg alignments, as would be generated "
1662  "by Splign or ProSplign, and only if it has at least one intron.";
1663  }
1664 
1665  virtual EComplexity GetComplexity() const { return eEasy; };
1666 
1667  virtual bool IsInteger() const { return m_InfoType == eLength; };
1668 
1669  virtual double Get(const CSeq_align& align, CScope* scope) const
1670  {
1671  if (!align.GetSegs().IsSpliced() ||
1672  align.GetSegs().GetSpliced().GetExons().size() == 1)
1673  {
1674  NCBI_THROW(CSeqalignException, eUnsupported,
1675  "CScore_EdgeExonInfo: "
1676  "valid only for spliced-seg alignments with at least one intron");
1677  }
1678  const CSpliced_seg::TExons &exons =
1679  align.GetSegs().GetSpliced().GetExons();
1680  CConstRef<CSpliced_exon> exon = m_Edge == e5Prime ? exons.front()
1681  : exons.back();
1682  if (m_InfoType == eLength) {
1683  return exon->GetGenomic_end() - exon->GetGenomic_start() + 1;
1684  } else {
1685  if (exon->IsSetScores()) {
1686  ITERATE (CScore_set::Tdata, score_it, exon->GetScores().Get()) {
1687  if ((*score_it)->CanGetId() && (*score_it)->GetId().IsStr()
1688  && (*score_it)->GetId().GetStr() == "idty")
1689  {
1690  return (*score_it)->GetValue().GetReal() * 100;
1691  }
1692  }
1693  }
1694  /// Exon percent identity not stored; calculate it
1695  TSeqRange product_span;
1696  product_span.Set(exon->GetProduct_start().AsSeqPos(),
1697  exon->GetProduct_end().AsSeqPos());
1698  return CScoreBuilder().GetPercentIdentity(*scope, align,
1699  product_span);
1700  }
1701  }
1702 
1703 private:
1706 };
1707 
1709 {
1710  CFeat_CI gene_it(bsh, CSeqFeatData::e_Gene);
1711  if (!gene_it) {
1712  NCBI_THROW(CException, eUnknown, "No gene feature");
1713  }
1714 
1715  CMappedFeat gene = *gene_it;
1716  if (++gene_it) {
1717  NCBI_THROW(CException, eUnknown, "Multiple gene features");
1718  }
1719 
1720  if (gene.GetNamedDbxref("GeneID")) {
1721  return gene.GetNamedDbxref("GeneID")->GetTag().GetId();
1722  }
1723 
1724  /// Fallback; use LocusID
1725  if (gene.GetData().GetGene().IsSetDb()) {
1726  for (const CRef<CDbtag> &db : gene.GetData().GetGene().GetDb()) {
1727  if (db->GetDb() == "LocusID" && db->GetTag().IsId()) {
1728  return db->GetTag().GetId();
1729  }
1730  }
1731  }
1732 
1733  NCBI_THROW(CException, eUnknown, "Gene id not set");
1734 }
1735 
1737 {
1738 public:
1740  : m_Row(row)
1741  {
1742  }
1743 
1744  virtual void PrintHelp(CNcbiOstream& ostr) const
1745  {
1746  ostr << "Gene ID of " << (m_Row == 0 ? "query" : "subject");
1747  }
1748 
1749  virtual EComplexity GetComplexity() const { return eEasy; };
1750 
1751  virtual bool IsInteger() const { return true; };
1752 
1753  virtual double Get(const CSeq_align& align, CScope* scope) const
1754  {
1755  CBioseq_Handle bsh = scope->GetBioseqHandle(align.GetSeq_id(m_Row));
1756  if ( !bsh ) {
1758  "failed to retrieve sequence for " +
1759  align.GetSeq_id(m_Row).AsFastaString());
1760  }
1761  return CScoreLookup::GetGeneId(bsh);
1762  }
1763 
1764 private:
1765  int m_Row;
1766 };
1767 
1768 /////////////////////////////////////////////////////////////////////////////
1769 
1771 {
1772 public:
1773 
1774  virtual void PrintHelp(CNcbiOstream& ostr) const
1775  {
1776  ostr << "CRC of the strucural parts of the alignment";
1777  }
1778 
1779  virtual EComplexity GetComplexity() const { return eEasy; };
1780 
1781  virtual bool IsInteger() const { return true; };
1782 
1783  virtual double Get(const CSeq_align& align, CScope*) const
1784  {
1785  CScoreBuilder Builder;
1786  return Builder.ComputeTieBreaker(align);
1787  }
1788 };
1789 
1790 //////////////////////////////////////////////////////////////////////////////
1791 
1793 {
1794 public:
1795  virtual void PrintHelp(CNcbiOstream& ostr) const
1796  {
1797  ostr <<
1798  "1 if rna Seq-feat based on this alignment is partial; "
1799  "0 if it is complete";
1800  }
1801 
1802  virtual EComplexity GetComplexity() const { return eHard; };
1803 
1804  virtual bool IsInteger() const { return true; };
1805 
1806  virtual double Get(const CSeq_align& align, CScope* scope) const
1807  {
1808  CFeatureGenerator generator(*scope);
1809  generator.SetAllowedUnaligned(10);
1810 
1811  CConstRef<CSeq_align> clean_align = generator.CleanAlignment(align);
1812  CSeq_annot annot;
1813  CBioseq_set bset;
1814  generator.ConvertAlignToAnnot(*clean_align, annot, bset);
1815  for (const CRef<CSeq_feat> &feat : annot.GetData().GetFtable()) {
1816  if (feat->GetData().IsRna()) {
1817  return feat->IsSetPartial() && feat->GetPartial();
1818  }
1819  }
1820 
1822  "Can't generate rna sequence from alignment");
1823  }
1824 };
1825 
1826 //////////////////////////////////////////////////////////////////////////////
1827 
1829 {
1830 public:
1831  virtual void PrintHelp(CNcbiOstream& ostr) const
1832  {
1833  ostr <<
1834  "1 if query is a mRNA and its coding region has ribosomal "
1835  "slippage; 0 otherwise";
1836  }
1837 
1838  virtual EComplexity GetComplexity() const { return eEasy; };
1839 
1840  virtual bool IsInteger() const { return true; };
1841 
1842  virtual double Get(const CSeq_align& align, CScope* scope) const
1843  {
1844  CBioseq_Handle bsh = scope->GetBioseqHandle(align.GetSeq_id(0));
1845  if ( !bsh ) {
1847  "failed to retrieve sequence for " +
1848  align.GetSeq_id(0).AsFastaString());
1849  }
1850 
1851  CFeat_CI feat_it(bsh, CSeqFeatData::e_Cdregion);
1852  return feat_it && feat_it->IsSetExcept_text() &&
1853  feat_it->GetExcept_text().find("ribosomal slippage") != string::npos;
1854  }
1855 };
1856 
1857 //////////////////////////////////////////////////////////////////////////////
1858 
1860 {
1861 public:
1863  : m_Row(row)
1864  {
1865  }
1866 
1867  virtual void PrintHelp(CNcbiOstream& ostr) const
1868  {
1869  ostr <<
1870  "Computes the percent of residues in the aligned "
1871  << (m_Row == 0 ? "query" : "subject")
1872  << " region that would be filtered by 'seg'";
1873  }
1874 
1875  virtual EComplexity GetComplexity() const { return eEasy; };
1876 
1877  virtual bool IsInteger() const { return false; };
1878 
1879  virtual double Get(const CSeq_align& align, CScope* scope) const
1880  {
1881  CBioseq_Handle bsh = scope->GetBioseqHandle(align.GetSeq_id(m_Row));
1882  if ( !bsh ) {
1884  "failed to retrieve sequence for " +
1885  align.GetSeq_id(0).AsFastaString());
1886  }
1887 
1888  if ( !bsh.IsProtein() ) {
1890  "alignment filter requires that the requested "
1891  "sequence be a protein");
1892  }
1893 
1894  TSeqRange r = align.GetSeqRange(m_Row);
1895  string seq;
1897  vec.GetSeqData(r.GetFrom(), r.GetTo(), seq);
1898 
1899  string seq_iupac;
1901  vec.GetSeqData(r.GetFrom(), r.GetTo(), seq_iupac);
1902 
1903  //
1904  // this uses lower-level calls in BLAST to run 'seg' on the covered
1905  // sequence
1906  //
1907 
1909  BlastSeqLoc* seq_locs = NULL;
1910  SeqBufferSeg((unsigned char *)seq.data(), seq.size(), 0, sp, &seq_locs);
1911  SegParametersFree(sp);
1912 
1913  // now, count how many masked residues we have
1914  vector<size_t> counts(seq.size(), 0);
1915  for (BlastSeqLoc *itr = seq_locs; itr; itr = itr->next) {
1916  for (int i = itr->ssr->left; i <= itr->ssr->right; ++ i) {
1917  counts[i] = 1;
1918  }
1919  //cerr << " seg range: [" << itr->ssr->left << ".." << itr->ssr->right << "]: " << itr->ssr->right - itr->ssr->left + 1 << " / " << pos.size() << " total" << endl;
1920  }
1921  BlastSeqLocFree(seq_locs);
1922 
1923  // report the number of masked residues
1924  size_t count_x = 0;
1925  for (const auto& i : counts) {
1926  count_x += i;
1927  }
1928  double val = count_x * 100.0 / seq.size();
1929 
1930  /**
1931  CSeq_id_Handle idh = sequence::GetId(bsh, sequence::eGetId_Best);
1932  cerr
1933  << idh << "(" << r << "): seg-pct = " << val
1934  << ", seq = " << seq_iupac
1935  << endl;
1936  **/
1937 
1938  return val;
1939  }
1940 
1941 private:
1942  size_t m_Row;
1943 };
1944 
1945 //////////////////////////////////////////////////////////////////////////////
1946 
1948 {
1949 public:
1951  : m_Row(row)
1952  {
1953  }
1954 
1955  virtual void PrintHelp(CNcbiOstream& ostr) const
1956  {
1957  ostr <<
1958  "Computes the value of Shannon's entropy for the specified "
1959  "aligned "
1960  << (m_Row == 0 ? "query" : "subject") << " region";
1961  }
1962 
1963  virtual EComplexity GetComplexity() const { return eEasy; };
1964 
1965  virtual bool IsInteger() const { return false; };
1966 
1967  virtual double Get(const CSeq_align& align, CScope* scope) const
1968  {
1969  CBioseq_Handle bsh = scope->GetBioseqHandle(align.GetSeq_id(m_Row));
1970  if ( !bsh ) {
1972  "failed to retrieve sequence for " +
1973  align.GetSeq_id(0).AsFastaString());
1974  }
1975  TSeqRange r = align.GetSeqRange(m_Row);
1976  string seq;
1978  vec.GetSeqData(r.GetFrom(), r.GetTo(), seq);
1979 
1980  int word_size = 4;
1981  if (bsh.IsProtein()) {
1982  word_size = 1;
1983  }
1984  double val = ComputeNormalizedProteinEntropy(seq, word_size);
1985 
1986  /**
1987  CSeq_id_Handle idh = sequence::GetId(bsh, sequence::eGetId_Best);
1988  cerr
1989  << idh << "(" << r << "): entropy = " << val
1990  << ", seq = " << seq
1991  << endl;
1992  **/
1993 
1994  return val;
1995  }
1996 
1997 private:
1998  size_t m_Row;
1999 };
2000 
2001 /////////////////////////////////////////////////////////////////////////////
2002 
2003 
2005 {
2008  ("align_length_ungap",
2009  CIRef<IScore>(new CScore_AlignLength(false /* include gaps */))));
2012  ("gap_count",
2013  CIRef<IScore>(new CScore_GapCount(false))));
2016  ("gap_basecount",
2017  CIRef<IScore>(new CScore_GapCount(true))));
2020  ("query_gap_length",
2021  CIRef<IScore>(new CScore_GapCount(true, 0))));
2024  ("subject_gap_length",
2025  CIRef<IScore>(new CScore_GapCount(true, 1))));
2028  ("product_gap_length",
2029  CIRef<IScore>(new CScore_GapCount(true, 0, true))));
2032  ("genomic_gap_length",
2033  CIRef<IScore>(new CScore_GapCount(true, 1, true))));
2036  ("frame",
2040  ("qframe",
2044  ("sframe",
2048  ("nonframe_indel",
2049  CIRef<IScore>(new CScore_FrameShifts(-1, false))));
2052  ("qnonframe_indel",
2053  CIRef<IScore>(new CScore_FrameShifts(0, false))));
2056  ("snonframe_indel",
2057  CIRef<IScore>(new CScore_FrameShifts(1, false))));
2060  ("symmetric_overlap",
2065  ("symmetric_overlap_min",
2070  ("3prime_unaligned",
2072 
2075  ("polya", CIRef<IScore>(new CScore_Polya)));
2076 
2079  ("min_exon_len",
2081 
2084  ("max_intron_len",
2086 
2089  ("longest_gap",
2091 
2092  {{
2093  CIRef<IScore> score(new CScore_AlignStartStop(0, true));
2096  ("query_start", score));
2099  ("5prime_unaligned", score));
2102  ("query_end",
2103  CIRef<IScore>(new CScore_AlignStartStop(0, false))));
2104  }}
2105 
2108  ("internal_unaligned",
2110 
2113  ("cds_internal_stops",
2117  ("cds_start",
2121  ("cds_end",
2125  ("cds_pct_identity",
2129  ("cds_pct_coverage",
2131 
2134  ("query_coverage",
2135  CIRef<IScore>(new CScore_Coverage(0))));
2136 
2139  ("subject_coverage",
2140  CIRef<IScore>(new CScore_Coverage(1))));
2141 
2144  ("align_length_ratio",
2146 
2149  ("subject_start",
2150  CIRef<IScore>(new CScore_AlignStartStop(1, true))));
2153  ("subject_end",
2154  CIRef<IScore>(new CScore_AlignStartStop(1, false))));
2155 
2156  {{
2157  CIRef<IScore> score(new CScore_SequenceLength(0));
2160  ("query_length", score));
2163  ("product_length", score));
2166  ("subject_length",
2168  }}
2169 
2172  ("query_taxid",
2173  CIRef<IScore>(new CScore_Taxid(0))));
2176  ("subject_taxid",
2177  CIRef<IScore>(new CScore_Taxid(1))));
2180  ("query_species",
2181  CIRef<IScore>(new CScore_Taxid(0, "species"))));
2184  ("subject_species",
2185  CIRef<IScore>(new CScore_Taxid(1, "species"))));
2186 
2189  ("last_splice_site",
2191 
2194  ("exon_count",
2196 
2199  ("query_overlap",
2200  CIRef<IScore>(new CScore_Overlap(0, true))));
2201 
2204  ("query_overlap_nogaps",
2205  CIRef<IScore>(new CScore_Overlap(0, false))));
2206 
2209  ("subject_overlap",
2210  CIRef<IScore>(new CScore_Overlap(1, true))));
2211 
2214  ("subject_overlap_nogaps",
2215  CIRef<IScore>(new CScore_Overlap(1, false))));
2216 
2219  ("query_subject_overlap",
2220  CIRef<IScore>(new CScore_OverlapBoth(1, true))));
2221 
2224  ("query_subject_overlap_nogaps",
2225  CIRef<IScore>(new CScore_OverlapBoth(1, false))));
2226 
2229  ("subject_ordinal_pos",
2230  CIRef<IScore>(new CScore_OrdinalPos(0))));
2231 
2234  ("query_ordinal_pos",
2235  CIRef<IScore>(new CScore_OrdinalPos(1))));
2236 
2239  ("prosplign_tblastn_score",
2240  CIRef<IScore>(new CScore_TblastnScore(*this))));
2241 
2244  ("blast_score_ratio",
2245  CIRef<IScore>(new CScore_BlastRatio(*this))));
2246 
2249  ("start_codon",
2250  CIRef<IScore>(new CScore_StartStopCodon(true))));
2251 
2254  ("stop_codon",
2255  CIRef<IScore>(new CScore_StartStopCodon(false))));
2256 
2259  ("5prime_exon_len",
2263 
2266  ("3prime_exon_len",
2270 
2273  ("5prime_exon_pct_identity",
2277 
2280  ("3prime_exon_pct_identity",
2284 
2287  ("query_geneid",
2288  CIRef<IScore>(new CScore_GeneID(0))));
2291  ("subject_geneid",
2292  CIRef<IScore>(new CScore_GeneID(1))));
2293 
2296  ("query_entropy",
2297  CIRef<IScore>(new CScore_Entropy(0))));
2300  ("subject_entropy",
2301  CIRef<IScore>(new CScore_Entropy(1))));
2302 
2305  ("query_seg_pct",
2306  CIRef<IScore>(new CScore_SegPct(0))));
2309  ("subject_seg_pct",
2310  CIRef<IScore>(new CScore_SegPct(1))));
2311 
2314  ("min_indel_to_splice",
2316 
2319  ("partial",
2320  CIRef<IScore>(new CScore_Partial())));
2321 
2324  ("ribosomal_slippage",
2326 
2329  ("tiebreaker",
2331 }
2332 
2333 
2334 void CScoreLookup::UpdateState(const objects::CSeq_align& align)
2335 {
2337  m_Scores[*it]->UpdateState(align);
2338  }
2339 }
2340 
2342  const string &score_name)
2343 {
2344  ostr << " * " << score_name << endl;
2345 
2346  list<string> tmp;
2347  NStr::Wrap(HelpText(score_name), 72, tmp);
2348  ITERATE (list<string>, i, tmp) {
2349  ostr << " " << *i << endl;
2350  }
2351 }
2352 
2354 {
2355  ostr << "Build-in score names: " << endl;
2357  x_PrintDictionaryEntry(ostr, it->first);
2358  }
2359  ostr << endl;
2360 
2361  ostr << "Computed tokens: " << endl;
2363  x_PrintDictionaryEntry(ostr, it->first);
2364  }
2365 }
2366 
2367 string CScoreLookup::HelpText(const string &score_name)
2368 {
2370  CSeq_align::ScoreNameMap().find(score_name);
2371  if (score_it != CSeq_align::ScoreNameMap().end()) {
2372  return CSeq_align::HelpText(score_it->second);
2373  }
2374 
2375  TScoreDictionary::const_iterator token_it = m_Scores.find(score_name);
2376  if (token_it != m_Scores.end()) {
2377  m_ScoresUsed.insert(score_name);
2378  CNcbiOstrstream os;
2379  token_it->second->PrintHelp(os);
2380  return string(CNcbiOstrstreamToString(os));
2381  }
2382 
2383  return "assumed to be a score on the Seq-align";
2384 }
2385 
2387 Complexity(const string &score_name)
2388 {
2390  CSeq_align::ScoreNameMap().find(score_name);
2391  if (score_it != CSeq_align::ScoreNameMap().end()) {
2392  return IScore::eEasy;
2393  }
2394 
2395  TScoreDictionary::const_iterator token_it = m_Scores.find(score_name);
2396  if (token_it != m_Scores.end()) {
2397  return token_it->second->GetComplexity();
2398  }
2399 
2400  NCBI_THROW(CAlgoAlignUtilException, eScoreNotFound, score_name);
2401 }
2402 
2403 bool CScoreLookup::IsIntegerScore(const objects::CSeq_align& align,
2404  const string &score_name)
2405 {
2407  CSeq_align::ScoreNameMap().find(score_name);
2408  if (score_it != CSeq_align::ScoreNameMap().end()) {
2409  return CSeq_align::IsIntegerScore(score_it->second);
2410  }
2411 
2412  TScoreDictionary::const_iterator token_it = m_Scores.find(score_name);
2413  if (token_it != m_Scores.end()) {
2414  return token_it->second->IsInteger();
2415  }
2416 
2417  ITERATE (CSeq_align::TScore, stored_score_it, align.GetScore()) {
2418  if ((*stored_score_it)->CanGetValue() &&
2419  (*stored_score_it)->CanGetId() &&
2420  (*stored_score_it)->GetId().IsStr() &&
2421  (*stored_score_it)->GetId().GetStr() == score_name)
2422  {
2423  return (*stored_score_it)->GetValue().IsInt();
2424  }
2425  }
2426  return false;
2427 }
2428 
2429 double CScoreLookup::GetScore(const objects::CSeq_align& align,
2430  const string &score_name)
2431 {
2432  double score;
2433  if (align.GetNamedScore(score_name, score)) {
2434  return score;
2435  }
2436 
2437  if (m_Scope.IsNull()) {
2439  m_Scope->AddDefaults();
2440  }
2441 
2442  /// Score not found in alignment; look for it among built-in scores
2444  CSeq_align::ScoreNameMap().find(score_name);
2445  if (score_it != CSeq_align::ScoreNameMap().end()) {
2446  return ComputeScore(*m_Scope, align, score_it->second);
2447  }
2448 
2449  /// Not a built-in score; look for it among computed tokens
2450  TScoreDictionary::const_iterator token_it = m_Scores.find(score_name);
2451  if (token_it != m_Scores.end()) {
2452  m_ScoresUsed.insert(score_name);
2453  return token_it->second->Get(align, &*m_Scope);
2454  }
2455 
2456  NCBI_THROW(CAlgoAlignUtilException, eScoreNotFound, score_name);
2457 }
2458 
2460 
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
SEG filtering functions.
Int2 SeqBufferSeg(Uint1 *sequence, Int4 length, Int4 offset, SegParameters *sparamsp, BlastSeqLoc **seg_locs)
Runs seg on a protein sequence in ncbistdaa.
Definition: blast_seg.c:2281
SegParameters * SegParametersNewAa(void)
Allocated SeqParameter struct for proteins and fills with default values.
Definition: blast_seg.c:2225
void SegParametersFree(SegParameters *sparamsp)
Free SegParameters structure.
Definition: blast_seg.c:2272
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
CRef< objects::CSeq_feat > ConvertAlignToAnnot(const objects::CSeq_align &align, objects::CSeq_annot &annot, objects::CBioseq_set &seqs, Int8 gene_id=0, const objects::CSeq_feat *cdregion_on_mrna=NULL)
Convert an alignment to an annotation.
void SetFlags(TFeatureGeneratorFlags)
Definition: gene_model.cpp:195
void SetAllowedUnaligned(TSeqPos)
Definition: gene_model.cpp:215
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
Definition: gene_model.cpp:221
static const CTrans_table & GetTransTable(int id)
static const CGenetic_code_table & GetCodeTable(void)
int GetId(void) const
set< TSeqPos > FindStops(const CSeq_align &align)
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
int GetGcode(void) const
Definition: Org_ref.cpp:134
TSeqPos AsSeqPos() const
Definition: Product_pos.cpp:56
position_type GetCoveredLength(void) const
Returns total length covered by ranges in this collection, i.e.
Definition: range_coll.hpp:157
CScope –.
Definition: scope.hpp:92
double GetPercentCoverage(CScope &scope, const CSeq_align &align, unsigned query=0)
Compute percent coverage of the query (sequence 0) (range 0-100)
double GetPercentIdentity(CScope &scope, const CSeq_align &align, EPercentIdentityType type=eGapped)
int ComputeTieBreaker(const CSeq_align &align)
int GetBlastScore(CScope &scope, const CSeq_align &align)
Compute the BLAST score of the alignment.
double ComputeScore(CScope &scope, const CSeq_align &align, const CRangeCollection< TSeqPos > &ranges, CSeq_align::EScoreType score)
TScoreDictionary m_Scores
IScore::EComplexity Complexity(const string &score_name)
double GetScore(const objects::CSeq_align &align, const string &score_name)
Get requested score for alignment.
void UpdateState(const objects::CSeq_align &align)
void PrintDictionary(CNcbiOstream &)
Print out the dictionary of recognized score names.
string HelpText(const string &score_name)
Help text for score.
void x_PrintDictionaryEntry(CNcbiOstream &ostr, const string &score_name)
static int GetGeneId(const objects::CBioseq_Handle &bsh)
set< string > m_ScoresUsed
CRef< objects::CScope > m_Scope
bool IsIntegerScore(const objects::CSeq_align &align, const string &score_name)
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
CScore_AlignLength(bool include_gaps)
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
CScore_AlignStartStop(int row, bool start)
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
double x_GetPerfectScore(CScope &scope, const CSeq_id_Handle &idh) const
CScore_BlastRatio(CScoreLookup &lookup)
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScoreLookup & m_ScoreLookup
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual EComplexity GetComplexity() const
CScore_CdsScore(EScoreType type)
const EScoreType m_ScoreType
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScore_Coverage(int row)
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScore_EdgeExonInfo(EEdge edge, EInfoType type)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
CScore_Entropy(int row)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
CScore_FrameShifts(int row=-1, bool frameshifts=true)
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
CScore_GapCount(bool count_bases, int row=-1, bool exon_specific=false)
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
CScore_GeneID(int row)
virtual double Get(const CSeq_align &align, CScope *) const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
virtual EComplexity GetComplexity() const
map< CSeq_id_Handle, TOrdinalPos > TIds
virtual double Get(const CSeq_align &align, CScope *) const
map< CSeq_id_Handle, size_t > TOrdinalPos
virtual bool IsInteger() const
CScore_OrdinalPos(int row)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
CScore_OverlapBoth(int row, bool include_gaps)
virtual EComplexity GetComplexity() const
virtual void UpdateState(const objects::CSeq_align &align)
For any IScore subclasses that have an internal state, this function will be called to update it for ...
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
map< pair< CSeq_id_Handle, CSeq_id_Handle >, CRangeCollection< TSeqPos > > TData
virtual EComplexity GetComplexity() const
virtual void UpdateState(const objects::CSeq_align &align)
For any IScore subclasses that have an internal state, this function will be called to update it for ...
virtual bool IsInteger() const
CRangeCollection< TSeqPos > m_CoveredRanges
virtual void PrintHelp(CNcbiOstream &ostr) const
CScore_Overlap(int row, bool include_gaps)
virtual double Get(const CSeq_align &align, CScope *) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual EComplexity GetComplexity() const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual EComplexity GetComplexity() const
CScore_SegPct(int row)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual bool IsInteger() const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
CScore_StartStopCodon(bool start_codon)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScore_SymmetricOverlap(EType type)
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual EComplexity GetComplexity() const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual double Get(const CSeq_align &align, CScope *scope) const
virtual void PrintHelp(CNcbiOstream &ostr) const
CScore_Taxid(int row, const string &rank="")
virtual EComplexity GetComplexity() const
virtual bool IsInteger() const
CScore_TblastnScore(CScoreLookup &lookup)
virtual EComplexity GetComplexity() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual bool IsInteger() const
virtual double Get(const CSeq_align &align, CScope *scope) const
CScoreLookup & m_ScoreLookup
virtual bool IsInteger() const
virtual void PrintHelp(CNcbiOstream &ostr) const
virtual double Get(const CSeq_align &align, CScope *) const
virtual EComplexity GetComplexity() const
CSeqVector –.
Definition: seq_vector.hpp:65
TSeqPos GetTotalGapCount(TDim row=-1) const
Retrieves the total number of gaps in the given row of an alignment; all gaps by default.
Definition: Seq_align.cpp:1550
TLengthRange ExonLengthRange() const
Definition: Seq_align.cpp:2214
CRangeCollection< TSeqPos > GetAlignedBases(TDim row) const
Retrieves the locations of aligned bases in the given row, excluding gaps and discontinuities.
Definition: Seq_align.cpp:1796
TSeqPos GetNumGapOpeningsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1570
TLengthRange IntronLengthRange() const
Definition: Seq_align.cpp:2186
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
Definition: Seq_align.cpp:153
static string HelpText(EScoreType score)
Definition: Seq_align.cpp:508
TSeqPos GetSeqStop(TDim row) const
Definition: Seq_align.cpp:273
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
double AlignLengthRatio() const
Definition: Seq_align.cpp:2017
TSeqPos GetSeqStart(TDim row) const
Definition: Seq_align.cpp:252
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
Definition: Seq_align.cpp:1993
TSeqPos GetNumFrameshiftsWithinRange(const TSeqRange &range, TDim row=-1) const
Definition: Seq_align.cpp:1725
TSeqPos GetNumFrameshifts(TDim row=-1) const
Retrieves the number of times a given row shifts frames; i.e.
Definition: Seq_align.cpp:1716
static const TScoreNameMap & ScoreNameMap()
Definition: Seq_align.cpp:483
TLengthRange GapLengthRange() const
Definition: Seq_align.cpp:2114
static bool IsIntegerScore(EScoreType score)
Definition: Seq_align.cpp:513
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
Definition: Seq_align.cpp:1557
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
CSpliced_exon_chunk –.
bool Init(void)
Definition: taxon1.cpp:101
TTaxId GetAncestorByRank(TTaxId id_tax, const char *rank_name)
Definition: taxon1.cpp:942
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
bool IsAnyStart(int state) const
bool IsOrfStop(int state) const
size_type size() const
Definition: map.hpp:148
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:50
static char tmp[3200]
Definition: utf8.c:42
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define TAX_ID_TO(T, tax_id)
Definition: ncbimisc.hpp:1110
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define REVERSE_ITERATE(Type, Var, Cont)
REVERSE_ITERATE macro to sequence through container elements in reverse order.
Definition: ncbimisc.hpp:827
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
CConstRef< CSeq_id > GetSeqId(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
const COrg_ref & GetOrg_ref(const CBioseq_Handle &handle)
Return the org-ref associated with a given sequence.
Definition: sequence.cpp:264
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:4095
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
TTaxId GetTaxId(const CSeq_id &id, TGetFlags flags=0)
Get taxonomy id of bioseq. Return -1 if sequence is not found. Return 0 if sequence doesn't have taxono...
Definition: scope.cpp:474
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
TSeqPos GetSequenceLength(const CSeq_id &id, TGetFlags flags=0)
Get sequence length. Return kInvalidSeqPos if sequence is not found.
Definition: scope.cpp:769
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
const CSeqFeatData & GetData(void) const
CConstRef< CDbtag > GetNamedDbxref(const CTempString &db) const
Return a specified DB xref.
TSeqPos GetBioseqLength(void) const
bool IsAa(void) const
bool IsSetExcept_text(void) const
TInst_Mol GetInst_Mol(void) const
bool IsProtein(void) const
TInst_Topology GetInst_Topology(void) const
const string & GetExcept_text(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
bool CanGetInst_Mol(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
TRange GetRange(void) const
Get range for mapped seq-feat's location.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
void SetCoding(TCoding coding)
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static void Wrap(const string &str, SIZE_TYPE width, IWrapDest &dest, TWrapFlags flags, const string *prefix, const string *prefix1)
Definition: ncbistr.cpp:5347
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
bool IsSetDb(void) const
IDs in other databases. Check if a value has been assigned to Db data member.
Definition: Gene_ref_.hpp:731
const TDb & GetDb(void) const
Get the Db member data.
Definition: Gene_ref_.hpp:743
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
@ eLim_circle
artificial break at origin of circle
Definition: Int_fuzz_.hpp:215
const TDonor_after_exon & GetDonor_after_exon(void) const
Get the Donor_after_exon member data.
bool CanGetProduct_length(void) const
Check if it is safe to call GetProduct_length method.
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_align_.hpp:691
bool IsSetParts(void) const
Basic segments are always in biological order. Check if a value has been assigned to Parts data member.
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
TMatch GetMatch(void) const
Get the variant data.
list< CRef< CScore > > Tdata
Definition: Score_set_.hpp:90
bool IsSetProduct_strand(void) const
Should be 'plus' or 'minus'. Check if a value has been assigned to Product_strand data member.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool CanGetExons(void) const
Check if it is safe to call GetExons method.
const TAcceptor_before_exon & GetAcceptor_before_exon(void) const
Get the Acceptor_before_exon member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
bool IsSetAcceptor_before_exon(void) const
Splice sites. Check if a value has been assigned to Acceptor_before_exon data member.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool CanGetProduct_type(void) const
Check if it is safe to call GetProduct_type method.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
list< CRef< CSpliced_seg_modifier > > TModifiers
const TScores & GetScores(void) const
Get the Scores member data.
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
bool IsSetExons(void) const
Set of segments involved; each segment corresponds to one exon; exons are always in biological order. Ch...
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
const TBases & GetBases(void) const
Get the Bases member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
const Tdata & Get(void) const
Get the member data.
Definition: Score_set_.hpp:165
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const TModifiers & GetModifiers(void) const
Get the Modifiers member data.
TNucpos GetNucpos(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsSetDonor_after_exon(void) const
Check if a value has been assigned to Donor_after_exon data member.
bool CanGetProduct_end(void) const
Check if it is safe to call GetProduct_end method.
bool IsSetScores(void) const
Scores for this exon. Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
list< CRef< CGenetic_code > > Tdata
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TCdregion & GetCdregion(void) const
Get the variant data.
const TGene & GetGene(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
double ComputeNormalizedProteinEntropy(const CTempString &sequence, size_t word_size)
Sequence Entropy Calculation.
Definition: util.cpp:233
int i
int len
#define abs(a)
Definition: ncbi_heapmgr.c:130
T max(T x_, T y_)
T min(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
The Object manager core.
USING_SCOPE(objects)
static TSeqPos s_GetNaLength(CBioseq_Handle bsh)
Get sequence's length in nucleic acids.
static const CGenetic_code * s_GetGeneticCode(const CSeq_id &seq_id, CScope *scope)
#define row(bind, expected)
Definition: string_bind.c:73
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
struct BlastSeqLoc * next
next in linked list
Definition: blast_def.h:205
SAnnotSelector –.
Structure to hold parameters for seg search.
Definition: blast_seg.h:49
Definition: type.c:6
else result
Definition: token2.c:20
Modified on Wed Apr 17 13:10:30 2024 by modify_doxy.py rev. 669887