NCBI C++ ToolKit
seqport_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1  /*$Id: seqport_util.cpp 99064 2023-02-08 19:14:27Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Clifford Clausen
27  * (also reviewed/fixed/groomed by Denis Vakatov and Aaron Ucko)
28  *
29  * File Description:
30  */
31 
32 #include <ncbi_pch.hpp>
34 
35 #include <serial/serial.hpp>
36 #include <serial/objostr.hpp>
37 #include <serial/objistr.hpp>
38 
39 #include <objects/seq/NCBI2na.hpp>
40 #include <objects/seq/NCBI4na.hpp>
41 #include <objects/seq/NCBI8na.hpp>
42 #include <objects/seq/NCBI8aa.hpp>
43 #include <objects/seq/IUPACna.hpp>
44 #include <objects/seq/IUPACaa.hpp>
45 #include <objects/seq/NCBIeaa.hpp>
47 #include <objects/seq/NCBIpaa.hpp>
48 
53 
54 #include <util/sequtil/sequtil.hpp>
57 #include <util/random_gen.hpp>
59 
60 #include <algorithm>
61 #include <string.h>
62 
63 
65 BEGIN_objects_SCOPE
66 
67 static const bool kSymbol = true;
68 static const bool kName = false;
69 static const unsigned int kNumCodes = 11;
70 
72 {
73  switch (from_type) {
94  default:
95  throw CSeqportUtil::CBadType("EChoiceToESeq");
96  }
97 }
98 
99 // CSeqportUtil_implementation is a singleton.
100 
102 public:
105 
108 
110  (const CSeq_data& in_seq,
111  CSeq_data* out_seq,
112  CSeq_data::E_Choice to_code,
113  TSeqPos uBeginIdx,
114  TSeqPos uLength,
115  bool bAmbig,
117  TSeqPos total_length = 0,
118  TSeqPos* out_seq_length = 0,
119  vector<Uint4>* blast_ambig = 0)
120  const;
121 
122  size_t Pack
123  (CSeq_data* in_seq,
124  TSeqPos uLength)
125  const;
126 
127  bool FastValidate
128  (const CSeq_data& in_seq,
129  TSeqPos uBeginIdx,
130  TSeqPos uLength)
131  const;
132 
133  void Validate
134  (const CSeq_data& in_seq,
135  vector<TSeqPos>* badIdx,
136  TSeqPos uBeginIdx,
137  TSeqPos uLength)
138  const;
139 
141  (const CSeq_data& in_seq,
142  CSeq_data* out_seq,
143  vector<TSeqPos>* out_indices,
144  CSeq_data::E_Choice to_code,
145  TSeqPos uBeginIdx,
146  TSeqPos uLength)
147  const;
148 
150  (const CSeq_data& in_seq,
151  CSeq_data* out_seq,
152  TSeqPos uBeginIdx,
153  TSeqPos uLength)
154  const;
155 
156  TSeqPos Keep
157  (CSeq_data* in_seq,
158  TSeqPos uBeginIdx,
159  TSeqPos uLength)
160  const;
161 
163  (CSeq_data* out_seq,
164  const CSeq_data& in_seq1,
165  TSeqPos uBeginIdx1,
166  TSeqPos uLength1,
167  const CSeq_data& in_seq2,
168  TSeqPos uBeginIdx2,
169  TSeqPos uLength2)
170  const;
171 
173  (CSeq_data* in_seq,
174  TSeqPos uBeginIdx,
175  TSeqPos uLength)
176  const;
177 
179  (const CSeq_data& in_seq,
180  CSeq_data* out_seq,
181  TSeqPos uBeginIdx,
182  TSeqPos uLength)
183  const;
184 
186  (CSeq_data* in_seq,
187  TSeqPos uBeginIdx,
188  TSeqPos uLength)
189  const;
190 
192  (const CSeq_data& in_seq,
193  CSeq_data* out_seq,
194  TSeqPos uBeginIdx,
195  TSeqPos uLength)
196  const;
197 
199  (CSeq_data* in_seq,
200  TSeqPos uBeginIdx,
201  TSeqPos uLength)
202  const;
203 
205  (const CSeq_data& in_seq,
206  CSeq_data* out_seq,
207  TSeqPos uBeginIdx,
208  TSeqPos uLength)
209  const;
210 
211  const string& GetIupacaa3(TIndex ncbistdaa);
212 
213  bool IsCodeAvailable(CSeq_data::E_Choice code_type);
214 
215  bool IsCodeAvailable(ESeq_code_type code_type);
216 
218 
220 
221  const string& GetCodeOrName(CSeq_data::E_Choice code_type,
222  TIndex idx,
223  bool get_code);
224 
225  const string& GetCodeOrName(ESeq_code_type code_type,
226  TIndex idx,
227  bool get_code);
228 
230  const string& code);
231 
232  TIndex GetIndex(ESeq_code_type code_type,
233  const string& code);
234 
236  TIndex idx);
237 
239  TIndex idx);
240 
242  CSeq_data::E_Choice to_type,
243  TIndex from_idx);
244 
246  ESeq_code_type to_type,
247  TIndex from_idx);
248  // Template wrapper class used to create data type specific
249  // classes to delete code tables on exit from main
250  template <class T>
251  class CWrapper_table : public CObject
252  {
253  public:
254  CWrapper_table(size_t size, size_t start)
255  {
256  m_Table = new T[256];
257  m_StartAt = start;
258  m_Size = size;
259  }
261  drop_table();
262  }
263  void drop_table()
264  {
265  delete[] m_Table;
266  m_Table = 0;
267  }
268 
270  size_t m_StartAt;
271  size_t m_Size;
272  };
273 
274  // Template wrapper class used for two-dimensional arrays.
275  template <class T>
276  class CWrapper_2D : public CObject
277  {
278  public:
279  CWrapper_2D(size_t size1, int start1, size_t size2, int start2)
280  {
281  m_Size_D1 = size1;
282  m_Size_D2 = size2;
283  m_StartAt_D1 = start1;
284  m_StartAt_D2 = start2;
285  m_Table = new T*[size1];
286  for (size_t i=0; i<size1; i++)
287  {
288  m_Table[i] = new T[size2] - start2;
289  }
290  m_Table -= start1;
291  }
293  {
295  for (size_t i=0; i<m_Size_D1; i++)
296  {
297  delete[](m_Table[i] + m_StartAt_D2);
298  }
299  delete[] m_Table;
300  }
301 
303  size_t m_Size_D1;
304  size_t m_Size_D2;
307  };
308 
309  // Typedefs making use of wrapper classes above.
318 
321 
322 private:
323  // String to initialize CSeq_code_set
324  // This string is initialized in seqport_util.h
325  static const char* sm_StrAsnData[];
326 
327  // CSeq_code_set member holding code and map table data
329 
330  // Helper function used internally to initialize m_SeqCodeSet
332 
333  // Member variables holding code tables
338 
339  // Helper function to initialize code tables
341 
342  // Member variables holding na complement information
346 
347  // Helper functions to initialize complement tables
351 
352  // Member variables holding na reverse information
353  // Used to reverse residues packed within a byte.
356 
357  // Helper functions to initialize reverse tables
360 
361  // Member variables holding map tables
362 
375 
376 
378  (const CSeq_data& in_seq,
379  CSeq_data* out_seq,
380  CSeq_data::E_Choice to_code,
381  TSeqPos uBeginIdx,
382  TSeqPos uLength,
384  TSeqPos total_length = 0,
385  TSeqPos* out_seq_length = 0,
386  vector<Uint4>* blast_ambig = 0)
387  const;
388 
389  // Helper function to initialize map tables
391  ESeq_code_type to_type);
392 
393  // Member variables holding fast conversion tables
394 
395  // Takes a byte as an index and returns an unsigned int with
396  // 4 characters, each character being one of ATGC
397  //CRef<CFast_table4> m_FastNcbi2naIupacna;
398 
399  // Takes a byte (each byte with 4 Ncbi2na codes) as an index and
400  // returns a Uint2 with 2 bytes, each byte formatted as 2 Ncbi4na codes
401  //CRef<CFast_table2> m_FastNcbi2naNcbi4na;
402 
403  // Takes a byte (each byte with 2 Ncbi4na codes) as an index and
404  // returns a 2 byte string, each byte with an Iupacna code.
405  //CRef<CFast_table2> m_FastNcbi4naIupacna;
406 
407  // Table used for fast compression from Iupacna to Ncbi2na (4 bytes to 1
408  // byte). This table is a 2 dimensional table. The first dimension
409  // corresponds to the iupacna position modulo 4 (0-3). The second dimension
410  // is the value of the iupacna byte (0-255). The 4 resulting values from 4
411  // iupacna bytes are bitwise or'd to produce 1 byte.
413 
414  // Table used for fast compression from Iupacna to Ncbi4na
415  // (2 bytes to 1 byte). Similar to m_FastIupacnaNcbi2na
417 
418  // Table used for fast compression from Ncbi4na to Ncbi2na
419  // (2 bytes to 1 byte). Similar to m_FastIupacnaNcbi4na
421 
422  // Tables used to convert an index for a code type to a symbol or name
423  // for the same code type
424  vector<vector<string> > m_IndexString[2];
425  vector<vector<TIndex> > m_IndexComplement;
426  vector<map<string, TIndex> > m_StringIndex;
427  vector<TIndex> m_StartAt;
428 
429  // Helper function to initialize fast conversion tables
430  //CRef<CFast_table4> InitFastNcbi2naIupacna();
436 
437  // Helper functions to initialize Index to/from code/name conversion tables
438  // and complement tables
439  void InitIndexCodeName();
440 
441  // Data members and functions used for random disambiguation
442 
443  // structure used for ncbi4na --> ncbi2na
444  struct SMasksArray : public CObject
445  {
446  // Structure to hold all masks applicable to an input byte
447  struct SMasks {
448  int nMasks;
449  unsigned char cMask[16];
450  };
452  };
453 
455 
456  // Helper function to initialize m_Masks
458 
459  // Data members used for detecting ambiguities
460 
461  // Data members used by GetAmbig methods to get a list of
462  // ambiguities resulting from alphabet conversions
465 
466  // Helper functions to initialize m_Detect_Ambig_ data members
469 
470  // Alphabet conversion functions. Functions return
471  // the number of converted codes.
472 
473  /*
474  // Function to convert ncbi2na (1 byte) to iupacna (4 bytes)
475  TSeqPos MapNcbi2naToIupacna(const CSeq_data& in_seq,
476  CSeq_data* out_seq,
477  TSeqPos uBeginIdx,
478  TSeqPos uLength)
479  const;
480 
481  // Function to convert ncbi2na (1 byte) to ncbi4na (2 bytes)
482  TSeqPos MapNcbi2naToNcbi4na(const CSeq_data& in_seq,
483  CSeq_data* out_seq,
484  TSeqPos uBeginIdx,
485  TSeqPos uLength)
486  const;
487 
488  // Function to convert ncbi4na (1 byte) to iupacna (2 bytes)
489  TSeqPos MapNcbi4naToIupacna(const CSeq_data& in_seq,
490  CSeq_data* out_seq,
491  TSeqPos uBeginIdx,
492  TSeqPos uLength)
493  const;
494  */
495  // Function to convert iupacna (4 bytes) to ncbi2na (1 byte)
496  TSeqPos MapIupacnaToNcbi2na(const CSeq_data& in_seq,
497  CSeq_data* out_seq,
498  TSeqPos uBeginIdx,
499  TSeqPos uLength,
500  bool bAmbig,
502  TSeqPos total_length,
503  TSeqPos* out_seq_length,
504  vector<Uint4>* blast_ambig)
505  const;
506  /*
507 
508  // Function to convert iupacna (2 bytes) to ncbi4na (1 byte)
509  TSeqPos MapIupacnaToNcbi4na(const CSeq_data& in_seq,
510  CSeq_data* out_seq,
511  TSeqPos uBeginIdx,
512  TSeqPos uLength)
513  const;
514  */
515  // Function to convert ncbi4na (2 bytes) to ncbi2na (1 byte)
516  TSeqPos MapNcbi4naToNcbi2na(const CSeq_data& in_seq,
517  CSeq_data* out_seq,
518  TSeqPos uBeginIdx,
519  TSeqPos uLength,
520  bool bAmbig,
522  TSeqPos total_length,
523  TSeqPos* out_seq_length,
524  vector<Uint4>* blast_ambig)
525  const;
526  /*
527 
528  // Function to convert iupacaa (byte) to ncbieaa (byte)
529  TSeqPos MapIupacaaToNcbieaa(const CSeq_data& in_seq,
530  CSeq_data* out_seq,
531  TSeqPos uBeginIdx,
532  TSeqPos uLength) const;
533 
534  // Function to convert ncbieaa (byte) to iupacaa (byte)
535  TSeqPos MapNcbieaaToIupacaa(const CSeq_data& in_seq,
536  CSeq_data* out_seq,
537  TSeqPos uBeginIdx,
538  TSeqPos uLength)
539  const;
540 
541  // Function to convert iupacaa (byte) to ncbistdaa (byte)
542  TSeqPos MapIupacaaToNcbistdaa(const CSeq_data& in_seq,
543  CSeq_data* out_seq,
544  TSeqPos uBeginIdx,
545  TSeqPos uLength)
546  const;
547 
548  // Function to convert ncbieaa (byte) to ncbistdaa (byte)
549  TSeqPos MapNcbieaaToNcbistdaa(const CSeq_data& in_seq,
550  CSeq_data* out_seq,
551  TSeqPos uBeginIdx,
552  TSeqPos uLength)
553  const;
554 
555  // Function to convert ncbistdaa (byte) to ncbieaa (byte)
556  TSeqPos MapNcbistdaaToNcbieaa(const CSeq_data& in_seq,
557  CSeq_data* out_seq,
558  TSeqPos uBeginIdx,
559  TSeqPos uLength)
560  const;
561 
562  // Function to convert ncbistdaa (byte) to iupacaa (byte)
563  TSeqPos MapNcbistdaaToIupacaa(const CSeq_data& in_seq,
564  CSeq_data* out_seq,
565  TSeqPos uBeginIdx,
566  TSeqPos uLength)
567  const;
568  */
569 
570  // Fast Validation functions
571  bool FastValidateIupacna(const CSeq_data& in_seq,
572  TSeqPos uBeginIdx,
573  TSeqPos uLength)
574  const;
575 
576  bool FastValidateNcbieaa(const CSeq_data& in_seq,
577  TSeqPos uBeginIdx,
578  TSeqPos uLength)
579  const;
580 
581 
582  bool FastValidateNcbistdaa(const CSeq_data& in_seq,
583  TSeqPos uBeginIdx,
584  TSeqPos uLength)
585  const;
586 
587 
588  bool FastValidateIupacaa(const CSeq_data& in_seq,
589  TSeqPos uBeginIdx,
590  TSeqPos uLength)
591  const;
592 
593  // Full Validation functions
594  void ValidateIupacna(const CSeq_data& in_seq,
595  vector<TSeqPos>* badIdx,
596  TSeqPos uBeginIdx,
597  TSeqPos uLength)
598  const;
599 
600  void ValidateNcbieaa(const CSeq_data& in_seq,
601  vector<TSeqPos>* badIdx,
602  TSeqPos uBeginIdx,
603  TSeqPos uLength)
604  const;
605 
606  void ValidateNcbistdaa(const CSeq_data& in_seq,
607  vector<TSeqPos>* badIdx,
608  TSeqPos uBeginIdx,
609  TSeqPos uLength)
610  const;
611 
612  void ValidateIupacaa(const CSeq_data& in_seq,
613  vector<TSeqPos>* badIdx,
614  TSeqPos uBeginIdx,
615  TSeqPos uLength)
616  const;
617 
618  // Functions to make copies of the different types of sequences
619  TSeqPos GetNcbi2naCopy(const CSeq_data& in_seq,
620  CSeq_data* out_seq,
621  TSeqPos uBeginIdx,
622  TSeqPos uLength)
623  const;
624 
625  TSeqPos GetNcbi4naCopy(const CSeq_data& in_seq,
626  CSeq_data* out_seq,
627  TSeqPos uBeginIdx,
628  TSeqPos uLength)
629  const;
630 
631  TSeqPos GetIupacnaCopy(const CSeq_data& in_seq,
632  CSeq_data* out_seq,
633  TSeqPos uBeginIdx,
634  TSeqPos uLength)
635  const;
636 
637  TSeqPos GetNcbieaaCopy(const CSeq_data& in_seq,
638  CSeq_data* out_seq,
639  TSeqPos uBeginIdx,
640  TSeqPos uLength)
641  const;
642 
643  TSeqPos GetNcbistdaaCopy(const CSeq_data& in_seq,
644  CSeq_data* out_seq,
645  TSeqPos uBeginIdx,
646  TSeqPos uLength)
647  const;
648 
649  TSeqPos GetIupacaaCopy(const CSeq_data& in_seq,
650  CSeq_data* out_seq,
651  TSeqPos uBeginIdx,
652  TSeqPos uLength)
653  const;
654 
655  // Function to adjust uBeginIdx to lie on an in_seq byte boundary
656  // and uLength to lie on an out_seq byte boundary. Returns
657  // overhang, the number of out seqs beyond byte boundary determined
658  // by uBeginIdx + uLength
659  TSeqPos Adjust(TSeqPos* uBeginIdx,
660  TSeqPos* uLength,
661  TSeqPos uInSeqBytes,
662  TSeqPos uInSeqsPerByte,
663  TSeqPos uOutSeqsPerByte)
664  const;
665 
666  // GetAmbig methods
667 
668  // Loops through an ncbi4na input sequence and determines
669  // the ambiguities that would result from conversion to an ncbi2na sequence
670  // On return, out_seq contains the ncbi4na bases that become ambiguous and
671  // out_indices contains the indices of the ambiguous bases in in_seq
673  CSeq_data* out_seq,
674  vector<TSeqPos>* out_indices,
675  TSeqPos uBeginIdx,
676  TSeqPos uLength)
677  const;
678 
679  // Loops through an iupacna input sequence and determines
680  // the ambiguities that would result from conversion to an ncbi2na sequence
681  // On return, out_seq contains the iupacna bases that become ambiguous and
682  // out_indices contains the indices of the ambiguous bases in in_seq. The
683  // return is the number of ambiguities found.
685  CSeq_data* out_seq,
686  vector<TSeqPos>* out_indices,
687  TSeqPos uBeginIdx,
688  TSeqPos uLength)
689  const;
690 
691  // Methods to perform Keep on specific seq types. Methods
692  // return length of kept sequence.
693  TSeqPos KeepNcbi2na(CSeq_data* in_seq,
694  TSeqPos uBeginIdx,
695  TSeqPos uLength)
696  const;
697 
698  TSeqPos KeepNcbi4na(CSeq_data* in_seq,
699  TSeqPos uBeginIdx,
700  TSeqPos uLength)
701  const;
702 
703  TSeqPos KeepIupacna(CSeq_data* in_seq,
704  TSeqPos uBeginIdx,
705  TSeqPos uLength)
706  const;
707 
708  TSeqPos KeepNcbieaa(CSeq_data* in_seq,
709  TSeqPos uBeginIdx,
710  TSeqPos uLength)
711  const;
712 
714  TSeqPos uBeginIdx,
715  TSeqPos uLength)
716  const;
717 
718  TSeqPos KeepIupacaa(CSeq_data* in_seq,
719  TSeqPos uBeginIdx,
720  TSeqPos uLength)
721  const;
722 
723  // Methods to complement na sequences
724 
725  // In place methods. Return number of complemented residues.
727  TSeqPos uBeginIdx,
728  TSeqPos uLength)
729  const;
730 
732  TSeqPos uBeginIdx,
733  TSeqPos uLength)
734  const;
735 
737  TSeqPos uBeginIdx,
738  TSeqPos uLength)
739  const;
740 
741 
742  // Complement in copy methods
743  TSeqPos ComplementIupacna(const CSeq_data& in_seq,
744  CSeq_data* out_seq,
745  TSeqPos uBeginIdx,
746  TSeqPos uLength)
747  const;
748 
749  TSeqPos ComplementNcbi2na(const CSeq_data& in_seq,
750  CSeq_data* out_seq,
751  TSeqPos uBeginIdx,
752  TSeqPos uLength)
753  const;
754 
755  TSeqPos ComplementNcbi4na(const CSeq_data& in_seq,
756  CSeq_data* out_seq,
757  TSeqPos uBeginIdx,
758  TSeqPos uLength)
759  const;
760 
761 
762  // Methods to reverse na sequences
763 
764  // In place methods
766  TSeqPos uBeginIdx,
767  TSeqPos uLength)
768  const;
769 
771  TSeqPos uBeginIdx,
772  TSeqPos uLength)
773  const;
774 
776  TSeqPos uBeginIdx,
777  TSeqPos uLength)
778  const;
779 
780  // Reverse in copy methods
781  TSeqPos ReverseIupacna(const CSeq_data& in_seq,
782  CSeq_data* out_seq,
783  TSeqPos uBeginIdx,
784  TSeqPos uLength)
785  const;
786 
787  TSeqPos ReverseNcbi2na(const CSeq_data& in_seq,
788  CSeq_data* out_seq,
789  TSeqPos uBeginIdx,
790  TSeqPos uLength)
791  const;
792 
793  TSeqPos ReverseNcbi4na(const CSeq_data& in_seq,
794  CSeq_data* out_seq,
795  TSeqPos uBeginIdx,
796  TSeqPos uLength)
797  const;
798 
799  // Methods to reverse-complement na sequences
800 
801  // In place methods
803  TSeqPos uBeginIdx,
804  TSeqPos uLength)
805  const;
806 
808  TSeqPos uBeginIdx,
809  TSeqPos uLength)
810  const;
811 
813  TSeqPos uBeginIdx,
814  TSeqPos uLength)
815  const;
816 
817  // Reverse-complement in copy methods
819  CSeq_data* out_seq,
820  TSeqPos uBeginIdx,
821  TSeqPos uLength)
822  const;
823 
825  CSeq_data* out_seq,
826  TSeqPos uBeginIdx,
827  TSeqPos uLength)
828  const;
829 
831  CSeq_data* out_seq,
832  TSeqPos uBeginIdx,
833  TSeqPos uLength)
834  const;
835 
836  // Append methods
838  const CSeq_data& in_seq1,
839  TSeqPos uBeginIdx1,
840  TSeqPos uLength1,
841  const CSeq_data& in_seq2,
842  TSeqPos uBeginIdx2,
843  TSeqPos uLength2)
844  const;
845 
847  const CSeq_data& in_seq1,
848  TSeqPos uBeginIdx1,
849  TSeqPos uLength1,
850  const CSeq_data& in_seq2,
851  TSeqPos uBeginIdx2,
852  TSeqPos uLength2)
853  const;
854 
856  const CSeq_data& in_seq1,
857  TSeqPos uBeginIdx1,
858  TSeqPos uLength1,
859  const CSeq_data& in_seq2,
860  TSeqPos uBeginIdx2,
861  TSeqPos uLength2)
862  const;
863 
865  const CSeq_data& in_seq1,
866  TSeqPos uBeginIdx1,
867  TSeqPos uLength1,
868  const CSeq_data& in_seq2,
869  TSeqPos uBeginIdx2,
870  TSeqPos uLength2)
871  const;
872 
874  const CSeq_data& in_seq1,
875  TSeqPos uBeginIdx1,
876  TSeqPos uLength1,
877  const CSeq_data& in_seq2,
878  TSeqPos uBeginIdx2,
879  TSeqPos uLength2)
880  const;
881 
883  const CSeq_data& in_seq1,
884  TSeqPos uBeginIdx1,
885  TSeqPos uLength1,
886  const CSeq_data& in_seq2,
887  TSeqPos uBeginIdx2,
888  TSeqPos uLength2)
889  const;
890 
891  void x_GetSeqFromSeqData(const CSeq_data& data,
892  const string** str,
893  const vector<char>** vec)
894  const;
896  string** str,
897  vector<char>** vec)
898  const;
899 };
900 
901 
903 
905 {
906  return *sx_Implementation;
907 }
908 
909 
910 
911 
912 /////////////////////////////////////////////////////////////////////////////
913 // PUBLIC (static wrappers to CSeqportUtil_implementation public methods)::
914 //
915 
916 
918 (const CSeq_data& in_seq,
919  CSeq_data* out_seq,
920  CSeq_data::E_Choice to_code,
921  TSeqPos uBeginIdx,
922  TSeqPos uLength,
923  bool bAmbig,
925 {
927  (in_seq, out_seq, to_code, uBeginIdx, uLength, bAmbig, seed,
928  0, 0, 0);
929 }
930 
932 (const CSeq_data& in_seq,
933  CSeq_data* out_seq,
934  TSeqPos uBeginIdx,
935  TSeqPos uLength,
936  TSeqPos total_length,
937  TSeqPos* out_seq_length,
938  vector<Uint4>* blast_ambig)
939 {
941  (in_seq, out_seq, CSeq_data::e_Ncbi2na, uBeginIdx, uLength, true,
942  17734276, total_length, out_seq_length, blast_ambig);
943 }
944 
946 (CSeq_data* in_seq,
947  TSeqPos uLength)
948 {
949  return static_cast<TSeqPos>(
950  x_GetImplementation().Pack(in_seq, uLength));
951 }
952 
953 
955 (const CSeq_data& in_seq,
956  TSeqPos uBeginIdx,
957  TSeqPos uLength)
958 {
960  (in_seq, uBeginIdx, uLength);
961 }
962 
963 
965 (const CSeq_data& in_seq,
966  vector<TSeqPos>* badIdx,
967  TSeqPos uBeginIdx,
968  TSeqPos uLength)
969 {
971  (in_seq, badIdx, uBeginIdx, uLength);
972 }
973 
974 
976 (const CSeq_data& in_seq,
977  CSeq_data* out_seq,
978  vector<TSeqPos>* out_indices,
979  CSeq_data::E_Choice to_code,
980  TSeqPos uBeginIdx,
981  TSeqPos uLength)
982 {
984  (in_seq, out_seq, out_indices, to_code, uBeginIdx, uLength);
985 }
986 
987 
989 (const CSeq_data& in_seq,
990  CSeq_data* out_seq,
991  TSeqPos uBeginIdx,
992  TSeqPos uLength)
993 {
995  (in_seq, out_seq, uBeginIdx, uLength);
996 }
997 
998 
999 
1001 (CSeq_data* in_seq,
1002  TSeqPos uBeginIdx,
1003  TSeqPos uLength)
1004 {
1005  return x_GetImplementation().Keep
1006  (in_seq, uBeginIdx, uLength);
1007 }
1008 
1009 
1011 (CSeq_data* out_seq,
1012  const CSeq_data& in_seq1,
1013  TSeqPos uBeginIdx1,
1014  TSeqPos uLength1,
1015  const CSeq_data& in_seq2,
1016  TSeqPos uBeginIdx2,
1017  TSeqPos uLength2)
1018 {
1019  return x_GetImplementation().Append
1020  (out_seq,
1021  in_seq1, uBeginIdx1, uLength1, in_seq2, uBeginIdx2, uLength2);
1022 }
1023 
1024 
1026 (CSeq_data* in_seq,
1027  TSeqPos uBeginIdx,
1028  TSeqPos uLength)
1029 {
1031  (in_seq, uBeginIdx, uLength);
1032 }
1033 
1034 
1036 (const CSeq_data& in_seq,
1037  CSeq_data* out_seq,
1038  TSeqPos uBeginIdx,
1039  TSeqPos uLength)
1040 {
1042  (in_seq, out_seq, uBeginIdx, uLength);
1043 }
1044 
1045 
1047 (CSeq_data* in_seq,
1048  TSeqPos uBeginIdx,
1049  TSeqPos uLength)
1050 {
1051  return x_GetImplementation().Reverse
1052  (in_seq, uBeginIdx, uLength);
1053 }
1054 
1055 
1057 (const CSeq_data& in_seq,
1058  CSeq_data* out_seq,
1059  TSeqPos uBeginIdx,
1060  TSeqPos uLength)
1061 {
1062  return x_GetImplementation().Reverse
1063  (in_seq, out_seq, uBeginIdx, uLength);
1064 }
1065 
1066 
1068 (CSeq_data* in_seq,
1069  TSeqPos uBeginIdx,
1070  TSeqPos uLength)
1071 {
1073  (in_seq, uBeginIdx, uLength);
1074 }
1075 
1076 
1078 (const CSeq_data& in_seq,
1079  CSeq_data* out_seq,
1080  TSeqPos uBeginIdx,
1081  TSeqPos uLength)
1082 {
1084  (in_seq, out_seq, uBeginIdx, uLength);
1085 }
1086 
1087 
1088 const string& CSeqportUtil::GetIupacaa3(TIndex ncbistdaa)
1089 {
1090  return x_GetImplementation().GetIupacaa3(ncbistdaa);
1091 }
1092 
1094 {
1095  return x_GetImplementation().IsCodeAvailable(code_type);
1096 }
1097 
1099 {
1100  return x_GetImplementation().IsCodeAvailable(code_type);
1101 }
1102 
1104 (CSeq_data::E_Choice code_type)
1105 {
1106  return x_GetImplementation().GetCodeIndexFromTo(code_type);
1107 }
1108 
1110 (ESeq_code_type code_type)
1111 {
1112  return x_GetImplementation().GetCodeIndexFromTo(code_type);
1113 }
1114 
1116 (CSeq_data::E_Choice code_type,
1117  TIndex idx)
1118 {
1119  return x_GetImplementation().GetCodeOrName(code_type, idx, true);
1120 }
1121 
1123 (ESeq_code_type code_type,
1124  TIndex idx)
1125 {
1126  return x_GetImplementation().GetCodeOrName(code_type, idx, true);
1127 }
1128 
1130 (CSeq_data::E_Choice code_type,
1131  TIndex idx)
1132 {
1133  return x_GetImplementation().GetCodeOrName(code_type, idx, false);
1134 }
1135 
1137 (ESeq_code_type code_type,
1138  TIndex idx)
1139 {
1140  return x_GetImplementation().GetCodeOrName(code_type, idx, false);
1141 }
1142 
1144 (CSeq_data::E_Choice code_type,
1145  const string& code)
1146 {
1147  return x_GetImplementation().GetIndex(code_type, code);
1148 }
1149 
1151 (ESeq_code_type code_type,
1152  const string& code)
1153 {
1154  return x_GetImplementation().GetIndex(code_type, code);
1155 }
1156 
1158 (CSeq_data::E_Choice code_type,
1159  TIndex idx)
1160 {
1161  return x_GetImplementation().GetIndexComplement(code_type, idx);
1162 }
1163 
1165 (ESeq_code_type code_type,
1166  TIndex idx)
1167 {
1168  return x_GetImplementation().GetIndexComplement(code_type, idx);
1169 }
1170 
1172 (CSeq_data::E_Choice from_type,
1173  CSeq_data::E_Choice to_type,
1174  TIndex from_idx)
1175 {
1176  return x_GetImplementation().GetMapToIndex(from_type, to_type, from_idx);
1177 }
1178 
1180 (ESeq_code_type from_type,
1181  ESeq_code_type to_type,
1182  TIndex from_idx)
1183 {
1184  return x_GetImplementation().GetMapToIndex(from_type, to_type, from_idx);
1185 }
1186 
1188 {
1189 
1190  // Initialize m_SeqCodeSet
1191  m_SeqCodeSet = Init();
1192 
1193  // Initialize code tables
1195 
1197 
1199 
1201 
1202 
1203  // Initialize na complement tables
1205 
1207 
1209 
1210 
1211 
1212  // Initialize na reverse tables
1214 
1216 
1217 
1218  // Initialize map tables
1219 
1222 
1225 
1228 
1231 
1234 
1237 
1240 
1243 
1246 
1249 
1252 
1255 
1256  // Initialize fast conversion tables
1257  //m_FastNcbi2naIupacna = InitFastNcbi2naIupacna();
1258  //m_FastNcbi2naNcbi4na = InitFastNcbi2naNcbi4na();
1259  //m_FastNcbi4naIupacna = InitFastNcbi4naIupacna();
1263 
1264  // Initialize tables for conversion of index to codes or names
1266 
1267  // Initialize m_Masks used for random ambiguity resolution
1269 
1270  // Initialize m_DetectAmbigNcbi4naNcbi2na used for ambiguity
1271  // detection and reporting
1273 
1274  // Initialize m_DetectAmbigIupacnaNcbi2na used for ambiguity detection
1275  // and reporting
1277 
1278 }
1279 
1280 // Destructor. All memory allocated on the
1281 // free store is wrapped in smart pointers.
1282 // Therefore, the destructor does not need
1283 // to deallocate memory.
1285 {
1286  return;
1287 }
1288 
1289 
1290 /////////////////////////////////////////////////////////////////////////////
1291 // PRIVATE::
1292 //
1293 
1294 
1295 // Helper function to initialize m_SeqCodeSet from sm_StrAsnData
1297 {
1298  // Compose a long-long string
1299  string str;
1300  for (size_t i = 0; sm_StrAsnData[i]; i++) {
1301  str += sm_StrAsnData[i];
1302  }
1303 
1304  // Create an in memory stream on sm_StrAsnData
1305  CNcbiIstrstream is(str);
1306 
1307  unique_ptr<CObjectIStream>
1308  asn_codes_in(CObjectIStream::Open(eSerial_AsnText, is));
1309 
1310  // Create a CSeq_code_set
1311  CRef<CSeq_code_set> ptr_seq_code_set(new CSeq_code_set());
1312 
1313  // Initialize the newly created CSeq_code_set
1314  *asn_codes_in >> *ptr_seq_code_set;
1315 
1316  // Return a newly created CSeq_code_set
1317  return ptr_seq_code_set;
1318 }
1319 
1320 
1321 // Function to initialize code tables
1324 {
1325  // Get list of code tables
1326  const list<CRef<CSeq_code_table> >& code_list = m_SeqCodeSet->GetCodes();
1327 
1328  // Get table for code_type
1329  list<CRef<CSeq_code_table> >::const_iterator i_ct;
1330  for(i_ct = code_list.begin(); i_ct != code_list.end(); ++i_ct)
1331  if((*i_ct)->GetCode() == code_type)
1332  break;
1333 
1334 
1335  if(i_ct == code_list.end())
1336  throw runtime_error("Requested code table not found");
1337 
1338  // Get table data
1339  const list<CRef<CSeq_code_table::C_E> >& table_data = (*i_ct)->GetTable();
1340  SIZE_TYPE size = table_data.size();
1341  int start_at = (*i_ct)->GetStart_at();
1342  CRef<CCode_table> codeTable(new CCode_table(size, start_at));
1343 
1344  // Initialize codeTable to 255
1345  for(int i=0; i<256; i++)
1346  codeTable->m_Table[i] = '\xff';
1347 
1348  // Copy table data to codeTable
1349  int nIdx = start_at;
1350  list<CRef<CSeq_code_table::C_E> >::const_iterator i_td;
1351  for(i_td = table_data.begin(); i_td != table_data.end(); ++i_td) {
1352  codeTable->m_Table[nIdx] = *((*i_td)->GetSymbol().c_str());
1353  if(codeTable->m_Table[nIdx] == '\x00')
1354  codeTable->m_Table[nIdx++] = '\xff';
1355  else
1356  nIdx++;
1357  }
1358 
1359  // Return codeTable
1360  return codeTable;
1361 }
1362 
1363 
1364 // Function to initialize iupacna complement table
1367 {
1368 
1369  // Get list of code tables
1370  const list<CRef<CSeq_code_table> >& code_list = m_SeqCodeSet->GetCodes();
1371 
1372  // Get table for code_type iupacna
1373  list<CRef<CSeq_code_table> >::const_iterator i_ct;
1374  for(i_ct = code_list.begin(); i_ct != code_list.end(); ++i_ct)
1375  if((*i_ct)->GetCode() == eSeq_code_type_iupacna)
1376  break;
1377 
1378 
1379  if(i_ct == code_list.end())
1380  throw runtime_error("Code table for Iupacna not found");
1381 
1382  // Check that complements are set
1383  if(!(*i_ct)->IsSetComps())
1384  throw runtime_error("Complement data is not set for iupacna table");
1385 
1386  // Get complement data, start at and size of complement data
1387  const list<int>& comp_data = (*i_ct)->GetComps();
1388  int start_at = (*i_ct)->GetStart_at();
1389 
1390  // Allocate memory for complement data
1391  CRef<CCode_comp> compTable(new CCode_comp(256, start_at));
1392 
1393  // Initialize compTable to 255 for illegal codes
1394  for(unsigned int i = 0; i<256; i++)
1395  compTable->m_Table[i] = (char) 255;
1396 
1397  // Loop trhough the complement data and set compTable
1398  list<int>::const_iterator i_comp;
1399  unsigned int nIdx = start_at;
1400  for(i_comp = comp_data.begin(); i_comp != comp_data.end(); ++i_comp)
1401  compTable->m_Table[nIdx++] = (*i_comp);
1402 
1403  // Return the complement data
1404  return compTable;
1405 
1406 }
1407 
1408 
1409 // Function to initialize ncbi2na complement table
1412 {
1413 
1414  // Get list of code tables
1415  const list<CRef<CSeq_code_table> >& code_list = m_SeqCodeSet->GetCodes();
1416 
1417  // Get table for code_type ncbi2na
1418  list<CRef<CSeq_code_table> >::const_iterator i_ct;
1419  for(i_ct = code_list.begin(); i_ct != code_list.end(); ++i_ct)
1420  if((*i_ct)->GetCode() == eSeq_code_type_ncbi2na)
1421  break;
1422 
1423  if(i_ct == code_list.end())
1424  throw runtime_error("Code table for Iupacna not found");
1425 
1426  // Check that complements are set
1427  if(!(*i_ct)->IsSetComps())
1428  throw runtime_error("Complement data is not set for ncbi2na table");
1429 
1430  // Get complement data, start at and size of complement data
1431  const list<int>& comp_data = (*i_ct)->GetComps();
1432  int start_at = (*i_ct)->GetStart_at();
1433 
1434  // Allocate memory for complement data
1435  CRef<CCode_comp> compTable(new CCode_comp(256, start_at));
1436 
1437  // Put complement data in an array
1438  char compArray[4];
1439  int nIdx = start_at;
1440  list<int>::const_iterator i_comp;
1441  for(i_comp = comp_data.begin(); i_comp != comp_data.end(); ++i_comp)
1442  compArray[nIdx++] = (*i_comp);
1443 
1444  // Set compTable
1445  for(unsigned int i = 0; i < 4; i++)
1446  for(unsigned int j = 0; j < 4; j++)
1447  for(unsigned int k = 0; k < 4; k++)
1448  for(unsigned int l = 0; l < 4; l++)
1449  {
1450  nIdx = i<<6 | j<<4 | k<<2 | l;
1451  char c1 = compArray[i] << 6;
1452  char c2 = compArray[j] << 4;
1453  char c3 = compArray[k] << 2;
1454  char c4 = compArray[l];
1455  compTable->m_Table[nIdx] = c1 | c2 | c3 | c4;
1456  }
1457 
1458  // Return complement data
1459  return compTable;
1460 
1461 }
1462 
1463 
1464 // Function to initialize ncbi4na complement table
1467 {
1468 
1469  // Get list of code tables
1470  const list<CRef<CSeq_code_table> >& code_list = m_SeqCodeSet->GetCodes();
1471 
1472  // Get table for code_type ncbi2na
1473  list<CRef<CSeq_code_table> >::const_iterator i_ct;
1474  for(i_ct = code_list.begin(); i_ct != code_list.end(); ++i_ct)
1475  if((*i_ct)->GetCode() == eSeq_code_type_ncbi4na)
1476  break;
1477 
1478  if(i_ct == code_list.end())
1479  throw runtime_error("Code table for Iupacna not found");
1480 
1481  // Check that complements are set
1482  if(!(*i_ct)->IsSetComps())
1483  throw runtime_error("Complement data is not set for iupacna table");
1484 
1485  // Get complement data, start at and size of complement data
1486  const list<int>& comp_data = (*i_ct)->GetComps();
1487  int start_at = (*i_ct)->GetStart_at();
1488 
1489  // Allocate memory for complement data
1490  CRef<CCode_comp> compTable(new CCode_comp(256, start_at));
1491 
1492 
1493  // Put complement data in an array
1494  char compArray[16];
1495  int nIdx = start_at;
1496  list<int>::const_iterator i_comp;
1497  for(i_comp = comp_data.begin(); i_comp != comp_data.end(); ++i_comp)
1498  compArray[nIdx++] = (*i_comp);
1499 
1500  // Set compTable
1501  for(unsigned int i = 0; i<16; i++)
1502  for(unsigned int j = 0; j < 16; j++)
1503  {
1504  nIdx = i<<4 | j;
1505  char c1 = compArray[i] << 4;
1506  char c2 = compArray[j];
1507  compTable->m_Table[nIdx] = c1 | c2;
1508  }
1509 
1510  // Return complement data
1511  return compTable;
1512 
1513 }
1514 
1515 
1516 // Function to initialize m_Ncbi2naRev
1518 {
1519 
1520  // Allocate memory for reverse table
1521  CRef<CCode_rev> revTable(new CCode_rev(256, 0));
1522 
1523  // Initialize table used to reverse a byte.
1524  for(unsigned int i = 0; i < 4; i++)
1525  for(unsigned int j = 0; j < 4; j++)
1526  for(unsigned int k = 0; k < 4; k++)
1527  for(unsigned int l = 0; l < 4; l++)
1528  revTable->m_Table[64*i + 16*j + 4*k + l] =
1529  64*l + 16*k + 4*j +i;
1530 
1531  // Return the reverse table
1532  return revTable;
1533 }
1534 
1535 
1536 // Function to initialize m_Ncbi4naRev
1538 {
1539 
1540  // Allocate memory for reverse table
1541  CRef<CCode_rev> revTable(new CCode_rev(256, 0));
1542 
1543  // Initialize table used to reverse a byte.
1544  for(unsigned int i = 0; i < 16; i++)
1545  for(unsigned int j = 0; j < 16; j++)
1546  revTable->m_Table[16*i + j] = 16*j + i;
1547 
1548  // Return the reverse table
1549  return revTable;
1550 }
1551 
1552 
1553 
1554 // Function to initialize map tables
1557 (ESeq_code_type from_type,
1558  ESeq_code_type to_type)
1559 {
1560 
1561  // Get list of map tables
1562  const list< CRef< CSeq_map_table > >& map_list = m_SeqCodeSet->GetMaps();
1563 
1564  // Get requested map table
1565  list<CRef<CSeq_map_table> >::const_iterator i_mt;
1566  for(i_mt = map_list.begin(); i_mt != map_list.end(); ++i_mt)
1567  if((*i_mt)->GetFrom() == from_type && (*i_mt)->GetTo() == to_type)
1568  break;
1569 
1570  if(i_mt == map_list.end())
1571  throw runtime_error("Requested map table not found");
1572 
1573  // Get the map table
1574  const list<int>& table_data = (*i_mt)->GetTable();
1575 
1576  // Create a map table reference
1577  SIZE_TYPE size = table_data.size();
1578  int start_at = (*i_mt)->GetStart_at();
1579  CRef<CMap_table> mapTable(new CMap_table(size,start_at));
1580 
1581  // Copy the table data to mapTable
1582  int nIdx = start_at;
1583  list<int>::const_iterator i_td;
1584  for(i_td = table_data.begin(); i_td != table_data.end(); ++i_td)
1585  {
1586  mapTable->m_Table[nIdx++] = *i_td;
1587  }
1588 
1589  return mapTable;
1590 }
1591 
1592 
1593 // Functions to initialize fast conversion tables
1594 // Function to initialize FastNcbi2naIupacna
1595 /*
1596 CRef<CSeqportUtil_implementation::CFast_table4> CSeqportUtil_implementation::InitFastNcbi2naIupacna()
1597 {
1598 
1599  CRef<CFast_table4> fastTable(new CFast_table4(256,0));
1600  unsigned char i,j,k,l;
1601  for(i = 0; i < 4; i++)
1602  for(j = 0; j < 4; j++)
1603  for(k = 0; k < 4; k++)
1604  for(l = 0; l < 4; l++)
1605  {
1606  unsigned char aByte = (i<<6) | (j<<4) | (k<<2) | l;
1607  char chi = m_Ncbi2naIupacna->m_Table[i];
1608  char chj = m_Ncbi2naIupacna->m_Table[j];
1609  char chk = m_Ncbi2naIupacna->m_Table[k];
1610  char chl = m_Ncbi2naIupacna->m_Table[l];
1611 
1612  // Note high order bit pair corresponds to low order
1613  // byte etc., on Unix machines.
1614  char *pt =
1615  reinterpret_cast<char*>(&fastTable->m_Table[aByte]);
1616  *(pt++) = chi;
1617  *(pt++) = chj;
1618  *(pt++) = chk;
1619  *(pt) = chl;
1620  }
1621  return fastTable;
1622 }
1623 */
1624 
1625 // Function to initialize FastNcib2naNcbi4na
1627 {
1628 
1629  CRef<CFast_table2> fastTable(new CFast_table2(256,0));
1630  unsigned char i, j, k, l;
1631 
1632  for(i = 0; i < 4; i++)
1633  for(j = 0; j < 4; j++)
1634  for(k = 0; k < 4; k++)
1635  for(l = 0; l < 4; l++) {
1636  unsigned char aByte = (i<<6) | (j<<4) | (k<<2) | l;
1637  unsigned char chi = m_Ncbi2naNcbi4na->m_Table[i];
1638  unsigned char chj = m_Ncbi2naNcbi4na->m_Table[j];
1639  unsigned char chk = m_Ncbi2naNcbi4na->m_Table[k];
1640  unsigned char chl = m_Ncbi2naNcbi4na->m_Table[l];
1641  char *pt =
1642 
1643  reinterpret_cast<char*>(&fastTable->m_Table[aByte]);
1644  *(pt++) = (chi << 4) | chj;
1645  *pt = (chk << 4) | chl;
1646  }
1647  return fastTable;
1648 }
1649 
1650 
1651 // Function to initialize FastNcib4naIupacna
1653 {
1654 
1655  CRef<CFast_table2> fastTable(new CFast_table2(256,0));
1656  unsigned char i,j;
1657  for(i = 0; i < 16; i++)
1658  for(j = 0; j < 16; j++) {
1659  unsigned char aByte = (i<<4) | j;
1660  unsigned char chi = m_Ncbi4naIupacna->m_Table[i];
1661  unsigned char chj = m_Ncbi4naIupacna->m_Table[j];
1662 
1663  // Note high order nible corresponds to low order byte
1664  // etc., on Unix machines.
1665  char *pt = reinterpret_cast<char*>(&fastTable->m_Table[aByte]);
1666  *(pt++) = chi;
1667  *pt = chj;
1668  }
1669  return fastTable;
1670 }
1671 
1672 
1673 // Function to initialize m_FastIupacnancbi2na
1675 {
1676 
1677  auto start_at = m_IupacnaNcbi2na->m_StartAt;
1678  auto size = m_IupacnaNcbi2na->m_Size;
1679  CRef<CFast_4_1> fastTable(new CFast_4_1(4,0,256,0));
1680  for(int ch = 0; ch < 256; ch++) {
1681  if((ch >= start_at) && (ch < (start_at + size)))
1682  {
1683  unsigned char uch = m_IupacnaNcbi2na->m_Table[ch];
1684  uch &= '\x03';
1685  for(unsigned int pos = 0; pos < 4; pos++)
1686  fastTable->m_Table[pos][ch] = uch << (6-2*pos);
1687  }
1688  else
1689  for(unsigned int pos = 0; pos < 4; pos++)
1690  fastTable->m_Table[pos][ch] = '\x00';
1691  }
1692  return fastTable;
1693 }
1694 
1695 
1696 // Function to initialize m_FastIupacnancbi4na
1698 {
1699 
1700  auto start_at = m_IupacnaNcbi4na->m_StartAt;
1701  auto size = m_IupacnaNcbi4na->m_Size;
1702  CRef<CFast_2_1> fastTable(new CFast_2_1(2,0,256,0));
1703  for(int ch = 0; ch < 256; ch++) {
1704  if((ch >= start_at) && (ch < (start_at + size)))
1705  {
1706  unsigned char uch = m_IupacnaNcbi4na->m_Table[ch];
1707  for(unsigned int pos = 0; pos < 2; pos++)
1708  fastTable->m_Table[pos][ch] = uch << (4-4*pos);
1709  }
1710  else
1711  {
1712  fastTable->m_Table[0][ch] = 0xF0;
1713  fastTable->m_Table[1][ch] = 0x0F;
1714  }
1715  }
1716  return fastTable;
1717 }
1718 
1719 
1720 // Function to initialize m_FastNcbi4naNcbi2na
1722 {
1723 
1724  auto start_at = m_Ncbi4naNcbi2na->m_StartAt;
1725  auto size = m_Ncbi4naNcbi2na->m_Size;
1726  CRef<CFast_2_1> fastTable(new CFast_2_1(2,0,256,0));
1727  for(int n1 = 0; n1 < 16; n1++)
1728  for(int n2 = 0; n2 < 16; n2++) {
1729  int nIdx = 16*n1 + n2;
1730  unsigned char u1, u2;
1731  if((n1 >= start_at) && (n1 < start_at + size))
1732  u1 = m_Ncbi4naNcbi2na->m_Table[n1] & 3;
1733  else
1734  u1 = '\x00';
1735  if((n2 >= start_at) && (n2 < start_at + size))
1736  u2 = m_Ncbi4naNcbi2na->m_Table[n2] & 3;
1737  else
1738  u2 = '\x00';
1739  fastTable->m_Table[0][nIdx] = (u1<<6) | (u2<<4);
1740  fastTable->m_Table[1][nIdx] = (u1<<2) | u2;
1741  }
1742 
1743  return fastTable;
1744 }
1745 
1746 
1747 // Function to initialize m_IndexString and m_StringIndex
1749 {
1750  m_IndexString[kName].resize(kNumCodes);
1751  m_IndexString[kSymbol].resize(kNumCodes);
1752  m_IndexComplement.resize(kNumCodes);
1753  m_StringIndex.resize(kNumCodes);
1754  m_StartAt.resize(kNumCodes);
1755 
1756  bool found[kNumCodes];
1757  for (unsigned int ii = 0; ii < kNumCodes; ii++) {
1758  found[ii] = false;
1759  }
1761  const ESeq_code_type& code = (*it)->GetCode();
1762  if (!found[code-1]) {
1763  found[code-1] = true;
1764  m_StartAt[code-1] = (*it)->IsSetStart_at() ?
1765  (*it)->GetStart_at() : 0;
1766  TIndex i = m_StartAt[code-1];
1767  ITERATE(CSeq_code_table::TTable, is, (*it)->GetTable()) {
1768  m_IndexString[kSymbol][code-1].push_back((*is)->GetSymbol());
1769  m_IndexString[kName][code-1].push_back((*is)->GetName());
1770  m_StringIndex[code-1].insert
1771  (make_pair((*is)->GetSymbol(), i++));
1772  }
1773  if ( (*it)->IsSetComps() ) {
1774  ITERATE (list<int>, ic, (*it)->GetComps()) {
1775  m_IndexComplement[code-1].push_back(*ic);
1776  }
1777  }
1778  }
1779  }
1780 
1781 
1782 }
1783 
1784 
1785 // Function to initialize m_Masks
1787 {
1788 
1789  unsigned int i, j, uCnt;
1790  unsigned char cVal, cRslt;
1791  CRef<SMasksArray> aMask(new SMasksArray);
1792 
1793  // Initialize possible masks for converting ambiguous
1794  // ncbi4na bytes to unambiguous bytes
1795  static const unsigned char mask[16] = {
1796  0x11, 0x12, 0x14, 0x18,
1797  0x21, 0x22, 0x24, 0x28,
1798  0x41, 0x42, 0x44, 0x48,
1799  0x81, 0x82, 0x84, 0x88
1800  };
1801 
1802  static const unsigned char maskUpper[4] = { 0x10, 0x20, 0x40, 0x80 };
1803  static const unsigned char maskLower[4] = { 0x01, 0x02, 0x04, 0x08 };
1804 
1805  // Loop through possible ncbi4na bytes and
1806  // build masks that convert it to unambiguous na
1807  for(i = 0; i < 256; i++) {
1808  cVal = i;
1809  uCnt = 0;
1810 
1811  // Case where both upper and lower nible > 0
1812  if(((cVal & '\x0f') != 0) && ((cVal & '\xf0') != 0))
1813  for(j = 0; j < 16; j++) {
1814  cRslt = cVal & mask[j];
1815  if(cRslt == mask[j])
1816  aMask->m_Table[i].cMask[uCnt++] = mask[j];
1817  }
1818 
1819  // Case where upper nible = 0 and lower nible > 0
1820  else if((cVal & '\x0f') != 0)
1821  for(j = 0; j < 4; j++)
1822  {
1823  cRslt = cVal & maskLower[j];
1824  if(cRslt == maskLower[j])
1825  aMask->m_Table[i].cMask[uCnt++] = maskLower[j];
1826  }
1827 
1828 
1829  // Case where lower nible = 0 and upper nible > 0
1830  else if((cVal & '\xf0') != 0)
1831  for(j = 0; j < 4; j++)
1832  {
1833  cRslt = cVal & maskUpper[j];
1834  if(cRslt == maskUpper[j])
1835  aMask->m_Table[i].cMask[uCnt++] = maskUpper[j];
1836  }
1837 
1838  // Both upper and lower nibles = 0
1839  else
1840  aMask->m_Table[i].cMask[uCnt++] = '\x00';
1841 
1842  // Number of distict masks for ncbi4na byte i
1843  aMask->m_Table[i].nMasks = uCnt;
1844 
1845  // Fill out the remainder of cMask array with copies
1846  // of first uCnt masks
1847  for(j = uCnt; j < 16 && uCnt > 0; j++)
1848  aMask->m_Table[i].cMask[j] = aMask->m_Table[i].cMask[j % uCnt];
1849 
1850  }
1851 
1852  return aMask;
1853 }
1854 
1855 
1856 // Function to initialize m_DetectAmbigNcbi4naNcbi2na used for
1857 // ambiguity detection
1859 {
1860 
1861  unsigned char low, high, ambig;
1862 
1863  // Create am new CAmbig_detect object
1864  CRef<CAmbig_detect> ambig_detect(new CAmbig_detect(256,0));
1865 
1866  // Loop through low and high order nibles and assign
1867  // values as follows: 0 - no ambiguity, 1 - low order nible ambigiguous
1868  // 2 - high order ambiguous, 3 -- both high and low ambiguous.
1869 
1870  // Loop for low order nible
1871  for(low = 0; low < 16; low++) {
1872  // Determine if low order nible is ambiguous
1873  if((low == 1) || (low ==2) || (low == 4) || (low == 8))
1874  ambig = 0; // Not ambiguous
1875  else
1876  ambig = 1; // Ambiguous
1877 
1878  // Loop for high order nible
1879  for(high = 0; high < 16; high++) {
1880 
1881  // Determine if high order nible is ambiguous
1882  if((high != 1) && (high != 2) && (high != 4) && (high != 8))
1883  ambig += 2; // Ambiguous
1884 
1885  // Set ambiguity value
1886  ambig_detect->m_Table[16*high + low] = ambig;
1887 
1888  // Reset ambig
1889  ambig &= '\xfd'; // Set second bit to 0
1890  }
1891  }
1892 
1893  return ambig_detect;
1894 }
1895 
1896 
1897 // Function to initialize m_DetectAmbigIupacnaNcbi2na used for ambiguity
1898 // detection
1900 {
1901 
1902  // Create am new CAmbig_detect object
1903  CRef<CAmbig_detect> ambig_detect(new CAmbig_detect(256,0));
1904 
1905  // 0 implies no ambiguity. 1 implies ambiguity
1906  // Initialize to 0
1907  for(unsigned int i = 0; i<256; i++)
1908  ambig_detect->m_Table[i] = 0;
1909 
1910  // Set iupacna characters that are ambiguous when converted
1911  // to ncib2na
1912  ambig_detect->m_Table[66] = 1; // B
1913  ambig_detect->m_Table[68] = 1; // D
1914  ambig_detect->m_Table[72] = 1; // H
1915  ambig_detect->m_Table[75] = 1; // K
1916  ambig_detect->m_Table[77] = 1; // M
1917  ambig_detect->m_Table[78] = 1; // N
1918  ambig_detect->m_Table[82] = 1; // R
1919  ambig_detect->m_Table[83] = 1; // S
1920  ambig_detect->m_Table[86] = 1; // V
1921  ambig_detect->m_Table[87] = 1; // W
1922  ambig_detect->m_Table[89] = 1; // Y
1923 
1924  return ambig_detect;
1925 }
1926 
1927 /*
1928 struct SSeqDataToSeqUtil
1929 {
1930  CSeq_data::E_Choice seq_data_coding;
1931  CSeqConvert::TCoding seq_convert_coding;
1932 };
1933 
1934 
1935 static SSeqDataToSeqUtil s_SeqDataToSeqUtilMap[] = {
1936  { CSeq_data::e_Iupacna, CSeqUtil::e_Iupacna },
1937  { CSeq_data::e_Iupacaa, CSeqUtil::e_Iupacna },
1938  { CSeq_data::e_Ncbi2na, CSeqUtil::e_Ncbi2na },
1939  { CSeq_data::e_Ncbi4na, CSeqUtil::e_Ncbi4na },
1940  { CSeq_data::e_Ncbi8na, CSeqUtil::e_Ncbi8na },
1941  { CSeq_data::e_Ncbi8aa, CSeqUtil::e_Ncbi8aa },
1942  { CSeq_data::e_Ncbieaa, CSeqUtil::e_Ncbieaa },
1943  { CSeq_data::e_Ncbistdaa, CSeqUtil::e_Ncbistdaa }
1944 };
1945 */
1946 
1959 };
1960 
1961 
1962 // Convert from one coding scheme to another. The following
1963 // 12 conversions are supported: ncbi2na<=>ncbi4na;
1964 // ncbi2na<=>iupacna; ncbi4na<=>iupacna; ncbieaa<=>ncbistdaa;
1965 // ncbieaa<=>iupacaa; ncbistdaa<=>iupacaa. Convert is
1966 // really just a dispatch function--it calls the appropriate
1967 // priviate conversion function.
1969 (const CSeq_data& in_seq,
1970  CSeq_data* out_seq,
1971  CSeq_data::E_Choice to_code,
1972  TSeqPos uBeginIdx,
1973  TSeqPos uLength,
1975  TSeqPos total_length,
1976  TSeqPos* out_seq_length,
1977  vector<Uint4>* blast_ambig)
1978  const
1979 {
1980  CSeq_data::E_Choice from_code = in_seq.Which();
1981 
1982  if(to_code == CSeq_data::e_not_set || from_code == CSeq_data::e_not_set)
1983  throw std::runtime_error("to_code or from_code not set");
1984 
1985  if ( to_code != CSeq_data::e_Ncbi2na ) {
1986  throw std::runtime_error("to_code is not Ncbi2na");
1987  }
1988 
1989  switch (from_code) {
1990  case CSeq_data::e_Iupacna:
1991  return MapIupacnaToNcbi2na(in_seq, out_seq, uBeginIdx, uLength, true,
1992  seed, total_length, out_seq_length,
1993  blast_ambig);
1994  case CSeq_data::e_Ncbi4na:
1995  return MapNcbi4naToNcbi2na(in_seq, out_seq, uBeginIdx, uLength, true,
1996  seed, total_length, out_seq_length,
1997  blast_ambig);
1998  default:
1999  throw runtime_error("Requested conversion not implemented");
2000  }
2001 }
2002 
2003 // Convert from one coding scheme to another. The following
2004 // 12 conversions are supported: ncbi2na<=>ncbi4na;
2005 // ncbi2na<=>iupacna; ncbi4na<=>iupacna; ncbieaa<=>ncbistdaa;
2006 // ncbieaa<=>iupacaa; ncbistdaa<=>iupacaa. Convert is
2007 // really just a dispatch function--it calls the appropriate
2008 // priviate conversion function.
2010 (const CSeq_data& in_seq,
2011  CSeq_data* out_seq,
2012  CSeq_data::E_Choice to_code,
2013  TSeqPos uBeginIdx,
2014  TSeqPos uLength,
2015  bool bAmbig,
2017  TSeqPos total_length,
2018  TSeqPos* out_seq_length,
2019  vector<Uint4>* blast_ambig)
2020  const
2021 {
2022  CSeq_data::E_Choice from_code = in_seq.Which();
2023 
2024  // adjust uLength
2025  if ( uLength == 0 ) {
2026  uLength = numeric_limits<TSeqPos>::max();
2027  }
2028 
2029  if(to_code == CSeq_data::e_not_set || from_code == CSeq_data::e_not_set) {
2030  throw std::runtime_error("to_code or from_code not set");
2031  }
2032  if ( s_SeqDataToSeqUtil[to_code] == CSeqUtil::e_not_set ||
2033  s_SeqDataToSeqUtil[from_code] == CSeqUtil::e_not_set ) {
2034  throw runtime_error("Requested conversion not implemented");
2035  }
2036 
2037  // Note: for now use old code to convert to ncbi2na with random
2038  // conversion of ambiguous characters.
2039  if ( (to_code == CSeq_data::e_Ncbi2na) && (bAmbig == true) ) {
2040  return x_ConvertAmbig(in_seq, out_seq, to_code, uBeginIdx, uLength,
2041  seed, total_length, out_seq_length, blast_ambig);
2042  }
2043 
2044  const string* in_str = 0;
2045  const vector<char>* in_vec = 0;
2046 
2047  x_GetSeqFromSeqData(in_seq, &in_str, &in_vec);
2048 
2049  size_t retval = 0;
2050  if ( in_str != 0 ) {
2051  string result;
2052  retval = CSeqConvert::Convert(*in_str, s_SeqDataToSeqUtil[from_code],
2053  uBeginIdx, uLength,
2054  result, s_SeqDataToSeqUtil[to_code]);
2055  CSeq_data temp(result, to_code);
2056  out_seq->Assign(temp);
2057  } else if ( in_vec != 0 ) {
2058  vector<char> result;
2059  retval = CSeqConvert::Convert(*in_vec, s_SeqDataToSeqUtil[from_code],
2060  uBeginIdx, uLength,
2061  result, s_SeqDataToSeqUtil[to_code]);
2062  CSeq_data temp(result, to_code);
2063  out_seq->Assign(temp);
2064  }
2065  return static_cast<TSeqPos>(retval);
2066 }
2067 
2068 
2069 // Provide maximum packing without loss of information
2071 (CSeq_data* in_seq,
2072  TSeqPos uLength)
2073  const
2074 {
2075  _ASSERT(in_seq != 0);
2076 
2077  CSeq_data::E_Choice from_code = in_seq->Which();
2078  _ASSERT(from_code != CSeq_data::e_not_set);
2079 
2080  if ( s_SeqDataToSeqUtil[from_code] == CSeqUtil::e_not_set ) {
2081  throw runtime_error("Unable tp pack requested coding");
2082  }
2083 
2084 
2085  // nothing to pack for proteins
2086  switch ( from_code ) {
2087  case CSeq_data::e_Iupacaa:
2088  return in_seq->GetIupacaa().Get().size();
2089  case CSeq_data::e_Ncbi8aa:
2090  return in_seq->GetNcbi8aa().Get().size();
2091  case CSeq_data::e_Ncbieaa:
2092  return in_seq->GetNcbieaa().Get().size();
2093  case CSeq_data::e_Ncbipaa:
2094  return in_seq->GetNcbipaa().Get().size();
2096  return in_seq->GetNcbistdaa().Get().size();
2097  default:
2098  break;
2099  }
2100  // nothing to convert
2101  if ( from_code == CSeq_data::e_Ncbi2na &&
2102  in_seq->GetNcbi2na().Get().size() * 4 <= uLength ) {
2103  return in_seq->GetNcbi2na().Get().size() * 4;
2104  }
2105 
2106  const string* in_str = 0;
2107  const vector<char>* in_vec = 0;
2108 
2109  x_GetSeqFromSeqData(*in_seq, &in_str, &in_vec);
2110 
2111  vector<char> out_vec;
2113 
2114  size_t retval = 0;
2115  if ( in_str != 0 ) {
2116  retval =
2117  CSeqConvert::Pack(*in_str, s_SeqDataToSeqUtil[from_code],
2118  out_vec, coding, uLength);
2119  } else if ( in_vec != 0 ) {
2120  retval =
2121  CSeqConvert::Pack(*in_vec, s_SeqDataToSeqUtil[from_code],
2122  out_vec, coding, uLength);
2123  }
2124 
2125  switch (coding) {
2126  case CSeqUtil::e_Ncbi2na:
2127  in_seq->SetNcbi2na().Set(out_vec);
2128  break;
2129  case CSeqUtil::e_Ncbi4na:
2130  in_seq->SetNcbi4na().Set(out_vec);
2131  break;
2132  default:
2133  _TROUBLE;
2134  }
2135 
2136  return retval;
2137 }
2138 
2139 
2140 // Method to quickly validate that a CSeq_data object contains valid data.
2141 // FastValidate is a dispatch function that calls the appropriate
2142 // private fast validation function.
2144 (const CSeq_data& in_seq,
2145  TSeqPos uBeginIdx,
2146  TSeqPos uLength)
2147  const
2148 {
2149  switch (in_seq.Which()) {
2150  case CSeq_data::e_Ncbi2na:
2151  return true; // ncbi2na sequences are always valid
2152  case CSeq_data::e_Ncbi4na:
2153  return true; // ncbi4na sequences are always valid
2154  case CSeq_data::e_Iupacna:
2155  return FastValidateIupacna(in_seq, uBeginIdx, uLength);
2156  case CSeq_data::e_Ncbieaa:
2157  return FastValidateNcbieaa(in_seq, uBeginIdx, uLength);
2159  return FastValidateNcbistdaa(in_seq, uBeginIdx, uLength);
2160  case CSeq_data::e_Iupacaa:
2161  return FastValidateIupacaa(in_seq, uBeginIdx, uLength);
2162  default:
2163  throw runtime_error("Sequence could not be validated");
2164  }
2165 }
2166 
2167 
2168 // Function to perform full validation. Validate is a
2169 // dispatch function that calls the appropriate private
2170 // validation function.
2172 (const CSeq_data& in_seq,
2173  vector<TSeqPos>* badIdx,
2174  TSeqPos uBeginIdx,
2175  TSeqPos uLength)
2176  const
2177 {
2178  switch (in_seq.Which()) {
2179  case CSeq_data::e_Ncbi2na:
2180  return; // ncbi2na sequences are always valid
2181  case CSeq_data::e_Ncbi4na:
2182  return; // ncbi4na sequences are always valid
2183  case CSeq_data::e_Iupacna:
2184  ValidateIupacna(in_seq, badIdx, uBeginIdx, uLength);
2185  break;
2186  case CSeq_data::e_Ncbieaa:
2187  ValidateNcbieaa(in_seq, badIdx, uBeginIdx, uLength);
2188  break;
2190  ValidateNcbistdaa(in_seq, badIdx, uBeginIdx, uLength);
2191  break;
2192  case CSeq_data::e_Iupacaa:
2193  ValidateIupacaa(in_seq, badIdx, uBeginIdx, uLength);
2194  break;
2195  default:
2196  throw runtime_error("Sequence could not be validated");
2197  }
2198 }
2199 
2200 
2201 // Function to find ambiguous bases and vector of indices of
2202 // ambiguous bases in CSeq_data objects. GetAmbigs is a
2203 // dispatch function that calls the appropriate private get
2204 // ambigs function.
2206 (const CSeq_data& in_seq,
2207  CSeq_data* out_seq,
2208  vector<TSeqPos>* out_indices,
2209  CSeq_data::E_Choice to_code,
2210  TSeqPos uBeginIdx,
2211  TSeqPos uLength)
2212  const
2213 {
2214 
2215  // Determine and call applicable GetAmbig method.
2216  switch (in_seq.Which()) {
2217  case CSeq_data::e_Ncbi4na:
2218  switch (to_code) {
2219  case CSeq_data::e_Ncbi2na:
2220  return GetAmbigs_ncbi4na_ncbi2na(in_seq, out_seq, out_indices,
2221  uBeginIdx, uLength);
2222  default:
2223  return 0;
2224  }
2225  case CSeq_data::e_Iupacna:
2226  switch (to_code) {
2227  case CSeq_data::e_Ncbi2na:
2228  return GetAmbigs_iupacna_ncbi2na(in_seq, out_seq, out_indices,
2229  uBeginIdx, uLength);
2230  default:
2231  return 0;
2232  }
2233  default:
2234  return 0;
2235  }
2236 }
2237 
2238 
2239 // Get a copy of in_seq from uBeginIdx through uBeginIdx + uLength-1
2240 // and put in out_seq. See comments in alphabet.hpp for more information.
2241 // GetCopy is a dispatch function.
2243 (const CSeq_data& in_seq,
2244  CSeq_data* out_seq,
2245  TSeqPos uBeginIdx,
2246  TSeqPos uLength)
2247  const
2248 {
2249  // Do processing based on in_seq type
2250  switch (in_seq.Which()) {
2251  case CSeq_data::e_Ncbi2na:
2252  return GetNcbi2naCopy(in_seq, out_seq, uBeginIdx, uLength);
2253  case CSeq_data::e_Ncbi4na:
2254  return GetNcbi4naCopy(in_seq, out_seq, uBeginIdx, uLength);
2255  case CSeq_data::e_Iupacna:
2256  return GetIupacnaCopy(in_seq, out_seq, uBeginIdx, uLength);
2257  case CSeq_data::e_Ncbieaa:
2258  return GetNcbieaaCopy(in_seq, out_seq, uBeginIdx, uLength);
2260  return GetNcbistdaaCopy(in_seq, out_seq, uBeginIdx, uLength);
2261  case CSeq_data::e_Iupacaa:
2262  return GetIupacaaCopy(in_seq, out_seq, uBeginIdx, uLength);
2263  default:
2264  throw runtime_error
2265  ("GetCopy() is not implemented for the requested sequence type");
2266  }
2267 }
2268 
2269 
2270 
2271 
2272 // Method to keep only a contiguous piece of a sequence beginning
2273 // at uBeginIdx and uLength residues long. Keep is a
2274 // dispatch function.
2276 (CSeq_data* in_seq,
2277  TSeqPos uBeginIdx,
2278  TSeqPos uLength)
2279  const
2280 {
2281  // Do proceessing based upon in_seq type
2282  switch (in_seq->Which()) {
2283  case CSeq_data::e_Ncbi2na:
2284  return KeepNcbi2na(in_seq, uBeginIdx, uLength);
2285  case CSeq_data::e_Ncbi4na:
2286  return KeepNcbi4na(in_seq, uBeginIdx, uLength);
2287  case CSeq_data::e_Iupacna:
2288  return KeepIupacna(in_seq, uBeginIdx, uLength);
2289  case CSeq_data::e_Ncbieaa:
2290  return KeepNcbieaa(in_seq, uBeginIdx, uLength);
2292  return KeepNcbistdaa(in_seq, uBeginIdx, uLength);
2293  case CSeq_data::e_Iupacaa:
2294  return KeepIupacaa(in_seq, uBeginIdx, uLength);
2295  default:
2296  throw runtime_error("Cannot perform Keep on in_seq type.");
2297  }
2298 }
2299 
2300 
2301 // Append in_seq2 to in_seq1 and put result in out_seq. This
2302 // is a dispatch function.
2304 (CSeq_data* out_seq,
2305  const CSeq_data& in_seq1,
2306  TSeqPos uBeginIdx1,
2307  TSeqPos uLength1,
2308  const CSeq_data& in_seq2,
2309  TSeqPos uBeginIdx2,
2310  TSeqPos uLength2)
2311  const
2312 {
2313  // Check that in_seqs or of same type
2314  if(in_seq1.Which() != in_seq2.Which())
2315  throw runtime_error("Append in_seq types do not match.");
2316 
2317  // Check that out_seq is not null
2318  if(!out_seq) {
2319  return 0;
2320  }
2321 
2322  // Call applicable append method base on in_seq types
2323  switch (in_seq1.Which()) {
2324  case CSeq_data::e_Iupacna:
2325  return AppendIupacna(out_seq, in_seq1, uBeginIdx1, uLength1,
2326  in_seq2, uBeginIdx2, uLength2);
2327  case CSeq_data::e_Ncbi2na:
2328  return AppendNcbi2na(out_seq, in_seq1, uBeginIdx1, uLength1,
2329  in_seq2, uBeginIdx2, uLength2);
2330  case CSeq_data::e_Ncbi4na:
2331  return AppendNcbi4na(out_seq, in_seq1, uBeginIdx1, uLength1,
2332  in_seq2, uBeginIdx2, uLength2);
2333  case CSeq_data::e_Ncbieaa:
2334  return AppendNcbieaa(out_seq, in_seq1, uBeginIdx1, uLength1,
2335  in_seq2, uBeginIdx2, uLength2);
2337  return AppendNcbistdaa(out_seq, in_seq1, uBeginIdx1, uLength1,
2338  in_seq2, uBeginIdx2, uLength2);
2339  case CSeq_data::e_Iupacaa:
2340  return AppendIupacaa(out_seq, in_seq1, uBeginIdx1, uLength1,
2341  in_seq2, uBeginIdx2, uLength2);
2342  default:
2343  throw runtime_error("Append for in_seq type not supported.");
2344  }
2345 }
2346 
2347 
2348 // Methods to complement na sequences. These are
2349 // dispatch functions.
2350 
2351 // Method to complement na sequence in place
2353 (CSeq_data* in_seq,
2354  TSeqPos uBeginIdx,
2355  TSeqPos uLength)
2356  const
2357 {
2358  _ASSERT(in_seq != 0);
2359 
2360  CSeq_data complement;
2361  TSeqPos retval = Complement(*in_seq, &complement, uBeginIdx, uLength);
2362  in_seq->Assign(complement);
2363 
2364  return retval;
2365 }
2366 
2367 
2368 // Method to complement na sequence in a copy out_seq
2370 (const CSeq_data& in_seq,
2371  CSeq_data* out_seq,
2372  TSeqPos uBeginIdx,
2373  TSeqPos uLength)
2374  const
2375 {
2376  _ASSERT(out_seq != 0);
2377 
2378  if ( uLength == 0 ) {
2379  uLength = numeric_limits<TSeqPos>::max();
2380  }
2381  CSeq_data::E_Choice in_code = in_seq.Which();
2382  _ASSERT(in_code != CSeq_data::e_not_set);
2383 
2384  const string* in_str = 0;
2385  const vector<char>* in_vec = 0;
2386  x_GetSeqFromSeqData(in_seq, &in_str, &in_vec);
2387 
2388  size_t retval = 0;
2389  if ( in_str ) {
2390  string out_str;
2391  retval = CSeqManip::Complement(*in_str, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength, out_str);
2392  CSeq_data temp(out_str, in_code);
2393  out_seq->Assign(temp);
2394  } else if (in_vec != 0) {
2395  vector<char> out_vec;
2396  retval = CSeqManip::Complement(*in_vec, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength, out_vec);
2397  CSeq_data temp(out_vec, in_code);
2398  out_seq->Assign(temp);
2399  }
2400  return static_cast<TSeqPos>(retval);
2401 }
2402 
2403 
2404 // Methods to reverse na sequences. These are
2405 // dispatch functions.
2406 
2407 // Method to reverse na sequence in place
2409 (CSeq_data* in_seq,
2410  TSeqPos uBeginIdx,
2411  TSeqPos uLength)
2412  const
2413 {
2414  CSeq_data temp;
2415  TSeqPos retval = Reverse(*in_seq, &temp, uBeginIdx, uLength);
2416  in_seq->Assign(temp);
2417 
2418  return retval;
2419 }
2420 
2421 
2422 // Method to reverse na sequence in a copy out_seq
2424 (const CSeq_data& in_seq,
2425  CSeq_data* out_seq,
2426  TSeqPos uBeginIdx,
2427  TSeqPos uLength)
2428  const
2429 {
2430  _ASSERT(out_seq != 0);
2431 
2432  if ( uLength == 0 ) {
2433  uLength = numeric_limits<TSeqPos>::max();
2434  }
2435 
2436  CSeq_data::E_Choice in_code = in_seq.Which();
2437  _ASSERT(in_code != CSeq_data::e_not_set);
2438 
2439  const string* in_str = 0;
2440  const vector<char>* in_vec = 0;
2441  x_GetSeqFromSeqData(in_seq, &in_str, &in_vec);
2442 
2443  size_t retval = 0;
2444  if ( in_str ) {
2445  string out_str;
2446  retval = CSeqManip::Reverse(*in_str, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength, out_str);
2447  CSeq_data temp(out_str, in_code);
2448  out_seq->Assign(temp);
2449  } else if (in_vec != NULL) {
2450  vector<char> out_vec;
2451  retval = CSeqManip::Reverse(*in_vec, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength, out_vec);
2452  CSeq_data temp(out_vec, in_code);
2453  out_seq->Assign(temp);
2454  }
2455  return static_cast<TSeqPos>(retval);
2456 }
2457 
2458 
2459 
2460 // Methods to reverse-complement a sequence. These are
2461 // dispatch functions.
2462 
2463 // Method to reverse-complement na sequence in place
2465 (CSeq_data* in_seq,
2466  TSeqPos uBeginIdx,
2467  TSeqPos uLength)
2468  const
2469 {
2470  _ASSERT(in_seq != 0);
2471 
2472  CSeq_data::E_Choice in_code = in_seq->Which();
2473  _ASSERT(in_code != CSeq_data::e_not_set);
2474 
2475  string* in_str = 0;
2476  vector<char>* in_vec = 0;
2477  x_GetSeqFromSeqData(*in_seq, &in_str, &in_vec);
2478 
2479  size_t retval = 0;
2480  if ( in_str ) {
2481  retval = CSeqManip::ReverseComplement(*in_str, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength);
2482  } else if (in_vec != NULL) {
2483  retval = CSeqManip::ReverseComplement(*in_vec, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength);
2484  }
2485  return static_cast<TSeqPos>(retval);
2486 }
2487 
2488 
2489 // Method to reverse-complement na sequence in a copy out_seq
2491 (const CSeq_data& in_seq,
2492  CSeq_data* out_seq,
2493  TSeqPos uBeginIdx,
2494  TSeqPos uLength)
2495  const
2496 {
2497  _ASSERT(out_seq != 0);
2498 
2499  if ( uLength == 0 ) {
2500  uLength = numeric_limits<TSeqPos>::max();
2501  }
2502 
2503  CSeq_data::E_Choice in_code = in_seq.Which();
2504  _ASSERT(in_code != CSeq_data::e_not_set);
2505 
2506  const string* in_str = 0;
2507  const vector<char>* in_vec = 0;
2508  x_GetSeqFromSeqData(in_seq, &in_str, &in_vec);
2509 
2510  size_t retval = 0;
2511  if ( in_str ) {
2512  string out_str;
2513  retval = CSeqManip::ReverseComplement(*in_str, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength, out_str);
2514  CSeq_data temp(out_str, in_code);
2515  out_seq->Assign(temp);
2516  } else if (in_vec != NULL) {
2517  vector<char> out_vec;
2518  retval = CSeqManip::ReverseComplement(*in_vec, s_SeqDataToSeqUtil[in_code], uBeginIdx, uLength, out_vec);
2519  CSeq_data temp(out_vec, in_code);
2520  out_seq->Assign(temp);
2521  }
2522 
2523  return static_cast<TSeqPos>(retval);
2524 }
2525 
2526 
2527 // Implement private worker functions called by public
2528 // dispatch functions.
2529 
2530 // Methods to convert between coding schemes
2531 
2532 /*
2533 // Convert in_seq from ncbi2na (1 byte) to iupacna (4 bytes)
2534 // and put result in out_seq
2535 TSeqPos CSeqportUtil_implementation::MapNcbi2naToIupacna
2536 (const CSeq_data& in_seq,
2537  CSeq_data* out_seq,
2538  TSeqPos uBeginIdx,
2539  TSeqPos uLength)
2540  const
2541 {
2542  // Save uBeginIdx and uLength for later use
2543  TSeqPos uBeginSav = uBeginIdx;
2544  TSeqPos uLenSav = uLength;
2545 
2546  // Get vector holding the in sequence
2547  const vector<char>& in_seq_data = in_seq.GetNcbi2na().Get();
2548 
2549  // Get string where the out sequence will go
2550  out_seq->Reset();
2551  string& out_seq_data = out_seq->SetIupacna().Set();
2552 
2553  // Validate uBeginSav
2554  if(uBeginSav >= 4*in_seq_data.size())
2555  return 0;
2556 
2557  // Adjust uLenSav
2558  if((uLenSav == 0 )|| ((uLenSav + uBeginSav )> 4*in_seq_data.size()))
2559  uLenSav = 4*in_seq_data.size() - uBeginSav;
2560 
2561 
2562  // Adjust uBeginIdx and uLength, if necessary
2563  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 4, 1);
2564 
2565  // Declare iterator for in_seq
2566  vector<char>::const_iterator i_in;
2567 
2568  // Allocate string memory for result of conversion
2569  out_seq_data.resize(uLenSav);
2570 
2571  // Get pointer to data of out_seq_data (a string)
2572  string::iterator i_out = out_seq_data.begin()-1;
2573 
2574  // Determine begin and end bytes of in_seq_data
2575  vector<char>::const_iterator i_in_begin =
2576  in_seq_data.begin() + uBeginIdx/4;
2577  vector<char>::const_iterator i_in_end = i_in_begin + uLength/4;
2578  if((uLength % 4) != 0) ++i_in_end;
2579  --i_in_end;
2580 
2581  // Handle first input sequence byte
2582  unsigned int uVal =
2583  m_FastNcbi2naIupacna->m_Table[static_cast<unsigned char>(*i_in_begin)];
2584  char *pchar, *pval;
2585  pval = reinterpret_cast<char*>(&uVal);
2586  for(pchar = pval + uBeginSav - uBeginIdx; pchar < pval + 4; ++pchar)
2587  *(++i_out) = *pchar;
2588 
2589  if(i_in_begin == i_in_end)
2590  return uLenSav;
2591  ++i_in_begin;
2592 
2593  // Loop through in_seq_data and convert to out_seq
2594  for(i_in = i_in_begin; i_in != i_in_end; ++i_in) {
2595  uVal =
2596  m_FastNcbi2naIupacna->m_Table[static_cast<unsigned char>(*i_in)];
2597  pchar = reinterpret_cast<char*>(&uVal);
2598  (*(++i_out)) = (*(pchar++));
2599  (*(++i_out)) = (*(pchar++));
2600  (*(++i_out)) = (*(pchar++));
2601  (*(++i_out)) = (*(pchar++));
2602  }
2603 
2604  // Handle last byte of input data
2605  uVal =
2606  m_FastNcbi2naIupacna->m_Table[static_cast<unsigned char>(*i_in_end)];
2607  pval = reinterpret_cast<char*>(&uVal);
2608  TSeqPos uOverhang = (uBeginSav + uLenSav) % 4;
2609  uOverhang = (uOverhang ==0) ? 4 : uOverhang;
2610  for(pchar = pval; pchar < pval + uOverhang; ++pchar) {
2611  (*(++i_out)) = *pchar;
2612  }
2613 
2614  return uLenSav;
2615 }
2616 
2617 
2618 // Convert in_seq from ncbi2na (1 byte) to ncbi4na (2 bytes)
2619 // and put result in out_seq
2620 TSeqPos CSeqportUtil_implementation::MapNcbi2naToNcbi4na
2621 (const CSeq_data& in_seq,
2622  CSeq_data* out_seq,
2623  TSeqPos uBeginIdx,
2624  TSeqPos uLength)
2625  const
2626 {
2627  // Get vector holding the in sequence
2628  const vector<char>& in_seq_data = in_seq.GetNcbi2na().Get();
2629 
2630  // Get vector where out sequence will go
2631  out_seq->Reset();
2632  vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
2633 
2634  // Save uBeginIdx and uLength for later use as they
2635  // are modified below
2636  TSeqPos uBeginSav = uBeginIdx;
2637  TSeqPos uLenSav = uLength;
2638 
2639  // Check that uBeginSav is not beyond end of in_seq_data
2640  if(uBeginSav >= 4*in_seq_data.size())
2641  return 0;
2642 
2643  // Adjust uLenSav
2644  if((uLenSav == 0) || ((uBeginSav + uLenSav) > 4*in_seq_data.size()))
2645  uLenSav = 4*in_seq_data.size() - uBeginSav;
2646 
2647 
2648  // Adjust uBeginIdx and uLength, if necessary
2649  TSeqPos uOverhang =
2650  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 4, 2);
2651 
2652  // Declare iterator for in_seq
2653  vector<char>::const_iterator i_in;
2654 
2655  // Allocate memory for out_seq_data
2656  TSeqPos uInBytes = (uLength + uOverhang)/4;
2657  if(((uLength + uOverhang) % 4) != 0) uInBytes++;
2658  vector<char>::size_type nOutBytes = 2*uInBytes;
2659  out_seq_data.resize(nOutBytes);
2660 
2661  // Get an iterator of out_seq_data
2662  vector<char>::iterator i_out = out_seq_data.begin()-1;
2663 
2664  // Determine begin and end bytes of in_seq_data
2665  vector<char>::const_iterator i_in_begin =
2666  in_seq_data.begin() + uBeginIdx/4;
2667  vector<char>::const_iterator i_in_end = i_in_begin + uInBytes;
2668 
2669  // Loop through in_seq_data and convert to out_seq_data
2670  for(i_in = i_in_begin; i_in != i_in_end; ++i_in) {
2671  unsigned short uVal =
2672  m_FastNcbi2naNcbi4na->m_Table[static_cast<unsigned char>(*i_in)];
2673  char* pch = reinterpret_cast<char*>(&uVal);
2674  (*(++i_out)) = (*(pch++));
2675  (*(++i_out)) = (*(pch++));
2676  }
2677  TSeqPos keepidx = uBeginSav - uBeginIdx;
2678  KeepNcbi4na(out_seq, keepidx, uLenSav);
2679 
2680  return uLenSav;
2681 }
2682 
2683 
2684 // Convert in_seq from ncbi4na (1 byte) to iupacna (2 bytes)
2685 // and put result in out_seq
2686 TSeqPos CSeqportUtil_implementation::MapNcbi4naToIupacna
2687 (const CSeq_data& in_seq,
2688  CSeq_data* out_seq,
2689  TSeqPos uBeginIdx,
2690  TSeqPos uLength)
2691  const
2692 {
2693  // Save uBeginIdx and uLength for later use
2694  TSeqPos uBeginSav = uBeginIdx;
2695  TSeqPos uLenSav = uLength;
2696 
2697  // Get vector holding the in sequence
2698  const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();
2699 
2700  // Get string where the out sequence will go
2701  out_seq->Reset();
2702  string& out_seq_data = out_seq->SetIupacna().Set();
2703 
2704  // Validate uBeginSav
2705  if(uBeginSav >= 2*in_seq_data.size())
2706  return 0;
2707 
2708  // Adjust uLenSav
2709  if((uLenSav == 0 )|| ((uLenSav + uBeginSav )> 2*in_seq_data.size()))
2710  uLenSav = 2*in_seq_data.size() - uBeginSav;
2711 
2712 
2713  // Adjust uBeginIdx and uLength, if necessary
2714  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 2, 1);
2715 
2716  // Declare iterator for in_seq
2717  vector<char>::const_iterator i_in;
2718 
2719  // Allocate string memory for result of conversion
2720  out_seq_data.resize(uLenSav);
2721 
2722  // Get pointer to data of out_seq_data (a string)
2723  string::iterator i_out = out_seq_data.begin() - 1;
2724 
2725  // Determine begin and end bytes of in_seq_data
2726  vector<char>::const_iterator i_in_begin =
2727  in_seq_data.begin() + uBeginIdx/2;
2728  vector<char>::const_iterator i_in_end = i_in_begin + uLength/2;
2729  if((uLength % 2) != 0) ++i_in_end;
2730  --i_in_end;
2731 
2732  // Handle first input sequence byte
2733  unsigned short uVal =
2734  m_FastNcbi4naIupacna->m_Table[static_cast<unsigned char>(*i_in_begin)];
2735  char *pchar, *pval;
2736  pval = reinterpret_cast<char*>(&uVal);
2737  for(pchar = pval + uBeginSav - uBeginIdx; pchar < pval + 2; ++pchar)
2738  *(++i_out) = *pchar;
2739 
2740  if(i_in_begin == i_in_end)
2741  return uLenSav;
2742  ++i_in_begin;
2743 
2744  // Loop through in_seq_data and convert to out_seq
2745  for(i_in = i_in_begin; i_in != i_in_end; ++i_in) {
2746  uVal =
2747  m_FastNcbi4naIupacna->m_Table[static_cast<unsigned char>(*i_in)];
2748  pchar = reinterpret_cast<char*>(&uVal);
2749  (*(++i_out)) = (*(pchar++));
2750  (*(++i_out)) = (*(pchar++));
2751  }
2752 
2753  // Handle last byte of input data
2754  uVal =
2755  m_FastNcbi4naIupacna->m_Table[static_cast<unsigned char>(*i_in_end)];
2756  pval = reinterpret_cast<char*>(&uVal);
2757  TSeqPos uOverhang = (uBeginSav + uLenSav) % 2;
2758  uOverhang = (uOverhang ==0) ? 2 : uOverhang;
2759  for(pchar = pval; pchar < pval + uOverhang; ++pchar)
2760  (*(++i_out)) = *pchar;
2761 
2762  return uLenSav;
2763 }
2764 */
2765 
// Table for quick check of whether an ncbi4na residue represents an ambiguity.
// Indexed by the 4-bit residue code: entries are 1 for every code with more
// than one bit set and 0 for the single-bit codes 1, 2, 4 and 8.
// The 0 value is not considered an ambiguity, as it represents the end of
// buffer.
static const char kAmbig4na[16] =
    { 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 };
2771 
2773 public:
2774  CAmbiguityContext(vector<Uint4>& amb_buff, int seq_length);
2775  // Make sure the vector is not freed in the destructor
2777  void UpdateBuffer();
2778  void AddAmbiguity(char in_byte, TSeqPos& seq_pos);
2779  void Finish();
2780 private:
2781  vector<Uint4>& m_vAmbBuf; ///< Ambiguity buffer to fill
2782  char m_LastAmbChar; ///< Last previous ambiguity character
2788 };
2789 
2790 CAmbiguityContext::CAmbiguityContext(vector<Uint4>& amb_buff, int seq_length)
2791  : m_vAmbBuf(amb_buff)
2792 {
2793  m_AmbCount = 0;
2794  m_AmbStart = 0;
2795  m_BuffPos = 0;
2796  m_LastAmbChar = 0;
2797  m_bLongFormat = (seq_length >= 0x00ffffff);
2798  m_MaxAmbCount = (m_bLongFormat ? 0x00000fff : 0x0000000f);
2799  // If "long format", set the top bit in the length element of the
2800  // ambiguity vector, but only if the input vector is empty. Otherwise,
2801  // assume that this initialization has already been done in the previous
2802  // invocation.
2803  if (m_vAmbBuf.size() == 0) {
2804  Uint4 amb_len = (m_bLongFormat ? 0x80000000 : 0);
2805  m_vAmbBuf.push_back(amb_len);
2806  }
2807 }
2808 
2809 
2811 {
2812  // If there are no more unprocessed ambiguities, return.
2813  if (!m_LastAmbChar)
2814  return;
2815 
2816  Uint4 amb_element = m_LastAmbChar << 28;
2817  // In long format length occupies bits 16-27, and sequence position is stored
2818  // in the next integer element. In short format length occupies bits 24-27,
2819  // and sequence position is stored in the same integer element.
2820  if (m_bLongFormat) {
2821  amb_element |= (m_AmbCount << 16);
2822  m_vAmbBuf.push_back(amb_element);
2823  m_vAmbBuf.push_back(m_AmbStart);
2824  } else {
2825  amb_element |= (m_AmbCount << 24);
2826  amb_element |= m_AmbStart;
2827  m_vAmbBuf.push_back(amb_element);
2828  }
2829 }
2830 
2831 void CAmbiguityContext::AddAmbiguity(char in_byte, TSeqPos& seq_pos)
2832 {
2833  char res[2];
2834 
2835  res[0] = (in_byte >> 4) & 0x0f;
2836  res[1] = in_byte & 0x0f;
2837 
2838  for (int i = 0; i < 2; ++i, ++seq_pos) {
2839  if (kAmbig4na[(int)res[i]]) {
2840  if ((res[i] != m_LastAmbChar) || (m_AmbCount >= m_MaxAmbCount)) {
2841  // Finish the previous ambiguity element, start new;
2842  UpdateBuffer();
2843  m_LastAmbChar = res[i];
2844  m_AmbCount = 0;
2845  m_AmbStart = seq_pos;
2846  } else {
2847  // Just increment the count for the last ambiguity
2848  ++m_AmbCount;
2849  }
2850  } else {
2851  // No ambiguity: finish the previous ambiguity element, if any,
2852  // reset the m_LastAmbChar and count.
2853  UpdateBuffer();
2854  m_LastAmbChar = 0;
2855  m_AmbCount = 0;
2856  }
2857  }
2858 }
2859 
2861 {
2862  UpdateBuffer();
2863  // In the first element of the vector, preserve the top bit, and reset the
2864  // remainder to the number of ambiguity entries.
2865  m_vAmbBuf[0] =
2866  (m_vAmbBuf[0] & 0x80000000) | ((m_vAmbBuf.size() - 1) & 0x7fffffff);
2867 }
2868 
2869 // Function to convert iupacna (4 bytes) to ncbi2na (1 byte)
2871 (const CSeq_data& in_seq,
2872  CSeq_data* out_seq,
2873  TSeqPos uBeginIdx,
2874  TSeqPos uLength,
2875  bool bAmbig,
2877  TSeqPos total_length,
2878  TSeqPos* out_seq_length,
2879  vector<Uint4>* blast_ambig)
2880  const
2881 {
2882  // Get string holding the in_seq
2883  const string& in_seq_data = in_seq.GetIupacna().Get();
2884  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
2885 
2886  // Out sequence may contain unfinished byte from the previous segment
2887  if (out_seq_length != nullptr && *out_seq_length == 0)
2888  out_seq->Reset();
2889  // Get vector where the out sequence will go
2890  vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();
2891 
2892  // If uBeginIdx is after end of in_seq, return
2893  if(uBeginIdx >= in_seq_data.size())
2894  return 0;
2895 
2896  // Determine return value
2897  TSeqPos uLenSav = uLength;
2898  if((uLenSav == 0) || ((uLenSav + uBeginIdx)) > in_seq_data.size())
2899  uLenSav = in_seq_length - uBeginIdx;
2900 
2901 
2902  // Adjust uBeginIdx and uLength, if necessary and get uOverhang
2903  TSeqPos uOverhang =
2904  Adjust(&uBeginIdx, &uLength, in_seq_length, 1, 4);
2905 
2906  // Check if the output sequence data has already been filled
2907  // with some previous data, e.g. previous segment of a delta
2908  // sequence.
2909  TSeqPos out_seq_pos = 0;
2910  if (out_seq_length) {
2911  out_seq_pos = *out_seq_length;
2912  *out_seq_length += uLenSav;
2913  }
2914  TSeqPos rbit = 2*(out_seq_pos % 4);
2915  TSeqPos lbit = 8 - rbit;
2916 
2917  // Allocate vector memory for result of conversion
2918  // Note memory for overhang is added below.
2919  vector<char>::size_type nBytes = (out_seq_pos + uLenSav + 3) / 4;
2920  out_seq_data.resize(nBytes);
2921 
2922  // Instantiate an ambiguity context object, if BLAST-style
2923  // ambiguity output is requested.
2924  unique_ptr<CAmbiguityContext> amb_context;
2925  if (blast_ambig) {
2926  amb_context.reset(new CAmbiguityContext(*blast_ambig, total_length));
2927  }
2928 
2929  // Declare iterator for out_seq_data and determine begin and end
2930  vector<char>::iterator i_out;
2931  vector<char>::iterator i_out_begin = out_seq_data.begin() + out_seq_pos/4;
2932  vector<char>::iterator i_out_end = i_out_begin + uLength/4;
2933 
2934  // Determine begin of in_seq_data
2935  string::const_iterator i_in = in_seq_data.begin() + uBeginIdx;
2936 
2937  char new_byte;
2938  const int kOneByteMask = 0xff;
2939 
2940  if(bAmbig)
2941  {
2942  // Do random disambiguation
2943  unsigned char c1, c2;
2944  CRandom::TValue rv;
2945 
2946  // Declare a random number generator and set seed
2947  CRandom rg;
2948  rg.SetSeed(seed);
2949 
2950  // Do disambiguation by converting Iupacna to Ncbi4na
2951  // deterministically and then converting from Ncbi4na to Ncbi2na
2952  // with random disambiguation
2953 
2954  // Loop through the out_seq_data converting 4 Iupacna bytes to
2955  // one Ncbi2na byte. in_seq_data.size() % 4 bytes at end of
2956  // input handled separately below.
2957  for(i_out = i_out_begin; i_out != i_out_end; )
2958  {
2959  // Determine first Ncbi4na byte from 1st two Iupacna bytes
2960  c1 =
2961  m_FastIupacnaNcbi4na->m_Table
2962  [0][static_cast<unsigned char>(*i_in)] |
2963  m_FastIupacnaNcbi4na->m_Table
2964  [1][static_cast<unsigned char>(*(i_in+1))];
2965 
2966  // Determine second Ncbi4na byte from 2nd two Iupacna bytes
2967  c2 =
2968  m_FastIupacnaNcbi4na->m_Table
2969  [0][static_cast<unsigned char>(*(i_in+2))]|
2970  m_FastIupacnaNcbi4na->m_Table
2971  [1][static_cast<unsigned char>(*(i_in+3))];
2972 
2973  if (blast_ambig) {
2974  amb_context->AddAmbiguity(c1, out_seq_pos);
2975  amb_context->AddAmbiguity(c2, out_seq_pos);
2976  }
2977 
2978  // Randomly pick disambiguated Ncbi4na bytes
2979  rv = rg.GetRand() % 16;
2980  c1 &= m_Masks->m_Table[c1].cMask[rv];
2981  rv = rg.GetRand() % 16;
2982  c2 &= m_Masks->m_Table[c2].cMask[rv];
2983 
2984  // Convert from Ncbi4na to Ncbi2na
2985  // Calculate the new byte. Assign parts of it to the
2986  // remainder of the current output byte, and the
2987  // front part of the next output byte, advancing the
2988  // output iterator in the process.
2989  new_byte = m_FastNcbi4naNcbi2na->m_Table[0][c1] |
2990  m_FastNcbi4naNcbi2na->m_Table[1][c2];
2991  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
2992  ++i_out;
2993  // Fill part of next byte only if it is necessary, i.e. when
2994  // rbit is not 0.
2995  if (rbit)
2996  (*i_out) = ((new_byte & kOneByteMask) << lbit);
2997 
2998  // Increment input sequence iterator.
2999  i_in+=4;
3000  }
3001 
3002  // Handle overhang at end of in_seq
3003  switch (uOverhang) {
3004  case 1:
3005  c1 =
3006  m_FastIupacnaNcbi4na->m_Table
3007  [0][static_cast<unsigned char>(*i_in)];
3008  if (blast_ambig)
3009  amb_context->AddAmbiguity(c1, out_seq_pos);
3010  rv = rg.GetRand() % 16;
3011  c1 &= m_Masks->m_Table[c1].cMask[rv];
3012  new_byte = m_FastNcbi4naNcbi2na->m_Table[0][c1] & 0xC0;
3013  break;
3014  case 2:
3015  c1 =
3016  m_FastIupacnaNcbi4na->m_Table
3017  [0][static_cast<unsigned char>(*i_in)] |
3018  m_FastIupacnaNcbi4na->m_Table
3019  [1][static_cast<unsigned char>(*(i_in+1))];
3020  if (blast_ambig)
3021  amb_context->AddAmbiguity(c1, out_seq_pos);
3022  rv = rg.GetRand() % 16;
3023  c1 &= m_Masks->m_Table[c1].cMask[rv];
3024  new_byte = m_FastNcbi4naNcbi2na->m_Table[0][c1] & 0xF0;
3025  break;
3026  case 3:
3027  c1 =
3028  m_FastIupacnaNcbi4na->m_Table
3029  [0][static_cast<unsigned char>(*i_in)] |
3030  m_FastIupacnaNcbi4na->m_Table
3031  [1][static_cast<unsigned char>(*(i_in+1))];
3032  c2 =
3033  m_FastIupacnaNcbi4na->m_Table
3034  [0][static_cast<unsigned char>(*(i_in+2))];
3035  if (blast_ambig) {
3036  amb_context->AddAmbiguity(c1, out_seq_pos);
3037  amb_context->AddAmbiguity(c2, out_seq_pos);
3038  }
3039  rv = rg.GetRand() % 16;
3040  c1 &= m_Masks->m_Table[c1].cMask[rv];
3041  rv = rg.GetRand() % 16;
3042  c2 &= m_Masks->m_Table[c2].cMask[rv];
3043  new_byte = (m_FastNcbi4naNcbi2na->m_Table[0][c1] & 0xF0) |
3044  (m_FastNcbi4naNcbi2na->m_Table[1][c2] & 0x0C);
3045  break;
3046  default:
3047  // This is a bogus assignment, just to suppress a
3048  // compiler warning. The value will not actually be
3049  // used (see the "uOverhang > 0" condition below)
3050  new_byte = 0;
3051  break;
3052  }
3053 
3054  // Assign respective parts of the new byte to the remaining parts
3055  // of the output sequence. Output iterator only needs to be
3056  // incremented if the overhang is greater than the unfilled
3057  // remainder of the last output byte.
3058  if (uOverhang > 0) {
3059  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
3060  if (2*uOverhang > lbit) {
3061  ++i_out;
3062  (*i_out) = ((new_byte & kOneByteMask) << lbit);
3063  }
3064  }
3065 
3066  if (blast_ambig)
3067  amb_context->Finish();
3068  }
3069  else
3070  {
3071  // Pack uLength input characters into out_seq_data
3072  for(i_out = i_out_begin; i_out != i_out_end; )
3073  {
3074  new_byte =
3075  m_FastIupacnaNcbi2na->m_Table
3076  [0][static_cast<unsigned char>(*(i_in))] |
3077  m_FastIupacnaNcbi2na->m_Table
3078  [1][static_cast<unsigned char>(*(i_in+1))] |
3079  m_FastIupacnaNcbi2na->m_Table
3080  [2][static_cast<unsigned char>(*(i_in+2))] |
3081  m_FastIupacnaNcbi2na->m_Table
3082  [3][static_cast<unsigned char>(*(i_in+3))];
3083  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
3084  ++i_out;
3085  // Fill part of next byte only if it is necessary, i.e. when
3086  // rbit is not 0.
3087  if (rbit)
3088  (*i_out) = ((new_byte & kOneByteMask) << lbit);
3089  i_in+=4;
3090  }
3091 
3092  // Handle overhang
3093  if(uOverhang > 0) {
3094  new_byte = '\x00';
3095  for(TSeqPos i = 0; i < uOverhang; i++) {
3096  new_byte |=
3097  m_FastIupacnaNcbi2na->m_Table
3098  [i][static_cast<unsigned char>(*(i_in+i))];
3099  }
3100  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
3101  if (2*uOverhang > lbit) {
3102  ++i_out;
3103  (*i_out) = ((new_byte & kOneByteMask) << lbit);
3104  }
3105  }
3106  }
3107  return uLenSav;
3108 }
3109 /*
3110 
3111 // Function to convert iupacna (2 bytes) to ncbi4na (1 byte)
3112 TSeqPos CSeqportUtil_implementation::MapIupacnaToNcbi4na
3113 (const CSeq_data& in_seq,
3114  CSeq_data* out_seq,
3115  TSeqPos uBeginIdx,
3116  TSeqPos uLength)
3117  const
3118 {
3119  // Get string holding the in_seq
3120  const string& in_seq_data = in_seq.GetIupacna().Get();
3121 
3122  // Get vector where the out sequence will go
3123  out_seq->Reset();
3124  vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
3125 
3126  // If uBeginIdx beyond end of in_seq, return
3127  if(uBeginIdx >= in_seq_data.size())
3128  return 0;
3129 
3130  // Determine return value
3131  TSeqPos uLenSav = uLength;
3132  if((uLenSav == 0) || (uLenSav + uBeginIdx) > in_seq_data.size())
3133  uLenSav = in_seq_data.size() - uBeginIdx;
3134 
3135  // Adjust uBeginIdx and uLength and get uOverhang
3136  TSeqPos uOverhang =
3137  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 2);
3138 
3139  // Allocate vector memory for result of conversion
3140  // Note memory for overhang is added below.
3141  vector<char>::size_type nBytes = uLength/2;
3142  out_seq_data.resize(nBytes);
3143 
3144  // Declare iterator for out_seq_data and determine begin and end
3145  vector<char>::iterator i_out;
3146  vector<char>::iterator i_out_begin = out_seq_data.begin();
3147  vector<char>::iterator i_out_end = i_out_begin + uLength/2;
3148 
3149  // Determine begin of in_seq_data offset by 1
3150  string::const_iterator i_in = in_seq_data.begin() + uBeginIdx;
3151 
3152  // Pack uLength input characters into out_seq_data
3153  for(i_out = i_out_begin; i_out != i_out_end; ++i_out) {
3154  (*i_out) =
3155  m_FastIupacnaNcbi4na->m_Table
3156  [0][static_cast<unsigned char>(*(i_in))] |
3157  m_FastIupacnaNcbi4na->m_Table
3158  [1][static_cast<unsigned char>(*(i_in+1))];
3159  i_in+=2;
3160  }
3161 
3162  // Handle overhang
3163  char ch = '\x00';
3164  if (uOverhang > 0) {
3165  ch |=
3166  m_FastIupacnaNcbi4na->
3167  m_Table[0][static_cast<unsigned char>(*i_in)];
3168  out_seq_data.push_back(ch);
3169  }
3170 
3171  return uLenSav;
3172 }
3173 */
3174 
3175 
3176 // Function to convert ncbi4na (2 bytes) to ncbi2na (1 byte)
3178  const CSeq_data& in_seq, CSeq_data* out_seq,
3179  TSeqPos uBeginIdx, TSeqPos uLength,
3180  bool bAmbig, CRandom::TValue seed,
3181  TSeqPos total_length,
3182  TSeqPos* out_seq_length, vector<Uint4>* blast_ambig)
3183  const
3184 {
3185  // Get vector holding the in_seq
3186  const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();
3187  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
3188 
3189  // Out sequence may contain unfinished byte from a previous segment.
3190  if (out_seq_length != nullptr && *out_seq_length == 0)
3191  out_seq->Reset();
3192  // Get vector where the out sequence will go
3193  vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();
3194 
3195  // Save uBeginIdx and uLength as they will be modified below
3196  TSeqPos uBeginSav = uBeginIdx;
3197  TSeqPos uLenSav = uLength;
3198 
3199 
3200  // Check that uBeginSav is not beyond end of in_seq
3201  if(uBeginSav >= 2*in_seq_data.size())
3202  return 0;
3203 
3204  // Adjust uLenSav if needed
3205  if((uLenSav == 0) || ((uBeginSav + uLenSav) > 2*in_seq_data.size()))
3206  uLenSav = 2*in_seq_length - uBeginSav;
3207 
3208  // Adjust uBeginIdx and uLength and get uOverhang
3209  TSeqPos uOverhang =
3210  Adjust(&uBeginIdx, &uLength, in_seq_length, 2, 4);
3211 
3212  // Check if the output sequence data has already been filled
3213  // with some previous data, e.g. previous segment of a delta
3214  // sequence.
3215  TSeqPos out_seq_pos = 0;
3216  if (out_seq_length) {
3217  out_seq_pos = *out_seq_length;
3218  *out_seq_length += uLenSav;
3219  }
3220  TSeqPos rbit = 2*(out_seq_pos % 4);
3221  TSeqPos lbit = 8 - rbit;
3222 
3223  // Allocate vector memory for result of conversion
3224  // Note memory for overhang is added below.
3225  vector<char>::size_type nBytes = (out_seq_pos + uLenSav + 3) / 4;
3226  out_seq_data.resize(nBytes);
3227 
3228  // Instantiate an ambiguity context object, if BLAST-style
3229  // ambiguity output is requested.
3230  unique_ptr<CAmbiguityContext> amb_context;
3231  if (blast_ambig) {
3232  amb_context.reset(new CAmbiguityContext(*blast_ambig, total_length));
3233  }
3234 
3235  // Declare iterator for out_seq_data and determine begin and end
3236  vector<char>::iterator i_out;
3237  vector<char>::iterator i_out_begin = out_seq_data.begin() + out_seq_pos/4;
3238  vector<char>::iterator i_out_end = i_out_begin + uLength/4;
3239 
3240  // Make sure that the first byte of out_seq_data does not contain garbage
3241  // in the bits that are not yet supposed to be filled.
3242  *i_out_begin &= (0xff << lbit);
3243 
3244  // Determine begin of in_seq_data
3245  vector<char>::const_iterator i_in = in_seq_data.begin() + uBeginIdx/2;
3246 
3247  char new_byte;
3248  const int kOneByteMask = 0xff;
3249 
3250  if(bAmbig) { // Do random disambiguation
3251  // Declare a random number generator and set seed
3252  CRandom rg;
3253  rg.SetSeed(seed);
3254 
3255  // Pack uLength input bytes into out_seq_data
3256  for(i_out = i_out_begin; i_out != i_out_end; ) {
3257  // Disambiguate
3258  unsigned char c1 = static_cast<unsigned char>(*i_in);
3259  unsigned char c2 = static_cast<unsigned char>(*(i_in+1));
3260 
3261  if (blast_ambig) {
3262  amb_context->AddAmbiguity(c1, out_seq_pos);
3263  amb_context->AddAmbiguity(c2, out_seq_pos);
3264  }
3265  CRandom::TValue rv = rg.GetRand() % 16;
3266  c1 &= m_Masks->m_Table[c1].cMask[rv];
3267  rv = rg.GetRand() % 16;
3268  c2 &= m_Masks->m_Table[c2].cMask[rv];
3269 
3270  // Convert
3271  new_byte = m_FastNcbi4naNcbi2na->m_Table[0][c1] |
3272  m_FastNcbi4naNcbi2na->m_Table[1][c2];
3273  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
3274  ++i_out;
3275  // Fill part of next byte only if it is necessary, i.e. when
3276  // rbit is not 0.
3277  if (rbit)
3278  (*i_out) = ((new_byte & kOneByteMask) << lbit);
3279  i_in+=2;
3280  }
3281 
3282  // Handle overhang
3283  new_byte = '\x00';
3284 
3285  if(uOverhang > 0) {
3286  // Disambiguate
3287  unsigned char c1 = static_cast<unsigned char>(*i_in);
3288  // If only one residue, make sure that the second half of the byte
3289  // is 0.
3290  if (uOverhang == 1)
3291  c1 &= 0xf0;
3292  if (blast_ambig)
3293  amb_context->AddAmbiguity(c1, out_seq_pos);
3294  CRandom::TValue rv = rg.GetRand() % 16;
3295  c1 &= m_Masks->m_Table[c1].cMask[rv];
3296 
3297  // Convert
3298  new_byte |= m_FastNcbi4naNcbi2na->m_Table[0][c1];
3299  }
3300 
3301  if(uOverhang == 3) {
3302  // Disambiguate; make sure that the second half of the byte
3303  // is 0.
3304  unsigned char c1 = static_cast<unsigned char>(*(++i_in)) & 0xf0;
3305  if (blast_ambig)
3306  amb_context->AddAmbiguity(c1, out_seq_pos);
3307  CRandom::TValue rv = rg.GetRand() % 16;
3308  c1 &= m_Masks->m_Table[c1].cMask[rv];
3309 
3310  // Convert
3311  new_byte |= m_FastNcbi4naNcbi2na->m_Table[1][c1];
3312  }
3313 
3314  if(uOverhang > 0) {
3315  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
3316  if (2*uOverhang > lbit) {
3317  ++i_out;
3318  (*i_out) = ((new_byte & kOneByteMask) << lbit);
3319  }
3320  }
3321 
3322  if (blast_ambig)
3323  amb_context->Finish();
3324  } else { // Do not do random disambiguation
3325 
3326  // Pack uLength input bytes into out_seq_data
3327  for(i_out = i_out_begin; i_out != i_out_end; ) {
3328  new_byte =
3329  m_FastNcbi4naNcbi2na->m_Table
3330  [0][static_cast<unsigned char>(*i_in)] |
3331  m_FastNcbi4naNcbi2na->m_Table
3332  [1][static_cast<unsigned char>(*(i_in+1))];
3333  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
3334  ++i_out;
3335  // Fill part of next byte only if it is necessary, i.e. when
3336  // rbit is not 0.
3337  if (rbit)
3338  (*i_out) = ((new_byte & kOneByteMask) << lbit);
3339  i_in+=2;
3340  }
3341 
3342  // Handle overhang
3343  if(uOverhang > 0) {
3344  new_byte = '\x00';
3345  new_byte |= m_FastNcbi4naNcbi2na->m_Table
3346  [0][static_cast<unsigned char>(*i_in)];
3347 
3348  if(uOverhang == 3)
3349  new_byte |= m_FastNcbi4naNcbi2na->m_Table
3350  [1][static_cast<unsigned char>(*(++i_in))];
3351 
3352  (*i_out) |= ((new_byte & kOneByteMask) >> rbit);
3353  if (2*uOverhang > lbit) {
3354  ++i_out;
3355  (*i_out) = ((new_byte & kOneByteMask) << lbit);
3356  }
3357  }
3358  }
3359 
3360  TSeqPos keepidx = uBeginSav - uBeginIdx;
3361  KeepNcbi2na(out_seq, keepidx, uLenSav);
3362 
3363  return uLenSav;
3364 }
3365 
3366 /*
3367 // Function to convert iupacaa (byte) to ncbieaa (byte)
3368 TSeqPos CSeqportUtil_implementation::MapIupacaaToNcbieaa
3369 (const CSeq_data& in_seq,
3370  CSeq_data* out_seq,
3371  TSeqPos uBeginIdx,
3372  TSeqPos uLength)
3373  const
3374 {
3375  // Get read-only reference to in_seq data
3376  const string& in_seq_data = in_seq.GetIupacaa().Get();
3377 
3378  // Get read & write reference to out_seq data
3379  out_seq->Reset();
3380  string& out_seq_data = out_seq->SetNcbieaa().Set();
3381 
3382  // If uBeginIdx beyond end of in_seq, return
3383  if(uBeginIdx >= in_seq_data.size())
3384  return 0;
3385 
3386  // Adjust uBeginIdx and uLength, if necessary
3387  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
3388 
3389  // Allocate memory for out_seq
3390  out_seq_data.resize(uLength);
3391 
3392  // Declare iterator for out_seq_data
3393  string::iterator i_out = out_seq_data.begin();
3394 
3395  // Declare iterator for in_seq_data and determine begin and end
3396  string::const_iterator i_in;
3397  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
3398  string::const_iterator i_in_end = i_in_begin + uLength;
3399 
3400  // Loop through input and convert to output
3401  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3402  (*(i_out++)) =
3403  m_IupacaaNcbieaa->m_Table[static_cast<unsigned char>(*i_in)];
3404 
3405  return uLength;
3406 }
3407 
3408 
3409 // Function to convert ncbieaa (byte) to iupacaa (byte)
3410 TSeqPos CSeqportUtil_implementation::MapNcbieaaToIupacaa
3411 (const CSeq_data& in_seq,
3412  CSeq_data* out_seq,
3413  TSeqPos uBeginIdx,
3414  TSeqPos uLength)
3415  const
3416 {
3417  // Get read-only reference to in_seq data
3418  const string& in_seq_data = in_seq.GetNcbieaa().Get();
3419 
3420  // Get read & write reference to out_seq data
3421  out_seq->Reset();
3422  string& out_seq_data = out_seq->SetIupacaa().Set();
3423 
3424  // If uBeginIdx beyond end of in_seq, return
3425  if(uBeginIdx >= in_seq_data.size())
3426  return 0;
3427 
3428  // Adjust uBeginIdx and uLength, if necessary
3429  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
3430 
3431  // Allocate memory for out_seq
3432  out_seq_data.resize(uLength);
3433 
3434  // Declare iterator for out_seq_data
3435  string::iterator i_out = out_seq_data.begin();
3436 
3437  // Declare iterator for in_seq_data and determine begin and end
3438  string::const_iterator i_in;
3439  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
3440  string::const_iterator i_in_end = i_in_begin + uLength;
3441 
3442  // Loop through input and convert to output
3443  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3444  (*(i_out++)) =
3445  m_NcbieaaIupacaa->m_Table[static_cast<unsigned char>(*i_in)];
3446 
3447  return uLength;
3448 }
3449 
3450 
3451 // Function to convert iupacaa (byte) to ncbistdaa (byte)
3452 TSeqPos CSeqportUtil_implementation::MapIupacaaToNcbistdaa
3453 (const CSeq_data& in_seq,
3454  CSeq_data* out_seq,
3455  TSeqPos uBeginIdx,
3456  TSeqPos uLength)
3457  const
3458 {
3459  // Get read-only reference to in_seq data
3460  const string& in_seq_data = in_seq.GetIupacaa().Get();
3461 
3462  // Get read & write reference to out_seq data
3463  out_seq->Reset();
3464  vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();
3465 
3466  // If uBeginIdx beyond end of in_seq, return
3467  if(uBeginIdx >= in_seq_data.size())
3468  return 0;
3469 
3470  // Adjust uBeginIdx and uLength, if necessary
3471  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
3472 
3473  // Allocate memory for out_seq
3474  out_seq_data.resize(uLength);
3475 
3476  // Declare iterator for out_seq_data
3477  vector<char>::iterator i_out = out_seq_data.begin();
3478 
3479  // Declare iterator for in_seq_data and determine begin and end
3480  string::const_iterator i_in;
3481  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
3482  string::const_iterator i_in_end = i_in_begin + uLength;
3483 
3484  // Loop through input and convert to output
3485  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3486  (*(i_out++)) =
3487  m_IupacaaNcbistdaa->m_Table[static_cast<unsigned char>(*i_in)];
3488 
3489  return uLength;
3490 }
3491 
3492 
3493 
3494 
3495 
3496 // Function to convert ncbieaa (byte) to ncbistdaa (byte)
3497 TSeqPos CSeqportUtil_implementation::MapNcbieaaToNcbistdaa
3498 (const CSeq_data& in_seq,
3499  CSeq_data* out_seq,
3500  TSeqPos uBeginIdx,
3501  TSeqPos uLength)
3502  const
3503 {
3504  // Get read-only reference to in_seq data
3505  const string& in_seq_data = in_seq.GetNcbieaa().Get();
3506 
3507  // Get read & write reference to out_seq data
3508  out_seq->Reset();
3509  vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();
3510 
3511  // If uBeginIdx beyond end of in_seq, return
3512  if(uBeginIdx >= in_seq_data.size())
3513  return 0;
3514 
3515  // Adjust uBeginIdx and uLength, if necessary
3516  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
3517 
3518  // Allocate memory for out_seq
3519  out_seq_data.resize(uLength);
3520 
3521  // Declare iterator for out_seq_data
3522  vector<char>::iterator i_out = out_seq_data.begin();
3523 
3524  // Declare iterator for in_seq_data and determine begin and end
3525  string::const_iterator i_in;
3526  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
3527  string::const_iterator i_in_end = i_in_begin + uLength;
3528 
3529  // Loop through input and convert to output
3530  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3531  (*(i_out++)) =
3532  m_NcbieaaNcbistdaa->m_Table[static_cast<unsigned char>(*i_in)];
3533 
3534  return uLength;
3535 }
3536 
3537 
3538 // Function to convert ncbistdaa (byte) to ncbieaa (byte)
3539 TSeqPos CSeqportUtil_implementation::MapNcbistdaaToNcbieaa
3540 (const CSeq_data& in_seq,
3541  CSeq_data* out_seq,
3542  TSeqPos uBeginIdx,
3543  TSeqPos uLength)
3544  const
3545 {
3546  // Get read-only reference to in_seq data
3547  const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
3548 
3549  // Get read & write reference to out_seq data
3550  out_seq->Reset();
3551  string& out_seq_data = out_seq->SetNcbieaa().Set();
3552 
3553  // If uBeginIdx beyond end of in_seq, return
3554  if(uBeginIdx >= in_seq_data.size())
3555  return 0;
3556 
3557  // Adjust uBeginIdx and uLength if necessary
3558  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
3559 
3560  // Allocate memory for out_seq
3561  out_seq_data.resize(uLength);
3562 
3563  // Get iterator for out_seq_data
3564  string::iterator i_out = out_seq_data.begin();
3565 
3566  // Declare iterator for in_seq_data and determine begin and end
3567  vector<char>::const_iterator i_in;
3568  vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
3569  vector<char>::const_iterator i_in_end = i_in_begin + uLength;
3570 
3571  // Loop through input and convert to output
3572  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3573  *(i_out++) =
3574  m_NcbistdaaNcbieaa->m_Table[static_cast<unsigned char>(*i_in)];
3575 
3576  return uLength;
3577 }
3578 
3579 
3580 // Function to convert ncbistdaa (byte) to iupacaa (byte)
3581 TSeqPos CSeqportUtil_implementation::MapNcbistdaaToIupacaa
3582 (const CSeq_data& in_seq,
3583  CSeq_data* out_seq,
3584  TSeqPos uBeginIdx,
3585  TSeqPos uLength)
3586  const
3587 {
3588  // Get read-only reference to in_seq data
3589  const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
3590 
3591  // Get read & write reference to out_seq data
3592  out_seq->Reset();
3593  string& out_seq_data = out_seq->SetIupacaa().Set();
3594 
3595  // If uBeginIdx beyond end of in_seq, return
3596  if(uBeginIdx >= in_seq_data.size())
3597  return 0;
3598 
3599  // Adjust uBeginIdx and uLength
3600  Adjust(&uBeginIdx, &uLength, in_seq_data.size(), 1, 1);
3601 
3602  // Allocate memory for out_seq
3603  out_seq_data.resize(uLength);
3604 
3605  // Get iterator for out_seq_data
3606  string::iterator i_out = out_seq_data.begin();
3607 
3608  // Declare iterator for in_seq_data and determine begin and end
3609  vector<char>::const_iterator i_in;
3610  vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
3611  vector<char>::const_iterator i_in_end = i_in_begin + uLength;
3612 
3613  // Loop through input and convert to output
3614  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3615  (*(i_out++)) =
3616  m_NcbistdaaIupacaa->m_Table[static_cast<unsigned char>(*i_in)];
3617 
3618  return uLength;
3619 }
3620 */
3621 
3622 // Fast validation of iupacna sequence
3624 (const CSeq_data& in_seq,
3625  TSeqPos uBeginIdx,
3626  TSeqPos uLength)
3627  const
3628 {
3629  // Get read-only reference to in_seq data
3630  const string& in_seq_data = in_seq.GetIupacna().Get();
3631 
3632  // Check that uBeginIdx is not beyond end of in_seq
3633  if(uBeginIdx >= in_seq_data.size())
3634  return true;
3635 
3636  // Adjust uBeginIdx, uLength
3637  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3638  1, 1);
3639 
3640  // Declare in iterator on in_seq and determine begin and end
3641  string::const_iterator itor;
3642  string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3643  string::const_iterator e_itor = b_itor + uLength;
3644 
3645  // Perform Fast Validation
3646  unsigned char ch = '\x00';
3647  for(itor = b_itor; itor != e_itor; ++itor)
3648  ch |= m_Iupacna->m_Table[static_cast<unsigned char>(*itor)];
3649 
3650  // Return true if valid, otherwise false
3651  return (ch != 255);
3652 }
3653 
3654 
3656 (const CSeq_data& in_seq,
3657  TSeqPos uBeginIdx,
3658  TSeqPos uLength)
3659  const
3660 {
3661  // Get read-only reference to in_seq data
3662  const string& in_seq_data = in_seq.GetNcbieaa().Get();
3663 
3664  // Check that uBeginIdx is not beyond end of in_seq
3665  if(uBeginIdx >= in_seq_data.size())
3666  return true;
3667 
3668  // Adjust uBeginIdx, uLength
3669  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3670  1, 1);
3671 
3672  // Declare in iterator on in_seq and determine begin and end
3673  string::const_iterator itor;
3674  string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3675  string::const_iterator e_itor = b_itor + uLength;
3676 
3677  // Perform Fast Validation
3678  unsigned char ch = '\x00';
3679  for(itor = b_itor; itor != e_itor; ++itor)
3680  ch |= m_Ncbieaa->m_Table[static_cast<unsigned char>(*itor)];
3681 
3682  // Return true if valid, otherwise false
3683  return (ch != 255);
3684 
3685 }
3686 
3687 
3689 (const CSeq_data& in_seq,
3690  TSeqPos uBeginIdx,
3691  TSeqPos uLength)
3692  const
3693 {
3694  // Get read-only reference to in_seq data
3695  const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
3696 
3697  // Check that uBeginIdx is not beyond end of in_seq
3698  if(uBeginIdx >= in_seq_data.size())
3699  return true;
3700 
3701  // Adjust uBeginIdx, uLength
3702  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3703  1, 1);
3704 
3705  // Declare in iterator on in_seq and determine begin and end
3706  vector<char>::const_iterator itor;
3707  vector<char>::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3708  vector<char>::const_iterator e_itor = b_itor + uLength;
3709 
3710  // Perform Fast Validation
3711  unsigned char ch = '\x00';
3712  for(itor = b_itor; itor != e_itor; ++itor)
3713  ch |= m_Ncbistdaa->m_Table[static_cast<unsigned char>(*itor)];
3714 
3715  // Return true if valid, otherwise false
3716  return (ch != 255);
3717 
3718 }
3719 
3720 
3722 (const CSeq_data& in_seq,
3723  TSeqPos uBeginIdx,
3724  TSeqPos uLength)
3725  const
3726 {
3727  // Get read-only reference to in_seq data
3728  const string& in_seq_data = in_seq.GetIupacaa().Get();
3729 
3730  // Check that uBeginIdx is not beyond end of in_seq
3731  if(uBeginIdx >= in_seq_data.size())
3732  return true;
3733 
3734  // Adjust uBeginIdx, uLength
3735  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3736  1, 1);
3737 
3738  // Declare in iterator on in_seq and determine begin and end
3739  string::const_iterator itor;
3740  string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3741  string::const_iterator e_itor = b_itor + uLength;
3742 
3743  // Perform Fast Validation
3744  unsigned char ch = '\x00';
3745  for(itor=b_itor; itor!=e_itor; ++itor)
3746  ch |= m_Iupacaa->m_Table[static_cast<unsigned char>(*itor)];
3747 
3748  // Return true if valid, otherwise false
3749  return (ch != 255);
3750 }
3751 
3752 
3754 (const CSeq_data& in_seq,
3755  vector<TSeqPos>* badIdx,
3756  TSeqPos uBeginIdx,
3757  TSeqPos uLength)
3758  const
3759 {
3760  // Get read-only reference to in_seq data
3761  const string& in_seq_data = in_seq.GetIupacna().Get();
3762 
3763  // clear out_indices
3764  badIdx->clear();
3765 
3766  // Check that uBeginIdx is not beyond end of in_seq
3767  if(uBeginIdx >= in_seq_data.size())
3768  return;
3769 
3770  // Adjust uBeginIdx, uLength
3771  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3772  1, 1);
3773 
3774  // Declare in iterator on in_seq and determine begin and end
3775  string::const_iterator itor;
3776  string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3777  string::const_iterator e_itor = b_itor + uLength;
3778 
3779  // Perform Validation
3780  TSeqPos nIdx = uBeginIdx;
3781  for(itor = b_itor; itor != e_itor; ++itor)
3782  if(m_Iupacna->m_Table[static_cast<unsigned char>(*itor)] == char(255))
3783  badIdx->push_back(nIdx++);
3784  else
3785  nIdx++;
3786 
3787  // Return list of bad indices
3788  return;
3789 }
3790 
3791 
3793 (const CSeq_data& in_seq,
3794  vector<TSeqPos>* badIdx,
3795  TSeqPos uBeginIdx,
3796  TSeqPos uLength)
3797  const
3798 {
3799  // Get read-only reference to in_seq data
3800  const string& in_seq_data = in_seq.GetNcbieaa().Get();
3801 
3802  // clear badIdx
3803  badIdx->clear();
3804 
3805  // Check that uBeginIdx is not beyond end of in_seq
3806  if(uBeginIdx >= in_seq_data.size())
3807  return;
3808 
3809  // Adjust uBeginIdx, uLength
3810  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3811  1, 1);
3812 
3813  // Declare in iterator on in_seq and determine begin and end
3814  string::const_iterator itor;
3815  string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3816  string::const_iterator e_itor = b_itor + uLength;
3817 
3818  // Perform Validation
3819  TSeqPos nIdx = uBeginIdx;
3820  for(itor = b_itor; itor != e_itor; ++itor)
3821  if(m_Ncbieaa->m_Table[static_cast<unsigned char>(*itor)] == char(255))
3822  badIdx->push_back(nIdx++);
3823  else
3824  nIdx++;
3825 
3826  // Return vector of bad indices
3827  return;
3828 }
3829 
3830 
3832 (const CSeq_data& in_seq,
3833  vector<TSeqPos>* badIdx,
3834  TSeqPos uBeginIdx,
3835  TSeqPos uLength)
3836  const
3837 {
3838  // Get read-only reference to in_seq data
3839  const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
3840 
3841  // Create a vector to return
3842  badIdx->clear();
3843 
3844  // Check that uBeginIdx is not beyond end of in_seq
3845  if(uBeginIdx >= in_seq_data.size())
3846  return;
3847 
3848  // Adjust uBeginIdx, uLength
3849  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3850  1, 1);
3851 
3852  // Declare in iterator on in_seq and determine begin and end
3853  vector<char>::const_iterator itor;
3854  vector<char>::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3855  vector<char>::const_iterator e_itor = b_itor + uLength;
3856 
3857  // Perform Validation
3858  TSeqPos nIdx = uBeginIdx;
3859  for(itor=b_itor; itor!=e_itor; ++itor)
3860  if(m_Ncbistdaa->m_Table[static_cast<unsigned char>(*itor)]==char(255))
3861  badIdx->push_back(nIdx++);
3862  else
3863  nIdx++;
3864 
3865  // Return vector of bad indices
3866  return;
3867 }
3868 
3869 
3871 (const CSeq_data& in_seq,
3872  vector<TSeqPos>* badIdx,
3873  TSeqPos uBeginIdx,
3874  TSeqPos uLength)
3875  const
3876 {
3877  // Get read-only reference to in_seq data
3878  const string& in_seq_data = in_seq.GetIupacaa().Get();
3879 
3880  // Create a vector to return
3881  badIdx->clear();
3882 
3883  // Check that uBeginIdx is not beyond end of in_seq
3884  if(uBeginIdx >= in_seq_data.size())
3885  return;
3886 
3887  // Adjust uBeginIdx, uLength
3888  Adjust(&uBeginIdx, &uLength, static_cast<TSeqPos>(in_seq_data.size()),
3889  1, 1);
3890 
3891  // Declare in iterator on in_seq and determine begin and end
3892  string::const_iterator itor;
3893  string::const_iterator b_itor = in_seq_data.begin() + uBeginIdx;
3894  string::const_iterator e_itor = b_itor + uLength;
3895 
3896  // Perform Validation
3897  TSeqPos nIdx = uBeginIdx;
3898  for(itor=b_itor; itor!=e_itor; ++itor)
3899  if(m_Iupacaa->m_Table[static_cast<unsigned char>(*itor)] == char(255))
3900  badIdx->push_back(nIdx++);
3901  else
3902  nIdx++;
3903 
3904  // Return vector of bad indices
3905  return;
3906 }
3907 
3908 
3909 // Function to make copy of ncbi2na type sequences
3911 (const CSeq_data& in_seq,
3912  CSeq_data* out_seq,
3913  TSeqPos uBeginIdx,
3914  TSeqPos uLength)
3915  const
3916 {
3917  // Get reference to out_seq data
3918  out_seq->Reset();
3919  vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();
3920 
3921  // Get reference to in_seq data
3922  const vector<char>& in_seq_data = in_seq.GetNcbi2na().Get();
3923 
3924  // Return if uBeginIdx is after end of in_seq
3925  if(uBeginIdx >= 4 * in_seq_data.size())
3926  return 0;
3927 
3928  // Set uLength to actual valid length in out_seq
3929  if( (uLength ==0) || ((uBeginIdx + uLength) > (4*in_seq_data.size() )) )
3930  uLength = 4 * static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
3931 
3932  // Allocate memory for out_seq data
3933  if((uLength % 4) == 0)
3934  out_seq_data.resize(uLength/4);
3935  else
3936  out_seq_data.resize(uLength/4 + 1);
3937 
3938  // Get iterator on out_seq_data
3939  vector<char>::iterator i_out = out_seq_data.begin() - 1;
3940 
3941  // Calculate amounts to shift bits
3942  unsigned int lShift, rShift;
3943  lShift = 2*(uBeginIdx % 4);
3944  rShift = 8 - lShift;
3945 
3946  // Get interators on in_seq
3947  vector<char>::const_iterator i_in;
3948  vector<char>::const_iterator i_in_begin =
3949  in_seq_data.begin() + uBeginIdx/4;
3950 
3951  // Determine number of input bytes to process
3952  SIZE_TYPE uNumBytes = uLength/4;
3953  if((uLength % 4) != 0)
3954  ++uNumBytes;
3955 
3956  // Prevent access beyond end of in_seq_data
3957  bool bDoLastByte = false;
3958  if((uBeginIdx/4 + uNumBytes) >= in_seq_data.size())
3959  {
3960  uNumBytes = in_seq_data.size() - uBeginIdx/4 - 1;
3961  bDoLastByte = true;
3962  }
3963  vector<char>::const_iterator i_in_end = i_in_begin + uNumBytes;
3964 
3965  // Loop through input sequence and copy to output sequence
3966  if(lShift > 0)
3967  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3968  (*(++i_out)) =
3969  ((*i_in) << lShift) | (((*(i_in+1)) & 255) >> rShift);
3970  else
3971  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
3972  (*(++i_out)) = (*i_in);
3973 
3974  // Handle last input byte if necessary
3975  if(bDoLastByte)
3976  (*(++i_out)) = (*i_in) << lShift;
3977 
3978  return uLength;
3979 }
3980 
3981 
3982 // Function to make copy of ncbi4na type sequences
3984 (const CSeq_data& in_seq,
3985  CSeq_data* out_seq,
3986  TSeqPos uBeginIdx,
3987  TSeqPos uLength)
3988  const
3989 {
3990  // Get reference to out_seq data
3991  out_seq->Reset();
3992  vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
3993 
3994  // Get reference to in_seq data
3995  const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();
3996 
3997  // Return if uBeginIdx is after end of in_seq
3998  if(uBeginIdx >= 2 * in_seq_data.size())
3999  return 0;
4000 
4001  // Set uLength to actual valid length in out_seq
4002  if( (uLength ==0) || ((uBeginIdx + uLength) > (2*in_seq_data.size() )) )
4003  uLength = 2 * static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
4004 
4005  // Allocate memory for out_seq data
4006  if((uLength % 2) == 0)
4007  out_seq_data.resize(uLength/2);
4008  else
4009  out_seq_data.resize(uLength/2 + 1);
4010 
4011 
4012  // Get iterator on out_seq_data
4013  vector<char>::iterator i_out = out_seq_data.begin() - 1;
4014 
4015  // Calculate amounts to shift bits
4016  unsigned int lShift, rShift;
4017  lShift = 4*(uBeginIdx % 2);
4018  rShift = 8 - lShift;
4019 
4020  // Get interators on in_seq
4021  vector<char>::const_iterator i_in;
4022  vector<char>::const_iterator i_in_begin =
4023  in_seq_data.begin() + uBeginIdx/2;
4024 
4025  // Determine number of input bytes to process
4026  SIZE_TYPE uNumBytes = uLength/2;
4027  if((uLength % 2) != 0)
4028  ++uNumBytes;
4029 
4030  // Prevent access beyond end of in_seq_data
4031  bool bDoLastByte = false;
4032  if((uBeginIdx/2 + uNumBytes) >= in_seq_data.size())
4033  {
4034  uNumBytes = in_seq_data.size() - uBeginIdx/2 - 1;
4035  bDoLastByte = true;
4036  }
4037  vector<char>::const_iterator i_in_end = i_in_begin + uNumBytes;
4038 
4039  // Loop through input sequence and copy to output sequence
4040  if(lShift > 0)
4041  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
4042  (*(++i_out)) =
4043  ((*i_in) << lShift) | (((*(i_in+1)) & 255) >> rShift);
4044  else
4045  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
4046  (*(++i_out)) = (*i_in);
4047 
4048  // Handle last input byte
4049  if(bDoLastByte)
4050  (*(++i_out)) = (*i_in) << lShift;
4051 
4052  return uLength;
4053 }
4054 
4055 
4056 // Function to make copy of iupacna type sequences
4058 (const CSeq_data& in_seq,
4059  CSeq_data* out_seq,
4060  TSeqPos uBeginIdx,
4061  TSeqPos uLength)
4062  const
4063 {
4064  // Get reference to out_seq data
4065  out_seq->Reset();
4066  string& out_seq_data = out_seq->SetIupacna().Set();
4067 
4068  // Get reference to in_seq data
4069  const string& in_seq_data = in_seq.GetIupacna().Get();
4070 
4071  // Return if uBeginIdx is after end of in_seq
4072  if(uBeginIdx >= in_seq_data.size())
4073  return 0;
4074 
4075  // Set uLength to actual valid length in out_seq
4076  if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
4077  uLength = static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
4078 
4079  // Allocate memory for out_seq data
4080  out_seq_data.resize(uLength);
4081 
4082  // Get iterator on out_seq_data
4083  string::iterator i_out = out_seq_data.begin() - 1;
4084 
4085  // Get interators on in_seq
4086  string::const_iterator i_in;
4087  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
4088  string::const_iterator i_in_end = i_in_begin + uLength;
4089 
4090  // Loop through input sequence and copy to output sequence
4091  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
4092  (*(++i_out)) = (*i_in);
4093 
4094  return uLength;
4095 }
4096 
4097 
4098 // Function to make copy of ncbieaa type sequences
4100 (const CSeq_data& in_seq,
4101  CSeq_data* out_seq,
4102  TSeqPos uBeginIdx,
4103  TSeqPos uLength)
4104  const
4105 {
4106  // Get reference to out_seq data
4107  out_seq->Reset();
4108  string& out_seq_data = out_seq->SetNcbieaa().Set();
4109 
4110  // Get reference to in_seq data
4111  const string& in_seq_data = in_seq.GetNcbieaa().Get();
4112 
4113  // Return if uBeginIdx is after end of in_seq
4114  if(uBeginIdx >= in_seq_data.size())
4115  return 0;
4116 
4117  // Set uLength to actual valid length in out_seq
4118  if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
4119  uLength = static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
4120 
4121  // Allocate memory for out_seq data
4122  out_seq_data.resize(uLength);
4123 
4124  // Get iterator on out_seq_data
4125  string::iterator i_out = out_seq_data.begin() - 1;
4126 
4127  // Get interators on in_seq
4128  string::const_iterator i_in;
4129  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
4130  string::const_iterator i_in_end = i_in_begin + uLength;
4131 
4132  // Loop through input sequence and copy to output sequence
4133  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
4134  (*(++i_out)) = (*i_in);
4135 
4136  return uLength;
4137 }
4138 
4139 
4140 // Function to make copy of ncbistdaa type sequences
4142 (const CSeq_data& in_seq,
4143  CSeq_data* out_seq,
4144  TSeqPos uBeginIdx,
4145  TSeqPos uLength)
4146  const
4147 {
4148  // Get reference to out_seq data
4149  out_seq->Reset();
4150  vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();
4151 
4152  // Get reference to in_seq data
4153  const vector<char>& in_seq_data = in_seq.GetNcbistdaa().Get();
4154 
4155  // Return if uBeginIdx is after end of in_seq
4156  if(uBeginIdx >= in_seq_data.size())
4157  return 0;
4158 
4159  // Set uLength to actual valid length in out_seq
4160  if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
4161  uLength = static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
4162 
4163  // Allocate memory for out_seq data
4164  out_seq_data.resize(uLength);
4165 
4166  // Get iterator on out_seq_data
4167  vector<char>::iterator i_out = out_seq_data.begin() - 1;
4168 
4169  // Get interators on in_seq
4170  vector<char>::const_iterator i_in;
4171  vector<char>::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
4172  vector<char>::const_iterator i_in_end = i_in_begin + uLength;
4173 
4174  // Loop through input sequence and copy to output sequence
4175  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
4176  (*(++i_out)) = (*i_in);
4177 
4178  return uLength;
4179 }
4180 
4181 
4182 // Function to make copy of iupacaa type sequences
4184 (const CSeq_data& in_seq,
4185  CSeq_data* out_seq,
4186  TSeqPos uBeginIdx,
4187  TSeqPos uLength)
4188  const
4189 {
4190  // Get reference to out_seq data
4191  out_seq->Reset();
4192  string& out_seq_data = out_seq->SetIupacaa().Set();
4193 
4194  // Get reference to in_seq data
4195  const string& in_seq_data = in_seq.GetIupacaa().Get();
4196 
4197  // Return if uBeginIdx is after end of in_seq
4198  if(uBeginIdx >= in_seq_data.size())
4199  return 0;
4200 
4201  // Set uLength to actual valid length in out_seq
4202  if( (uLength ==0) || ((uBeginIdx + uLength) > (in_seq_data.size() )) )
4203  uLength = static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
4204 
4205  // Allocate memory for out_seq data
4206  out_seq_data.resize(uLength);
4207 
4208  // Get iterator on out_seq_data
4209  string::iterator i_out = out_seq_data.begin() - 1;
4210 
4211  // Get interators on in_seq
4212  string::const_iterator i_in;
4213  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
4214  string::const_iterator i_in_end = i_in_begin + uLength;
4215 
4216  // Loop through input sequence and copy to output sequence
4217  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
4218  (*(++i_out)) = (*i_in);
4219 
4220  return uLength;
4221 }
4222 
4223 
4224 // Function to adjust uBeginIdx to lie on an in_seq byte boundary
4225 // and uLength to lie on on an out_seq byte boundary. Returns
4226 // overhang
4228 (TSeqPos* uBeginIdx,
4229  TSeqPos* uLength,
4230  TSeqPos uInSeqBytes,
4231  TSeqPos uInSeqsPerByte,
4232  TSeqPos uOutSeqsPerByte)
4233  const
4234 {
4235  // Adjust uBeginIdx and uLength to acceptable values
4236 
4237  // If uLength = 0, assume convert to end of sequence
4238  if(*uLength == 0)
4239  *uLength = uInSeqsPerByte * uInSeqBytes;
4240 
4241  // Ensure that uBeginIdx does not start at or after end of in_seq_data
4242  if(*uBeginIdx >= uInSeqsPerByte * uInSeqBytes)
4243  *uBeginIdx = uInSeqsPerByte * uInSeqBytes - uInSeqsPerByte;
4244 
4245  // Ensure that uBeginIdx is a multiple of uInSeqsPerByte and adjust uLength
4246  *uLength += *uBeginIdx % uInSeqsPerByte;
4247  *uBeginIdx = uInSeqsPerByte * (*uBeginIdx/uInSeqsPerByte);
4248 
4249  // Adjust uLength so as not to go beyond end of in_seq_data
4250  if(*uLength > uInSeqsPerByte * uInSeqBytes - *uBeginIdx)
4251  *uLength = uInSeqsPerByte * uInSeqBytes - *uBeginIdx;
4252 
4253  // Adjust uLength down to multiple of uOutSeqsPerByte
4254  // and calculate overhang (overhang handled separately at end)
4255  TSeqPos uOverhang = *uLength % uOutSeqsPerByte;
4256  *uLength = uOutSeqsPerByte * (*uLength / uOutSeqsPerByte);
4257 
4258  return uOverhang;
4259 
4260 }
4261 
4262 
4263 // Loops through an ncbi4na input sequence and determines
4264 // the ambiguities that would result from conversion to an ncbi2na sequence
4265 // On return, out_seq contains the ncbi4na bases that become ambiguous and
4266 // out_indices contains the indices of the abiguous bases in in_seq
4268 (const CSeq_data& in_seq,
4269  CSeq_data* out_seq,
4270  vector<TSeqPos>* out_indices,
4271  TSeqPos uBeginIdx,
4272  TSeqPos uLength)
4273  const
4274 {
4275  // Get read-only reference to in_seq data
4276  const vector<char>& in_seq_data = in_seq.GetNcbi4na().Get();
4277 
4278  // Get read & write reference to out_seq data
4279  out_seq->Reset();
4280  vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
4281 
4282  // Adjust uBeginIdx and uLength, if necessary
4283  if(uBeginIdx >= 2*in_seq_data.size())
4284  return 0;
4285 
4286  if((uLength == 0) || (((uBeginIdx + uLength) > 2*in_seq_data.size())))
4287  uLength = 2 * static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
4288 
4289  // Save uBeginIdx and adjust uBeginIdx = 0 mod 2
4290  TSeqPos uBeginSav = uBeginIdx;
4291  TSeqPos uLenSav = uLength;
4292  uLength += uBeginIdx % 2;
4293  uBeginIdx = 2*(uBeginIdx/2);
4294 
4295  // Allocate memory for out_seq_data and out_indices
4296  // Note, these will be shrunk at the end to correspond
4297  // to actual memory needed. Note, in test cases, over 50% of the
4298  // time spent in this method is spent in the next two
4299  // statements and 3/4 of that is spent in the second statement.
4300  out_seq_data.resize(uLength/2 + (uLength % 2));
4301  out_indices->resize(uLength);
4302 
4303  // Variable to track number of ambigs
4304  TSeqPos uNumAmbigs = 0;
4305 
4306  // Get iterators to input sequence
4307  vector<char>::const_iterator i_in;
4308  vector<char>::const_iterator i_in_begin =
4309  in_seq_data.begin() + uBeginIdx/2;
4310  vector<char>::const_iterator i_in_end =
4311  i_in_begin + uLength/2 + (uLength % 2);
4312 
4313  // Get iterators to out_seq_data and out_indices
4314  vector<char>::iterator i_out_seq = out_seq_data.begin();
4315  vector<TSeqPos>::iterator i_out_idx = out_indices->begin();
4316 
4317  // Index of current input seq base
4318  TSeqPos uIdx = uBeginIdx;
4319 
4320  // Loop through input sequence looking for ambiguities
4321  for(i_in = i_in_begin; i_in != i_in_end; ++i_in) {
4322  switch (m_DetectAmbigNcbi4naNcbi2na->m_Table
4323  [static_cast<unsigned char>(*i_in)]) {
4324 
4325  case 1: // Low order input nible ambiguous
4326 
4327  // Put low order input nible in low order output nible
4328  if(uNumAmbigs & 1)
4329  {
4330  (*i_out_seq) |= (*i_in) & '\x0f';
4331  ++i_out_seq;
4332  }
4333 
4334  // Put low order input nible in high order output nible
4335  else
4336  (*i_out_seq) = (*i_in) << 4;
4337 
4338  // Record input index that was ambiguous
4339  (*i_out_idx) = uIdx + 1;
4340  ++i_out_idx;
4341 
4342  // Increment number of ambiguities
4343  uNumAmbigs++;
4344  break;
4345 
4346  case 2: // High order input nible ambiguous
4347 
4348  // Put high order input nible in low order output nible
4349  if(uNumAmbigs & 1)
4350  {
4351  (*i_out_seq) |= ((*i_in) >> 4) & '\x0f';
4352  ++i_out_seq;
4353  }
4354 
4355  // Put high order input nible in high order output nible
4356  else
4357  (*i_out_seq) = (*i_in) & '\xf0';
4358 
4359  // Record input index that was ambiguous
4360  (*i_out_idx) = uIdx;
4361  ++i_out_idx;
4362 
4363  // Increment number of ambiguities
4364  uNumAmbigs++;
4365  break;
4366 
4367  case 3: // Both input nibles ambiguous
4368 
4369  // Put high order input nible in low order
4370  // output nible, move to the next output byte
4371  // and put the low order input nibble in the
4372  // high order output nible.
4373  if(uNumAmbigs & 1)
4374  {
4375  (*i_out_seq) |= ((*i_in) >> 4) & '\x0f';
4376  (*(++i_out_seq)) = (*i_in) << 4;
4377  }
4378 
4379  // Put high order input nible in high order
4380  // output nible, put low order input nible
4381  // in low order output nible, and move to
4382  // next output byte
4383  else
4384  {
4385  (*i_out_seq) = (*i_in);
4386  ++i_out_seq;
4387  }
4388 
4389  // Record indices that were ambiguous
4390  (*i_out_idx) = uIdx;
4391  (*(++i_out_idx)) = uIdx + 1;
4392  ++i_out_idx;
4393 
4394  // Increment the number of ambiguities
4395  uNumAmbigs+=2;
4396  break;
4397  }
4398 
4399  // Increment next input byte.
4400  uIdx += 2;
4401  }
4402 
4403  // Shrink out_seq_data and out_indices to actual sizes needed
4404  out_indices->resize(uNumAmbigs);
4405  out_seq_data.resize(uNumAmbigs/2 + uNumAmbigs % 2);
4406 
4407  // Check to ensure that ambigs outside of requested range are not included
4408  TSeqPos uKeepBeg = 0;
4409  TSeqPos uKeepLen = 0;
4410  if((*out_indices)[0] < uBeginSav)
4411  {
4412  uKeepBeg = 1;
4413  out_indices->erase(out_indices->begin(), out_indices->begin() + 1);
4414  }
4415 
4416  if((*out_indices)[out_indices->size()-1] >= uBeginSav + uLenSav)
4417  {
4418  out_indices->pop_back();
4419  uKeepLen = static_cast<TSeqPos>(out_indices->size());
4420  }
4421 
4422  if((uKeepBeg != 0) || (uKeepLen != 0))
4423  uNumAmbigs = KeepNcbi4na(out_seq, uKeepBeg, uKeepLen);
4424 
4425  return uNumAmbigs;
4426 }
4427 
4428 
4429 // Loops through an iupacna input sequence and determines
4430 // the ambiguities that would result from conversion to an ncbi2na sequence.
4431 // On return, out_seq contains the iupacna bases that become ambiguous and
4432 // out_indices contains the indices of the abiguous bases in in_seq. The
4433 // return is the number of ambiguities found.
4435 (const CSeq_data& in_seq,
4436  CSeq_data* out_seq,
4437  vector<TSeqPos>* out_indices,
4438  TSeqPos uBeginIdx,
4439  TSeqPos uLength)
4440  const
4441 {
4442  // Get read-only reference to in_seq data
4443  const string& in_seq_data = in_seq.GetIupacna().Get();
4444 
4445  // Get read & write reference to out_seq data
4446  out_seq->Reset();
4447  string& out_seq_data = out_seq->SetIupacna().Set();
4448 
4449  // Validate/adjust uBeginIdx and uLength
4450  if(uBeginIdx >= in_seq_data.size())
4451  return 0;
4452 
4453  if((uLength == 0) || ((uBeginIdx + uLength) > in_seq_data.size()))
4454  uLength = static_cast<TSeqPos>(in_seq_data.size()) - uBeginIdx;
4455 
4456  // Allocate memory for out_seq_data and out_indices
4457  // Note, these will be shrunk at the end to correspond
4458  // to actual memory needed.
4459  out_seq_data.resize(uLength);
4460  out_indices->resize(uLength);
4461 
4462  // Variable to track number of ambigs
4463  TSeqPos uNumAmbigs = 0;
4464 
4465  // Get iterators to input sequence
4466  string::const_iterator i_in;
4467  string::const_iterator i_in_begin = in_seq_data.begin() + uBeginIdx;
4468  string::const_iterator i_in_end = i_in_begin + uLength;
4469 
4470  // Get iterators to out_seq_data and out_indices
4471  string::iterator i_out_seq = out_seq_data.begin();
4472  vector<TSeqPos>::iterator i_out_idx = out_indices->begin();
4473 
4474  // Index of current input seq base
4475  TSeqPos uIdx = uBeginIdx;
4476 
4477  // Loop through input sequence looking for ambiguities
4478  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
4479  {
4480  if(m_DetectAmbigIupacnaNcbi2na->m_Table
4481  [static_cast<unsigned char>(*i_in)] == 1)
4482  {
4483  (*i_out_seq) = (*i_in);
4484  ++i_out_seq;
4485  (*i_out_idx) = uIdx;
4486  ++i_out_idx;
4487  ++uNumAmbigs;
4488  }
4489 
4490  ++uIdx;
4491  }
4492 
4493  out_seq_data.resize(uNumAmbigs);
4494  out_indices->resize(uNumAmbigs);
4495 
4496  return uNumAmbigs;
4497 }
4498 
4499 
4500 // Method to implement Keep for Ncbi2na. Returns length of
4501 // kept sequence
4503 (CSeq_data* in_seq,
4504  TSeqPos uBeginIdx,
4505  TSeqPos uLength)
4506  const
4507 {
4508  // Get a reference to in_seq
4509  vector<char>& in_seq_data = in_seq->SetNcbi2na().Set();
4510  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
4511 
4512  // If uBeginIdx past the end of in_seq, return empty in_seq
4513  if(uBeginIdx >= in_seq_data.size()*4)
4514  {
4515  in_seq_data.clear();
4516  return 0;
4517  }
4518 
4519  // If uLength == 0, Keep from uBeginIdx to end of in_seq
4520  if(uLength == 0)
4521  uLength = 4 * in_seq_length - uBeginIdx;
4522 
4523 
4524  // If uLength goes beyond the end of the sequence, trim
4525  // it back to the end of the sequence
4526  if(uLength > (4*in_seq_data.size() - uBeginIdx))
4527  uLength = 4 * in_seq_length - uBeginIdx;
4528 
4529  // If entire sequence is being requested, just return
4530  if((uBeginIdx == 0) && (uLength >= 4*in_seq_data.size()))
4531  return uLength;
4532 
4533  // Determine index in in_seq_data that holds uBeginIdx residue
4534  TSeqPos uStart = uBeginIdx/4;
4535 
4536  // Determine index within start byte
4537  TSeqPos uStartInByte = 2 * (uBeginIdx % 4);
4538 
4539  // Calculate masks
4540  unsigned char rightMask = 0xff << uStartInByte;
4541  unsigned char leftMask = ~rightMask;
4542 
4543  // Determine index in in_seq_data that holds uBeginIdx + uLength
4544  // residue
4545  TSeqPos uEnd = (uBeginIdx + uLength - 1)/4;
4546 
4547  // Get iterator for writting
4548  vector<char>::iterator i_write;
4549 
4550  // Determine begin and end of read
4551  vector<char>::iterator i_read = in_seq_data.begin() + uStart;
4552  vector<char>::iterator i_read_end = in_seq_data.begin() + uEnd;
4553 
4554  // Loop through in_seq_data and copy data of desire
4555  // sub sequence to begining of in_seq_data
4556  for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
4557  (*i_write) = (((*i_read) << uStartInByte) | leftMask) &
4558  (((*(i_read+1)) >> (8-uStartInByte)) | rightMask);
4559  ++i_read;
4560  }
4561 
4562  // Handle last byte
4563  (*i_write) = (*i_read) << uStartInByte;
4564 
4565  // Shrink in_seq to to size needed
4566  TSeqPos uSize = uLength/4;
4567  if((uLength % 4) != 0)
4568  uSize++;
4569  in_seq_data.resize(uSize);
4570 
4571  return uLength;
4572 }
4573 
4574 
4575 // Method to implement Keep for Ncbi4na. Returns length of
4576 // kept sequence.
4578 (CSeq_data* in_seq,
4579  TSeqPos uBeginIdx,
4580  TSeqPos uLength)
4581  const
4582 {
4583  // Get a reference to in_seq
4584  vector<char>& in_seq_data = in_seq->SetNcbi4na().Set();
4585  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
4586 
4587  // If uBeginIdx past the end of in_seq, return empty in_seq
4588  if(uBeginIdx >= in_seq_data.size()*2)
4589  {
4590  in_seq_data.clear();
4591  return 0;
4592  }
4593 
4594  // If uLength == 0, Keep from uBeginIdx to end of in_seq
4595  if(uLength == 0)
4596  uLength = 2 * in_seq_length - uBeginIdx;
4597 
4598 
4599  // If uLength goes beyond the end of the sequence, trim
4600  // it back to the end of the sequence
4601  if(uLength > (2*in_seq_data.size() - uBeginIdx))
4602  uLength = 2 * in_seq_length - uBeginIdx;
4603 
4604  // If entire sequence is being requested, just return
4605  if((uBeginIdx == 0) && (uLength >= 2*in_seq_data.size()))
4606  return uLength;
4607 
4608  // Determine index in in_seq_data that holds uBeginIdx residue
4609  TSeqPos uStart = uBeginIdx/2;
4610 
4611  // Determine index within start byte
4612  unsigned int uStartInByte = 4 * (uBeginIdx % 2);
4613 
4614  // Calculate masks
4615  unsigned char rightMask = 0xff << uStartInByte;
4616  unsigned char leftMask = ~rightMask;
4617 
4618  // Determine index in in_seq_data that holds uBeginIdx + uLength
4619  // residue
4620  TSeqPos uEnd = (uBeginIdx + uLength - 1)/2;
4621 
4622  // Get iterator for writting
4623  vector<char>::iterator i_write;
4624 
4625  // Determine begin and end of read
4626  vector<char>::iterator i_read = in_seq_data.begin() + uStart;
4627  vector<char>::iterator i_read_end = in_seq_data.begin() + uEnd;
4628 
4629  // Loop through in_seq_data and copy data of desire
4630  // sub sequence to begining of in_seq_data
4631  for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
4632  (*i_write) = (((*i_read) << uStartInByte) | leftMask) &
4633  (((*(i_read+1)) >> (8-uStartInByte)) | rightMask);
4634  ++i_read;
4635  }
4636 
4637  // Handle last byte
4638  (*i_write) = (*i_read) << uStartInByte;
4639 
4640  // Shrink in_seq to to size needed
4641  TSeqPos uSize = uLength/2;
4642  if((uLength % 2) != 0)
4643  uSize++;
4644  in_seq_data.resize(uSize);
4645 
4646  return uLength;
4647 }
4648 
4649 
4650 // Method to implement Keep for Iupacna. Return length
4651 // of kept sequence
4653 (CSeq_data* in_seq,
4654  TSeqPos uBeginIdx,
4655  TSeqPos uLength)
4656  const
4657 {
4658  // Get a reference to in_seq
4659  string& in_seq_data = in_seq->SetIupacna().Set();
4660  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
4661 
4662 
4663  // If uBeginIdx past end of in_seq, return empty in_seq
4664  if(uBeginIdx >= in_seq_data.size())
4665  {
4666  in_seq_data.erase();
4667  return 0;
4668  }
4669 
4670  // If uLength is 0, Keep from uBeginIdx to end of in_seq
4671  if(uLength == 0)
4672  uLength = in_seq_length - uBeginIdx;
4673 
4674  // Check that uLength does not go beyond end of in_seq
4675  if((uBeginIdx + uLength) > in_seq_data.size())
4676  uLength = in_seq_length - uBeginIdx;
4677 
4678  // If uBeginIdx == 0 and uLength == in_seq_data.size()
4679  // just return as the entire sequence is being requested
4680  if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
4681  return uLength;
4682 
4683  // Get two iterators on in_seq, one read and one write
4684  string::iterator i_read;
4685  string::iterator i_write;
4686 
4687  // Determine begin and end of read
4688  i_read = in_seq_data.begin() + uBeginIdx;
4689  string::iterator i_read_end = i_read + uLength;
4690 
4691  // Loop through in_seq for uLength bases
4692  // and shift uBeginIdx to beginning
4693  for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write)
4694  {
4695  (*i_write) = (*i_read);
4696  ++i_read;
4697  }
4698 
4699  // Resize in_seq_data to uLength
4700  in_seq_data.resize(uLength);
4701 
4702  return uLength;
4703 }
4704 
4705 
4706 // Method to implement Keep for Ncbieaa
4708 (CSeq_data* in_seq,
4709  TSeqPos uBeginIdx,
4710  TSeqPos uLength)
4711  const
4712 {
4713  // Get a reference to in_seq
4714  string& in_seq_data = in_seq->SetNcbieaa().Set();
4715  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
4716 
4717 
4718  // If uBeginIdx past end of in_seq, return empty in_seq
4719  if(uBeginIdx >= in_seq_data.size())
4720  {
4721  in_seq_data.erase();
4722  return 0;
4723  }
4724 
4725  // If uLength is 0, Keep from uBeginIdx to end of in_seq
4726  if(uLength == 0)
4727  uLength = in_seq_length - uBeginIdx;
4728 
4729  // Check that uLength does not go beyond end of in_seq
4730  if((uBeginIdx + uLength) > in_seq_data.size())
4731  uLength = in_seq_length - uBeginIdx;
4732 
4733  // If uBeginIdx == 0 and uLength == in_seq_data.size()
4734  // just return as the entire sequence is being requested
4735  if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
4736  return uLength;
4737 
4738  // Get two iterators on in_seq, one read and one write
4739  string::iterator i_read;
4740  string::iterator i_write;
4741 
4742  // Determine begin and end of read
4743  i_read = in_seq_data.begin() + uBeginIdx;
4744  string::iterator i_read_end = i_read + uLength;
4745 
4746  // Loop through in_seq for uLength bases
4747  // and shift uBeginIdx to beginning
4748  for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
4749  (*i_write) = (*i_read);
4750  ++i_read;
4751  }
4752 
4753  // Resize in_seq_data to uLength
4754  in_seq_data.resize(uLength);
4755 
4756  return uLength;
4757 }
4758 
4759 
4760 // Method to implement Keep for Ncbistdaa
4762 (CSeq_data* in_seq,
4763  TSeqPos uBeginIdx,
4764  TSeqPos uLength)
4765  const
4766 {
4767  // Get a reference to in_seq
4768  vector<char>& in_seq_data = in_seq->SetNcbistdaa().Set();
4769  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
4770 
4771  // If uBeginIdx past end of in_seq, return empty in_seq
4772  if(uBeginIdx >= in_seq_data.size())
4773  {
4774  in_seq_data.clear();
4775  return 0;
4776  }
4777 
4778  // If uLength is 0, Keep from uBeginIdx to end of in_seq
4779  if(uLength == 0)
4780  uLength = in_seq_length - uBeginIdx;
4781 
4782  // Check that uLength does not go beyond end of in_seq
4783  if((uBeginIdx + uLength) > in_seq_data.size())
4784  uLength = in_seq_length - uBeginIdx;
4785 
4786  // If uBeginIdx == 0 and uLength == in_seq_data.size()
4787  // just return as the entire sequence is being requested
4788  if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
4789  return uLength;
4790 
4791  // Get two iterators on in_seq, one read and one write
4792  vector<char>::iterator i_read;
4793  vector<char>::iterator i_write;
4794 
4795  // Determine begin and end of read
4796  i_read = in_seq_data.begin() + uBeginIdx;
4797  vector<char>::iterator i_read_end = i_read + uLength;
4798 
4799  // Loop through in_seq for uLength bases
4800  // and shift uBeginIdx to beginning
4801  for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
4802  (*i_write) = (*i_read);
4803  ++i_read;
4804  }
4805 
4806  // Resize in_seq_data to uLength
4807  in_seq_data.resize(uLength);
4808 
4809  return uLength;
4810 }
4811 
4812 
4813 // Method to implement Keep for Iupacaa
4815 (CSeq_data* in_seq,
4816  TSeqPos uBeginIdx,
4817  TSeqPos uLength)
4818  const
4819 {
4820  // Get a reference to in_seq
4821  string& in_seq_data = in_seq->SetIupacaa().Set();
4822  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
4823 
4824 
4825  // If uBeginIdx past end of in_seq, return empty in_seq
4826  if (uBeginIdx >= in_seq_data.size()) {
4827  in_seq_data.erase();
4828  return 0;
4829  }
4830 
4831  // If uLength is 0, Keep from uBeginIdx to end of in_seq
4832  if(uLength == 0)
4833  uLength = in_seq_length - uBeginIdx;
4834 
4835  // Check that uLength does not go beyond end of in_seq
4836  if((uBeginIdx + uLength) > in_seq_data.size())
4837  uLength = in_seq_length - uBeginIdx;
4838 
4839  // If uBeginIdx == 0 and uLength == in_seq_data.size()
4840  // just return as the entire sequence is being requested
4841  if((uBeginIdx == 0) && (uLength >= in_seq_data.size()))
4842  return uLength;
4843 
4844  // Get two iterators on in_seq, one read and one write
4845  string::iterator i_read;
4846  string::iterator i_write;
4847 
4848  // Determine begin and end of read
4849  i_read = in_seq_data.begin() + uBeginIdx;
4850  string::iterator i_read_end = i_read + uLength;
4851 
4852  // Loop through in_seq for uLength bases
4853  // and shift uBeginIdx to beginning
4854  for(i_write = in_seq_data.begin(); i_read != i_read_end; ++i_write) {
4855  (*i_write) = (*i_read);
4856  ++i_read;
4857  }
4858 
4859  // Resize in_seq_data to uLength
4860  in_seq_data.resize(uLength);
4861 
4862  return uLength;
4863 }
4864 
4865 
4866 
4867 // Methods to complement na sequences
4868 
4869 // In place methods
4871 (CSeq_data* in_seq,
4872  TSeqPos uBeginIdx,
4873  TSeqPos uLength)
4874  const
4875 {
4876  // Keep just the part of in_seq that will be complemented
4877  TSeqPos uKept = KeepIupacna(in_seq, uBeginIdx, uLength);
4878 
4879  // Get in_seq data
4880  string& in_seq_data = in_seq->SetIupacna().Set();
4881 
4882  // Get an iterator to in_seq_data
4883  string::iterator i_data;
4884 
4885  // Get end of iteration--needed for performance
4886  string::iterator i_data_end = in_seq_data.end();
4887 
4888  // Loop through the input sequence and complement it
4889  for(i_data = in_seq_data.begin(); i_data != i_data_end; ++i_data)
4890  (*i_data) =
4891  m_Iupacna_complement->m_Table[static_cast<unsigned char>(*i_data)];
4892 
4893  return uKept;
4894 }
4895 
4896 
4898 (CSeq_data* in_seq,
4899  TSeqPos uBeginIdx,
4900  TSeqPos uLength)
4901  const
4902 {
4903  // Keep just the part of in_seq that will be complemented
4904  TSeqPos uKept = KeepNcbi2na(in_seq, uBeginIdx, uLength);
4905 
4906  // Get in_seq data
4907  vector<char>& in_seq_data = in_seq->SetNcbi2na().Set();
4908 
4909  // Get an iterator to in_seq_data
4910  vector<char>::iterator i_data;
4911 
4912  // Get end of iteration
4913  vector<char>::iterator i_data_end = in_seq_data.end();
4914 
4915  // Loop through the input sequence and complement it
4916  for(i_data = in_seq_data.begin(); i_data != i_data_end; ++i_data)
4917  (*i_data) =
4918  m_Ncbi2naComplement->m_Table[static_cast<unsigned char>(*i_data)];
4919 
4920  return uKept;
4921 }
4922 
4923 
4925 (CSeq_data* in_seq,
4926  TSeqPos uBeginIdx,
4927  TSeqPos uLength)
4928  const
4929 {
4930  // Keep just the part of in_seq that will be complemented
4931  TSeqPos uKept = KeepNcbi4na(in_seq, uBeginIdx, uLength);
4932 
4933  // Get in_seq data
4934  vector<char>& in_seq_data = in_seq->SetNcbi4na().Set();
4935 
4936  // Get an iterator to in_seq_data
4937  vector<char>::iterator i_data;
4938 
4939  // Get end of iteration--done for performance
4940  vector<char>::iterator i_data_end = in_seq_data.end();
4941 
4942  // Loop through the input sequence and complement it
4943  for(i_data = in_seq_data.begin(); i_data != i_data_end; ++i_data)
4944  (*i_data) =
4945  m_Ncbi4naComplement->m_Table[static_cast<unsigned char>(*i_data)];
4946 
4947  return uKept;
4948 }
4949 
4950 
4951 // Complement in copy methods
4953 (const CSeq_data& in_seq,
4954  CSeq_data* out_seq,
4955  TSeqPos uBeginIdx,
4956  TSeqPos uLength)
4957  const
4958 {
4959  TSeqPos uKept = GetIupacnaCopy(in_seq, out_seq, uBeginIdx, uLength);
4960  TSeqPos uIdx1 = 0, uIdx2 = 0;
4961  ComplementIupacna(out_seq, uIdx1, uIdx2);
4962  return uKept;
4963 }
4964 
4965 
4967 (const CSeq_data& in_seq,
4968  CSeq_data* out_seq,
4969  TSeqPos uBeginIdx,
4970  TSeqPos uLength)
4971  const
4972 {
4973  TSeqPos uKept = GetNcbi2naCopy(in_seq, out_seq, uBeginIdx, uLength);
4974  TSeqPos uIdx1 = 0, uIdx2 = 0;
4975  ComplementNcbi2na(out_seq, uIdx1, uIdx2);
4976  return uKept;
4977 }
4978 
4979 
4981 (const CSeq_data& in_seq,
4982  CSeq_data* out_seq,
4983  TSeqPos uBeginIdx,
4984  TSeqPos uLength)
4985  const
4986 {
4987  TSeqPos uKept = GetNcbi4naCopy(in_seq, out_seq, uBeginIdx, uLength);
4988  TSeqPos uIdx1 = 0, uIdx2 = 0;
4989  ComplementNcbi4na(out_seq, uIdx1, uIdx2);
4990  return uKept;
4991 }
4992 
4993 
4994 // Methods to reverse na sequences
4995 
4996 // In place methods
4998 (CSeq_data* in_seq,
4999  TSeqPos uBeginIdx,
5000  TSeqPos uLength)
5001  const
5002 {
5003  // Keep just the part of in_seq that will be reversed
5004  TSeqPos uKept = KeepIupacna(in_seq, uBeginIdx, uLength);
5005 
5006  // Get in_seq data
5007  string& in_seq_data = in_seq->SetIupacna().Set();
5008 
5009  // Reverse the order of the string
5010  reverse(in_seq_data.begin(), in_seq_data.end());
5011 
5012  return uKept;
5013 }
5014 
5015 
5017 (CSeq_data* in_seq,
5018  TSeqPos uBeginIdx,
5019  TSeqPos uLength)
5020  const
5021 {
5022  // Get a reference to in_seq data
5023  vector<char>& in_seq_data = in_seq->SetNcbi2na().Set();
5024  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
5025 
5026  // Validate and adjust uBeginIdx and uLength
5027  if(uBeginIdx >= 4*in_seq_data.size())
5028  {
5029  in_seq_data.erase(in_seq_data.begin(), in_seq_data.end());
5030  return 0;
5031  }
5032 
5033  // If uLength is zero, set to end of sequence
5034  if(uLength == 0)
5035  uLength = 4 * in_seq_length - uBeginIdx;
5036 
5037  // Ensure that uLength not beyond end of sequence
5038  if((uBeginIdx + uLength) > (4 * in_seq_data.size()))
5039  uLength = 4 * in_seq_length - uBeginIdx;
5040 
5041  // Determine start and end bytes
5042  TSeqPos uStart = uBeginIdx/4;
5043  TSeqPos uEnd = uStart + (uLength - 1 +(uBeginIdx % 4))/4 + 1;
5044 
5045  // Declare an iterator and get end of sequence
5046  vector<char>::iterator i_in;
5047  vector<char>::iterator i_in_begin = in_seq_data.begin() + uStart;
5048  vector<char>::iterator i_in_end = in_seq_data.begin() + uEnd;
5049 
5050  // Loop through in_seq_data and reverse residues in each byte
5051  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
5052  (*i_in) = m_Ncbi2naRev->m_Table[static_cast<unsigned char>(*i_in)];
5053 
5054  // Reverse the bytes in the sequence
5055  reverse(i_in_begin, i_in_end);
5056 
5057  // Keep just the requested part of the sequence
5058  TSeqPos uJagged = 3 - ((uBeginIdx + uLength - 1) % 4) + 4*uStart;
5059  return KeepNcbi2na(in_seq, uJagged, uLength);
5060 }
5061 
5062 
5064 (CSeq_data* in_seq,
5065  TSeqPos uBeginIdx,
5066  TSeqPos uLength)
5067  const
5068 {
5069  // Get a reference to in_seq data
5070  vector<char>& in_seq_data = in_seq->SetNcbi4na().Set();
5071  TSeqPos in_seq_length = static_cast<TSeqPos>(in_seq_data.size());
5072 
5073  // Validate and adjust uBeginIdx and uLength
5074  if(uBeginIdx >= 2*in_seq_data.size())
5075  {
5076  in_seq_data.erase(in_seq_data.begin(), in_seq_data.end());
5077  return 0;
5078  }
5079 
5080  // If uLength is zero, set to end of sequence
5081  if(uLength == 0)
5082  uLength = 2 * in_seq_length - uBeginIdx;
5083 
5084  // Ensure that uLength not beyond end of sequence
5085  if((uBeginIdx + uLength) > (2 * in_seq_data.size()))
5086  uLength = 2 * in_seq_length - uBeginIdx;
5087 
5088  // Determine start and end bytes
5089  TSeqPos uStart = uBeginIdx/2;
5090  TSeqPos uEnd = uStart + (uLength - 1 +(uBeginIdx % 2))/2 + 1;
5091 
5092  // Declare an iterator and get end of sequence
5093  vector<char>::iterator i_in;
5094  vector<char>::iterator i_in_begin = in_seq_data.begin() + uStart;
5095  vector<char>::iterator i_in_end = in_seq_data.begin() + uEnd;
5096 
5097  // Loop through in_seq_data and reverse residues in each byte
5098  for(i_in = i_in_begin; i_in != i_in_end; ++i_in)
5099  (*i_in) = m_Ncbi4naRev->m_Table[static_cast<unsigned char>(*i_in)];
5100 
5101  // Reverse the bytes in the sequence
5102  reverse(i_in_begin, i_in_end);
5103 
5104  // Keep just the requested part of the sequence
5105  TSeqPos uJagged = 1 - ((uBeginIdx + uLength - 1) % 2) + 2*uStart;
5106  return KeepNcbi4na(in_seq, uJagged, uLength);
5107 }
5108 
5109 
5110 // Reverse in copy methods
5112 (const CSeq_data& in_seq,
5113  CSeq_data* out_seq,
5114  TSeqPos uBeginIdx,
5115  TSeqPos uLength)
5116  const
5117 {
5118  GetIupacnaCopy(in_seq, out_seq, uBeginIdx, uLength);
5119 
5120  TSeqPos uIdx1 = 0, uIdx2 = uLength;
5121  return ReverseIupacna(out_seq, uIdx1, uIdx2);
5122 }
5123 
5124 
5126 (const CSeq_data& in_seq,
5127  CSeq_data* out_seq,
5128  TSeqPos uBeginIdx,
5129  TSeqPos uLength)
5130  const
5131 {
5132  GetNcbi2naCopy(in_seq, out_seq, uBeginIdx, uLength);
5133 
5134  TSeqPos uIdx1 = 0, uIdx2 = uLength;
5135  return ReverseNcbi2na(out_seq, uIdx1, uIdx2);
5136 }
5137 
5138 
5140 (const CSeq_data& in_seq,
5141  CSeq_data* out_seq,
5142  TSeqPos uBeginIdx,
5143  TSeqPos uLength)
5144  const
5145 {
5146  GetNcbi4naCopy(in_seq, out_seq, uBeginIdx, uLength);
5147 
5148  TSeqPos uIdx1 = 0, uIdx2 = uLength;
5149  return ReverseNcbi4na(out_seq, uIdx1, uIdx2);
5150 }
5151 
5152 
5153 // Methods to reverse-complement na sequences
5154 
5155 // In place methods
5157 (CSeq_data* in_seq,
5158  TSeqPos uBeginIdx,
5159  TSeqPos uLength)
5160  const
5161 {
5162  ReverseIupacna(in_seq, uBeginIdx, uLength);
5163 
5164  TSeqPos uIdx = 0;
5165  return ComplementIupacna(in_seq, uIdx, uLength);
5166 }
5167 
5168 
5170 (CSeq_data* in_seq,
5171  TSeqPos uBeginIdx,
5172  TSeqPos uLength)
5173  const
5174 {
5175  ReverseNcbi2na(in_seq, uBeginIdx, uLength);
5176 
5177  TSeqPos uIdx = 0;
5178  return ComplementNcbi2na(in_seq, uIdx, uLength);
5179 }
5180 
5181 
5183 (CSeq_data* in_seq,
5184  TSeqPos uBeginIdx,
5185  TSeqPos uLength)
5186  const
5187 {
5188  ReverseNcbi4na(in_seq, uBeginIdx, uLength);
5189 
5190  TSeqPos uIdx = 0;
5191  return ComplementNcbi4na(in_seq, uIdx, uLength);
5192 }
5193 
5194 
5195 // Reverse-complement in copy methods
5197 (const CSeq_data& in_seq,
5198  CSeq_data* out_seq,
5199  TSeqPos uBeginIdx,
5200  TSeqPos uLength)
5201  const
5202 {
5203  ReverseIupacna(in_seq, out_seq, uBeginIdx, uLength);
5204 
5205  TSeqPos uIdx = 0;
5206  return ComplementIupacna(out_seq, uIdx, uLength);
5207 }
5208 
5209 
5211 (const CSeq_data& in_seq,
5212  CSeq_data* out_seq,
5213  TSeqPos uBeginIdx,
5214  TSeqPos uLength)
5215  const
5216 {
5217  ReverseNcbi2na(in_seq, out_seq, uBeginIdx, uLength);
5218 
5219  TSeqPos uIdx = 0;
5220  return ComplementNcbi2na(out_seq, uIdx, uLength);
5221 }
5222 
5223 
5225 (const CSeq_data& in_seq,
5226  CSeq_data* out_seq,
5227  TSeqPos uBeginIdx,
5228  TSeqPos uLength)
5229  const
5230 {
5231  ReverseNcbi4na(in_seq, out_seq, uBeginIdx, uLength);
5232 
5233  TSeqPos uIdx = 0;
5234  return ComplementNcbi4na(out_seq, uIdx, uLength);
5235 }
5236 
5237 
5238 // Append methods
5240 (CSeq_data* out_seq,
5241  const CSeq_data& in_seq1,
5242  TSeqPos uBeginIdx1,
5243  TSeqPos uLength1,
5244  const CSeq_data& in_seq2,
5245  TSeqPos uBeginIdx2,
5246  TSeqPos uLength2)
5247  const
5248 {
5249  // Get references to in_seqs
5250  const string& in_seq1_data = in_seq1.GetIupacna().Get();
5251  const string& in_seq2_data = in_seq2.GetIupacna().Get();
5252 
5253  // Get a reference to out_seq
5254  out_seq->Reset();
5255  string& out_seq_data = out_seq->SetIupacna().Set();
5256 
5257  // Validate and Adjust uBeginIdx_ and uLength_
5258  if((uBeginIdx1 >= in_seq1_data.size()) &&
5259  (uBeginIdx2 >= in_seq2_data.size()))
5260  return 0;
5261 
5262  if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
5263  uLength1 = static_cast<TSeqPos>(in_seq1_data.size()) - uBeginIdx1;
5264 
5265  if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
5266  uLength2 = static_cast<TSeqPos>(in_seq2_data.size()) - uBeginIdx2;
5267 
5268  // Append the strings
5269  out_seq_data.append(in_seq1_data.substr(uBeginIdx1,uLength1));
5270  out_seq_data.append(in_seq2_data.substr(uBeginIdx2,uLength2));
5271 
5272  return uLength1 + uLength2;
5273 }
5274 
5275 
// Resets out_seq and fills it with residues
// [uBeginIdx1, uBeginIdx1 + uLength1) of in_seq1 followed by residues
// [uBeginIdx2, uBeginIdx2 + uLength2) of in_seq2.  Both inputs and the
// output are packed Ncbi2na: 4 residues per byte, 2 bits per residue, with
// earlier residues in the higher-order bits (leading residues are dropped
// by shifting left).  A length of 0, or one running past the end, means
// "through the end of that sequence".  Returns the number of residues
// written to out_seq.
5277 (CSeq_data* out_seq,
5278  const CSeq_data& in_seq1,
5279  TSeqPos uBeginIdx1,
5280  TSeqPos uLength1,
5281  const CSeq_data& in_seq2,
5282  TSeqPos uBeginIdx2,
5283  TSeqPos uLength2)
5284  const
5285 {
5286  // Get references to in_seqs
5287  const vector<char>& in_seq1_data = in_seq1.GetNcbi2na().Get();
5288  const vector<char>& in_seq2_data = in_seq2.GetNcbi2na().Get();
5289 
5290  // Get a reference to out_seq
5291  out_seq->Reset();
5292  vector<char>& out_seq_data = out_seq->SetNcbi2na().Set();
5293 
5294  // Handle case where both uBeginIdx values go beyond the end of their in_seq
5295  if((uBeginIdx1 >= 4*in_seq1_data.size()) &&
5296  (uBeginIdx2 >= 4*in_seq2_data.size()))
5297  return 0;
5298 
5299  // Handle case where uBeginIdx1 goes beyond end of in_seq1:
5299  // the result is just the requested part of in_seq2
5300  if(uBeginIdx1 >= 4*in_seq1_data.size())
5301  return GetNcbi2naCopy(in_seq2, out_seq, uBeginIdx2, uLength2);
5302 
5303  // Handle case where uBeginIdx2 goes beyond end of in_seq2:
5303  // the result is just the requested part of in_seq1
5304  if(uBeginIdx2 >= 4*in_seq2_data.size())
5305  return GetNcbi2naCopy(in_seq1, out_seq, uBeginIdx1, uLength1);
5306 
5307  // Validate and adjust uLength1 and uLength2 (0 or too long => to end)
// NOTE(review): uBeginIdx + uLength may wrap around for a very large
// uLength if TSeqPos is a fixed-width unsigned type, in which case the
// length would not be clamped -- confirm and consider comparing
// uLength against (4*size - uBeginIdx) instead.
5308  if(((uBeginIdx1 + uLength1) > 4*in_seq1_data.size()) || uLength1 == 0)
5309  uLength1 = 4 * static_cast<TSeqPos>(in_seq1_data.size()) - uBeginIdx1;
5310 
5311  if(((uBeginIdx2 + uLength2) > 4*in_seq2_data.size()) || uLength2 == 0)
5312  uLength2 = 4 * static_cast<TSeqPos>(in_seq2_data.size()) - uBeginIdx2;
5313 
5314 
5315  // Resize out_seq_data to hold appended sequence (round up to whole bytes)
5316  TSeqPos uTotalLength = uLength1 + uLength2;
5317  if((uTotalLength % 4) == 0)
5318  out_seq_data.resize(uTotalLength/4);
5319  else
5320  out_seq_data.resize(uTotalLength/4 + 1);
5321 
5322  // Calculate bit shifts required for in_seq1: lShift1 drops the residues
5322  // before uBeginIdx1 in a byte, rShift1 pulls in bits from the next byte
5323  unsigned int lShift1 = 2*(uBeginIdx1 % 4);
5324  unsigned int rShift1 = 8 - lShift1;
5325 
5326  // Calculate bit shifts required for in_seq2.
5326  // uVacantIdx: bit offset of the first free slot in the last output byte
5326  //             written for in_seq1 (0 means that byte is full).
5326  // uStartIdx:  bit offset of the first requested residue of in_seq2
5326  //             within its byte.
5326  // uCase records which of the five alignment situations applies.
5327  unsigned int lShift2, rShift2, uCase;
5328  unsigned int uVacantIdx = 2*(uLength1 % 4);
5329  unsigned int uStartIdx = 2*(uBeginIdx2 % 4);
5330  if((uVacantIdx < uStartIdx) && (uVacantIdx > 0))
5331  {
5332  uCase = 0;
5333  lShift2 = uStartIdx - uVacantIdx;
5334  rShift2 = 8 - lShift2;
5335  }
5336  else if((uVacantIdx < uStartIdx) && (uVacantIdx == 0))
5337  {
5338  uCase = 1;
5339  lShift2 = uStartIdx;
5340  rShift2 = 8 - lShift2;
5341  }
5342  else if((uVacantIdx == uStartIdx) && (uVacantIdx > 0))
5343  {
5344  uCase = 2;
5345  lShift2 = 0;
5346  rShift2 = 8;
5347  }
5348  else if((uVacantIdx == uStartIdx) && (uVacantIdx == 0))
5349  {
5350  uCase = 3;
5351  lShift2 = 0;
5352  rShift2 = 8;
5353  }
5354  else
5355  {
5356  uCase = 4;
5357  rShift2 = uVacantIdx - uStartIdx;
5358  lShift2 = 8 - rShift2;
5359  }
5360 
5361 
5362  // Determine begin and end points for iterators: byte indices
5362  // [uStart, uEnd) cover the requested residues of each input.
5363  TSeqPos uStart1 = uBeginIdx1/4;
5364  TSeqPos uEnd1;
5365  if(((uBeginIdx1 + uLength1) % 4) == 0)
5366  uEnd1 = (uBeginIdx1 + uLength1)/4;
5367  else
5368  uEnd1 = (uBeginIdx1 + uLength1)/4 + 1;
5369 
5370  TSeqPos uStart2 = uBeginIdx2/4;
5371  TSeqPos uEnd2;
5372  if(((uBeginIdx2 + uLength2) % 4) == 0)
5373  uEnd2 = (uBeginIdx2 + uLength2)/4;
5374  else
5375  uEnd2 = (uBeginIdx2 + uLength2)/4 + 1;
5376 
5377  // Get begin and end positions on in_seqs.  Note that i_in1_end points
5377  // AT the last byte of in_seq1's range (not past it), because the copy
5377  // loop below also reads the byte after the current one.
5378  vector<char>::const_iterator i_in1_begin = in_seq1_data.begin() + uStart1;
5379  vector<char>::const_iterator i_in1_end = in_seq1_data.begin() + uEnd1 - 1;
5380  vector<char>::const_iterator i_in2_begin = in_seq2_data.begin() + uStart2;
5381  vector<char>::const_iterator i_in2_end = in_seq2_data.begin() + uEnd2;
5382 
5383  // Declare iterators.  i_out starts one before the first output byte and
5383  // is pre-incremented before each write.
// NOTE(review): begin() - 1 forms an iterator before the start of the
// vector, which the C++ standard does not permit even if it is never
// dereferenced; checked-iterator builds may trap here.
5384  vector<char>::iterator i_out = out_seq_data.begin() - 1;
5385  vector<char>::const_iterator i_in1;
5386  vector<char>::const_iterator i_in2;
5387 
5388  // Insert in_seq1 into out_seq, combining each byte with the leading
5388  // bits of its successor
5389  for(i_in1 = i_in1_begin; i_in1 != i_in1_end; ++i_in1)
5390  (*(++i_out)) = ((*i_in1) << lShift1) | ((*(i_in1+1) & 255) >> rShift1);
5391 
5392  // Handle last byte for in_seq1 if necessary: uEndOutByte is the index of
5392  // the last output byte that receives residues of in_seq1
5393  TSeqPos uEndOutByte;
5394  if((uLength1 % 4) == 0)
5395  uEndOutByte = uLength1/4 - 1;
5396  else
5397  uEndOutByte = uLength1/4;
5398  if(i_out != (out_seq_data.begin() + uEndOutByte))
5399  (*(++i_out)) = (*i_in1) << lShift1;
5400 
5401  // Connect in_seq1 and in_seq2.
5401  // uMask1 keeps the in_seq1 residues already in the junction byte;
5401  // uMask2 keeps the residues of in_seq2's first byte from uBeginIdx2 on.
5401  // uSeq2Inc is the number of in_seq2 bytes treated as handled by the
5401  // junction step before the main in_seq2 loop starts.
5402  unsigned char uMask1 = 255 << (8 - 2*(uLength1 % 4));
5403  unsigned char uMask2 = 255 >> (2*(uBeginIdx2 % 4));
5404  TSeqPos uSeq2Inc = 1;
5405 
5406  switch (uCase) {
5407  case 0: // 0 < uVacantIdx < uStartIdx
5408  if((i_in2_begin + 1) == i_in2_end)
5409  {
5410  (*i_out) &= uMask1;
5411  (*i_out) |= ((*i_in2_begin) & uMask2) << lShift2;
5412  return uTotalLength;
5413  }
5414  else
5415  {
5416  (*i_out) &= uMask1;
5417  (*i_out) |=
5418  (((*i_in2_begin) & uMask2) << lShift2) |
5419  (((*(i_in2_begin+1)) & 255) >> rShift2);
5420  }
5421  break;
5422  case 1: // 0 == uVacantIdx < uStartIdx
5423  if((i_in2_begin + 1) == i_in2_end)
5424  {
5425  (*(++i_out)) = (*i_in2_begin) << lShift2;
5426  return uTotalLength;
5427  }
5428  else
5429  {
5430  (*(++i_out)) =
5431  ((*i_in2_begin) << lShift2) |
5432  (((*(i_in2_begin+1)) & 255) >> rShift2);
5433  }
5434  break;
5435  case 2: // uVacantIdx == uStartIdx > 0
5436  (*i_out) &= uMask1;
5437  (*i_out) |= (*i_in2_begin) & uMask2;
5438  if((i_in2_begin + 1) == i_in2_end)
5439  return uTotalLength;
5440  break;
5441  case 3: // uVacantIdx == uStartIdx == 0
5442  (*(++i_out)) = (*i_in2_begin);
5443  if((i_in2_begin + 1) == i_in2_end)
5444  return uTotalLength;
5445  break;
5446  case 4: // uVacantIdx > uStartIdx
5447  if((i_in2_begin + 1) == i_in2_end)
5448  {
5449  (*i_out) &= uMask1;
5450  (*i_out) |= ((*i_in2_begin) & uMask2) >> rShift2;
5451  if(++i_out != out_seq_data.end())
5452  (*i_out) = (*i_in2_begin) << lShift2;
5453  return uTotalLength;
5454  }
5455  else
5456  {
5457  (*i_out) &= uMask1;
5458  (*i_out) |=
5459  (((*i_in2_begin) & uMask2) >> rShift2) |
5460  ((*(i_in2_begin+1) & ~uMask2) << lShift2);
5461  uSeq2Inc = 0;
5462  }
5463 
5464  }
5465 
5466  // Insert the rest of in_seq2 into out_seq; the loop stops one byte
5466  // early because each step also reads the following byte
5467  for(i_in2 = i_in2_begin+uSeq2Inc; (i_in2 != i_in2_end) &&
5468  ((i_in2+1) != i_in2_end); ++i_in2) {
5469  (*(++i_out)) = ((*i_in2) << lShift2) | ((*(i_in2+1) & 255) >> rShift2);
5470  }
5471 
5472  // Handle last byte for in_seq2, if there is one
5473  if((++i_out != out_seq_data.end()) && (i_in2 != i_in2_end))
5474  (*i_out) = (*i_in2) << lShift2;
5475 
5476  return uLength1 + uLength2;
5477 }
5478 
5479 
5481 (CSeq_data* out_seq,
5482  const CSeq_data& in_seq1,
5483  TSeqPos uBeginIdx1,
5484  TSeqPos uLength1,
5485  const CSeq_data& in_seq2,
5486  TSeqPos uBeginIdx2,
5487  TSeqPos uLength2)
5488  const
5489 {
5490  // Get references to in_seqs
5491  const vector<char>& in_seq1_data = in_seq1.GetNcbi4na().Get();
5492  const vector<char>& in_seq2_data = in_seq2.GetNcbi4na().Get();
5493 
5494  // Get a reference to out_seq
5495  out_seq->Reset();
5496  vector<char>& out_seq_data = out_seq->SetNcbi4na().Set();
5497 
5498  // Handle both uBeginidx go beyond end of in_seq
5499  if((uBeginIdx1 >= 4*in_seq1_data.size()) &&
5500  (uBeginIdx2 >= 4*in_seq2_data.size()))
5501  return 0;
5502 
5503  // Handle case where uBeginIdx1 goes beyond end of in_seq1
5504  if(uBeginIdx1 >= 4*in_seq1_data.size())
5505  return GetNcbi4naCopy(in_seq2, out_seq, uBeginIdx2, uLength2);
5506 
5507  // Handle case where uBeginIdx2 goes beyond end of in_seq2
5508  if(uBeginIdx2 >= 4*in_seq2_data.size())
5509  return GetNcbi4naCopy(in_seq1, out_seq, uBeginIdx1, uLength1);
5510 
5511  // Validate and Adjust uBeginIdx_ and uLength_
5512  if(((uBeginIdx1 + uLength1) > 2*in_seq1_data.size()) || uLength1 == 0)
5513  uLength1 = 2 * static_cast<TSeqPos>(in_seq1_data.size()) - uBeginIdx1;
5514 
5515  if(((uBeginIdx2 + uLength2) > 2*in_seq2_data.size()) || uLength2 == 0)
5516  uLength2 = 2 * static_cast<TSeqPos>(in_seq2_data.size()) - uBeginIdx2;
5517 
5518  // Resize out_seq_data to hold appended sequence
5519  TSeqPos uTotalLength = uLength1 + uLength2;
5520  if((uTotalLength % 2) == 0)
5521  out_seq_data.resize(uTotalLength/2);
5522  else
5523  out_seq_data.resize(uTotalLength/2 + 1);
5524 
5525  // Calculate bit shifts required for in_seq1
5526  unsigned int lShift1 = 4*(uBeginIdx1 % 2);
5527  unsigned int rShift1 = 8 - lShift1;
5528 
5529  // Calculate bit shifts required for in_seq2
5530  unsigned int lShift2, rShift2, uCase;
5531  unsigned int uVacantIdx = 4*(uLength1 % 2);
5532  unsigned int uStartIdx = 4*(uBeginIdx2 % 2);
5533  if((uVacantIdx < uStartIdx))
5534  {
5535  uCase = 1;
5536  lShift2 = uStartIdx;
5537  rShift2 = 8 - lShift2;
5538  }
5539  else if((uVacantIdx == uStartIdx) && (uVacantIdx > 0))
5540  {
5541  uCase = 2;
5542  lShift2 = 0;
5543  rShift2 = 8;
5544  }
5545  else if((uVacantIdx == uStartIdx) && (uVacantIdx == 0))
5546  {
5547  uCase = 3;
5548  lShift2 = 0;
5549  rShift2 = 8;
5550  }
5551  else
5552  {
5553  uCase = 4;
5554  rShift2 = uVacantIdx - uStartIdx;
5555  lShift2 = 8 - rShift2;
5556  }
5557 
5558 
5559  // Determine begin and end points for iterators.
5560  TSeqPos uStart1 = uBeginIdx1/2;
5561  TSeqPos uEnd1;
5562  if(((uBeginIdx1 + uLength1) % 2) == 0)
5563  uEnd1 = (uBeginIdx1 + uLength1)/2;
5564  else
5565  uEnd1 = (uBeginIdx1 + uLength1)/2 + 1;
5566 
5567  TSeqPos uStart2 = uBeginIdx2/2;
5568  TSeqPos uEnd2;
5569  if(((uBeginIdx2 + uLength2) % 2) == 0)
5570  uEnd2 = (uBeginIdx2 + uLength2)/2;
5571  else
5572  uEnd2 = (uBeginIdx2 + uLength2)/2 + 1;
5573 
5574  // Get begin and end positions on in_seqs
5575  vector<char>::const_iterator i_in1_begin = in_seq1_data.begin() + uStart1;
5576  vector<char>::const_iterator i_in1_end = in_seq1_data.begin() + uEnd1 - 1;
5577  vector<char>::const_iterator i_in2_begin = in_seq2_data.begin() + uStart2;
5578  vector<char>::const_iterator i_in2_end = in_seq2_data.begin() + uEnd2;
5579 
5580  // Declare iterators
5581  vector<char>::iterator i_out = out_seq_data.begin() - 1;
5582  vector<char>::const_iterator i_in1;
5583  vector<char>::const_iterator i_in2;
5584 
5585  // Insert in_seq1 into out_seq
5586  for(i_in1 = i_in1_begin; i_in1 != i_in1_end; ++i_in1)
5587  (*(++i_out)) = ((*i_in1) << lShift1) | ((*(i_in1+1) & 255) >> rShift1);
5588 
5589  // Handle last byte for in_seq1 if necessary
5590  TSeqPos uEndOutByte;
5591  if((uLength1 % 2) == 0)
5592  uEndOutByte = uLength1/2 - 1;
5593  else
5594  uEndOutByte = uLength1/2;
5595  if(i_out != (out_seq_data.begin() + uEndOutByte))
5596  (*(++i_out)) = (*i_in1) << lShift1;
5597 
5598  // Connect in_seq1 and in_seq2
5599  unsigned char uMask1 = 255 << (8 - 4*(uLength1 % 2));
5600  unsigned char uMask2 = 255 >> (4*(uBeginIdx2 % 2));
5601  TSeqPos uSeq2Inc = 1;
5602 
5603  switch (uCase) {
5604  case 1: // 0 == uVacantIdx < uStartIdx
5605  if((i_in2_begin+1) == i_in2_end)
5606  {
5607  (*(++i_out)) = (*i_in2_begin) << lShift2;
5608  return uTotalLength;
5609  }
5610  else
5611  {
5612  (*(++i_out)) =
5613  ((*i_in2_begin) << lShift2) |
5614  (((*(i_in2_begin+1)) & 255) >> rShift2);
5615  }
5616  break;
5617  case 2: // uVacantIdx == uStartIdx > 0
5618  (*i_out) &= uMask1;
5619  (*i_out) |= (*i_in2_begin) & uMask2;
5620  if((i_in2_begin+1) == i_in2_end)
5621  return uTotalLength;
5622  break;
5623  case 3: // uVacantIdx == uStartIdx == 0
5624  (*(++i_out)) = (*i_in2_begin);
5625  if((i_in2_begin+1) == i_in2_end)
5626  return uTotalLength;
5627  break;
5628  case 4: // uVacantIdx > uStartIdx
5629  if((i_in2_begin+1) == i_in2_end)
5630  {
5631  (*i_out) &= uMask1;
5632  (*i_out) |= ((*i_in2_begin) & uMask2) >> rShift2;
5633  if(++i_out != out_seq_data.end())
5634  (*i_out) = (*i_in2_begin) << lShift2;
5635  return uTotalLength;
5636  }
5637  else
5638  {
5639  (*i_out) &= uMask1;
5640  (*i_out) |=
5641  (((*i_in2_begin) & uMask2) >> rShift2) |
5642  ((*(i_in2_begin+1) & ~uMask2) << lShift2);
5643  uSeq2Inc = 0;
5644  }
5645 
5646  }
5647 
5648  // Insert in_seq2 into out_seq
5649  for(i_in2 = i_in2_begin+uSeq2Inc; (i_in2 != i_in2_end) &&
5650  ((i_in2+1) != i_in2_end); ++i_in2) {
5651  (*(++i_out)) =
5652  ((*i_in2) << lShift2) | ((*(i_in2+1) & 255) >> rShift2);
5653  }
5654 
5655  // Handle last byte for in_seq2, if there is one
5656  if((++i_out != out_seq_data.end()) && (i_in2 != i_in2_end))
5657  (*i_out) = (*i_in2) << lShift2;
5658 
5659  return uTotalLength;
5660 }
5661 
5662 
5664 (CSeq_data* out_seq,
5665  const CSeq_data& in_seq1,
5666  TSeqPos uBeginIdx1,
5667  TSeqPos uLength1,
5668  const CSeq_data& in_seq2,
5669  TSeqPos uBeginIdx2,
5670  TSeqPos uLength2)
5671  const
5672 {
      // Concatenates a slice of in_seq1 and a slice of in_seq2 (both read via
      // GetNcbieaa()) into out_seq and returns the combined length.
      //
      // out_seq is Reset() and switched to Ncbieaa *before* validation, so it
      // has already been cleared even when this function returns 0.
      //
      // For each input, a length of 0 or a length that runs past the end of
      // the data is replaced by "from uBeginIdx to the end of the data".
      //
      // NOTE(review): the early return below requires BOTH begin indices to
      // be out of range. If only one of them is, size() - uBeginIdx is
      // computed anyway (it wraps if TSeqPos is unsigned -- confirm), and
      // std::string::substr throws std::out_of_range when the position
      // exceeds size().
5673  // Get references to in_seqs
5674  const string& in_seq1_data = in_seq1.GetNcbieaa().Get();
5675  const string& in_seq2_data = in_seq2.GetNcbieaa().Get();
5676 
5677  // Get a reference to out_seq
5678  out_seq->Reset();
5679  string& out_seq_data = out_seq->SetNcbieaa().Set();
5680 
5681  // Validate and Adjust uBeginIdx_ and uLength_
5682  if((uBeginIdx1 >= in_seq1_data.size()) &&
5683  (uBeginIdx2 >= in_seq2_data.size()))
5684  {
5685  return 0;
5686  }
5687 
5688  if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
5689  uLength1 = static_cast<TSeqPos>(in_seq1_data.size()) - uBeginIdx1;
5690 
5691  if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
5692  uLength2 = static_cast<TSeqPos>(in_seq2_data.size()) - uBeginIdx2;
5693 
5694  // Append the strings
5695  out_seq_data.append(in_seq1_data.substr(uBeginIdx1,uLength1));
5696  out_seq_data.append(in_seq2_data.substr(uBeginIdx2,uLength2));
5697 
5698  return uLength1 + uLength2;
5699 }
5700 
5701 
5703 (CSeq_data* out_seq,
5704  const CSeq_data& in_seq1,
5705  TSeqPos uBeginIdx1,
5706  TSeqPos uLength1,
5707  const CSeq_data& in_seq2,
5708  TSeqPos uBeginIdx2,
5709  TSeqPos uLength2)
5710  const
5711 {
      // Concatenates a slice of in_seq1 and a slice of in_seq2 (both read via
      // GetNcbistdaa(), one residue per byte) into out_seq and returns the
      // combined length.
      //
      // out_seq is Reset() and switched to Ncbistdaa before validation, so it
      // has already been cleared even when this function returns 0.
      //
      // For each input, a length of 0 or a length that runs past the end of
      // the data is replaced by "from uBeginIdx to the end of the data".
      //
      // NOTE(review): the early return requires BOTH begin indices to be out
      // of range. If only one is, begin() + uBeginIdx below forms an iterator
      // past end(), which is undefined behavior for std::vector.
5712  // Get references to in_seqs
5713  const vector<char>& in_seq1_data = in_seq1.GetNcbistdaa().Get();
5714  const vector<char>& in_seq2_data = in_seq2.GetNcbistdaa().Get();
5715 
5716  // Get a reference to out_seq
5717  out_seq->Reset();
5718  vector<char>& out_seq_data = out_seq->SetNcbistdaa().Set();
5719 
5720  // Validate and Adjust uBeginIdx_ and uLength_
5721  if((uBeginIdx1 >= in_seq1_data.size()) &&
5722  (uBeginIdx2 >= in_seq2_data.size()))
5723  return 0;
5724 
5725  if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
5726  uLength1 = static_cast<TSeqPos>(in_seq1_data.size()) - uBeginIdx1;
5727 
5728  if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
5729  uLength2 = static_cast<TSeqPos>(in_seq2_data.size()) - uBeginIdx2;
5730 
5731  // Get begin and end positions on in_seqs
5732  vector<char>::const_iterator i_in1_begin =
5733  in_seq1_data.begin() + uBeginIdx1;
5734  vector<char>::const_iterator i_in1_end = i_in1_begin + uLength1;
5735  vector<char>::const_iterator i_in2_begin =
5736  in_seq2_data.begin() + uBeginIdx2;
5737  vector<char>::const_iterator i_in2_end = i_in2_begin + uLength2;
5738 
5739  // Insert the in_seqs into out_seq
5740  out_seq_data.insert(out_seq_data.end(), i_in1_begin, i_in1_end);
5741  out_seq_data.insert(out_seq_data.end(), i_in2_begin, i_in2_end);
5742 
5743  return uLength1 + uLength2;
5744 }
5745 
5746 
5748 (CSeq_data* out_seq,
5749  const CSeq_data& in_seq1,
5750  TSeqPos uBeginIdx1,
5751  TSeqPos uLength1,
5752  const CSeq_data& in_seq2,
5753  TSeqPos uBeginIdx2,
5754  TSeqPos uLength2)
5755  const
5756 {
      // Concatenates a slice of in_seq1 and a slice of in_seq2 (both read via
      // GetIupacaa()) into out_seq and returns the combined length.
      //
      // out_seq is Reset() and switched to Iupacaa before validation, so it
      // has already been cleared even when this function returns 0.
      //
      // For each input, a length of 0 or a length that runs past the end of
      // the data is replaced by "from uBeginIdx to the end of the data".
      //
      // NOTE(review): the early return requires BOTH begin indices to be out
      // of range. If only one is, size() - uBeginIdx is still computed (it
      // wraps if TSeqPos is unsigned -- confirm), and std::string::substr
      // throws std::out_of_range when the position exceeds size().
5757  // Get references to in_seqs
5758  const string& in_seq1_data = in_seq1.GetIupacaa().Get();
5759  const string& in_seq2_data = in_seq2.GetIupacaa().Get();
5760 
5761  // Get a reference to out_seq
5762  out_seq->Reset();
5763  string& out_seq_data = out_seq->SetIupacaa().Set();
5764 
5765  // Validate and Adjust uBeginIdx_ and uLength_
5766  if((uBeginIdx1 >= in_seq1_data.size()) &&
5767  (uBeginIdx2 >= in_seq2_data.size()))
5768  {
5769  return 0;
5770  }
5771 
5772  if(((uBeginIdx1 + uLength1) > in_seq1_data.size()) || uLength1 == 0)
5773  uLength1 = static_cast<TSeqPos>(in_seq1_data.size()) - uBeginIdx1;
5774 
5775  if(((uBeginIdx2 + uLength2) > in_seq2_data.size()) || uLength2 == 0)
5776  uLength2 = static_cast<TSeqPos>(in_seq2_data.size()) - uBeginIdx2;
5777 
5778  // Append the strings
5779  out_seq_data.append(in_seq1_data.substr(uBeginIdx1,uLength1));
5780  out_seq_data.append(in_seq2_data.substr(uBeginIdx2,uLength2));
5781 
5782  return uLength1 + uLength2;
5783 }
5784 
5785 // Returns the 3 letter Iupacaa3 code for an ncbistdaa index
5787 (TIndex ncbistdaa)
5788 {
      // The ncbistdaa index is passed unchanged as the iupacaa3 index; the
      // embedded iupacaa3 and ncbistdaa tables both run 0-27 and list the
      // residues in the same order. The third argument is get_code = true.
      // Any exception raised by GetCodeOrName propagates to the caller.
5789  return GetCodeOrName(eSeq_code_type_iupacaa3, ncbistdaa, true);
5790 }
5791 
5792 // Returns true if code type is available
5794 (CSeq_data::E_Choice code_type)
5795 {
      // e_not_set is answered here with false and never reaches
      // EChoiceToESeq(); every other choice is translated and forwarded to
      // the ESeq_code_type overload.
5796  if (code_type == CSeq_data::e_not_set) {
5797  return false;
5798  } else {
5799  return IsCodeAvailable(EChoiceToESeq(code_type));
5800  }
5801 }
5802 
5803 // Return true if code type is available
5805 {
      // Returns true as soon as an entry whose GetCode() equals code_type is
      // found, false if none matches.
      // NOTE(review): the function signature (listing line 5804) and the
      // loop header that declares i_ct (listing line 5807) are absent from
      // this listing; restore them from upstream before building.
5806  // Iterate through Seq-code-set looking for code type
5808  if((*i_ct)->GetCode() == code_type) {
5809  return true;
5810  }
5811  }
5812  return false;
5813 }
5814 
5815 // Return a pair containing the first index (start-at) and last index
5816 // for code_type.
5818 (CSeq_data::E_Choice code_type)
5819 {
      // Thin adapter: translates the E_Choice with EChoiceToESeq() and
      // forwards to the ESeq_code_type overload. Exceptions from either call
      // propagate unchanged.
5820  return GetCodeIndexFromTo(EChoiceToESeq(code_type));
5821 }
5822 
5823 // Return a pair containing the first index (start-at) and last index
5824 // for code_type.
5826 (ESeq_code_type code_type)
5827 {
      // Builds the pair (first index, last index) for code_type:
      //   first  = start-at of the matching entry, or 0 when start-at is unset
      //   second = first + GetNum() - 1
      // Throws CSeqportUtil::CBadType("GetCodeIndexFromTo") when no entry
      // matches code_type.
      // NOTE(review): the loop header that declares i_ct (listing line 5830)
      // is absent from this listing; restore it from upstream before building.
5828  // Iterate through Seq-code-set looking for code type
5829  TPair p;
5831  if((*i_ct)->GetCode() == code_type) {
5832  if ( (*i_ct)->IsSetStart_at() ) {
5833  p.first = static_cast<TIndex>((*i_ct)->GetStart_at());
5834  } else {
5835  p.first = 0;
5836  }
5837  p.second = p.first + static_cast<TIndex>((*i_ct)->GetNum() - 1);
5838  return p;
5839  }
5840  }
5841  throw CSeqportUtil::CBadType("GetCodeIndexFromTo");
5842 }
5843 
5844 // Converts CSeq_data::E_Choice type to ESeq_code_type
5845 // and calls overloaded GetCodeOrName()
5847 (CSeq_data::E_Choice code_type,
5848  TIndex idx,
5849  bool get_code)
5850 {
      // Thin adapter: translates the E_Choice with EChoiceToESeq() and
      // forwards idx and get_code unchanged to the ESeq_code_type overload.
5851  return GetCodeOrName(EChoiceToESeq(code_type), idx, get_code);
5852 }
5853 
5854 // Returns the code (symbol) of type code_type for index idx.
5856 (ESeq_code_type code_type,
5857  TIndex idx,
5858  bool get_code)
5859 {
      // Looks up entry idx in m_IndexString[get_code][code_type-1].
      // get_code selects which of the two string tables is read, and
      // code_type is used 1-based (hence the -1).
      //
      // Throws CSeqportUtil::CBadType if the selected table is empty, and
      // CSeqportUtil::CBadIndex if idx, after subtracting the code's
      // start-at value, is not below the table size. The CBadIndex carries
      // the already-shifted idx, not the value the caller passed in.
      //
      // NOTE(review): an idx below start-at relies on TIndex being unsigned
      // so that the subtraction wraps and fails the >= test -- confirm.
5860  if ( !m_IndexString[get_code][code_type-1].size() ) {
5861  throw CSeqportUtil::CBadType("GetCodeOrName");
5862  }
5863  idx -= m_StartAt[code_type-1];
5864  if (idx >= m_IndexString[get_code][code_type-1].size()) {
5865  throw CSeqportUtil::CBadIndex(idx, "GetCodeOrName");
5866  }
5867  return m_IndexString[get_code][code_type-1][idx];
5868 
5869 }
5870 
5871 // Converts CSeq_data::E_Choice type to ESeq_code_type and calls
5872 // the overloaded GetIndex().
5874 (CSeq_data::E_Choice code_type,
5875  const string& code)
5876 {
      // Thin adapter: translates the E_Choice with EChoiceToESeq() and
      // forwards code unchanged to the ESeq_code_type overload.
5877  return GetIndex(EChoiceToESeq(code_type), code);
5878 }
5879 
5880 // Get the index for code of type code_type. Throws CSeqportUtil::CBadType if no symbol map exists for code_type, or CSeqportUtil::CBadSymbol if code is not found.
5882 (ESeq_code_type code_type,
5883  const string& code)
5884 {
      // Looks code up in m_StringIndex[code_type-1] (code_type is 1-based)
      // and returns the mapped index.
      // Throws CSeqportUtil::CBadType when that map is empty, and
      // CSeqportUtil::CBadSymbol when code is not a key in it. No sentinel
      // value is ever returned.
      // NOTE(review): the declaration of `pos` (listing line 5886) is absent
      // from this listing; restore it from upstream before building.
5885  // Iterator to a map mapping a string code to a code index
5887 
5888  if ( !m_StringIndex[code_type-1].size() ) {
5889  throw CSeqportUtil::CBadType("GetIndex");
5890  }
5891  pos = m_StringIndex[code_type-1].find(code);
5892  if (pos != m_StringIndex[code_type-1].end()) {
5893  return pos->second;
5894  } else {
5895  throw CSeqportUtil::CBadSymbol(code, "GetIndex");
5896  }
5897 
5898 }
5899 
5900 // Gets complement of index for code type. Forwards to the ESeq_code_type
5901 // overload, which throws CBadType/CBadIndex rather than returning -1.
5903 (CSeq_data::E_Choice code_type,
5904  TIndex idx)
5905 {
      // Thin adapter: translates the E_Choice with EChoiceToESeq() and
      // forwards idx unchanged to the ESeq_code_type overload.
5906  return GetIndexComplement(EChoiceToESeq(code_type), idx);
5907 }
5908 
5909 // Returns the complement of the index for code_type. Throws
5910 // CSeqportUtil::CBadType if no complement table exists for code_type,
5911 // and CSeqportUtil::CBadIndex if idx is outside that table.
5913 (ESeq_code_type code_type,
5914  TIndex idx)
5915 {
      // Returns m_IndexComplement[code_type-1][idx - start-at]; code_type is
      // used 1-based (hence the -1).
      // Throws CSeqportUtil::CBadType when the complement table for
      // code_type is empty, and CSeqportUtil::CBadIndex when the shifted idx
      // is not below the table size. The CBadIndex carries the shifted idx.
5916 
5917  // Check that code is available
5918  if (!m_IndexComplement[code_type-1].size()) {
5919  throw CSeqportUtil::CBadType("GetIndexComplement");
5920  }
5921 
5922  // Check that idx is in range of code indices
5923  idx -= m_StartAt[code_type-1];
5924  if ( idx >= m_IndexComplement[code_type-1].size() ) {
5925  throw CSeqportUtil::CBadIndex(idx, "GetIndexComplement");
5926  }
5927 
5928  // Return the index of the complement
5929  return m_IndexComplement[code_type-1][idx];
5930  }
5931 
5933 (CSeq_data::E_Choice from_type,
5934  CSeq_data::E_Choice to_type,
5935  TIndex from_idx)
5936 {
      // Thin adapter: translates both E_Choice values with EChoiceToESeq()
      // and forwards from_idx unchanged to the ESeq_code_type overload.
5937  return GetMapToIndex(EChoiceToESeq(from_type),
5938  EChoiceToESeq(to_type),
5939  from_idx);
5940 }
5941 
5943 (ESeq_code_type from_type,
5944  ESeq_code_type to_type,
5945  TIndex from_idx)
5946 {
      // Selects the conversion table for the (from_type, to_type) pair and
      // returns its entry for from_idx.
      //
      // Only the pairs in the if/else ladder below have a table: conversions
      // among iupacna/ncbi2na/ncbi4na and among iupacaa/ncbieaa/ncbistdaa.
      // Any other pair, including from_type == to_type, leaves Map null and
      // throws CSeqportUtil::CBadType("GetMapToIndex").
      //
      // NOTE(review): the range check subtracts m_StartAt, but the final
      // lookup indexes m_Table with the raw from_idx. Presumably m_Table is
      // addressed by absolute code value -- confirm against the table setup.
      // NOTE(review): the head of the out-of-range statement (listing line
      // 5994, presumably the throw of a CBadIndex) is absent from this
      // listing; restore it from upstream before building.
5947  CMap_table* Map = 0;
5948 
5949  if (from_type == eSeq_code_type_iupacna) {
5950  if (to_type == eSeq_code_type_ncbi2na) {
5951  Map = m_IupacnaNcbi2na.GetPointer();
5952  } else if (to_type == eSeq_code_type_ncbi4na) {
5953  Map = m_IupacnaNcbi4na.GetPointer();
5954  }
5955  } else if (from_type == eSeq_code_type_ncbi4na) {
5956  if (to_type == eSeq_code_type_iupacna) {
5957  Map = m_Ncbi4naIupacna.GetPointer();
5958  } else if (to_type == eSeq_code_type_ncbi2na) {
5959  Map = m_Ncbi4naNcbi2na.GetPointer();
5960  }
5961  } else if (from_type == eSeq_code_type_ncbi2na) {
5962  if (to_type == eSeq_code_type_iupacna) {
5963  Map = m_Ncbi2naIupacna.GetPointer();
5964  } else if (to_type == eSeq_code_type_ncbi4na) {
5965  Map = m_Ncbi2naNcbi4na.GetPointer();
5966  }
5967  } else if (from_type == eSeq_code_type_iupacaa) {
5968  if (to_type == eSeq_code_type_ncbieaa) {
5969  Map = m_IupacaaNcbieaa.GetPointer();
5970  } else if (to_type == eSeq_code_type_ncbistdaa) {
5971  Map = m_IupacaaNcbistdaa.GetPointer();
5972  }
5973  } else if (from_type == eSeq_code_type_ncbieaa) {
5974  if (to_type == eSeq_code_type_iupacaa) {
5975  Map = m_NcbieaaIupacaa.GetPointer();
5976  } else if (to_type == eSeq_code_type_ncbistdaa) {
5977  Map = m_NcbieaaNcbistdaa.GetPointer();
5978  }
5979  } else if (from_type == eSeq_code_type_ncbistdaa) {
5980  if (to_type == eSeq_code_type_iupacaa) {
5981  Map = m_NcbistdaaIupacaa.GetPointer();
5982  } else if (to_type == eSeq_code_type_ncbieaa) {
5983  Map = m_NcbistdaaNcbieaa.GetPointer();
5984  }
5985  }
5986 
5987  // Check that requested map is available
5988  if (!Map) {
5989  throw CSeqportUtil::CBadType("GetMapToIndex");
5990  }
5991 
5992  // Check that from_idx is within range of from_type
5993  if (from_idx - (*Map).m_StartAt >= (TIndex)(*Map).m_Size) {
5995  from_idx - static_cast<TIndex>((*Map).m_StartAt),
5996  "GetMapToIndex");
5997  }
5998 
5999  // Return map value
6000  return (*Map).m_Table[from_idx];
6001 
6002 
6003 }
6004 
6005 
6007 (const CSeq_data& data,
6008  const string** str,
6009  const vector<char>** vec)
6010  const
6011 {
      // Exposes the underlying storage of `data` through one of the two
      // out-pointers, chosen by data.Which(): string-backed encodings set
      // *str, vector<char>-backed encodings set *vec.
      // Both out-pointers are zeroed first, so for e_not_set, e_Ncbipna,
      // e_Ncbipaa and e_Gap both remain null on return.
      // NOTE(review): listing line 6044 (presumably the case label for the
      // Ncbistdaa branch) is absent from this listing; as shown, the
      // GetNcbistdaa() assignment follows a break with no label of its own.
      // Restore it from upstream before building.
6012  *str = 0;
6013  *vec = 0;
6014 
6015  switch ( data.Which() ) {
6016  case CSeq_data::e_Iupacna:
6017  *str = &(data.GetIupacna().Get());
6018  break;
6019 
6020  case CSeq_data::e_Ncbi2na:
6021  *vec = &(data.GetNcbi2na().Get());
6022  break;
6023 
6024  case CSeq_data::e_Ncbi4na:
6025  *vec = &(data.GetNcbi4na().Get());
6026  break;
6027 
6028  case CSeq_data::e_Ncbi8na:
6029  *vec = &(data.GetNcbi8na().Get());
6030  break;
6031 
6032  case CSeq_data::e_Iupacaa:
6033  *str = &(data.GetIupacaa().Get());
6034  break;
6035 
6036  case CSeq_data::e_Ncbi8aa:
6037  *vec = &(data.GetNcbi8aa().Get());
6038  break;
6039 
6040  case CSeq_data::e_Ncbieaa:
6041  *str = &(data.GetNcbieaa().Get());
6042  break;
6043 
6045  *vec = &(data.GetNcbistdaa().Get());
6046  break;
6047 
6048  case CSeq_data::e_not_set:
6049  case CSeq_data::e_Ncbipna:
6050  case CSeq_data::e_Ncbipaa:
6051  case CSeq_data::e_Gap:
6052  break;
6053  } // end of switch statement
6054 }
6055 
6056 
6057 // same as above, but takes a non-const CSeq_data object.
6059 (CSeq_data& data,
6060  string** str,
6061  vector<char>** vec)
6062  const
6063 {
      // Mutable counterpart of the const-data overload: exposes the
      // underlying storage of `data` through one of the two out-pointers,
      // chosen by data.Which(), using the Set...() accessors so the caller
      // can modify the storage in place.
      // Both out-pointers are zeroed first, so for e_not_set, e_Ncbipna,
      // e_Ncbipaa and e_Gap both remain null on return.
      // NOTE(review): listing line 6096 (presumably the case label for the
      // Ncbistdaa branch) is absent from this listing; as shown, the
      // SetNcbistdaa() assignment follows a break with no label of its own.
      // Restore it from upstream before building.
6064  *str = 0;
6065  *vec = 0;
6066 
6067  switch ( data.Which() ) {
6068  case CSeq_data::e_Iupacna:
6069  *str = &(data.SetIupacna().Set());
6070  break;
6071 
6072  case CSeq_data::e_Ncbi2na:
6073  *vec = &(data.SetNcbi2na().Set());
6074  break;
6075 
6076  case CSeq_data::e_Ncbi4na:
6077  *vec = &(data.SetNcbi4na().Set());
6078  break;
6079 
6080  case CSeq_data::e_Ncbi8na:
6081  *vec = &(data.SetNcbi8na().Set());
6082  break;
6083 
6084  case CSeq_data::e_Iupacaa:
6085  *str = &(data.SetIupacaa().Set());
6086  break;
6087 
6088  case CSeq_data::e_Ncbi8aa:
6089  *vec = &(data.SetNcbi8aa().Set());
6090  break;
6091 
6092  case CSeq_data::e_Ncbieaa:
6093  *str = &(data.SetNcbieaa().Set());
6094  break;
6095 
6097  *vec = &(data.SetNcbistdaa().Set());
6098  break;
6099 
6100  case CSeq_data::e_not_set:
6101  case CSeq_data::e_Ncbipna:
6102  case CSeq_data::e_Ncbipaa:
6103  case CSeq_data::e_Gap:
6104  break;
6105  } // end of switch statement
6106 }
6107 
6108 
6109 /////////////////////////////////////////////////////////////////////////////
6110 // CSeqportUtil_implementation::sm_StrAsnData -- some very long and ugly string
6111 //
6112 
6113 // local copy of seqcode.prt sequence alphabet and conversion table ASN.1
6115 {
6116  "-- This is the set of NCBI sequence code tables\n",
6117  "-- J.Ostell 10/18/91\n",
6118  "--\n",
6119  "\n",
6120  "Seq-code-set ::= {\n",
6121  " codes { -- codes\n",
6122  " { -- IUPACna\n",
6123  " code iupacna ,\n",
6124  " num 25 , -- continuous 65-89\n",
6125  " one-letter TRUE , -- all one letter codes\n",
6126  " start-at 65 , -- starts with A, ASCII 65\n",
6127  " table {\n",
6128  " { symbol \"A\", name \"Adenine\" },\n",
6129  " { symbol \"B\" , name \"G or T or C\" },\n",
6130  " { symbol \"C\", name \"Cytosine\" },\n",
6131  " { symbol \"D\", name \"G or A or T\" },\n",
6132  " { symbol \"\", name \"\" },\n",
6133  " { symbol \"\", name \"\" },\n",
6134  " { symbol \"G\", name \"Guanine\" },\n",
6135  " { symbol \"H\", name \"A or C or T\" } ,\n",
6136  " { symbol \"\", name \"\" },\n",
6137  " { symbol \"\", name \"\" },\n",
6138  " { symbol \"K\", name \"G or T\" },\n",
6139  " { symbol \"\", name \"\"},\n",
6140  " { symbol \"M\", name \"A or C\" },\n",
6141  " { symbol \"N\", name \"A or G or C or T\" } ,\n",
6142  " { symbol \"\", name \"\" },\n",
6143  " { symbol \"\", name \"\" },\n",
6144  " { symbol \"\", name \"\"},\n",
6145  " { symbol \"R\", name \"G or A\"},\n",
6146  " { symbol \"S\", name \"G or C\"},\n",
6147  " { symbol \"T\", name \"Thymine\"},\n",
6148  " { symbol \"\", name \"\"},\n",
6149  " { symbol \"V\", name \"G or C or A\"},\n",
6150  " { symbol \"W\", name \"A or T\" },\n",
6151  " { symbol \"\", name \"\"},\n",
6152  " { symbol \"Y\", name \"T or C\"}\n",
6153  " } , -- end of table\n",
6154  " comps { -- complements\n",
6155  " 84,\n",
6156  " 86,\n",
6157  " 71,\n",
6158  " 72,\n",
6159  " 69,\n",
6160  " 70,\n",
6161  " 67,\n",
6162  " 68,\n",
6163  " 73,\n",
6164  " 74,\n",
6165  " 77,\n",
6166  " 76,\n",
6167  " 75,\n",
6168  " 78,\n",
6169  " 79,\n",
6170  " 80,\n",
6171  " 81,\n",
6172  " 89,\n",
6173  " 83,\n",
6174  " 65,\n",
6175  " 85,\n",
6176  " 66,\n",
6177  " 87,\n",
6178  " 88,\n",
6179  " 82\n",
6180  " }\n",
6181  " } ,\n",
6182  " { -- IUPACaa\n",
6183  " code iupacaa ,\n",
6184  " num 26 , -- continuous 65-90\n",
6185  " one-letter TRUE , -- all one letter codes\n",
6186  " start-at 65 , -- starts with A, ASCII 65\n",
6187  " table {\n",
6188  " { symbol \"A\", name \"Alanine\" },\n",
6189  " { symbol \"B\" , name \"Asp or Asn\" },\n",
6190  " { symbol \"C\", name \"Cysteine\" },\n",
6191  " { symbol \"D\", name \"Aspartic Acid\" },\n",
6192  " { symbol \"E\", name \"Glutamic Acid\" },\n",
6193  " { symbol \"F\", name \"Phenylalanine\" },\n",
6194  " { symbol \"G\", name \"Glycine\" },\n",
6195  " { symbol \"H\", name \"Histidine\" } ,\n",
6196  " { symbol \"I\", name \"Isoleucine\" },\n",
6197  " { symbol \"J\", name \"Leu or Ile\" },\n",
6198  " { symbol \"K\", name \"Lysine\" },\n",
6199  " { symbol \"L\", name \"Leucine\" },\n",
6200  " { symbol \"M\", name \"Methionine\" },\n",
6201  " { symbol \"N\", name \"Asparagine\" } ,\n",
6202  " { symbol \"O\", name \"Pyrrolysine\" },\n",
6203  " { symbol \"P\", name \"Proline\" },\n",
6204  " { symbol \"Q\", name \"Glutamine\"},\n",
6205  " { symbol \"R\", name \"Arginine\"},\n",
6206  " { symbol \"S\", name \"Serine\"},\n",
6207  " { symbol \"T\", name \"Threonine\"},\n",
6208  " { symbol \"U\", name \"Selenocysteine\"}, -- was empty\n",
6209  " { symbol \"V\", name \"Valine\"},\n",
6210  " { symbol \"W\", name \"Tryptophan\" },\n",
6211  " { symbol \"X\", name \"Undetermined or atypical\"},\n",
6212  " { symbol \"Y\", name \"Tyrosine\"},\n",
6213  " { symbol \"Z\", name \"Glu or Gln\" }\n",
6214  " } -- end of table \n",
6215  " } ,\n",
6216  " { -- IUPACeaa\n",
6217  " code ncbieaa ,\n",
6218  " num 49 , -- continuous 42-90\n",
6219  " one-letter TRUE , -- all one letter codes\n",
6220  " start-at 42 , -- starts with *, ASCII 42\n",
6221  " table {\n",
6222  " { symbol \"*\", name \"Termination\" } ,\n",
6223  " { symbol \"\", name \"\" } ,\n",
6224  " { symbol \"\", name \"\" } ,\n",
6225  " { symbol \"-\", name \"Gap\" } ,\n",
6226  " { symbol \"\", name \"\" } ,\n",
6227  " { symbol \"\", name \"\" } ,\n",
6228  " { symbol \"\", name \"\" } ,\n",
6229  " { symbol \"\", name \"\" } ,\n",
6230  " { symbol \"\", name \"\" } ,\n",
6231  " { symbol \"\", name \"\" } ,\n",
6232  " { symbol \"\", name \"\" } ,\n",
6233  " { symbol \"\", name \"\" } ,\n",
6234  " { symbol \"\", name \"\" } ,\n",
6235  " { symbol \"\", name \"\" } ,\n",
6236  " { symbol \"\", name \"\" } ,\n",
6237  " { symbol \"\", name \"\" } ,\n",
6238  " { symbol \"\", name \"\" } ,\n",
6239  " { symbol \"\", name \"\" } ,\n",
6240  " { symbol \"\", name \"\" } ,\n",
6241  " { symbol \"\", name \"\" } ,\n",
6242  " { symbol \"\", name \"\" } ,\n",
6243  " { symbol \"\", name \"\" } ,\n",
6244  " { symbol \"\", name \"\" } ,\n",
6245  " { symbol \"A\", name \"Alanine\" },\n",
6246  " { symbol \"B\" , name \"Asp or Asn\" },\n",
6247  " { symbol \"C\", name \"Cysteine\" },\n",
6248  " { symbol \"D\", name \"Aspartic Acid\" },\n",
6249  " { symbol \"E\", name \"Glutamic Acid\" },\n",
6250  " { symbol \"F\", name \"Phenylalanine\" },\n",
6251  " { symbol \"G\", name \"Glycine\" },\n",
6252  " { symbol \"H\", name \"Histidine\" } ,\n",
6253  " { symbol \"I\", name \"Isoleucine\" },\n",
6254  " { symbol \"J\", name \"Leu or Ile\" },\n",
6255  " { symbol \"K\", name \"Lysine\" },\n",
6256  " { symbol \"L\", name \"Leucine\" },\n",
6257  " { symbol \"M\", name \"Methionine\" },\n",
6258  " { symbol \"N\", name \"Asparagine\" } ,\n",
6259  " { symbol \"O\", name \"Pyrrolysine\" },\n",
6260  " { symbol \"P\", name \"Proline\" },\n",
6261  " { symbol \"Q\", name \"Glutamine\"},\n",
6262  " { symbol \"R\", name \"Arginine\"},\n",
6263  " { symbol \"S\", name \"Serine\"},\n",
6264  " { symbol \"T\", name \"Threonine\"},\n",
6265  " { symbol \"U\", name \"Selenocysteine\"},\n",
6266  " { symbol \"V\", name \"Valine\"},\n",
6267  " { symbol \"W\", name \"Tryptophan\" },\n",
6268  " { symbol \"X\", name \"Undetermined or atypical\"},\n",
6269  " { symbol \"Y\", name \"Tyrosine\"},\n",
6270  " { symbol \"Z\", name \"Glu or Gln\" }\n",
6271  " } -- end of table \n",
6272  " } ,\n",
6273  " { -- IUPACaa3\n",
6274  " code iupacaa3 ,\n",
6275  " num 28 , -- continuous 0-27\n",
6276  " one-letter FALSE , -- all 3 letter codes\n",
6277  " table {\n",
6278  " { symbol \"---\", name \"Gap\" } ,\n",
6279  " { symbol \"Ala\", name \"Alanine\" },\n",
6280  " { symbol \"Asx\" , name \"Asp or Asn\" },\n",
6281  " { symbol \"Cys\", name \"Cysteine\" },\n",
6282  " { symbol \"Asp\", name \"Aspartic Acid\" },\n",
6283  " { symbol \"Glu\", name \"Glutamic Acid\" },\n",
6284  " { symbol \"Phe\", name \"Phenylalanine\" },\n",
6285  " { symbol \"Gly\", name \"Glycine\" },\n",
6286  " { symbol \"His\", name \"Histidine\" } ,\n",
6287  " { symbol \"Ile\", name \"Isoleucine\" },\n",
6288  " { symbol \"Lys\", name \"Lysine\" },\n",
6289  " { symbol \"Leu\", name \"Leucine\" },\n",
6290  " { symbol \"Met\", name \"Methionine\" },\n",
6291  " { symbol \"Asn\", name \"Asparagine\" } ,\n",
6292  " { symbol \"Pro\", name \"Proline\" },\n",
6293  " { symbol \"Gln\", name \"Glutamine\"},\n",
6294  " { symbol \"Arg\", name \"Arginine\"},\n",
6295  " { symbol \"Ser\", name \"Serine\"},\n",
6296  " { symbol \"Thr\", name \"Threonine\"},\n",
6297  " { symbol \"Val\", name \"Valine\"},\n",
6298  " { symbol \"Trp\", name \"Tryptophan\" },\n",
6299  " { symbol \"Xxx\", name \"Undetermined or atypical\"},\n",
6300  " { symbol \"Tyr\", name \"Tyrosine\"},\n",
6301  " { symbol \"Glx\", name \"Glu or Gln\" },\n",
6302  " { symbol \"Sec\", name \"Selenocysteine\"},\n",
6303  " { symbol \"Ter\", name \"Termination\" },\n",
6304  " { symbol \"Pyl\", name \"Pyrrolysine\"},\n",
6305  " { symbol \"Xle\", name \"Leu or Ile\"}\n",
6306  " } -- end of table \n",
6307  " } ,\n",
6308  " { -- NCBIstdaa\n",
6309  " code ncbistdaa ,\n",
6310  " num 28 , -- continuous 0-27\n",
6311  " one-letter TRUE , -- all one letter codes\n",
6312  " table {\n",
6313  " { symbol \"-\", name \"Gap\" } , -- 0\n",
6314  " { symbol \"A\", name \"Alanine\" }, -- 1\n",
6315  " { symbol \"B\" , name \"Asp or Asn\" }, -- 2\n",
6316  " { symbol \"C\", name \"Cysteine\" }, -- 3\n",
6317  " { symbol \"D\", name \"Aspartic Acid\" }, -- 4\n",
6318  " { symbol \"E\", name \"Glutamic Acid\" }, -- 5\n",
6319  " { symbol \"F\", name \"Phenylalanine\" }, -- 6\n",
6320  " { symbol \"G\", name \"Glycine\" }, -- 7\n",
6321  " { symbol \"H\", name \"Histidine\" } , -- 8\n",
6322  " { symbol \"I\", name \"Isoleucine\" }, -- 9\n",
6323  " { symbol \"K\", name \"Lysine\" }, -- 10\n",
6324  " { symbol \"L\", name \"Leucine\" }, -- 11\n",
6325  " { symbol \"M\", name \"Methionine\" }, -- 12\n",
6326  " { symbol \"N\", name \"Asparagine\" } , -- 13\n",
6327  " { symbol \"P\", name \"Proline\" }, -- 14\n",
6328  " { symbol \"Q\", name \"Glutamine\"}, -- 15\n",
6329  " { symbol \"R\", name \"Arginine\"}, -- 16\n",
6330  " { symbol \"S\", name \"Serine\"}, -- 17\n",
6331  " { symbol \"T\", name \"Threoine\"}, -- 18\n",
6332  " { symbol \"V\", name \"Valine\"}, -- 19\n",
6333  " { symbol \"W\", name \"Tryptophan\" }, -- 20\n",
6334  " { symbol \"X\", name \"Undetermined or atypical\"}, -- 21\n",
6335  " { symbol \"Y\", name \"Tyrosine\"}, -- 22\n",
6336  " { symbol \"Z\", name \"Glu or Gln\" }, -- 23\n",
6337  " { symbol \"U\", name \"Selenocysteine\"}, -- 24 \n",
6338  " { symbol \"*\", name \"Termination\" }, -- 25\n",
6339  " { symbol \"O\", name \"Pyrrolysine\" }, -- 26\n",
6340  " { symbol \"J\", name \"Leu or Ile\" } -- 27\n",
6341  " } -- end of table \n",
6342  " } ,\n",
6343  " { -- NCBI2na\n",
6344  " code ncbi2na ,\n",
6345  " num 4 , -- continuous 0-3\n",
6346  " one-letter TRUE , -- all one letter codes\n",
6347  " table {\n",
6348  " { symbol \"A\", name \"Adenine\" },\n",
6349  " { symbol \"C\", name \"Cytosine\" },\n",
6350  " { symbol \"G\", name \"Guanine\" },\n",
6351  " { symbol \"T\", name \"Thymine/Uracil\"}\n",
6352  " } , -- end of table \n",
6353  " comps { -- complements\n",
6354  " 3,\n",
6355  " 2,\n",
6356  " 1,\n",
6357  " 0\n",
6358  " }\n",
6359  " } ,\n",
6360  " { -- NCBI4na\n",
6361  " code ncbi4na ,\n",
6362  " num 16 , -- continuous 0-15\n",
6363  " one-letter TRUE , -- all one letter codes\n",
6364  " table {\n",
6365  " { symbol \"-\", name \"Gap\" } ,\n",
6366  " { symbol \"A\", name \"Adenine\" },\n",
6367  " { symbol \"C\", name \"Cytosine\" },\n",
6368  " { symbol \"M\", name \"A or C\" },\n",
6369  " { symbol \"G\", name \"Guanine\" },\n",
6370  " { symbol \"R\", name \"G or A\"},\n",
6371  " { symbol \"S\", name \"G or C\"},\n",
6372  " { symbol \"V\", name \"G or C or A\"},\n",
6373  " { symbol \"T\", name \"Thymine/Uracil\"},\n",
6374  " { symbol \"W\", name \"A or T\" },\n",
6375  " { symbol \"Y\", name \"T or C\"} ,\n",
6376  " { symbol \"H\", name \"A or C or T\" } ,\n",
6377  " { symbol \"K\", name \"G or T\" },\n",
6378  " { symbol \"D\", name \"G or A or T\" },\n",
6379  " { symbol \"B\" , name \"G or T or C\" },\n",
6380  " { symbol \"N\", name \"A or G or C or T\" }\n",
6381  " } , -- end of table \n",
6382  " comps { -- complements\n",
6383  " 0 ,\n",
6384  " 8 ,\n",
6385  " 4 ,\n",
6386  " 12,\n",
6387  " 2 ,\n",
6388  " 10,\n",
6389  " 6 ,\n",
6390  " 14,\n",
6391  " 1 ,\n",
6392  " 9 ,\n",
6393  " 5 ,\n",
6394  " 13,\n",
6395  " 3 ,\n",
6396  " 11,\n",
6397  " 7 ,\n",
6398  " 15\n",
6399  " }\n",
6400  " }\n",
6401  " } , -- end of codes\n",
6402  " maps {\n",
6403  " {\n",
6404  " from iupacna ,\n",
6405  " to ncbi2na ,\n",
6406  " num 25 ,\n",
6407  " start-at 65 ,\n",
6408  " table {\n",
6409  " 0, -- A -> A\n",
6410  " 1, -- B -> C\n",
6411  " 1, -- C -> C\n",
6412  " 2, -- D -> G\n",
6413  " 255,\n",
6414  " 255,\n",
6415  " 2, -- G -> G\n",
6416  " 0, -- H -> A\n",
6417  " 255,\n",
6418  " 255,\n",
6419  " 2, -- K -> G\n",
6420  " 255,\n",
6421  " 1, -- M -> C\n",
6422  " 0, -- N -> A\n",
6423  " 255,\n",
6424  " 255,\n",
6425  " 255,\n",
6426  " 2, -- R -> G\n",
6427  " 1, -- S -> C\n",
6428  " 3, -- T -> T\n",
6429  " 255,\n",
6430  " 0, -- V -> A\n",
6431  " 3, -- W -> T\n",
6432  " 255,\n",
6433  " 3 } -- Y -> T\n",
6434  " } ,\n",
6435  " {\n",
6436  " from iupacna ,\n",
6437  " to ncbi4na ,\n",
6438  " num 26 ,\n",
6439  " start-at 64 ,\n",
6440  " table {\n",
6441  " 0, -- @ used by FastaToSeqEntry to convert hyphen to gap\n",
6442  " 1, -- A\n",
6443  " 14, -- B\n",
6444  " 2, -- C\n",
6445  " 13, -- D\n",
6446  " 255,\n",
6447  " 255,\n",
6448  " 4, -- G\n",
6449  " 11, -- H\n",
6450  " 255,\n",
6451  " 255,\n",
6452  " 12, -- K\n",
6453  " 255,\n",
6454  " 3, -- M\n",
6455  " 15, -- N\n",
6456  " 255,\n",
6457  " 255,\n",
6458  " 255,\n",
6459  " 5, -- R\n",
6460  " 6, -- S\n",
6461  " 8, -- T\n",
6462  " 255,\n",
6463  " 7, -- V\n",
6464  " 9, -- W\n",
6465  " 255,\n",
6466  " 10 } -- Y\n",
6467  " } ,\n",
6468  " {\n",
6469  " from ncbi2na ,\n",
6470  " to iupacna ,\n",
6471  " num 4 ,\n",
6472  " table {\n",
6473  " 65, -- A\n",
6474  " 67, -- C\n",
6475  " 71, -- G\n",
6476  " 84 } -- T\n",
6477  " } ,\n",
6478  " {\n",
6479  " from ncbi2na ,\n",
6480  " to ncbi4na ,\n",
6481  " num 4 ,\n",
6482  " table {\n",
6483  " 1, -- A\n",
6484  " 2, -- C\n",
6485  " 4, -- G\n",
6486  " 8 } -- T\n",
6487  " } ,\n",
6488  " {\n",
6489  " from ncbi4na ,\n",
6490  " to iupacna ,\n",
6491  " num 16 ,\n",
6492  " table {\n",
6493  " 78, -- gap -> N\n",
6494  " 65, -- A\n",
6495  " 67, -- C\n",
6496  " 77, -- M\n",
6497  " 71, -- G\n",
6498  " 82, -- R\n",
6499  " 83, -- S\n",
6500  " 86, -- V\n",
6501  " 84, -- T\n",
6502  " 87, -- W\n",
6503  " 89, -- Y\n",
6504  " 72, -- H\n",
6505  " 75, -- K\n",
6506  " 68, -- D\n",
6507  " 66, -- B\n",
6508  " 78 } -- N\n",
6509  " } ,\n",
6510  " {\n",
6511  " from ncbi4na ,\n",
6512  " to ncbi2na ,\n",
6513  " num 16 ,\n",
6514  " table {\n",
6515  " 3, -- gap -> T\n",
6516  " 0, -- A -> A\n",
6517  " 1, -- C -> C\n",
6518  " 1, -- M -> C\n",
6519  " 2, -- G -> G\n",
6520  " 2, -- R -> G\n",
6521  " 1, -- S -> C\n",
6522  " 0, -- V -> A\n",
6523  " 3, -- T -> T\n",
6524  " 3, -- W -> T\n",
6525  " 3, -- Y -> T\n",
6526  " 0, -- H -> A\n",
6527  " 2, -- K -> G\n",
6528  " 2, -- D -> G\n",
6529  " 1, -- B -> C\n",
6530  " 0 } -- N -> A\n",
6531  " } ,\n",
6532  " {\n",
6533  " from iupacaa ,\n",
6534  " to ncbieaa ,\n",
6535  " num 26 ,\n",
6536  " start-at 65 ,\n",
6537  " table {\n",
6538  " 65 , -- they map directly\n",
6539  " 66 ,\n",
6540  " 67 ,\n",
6541  " 68,\n",
6542  " 69,\n",
6543  " 70,\n",
6544  " 71,\n",
6545  " 72,\n",
6546  " 73,\n",
6547  " 74, -- J - was 255\n",
6548  " 75,\n",
6549  " 76,\n",
6550  " 77,\n",
6551  " 78,\n",
6552  " 79, -- O - was 255\n",
6553  " 80,\n",
6554  " 81,\n",
6555  " 82,\n",
6556  " 83,\n",
6557  " 84,\n",
6558  " 85, -- U - was 255\n",
6559  " 86,\n",
6560  " 87,\n",
6561  " 88,\n",
6562  " 89,\n",
6563  " 90 }\n",
6564  " } ,\n",
6565  " {\n",
6566  " from ncbieaa ,\n",
6567  " to iupacaa ,\n",
6568  " num 49 ,\n",
6569  " start-at 42 ,\n",
6570  " table {\n",
6571  " 88 , -- termination -> X\n",
6572  " 255,\n",
6573  " 255,\n",
6574  " 88, -- Gap -> X\n",
6575  " 255,\n",
6576  " 255,\n",
6577  " 255,\n",
6578  " 255,\n",
6579  " 255,\n",
6580  " 255,\n",
6581  " 255,\n",
6582  " 255,\n",
6583  " 255,\n",
6584  " 255,\n",
6585  " 255,\n",
6586  " 255,\n",
6587  " 255,\n",
6588  " 255,\n",
6589  " 255,\n",
6590  " 255,\n",
6591  " 255,\n",
6592  " 255,\n",
6593  " 255,\n",
6594  " 65 , -- from here they map directly\n",
6595  " 66 ,\n",
6596  " 67 ,\n",
6597  " 68,\n",
6598  " 69,\n",
6599  " 70,\n",
6600  " 71,\n",
6601  " 72,\n",
6602  " 73,\n",
6603  " 74, -- J - was 255\n",
6604  " 75,\n",
6605  " 76,\n",
6606  " 77,\n",
6607  " 78,\n",
6608  " 79, -- O - was 255\n",
6609  " 80,\n",
6610  " 81,\n",
6611  " 82,\n",
6612  " 83,\n",
6613  " 84,\n",
6614  " 85, -- U was -> X 88\n",
6615  " 86,\n",
6616  " 87,\n",
6617  " 88,\n",
6618  " 89,\n",
6619  " 90 }\n",
6620  " } ,\n",
6621  " {\n",
6622  " from iupacaa ,\n",
6623  " to ncbistdaa ,\n",
6624  " num 26 ,\n",
6625  " start-at 65 ,\n",
6626  " table {\n",
6627  " 1 , -- they map directly\n",
6628  " 2 ,\n",
6629  " 3 ,\n",
6630  " 4,\n",
6631  " 5,\n",
6632  " 6,\n",
6633  " 7,\n",
6634  " 8,\n",
6635  " 9,\n",
6636  " 27, -- J - was 255\n",
6637  " 10,\n",
6638  " 11,\n",
6639  " 12,\n",
6640  " 13,\n",
6641  " 26, -- O - was 255\n",
6642  " 14,\n",
6643  " 15,\n",
6644  " 16,\n",
6645  " 17,\n",
6646  " 18,\n",
6647  " 24, -- U - was 255\n",
6648  " 19,\n",
6649  " 20,\n",
6650  " 21,\n",
6651  " 22,\n",
6652  " 23 }\n",
6653  " } ,\n",
6654  " {\n",
6655  " from ncbieaa ,\n",
6656  " to ncbistdaa ,\n",
6657  " num 49 ,\n",
6658  " start-at 42 ,\n",
6659  " table {\n",
6660  " 25, -- termination\n",
6661  " 255,\n",
6662  " 255,\n",
6663  " 0, -- Gap\n",
6664  " 255,\n",
6665  " 255,\n",
6666  " 255,\n",
6667  " 255,\n",
6668  " 255,\n",
6669  " 255,\n",
6670  " 255,\n",
6671  " 255,\n",
6672  " 255,\n",
6673  " 255,\n",
6674  " 255,\n",
6675  " 255,\n",
6676  " 255,\n",
6677  " 255,\n",
6678  " 255,\n",
6679  " 255,\n",
6680  " 255,\n",
6681  " 255,\n",
6682  " 255,\n",
6683  " 1 , -- they map directly\n",
6684  " 2 ,\n",
6685  " 3 ,\n",
6686  " 4,\n",
6687  " 5,\n",
6688  " 6,\n",
6689  " 7,\n",
6690  " 8,\n",
6691  " 9,\n",
6692  " 27, -- J - was 255\n",
6693  " 10,\n",
6694  " 11,\n",
6695  " 12,\n",
6696  " 13,\n",
6697  " 26, -- O - was 255\n",
6698  " 14,\n",
6699  " 15,\n",
6700  " 16,\n",
6701  " 17,\n",
6702  " 18,\n",
6703  " 24, -- U\n",
6704  " 19,\n",
6705  " 20,\n",
6706  " 21,\n",
6707  " 22,\n",
6708  " 23 }\n",
6709  " } ,\n",
6710  " {\n",
6711  " from ncbistdaa ,\n",
6712  " to ncbieaa ,\n",
6713  " num 28 ,\n",
6714  " table {\n",
6715  " 45 , -- \"-\"\n",
6716  " 65 , -- they map directly with holes for O and J\n",
6717  " 66 ,\n",
6718  " 67 ,\n",
6719  " 68,\n",
6720  " 69,\n",
6721  " 70,\n",
6722  " 71,\n",
6723  " 72,\n",
6724  " 73,\n",
6725  " 75,\n",
6726  " 76,\n",
6727  " 77,\n",
6728  " 78,\n",
6729  " 80,\n",
6730  " 81,\n",
6731  " 82,\n",
6732  " 83,\n",
6733  " 84,\n",
6734  " 86,\n",
6735  " 87,\n",
6736  " 88,\n",
6737  " 89,\n",
6738  " 90,\n",
6739  " 85, -- U\n",
6740  " 42, -- *\n",
6741  " 79, -- O - new\n",
6742  " 74} -- J - new\n",
6743  " } ,\n",
6744  " {\n",
6745  " from ncbistdaa ,\n",
6746  " to iupacaa ,\n",
6747  " num 28 ,\n",
6748  " table {\n",
6749  " 255 , -- \"-\"\n",
6750  " 65 , -- they map directly with holes for O and J\n",
6751  " 66 ,\n",
6752  " 67 ,\n",
6753  " 68,\n",
6754  " 69,\n",
6755  " 70,\n",
6756  " 71,\n",
6757  " 72,\n",
6758  " 73,\n",
6759  " 75,\n",
6760  " 76,\n",
6761  " 77,\n",
6762  " 78,\n",
6763  " 80,\n",
6764  " 81,\n",
6765  " 82,\n",
6766  " 83,\n",
6767  " 84,\n",
6768  " 86,\n",
6769  " 87,\n",
6770  " 88,\n",
6771  " 89,\n",
6772  " 90,\n",
6773  " 85, -- U - was 88\n",
6774  " 255, -- *\n",
6775  " 79, -- O - new\n",
6776  " 74} -- J - new\n",
6777  " } \n",
6778  " }\n",
6779  "-- end of seq-code-set -- }", // make sure '}' is last symbol of ASN text
6780  0 // to indicate that there is no more data
6781 };
6782 
6783 
6784 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
ncbi::TMaskedQueryRegions mask
void AddAmbiguity(char in_byte, TSeqPos &seq_pos)
char m_LastAmbChar
Last previous ambiguity character.
CAmbiguityContext(vector< Uint4 > &amb_buff, int seq_length)
vector< Uint4 > & m_vAmbBuf
Ambiguity buffer to fill.
CObject –.
Definition: ncbiobj.hpp:180
CRandom::
Definition: random_gen.hpp:66
CSafeStatic<>::
static SIZE_TYPE Pack(const string &src, TCoding src_coding, vector< char > &dst, TCoding &dst_coding, TSeqPos length=ncbi::numeric_limits< TSeqPos >::max())
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
static SIZE_TYPE Reverse(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
static SIZE_TYPE Complement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
@ e_Ncbi8na
Definition: sequtil.hpp:52
@ e_Iupacna
Definition: sequtil.hpp:47
@ e_Ncbieaa
Definition: sequtil.hpp:57
@ e_not_set
Definition: sequtil.hpp:44
@ e_Ncbi8aa
Definition: sequtil.hpp:56
@ e_Ncbi4na
Definition: sequtil.hpp:50
@ e_Ncbi2na
Definition: sequtil.hpp:48
@ e_Ncbistdaa
Definition: sequtil.hpp:58
@ e_Iupacaa
Definition: sequtil.hpp:55
CSeq_code_set –.
CWrapper_2D(size_t size1, int start1, size_t size2, int start2)
CWrapper_table(size_t size, size_t start)
const string & GetIupacaa3(TIndex ncbistdaa)
CRef< CMap_table > m_Ncbi2naNcbi4na
CRef< CFast_table2 > InitFastNcbi4naIupacna()
TSeqPos GetNcbistdaaCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos AppendNcbi4na(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const
TSeqPos GetIupacnaCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos Reverse(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CCode_comp > m_Ncbi4naComplement
CRef< CCode_table > m_Ncbieaa
TSeqPos AppendIupacaa(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const
CRef< CMap_table > m_IupacaaNcbistdaa
CRef< CMap_table > InitMaps(ESeq_code_type from_type, ESeq_code_type to_type)
TSeqPos KeepNcbi4na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CCode_comp > InitIupacnaComplement()
CRef< CFast_4_1 > InitFastIupacnaNcbi2na()
CRef< CCode_comp > InitNcbi2naComplement()
TSeqPos GetNcbi2naCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos GetCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos KeepIupacna(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos ComplementIupacna(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CCode_table > m_Ncbistdaa
CRef< CMap_table > m_NcbistdaaIupacaa
CRef< CFast_2_1 > InitFastNcbi4naNcbi2na()
CRef< CAmbig_detect > InitAmbigNcbi4naNcbi2na()
TSeqPos ReverseNcbi4na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CWrapper_table< char > CCode_table
TSeqPos KeepNcbi2na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
bool FastValidateNcbieaa(const CSeq_data &in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos AppendNcbieaa(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const
CSeqportUtil::TPair TPair
TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CMap_table > m_IupacnaNcbi2na
vector< map< string, TIndex > > m_StringIndex
CRef< CMap_table > m_Ncbi2naIupacna
bool IsCodeAvailable(CSeq_data::E_Choice code_type)
CRef< CCode_comp > m_Ncbi2naComplement
CRef< CCode_table > m_Iupacna
TSeqPos GetNcbieaaCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos ReverseComplementNcbi4na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CMap_table > m_NcbieaaNcbistdaa
vector< vector< TIndex > > m_IndexComplement
CWrapper_2D< unsigned char > CFast_4_1
CRef< CMap_table > m_IupacaaNcbieaa
TSeqPos GetAmbigs(const CSeq_data &in_seq, CSeq_data *out_seq, vector< TSeqPos > *out_indices, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx, TSeqPos uLength) const
const string & GetCodeOrName(CSeq_data::E_Choice code_type, TIndex idx, bool get_code)
TPair GetCodeIndexFromTo(CSeq_data::E_Choice code_type)
CRef< CSeq_code_set > m_SeqCodeSet
vector< vector< string > > m_IndexString[2]
CWrapper_2D< unsigned char > CFast_2_1
CRef< CCode_comp > InitNcbi4naComplement()
CWrapper_table< int > CMap_table
CRef< CCode_rev > InitNcbi2naRev()
CRef< CSeq_code_set > Init()
TSeqPos KeepIupacaa(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CWrapper_table< char > CCode_rev
CRef< CAmbig_detect > m_DetectAmbigNcbi4naNcbi2na
CRef< SMasksArray > m_Masks
TSeqPos ComplementNcbi4na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
size_t Pack(CSeq_data *in_seq, TSeqPos uLength) const
bool FastValidateIupacaa(const CSeq_data &in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
void ValidateIupacna(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CMap_table > m_Ncbi4naNcbi2na
CRef< CCode_rev > InitNcbi4naRev()
TIndex GetIndexComplement(CSeq_data::E_Choice code_type, TIndex idx)
TSeqPos ReverseNcbi2na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CCode_comp > m_Iupacna_complement
CSeqportUtil::TIndex TIndex
TSeqPos ReverseIupacna(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CWrapper_table< unsigned short > CFast_table2
TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
CRef< CFast_2_1 > m_FastNcbi4naNcbi2na
CRef< CCode_table > InitCodes(ESeq_code_type code_type)
CRef< CFast_4_1 > m_FastIupacnaNcbi2na
CRef< CFast_2_1 > m_FastIupacnaNcbi4na
CRef< CMap_table > m_Ncbi4naIupacna
TSeqPos Keep(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CAmbig_detect > InitAmbigIupacnaNcbi2na()
TSeqPos GetAmbigs_iupacna_ncbi2na(const CSeq_data &in_seq, CSeq_data *out_seq, vector< TSeqPos > *out_indices, TSeqPos uBeginIdx, TSeqPos uLength) const
void Validate(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const
void ValidateNcbieaa(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos GetAmbigs_ncbi4na_ncbi2na(const CSeq_data &in_seq, CSeq_data *out_seq, vector< TSeqPos > *out_indices, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos ReverseComplementIupacna(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos Adjust(TSeqPos *uBeginIdx, TSeqPos *uLength, TSeqPos uInSeqBytes, TSeqPos uInSeqsPerByte, TSeqPos uOutSeqsPerByte) const
TIndex GetIndex(CSeq_data::E_Choice code_type, const string &code)
CRef< CMap_table > m_NcbistdaaNcbieaa
TSeqPos Complement(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CWrapper_table< string > CCode_table_str
static const char * sm_StrAsnData[]
CRef< CCode_table > m_Iupacaa
CRef< CCode_rev > m_Ncbi2naRev
TSeqPos AppendNcbi2na(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const
TSeqPos AppendNcbistdaa(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const
bool FastValidateNcbistdaa(const CSeq_data &in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos Append(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const
CWrapper_table< unsigned int > CFast_table4
TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx, TSeqPos uLength, bool bAmbig, CRandom::TValue seed, TSeqPos total_length=0, TSeqPos *out_seq_length=0, vector< Uint4 > *blast_ambig=0) const
TSeqPos KeepNcbieaa(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CWrapper_table< unsigned char > CAmbig_detect
bool FastValidateIupacna(const CSeq_data &in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos ComplementNcbi2na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos AppendIupacna(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2) const
void ValidateIupacaa(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos x_ConvertAmbig(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx, TSeqPos uLength, CRandom::TValue seed, TSeqPos total_length=0, TSeqPos *out_seq_length=0, vector< Uint4 > *blast_ambig=0) const
CRef< CMap_table > m_IupacnaNcbi4na
CRef< CFast_2_1 > InitFastIupacnaNcbi4na()
CWrapper_table< char > CCode_comp
void x_GetSeqFromSeqData(const CSeq_data &data, const string **str, const vector< char > **vec) const
bool FastValidate(const CSeq_data &in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos GetNcbi4naCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
TSeqPos MapIupacnaToNcbi2na(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength, bool bAmbig, CRandom::TValue seed, TSeqPos total_length, TSeqPos *out_seq_length, vector< Uint4 > *blast_ambig) const
CRef< CMap_table > m_NcbieaaIupacaa
TSeqPos ReverseComplementNcbi2na(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CAmbig_detect > m_DetectAmbigIupacnaNcbi2na
TSeqPos GetIupacaaCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
void ValidateNcbistdaa(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< SMasksArray > InitMasks()
TSeqPos MapNcbi4naToNcbi2na(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength, bool bAmbig, CRandom::TValue seed, TSeqPos total_length, TSeqPos *out_seq_length, vector< Uint4 > *blast_ambig) const
CRef< CFast_table2 > InitFastNcbi2naNcbi4na()
TSeqPos KeepNcbistdaa(CSeq_data *in_seq, TSeqPos uBeginIdx, TSeqPos uLength) const
CRef< CCode_rev > m_Ncbi4naRev
static CSeqportUtil_implementation & x_GetImplementation(void)
static TPair GetCodeIndexFromTo(CSeq_data::E_Choice code_type)
static TSeqPos Reverse(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static const string & GetName(CSeq_data::E_Choice code_type, TIndex idx)
static bool IsCodeAvailable(CSeq_data::E_Choice code_type)
static bool FastValidate(const CSeq_data &in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
unsigned int TIndex
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static TIndex GetIndex(CSeq_data::E_Choice code_type, const string &code)
static TIndex GetIndexComplement(CSeq_data::E_Choice code_type, TIndex idx)
static TSeqPos ConvertWithBlastAmbig(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx, TSeqPos uLength, TSeqPos total_length, TSeqPos *out_seq_length, vector< Uint4 > *blast_ambig)
static TSeqPos GetAmbigs(const CSeq_data &in_seq, CSeq_data *out_seq, vector< TSeqPos > *out_indices, CSeq_data::E_Choice to_code=CSeq_data::e_Ncbi2na, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
static TSeqPos GetCopy(const CSeq_data &in_seq, CSeq_data *out_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos Complement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos Keep(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static const string & GetCode(CSeq_data::E_Choice code_type, TIndex idx)
pair< TIndex, TIndex > TPair
static void Validate(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static const string & GetIupacaa3(TIndex ncbistdaa)
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
static TSeqPos Append(CSeq_data *out_seq, const CSeq_data &in_seq1, TSeqPos uBeginIdx1, TSeqPos uLength1, const CSeq_data &in_seq2, TSeqPos uBeginIdx2, TSeqPos uLength2)
container_type::const_iterator const_iterator
Definition: map.hpp:53
#define T(s)
Definition: common.h:230
static void chk(int check, const char *fmt,...)
Definition: ct_dynamic.c:49
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
CRange< Position > Map(const CRange< Position > &target, const CRange< Position > &range)
Definition: blast_aux.cpp:826
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
TPrim & Set(void)
Definition: serialbase.hpp:351
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
Definition: serialbase.hpp:347
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
Uint4 TValue
Type of the generated integer value and/or the seed value.
Definition: random_gen.hpp:69
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
void SetSeed(TValue seed)
Seed the random number generator with "seed".
Definition: random_gen.cpp:287
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
list< CRef< C_E_Table > > TTable
list< CRef< CSeq_code_table > > TCodes
const TMaps & GetMaps(void) const
Get the Maps member data.
ESeq_code_type
sequence representations
const TCodes & GetCodes(void) const
Get the Codes member data.
@ eSeq_code_type_ncbi2na
2 bit nucleic acid code
@ eSeq_code_type_ncbistdaa
consecutive codes for std aas, 0-25
@ eSeq_code_type_ncbi4na
4 bit nucleic acid code
@ eSeq_code_type_ncbieaa
extended ASCII 1 letter aa codes
@ eSeq_code_type_ncbipaa
amino acid probabilities
@ eSeq_code_type_ncbi8aa
8 bit extended amino acid codes
@ eSeq_code_type_ncbi8na
8 bit extended nucleic acid code
@ eSeq_code_type_ncbipna
nucleic acid probabilities
@ eSeq_code_type_iupacaa3
3 letter code only for display
@ eSeq_code_type_iupacaa
IUPAC 1 letter amino acid code.
@ eSeq_code_type_iupacna
IUPAC 1 letter nuc acid code.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
const TIupacna & GetIupacna(void) const
Get the variant data.
Definition: Seq_data_.hpp:510
const TNcbipaa & GetNcbipaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:670
const TNcbi8aa & GetNcbi8aa(void) const
Get the variant data.
Definition: Seq_data_.hpp:630
TNcbieaa & SetNcbieaa(void)
Select the variant.
Definition: Seq_data_.hpp:657
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
TIupacna & SetIupacna(void)
Select the variant.
Definition: Seq_data_.hpp:517
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:650
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:690
TNcbistdaa & SetNcbistdaa(void)
Select the variant.
Definition: Seq_data_.hpp:697
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
Definition: Seq_data_.hpp:570
TNcbi2na & SetNcbi2na(void)
Select the variant.
Definition: Seq_data_.hpp:557
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
Definition: Seq_data_.hpp:550
TNcbi4na & SetNcbi4na(void)
Select the variant.
Definition: Seq_data_.hpp:577
TIupacaa & SetIupacaa(void)
Select the variant.
Definition: Seq_data_.hpp:537
virtual void Reset(void)
Reset the whole object.
Definition: Seq_data_.cpp:51
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_data_.hpp:475
@ e_not_set
No variant selected.
Definition: Seq_data_.hpp:103
@ e_Ncbipna
nucleic acid probabilities
Definition: Seq_data_.hpp:109
@ e_Gap
gap types
Definition: Seq_data_.hpp:114
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
@ e_Ncbi2na
2 bit nucleic acid code
Definition: Seq_data_.hpp:106
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_Ncbipaa
amino acid probabilities
Definition: Seq_data_.hpp:112
@ e_Ncbi8na
8 bit extended nucleic acid code
Definition: Seq_data_.hpp:108
@ e_Ncbi4na
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
@ e_Iupacaa
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
@ e_Ncbi8aa
8 bit extended amino acid codes
Definition: Seq_data_.hpp:110
int i
const struct ncbi::grid::netcache::search::fields::SIZE size
Static variables safety - create on demand, destroy on application termination.
int ssize_t
Definition: ncbiconf_msvc.h:93
T max(T x_, T y_)
static const unsigned int kNumCodes
static const bool kName
static ESeq_code_type EChoiceToESeq(CSeq_data::E_Choice from_type)
static const char kAmbig4na[16]
static CSafeStatic< CSeqportUtil_implementation > sx_Implementation
static const bool kSymbol
static CSeqUtil::TCoding s_SeqDataToSeqUtil[]
Definition: inftrees.h:24
#define _TROUBLE
#define _ASSERT
static int seed
Definition: test_table.cpp:132
else result
Definition: token2.c:20
static bool ambig(char c)
unsigned char uch
Definition: zutil.h:39
Modified on Wed Apr 24 14:14:24 2024 by modify_doxy.py rev. 669887