NCBI C++ ToolKit
phrap.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: phrap.cpp 99140 2023-02-17 14:32:51Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Aleksey Grichenko, NCBI.
27 *
28 * File Description:
29 * Reader for Phrap-format files.
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <util/range.hpp>
36 #include <util/rangemap.hpp>
37 #include <objects/general/Date.hpp>
41 #include <objects/seq/Bioseq.hpp>
43 #include <objects/seq/Seqdesc.hpp>
44 #include <objects/seq/Seq_inst.hpp>
45 #include <objects/seq/Seq_data.hpp>
46 #include <objects/seq/IUPACna.hpp>
61 #include <objtools/error_codes.hpp>
62 
63 #include <algorithm>
64 
65 
66 #define NCBI_USE_ERRCODE_X Objtools_Rd_Phrap
67 
70 
71 
// Read whole line from a stream.
// Leading whitespace (including any pending newline) is skipped first via
// `in >> ws`, then the rest of the line is read with getline() and returned.
// If getline() fails the returned string is empty.
// NOTE(review): the signature line is missing from this listing; callers in
// this file invoke it as ReadLine(in) and use the result as a string.
72 // Read whole line from a stream
73 inline
75 {
76  in >> ws;
77  string ret;
78  getline(in, ret);
79  return ret;
80 }
81 
82 
// Report a read failure if the stream is in a failed state.
// On failure the stream error flags are cleared first so that tellg() can
// return a usable position, then an error with the message
// "ReadPhrap: failed to read " + err_msg and the current stream offset is
// raised.
// NOTE(review): the first signature line and the line that opens the
// throw/report call are missing from this listing.
83 inline
85  string err_msg)
86 {
87  if ( in.fail() ) {
88  in.clear(); // to get correct position
90  "ReadPhrap: failed to read " + err_msg,
91  in.tellg() - CT_POS_TYPE(0));
92  }
93 }
94 
95 
/// Check whether a read name carries the old-ACE "complemented" marker.
///
/// In old ACE files complemented reads have names ending with ".comp".
///
/// @param name
///   Read name to test.
/// @return
///   true if (and only if) name ends with ".comp".
inline bool IsOldComplementedName(const string& name)
{
    static const char kOldNameCompFlag[] = ".comp";
    const string::size_type kFlagLen = sizeof(kOldNameCompFlag) - 1;
    // The explicit length check matters: comparing a "not found" result
    // against name.size() - kFlagLen wraps around for short names and used
    // to report 4-character names as complemented.
    if (name.size() < kFlagLen) {
        return false;
    }
    return name.compare(name.size() - kFlagLen, kFlagLen,
                        kOldNameCompFlag) == 0;
}
103 
104 
// Common base for the sequences read from a Phrap/ACE file.
// Holds the name, reader flags, padded length, the sequence data with pads
// removed, the pad map and the aligned range, and declares the reading and
// conversion entry points shared by derived classes (ReadTag is pure virtual).
// NOTE(review): several member declarations (typedefs, some accessors and
// data members) are missing from this listing.
105 class CPhrap_Seq : public CObject
106 {
107 public:
109  CPhrap_Seq(const string& name, TPhrapReaderFlags flags);
110  virtual ~CPhrap_Seq(void) {}
111 
// Read the sequence header (name if not yet known, then padded length).
112  void Read(CNcbiIstream& in);
// Read the padded sequence data, strip pads and fill the pad map.
113  void ReadData(CNcbiIstream& in);
// Read a tag block; implemented by derived classes.
114  virtual void ReadTag(CNcbiIstream& in, char tag) = 0;
115 
117  bool IsComplemented(void) const
118  {
120  }
121 
122  // Pad position is 0-based number indicating where to insert the pad.
123  // E.g.:
124  // unpadded pos: 0 1 2 3 4 5 6 7 8 9
125  // padded pos: 0 1 - 2 3 - 4 - - 5
126  // pad value: 2 4 5 5
127  // sequence: a a * a a * a * * a
128 
129  TPhrapReaderFlags GetFlags(void) const { return m_Flags; }
// True if any bit of `value` is set in m_Flags (declaration line missing).
131  { return (m_Flags & value) != 0; }
132 
133  const string& GetName(void) const { return m_Name; }
134  TSeqPos GetPaddedLength(void) const { return m_PaddedLength; }
136  const string& GetData(void) const { return m_Data; }
// Returns the sequence id, creating it on first use.
137  CRef<CSeq_id> GetId(void) const;
138 
139  TSeqPos GetPaddedPos(TSeqPos unpadded) const;
// Second line of the GetUnpaddedPos declaration; `link`, when non-null, is
// incremented once for every pad skipped at the requested position.
141  TSeqPos* link = 0) const;
142 
143  CRef<CBioseq> CreateBioseq(void) const;
144 
146  const TPadMap& GetPadMap(void) const { return m_PadMap; }
147 
148  TSeqPos GetAlignedFrom(void) const { return m_AlignedFrom; }
149  TSeqPos GetAlignedTo(void) const { return m_AlignedTo; }
150 
151 protected:
152  void CreateComplementedDescr(CRef<CSeq_descr>& descr) const;
153  void CreatePadsFeat(CRef<CSeq_annot>& annot) const;
154  void SetAligned(TSeqPos from, TSeqPos to)
155  {
156  m_AlignedFrom = from;
157  m_AlignedTo = to;
158  }
159 
160 private:
161  void x_FillSeqData(CSeq_data& data) const;
162 
163  friend class CPhrap_Sequence;
// Takes over the contents of `seq`; data and pad map are swapped, not copied.
164  void CopyFrom(CPhrap_Seq& seq);
165 
167 
168  string m_Name;
171  string m_Data; // pads are already removed
172  TPadMap m_PadMap; // shifts for unpadded positions
177 };
178 
179 
// Character that marks a pad in padded sequence data; ReadData() removes
// every occurrence and records it in the pad map.
180 const char kPadChar = '*';
181 
// Constructor taking only reader flags: stores the flags and zero-initializes
// the lengths and the aligned-from position; the name is left empty.
// NOTE(review): the signature line and two initializer lines are missing
// from this listing.
183  : m_Flags(flags),
184  m_PaddedLength(0),
185  m_UnpaddedLength(0),
187  m_AlignedFrom(0),
189 {
190 }
191 
192 
// Constructor taking a name and reader flags: stores both and
// zero-initializes the lengths and the aligned-from position.
// NOTE(review): the signature line and two initializer lines are missing
// from this listing.
194  : m_Flags(flags),
195  m_Name(name),
196  m_PaddedLength(0),
197  m_UnpaddedLength(0),
199  m_AlignedFrom(0),
201 {
202 }
203 
204 
// Body of CopyFrom(CPhrap_Seq& seq) (signature line missing from listing).
// Copies flags, name, aligned-to position and id from `seq`; the sequence
// data and the pad map are swapped with `seq`, so `seq` ends up holding this
// object's previous (expected empty) data.
206 {
207  m_Flags = seq.m_Flags;
208  m_Name = seq.m_Name;
// Debug-only check: this object must not have data of its own yet.
211  _ASSERT(m_Data.empty());
212  m_Data.swap(seq.m_Data);
214  m_PadMap.swap(seq.m_PadMap);
217  m_AlignedTo = seq.m_AlignedTo;
218  m_Id = seq.m_Id;
219 }
220 
221 
// Body of the sequence header reader (signature line missing from listing;
// presumably CPhrap_Seq::Read(CNcbiIstream& in) - confirm).
// Reads the name only when it is not already set, then the padded length;
// each extraction is followed by a stream-state check.
223 {
224  if ( m_Name.empty() ) {
225  in >> m_Name;
226  CheckStreamState(in, "sequence header.");
227  }
228  in >> m_PaddedLength;
229  CheckStreamState(in, "sequence header.");
230 }
231 
232 
// Body of the sequence data reader (signature line missing from listing;
// presumably CPhrap_Seq::ReadData(CNcbiIstream& in) - confirm).
// Appends upper-cased lines to m_Data until m_PaddedLength characters were
// collected or the stream ends, validates the total length, then removes pad
// characters in place while filling m_PadMap and sets m_UnpaddedLength.
// NOTE(review): several lines are missing from this listing (inside both
// old-version branches, the throw line and the end of the function).
234 {
235  _ASSERT(m_Data.empty());
236  string line;
237  TSeqPos cnt = 0;
238  if ((m_Flags & fPhrap_OldVersion) != 0) {
239  // Prepare to read as many bases as possible
241  }
242  while (!in.eof() && cnt < m_PaddedLength) {
243  // in >> line;
244  line = ReadLine(in);
245  char c = in.peek();
246  m_Data += NStr::ToUpper(line);
247  cnt += line.size();
// In old-version mode a whitespace character right after the line ends
// the data block.
// NOTE(review): `c` is a plain char here, while the check after the loop
// casts to unsigned char before calling isspace().
248  if ((m_Flags & fPhrap_OldVersion) != 0 && isspace(c)) {
249  break;
250  }
251  }
252  if ((m_Flags & fPhrap_OldVersion) != 0) {
254  }
// At end of stream a space is substituted so the check below passes.
255  char next = in.eof() ? ' ' : in.peek();
256  if ( m_Data.size() != m_PaddedLength || !isspace((unsigned char) next) ) {
258  "ReadPhrap: invalid data length for " + m_Name + ".",
259  in.tellg() - CT_POS_TYPE(0));
260  }
// Compact the data in place: `pos` walks padded positions, `new_pos` is the
// next unpadded slot. For each pad the map stores pos - new_pos, i.e. the
// number of pads that precede this padded position.
261  TSeqPos new_pos = 0;
262  for (TSeqPos pos = 0; pos < m_PaddedLength; pos++) {
263  if (m_Data[pos] == kPadChar) {
264  m_PadMap[pos] = pos - new_pos;
265  continue;
266  }
267  m_Data[new_pos] = m_Data[pos];
268  new_pos++;
269  }
270  m_UnpaddedLength = new_pos;
271  m_Data.resize(m_UnpaddedLength);
274 }
275 
276 
// Convert an unpadded position to a padded one (declared in the class as
// GetPaddedPos(TSeqPos unpadded) const; signature line and the iterator
// initialization are missing from this listing).
// Advances through the pad map while the loop condition holds and returns
// `unpadded` plus the shift stored in the entry where the walk stops.
// NOTE(review): running past the last map entry is guarded only by _ASSERT.
277 inline
279 {
281  while (unpadded <= pad->first - pad->second) {
282  pad++;
283  _ASSERT(pad != m_PadMap.end());
284  }
285  return unpadded + pad->second;
286 }
287 
288 
// Convert a padded position to an unpadded one (first signature line and the
// iterator initialization are missing from this listing).
// If the position falls on one or more consecutive pads it is moved forward
// past them; `link`, when non-null, is incremented once per skipped pad.
// Returns kInvalidSeqPos when the pad iterator reaches the end of the map,
// otherwise the (possibly advanced) padded position minus the entry's shift.
289 inline
291  TSeqPos* link) const
292 {
294  while (pad != m_PadMap.end() && pad->first == padded) {
295  ++padded;
296  ++pad;
297  if (link) {
298  ++(*link);
299  }
300  }
301  if (pad == m_PadMap.end()) {
302  return kInvalidSeqPos;
303  }
304  return padded - pad->second;
305 }
306 
307 
// Return the Seq-id of this sequence, creating it on the first call
// (declared in the class as CRef<CSeq_id> GetId(void) const).
// NOTE(review): the signature line and the line that fills in the new id are
// missing from this listing.
308 inline
310 {
311  if (!m_Id) {
312  m_Id.Reset(new CSeq_id);
314  }
315  return m_Id;
316 }
317 
318 
// Body of CreateBioseq() (signature line missing from listing).
// Creates a new CBioseq, attaches this sequence's id, fills the Seq-data via
// x_FillSeqData() and returns it.
// NOTE(review): two lines that configure `inst` are missing from this listing.
320 {
321  CRef<CBioseq> seq(new CBioseq);
322  seq->SetId().push_back(GetId());
323  CSeq_inst& inst = seq->SetInst();
326 
327  x_FillSeqData(inst.SetSeq_data());
328 
329  return seq;
330 }
331 
332 
// Body of x_FillSeqData(CSeq_data& data) (signature line missing).
// Stores the unpadded sequence as IUPACna, then applies extra processing
// when the sequence is complemented and when fPhrap_PackSeqData is set.
// NOTE(review): the statements inside both conditionals are missing from
// this listing, so what each branch does cannot be confirmed here.
334 {
335  data.SetIupacna().Set(m_Data);
336  if ( IsComplemented() ) {
338  }
339  if ( FlagSet(fPhrap_PackSeqData) ) {
341  }
342 }
343 
344 
// Body of CreateComplementedDescr(CRef<CSeq_descr>& descr) (signature line
// missing from listing).
// Does nothing unless m_Complemented is set. Otherwise creates `descr` if it
// is null and appends a comment descriptor: "Complemented flag ignored" when
// fPhrap_NoComplement is set, "Complemented" otherwise.
346 {
347  if ( m_Complemented ) {
348  if ( !descr ) {
349  descr.Reset(new CSeq_descr);
350  }
351  CRef<CSeqdesc> desc(new CSeqdesc);
352  if ( FlagSet(fPhrap_NoComplement) ) {
353  // Should be complemented, ignored due to options selected
354  desc->SetComment("Complemented flag ignored");
355  }
356  else {
357  // The sequence is complemented
358  desc->SetComment("Complemented");
359  }
360  descr->Set().push_back(desc);
361  }
362 }
363 
364 
// Body of CreatePadsFeat(CRef<CSeq_annot>& annot) (signature line missing
// from listing).
// When fPhrap_FeatGaps is set and the pad map has more than one entry, builds
// one "gap_set" imp-feature whose location is a packed-point list of the
// unpadded positions of all pads, and appends it to `annot` (created on
// demand).
366 {
367  // One pad is artificial and indicates end of sequence
368  if ( !FlagSet(fPhrap_FeatGaps) || m_PadMap.size() <= 1 ) {
369  return;
370  }
371  CRef<CSeq_feat> feat(new CSeq_feat);
372  feat->SetData().SetImp().SetKey("gap_set");
373  feat->SetComment("Gap set for " + m_Name);
374  CPacked_seqpnt& pnts = feat->SetLocation().SetPacked_pnt();
375  pnts.SetId(*GetId());
376 
// The point vector is pre-sized to exclude the artificial last pad.
377  size_t num_gaps = m_PadMap.size() - 1;
378  pnts.SetPoints().resize(num_gaps);
379  size_t i = 0;
380  ITERATE(TPadMap, pad_it, m_PadMap) {
381  if ( pad_it->first >= GetPaddedLength() ) {
382  // Skip the last artficial pad
383  break;
384  }
// Unpadded position of the pad: padded position minus stored shift.
385  TSeqPos pos = pad_it->first - pad_it->second;
// For complemented sequences the points are mirrored and stored in
// reverse order.
386  if ( IsComplemented() ) {
387  pnts.SetPoints()[num_gaps - i - 1] =
388  GetUnpaddedLength() - pos;
389  }
390  else {
391  pnts.SetPoints()[i] = pos;
392  }
393  i++;
394  }
395  if ( !annot ) {
396  annot.Reset(new CSeq_annot);
397  }
398  annot->SetData().SetFtable().push_back(feat);
399 }
400 
401 
// A single read from a Phrap/ACE file: adds to CPhrap_Seq the read's start
// offset, quality/aligned ranges (QA), description (DS) and read tags (RT),
// and can convert itself to a Seq-entry.
// NOTE(review): typedefs, some struct fields and all data members are missing
// from this listing.
402 class CPhrap_Read : public CPhrap_Seq
403 {
404 public:
406 
407  CPhrap_Read(const string& name, TPhrapReaderFlags flags);
408  virtual ~CPhrap_Read(void);
409 
410  void Read(CNcbiIstream& in);
411 
// Values parsed from a DS line; each field holds the token(s) following the
// corresponding "KEY:" marker (see ReadDS).
412  struct SReadDS
413  {
415  string m_PhdFile;
416  string m_Time;
417  string m_Chem;
418  string m_Dye;
419  string m_Template;
420  string m_Direction;
421  };
422 
// One RT{} tag as read by ReadTag().
423  struct SReadTag
424  {
425  string m_Type;
426  string m_Program;
429  string m_Date;
430  };
431  typedef vector<SReadTag> TReadTags;
434 
// Record the read's start offset and its complemented state.
435  void AddReadLoc(TSignedSeqPos start, bool complemented);
436 
437  TStart GetStart(void) const { return m_Start; }
438 
439  void ReadQuality(CNcbiIstream& in); // QA
440  void ReadDS(CNcbiIstream& in); // DS
441  virtual void ReadTag(CNcbiIstream& in, char tag); // RT{}
442 
443  CRef<CSeq_entry> CreateRead(void) const;
444 
445  bool IsCircular(void) const;
446 
447 private:
448  void x_CreateFeat(CBioseq& bioseq) const;
449  void x_CreateDesc(CBioseq& bioseq) const;
450  void x_AddTagFeats(CRef<CSeq_annot>& annot) const;
451  void x_AddQualityFeat(CRef<CSeq_annot>& annot) const;
452 
459 };
460 
461 
// Constructor initializer list and body (signature line missing from
// listing): forwards name and flags to CPhrap_Seq, zeroes the counters and
// start offset, starts with an empty high-quality range and no DS data.
463  : CPhrap_Seq(name, flags),
464  m_NumInfoItems(0),
465  m_NumReadTags(0),
466  m_HiQualRange(TRange::GetEmpty()),
467  m_Start(0),
468  m_DS(0)
469 {
470 }
471 
472 
// Destructor body (signature line missing from listing): releases the DS
// record allocated by ReadDS(), if any.
474 {
475  if ( m_DS ) {
476  delete m_DS;
477  }
478 }
479 
480 
// Body of the RD header reader (signature line and the statements that do
// the actual reading are missing from this listing); what remains verifies
// the stream state and reports "RD data." on failure.
482 {
485  CheckStreamState(in, "RD data.");
486 }
487 
488 
489 bool CPhrap_Read::IsCircular(void) const
490 {
491  return m_Start + (TStart)GetAlignedFrom() < 0;
492 }
493 
494 
// Body of ReadQuality(CNcbiIstream& in) - the QA line reader (signature line
// missing from listing).
// Reads the high-quality start/stop pair and, unless the old file version is
// being read, a second start/stop pair for the aligned range. A pair is
// stored (converted from 1-based to 0-based) only when both values are
// positive.
496 {
497  TSignedSeqPos start, stop;
498  in >> start >> stop;
499  CheckStreamState(in, "QA data.");
500  if (start > 0 && stop > 0) {
501  m_HiQualRange.Set(start - 1, stop - 1);
502  }
// Old-version input carries only the first pair.
503  if ((GetFlags() & fPhrap_OldVersion) != 0) {
504  return;
505  }
506  in >> start >> stop;
507  CheckStreamState(in, "QA data.");
508  if (start > 0 && stop > 0) {
509  SetAligned(start - 1, stop - 1);
510  }
511 }
512 
513 
// Body of ReadDS(CNcbiIstream& in) - the DS line reader (signature line and
// the line opening the throw call are missing from this listing).
// Rejects a second DS line for the same read, then splits the rest of the
// line on spaces and stores the token following each recognized "KEY:"
// marker. TIME: is special: every following token that is not a recognized
// key is appended to the time string, separated by spaces.
// NOTE(review): each `*(++it)` advances the loop iterator without checking
// for values.end(); a key appearing as the last token on the line would
// dereference the end iterator.
515 {
516  if ( m_DS ) {
518  "ReadPhrap: DS redifinition for " + GetName() + ".",
519  in.tellg() - CT_POS_TYPE(0));
520  }
521  m_DS = new SReadDS;
522  string tag = ReadLine(in);
523  list<string> values;
524  NStr::Split(tag, " ", values, 0);
// True while the tokens being scanned still belong to the TIME: value.
525  bool in_time = false;
526  ITERATE(list<string>, it, values) {
527  if (*it == "CHROMAT_FILE:") {
528  m_DS->m_ChromatFile = *(++it);
529  }
530  else if (*it == "PHD_FILE:") {
531  m_DS->m_PhdFile = *(++it);
532  }
533  else if (*it == "CHEM:") {
534  m_DS->m_Chem = *(++it);
535  }
536  else if (*it == "DYE:") {
537  m_DS->m_Dye = *(++it);
538  }
539  else if (*it == "TEMPLATE:") {
540  m_DS->m_Template = *(++it);
541  }
542  else if (*it == "DIRECTION:") {
543  m_DS->m_Direction = *(++it);
544  }
545  else if (*it == "TIME:") {
546  in_time = true;
547  m_DS->m_Time = *(++it);
548  continue;
549  }
550  else {
551  if ( in_time ) {
552  m_DS->m_Time += " " + *it;
553  continue;
554  }
555  // _ASSERT("unknown value", 0);
556  }
// Any recognized key other than TIME: ends the time value.
557  in_time = false;
558  }
559 }
560 
561 
// Body of ReadTag(CNcbiIstream& in, char tag) - the RT{} reader (signature
// line and the line opening the throw call are missing from this listing).
// Reads type, program, start, end and date, requires a closing '}' after
// optional whitespace, converts positive start/end values from 1-based to
// 0-based and stores the tag in m_Tags.
563 {
564  _ASSERT(tag == 'R');
565  SReadTag rt;
566  in >> rt.m_Type
567  >> rt.m_Program
568  >> rt.m_Start
569  >> rt.m_End
570  >> rt.m_Date
571  >> ws; // skip spaces
572  CheckStreamState(in, "RT{} data.");
573  if (in.get() != '}') {
575  "ReadPhrap: '}' expected after RT tag",
576  in.tellg() - CT_POS_TYPE(0));
577  }
578  if (rt.m_Start > 0) {
579  rt.m_Start--;
580  }
581  if( rt.m_End > 0) {
582  rt.m_End--;
583  }
584  m_Tags.push_back(rt);
585 }
586 
587 
588 inline
589 void CPhrap_Read::AddReadLoc(TSignedSeqPos start, bool complemented)
590 {
591  _ASSERT(m_Start == 0);
592  SetComplemented(complemented);
593  m_Start = start;
594 }
595 
596 
// Body of x_AddTagFeats(CRef<CSeq_annot>& annot) (signature line and the
// line opening the throw call are missing from this listing).
// When fPhrap_FeatTags is set and tags exist, verifies the tag count against
// m_NumReadTags and adds one imp-feature per RT tag. Locations are given in
// unpadded coordinates; for complemented reads they are mirrored and put on
// the minus strand. With fPhrap_PadsToFuzz the padded-minus-unpadded
// difference is stored as fuzz.
598 {
599  if ( !FlagSet(fPhrap_FeatTags) || m_Tags.empty() ) {
600  return;
601  }
602  if (m_Tags.size() != m_NumReadTags) {
604  "ReadPhrap: invalid number of RT tags for " + GetName() + ".",
605  CT_POS_TYPE(0));
606  }
607  if ( !annot ) {
608  annot.Reset(new CSeq_annot);
609  }
610  ITERATE(TReadTags, tag_it, m_Tags) {
611  const SReadTag& tag = *tag_it;
612  CRef<CSeq_feat> feat(new CSeq_feat);
613  feat->SetTitle("created " + tag.m_Date + " by " + tag.m_Program);
614  feat->SetData().SetImp().SetKey(tag.m_Type);
615  CSeq_loc& loc = feat->SetLocation();
616  loc.SetInt().SetId(*GetId());
617  TSeqPos unpadded_start = GetUnpaddedPos(tag.m_Start);
618  TSeqPos unpadded_end = GetUnpaddedPos(tag.m_End);
619  if ( IsComplemented() ) {
// Mirror the interval: the tag end becomes the location start.
620  loc.SetInt().SetFrom(GetUnpaddedLength() -
621  unpadded_end - 1);
622  loc.SetInt().SetTo(GetUnpaddedLength() -
623  unpadded_start - 1);
624  loc.SetInt().SetStrand(eNa_strand_minus);
625  if ( FlagSet(fPhrap_PadsToFuzz) ) {
626  loc.SetInt().SetFuzz_from().
627  SetP_m(tag.m_End - unpadded_end);
628  loc.SetInt().SetFuzz_to().
629  SetP_m(tag.m_Start - unpadded_start);
630  }
631  }
632  else {
633  loc.SetInt().SetFrom(unpadded_start);
// NOTE(review): recomputes the value already held in unpadded_end.
634  loc.SetInt().SetTo(GetUnpaddedPos(tag.m_End));
635  if ( FlagSet(fPhrap_PadsToFuzz) ) {
636  loc.SetInt().SetFuzz_from().
637  SetP_m(tag.m_Start - unpadded_start);
638  loc.SetInt().SetFuzz_to().
639  SetP_m(tag.m_End - unpadded_end);
640  }
641  }
642  annot->SetData().SetFtable().push_back(feat);
643  }
644 }
645 
646 
// Body of x_AddQualityFeat(CRef<CSeq_annot>& annot) (signature line missing
// from listing).
// When fPhrap_FeatQuality is set, adds up to two imp-features to `annot`:
// "high_quality_segment" for a non-empty m_HiQualRange and "aligned_segment"
// when the aligned-to position is valid. For complemented reads the interval
// is mirrored and placed on the minus strand; with fPhrap_PadsToFuzz the
// difference between the stored and the converted positions is kept as fuzz.
// NOTE(review): the condition of the second early return and the lines that
// compute `start`/`stop` in both sections are missing from this listing.
648 {
649  if ( !FlagSet(fPhrap_FeatQuality) ) {
650  return;
651  }
653  return;
654  }
655  if ( !annot ) {
656  annot.Reset(new CSeq_annot);
657  }
658  if ( !m_HiQualRange.Empty() ) {
659  CRef<CSeq_feat> feat(new CSeq_feat);
660  feat->SetData().SetImp().SetKey("high_quality_segment");
661  CSeq_loc& loc = feat->SetLocation();
662  loc.SetInt().SetId(*GetId());
665  if ( IsComplemented() ) {
666  loc.SetInt().SetFrom(GetUnpaddedLength() - stop - 1);
667  loc.SetInt().SetTo(GetUnpaddedLength() - start - 1);
668  loc.SetInt().SetStrand(eNa_strand_minus);
669  if ( FlagSet(fPhrap_PadsToFuzz) ) {
670  loc.SetInt().SetFuzz_from().
671  SetP_m(m_HiQualRange.GetTo() - stop);
672  loc.SetInt().SetFuzz_to().
673  SetP_m(m_HiQualRange.GetFrom() - start);
674  }
675  }
676  else {
677  loc.SetInt().SetFrom(start);
678  loc.SetInt().SetTo(stop);
679  if ( FlagSet(fPhrap_PadsToFuzz) ) {
680  loc.SetInt().SetFuzz_from().
681  SetP_m(m_HiQualRange.GetFrom() - start);
682  loc.SetInt().SetFuzz_to().
683  SetP_m(m_HiQualRange.GetTo() - stop);
684  }
685  }
686  annot->SetData().SetFtable().push_back(feat);
687  }
688  if (GetAlignedTo() != kInvalidSeqPos) {
689  CRef<CSeq_feat> feat(new CSeq_feat);
690  feat->SetData().SetImp().SetKey("aligned_segment");
691  CSeq_loc& loc = feat->SetLocation();
692  loc.SetInt().SetId(*GetId());
695  if ( IsComplemented() ) {
696  loc.SetInt().SetFrom(GetUnpaddedLength() - stop - 1);
697  loc.SetInt().SetTo(GetUnpaddedLength() - start - 1);
698  loc.SetInt().SetStrand(eNa_strand_minus);
699  if ( FlagSet(fPhrap_PadsToFuzz) ) {
700  loc.SetInt().SetFuzz_from().SetP_m(GetAlignedTo() - stop);
701  loc.SetInt().SetFuzz_to().SetP_m(GetAlignedFrom() - start);
702  }
703  }
704  else {
705  loc.SetInt().SetFrom(start);
706  loc.SetInt().SetTo(stop);
707  if ( FlagSet(fPhrap_PadsToFuzz) ) {
708  loc.SetInt().SetFuzz_from().SetP_m(GetAlignedFrom() - start);
709  loc.SetInt().SetFuzz_to().SetP_m(GetAlignedTo() - stop);
710  }
711  }
712  annot->SetData().SetFtable().push_back(feat);
713  }
714 }
715 
716 
// Body of x_CreateFeat(CBioseq& bioseq) (signature line missing from
// listing).
// Collects pad, tag and quality features into one annotation and attaches it
// to the bioseq only if at least one of the helpers created it.
718 {
719  CRef<CSeq_annot> annot;
720  CreatePadsFeat(annot);
721  x_AddTagFeats(annot);
722  x_AddQualityFeat(annot);
723  if ( annot ) {
724  bioseq.SetAnnot().push_back(annot);
725  }
726 }
727 
728 
// Body of x_CreateDesc(CBioseq& bioseq) (signature line missing from
// listing).
// Builds the descriptor set of the read. When fPhrap_Descr is set and DS data
// was read, every non-empty DS field becomes a "KEY: value" comment
// descriptor, except the time, which becomes a create-date string. The set is
// attached to the bioseq only when it is non-empty.
// NOTE(review): the statement following the "Always add desc.comment"
// comment is missing from this listing.
730 {
731  CRef<CSeq_descr> descr;
732 
733  // Always add desc.comment = "Complemented" to indicate reversed read
735 
736  if ( FlagSet(fPhrap_Descr) && m_DS ) {
737  if ( !descr ) {
738  descr.Reset(new CSeq_descr);
739  }
740  CRef<CSeqdesc> desc;
741 
742  if ( !m_DS->m_ChromatFile.empty() ) {
743  desc.Reset(new CSeqdesc);
744  desc->SetComment("CHROMAT_FILE: " + m_DS->m_ChromatFile);
745  descr->Set().push_back(desc);
746  }
747  if ( !m_DS->m_PhdFile.empty() ) {
748  desc.Reset(new CSeqdesc);
749  desc->SetComment("PHD_FILE: " + m_DS->m_PhdFile);
750  descr->Set().push_back(desc);
751  }
752  if ( !m_DS->m_Chem.empty() ) {
753  desc.Reset(new CSeqdesc);
754  desc->SetComment("CHEM: " + m_DS->m_Chem);
755  descr->Set().push_back(desc);
756  }
757  if ( !m_DS->m_Direction.empty() ) {
758  desc.Reset(new CSeqdesc);
759  desc->SetComment("DIRECTION: " + m_DS->m_Direction);
760  descr->Set().push_back(desc);
761  }
762  if ( !m_DS->m_Dye.empty() ) {
763  desc.Reset(new CSeqdesc);
764  desc->SetComment("DYE: " + m_DS->m_Dye);
765  descr->Set().push_back(desc);
766  }
767  if ( !m_DS->m_Template.empty() ) {
768  desc.Reset(new CSeqdesc);
769  desc->SetComment("TEMPLATE: " + m_DS->m_Template);
770  descr->Set().push_back(desc);
771  }
772  if ( !m_DS->m_Time.empty() ) {
773  desc.Reset(new CSeqdesc);
774  desc->SetCreate_date().SetStr(m_DS->m_Time);
775  descr->Set().push_back(desc);
776  }
777  }
778  if ( descr && !descr->Get().empty() ) {
779  bioseq.SetDescr(*descr);
780  }
781 }
782 
783 
// Body of CreateRead() (signature line missing from listing).
// Converts the read to a Seq-entry: creates the bioseq, marks it as a raw
// sequence, adds descriptors and features, and wraps it in a new entry.
785 {
786  CRef<CSeq_entry> entry(new CSeq_entry);
787  CRef<CBioseq> bioseq = CreateBioseq();
788  _ASSERT(bioseq);
789  bioseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
790 
791  x_CreateDesc(*bioseq);
792  x_CreateFeat(*bioseq);
793 
794  entry->SetSeq(*bioseq);
795 
796  return entry;
797 }
798 
799 
// A contig from a Phrap/ACE file: adds to CPhrap_Seq the base qualities (BQ),
// read locations (AF), base segments (BS) and contig tags (CT), and converts
// the contig with its reads to a Seq-entry with optional alignments.
// NOTE(review): the constructor declaration, several typedefs, struct fields
// and data members are missing from this listing.
800 class CPhrap_Contig : public CPhrap_Seq
801 {
802 public:
804  void Read(CNcbiIstream& in);
805 
// One BS record; ReadBaseSegment() stores both positions 0-based.
806  struct SBaseSeg
807  {
808  TSeqPos m_Start; // padded start consensus position
809  TSeqPos m_End; // padded end consensus position
810  };
811 
// Extra data carried by a CT tag of type "oligo".
812  struct SOligo
813  {
814  string m_Name;
815  string m_Data;
816  string m_MeltTemp;
818  };
// One CT{} tag as read by ReadTag().
819  struct SContigTag
820  {
821  string m_Type;
822  string m_Program;
825  string m_Date;
826  bool m_NoTrans;
827  vector<string> m_Comments;
829  };
830 
831  typedef vector<int> TBaseQuals;
832  typedef vector<SBaseSeg> TBaseSegs;
834  typedef vector<SContigTag> TContigTags;
836 
837  const TBaseQuals& GetBaseQualities(void) const { return m_BaseQuals; }
838 
839  void ReadBaseQualities(CNcbiIstream& in); // BQ
841  void ReadReadLocation(CNcbiIstream& in, TSeqs& seqs); // AF
842  void ReadBaseSegment(CNcbiIstream& in); // BS
843  virtual void ReadTag(CNcbiIstream& in, char tag); // CT{}
844 
845  CRef<CSeq_entry> CreateContig(int level) const;
846 
// True if any read of the contig reports IsCircular().
847  bool IsCircular(void) const;
848 
849 private:
850  void x_CreateAlign(CBioseq_set& bioseq_set) const;
851  void x_CreateGraph(CBioseq& bioseq) const;
852  void x_CreateFeat(CBioseq& bioseq) const;
853  void x_CreateDesc(CBioseq& bioseq) const;
854 
855  void x_AddBaseSegFeats(CRef<CSeq_annot>& annot) const;
856  void x_AddReadLocFeats(CRef<CSeq_annot>& annot) const;
857  void x_AddTagFeats(CRef<CSeq_annot>& annot) const;
858 
859  void x_CreateAlignPairs(CBioseq_set& bioseq_set) const;
860  void x_CreateAlignAll(CBioseq_set& bioseq_set) const;
861  void x_CreateAlignOptimized(CBioseq_set& bioseq_set) const;
862 
// Value stored in the alignment range map for each ungapped range.
863  struct SAlignInfo {
865 
866  SAlignInfo(size_t idx) : m_SeqIndex(idx) {}
867 
868  size_t m_SeqIndex; // index of read (>0) or contig (0)
869  TSeqPos m_Start; // ungapped aligned start
870  };
873  typedef vector< CConstRef<CPhrap_Seq> > TAlignRows;
874 
875  bool x_AddAlignRanges(TSeqPos global_start,
876  TSeqPos global_stop,
877  const CPhrap_Seq& seq,
878  size_t seq_idx,
880  TAlignMap& aln_map,
881  TAlignStarts& aln_starts) const;
883  TAlignStarts& aln_starts,
884  TAlignRows& rows) const;
885 
886 
887  size_t m_NumReads;
888  size_t m_NumSegs;
// Mutable: const members below look reads up with operator[].
892  mutable TReads m_Reads;
893 };
894 
895 
// Constructor initializer list and body (signature line missing from
// listing): forwards the flags to CPhrap_Seq and zeroes the read and segment
// counters.
897  : CPhrap_Seq(flags),
898  m_NumReads(0),
899  m_NumSegs(0)
900 {
901 }
902 
903 
// Body of the CO header reader, declared as Read(CNcbiIstream& in)
// (signature line and the first statement are missing from this listing).
// Reads the number of reads, the number of base segments and the orientation
// flag; the contig is marked complemented when the flag is 'C'.
905 {
907  char flag;
908  in >> m_NumReads >> m_NumSegs >> flag;
909  CheckStreamState(in, "CO data.");
910  SetComplemented(flag == 'C');
911 }
912 
913 
// Body of ReadBaseQualities(CNcbiIstream& in) - the BQ reader (signature
// line missing from listing).
// Reads one quality value per unpadded base and appends it to m_BaseQuals.
// NOTE(review): the stream state is checked only once, after the loop, so
// values are still appended after a failed extraction; the `bq = i`
// assignment is overwritten by the next read and has no visible effect.
915 {
916  TSeqPos bq;
917  for (TSeqPos i = 0; i < GetUnpaddedLength(); i++) {
918  in >> bq;
919  m_BaseQuals.push_back(bq);
920  bq = i;
921  }
922  CheckStreamState(in, "BQ data.");
923  _ASSERT( isspace((unsigned char) in.peek()) );
924 }
925 
926 
// Body of ReadReadLocation(CNcbiIstream& in, TSeqs& seqs) - the AF reader
// (signature line and the line opening the throw call are missing from this
// listing).
// Parses the read name, orientation and 1-based start (the old format has
// name, start and stop with no orientation), converts the start to 0-based,
// finds or creates the read in m_Reads / `seqs`, and records its location.
928 {
929  string name;
930  bool complemented = false;
931  TSignedSeqPos start;
932  if ((GetFlags() & fPhrap_OldVersion) == 0) {
933  char c;
934  in >> name >> c >> start;
935  CheckStreamState(in, "AF data.");
936  complemented = (c == 'C');
937  }
938  else {
// Old format: `stop` is read but not used afterwards.
939  TSignedSeqPos stop;
940  in >> name >> start >> stop;
941  CheckStreamState(in, "Assembled_from data.");
942  }
943  start--;
// operator[] creates an empty entry when the read is not known yet.
944  CRef<CPhrap_Read>& read = m_Reads[name];
945  if ( !read ) {
946  CRef<CPhrap_Seq>& seq = seqs[name];
947  if ( seq ) {
// A sequence with this name already exists: it must be a read.
948  read.Reset(dynamic_cast<CPhrap_Read*>(seq.GetPointer()));
949  if ( !read ) {
// NOTE(review): the message reports the contig name (GetName()), not `name`.
951  "ReadPhrap: invalid sequence type (" + GetName() + ").",
952  in.tellg() - CT_POS_TYPE(0));
953  }
954  }
955  else {
956  read.Reset(new CPhrap_Read(name, GetFlags()));
957  seq = CRef<CPhrap_Seq>(read.GetPointer());
958  }
959  }
960  read->AddReadLoc(start, complemented);
961 }
962 
963 
// Body of CPhrap_Contig::IsCircular() (signature line missing from listing).
// Returns true as soon as one of the contig's reads reports IsCircular(),
// false when none does (including when there are no reads).
965 {
966  ITERATE(TReads, read, m_Reads) {
967  if ( read->second->IsCircular() ) {
968  return true;
969  }
970  }
971  return false;
972 }
973 
974 
// Body of ReadBaseSegment(CNcbiIstream& in) - the BS reader (signature line
// missing from listing).
// Reads padded start, padded end and read name, converts both positions from
// 1-based to 0-based and appends the segment to the list kept for that read
// name. In old-version input the remainder of the line is consumed and
// discarded.
976 {
977  SBaseSeg seg;
978  string name;
979  in >> seg.m_Start >> seg.m_End >> name;
980  if ((GetFlags() & fPhrap_OldVersion) != 0) {
981  ReadLine(in);
982  }
983  CheckStreamState(in, "Base segment data.");
984  seg.m_Start--;
985  seg.m_End--;
986  m_BaseSegMap[name].push_back(seg);
987 }
988 
989 
// Body of ReadTag(CNcbiIstream& in, char tag) - the CT{} reader (signature
// line, the Split flags line and the lines opening each throw call are
// missing from this listing).
// Parses the tag header line into type, program, start, end, date and the
// optional "NoTrans" flag; for "oligo" tags reads the oligo record and
// validates its length; then collects comment lines up to the closing "}".
// NOTE(review): the closing loop has no end-of-stream check. ReadLine()
// returns an empty string once the stream is exhausted, so an unterminated
// tag would apparently never leave the loop - confirm.
991 {
992  _ASSERT(tag == 'C');
993  SContigTag ct;
994  string data = ReadLine(in);
995  list<string> fields;
996  NStr::Split(data, " ", fields,
998  list<string>::const_iterator f = fields.begin();
999 
1000  // Need some tricks to get optional NoTrans flag
1001  if (f == fields.end()) {
1003  "ReadPhrap: incomplete CT tag for " + GetName() + ".",
1004  in.tellg() - CT_POS_TYPE(0));
1005  }
1006  ct.m_Type = *f;
1007  f++;
1008  if (f == fields.end()) {
1010  "ReadPhrap: incomplete CT tag for " + GetName() + ".",
1011  in.tellg() - CT_POS_TYPE(0));
1012  }
1013  ct.m_Program = *f;
1014  f++;
1015  if (f == fields.end()) {
1017  "ReadPhrap: incomplete CT tag for " + GetName() + ".",
1018  in.tellg() - CT_POS_TYPE(0));
1019  }
// Positive positions are converted from 1-based to 0-based.
1020  ct.m_Start = NStr::StringToInt(*f);
1021  if (ct.m_Start > 0) {
1022  ct.m_Start--;
1023  }
1024  f++;
1025  if (f == fields.end()) {
1027  "ReadPhrap: incomplete CT tag for " + GetName() + ".",
1028  in.tellg() - CT_POS_TYPE(0));
1029  }
1030  ct.m_End = NStr::StringToInt(*f);
1031  if (ct.m_End > 0) {
1032  ct.m_End--;
1033  }
1034  f++;
1035  if (f == fields.end()) {
1037  "ReadPhrap: incomplete CT tag for " + GetName() + ".",
1038  in.tellg() - CT_POS_TYPE(0));
1039  }
1040  ct.m_Date = *f;
1041  f++;
1042  ct.m_NoTrans = (f != fields.end() && *f == "NoTrans");
1043  in >> ws;
1044 
1045  // Read oligo tag: <oligo_name> <(stop-start+1) bases> <melting temp> <U|C>
1046  if (ct.m_Type == "oligo") {
1047  char c;
1048  in >> ct.m_Oligo.m_Name
1049  >> ct.m_Oligo.m_Data
1050  >> ct.m_Oligo.m_MeltTemp
1051  >> c
1052  >> ws;
1053  CheckStreamState(in, "CT{} oligo data.");
1054  ct.m_Oligo.m_Complemented = (c == 'C');
1055  if (ct.m_Oligo.m_Data.size() != ct.m_End - ct.m_Start + 1) {
1057  "ReadPhrap: invalid oligo data length.",
1058  in.tellg() - CT_POS_TYPE(0));
1059  }
1060  }
1061  // Read all lines untill closing '}'
1062  for (string c = ReadLine(in); c != "}"; c = ReadLine(in)) {
1063  ct.m_Comments.push_back(c);
1064  }
1065  m_Tags.push_back(ct);
1066 }
1067 
1068 
// Body of x_CreateGraph(CBioseq& bioseq) (signature line missing from
// listing).
// When base qualities are present, creates a byte graph titled
// "Phrap Quality" over the whole sequence (local id = contig name), copies
// one value per unpadded base, records min 0 / the largest value seen as max,
// and attaches the graph to the bioseq in a new annotation.
// NOTE(review): indexes m_BaseQuals up to GetUnpaddedLength(); assumes the
// vector holds at least that many values.
1070 {
1071  if ( m_BaseQuals.empty() ) {
1072  return;
1073  }
1074  CRef<CSeq_annot> annot(new CSeq_annot);
1075  CRef<CSeq_graph> graph(new CSeq_graph);
1076  graph->SetTitle("Phrap Quality");
1077  graph->SetLoc().SetWhole().SetLocal().SetStr(GetName());
1078  graph->SetNumval(GetUnpaddedLength());
1079  CByte_graph::TValues& values = graph->SetGraph().SetByte().SetValues();
1080  values.resize(GetUnpaddedLength());
1081  int max_val = 0;
1082  for (size_t i = 0; i < GetUnpaddedLength(); i++) {
1083  values[i] = m_BaseQuals[i];
1084  if (m_BaseQuals[i] > max_val) {
1085  max_val = m_BaseQuals[i];
1086  }
1087  }
1088  graph->SetGraph().SetByte().SetMin(0);
1089  graph->SetGraph().SetByte().SetMax(max_val);
1090  graph->SetGraph().SetByte().SetAxis(0);
1091 
1092  annot->SetData().SetGraph().push_back(graph);
1093  bioseq.SetAnnot().push_back(annot);
1094 }
1095 
1096 
// x_AddAlignRanges: register the ungapped ranges of `seq`, placed at `offset`
// in padded contig coordinates, that fall inside [global_start, global_stop).
// Every range is inserted into aln_map (keyed by its padded range, carrying
// the row index and the unpadded start) and both of its ends are added to
// aln_starts. Returns true if at least one range was added.
// NOTE(review): the first signature line, the `offset` parameter line and the
// declaration of `rg` are missing from this listing.
1098  TSeqPos global_stop,
1099  const CPhrap_Seq& seq,
1100  size_t seq_idx,
1102  TAlignMap& aln_map,
1103  TAlignStarts& aln_starts) const
1104 {
1105  TSeqPos aln_from = seq.GetAlignedFrom();
1106  TSeqPos aln_len = seq.GetAlignedTo() - aln_from;
// The sequence lies entirely before the requested window.
1107  if (global_start >= seq.GetPaddedLength() + offset + aln_from) {
1108  return false;
1109  }
1110  bool ret = false;
// Padded start: the later of the aligned start and the window start.
1111  TSeqPos pstart = max(offset + TSignedSeqPos(aln_from),
1112  TSignedSeqPos(global_start));
// Convert to unpadded; pstart is advanced past any pads at that position.
1113  TSeqPos ustart = seq.GetUnpaddedPos(pstart - offset, &pstart);
1114  if (ustart == kInvalidSeqPos) {
1115  return false;
1116  }
1117  const TPadMap& pads = seq.GetPadMap();
1118  SAlignInfo info(seq_idx);
// Emit one range per run of bases between consecutive pads.
1120  ITERATE(TPadMap, pad_it, pads) {
1121  TSeqPos pad = pad_it->first - pad_it->second;
1122  if (pad <= ustart) {
1123  if (ret) pstart++;
1124  continue;
1125  }
1126  if (pstart >= GetPaddedLength() || pstart >= global_stop) {
1127  break;
1128  }
// Clip the run to the remaining aligned length and to the window end.
1129  TSeqPos len = pad - ustart;
1130  if (len > aln_len) {
1131  len = aln_len;
1132  }
1133  if (pstart + len > global_stop) {
1134  len = global_stop - pstart;
1135  }
1136  rg.Set(pstart, pstart + len - 1);
1137  pstart += len + 1; // +1 to skip gap
1138  info.m_Start = ustart;
1139  ustart += len;
1140  aln_starts.insert(rg.GetFrom());
1141  aln_starts.insert(rg.GetToOpen());
1142  aln_map.insert(TAlignMap::value_type(rg, info));
1143  ret = true;
1144  if ( (aln_len -= len) == 0) {
1145  break;
1146  }
1147  }
// Trailing run after the last pad handled above, if any length remains.
1148  _ASSERT(seq.GetUnpaddedLength() >= ustart);
1149  TSeqPos len = min(aln_len, seq.GetUnpaddedLength() - ustart);
1150  if (len > 0 && pstart < global_stop) {
1151  if (pstart + len > global_stop) {
1152  len = global_stop - pstart;
1153  }
1154  rg.Set(pstart, pstart + len - 1);
1155  if (rg.GetFrom() < GetPaddedLength()) {
1156  info.m_Start = ustart;
1157  aln_starts.insert(rg.GetFrom());
1158  aln_starts.insert(rg.GetToOpen());
1159  aln_map.insert(TAlignMap::value_type(rg, info));
1160  ret = true;
1161  }
1162  }
1163  return ret;
1164 }
1165 
1166 
// x_CreateSeq_align: build a dense-seg alignment over `rows` from the ranges
// collected in aln_map / aln_starts. Returns a null reference when fewer than
// two rows are given. Each consecutive pair of values in aln_starts is a
// candidate segment; segments covered by fewer than two rows are skipped.
// NOTE(review): the first signature line, one statement after the align
// object is created and the tail of the strand expression are missing from
// this listing.
1168  TAlignStarts& aln_starts,
1169  TAlignRows& rows) const
1170 {
1171  CSeq_align::TDim dim = CSeq_align::TDim(rows.size());
1172  if ( dim < 2 ) {
1173  return CRef<CSeq_align>(0);
1174  }
1175  CRef<CSeq_align> align(new CSeq_align);
1177  align->SetDim(dim); // contig + one reads
1178  CDense_seg& dseg = align->SetSegs().SetDenseg();
1179  dseg.SetDim(dim);
1180  ITERATE(TAlignRows, row, rows) {
1181  dseg.SetIds().push_back((*row)->GetId());
1182  }
1183  CDense_seg::TNumseg numseg = 0;
// data_size: number of starts/strands slots filled by accepted segments.
1184  size_t data_size = 0;
1185  CDense_seg::TStarts& starts = dseg.SetStarts();
1186  CDense_seg::TStrands& strands = dseg.SetStrands();
// Pre-size for the maximum possible number of segments; -1 marks a gap.
1187  starts.resize(dim*aln_starts.size(), -1);
1188  strands.resize(starts.size(), eNa_strand_unknown);
// seg_end is kept one element ahead of seg_start.
1189  TAlignStarts::const_iterator seg_end = aln_starts.begin();
1190  ITERATE(TAlignStarts, seg_start, aln_starts) {
1191  if (*seg_start >= GetPaddedLength()) {
1192  break;
1193  }
1194  ++seg_end;
1195  TAlignMap::iterator rg_it =
1196  aln_map.begin(TAlignMap::range_type(*seg_start, *seg_start));
1197  if ( !rg_it ) {
1198  // Skip global gap
1199  continue;
1200  }
1201  _ASSERT(seg_end != aln_starts.end());
1202  size_t row_count = 0;
1203  for ( ; rg_it; ++rg_it) {
1204  row_count++;
1205  const TAlignMap::range_type& aln_rg = rg_it->first;
1206  const SAlignInfo& info = rg_it->second;
1207  size_t idx = data_size + info.m_SeqIndex;
1208  const CPhrap_Seq& seq = *rows[info.m_SeqIndex];
1209  if (seq.IsComplemented()) {
1210  starts[idx] =
1211  seq.GetUnpaddedLength() -
1212  info.m_Start + aln_rg.GetFrom() - *seg_end;
1213  //strands[idx] = eNa_strand_minus;
1214  }
1215  else {
1216  starts[idx] = info.m_Start + *seg_start - aln_rg.GetFrom();
1217  //strands[idx] = eNa_strand_plus;
1218  }
1219  }
// A rejected segment leaves data_size unchanged, so its slots are reused.
// NOTE(review): starts written for a rejected segment are not reset to -1
// before reuse - confirm this cannot leak into the next segment.
1220  if (row_count < 2) {
1221  // Need at least 2 sequences to align
1222  continue;
1223  }
1224  for (size_t row = 0; row < size_t(dim); row++) {
1225  strands[data_size + row] = (rows[row]->IsComplemented()) ?
1227  }
1228  dseg.SetLens().push_back(*seg_end - *seg_start);
1229  numseg++;
1230  data_size += dim;
1231  }
// Drop the unused tail of the pre-sized vectors.
1232  starts.resize(data_size);
1233  strands.resize(data_size);
1234  dseg.SetNumseg(numseg);
1235  return align;
1236 }
1237 
1238 
// Body of x_CreateAlign(CBioseq_set& bioseq_set) (signature line missing
// from listing).
// Does nothing when the contig has no reads; otherwise dispatches on the
// alignment bits of the reader flags to one of the three alignment builders.
// Any other value of the masked flags creates no alignment.
1240 {
1241  if ( m_Reads.empty() ) {
1242  return;
1243  }
1244  switch ( GetFlags() & fPhrap_Align ) {
1245  case fPhrap_AlignAll:
1246  x_CreateAlignAll(bioseq_set);
1247  break;
1248  case fPhrap_AlignPairs:
1249  x_CreateAlignPairs(bioseq_set);
1250  break;
1251  case fPhrap_AlignOptimized:
1252  x_CreateAlignOptimized(bioseq_set);
1253  break;
1254  }
1255 }
1256 
1257 
// Body of x_CreateAlignAll(CBioseq_set& bioseq_set) (signature line missing
// from listing).
// Builds a single alignment covering the whole padded contig: the contig is
// row 0 (when it contributes ranges) and every read placement that
// contributes ranges gets its own row. A read is tried at its start offset
// and then at each further offset one padded contig length later, while that
// offset is still inside the contig. Nothing is attached when no alignment
// could be built.
1259 {
1260  CRef<CSeq_annot> annot(new CSeq_annot);
1261 
1262  // Align unpadded contig and each unpadded read to padded contig coords
1263  TAlignMap aln_map;
1264  TAlignStarts aln_starts;
1265  TAlignRows rows;
1266  size_t dim = 0;
1267  TSeqPos global_start = 0;
1268  TSeqPos global_stop = GetPaddedLength();
1269  if ( x_AddAlignRanges(global_start, global_stop,
1270  *this, 0, 0, aln_map, aln_starts) ) {
1271  rows.push_back(CConstRef<CPhrap_Seq>(this));
1272  dim = 1;
1273  }
1274  ITERATE (TReads, rd, m_Reads) {
1275  const CPhrap_Read& read = *rd->second;
1276  TSignedSeqPos start = read.GetStart();
1277  while ( start < TSignedSeqPos(GetPaddedLength()) ) {
1278  if (x_AddAlignRanges(global_start, global_stop,
1279  read, dim, start, aln_map, aln_starts)) {
1280  dim++;
1281  rows.push_back(CConstRef<CPhrap_Seq>(&read));
1282  }
1283  start += GetPaddedLength();
1284  }
1285  }
1286  CRef<CSeq_align> align = x_CreateSeq_align(aln_map, aln_starts, rows);
1287  if ( !align ) {
1288  return;
1289  }
1290  annot->SetData().SetAlign().push_back(align);
1291  bioseq_set.SetAnnot().push_back(annot);
1292 }
1293 
1294 
// Body of x_CreateAlignPairs(CBioseq_set& bioseq_set) (signature line missing
// from listing).
// Builds one alignment per read: row 0 is always the contig and each
// placement of the read that contributes ranges adds a row. The window for
// each placement runs from the placement start (0 when the read's start is
// negative) to the placement start plus the read's padded length. The
// annotation is attached even when no alignment was produced.
1296 {
1297  // One-to one version
1298  CRef<CSeq_annot> annot(new CSeq_annot);
1299  ITERATE(TReads, rd, m_Reads) {
1300  TAlignMap aln_map;
1301  TAlignStarts aln_starts;
1302  TAlignRows rows;
1303  const CPhrap_Read& read = *rd->second;
1304 
1305  size_t dim = 1;
1306  rows.push_back(CConstRef<CPhrap_Seq>(this));
1307  // Align unpadded contig and each loc of each read to padded coords
1308 // ITERATE(CPhrap_Read::TStarts, offset, read.GetStarts()) {
1309  TSignedSeqPos start = read.GetStart();
1310  while ( start < TSignedSeqPos(GetPaddedLength()) ) {
1311  TSignedSeqPos global_start = read.GetStart() < 0 ? 0 : start;
1312  TSignedSeqPos global_stop = read.GetPaddedLength() + start;
// Contig ranges are added for every placement; the result is not checked.
1313  x_AddAlignRanges(global_start, global_stop,
1314  *this, 0, 0, aln_map, aln_starts);
1315  if ( x_AddAlignRanges(global_start, global_stop,
1316  read, dim, start, aln_map, aln_starts) ) {
1317  rows.push_back(CConstRef<CPhrap_Seq>(&read));
1318  dim++;
1319  }
1320  start += GetPaddedLength();
1321  }
1322  CRef<CSeq_align> align = x_CreateSeq_align(aln_map, aln_starts, rows);
1323  if ( !align ) {
1324  continue;
1325  }
1326  annot->SetData().SetAlign().push_back(align);
1327  }
1328  bioseq_set.SetAnnot().push_back(annot);
1329 }
1330 
1331 
// Width, in padded contig positions, of each window for which
// x_CreateAlignOptimized() builds a separate alignment.
1332 const TSeqPos kMaxSegLength = 100000;
1333 
1335 {
1336  // Optimized (diagonal) set of alignments
1337  CRef<CSeq_annot> annot(new CSeq_annot);
1338 
1339  for (TSeqPos g_start = 0; g_start < GetPaddedLength();
1340  g_start += kMaxSegLength) {
1341  TSeqPos g_stop = g_start + kMaxSegLength;
1342  TAlignMap aln_map;
1343  TAlignStarts aln_starts;
1344  TAlignRows rows;
1345  size_t dim = 0;
1346  if ( x_AddAlignRanges(g_start, g_stop,
1347  *this, 0, 0, aln_map, aln_starts) ) {
1348  rows.push_back(CConstRef<CPhrap_Seq>(this));
1349  dim = 1;
1350  }
1351  ITERATE (TReads, rd, m_Reads) {
1352  const CPhrap_Read& read = *rd->second;
1353  TSignedSeqPos start = read.GetStart();
1354  while (start < TSignedSeqPos(GetPaddedLength())) {
1355  if (x_AddAlignRanges(g_start, g_stop,
1356  read, dim, start, aln_map, aln_starts)) {
1357  dim++;
1358  rows.push_back(CConstRef<CPhrap_Seq>(&read));
1359  }
1360  start += GetPaddedLength();
1361  }
1362  }
1363  CRef<CSeq_align> align = x_CreateSeq_align(aln_map, aln_starts, rows);
1364  if ( !align ) {
1365  continue;
1366  }
1367  annot->SetData().SetAlign().push_back(align);
1368  }
1369  bioseq_set.SetAnnot().push_back(annot);
1370 }
1371 
1372 
1374 {
1376  return;
1377  }
1378  if ( !annot ) {
1379  annot.Reset(new CSeq_annot);
1380  }
1381  ITERATE(TBaseSegMap, bs_set, m_BaseSegMap) {
1382  CRef<CPhrap_Read> read = m_Reads[bs_set->first];
1383  if ( !read ) {
1385  "ReadPhrap: referenced read " + bs_set->first + " not found.",
1386  CT_POS_TYPE(0));
1387  }
1388  ITERATE(TBaseSegs, bs, bs_set->second) {
1389  TSignedSeqPos rd_start = read->GetStart();
1390  while (rd_start < TSignedSeqPos(GetPaddedLength())) {
1391  //TSignedSeqPos aln_start = rd_start + read->GetAlignedFrom();
1392  TSignedSeqPos aln_stop = rd_start + read->GetAlignedTo();
1393  if (/*TSignedSeqPos(bs->m_Start) >= aln_start &&*/
1394  TSignedSeqPos(bs->m_End) <= aln_stop) {
1395  break;
1396  }
1397  rd_start += GetPaddedLength();
1398  }
1399  _ASSERT(rd_start < TSignedSeqPos(GetPaddedLength()));
1400  TSeqPos start = bs->m_Start - rd_start;
1401  TSeqPos stop = bs->m_End - rd_start;
1402  start = read->GetUnpaddedPos(start);
1403  stop = read->GetUnpaddedPos(stop);
1404  _ASSERT(start != kInvalidSeqPos);
1405  _ASSERT(stop != kInvalidSeqPos);
1406  CRef<CSeq_feat> bs_feat(new CSeq_feat);
1407  bs_feat->SetData().SetImp().SetKey("base_segment");
1408  CSeq_loc& loc = bs_feat->SetLocation();
1409  loc.SetInt().SetId(*read->GetId());
1410  if ( read->IsComplemented() ) {
1411  loc.SetInt().SetFrom(read->GetUnpaddedLength() - stop - 1);
1412  loc.SetInt().SetTo(read->GetUnpaddedLength() - start - 1);
1413  loc.SetInt().SetStrand(eNa_strand_minus);
1414  }
1415  else {
1416  loc.SetInt().SetFrom(start);
1417  loc.SetInt().SetTo(stop);
1418  }
1419  start = GetUnpaddedPos(bs->m_Start);
1420  stop = GetUnpaddedPos(bs->m_End);
1421  _ASSERT(start != kInvalidSeqPos);
1422  _ASSERT(stop != kInvalidSeqPos);
1423  CSeq_loc& prod = bs_feat->SetProduct();
1424  prod.SetInt().SetId(*GetId());
1425  prod.SetInt().SetFrom(start);
1426  prod.SetInt().SetTo(stop);
1427  annot->SetData().SetFtable().push_back(bs_feat);
1428  }
1429  }
1430 }
1431 
1432 
1434 {
1435  if ( !FlagSet(fPhrap_FeatReadLocs) || m_Reads.empty() ) {
1436  return;
1437  }
1438  if ( !annot ) {
1439  annot.Reset(new CSeq_annot);
1440  }
1441  ITERATE(TReads, read, m_Reads) {
1442  TSignedSeqPos rd_start = read->second->GetStart() +
1443  read->second->GetAlignedFrom();
1444  while (rd_start < 0) {
1445  rd_start += GetPaddedLength();
1446  }
1447  CRef<CSeq_feat> loc_feat(new CSeq_feat);
1448  loc_feat->SetData().SetImp().SetKey("read_start");
1449  CSeq_loc& loc = loc_feat->SetLocation();
1450  TSeqPos aln_rd_start = read->second->GetUnpaddedPos(
1451  read->second->GetAlignedFrom());
1452  TSeqPos aln_rd_stop = read->second->GetUnpaddedPos(
1453  read->second->GetAlignedTo());
1454  loc.SetInt().SetId(*read->second->GetId());
1455  loc.SetInt().SetFrom(aln_rd_start);
1456  loc.SetInt().SetTo(aln_rd_stop - 1);
1457  if ( read->second->IsComplemented() ) {
1458  loc.SetInt().SetStrand(eNa_strand_minus);
1459  }
1460  if ( FlagSet(fPhrap_PadsToFuzz) ) {
1461  loc.SetInt().SetFuzz_from().
1462  SetP_m(read->second->GetAlignedFrom() - aln_rd_start);
1463  loc.SetInt().SetFuzz_to().
1464  SetP_m(read->second->GetAlignedTo() - aln_rd_stop);
1465  }
1466  CSeq_loc& prod = loc_feat->SetProduct();
1467  TSignedSeqPos rd_stop = rd_start +
1468  read->second->GetAlignedTo() - read->second->GetAlignedFrom();
1469  if (rd_stop >= TSignedSeqPos(GetPaddedLength())) {
1470  // Circular contig, split ranges
1472  GetUnpaddedPos(rd_start), GetUnpaddedLength() - 1));
1473  if ( FlagSet(fPhrap_PadsToFuzz) ) {
1474  rg1->SetFuzz_from().SetP_m(rd_start - rg1->GetFrom());
1475  rg1->SetFuzz_to().SetP_m(GetPaddedLength() - GetUnpaddedLength());
1476  }
1477  prod.SetPacked_int().Set().push_back(rg1);
1478 
1480  0, GetUnpaddedPos(rd_stop - GetPaddedLength())));
1481  if ( FlagSet(fPhrap_PadsToFuzz) ) {
1482  rg2->SetFuzz_from().SetP_m(0);
1483  rg2->SetFuzz_to().
1484  SetP_m(rd_stop - GetPaddedLength() - rg2->GetTo());
1485  }
1486  prod.SetPacked_int().Set().push_back(rg2);
1487  }
1488  else {
1489  prod.SetInt().SetId(*GetId());
1490  prod.SetInt().SetFrom(GetUnpaddedPos(rd_start));
1491  prod.SetInt().SetTo(GetUnpaddedPos(rd_stop));
1492  if ( FlagSet(fPhrap_PadsToFuzz) ) {
1493  prod.SetInt().SetFuzz_from().
1494  SetP_m(rd_start - prod.SetInt().GetFrom());
1495  prod.SetInt().SetFuzz_to().
1496  SetP_m(rd_stop - prod.SetInt().GetTo());
1497  }
1498  }
1499  annot->SetData().SetFtable().push_back(loc_feat);
1500  }
1501 }
1502 
1503 
1505 {
1506  if ( !FlagSet(fPhrap_FeatTags) || m_Tags.empty() ) {
1507  return;
1508  }
1509  if ( !annot ) {
1510  annot.Reset(new CSeq_annot);
1511  }
1512  ITERATE(TContigTags, tag_it, m_Tags) {
1513  const SContigTag& tag = *tag_it;
1514  CRef<CSeq_feat> feat(new CSeq_feat);
1515  string& title = feat->SetTitle();
1516  title = "created " + tag.m_Date + " by " + tag.m_Program;
1517  if ( tag.m_NoTrans ) {
1518  title += " (NoTrans)";
1519  }
1520  string comment;
1521  ITERATE(vector<string>, c, tag.m_Comments) {
1522  comment += (comment.empty() ? "" : " | ") + *c;
1523  }
1524  if ( !comment.empty() ) {
1525  feat->SetComment(comment);
1526  }
1527  feat->SetData().SetImp().SetKey(tag.m_Type);
1528  if ( !tag.m_Oligo.m_Name.empty() ) {
1529  feat->SetData().SetImp().SetDescr(
1530  tag.m_Oligo.m_Name + " " +
1531  tag.m_Oligo.m_Data + " " +
1532  tag.m_Oligo.m_MeltTemp + " " +
1533  (tag.m_Oligo.m_Complemented ? "C" : "U"));
1534  }
1535  CSeq_loc& loc = feat->SetLocation();
1536  loc.SetInt().SetId(*GetId());
1537  loc.SetInt().SetFrom(GetUnpaddedPos(tag.m_Start));
1538  loc.SetInt().SetTo(GetUnpaddedPos(tag.m_End));
1539  if ( FlagSet(fPhrap_PadsToFuzz) ) {
1540  loc.SetInt().SetFuzz_from().
1541  SetP_m(tag.m_Start - loc.SetInt().GetFrom());
1542  loc.SetInt().SetFuzz_to().
1543  SetP_m(tag.m_End - loc.SetInt().GetTo());
1544  }
1545  annot->SetData().SetFtable().push_back(feat);
1546  }
1547 }
1548 
1549 
1551 {
1552  CRef<CSeq_annot> annot;
1553  CreatePadsFeat(annot);
1554  x_AddReadLocFeats(annot);
1555  x_AddBaseSegFeats(annot);
1556  x_AddTagFeats(annot);
1557  if ( annot ) {
1558  bioseq.SetAnnot().push_back(annot);
1559  }
1560 }
1561 
1562 
1564 {
1565  CRef<CSeq_descr> descr;
1566  CreateComplementedDescr(descr);
1567 
1568  if ( FlagSet(fPhrap_Descr) ) {
1569  // Reserved for possible descriptors
1570  }
1571 
1572  if ( descr && !descr->Get().empty() ) {
1573  bioseq.SetDescr(*descr);
1574  }
1575 }
1576 
1577 
1579 {
1580  CRef<CSeq_entry> cont_entry(new CSeq_entry);
1581  CRef<CBioseq> bioseq = CreateBioseq();
1582  _ASSERT(bioseq);
1583  bioseq->SetInst().SetRepr(CSeq_inst::eRepr_consen);
1584  if ( IsCircular() ) {
1585  bioseq->SetInst().SetTopology(CSeq_inst::eTopology_circular);
1586  }
1587  cont_entry->SetSeq(*bioseq);
1588 
1589  x_CreateDesc(*bioseq);
1590  x_CreateGraph(*bioseq);
1591  x_CreateFeat(*bioseq);
1592 
1593  CRef<CSeq_entry> set_entry(new CSeq_entry);
1594  CBioseq_set& bioseq_set = set_entry->SetSet();
1595  bioseq_set.SetLevel(level);
1597  bioseq_set.SetSeq_set().push_back(cont_entry);
1598  x_CreateAlign(bioseq_set);
1599  ITERATE(TReads, it, m_Reads) {
1600  CRef<CSeq_entry> rd_entry = it->second->CreateRead();
1601  bioseq_set.SetSeq_set().push_back(rd_entry);
1602  }
1603  return set_entry;
1604 }
1605 
1606 
1608 {
1609 public:
1610  CPhrap_Sequence(const string& name, TPhrapReaderFlags flags);
1611  virtual void ReadTag(CNcbiIstream& in, char tag);
1612 
1613  // Convert to contig or read depending on the loaded data
1614  bool IsContig(void) const;
1616 
1617  bool IsRead(void) const;
1618  CRef<CPhrap_Read> GetRead(void);
1619  void SetRead(CPhrap_Read& read);
1620 
1621 private:
1623 };
1624 
1625 
1627  : CPhrap_Seq(name, flags),
1628  m_Seq(0)
1629 {
1630  // Check if name ends with '.comp'
1632  return;
1633 }
1634 
1635 
1637 {
1639  "ReadPhrap: unexpected tag.",
1640  in.tellg() - CT_POS_TYPE(0));
1641 }
1642 
1643 
1645 {
1646  return m_Seq &&
1647  dynamic_cast<const CPhrap_Contig*>(m_Seq.GetPointer()) != 0;
1648 }
1649 
1650 
1652 {
1653  if ( !m_Seq ) {
1655  // Copy existing data into the contig
1656  m_Seq->CopyFrom(*this);
1657  }
1658  _ASSERT( IsContig() );
1659  return Ref(&dynamic_cast<CPhrap_Contig&>(*m_Seq));
1660 }
1661 
1662 
1663 bool CPhrap_Sequence::IsRead(void) const
1664 {
1665  return m_Seq &&
1666  dynamic_cast<const CPhrap_Read*>(m_Seq.GetPointer()) != 0;
1667 }
1668 
1669 
1671 {
1672  if ( !m_Seq ) {
1673  m_Seq.Reset(new CPhrap_Read(GetName(), GetFlags()));
1674  // Copy existing data into the read
1675  m_Seq->CopyFrom(*this);
1676  }
1677  _ASSERT( IsRead() );
1678  return Ref(&dynamic_cast<CPhrap_Read&>(*m_Seq));
1679 }
1680 
1681 
1683 {
1684  _ASSERT( !m_Seq );
1685  m_Seq.Reset(CRef<CPhrap_Seq>(&read));
1686  _ASSERT(GetName() == read.GetName());
1687  // Copy sequence data, length, pad map etc.
1688  read.CopyFrom(*this);
1689 }
1690 
1691 
1693 {
1694 public:
1696  CRef<CSeq_entry> Read(void);
1697 
1698 private:
1699  enum EPhrapTag {
1700  ePhrap_not_set, // empty value for m_LastTag
1701  ePhrap_unknown, // unknown tag (error)
1702  ePhrap_eof, // end of file
1703  ePhrap_AS, // Header: <contigs in file> <reads in file>
1704  ePhrap_CO, // Contig: <name> <# bases> <# reads> <# base segments> <U or C>
1705  ePhrap_BQ, // Base qualities for the unpadded consensus bases
1706  ePhrap_AF, // Location of the read in the contig:
1707  // <read> <C or U> <padded start consensus position>
1708  ePhrap_BS, // Base segment:
1709  // <padded start position> <padded end position> <read name>
1710  ePhrap_RD, // Read:
1711  // <name> <# padded bases> <# whole read info items> <# read tags>
1712  ePhrap_QA, // Quality alignment:
1713  // <qual start> <qual end> <align start> <align end>
1714  ePhrap_DS, // Original data
1715  ePhrap_RT, // {...}
1716  ePhrap_CT, // {...}
1717  ePhrap_WA, // {...}
1718  ePhrap_WR, // WRong, tag must be ignored
1719 
1720  // Old format tags
1730  };
1731 
1732  struct SAssmTag
1733  {
1734  string m_Type;
1735  string m_Program;
1736  string m_Date;
1737  vector<string> m_Comments;
1738  };
1739  typedef vector<SAssmTag> TAssmTags;
1740 
1741  void x_ConvertContig(void);
1742  void x_ReadContig(void);
1743  void x_ReadRead(void);
1744  void x_ReadTag(const string& tag); // CT{} and RT{}
1745  void x_ReadWA(void); // WA{}
1746  void x_SkipTag(const string& tag,
1747  const string& data); // WR{}, standalone CT{} and RT{}
1748 
1749  void x_ReadOldFormatData(void); // Read old ACE format data
1753 
1754  void x_DetectFormatVersion(void);
1755  EPhrapTag x_GetTag(void);
1756  EPhrapTag x_GetNewTag(void); // read new ACE tag (AS, CO etc.)
1757  EPhrapTag x_GetOldTag(void); // read old ACE tag (Sequence, DNA etc.)
1758 
1759  void x_UngetTag(EPhrapTag tag);
1760 
1761  CPhrap_Seq* x_FindSeq(const string& name);
1762 
1763  void x_CreateDesc(CBioseq_set& bioseq) const;
1764 
1765  typedef vector< CRef<CPhrap_Contig> > TContigs;
1767 
1773  size_t m_NumReads;
1777 };
1778 
1779 
1781  : m_Stream(in),
1782  m_Flags(flags),
1783  m_LastTag(ePhrap_not_set),
1784  m_NumContigs(0),
1785  m_NumReads(0)
1786 {
1787  return;
1788 }
1789 
1790 
1792 {
1793  if ( !m_Stream ) {
1795  "ReadPhrap: input stream no longer valid",
1796  m_Stream.tellg() - CT_POS_TYPE(0));
1797  }
1799  EPhrapTag tag = x_GetTag();
1800  if ((m_Flags & fPhrap_OldVersion) == 0) {
1801  // Read new ACE format
1802  if (tag != ePhrap_AS) {
1804  "ReadPhrap: invalid data, AS tag expected.",
1805  m_Stream.tellg() - CT_POS_TYPE(0));
1806  }
1808  CheckStreamState(m_Stream, "invalid data in AS tag.");
1809  for (size_t i = 0; i < m_NumContigs; i++) {
1810  x_ReadContig();
1811  x_ConvertContig();
1812  }
1813  if (x_GetTag() != ePhrap_eof) {
1815  "ReadPhrap: unrecognized extra-data, EOF expected.",
1816  m_Stream.tellg() - CT_POS_TYPE(0));
1817  }
1818  }
1819  else {
1820  // Read old ACE format
1821  x_UngetTag(tag);
1823  }
1824  _ASSERT( m_Entry && m_Entry->IsSet() );
1826 
1827  return m_Entry;
1828 }
1829 
1830 
1832 {
1833  if ( m_Contigs.empty() ) {
1834  return;
1835  }
1836  _ASSERT(m_Contigs.size() == 1);
1837  CRef<CSeq_entry> entry = m_Contigs[0]->CreateContig(
1838  m_NumContigs > 1 ? 2 : 1);
1839  m_Contigs.clear();
1840  m_Seqs.clear();
1841  if (m_NumContigs == 1) {
1842  _ASSERT( !m_Entry );
1843  m_Entry = entry;
1844  }
1845  else {
1846  if ( !m_Entry ) {
1847  m_Entry.Reset(new CSeq_entry);
1848  CBioseq_set& bset = m_Entry->SetSet();
1849  bset.SetLevel(1);
1850  }
1851  m_Entry->SetSet().SetSeq_set().push_back(entry);
1852  }
1853 }
1854 
1855 
1857 {
1859  m_LastTag = tag;
1860 }
1861 
1862 
1864 {
1868  // Version is forced
1869  return;
1870  }
1871  m_Flags &= ~fPhrap_Version;
1872  m_Stream >> ws;
1873  if ( m_Stream.eof() ) {
1874  return;
1875  }
1877  string str_tag;
1878  m_Stream >> str_tag;
1879  if (str_tag == "AS") {
1880  tag = ePhrap_AS;
1881  }
1882  else if (str_tag == "DNA") {
1883  tag = ePhrap_DNA;
1884  }
1885  else if (str_tag == "Sequence") {
1886  tag = ePhrap_Sequence;
1887  }
1888  else if (str_tag == "BaseQuality") {
1890  }
1891  if (tag != ePhrap_not_set) {
1892  x_UngetTag(tag);
1894  return;
1895  }
1897  "ReadPhrap: Can not autodetect ACE format version.",
1898  m_Stream.tellg() - CT_POS_TYPE(0));
1899 }
1900 
1901 
// Return the next tag.  A tag previously stored in m_LastTag (x_UngetTag()
// puts one there) is returned first; otherwise leading whitespace is
// skipped and the tag is parsed from the stream with the old- or
// new-format parser, depending on the fPhrap_OldVersion flag.
// Returns ePhrap_eof when the stream is exhausted.
1903 {
1904  if (m_LastTag != ePhrap_not_set) {
// A pushed-back tag is pending - hand it out instead of reading
1905  EPhrapTag ret = m_LastTag;
1907  return ret;
1908  }
// Skip whitespace; hitting the end of the stream here means no more tags
1909  m_Stream >> ws;
1910  if ( m_Stream.eof() ) {
1911  return ePhrap_eof;
1912  }
// Dispatch on the format version selected in m_Flags
1913  return ((m_Flags & fPhrap_OldVersion) != 0) ?
1914  x_GetOldTag() : x_GetNewTag();
1915 }
1916 
1917 
1919 {
1920  switch (m_Stream.get()) {
1921  case 'A': // AS, AF
1922  switch (m_Stream.get()) {
1923  case 'F':
1924  return ePhrap_AF;
1925  case 'S':
1926  // No duplicate 'AS' tags
1927  if (m_NumContigs != 0) {
1929  "ReadPhrap: duplicate AS tag.",
1930  m_Stream.tellg() - CT_POS_TYPE(0));
1931  }
1932  return ePhrap_AS;
1933  }
1934  break;
1935  case 'B': // BQ, BS
1936  switch (m_Stream.get()) {
1937  case 'S':
1938  return ePhrap_BS;
1939  case 'Q':
1940  return ePhrap_BQ;
1941  }
1942  break;
1943  case 'C': // CO, CT
1944  switch (m_Stream.get()) {
1945  case 'O':
1946  return ePhrap_CO;
1947  case 'T':
1948  return ePhrap_CT;
1949  }
1950  break;
1951  case 'D': // DS
1952  if (m_Stream.get() == 'S') {
1953  return ePhrap_DS;
1954  }
1955  break;
1956  case 'Q': // QA
1957  if (m_Stream.get() == 'A') {
1958  return ePhrap_QA;
1959  }
1960  break;
1961  case 'R': // RD, RT
1962  switch (m_Stream.get()) {
1963  case 'D':
1964  return ePhrap_RD;
1965  case 'T':
1966  return ePhrap_RT;
1967  }
1968  break;
1969  case 'W': // WA
1970  switch (m_Stream.get()) {
1971  case 'A':
1972  return ePhrap_WA;
1973  case 'R':
1974  return ePhrap_WR;
1975  }
1976  break;
1977  }
1978  CheckStreamState(m_Stream, "tag.");
1979  m_Stream >> ws;
1981  "ReadPhrap: unknown tag.",
1982  m_Stream.tellg() - CT_POS_TYPE(0));
1983  return ePhrap_unknown;
1984 }
1985 
1986 
1987 
1989 {
1990  EPhrapTag tag;
1991  string str_tag;
1992  m_Stream >> str_tag;
1993  if (str_tag == "DNA") {
1994  tag = ePhrap_DNA;
1995  }
1996  else if (str_tag == "Sequence") {
1997  tag = ePhrap_Sequence;
1998  }
1999  else if (str_tag == "BaseQuality") {
2001  }
2002  else if (str_tag == "Assembled_from") {
2004  }
2005  else if (str_tag == "Assembled_from*") {
2007  }
2008  else if (str_tag == "Base_segment") {
2010  }
2011  else if (str_tag == "Base_segment*") {
2013  }
2014  else if (str_tag == "Clipping") {
2015  tag = ePhrap_Clipping;
2016  }
2017  else if (str_tag == "Clipping*") {
2019  }
2020  else {
2022  "ReadPhrap: unknown tag.",
2023  m_Stream.tellg() - CT_POS_TYPE(0));
2024  }
2025  CheckStreamState(m_Stream, "tag.");
2026  m_Stream >> ws;
2027  return tag;
2028 }
2029 
2030 
2031 
2032 inline
2034 {
2035  TSeqs::iterator seq = m_Seqs.find(name);
2036  if (seq == m_Seqs.end()) {
2037  ERR_POST_X(1, Warning <<
2038  "Referenced contig or read not found: " << name << ".");
2039  return 0;
2040  }
2041  return &*seq->second;
2042 }
2043 
2044 
// Read a "<tag>{ ... }" block that refers to a sequence by name (the
// declaration notes this is used for CT{} and RT{}).
// 'tag' is the two-letter tag name; it is used in error messages and its
// first character is passed on to CPhrap_Seq::ReadTag().
// The block must open with '{', followed by the sequence name.  If the
// name is known, the sequence object parses the rest of the block;
// otherwise the block is skipped through x_SkipTag().
2045 void CPhrapReader::x_ReadTag(const string& tag)
2046 {
2047  m_Stream >> ws;
2048  if (m_Stream.get() != '{') {
2050  "ReadPhrap: '{' expected after " + tag + " tag.",
2051  m_Stream.tellg() - CT_POS_TYPE(0));
2052  }
// First token inside the braces is the name of the contig or read
2053  string name;
2054  m_Stream >> name;
2055  CheckStreamState(m_Stream, tag + "{} data.");
// x_FindSeq() returns null (and posts a warning) for unknown names
2056  CPhrap_Seq* seq = x_FindSeq(name);
2057  if ( seq ) {
2058  seq->ReadTag(m_Stream, tag[0]);
2059  }
2060  else {
// Pass along the part of the block already consumed so that the
// skipped-tag warning can show it
2061  x_SkipTag(tag, "{\n" + name + " ");
2062  }
2063 }
2064 
2065 
// Read one WA{} block: after the opening '{' come the tag type, program
// and date, then free-text lines up to a line consisting of "}".
// The parsed tag is appended to m_AssmTags.
2067 {
2068  m_Stream >> ws;
2069  if (m_Stream.get() != '{') {
2071  "ReadPhrap: '{' expected after WA tag.",
2072  m_Stream.tellg() - CT_POS_TYPE(0));
2073  }
2074  SAssmTag wt;
2075  m_Stream
2076  >> wt.m_Type
2077  >> wt.m_Program
2078  >> wt.m_Date
2079  >> ws;
2080  CheckStreamState(m_Stream, "WA{} data.");
2081  // Read all lines until closing '}'
// Every line is stored with surrounding spaces removed.
// NOTE(review): the stream state is checked before this loop but not
// inside it, and the loop ends only on a line equal to "}" - confirm that
// a block lacking the closing brace cannot keep it running at end of input.
2082  for (string c = NStr::TruncateSpaces(ReadLine(m_Stream));
2083  c != "}"; c = NStr::TruncateSpaces(ReadLine(m_Stream))) {
2084  wt.m_Comments.push_back(c);
2085  }
2086  m_AssmTags.push_back(wt);
2087 }
2088 
2089 
2090 void CPhrapReader::x_SkipTag(const string& tag, const string& data)
2091 {
2092  m_Stream >> ws;
2093  string content = data;
2094  for (string c = NStr::TruncateSpaces(ReadLine(m_Stream));
2095  c != "}"; c = NStr::TruncateSpaces(ReadLine(m_Stream))) {
2096  content += c + "\n";
2097  }
2098  content += "}";
2099  CheckStreamState(m_Stream, tag + "{} data.");
2100  ERR_POST_X(2, Warning << "Skipping tag:\n" << tag << content);
2101  m_Stream >> ws;
2102 }
2103 
2104 
2106 {
2107  EPhrapTag tag = x_GetTag();
2108  if (tag != ePhrap_CO) {
2110  "ReadPhrap: invalid data, contig tag expected.",
2111  m_Stream.tellg() - CT_POS_TYPE(0));
2112  }
2114  contig->Read(m_Stream);
2115  contig->ReadData(m_Stream);
2116  m_Contigs.push_back(contig);
2117  m_Seqs[contig->GetName()] = contig;
2118  for (tag = x_GetTag(); tag != ePhrap_eof; tag = x_GetTag()) {
2119  switch ( tag ) {
2120  case ePhrap_BQ:
2121  contig->ReadBaseQualities(m_Stream);
2122  continue;
2123  case ePhrap_AF:
2124  contig->ReadReadLocation(m_Stream, m_Seqs);
2125  continue;
2126  case ePhrap_BS:
2127  contig->ReadBaseSegment(m_Stream);
2128  continue;
2129  case ePhrap_eof:
2130  return;
2131  default:
2132  x_UngetTag(tag);
2133  }
2134  break;
2135  }
2136  // Read to the next contig or eof:
2137  while ((tag = x_GetTag()) != ePhrap_eof) {
2138  switch ( tag ) {
2139  case ePhrap_RD:
2140  x_ReadRead();
2141  continue;
2142  case ePhrap_RT:
2143  x_ReadTag("RT");
2144  continue;
2145  case ePhrap_CT:
2146  x_ReadTag("CT");
2147  continue;
2148  case ePhrap_WA:
2149  x_ReadWA();
2150  continue;
2151  case ePhrap_WR:
2152  x_SkipTag("WR", kEmptyStr);
2153  continue;
2154  case ePhrap_eof:
2155  return;
2156  default:
2157  x_UngetTag(tag);
2158  }
2159  break;
2160  }
2161 }
2162 
2163 
2165 {
2166  string read_name;
2167  m_Stream >> read_name;
2168  CRef<CPhrap_Read> read;
2169  {{
2170  CRef<CPhrap_Seq> seq = m_Seqs[read_name];
2171  if ( !seq ) {
2172  read.Reset(new CPhrap_Read(read_name, m_Flags));
2173  m_Seqs[read_name].Reset(read.GetPointer());
2174  }
2175  else {
2176  read.Reset(dynamic_cast<CPhrap_Read*>(seq.GetPointer()));
2177  }
2178  }}
2179  _ASSERT( read );
2180  read->Read(m_Stream);
2181  read->ReadData(m_Stream);
2182  m_Seqs[read->GetName()] = read;
2183  for (EPhrapTag tag = x_GetTag(); tag != ePhrap_eof; tag = x_GetTag()) {
2184  switch ( tag ) {
2185  case ePhrap_QA:
2186  read->ReadQuality(m_Stream);
2187  break;
2188  case ePhrap_DS:
2189  read->ReadDS(m_Stream);
2190  break;
2191  case ePhrap_eof:
2192  return;
2193  default:
2194  x_UngetTag(tag);
2195  return;
2196  }
2197  }
2198 }
2199 
2200 
2202 {
2203  if ( seq.IsRead() ) {
2205  "ReadPhrap: sequence type redifinition for " +
2206  seq.GetName() + " - was 'read'.",
2207  m_Stream.tellg() - CT_POS_TYPE(0));
2208  }
2209  // If have a loaded contig, convert it first
2210  x_ConvertContig();
2211  // Contig can not be already registered
2212  CRef<CPhrap_Contig> contig = seq.GetContig();
2213  m_Contigs.push_back(contig);
2214  m_Seqs[contig->GetName()] = CRef<CPhrap_Seq>(contig.GetPointer());
2215  _ASSERT(contig);
2216  return contig;
2217 }
2218 
2219 
2221 {
2222  if ( seq.IsContig() ) {
2224  "ReadPhrap: sequence type redifinition for " +
2225  seq.GetName() + " - was 'contig'.",
2226  m_Stream.tellg() - CT_POS_TYPE(0));
2227  }
2228  CRef<CPhrap_Read> read;
2229  TSeqs::iterator it = m_Seqs.find(seq.GetName());
2230  if ( it != m_Seqs.end() ) {
2231  // Read is already registered
2232  read.Reset(dynamic_cast<CPhrap_Read*>(it->second.GetPointer()));
2233  if ( !read ) {
2235  "ReadPhrap: sequence type redifinition for " +
2236  seq.GetName() + " - was 'contig'.",
2237  m_Stream.tellg() - CT_POS_TYPE(0));
2238  }
2239  seq.SetRead(*read);
2240  }
2241  else {
2242  read = seq.GetRead();
2243  m_Seqs[read->GetName()] = CRef<CPhrap_Seq>(read.GetPointer());
2244  }
2245  _ASSERT(read);
2246  return read;
2247 }
2248 
2249 
2251 {
2252  typedef map<string, CRef<CPhrap_Sequence> > TSequences;
2253  TSequences seqs;
2255  for (EPhrapTag tag = x_GetTag(); tag != ePhrap_eof; tag = x_GetTag()) {
2256  string seq_name;
2257  m_Stream >> seq_name;
2258  // Check if we have a new sequence
2259  if ( !seq || seq->GetName() != seq_name ) {
2260  TSequences::iterator seq_it = seqs.find(seq_name);
2261  if (seq_it != seqs.end()) {
2262  seq = seq_it->second;
2263  }
2264  else {
2265  seq.Reset(new CPhrap_Sequence(seq_name, m_Flags));
2266  seqs[seq_name] = seq;
2267  }
2268  }
2269  switch ( tag ) {
2270  case ePhrap_DNA:
2271  seq->ReadData(m_Stream);
2272  break;
2273  case ePhrap_Sequence:
2274  x_ReadOldSequence(*seq);
2275  break;
2276  case ePhrap_BaseQuality:
2277  // BaseQuality tag is defined only for contigs
2278  x_AddContig(*seq)->ReadBaseQualities(m_Stream);
2279  break;
2280  case ePhrap_eof:
2281  continue;
2282  default:
2284  "ReadPhrap: unexpected tag.",
2285  m_Stream.tellg() - CT_POS_TYPE(0));
2286  }
2287  }
2288  x_ConvertContig();
2289 }
2290 
2291 
2293 {
2294  CRef<CPhrap_Contig> contig;
2295  if ( seq.IsContig() ) {
2296  contig = seq.GetContig();
2297  }
2298  CRef<CPhrap_Read> read;
2299  if ( seq.IsRead() ) {
2300  read = seq.GetRead();
2301  }
2302  for (EPhrapTag tag = x_GetTag(); tag != ePhrap_eof; tag = x_GetTag()) {
2303  // Assembled_from[*] name start stop
2304  // Base_segment[*] c_start c_stop name r_start r_stop
2305  // Clipping[*] start stop
2306  switch ( tag ) {
2307  case ePhrap_Assembled_from:
2308  case ePhrap_Base_segment:
2309  case ePhrap_Clipping:
2310  // Ignore unpadded coordinates, use only padded versions
2311  ReadLine(m_Stream);
2312  continue;
2314  if ( !contig ) {
2315  contig = x_AddContig(seq);
2316  }
2317  contig->ReadReadLocation(m_Stream, m_Seqs);
2318  break;
2320  if ( !contig ) {
2321  contig = x_AddContig(seq);
2322  }
2323  contig->ReadBaseSegment(m_Stream);
2324  break;
2325  case ePhrap_Clipping_Pad:
2326  if ( !read ) {
2327  read = x_AddRead(seq);
2328  }
2329  read->ReadQuality(m_Stream);
2330  break;
2331  case ePhrap_DNA:
2332  case ePhrap_Sequence:
2333  case ePhrap_BaseQuality:
2334  // Unget tag and return
2335  x_UngetTag(tag);
2336  case ePhrap_eof:
2337  return;
2338  default:
2340  "ReadPhrap: unexpected tag.",
2341  m_Stream.tellg() - CT_POS_TYPE(0));
2342  }
2343  if ( read && contig ) {
2345  "ReadPhrap: sequence type redifinition.",
2346  m_Stream.tellg() - CT_POS_TYPE(0));
2347  }
2348  }
2349 }
2350 
2351 
2353 {
2354  if ( ( (m_Flags & fPhrap_Descr) == 0) || m_AssmTags.empty() ) {
2355  return;
2356  }
2357  CRef<CSeq_descr> descr(new CSeq_descr);
2358  CRef<CSeqdesc> desc;
2359 
2361  desc.Reset(new CSeqdesc);
2362  string comment;
2363  ITERATE(vector<string>, c, tag->m_Comments) {
2364  comment += " | " + *c;
2365  }
2366  desc->SetComment(
2367  tag->m_Type + " " +
2368  tag->m_Program + " " +
2369  tag->m_Date +
2370  comment);
2371  descr->Set().push_back(desc);
2372  }
2373  bioseq_set.SetDescr(*descr);
2374 }
2375 
2376 
2378 {
2379  CPhrapReader reader(in, flags);
2380  return reader.Read();
2381 }
2382 
2383 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static void pad(Char *s)
Definition: bzip2.c:908
CConstRef –.
Definition: ncbiobj.hpp:1266
CObject –.
Definition: ncbiobj.hpp:180
void x_ReadWA(void)
Definition: phrap.cpp:2066
EPhrapTag x_GetOldTag(void)
Definition: phrap.cpp:1988
map< string, CRef< CPhrap_Seq > > TSeqs
Definition: phrap.cpp:1766
vector< CRef< CPhrap_Contig > > TContigs
Definition: phrap.cpp:1765
void x_ReadOldFormatData(void)
Definition: phrap.cpp:2250
CRef< CSeq_entry > m_Entry
Definition: phrap.cpp:1771
CRef< CPhrap_Contig > x_AddContig(CPhrap_Sequence &seq)
Definition: phrap.cpp:2201
void x_SkipTag(const string &tag, const string &data)
Definition: phrap.cpp:2090
EPhrapTag x_GetNewTag(void)
Definition: phrap.cpp:1918
EPhrapTag m_LastTag
Definition: phrap.cpp:1770
CRef< CPhrap_Read > x_AddRead(CPhrap_Sequence &seq)
Definition: phrap.cpp:2220
vector< SAssmTag > TAssmTags
Definition: phrap.cpp:1739
EPhrapTag x_GetTag(void)
Definition: phrap.cpp:1902
void x_ConvertContig(void)
Definition: phrap.cpp:1831
TContigs m_Contigs
Definition: phrap.cpp:1774
TPhrapReaderFlags m_Flags
Definition: phrap.cpp:1769
size_t m_NumContigs
Definition: phrap.cpp:1772
void x_CreateDesc(CBioseq_set &bioseq) const
Definition: phrap.cpp:2352
void x_ReadRead(void)
Definition: phrap.cpp:2164
TAssmTags m_AssmTags
Definition: phrap.cpp:1776
void x_ReadTag(const string &tag)
Definition: phrap.cpp:2045
void x_ReadContig(void)
Definition: phrap.cpp:2105
TSeqs m_Seqs
Definition: phrap.cpp:1775
void x_ReadOldSequence(CPhrap_Sequence &seq)
Definition: phrap.cpp:2292
CPhrapReader(CNcbiIstream &in, TPhrapReaderFlags flags)
Definition: phrap.cpp:1780
CPhrap_Seq * x_FindSeq(const string &name)
Definition: phrap.cpp:2033
void x_UngetTag(EPhrapTag tag)
Definition: phrap.cpp:1856
CRef< CSeq_entry > Read(void)
Definition: phrap.cpp:1791
CNcbiIstream & m_Stream
Definition: phrap.cpp:1768
size_t m_NumReads
Definition: phrap.cpp:1773
@ ePhrap_BaseQuality
Definition: phrap.cpp:1723
@ ePhrap_Base_segment
Definition: phrap.cpp:1726
@ ePhrap_Base_segment_Pad
Definition: phrap.cpp:1727
@ ePhrap_not_set
Definition: phrap.cpp:1700
@ ePhrap_Clipping_Pad
Definition: phrap.cpp:1729
@ ePhrap_Clipping
Definition: phrap.cpp:1728
@ ePhrap_Assembled_from
Definition: phrap.cpp:1724
@ ePhrap_unknown
Definition: phrap.cpp:1701
@ ePhrap_Sequence
Definition: phrap.cpp:1722
@ ePhrap_Assembled_from_Pad
Definition: phrap.cpp:1725
void x_DetectFormatVersion(void)
Definition: phrap.cpp:1863
CRef< CSeq_align > x_CreateSeq_align(TAlignMap &aln_map, TAlignStarts &aln_starts, TAlignRows &rows) const
Definition: phrap.cpp:1167
vector< SContigTag > TContigTags
Definition: phrap.cpp:834
const TBaseQuals & GetBaseQualities(void) const
Definition: phrap.cpp:837
CPhrap_Read::TReads TReads
Definition: phrap.cpp:835
vector< SBaseSeg > TBaseSegs
Definition: phrap.cpp:832
void x_CreateGraph(CBioseq &bioseq) const
Definition: phrap.cpp:1069
void x_CreateDesc(CBioseq &bioseq) const
Definition: phrap.cpp:1563
map< string, TBaseSegs > TBaseSegMap
Definition: phrap.cpp:833
map< string, CRef< CPhrap_Seq > > TSeqs
Definition: phrap.cpp:840
void ReadReadLocation(CNcbiIstream &in, TSeqs &seqs)
Definition: phrap.cpp:927
bool IsCircular(void) const
Definition: phrap.cpp:964
void x_AddBaseSegFeats(CRef< CSeq_annot > &annot) const
Definition: phrap.cpp:1373
TBaseSegMap m_BaseSegMap
Definition: phrap.cpp:890
size_t m_NumReads
Definition: phrap.cpp:887
vector< CConstRef< CPhrap_Seq > > TAlignRows
Definition: phrap.cpp:873
TBaseQuals m_BaseQuals
Definition: phrap.cpp:889
void x_CreateAlignAll(CBioseq_set &bioseq_set) const
Definition: phrap.cpp:1258
size_t m_NumSegs
Definition: phrap.cpp:888
CRef< CSeq_entry > CreateContig(int level) const
Definition: phrap.cpp:1578
void x_CreateAlign(CBioseq_set &bioseq_set) const
Definition: phrap.cpp:1239
TContigTags m_Tags
Definition: phrap.cpp:891
void x_CreateAlignPairs(CBioseq_set &bioseq_set) const
Definition: phrap.cpp:1295
bool x_AddAlignRanges(TSeqPos global_start, TSeqPos global_stop, const CPhrap_Seq &seq, size_t seq_idx, TSignedSeqPos offset, TAlignMap &aln_map, TAlignStarts &aln_starts) const
Definition: phrap.cpp:1097
void ReadBaseSegment(CNcbiIstream &in)
Definition: phrap.cpp:975
set< TSeqPos > TAlignStarts
Definition: phrap.cpp:872
void x_AddTagFeats(CRef< CSeq_annot > &annot) const
Definition: phrap.cpp:1504
void x_AddReadLocFeats(CRef< CSeq_annot > &annot) const
Definition: phrap.cpp:1433
void Read(CNcbiIstream &in)
Definition: phrap.cpp:904
TReads m_Reads
Definition: phrap.cpp:892
void ReadBaseQualities(CNcbiIstream &in)
Definition: phrap.cpp:914
void x_CreateFeat(CBioseq &bioseq) const
Definition: phrap.cpp:1550
virtual void ReadTag(CNcbiIstream &in, char tag)
Definition: phrap.cpp:990
CPhrap_Contig(TPhrapReaderFlags flags)
Definition: phrap.cpp:896
void x_CreateAlignOptimized(CBioseq_set &bioseq_set) const
Definition: phrap.cpp:1334
vector< int > TBaseQuals
Definition: phrap.cpp:831
CRangeMultimap< SAlignInfo, TSeqPos > TAlignMap
Definition: phrap.cpp:871
SReadDS * m_DS
Definition: phrap.cpp:457
void x_AddTagFeats(CRef< CSeq_annot > &annot) const
Definition: phrap.cpp:597
TStart m_Start
Definition: phrap.cpp:456
void x_CreateFeat(CBioseq &bioseq) const
Definition: phrap.cpp:717
CPhrap_Read(const string &name, TPhrapReaderFlags flags)
Definition: phrap.cpp:462
map< string, CRef< CPhrap_Read > > TReads
Definition: phrap.cpp:405
void x_AddQualityFeat(CRef< CSeq_annot > &annot) const
Definition: phrap.cpp:647
bool IsCircular(void) const
Definition: phrap.cpp:489
void x_CreateDesc(CBioseq &bioseq) const
Definition: phrap.cpp:729
TReadTags m_Tags
Definition: phrap.cpp:458
void ReadQuality(CNcbiIstream &in)
Definition: phrap.cpp:495
CRange< TSignedSeqPos > TRange
Definition: phrap.cpp:433
CRef< CSeq_entry > CreateRead(void) const
Definition: phrap.cpp:784
void AddReadLoc(TSignedSeqPos start, bool complemented)
Definition: phrap.cpp:589
size_t m_NumInfoItems
Definition: phrap.cpp:453
void ReadDS(CNcbiIstream &in)
Definition: phrap.cpp:514
virtual void ReadTag(CNcbiIstream &in, char tag)
Definition: phrap.cpp:562
virtual ~CPhrap_Read(void)
Definition: phrap.cpp:473
TSignedSeqPos TStart
Definition: phrap.cpp:432
vector< SReadTag > TReadTags
Definition: phrap.cpp:431
void Read(CNcbiIstream &in)
Definition: phrap.cpp:481
size_t m_NumReadTags
Definition: phrap.cpp:454
TRange m_HiQualRange
Definition: phrap.cpp:455
TStart GetStart(void) const
Definition: phrap.cpp:437
void CreateComplementedDescr(CRef< CSeq_descr > &descr) const
Definition: phrap.cpp:345
TSeqPos m_AlignedTo
Definition: phrap.cpp:175
string m_Name
Definition: phrap.cpp:168
void Read(CNcbiIstream &in)
Definition: phrap.cpp:222
CRef< CBioseq > CreateBioseq(void) const
Definition: phrap.cpp:319
void SetAligned(TSeqPos from, TSeqPos to)
Definition: phrap.cpp:154
void x_FillSeqData(CSeq_data &data) const
Definition: phrap.cpp:333
const TPadMap & GetPadMap(void) const
Definition: phrap.cpp:146
const string & GetName(void) const
Definition: phrap.cpp:133
void ReadData(CNcbiIstream &in)
Definition: phrap.cpp:233
TPhrapReaderFlags m_Flags
Definition: phrap.cpp:166
TSeqPos m_UnpaddedLength
Definition: phrap.cpp:170
void CreatePadsFeat(CRef< CSeq_annot > &annot) const
Definition: phrap.cpp:365
TPhrapReaderFlags GetFlags(void) const
Definition: phrap.cpp:129
TSeqPos GetUnpaddedLength(void) const
Definition: phrap.cpp:135
bool IsComplemented(void) const
Definition: phrap.cpp:117
TSeqPos GetAlignedTo(void) const
Definition: phrap.cpp:149
TSeqPos GetAlignedFrom(void) const
Definition: phrap.cpp:148
void CopyFrom(CPhrap_Seq &seq)
Definition: phrap.cpp:205
map< TSeqPos, TSeqPos > TPadMap
Definition: phrap.cpp:145
friend class CPhrap_Sequence
Definition: phrap.cpp:163
CRef< CSeq_id > GetId(void) const
Definition: phrap.cpp:309
bool m_Complemented
Definition: phrap.cpp:173
TSeqPos m_PaddedLength
Definition: phrap.cpp:169
TSeqPos GetPaddedLength(void) const
Definition: phrap.cpp:134
TSeqPos GetPaddedPos(TSeqPos unpadded) const
Definition: phrap.cpp:278
bool FlagSet(EPhrapReaderFlags value) const
Definition: phrap.cpp:130
CPhrap_Seq(TPhrapReaderFlags flags)
Definition: phrap.cpp:182
string m_Data
Definition: phrap.cpp:171
virtual void ReadTag(CNcbiIstream &in, char tag)=0
virtual ~CPhrap_Seq(void)
Definition: phrap.cpp:110
CRef< CSeq_id > m_Id
Definition: phrap.cpp:176
const string & GetData(void) const
Definition: phrap.cpp:136
TPadMap m_PadMap
Definition: phrap.cpp:172
TSeqPos GetUnpaddedPos(TSeqPos padded, TSeqPos *link=0) const
Definition: phrap.cpp:290
TSeqPos m_AlignedFrom
Definition: phrap.cpp:174
void SetComplemented(bool value)
Definition: phrap.cpp:116
CRef< CPhrap_Contig > GetContig(void)
Definition: phrap.cpp:1651
virtual void ReadTag(CNcbiIstream &in, char tag)
Definition: phrap.cpp:1636
void SetRead(CPhrap_Read &read)
Definition: phrap.cpp:1682
bool IsContig(void) const
Definition: phrap.cpp:1644
CRef< CPhrap_Read > GetRead(void)
Definition: phrap.cpp:1670
CRef< CPhrap_Seq > m_Seq
Definition: phrap.cpp:1622
bool IsRead(void) const
Definition: phrap.cpp:1663
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
static TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
size_type size() const
Definition: map.hpp:148
const_iterator end() const
Definition: map.hpp:152
const_iterator lower_bound(const key_type &key) const
Definition: map.hpp:154
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
void swap(this_type &m)
Definition: map.hpp:118
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
size_type size() const
Definition: set.hpp:132
const_iterator end() const
Definition: set.hpp:136
parent_type::const_iterator const_iterator
Definition: set.hpp:79
static uch flags
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
int offset
Definition: replacements.h:160
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TParent::value_type value_type
Definition: rangemap.hpp:611
iterator insert(const value_type &value)
Definition: rangemap.hpp:629
const_iterator begin(void) const
Definition: rangemap.hpp:451
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
bool Empty(void) const
Definition: range.hpp:148
TParent::range_type range_type
Definition: rangemap.hpp:609
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define CT_POS_TYPE
Definition: ncbistre.hpp:730
#define kEmptyStr
Definition: ncbistr.hpp:123
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ fSplit_Truncate
Definition: ncbistr.hpp:2501
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2498
@ eLast
Last occurrence.
Definition: ncbistr.hpp:1941
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
TStr & SetStr(void)
Select the variant.
Definition: Date_.hpp:313
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
vector< ENa_strand > TStrands
Definition: Dense_seg_.hpp:109
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TStrands & SetStrands(void)
Assign a value to Strands data member.
Definition: Dense_seg_.hpp:586
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
@ eType_partial
mapping pieces together
Definition: Seq_align_.hpp:103
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Seq_feat_.hpp:1058
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
void SetTitle(const TTitle &value)
Assign a value to Title data member.
Definition: Seq_feat_.hpp:1181
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
TFrom GetFrom(void) const
Get the From member data.
void SetId(TId &value)
Assign a value to Id data member.
TPoints & SetPoints(void)
Assign a value to Points data member.
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
TTo GetTo(void) const
Get the To member data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
void SetTitle(const TTitle &value)
Assign a value to Title data member.
Definition: Seq_graph_.hpp:784
vector< char > TValues
Definition: Byte_graph_.hpp:89
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
Definition: Seq_graph_.cpp:250
void SetLoc(TLoc &value)
Assign a value to Loc data member.
Definition: Seq_graph_.cpp:224
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
void SetLevel(TLevel value)
Assign a value to Level data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
void SetClass(TClass value)
Assign a value to Class data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_conset
constructed sequence + parts
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
TComment & SetComment(void)
Select the variant.
Definition: Seqdesc_.hpp:1065
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:172
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
TCreate_date & SetCreate_date(void)
Select the variant.
Definition: Seqdesc_.cpp:478
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
@ eRepr_consen
consensus sequence or pattern
Definition: Seq_inst_.hpp:98
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
Definition of all error codes used in objtools libraries.
int i
int len
static MDB_envinfo info
Definition: mdb_load.c:37
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
const char * tag
int isspace(Uchar c)
Definition: ncbictype.hpp:69
T max(T x_, T y_)
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static unsigned cnt[256]
const TSeqPos kMaxSegLength
Definition: phrap.cpp:1332
string ReadLine(CNcbiIstream &in)
Definition: phrap.cpp:74
void CheckStreamState(CNcbiIstream &in, string err_msg)
Definition: phrap.cpp:84
const char kPadChar
Definition: phrap.cpp:180
CRef< CSeq_entry > ReadPhrap(CNcbiIstream &in, TPhrapReaderFlags flags)
Definition: phrap.cpp:2377
bool IsOldComplementedName(const string &name)
Definition: phrap.cpp:96
int TPhrapReaderFlags
Definition: phrap.hpp:78
EPhrapReaderFlags
Definition: phrap.hpp:47
@ fPhrap_AlignAll
global all-in-one alignment
Definition: phrap.hpp:60
@ fPhrap_FeatBaseSegs
add features with base segments
Definition: phrap.hpp:54
@ fPhrap_AlignPairs
separate alignment for each trace
Definition: phrap.hpp:61
@ fPhrap_NoComplement
ignore "complemented" flags of traces.
Definition: phrap.hpp:48
@ fPhrap_FeatGaps
add features with list of gaps
Definition: phrap.hpp:53
@ fPhrap_NewVersion
force new ACE format
Definition: phrap.hpp:65
@ fPhrap_Descr
add descriptors (DS, WA)
Definition: phrap.hpp:59
@ fPhrap_Version
Definition: phrap.hpp:66
@ fPhrap_PadsToFuzz
Add int-fuzz.p-m to indicate padded coordinates offset.
Definition: phrap.hpp:67
@ fPhrap_FeatReadLocs
add padded read starts
Definition: phrap.hpp:55
@ fPhrap_FeatQuality
add quality/alignment features
Definition: phrap.hpp:57
@ fPhrap_AlignOptimized
split global alignment into parts
Definition: phrap.hpp:62
@ fPhrap_OldVersion
force old ACE format
Definition: phrap.hpp:64
@ fPhrap_FeatTags
convert CT and RT tags to features
Definition: phrap.hpp:56
@ fPhrap_PackSeqData
use best coding to pack sequence data
Definition: phrap.hpp:51
@ fPhrap_Align
mask for alignment flags, not a value
Definition: phrap.hpp:63
#define row(bind, expected)
Definition: string_bind.c:73
vector< string > m_Comments
Definition: phrap.cpp:1737
CRange< TSeqPos > TRange
Definition: phrap.cpp:864
SAlignInfo(size_t idx)
Definition: phrap.cpp:866
vector< string > m_Comments
Definition: phrap.cpp:827
string m_ChromatFile
Definition: phrap.cpp:414
#define _ASSERT
Modified on Wed Apr 17 13:09:37 2024 by modify_doxy.py rev. 669887