NCBI C++ ToolKit
agp_seq_entry.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: agp_seq_entry.hpp 99481 2023-04-04 13:16:28Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Mike DiCuccio, Michael Kornbluh
27  *
28  * File Description:
29  * Convert an AGP file into a vector of Seq-entries
30  *
31  */
32 
33 #ifndef __OBJTOOLS_READERS_AGP_SEQ_ENTRY__HPP_
34 #define __OBJTOOLS_READERS_AGP_SEQ_ENTRY__HPP_
35 
37 
38 #include <objects/seq/Bioseq.hpp>
39 
41 
42 namespace objects { // forward declarations of the object types named by the declarations in this header
43  class CBioseq;
44  class CSeq_entry;
45  class CSeq_id;
46  class CSeq_gap;
47 } // namespace objects
48 
49 /// This class is used to turn an AGP file into a vector of Seq-entries
51 public:
52 
53  /// This is the way the results will be returned
54  /// Each Seq-entry contains just one Bioseq, built from the AGP file(s).
55  typedef vector< CRef<objects::CSeq_entry> > TSeqEntryRefVec;
56 
57  enum EFlags {
58  /// Found gaps will be given Seq-data such as Type and Linkage
59  fSetSeqGap = (1 << 0),
60  /// All IDs will be treated as local IDs.
61  /// The default if this is NOT set is to first try to parse the ID,
62  /// and only make local if parsing fails.
63  fForceLocalId = (1 << 1)
64  };
65  typedef int TFlags;
66 
67  /// After construction, you probably want to do something like
68  /// call ReadStream and then GetResult.
69  ///
70  /// @param agp_version
71  /// What is the AGP version of the input? Default is to auto-detect AGP version,
72  /// which is likely what the user wants to do most of the time.
73  CAgpToSeqEntry(TFlags fFlags = 0,
74  EAgpVersion agp_version = eAgpVersion_auto,
75  CAgpErr* arg = nullptr);
76 
77  /// This gets the results found, but don't call before finalizing. We are intentionally
78  /// giving a non-const reference because the caller is free to
79  /// take the seq-entries inside and do whatever they like with them.
80  /// Each Seq-entry contains just one Bioseq, built from the AGP file(s).
81  TSeqEntryRefVec & GetResult(void) { return m_entries; }
82 
83  /// This is the default method used to turn strings into Seq-ids in AGP contexts.
84  ///
85  /// @sa x_GetSeqIdFromStr
86  static CRef<objects::CSeq_id> s_DefaultSeqIdFromStr( const std::string & str );
87 
88  /// Turn a string into a local Seq-id (removing "lcl|" from the beginning if needed)
89  static CRef<objects::CSeq_id> s_LocalSeqIdFromStr( const std::string & str );
90 
91 protected:
92 
94 
95  /// Builds new part of delta-seq in current bioseq, or adds bioseq
96  /// and starts building a new one.
97  virtual void OnGapOrComponent(void);
98 
99  /// Parent finalize plus making sure last m_bioseq is added.
100  virtual int Finalize(void);
101 
102  /// Our own finalization after parent's finalization.
103  void x_FinishedBioseq(void);
104 
105  /// If you must change exactly how strings are turned into Seq-ids,
106  /// you can override this in a subclass. The default
107  /// is to use s_DefaultSeqIdFromStr.
108  virtual CRef<objects::CSeq_id> x_GetSeqIdFromStr( const std::string & str );
109 
110  /// Fills in out_gap_info based on current CAgpRow
111  void x_SetSeqGap( objects::CSeq_gap & out_gap_info );
112 
113  /// This is the bioseq currently being built
115  /// Holds the results
116  vector< CRef<objects::CSeq_entry> > m_entries;
117 
118 private:
119 
120  // forbid copy and assignment
122  CAgpToSeqEntry & operator = (const CAgpToSeqEntry & );
123 };
124 
126 
127 #endif // end of "include-guard"
EAgpVersion
Definition: agp_util.hpp:55
@ eAgpVersion_auto
auto-detect using the first gap line
Definition: agp_util.hpp:56
Detects scaffolds, object boundaries, errors that involve 2 consecutive lines, and is intended as a s...
Definition: agp_util.hpp:327
virtual int Finalize()
This is called at the end of the file, usually automatically but can be called manually if the automa...
Definition: agp_util.cpp:1160
virtual void OnGapOrComponent()
Definition: agp_util.hpp:445
This class is used to turn an AGP file into a vector of Seq-entry's.
const TFlags m_fFlags
vector< CRef< objects::CSeq_entry > > m_entries
Holds the results.
vector< CRef< objects::CSeq_entry > > TSeqEntryRefVec
This is the way the results will be returned Each Seq-entry contains just one Bioseq,...
TSeqEntryRefVec & GetResult(void)
This gets the results found, but don't call before finalizing.
CAgpToSeqEntry(const CAgpToSeqEntry &)
CRef< objects::CBioseq > m_bioseq
This is the bioseq currently being built.
Definition: Seq_entry.hpp:56
static const char * str(char *buf, int n)
Definition: stats.c:84
string
Definition: cgiapp.hpp:687
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
Modified on Wed Jul 17 13:16:59 2024 by modify_doxy.py rev. 669887