NCBI C++ ToolKit
cobalt_app_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cobalt_app_util.cpp 71954 2016-04-07 17:48:29Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: cobalt_app_util.cpp
29 
30 Author: Jason Papadopoulos
31 
32 Contents: Utility functions for COBALT command line applications
33 
34 ******************************************************************************/
35 
36 #include <ncbi_pch.hpp>
37 #include <corelib/ncbiapp.hpp>
40 #include <serial/iterator.hpp>
42 
43 #include "cobalt_app_util.hpp"
44 
46 BEGIN_SCOPE(cobalt);
47 
48 
49 /// Modification of the CFastaReader class that allows for reading a single
50 /// sequence as a degenerate multiple sequence alignment.
// NOTE(review): the class declaration line (listing line 51) is missing from
// this extract. The constructor below initializes a CFastaReader base, so the
// class presumably derives from CFastaReader -- confirm against the original.
52 {
53 public:
54  /// Constructor: passes `reader` and `flags` straight to CFastaReader.
// NOTE(review): the constructor's signature line (listing line 55) is missing
// from this extract; only its initializer list is visible.
56  : CFastaReader(reader, flags) {}
57 
58  /// Read alignment
59  /// Modified parent class function that allows reading one sequence as
60  /// aligned set.
 /// Returns the entry produced by x_ReadSeqsToAlign() after a newly created
 /// CSeq_annot holding the multiway alignment has been appended to the
 /// annotation list of the entry's set.
// NOTE(review): the signature and the declaration of `ids` (listing lines
// 61-62) are missing from this extract; `ids` is filled by x_ReadSeqsToAlign()
// and then passed to x_AddMultiwayAlignment().
63  CRef<CSeq_entry> entry = x_ReadSeqsToAlign(ids, pMessageListener);
64  CRef<CSeq_annot> annot(new CSeq_annot);
65 
 // Build the alignment over the ids just read and attach it to the entry.
66  x_AddMultiwayAlignment(*annot, ids);
67  entry->SetSet().SetAnnot().push_back(annot);
68 
69  return entry;
70  }
71 };
72 
// Reads FASTA sequences from a stream one at a time, adds each resulting
// seq_entry to `scope`, and appends one whole-sequence CSeq_loc per sequence
// to `seqs`.
//   seqs  - output list; cleared on entry, then filled in read order
//   scope - scope that receives every seq_entry that is read
//   flags - passed unchanged to the CFastaReader constructor
// NOTE(review): the first line of the signature (listing line 73, return type,
// function name and the `instream` parameter) is missing from this extract.
74  vector< CRef<objects::CSeq_loc> >& seqs,
75  CRef<objects::CScope>& scope,
76  objects::CFastaReader::TFlags flags)
77 {
78  seqs.clear();
79 
80  // Read one query at a time, producing a separate seq_entry for each
81  // query. Every entry is added to the caller-supplied scope, and callers
82  // tell the queries apart by their position in seqs.
83  // NOTE(review): the earlier wording of this comment mentioned a separate
 // scope and lowercase mask per query and an array named retval[]; none of
 // these appear in the visible code.
84 
85  CStreamLineReader line_reader(instream);
86  CFastaReader fasta_reader(line_reader, flags);
87 
88  while (!line_reader.AtEOF()) {
89 
90  CRef<CSeq_entry> entry = fasta_reader.ReadOneSeq();
91 
92  if (entry == 0) {
 // NOTE(review): the line that opens the error statement (listing line 93)
 // is missing from this extract; only its message argument is visible.
94  "Could not retrieve seq entry");
95  }
96  scope->AddTopLevelSeqEntry(*entry);
 // NOTE(review): the declaration of `itr` (listing line 97) is missing from
 // this extract. The location below covers the whole sequence identified by
 // the first id of the object `itr` points at.
98  CRef<CSeq_loc> seqloc(new CSeq_loc());
99  seqloc->SetWhole().Assign(*itr->GetId().front());
100  seqs.push_back(seqloc);
101  }
102 }
103 
104 
// Reads the whole stream as one multiple sequence alignment, adds the
// resulting seq_entry to `scope`, and returns the first alignment found in the
// entry's first annotation.
//   scope        - scope that receives the seq_entry
//   flags        - passed unchanged to the CFastaReaderEx constructor
//   id_generator - installed on the reader via SetIDGenerator()
// Throws CObjReaderException (eInvalid) when no seq_entry is returned or when
// input remains after the aligned set has been read.
// NOTE(review): the first line of the signature (listing line 105, return
// type, function name and the `instream` parameter) is missing from this
// extract.
106  CRef<objects::CScope>& scope,
107  objects::CFastaReader::TFlags flags,
108  objects::CSeqIdGenerator& id_generator)
109 {
110  // read all sequences as a multiple sequence alignment and put
111  // the alignment in a single seq_entry
112 
113  CStreamLineReader line_reader(instream);
114  CFastaReaderEx fasta_reader(line_reader, flags);
115  fasta_reader.SetIDGenerator(id_generator);
116 
117  CRef<CSeq_entry> entry = fasta_reader.ReadAlignedSet();
118 
119  if (entry == 0) {
120  NCBI_THROW(CObjReaderException, eInvalid,
121  "Could not retrieve seq entry");
122  }
123  scope->AddTopLevelSeqEntry(*entry);
124 
125  // notify of a problem if the whole file was not read
126  if (!line_reader.AtEOF()) {
127  NCBI_THROW(CObjReaderException, eInvalid, "Problem reading MSA");
128  }
129 
 // NOTE(review): the declaration of `align` (listing line 130) is missing
 // from this extract; the visible initializer takes the first alignment of
 // the first annotation attached to the entry.
131  = entry->GetAnnot().front()->GetData().GetAlign().front();
132 
133  return align;
134 }
135 
136 
137 END_SCOPE(cobalt);
Data loader implementation that uses the blast databases.
Modification of the CFastaReader class that allows for reading a single sequence as a degenerate mult...
CRef< CSeq_entry > ReadAlignedSet(ILineErrorListener *pMessageListener=0)
Read alignment Modified parent class function that allows reading one sequence as aligned set.
CFastaReaderEx(ILineReader &reader, CFastaReader::TFlags flags=0)
Constructor.
Base class for reading FASTA sequences.
Definition: fasta.hpp:80
const TAnnot & GetAnnot(void) const
Definition: Seq_entry.cpp:179
Simple implementation of ILineReader for i(o)streams.
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
void GetSeqLocFromStream(CNcbiIstream &instream, vector< CRef< objects::CSeq_loc > > &seqs, CRef< objects::CScope > &scope, objects::CFastaReader::TFlags flags)
Reads fasta sequences from stream, adds them to scope, and returns them as the list of Seq_locs.
CRef< objects::CSeq_align > GetAlignmentFromStream(CNcbiIstream &instream, CRef< objects::CScope > &scope, objects::CFastaReader::TFlags flags, objects::CSeqIdGenerator &id_generator)
Reads fasta sequences as multiple sequence alignment.
static uch flags
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual CRef< CSeq_entry > ReadOneSeq(ILineErrorListener *pMessageListener=nullptr)
Read a single effective sequence, which may turn out to be a segmented set.
Definition: fasta.cpp:312
CSeqIdGenerator & SetIDGenerator(void)
Definition: fasta.hpp:174
long TFlags
binary OR of EFlags
Definition: fasta.hpp:117
CRef< CSeq_entry > x_ReadSeqsToAlign(TIds &ids, ILineErrorListener *pMessageListener)
Definition: fasta.cpp:1708
void x_AddMultiwayAlignment(CSeq_annot &annot, const TIds &ids)
Definition: fasta.cpp:1805
vector< CRef< CSeq_id > > TIds
Definition: fasta.hpp:276
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
The Object manager core.
Modified on Wed Apr 17 13:08:56 2024 by modify_doxy.py rev. 669887