NCBI C++ ToolKit
|
Search Toolkit Book for CAlnReader
class CAlnReader supports importing a large variety of text-based alignment formats into standard data structures. More...
#include <objtools/readers/aln_reader.hpp>
Classes | |
class | CAlnErrorContainer |
Public Types | |
enum | EAlphabet { eAlpha_Default , eAlpha_Nucleotide , eAlpha_Protein , eAlpha_Dna , eAlpha_Rna , eAlpha_Dna_no_ambiguity , eAlpha_Rna_no_ambiguity } |
enum | EReadFlags { fReadDefaults = 0 , fGenerateLocalIDs = 1 } |
Read the file. These are the main functions. More... | |
typedef CAlnErrorContainer | TErrorList |
using | TLineInfo = objects::SLineInfo |
using | FValidateIds = function< void(const list< CRef< objects::CSeq_id > > &, int, objects::CAlnErrorReporter *)> |
using | FIdValidate = function< void(const objects::CSeq_id &, int, objects::CAlnErrorReporter *)> |
typedef int | TReadFlags |
binary OR of EReadFlags More... | |
using | TFastaFlags = objects::CFastaDeflineReader::TFastaFlags |
Public Member Functions | |
CAlnReader (CNcbiIstream &is, FValidateIds fIdValidate=nullptr) | |
CAlnReader (CNcbiIstream &is, FIdValidate fSingleIdValidate) | |
virtual | ~CAlnReader (void) |
const string & | GetAlphabet (void) const |
void | SetAlphabet (const string &value) |
void | SetAlphabet (EAlphabet alpha) |
bool | IsAlphabet (EAlphabet alpha) const |
const string & | GetBeginningGap (void) const |
void | SetBeginningGap (const string &value) |
const string & | GetMiddleGap (void) const |
void | SetMiddleGap (const string &value) |
const string & | GetEndGap (void) const |
void | SetEndGap (const string &value) |
bool | GetUseNexusInfo () const |
void | SetUseNexusInfo (bool useNexusInfo) |
void | SetAllGap (const string &value) |
Convenience function for setting beginning, middle, and end gap to the same thing. More... | |
const string & | GetMissing (void) const |
void | SetMissing (const string &value) |
const string & | GetMatch (void) const |
void | SetMatch (const string &value) |
void | SetFastaGap (EAlphabet alpha) |
Alternative & easy way to choose alphabet, etc. More... | |
void | SetClustal (EAlphabet alpha) |
void | SetPhylip (EAlphabet alpha) |
void | SetPaup (EAlphabet alpha) |
void | Read (bool guess, bool generate_local_ids=false, objects::ILineErrorListener *pErrorListener=nullptr) |
void | Read (TReadFlags=fReadDefaults, objects::ILineErrorListener *pErrorListener=nullptr) |
const vector< string > & | GetIds (void) const |
Parsed result data accessors. More... | |
const vector< string > & | GetSeqs (void) const |
const vector< string > & | GetOrganisms (void) const |
const vector< string > & | GetDeflines (void) const |
const vector< TLineInfo > & | GetDeflineInfo (void) const |
int | GetDim (void) const |
EAlignFormat | GetLastAlignmentFileFormat (void) const |
const TErrorList & | GetErrorList (void) const |
CRef< objects::CSeq_align > | GetSeqAlign (TFastaFlags fasta_flags=0, objects::ILineErrorListener *pErrorListener=nullptr) |
Create ASN.1 classes from the parsed alignment. More... | |
CRef< objects::CSeq_entry > | GetSeqEntry (TFastaFlags fasta_flags=objects::CFastaReader::fAddMods, objects::ILineErrorListener *pErrorListener=nullptr) |
objects::CSeq_inst::EMol | GetSequenceMolType (const string &alphabet, const string &seqData, objects::ILineErrorListener *pErrorListener=nullptr) |
Get a sequence's moltype, also considering the alphabet used to read it. More... | |
Static Public Member Functions | |
static string | GetAlphabetLetters (EAlphabet) |
Protected Types | |
using | SLineTextAndLoc = objects::CFastaDeflineReader::SLineTextAndLoc |
using | TSeqTitles = objects::CFastaDeflineReader::TSeqTitles |
using | SDeflineParseInfo = objects::CFastaDeflineReader::SDeflineParseInfo |
using | TIgnoredProblems = objects::CFastaDeflineReader::TIgnoredProblems |
Protected Member Functions | |
virtual CRef< objects::CSeq_id > | GenerateID (const string &fasta_defline, const TSeqPos &line_number, TFastaFlags fasta_flags) |
void | ParseDefline (const string &defline, const SDeflineParseInfo &info, const TIgnoredProblems &ignoredErrors, list< CRef< objects::CSeq_id >> &ids, bool &hasRange, TSeqPos &rangeStart, TSeqPos &rangeEnd, TSeqTitles &seqTitles, objects::ILineErrorListener *pMessageListener) |
Protected Attributes | |
objects::CFastaIdHandler | m_FastaIdHandler |
Private Types | |
using | TIdList = list< CRef< objects::CSeq_id > > |
Parsed result data (analogous to SAlignmentFile). Seqs are upper-case strings representing the sequences, with '-' for a gap. More... | |
typedef pair< TSeqPos, TSeqPos > | TAlignMiddleInterval |
Characters have different contexts, depending on whether they are before the first non-gap character, after the last non-gap character, or between the first and last non-gap character. More... | |
typedef vector< TAlignMiddleInterval > | TAlignMiddles |
typedef objects::CDense_seg::TDim | TNumrow |
Private Member Functions | |
CAlnReader (const CAlnReader &value) | |
Prohibit copy constructor and assignment operator. More... | |
CAlnReader & | operator= (const CAlnReader &value) |
int | x_GetGCD (const int a, const int b) const |
bool | x_IsReplicatedSequence (const char *sequence_data, int sequence_length, int repeat_interval) const |
void | x_VerifyAlignmentInfo (const ncbi::objects::SAlignmentFile &, TReadFlags readFlags) |
CRef< objects::CSeq_inst > | x_GetSeqInst (objects::CSeq_inst::EMol mol, const string &seqData) const |
objects::CSeq_inst::EMol | x_GetSequenceMolType (const string &alphabet, const string &seqData, const string &seqId="", objects::ILineErrorListener *pErrorListener=nullptr) |
void | x_CalculateMiddleSections () |
bool | x_IsGap (TNumrow row, TSeqPos pos, const string &residue) |
void | x_AssignDensegIds (TFastaFlags fasta_flags, objects::CDense_seg &denseg) |
void | x_ParseAndValidateSeqIds (const TLineInfo &seqIdInfo, TReadFlags flags, TIdList &ids) |
void | x_AddMods (const TLineInfo &defline_info, objects::CBioseq &bioseq, objects::ILineErrorListener *pErrorListener) |
void | x_AddTitle (const string &defline, objects::CBioseq &bioseq) |
Private Attributes | |
ncbi::objects::CSequenceInfo | mSequenceInfo |
vector< string > | m_IdStrings |
vector< TIdList > | m_Ids |
vector< string > | m_Seqs |
vector< string > | m_Organisms |
vector< string > | m_Deflines |
vector< TLineInfo > | m_DeflineInfo |
FValidateIds | m_fValidateIds =nullptr |
EAlignFormat | m_AlignFormat |
CNcbiIstream & | m_IS |
Other internal data. More... | |
bool | m_ReadDone |
bool | m_ReadSucceeded |
int | m_Dim |
CRef< objects::CSeq_align > | m_Aln |
CRef< objects::CSeq_entry > | m_Entry |
vector< string > | m_SeqVec |
vector< TSeqPos > | m_SeqLen |
TErrorList | m_Errors |
bool | m_UseNexusInfo |
TAlignMiddles | m_MiddleSections |
class CAlnReader supports importing a large variety of text-based alignment formats into standard data structures.
Definition at line 99 of file aln_reader.hpp.
using CAlnReader::FIdValidate = function<void(const objects::CSeq_id&, int, objects::CAlnErrorReporter*)> |
Definition at line 154 of file aln_reader.hpp.
using CAlnReader::FValidateIds = function<void(const list<CRef<objects::CSeq_id> >&, int, objects::CAlnErrorReporter*)> |
Definition at line 149 of file aln_reader.hpp.
|
protected |
Definition at line 338 of file aln_reader.hpp.
|
protected |
Definition at line 336 of file aln_reader.hpp.
|
private |
Characters have different contexts, depending on whether they are before the first non-gap character, after the last non-gap character, or between the first and last non-gap character.
This must be precalculated before gap characters can be converted.
Definition at line 310 of file aln_reader.hpp.
|
private |
Definition at line 311 of file aln_reader.hpp.
Definition at line 142 of file aln_reader.hpp.
using CAlnReader::TFastaFlags = objects::CFastaDeflineReader::TFastaFlags |
Definition at line 236 of file aln_reader.hpp.
|
private |
Parsed result data (analogous to SAlignmentFile). Seqs are upper-case strings representing the sequences, with '-' for a gap.
Ids are ids read from file. Organisms and Deflines may not be set, depending on the file.
Definition at line 282 of file aln_reader.hpp.
|
protected |
Definition at line 339 of file aln_reader.hpp.
Definition at line 148 of file aln_reader.hpp.
|
private |
Definition at line 314 of file aln_reader.hpp.
typedef int CAlnReader::TReadFlags |
binary OR of EReadFlags
Definition at line 212 of file aln_reader.hpp.
|
protected |
Definition at line 337 of file aln_reader.hpp.
Enumerator | |
---|---|
eAlpha_Default | |
eAlpha_Nucleotide | |
eAlpha_Protein | |
eAlpha_Dna | |
eAlpha_Rna | |
eAlpha_Dna_no_ambiguity | |
eAlpha_Rna_no_ambiguity |
Definition at line 103 of file aln_reader.hpp.
Read the file. These are the main functions.
Either one parses the alignment file and creates the result data.
Enumerator | |
---|---|
fReadDefaults | |
fGenerateLocalIDs |
Definition at line 208 of file aln_reader.hpp.
CAlnReader::CAlnReader | ( | CNcbiIstream & | is, |
FValidateIds | fIdValidate = nullptr |
||
) |
Definition at line 171 of file aln_reader.cpp.
References CAlnReader::CAlnErrorContainer::clear(), eAlpha_Protein, m_Errors, m_fValidateIds, SetAllGap(), and SetAlphabet().
CAlnReader::CAlnReader | ( | CNcbiIstream & | is, |
FIdValidate | fSingleIdValidate | ||
) |
Definition at line 202 of file aln_reader.cpp.
|
virtual |
Definition at line 886 of file aln_reader.cpp.
|
private |
Prohibit copy constructor and assignment operator.
|
protected virtual |
Reimplemented in CTextAlnReader.
Definition at line 485 of file aln_reader.cpp.
References _ASSERT, CSeq_id::BestRank(), ctll::empty(), FindBestChoice(), m_Dim, and m_Ids.
Referenced by CTextAlnReader::GenerateID(), and x_AssignDensegIds().
Definition at line 364 of file aln_reader.hpp.
References mSequenceInfo.
Referenced by GetSeqEntry(), and IsAlphabet().
Definition at line 207 of file aln_reader.cpp.
Referenced by IsAlphabet(), and SetAlphabet().
Definition at line 378 of file aln_reader.hpp.
References mSequenceInfo.
Referenced by x_CalculateMiddleSections(), and x_IsGap().
Definition at line 229 of file aln_reader.hpp.
References m_DeflineInfo.
Definition at line 228 of file aln_reader.hpp.
References m_Deflines.
|
inline |
Definition at line 230 of file aln_reader.hpp.
References m_Dim.
Definition at line 406 of file aln_reader.hpp.
References mSequenceInfo.
Referenced by x_CalculateMiddleSections(), and x_IsGap().
|
inline |
Definition at line 234 of file aln_reader.hpp.
References m_Errors.
Referenced by CUpdateAlign::x_ReadAlign().
Parsed result data accessors.
Definition at line 225 of file aln_reader.hpp.
References m_IdStrings.
Referenced by CUpdateAlign::x_ReadAlign().
|
inline |
Definition at line 439 of file aln_reader.hpp.
References m_AlignFormat.
Definition at line 194 of file aln_reader.hpp.
References mSequenceInfo.
Definition at line 191 of file aln_reader.hpp.
References mSequenceInfo.
Referenced by x_GetSequenceMolType().
Definition at line 227 of file aln_reader.hpp.
References m_Organisms.
CRef< CSeq_align > CAlnReader::GetSeqAlign | ( | TFastaFlags | fasta_flags = 0 , |
objects::ILineErrorListener * | pErrorListener = nullptr |
||
) |
Create ASN.1 classes from the parsed alignment.
Definition at line 516 of file aln_reader.cpp.
References _ASSERT, CSeq_align_Base::eType_not_set, m_Aln, m_Dim, m_ReadDone, m_ReadSucceeded, m_SeqLen, m_Seqs, m_SeqVec, NCBI_THROW2, CDense_seg_Base::SetDim(), CDense_seg_Base::SetLens(), CDense_seg_Base::SetNumseg(), CDense_seg_Base::SetStarts(), ncbi::grid::netcache::search::fields::size, NStr::ToUpper(), x_AssignDensegIds(), and x_IsGap().
Referenced by GetSeqEntry(), and CUpdateAlign::x_ReadAlign().
CRef< CSeq_entry > CAlnReader::GetSeqEntry | ( | TFastaFlags | fasta_flags = objects::CFastaReader::fAddMods , |
objects::ILineErrorListener * | pErrorListener = nullptr |
||
) |
Definition at line 722 of file aln_reader.cpp.
References CBioseq_set_Base::eClass_pop_set, CSeq_inst_Base::eMol_aa, CSeq_inst_Base::eMol_na, CSeq_inst_Base::eMol_not_set, CSeq_id::fAcc_nuc, CSeq_id::fAcc_prot, CFastaReader::fAddMods, GetAlphabet(), GetSeqAlign(), i, m_DeflineInfo, m_Dim, m_Entry, m_Ids, m_ReadDone, m_ReadSucceeded, m_SeqVec, NCBI_THROW2, Ref(), CSeq_annot_Base::SetData(), x_AddMods(), x_AddTitle(), x_GetSeqInst(), and x_GetSequenceMolType().
Referenced by CTextAlnReader::GetFilteredSeqEntry(), CMultiReader::ReadAlignment(), CPsiBlastInputClustalW::x_ReadAsciiMsa(), and CMultiReaderApp::xProcessAlignment().
Definition at line 226 of file aln_reader.hpp.
References m_Seqs.
Referenced by CPsiBlastInputClustalW::x_ReadAsciiMsa().
CSeq_inst::EMol CAlnReader::GetSequenceMolType | ( | const string & | alphabet, |
const string & | seqData, | ||
objects::ILineErrorListener * | pErrorListener = nullptr |
||
) |
Get a sequence's moltype, also considering the alphabet used to read it.
Definition at line 643 of file aln_reader.cpp.
References x_GetSequenceMolType().
|
inline |
Definition at line 184 of file aln_reader.hpp.
References m_UseNexusInfo.
Definition at line 426 of file aln_reader.hpp.
References GetAlphabet(), and GetAlphabetLetters().
|
private |
|
protected |
Definition at line 863 of file aln_reader.cpp.
References info, and CFastaDeflineReader::ParseDefline().
void CAlnReader::Read | ( | TReadFlags | = fReadDefaults , |
objects::ILineErrorListener * | pErrorListener = nullptr |
||
) |
Convenience function for setting beginning, middle, and end gap to the same thing.
Definition at line 433 of file aln_reader.hpp.
References mSequenceInfo, and rapidjson::value.
Referenced by CAlnReader(), CMultiReader::ReadAlignment(), SetClustal(), SetFastaGap(), SetPaup(), and SetPhylip().
Definition at line 371 of file aln_reader.hpp.
References mSequenceInfo, and rapidjson::value.
Referenced by CAlnReader(), CTextAlignObjectLoader::LoadFromStream(), CMultiReader::ReadAlignment(), SetAlphabet(), SetClustal(), SetFastaGap(), SetPaup(), SetPhylip(), CUpdateAlign::x_ReadAlign(), and CMultiReaderApp::xProcessAlignment().
|
inline |
Definition at line 419 of file aln_reader.hpp.
References GetAlphabetLetters(), and SetAlphabet().
Definition at line 385 of file aln_reader.hpp.
References mSequenceInfo, and rapidjson::value.
Referenced by CTextAlignObjectLoader::LoadFromStream(), and CUpdateAlign::x_ReadAlign().
void CAlnReader::SetClustal | ( | EAlphabet | alpha | ) |
Definition at line 244 of file aln_reader.cpp.
References SetAllGap(), and SetAlphabet().
Referenced by CPsiBlastInputClustalW::x_ReadAsciiMsa().
Definition at line 412 of file aln_reader.hpp.
References mSequenceInfo, and rapidjson::value.
Referenced by CTextAlignObjectLoader::LoadFromStream(), and CUpdateAlign::x_ReadAlign().
void CAlnReader::SetFastaGap | ( | EAlphabet | alpha | ) |
Alternative & easy way to choose alphabet, etc.
Definition at line 237 of file aln_reader.cpp.
References SetAllGap(), and SetAlphabet().
Definition at line 195 of file aln_reader.hpp.
References mSequenceInfo, and rapidjson::value.
Referenced by CTextAlignObjectLoader::LoadFromStream(), and CUpdateAlign::x_ReadAlign().
Definition at line 399 of file aln_reader.hpp.
References mSequenceInfo, and rapidjson::value.
Referenced by CTextAlignObjectLoader::LoadFromStream(), and CUpdateAlign::x_ReadAlign().
Definition at line 192 of file aln_reader.hpp.
References mSequenceInfo, and rapidjson::value.
Referenced by CTextAlignObjectLoader::LoadFromStream(), CMultiReader::ReadAlignment(), and CUpdateAlign::x_ReadAlign().
void CAlnReader::SetPaup | ( | EAlphabet | alpha | ) |
Definition at line 251 of file aln_reader.cpp.
References SetAllGap(), and SetAlphabet().
void CAlnReader::SetPhylip | ( | EAlphabet | alpha | ) |
Definition at line 258 of file aln_reader.cpp.
References SetAllGap(), and SetAlphabet().
|
inline |
Definition at line 185 of file aln_reader.hpp.
References m_UseNexusInfo.
|
private |
Definition at line 807 of file aln_reader.cpp.
References _ASSERT, CModHandler::AddMods(), CModAdder::Apply(), CTitleParser::Apply(), CModHandler::eAppendReplace, eDiag_Info, CBioseq::GetFirstId(), NStr::IsBlank(), SLineInfo::mData, SLineInfo::mNumLine, s_AppendMods(), IObjtoolsListener::SevEnabled(), NStr::TruncateSpacesInPlace(), and x_AddTitle().
Referenced by GetSeqEntry().
Definition at line 852 of file aln_reader.cpp.
References NStr::IsBlank(), Ref(), and CBioseq_Base::SetDescr().
Referenced by GetSeqEntry(), and x_AddMods().
|
private |
Definition at line 496 of file aln_reader.cpp.
References GenerateID(), i, m_DeflineInfo, m_Dim, m_Ids, m_IdStrings, and CDense_seg_Base::SetIds().
Referenced by GetSeqAlign().
|
private |
Definition at line 433 of file aln_reader.cpp.
References GetBeginningGap(), GetEndGap(), m_Dim, m_MiddleSections, and m_Seqs.
Referenced by x_IsGap().
|
private |
Definition at line 703 of file aln_reader.cpp.
References data, CSeq_inst_Base::eMol_aa, CSeq_inst_Base::eRepr_raw, CSeqportUtil::Pack(), and Ref().
Referenced by GetSeqEntry().
|
private |
Definition at line 654 of file aln_reader.cpp.
References eAlnSubcode_InconsistentMolType, eDiag_Error, CSeq_inst_Base::eMol_aa, CSeq_inst_Base::eMol_dna, CSeq_inst_Base::eMol_rna, CFormatGuess::eProtein, eReader_Alignment, CFormatGuess::eUndefined, GetMissing(), msg(), remove_if(), CFormatGuess::SequenceType(), and sReportError().
Referenced by GetSeqEntry(), and GetSequenceMolType().
Definition at line 456 of file aln_reader.cpp.
References NStr::Find(), first(), GetBeginningGap(), GetEndGap(), GetMiddleGap(), m_MiddleSections, row, and x_CalculateMiddleSections().
Referenced by GetSeqAlign().
|
private |
|
private |
Definition at line 342 of file aln_reader.cpp.
References CSeq_id_Base::e_Local, eAlnSubcode_IllegalSequenceId, fGenerateLocalIDs, flags, CSeq_id::fParse_AnyLocal, CSeq_id::fParse_RawText, m_fValidateIds, SLineInfo::mData, SLineInfo::mNumLine, CSeq_id::ParseIDs(), Ref(), and theErrorReporter.
Referenced by x_VerifyAlignmentInfo().
|
private |
Definition at line 372 of file aln_reader.cpp.
References eAlnSubcode_BadSequenceCount, eAlnSubcode_InsufficientDeflineInfo, ErrorPrintf(), flags, i, m_DeflineInfo, m_Ids, m_IdStrings, m_Seqs, SAlignmentFile::mDeflines, SAlignmentFile::mIds, SAlignmentFile::mSequences, SAlignmentFile::NumDeflines(), SAlignmentFile::NumSequences(), theErrorReporter, NStr::TruncateSpaces(), and x_ParseAndValidateSeqIds().
|
private |
Definition at line 290 of file aln_reader.hpp.
Referenced by GetLastAlignmentFileFormat().
|
private |
Definition at line 298 of file aln_reader.hpp.
Referenced by GetSeqAlign().
|
private |
Definition at line 288 of file aln_reader.hpp.
Referenced by GetDeflineInfo(), GetSeqEntry(), x_AssignDensegIds(), and x_VerifyAlignmentInfo().
|
private |
Definition at line 287 of file aln_reader.hpp.
Referenced by GetDeflines().
|
private |
Definition at line 297 of file aln_reader.hpp.
Referenced by GenerateID(), GetDim(), GetSeqAlign(), GetSeqEntry(), x_AssignDensegIds(), and x_CalculateMiddleSections().
|
private |
Definition at line 299 of file aln_reader.hpp.
Referenced by GetSeqEntry().
|
private |
Definition at line 302 of file aln_reader.hpp.
Referenced by CAlnReader(), and GetErrorList().
|
protected |
Definition at line 353 of file aln_reader.hpp.
|
private |
Definition at line 289 of file aln_reader.hpp.
Referenced by CAlnReader(), and x_ParseAndValidateSeqIds().
|
private |
Definition at line 284 of file aln_reader.hpp.
Referenced by GenerateID(), GetSeqEntry(), x_AssignDensegIds(), and x_VerifyAlignmentInfo().
|
private |
Definition at line 283 of file aln_reader.hpp.
Referenced by GetIds(), x_AssignDensegIds(), and x_VerifyAlignmentInfo().
|
private |
Other internal data.
Definition at line 294 of file aln_reader.hpp.
|
private |
Definition at line 312 of file aln_reader.hpp.
Referenced by x_CalculateMiddleSections(), and x_IsGap().
|
private |
Definition at line 286 of file aln_reader.hpp.
Referenced by GetOrganisms().
|
private |
Definition at line 295 of file aln_reader.hpp.
Referenced by GetSeqAlign(), and GetSeqEntry().
|
private |
Definition at line 296 of file aln_reader.hpp.
Referenced by GetSeqAlign(), and GetSeqEntry().
|
private |
Definition at line 301 of file aln_reader.hpp.
Referenced by GetSeqAlign().
|
private |
Definition at line 285 of file aln_reader.hpp.
Referenced by GetSeqAlign(), GetSeqs(), x_CalculateMiddleSections(), and x_VerifyAlignmentInfo().
|
private |
Definition at line 300 of file aln_reader.hpp.
Referenced by GetSeqAlign(), and GetSeqEntry().
|
private |
Definition at line 303 of file aln_reader.hpp.
Referenced by GetUseNexusInfo(), and SetUseNexusInfo().
|
private |
Definition at line 274 of file aln_reader.hpp.
Referenced by GetAlphabet(), GetBeginningGap(), GetEndGap(), GetMatch(), GetMiddleGap(), GetMissing(), SetAllGap(), SetAlphabet(), SetBeginningGap(), SetEndGap(), SetMatch(), SetMiddleGap(), and SetMissing().