NCBI C++ ToolKit
#include <ncbi_pch.hpp>
#include "ftacpp.hpp"
#include <objects/seq/Seq_inst.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <serial/objostr.hpp>
#include <serial/serial.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Delta_seq.hpp>
#include <objects/seq/Delta_ext.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/OrgName.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objmgr/scope.hpp>
#include <objects/general/User_object.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqfeat/SubSource.hpp>
#include <objects/seqfeat/BioSource.hpp>
#include <objects/seqcode/Seq_code_type.hpp>
#include <objects/seq/Pubdesc.hpp>
#include <objects/seq/MolInfo.hpp>
#include "index.h"
#include "genbank.h"
#include <objtools/flatfile/flatfile_parser.hpp>
#include <objtools/flatfile/flatdefn.h>
#include "ftanet.h"
#include "ftaerr.hpp"
#include "asci_blk.h"
#include "indx_blk.h"
#include "utilref.h"
#include "utilfeat.h"
#include "loadfeat.h"
#include "gb_ascii.h"
#include "add.h"
#include "nucprot.h"
#include "fta_qscore.h"
#include "citation.h"
#include "fcleanup.h"
#include "utilfun.h"
#include "entry.h"
#include "ref.h"
#include "xgbparint.h"
#include "xutils.h"
Go to the source code of this file.
Go to the SVN repository for this file.
Macros
#define THIS_FILE "gb_ascii.cpp"
#define THIS_FILE "gb_ascii.cpp"
Definition at line 91 of file gb_ascii.cpp.
|
static |
Definition at line 103 of file gb_ascii.cpp.
References Indexblk::acnum, NStr::CompareNocase(), Parser::DDBJ, Indexblk::division, Indexblk::drop, Parser::EMBL, ERR_DIVISION_ConDivInSegset, ERR_DIVISION_ConDivLacksContig, ERR_DIVISION_MappedtoCON, ERR_FORMAT_ContigWithSequenceData, ERR_FORMAT_MissingContigFeature, ERR_FORMAT_MissingSequenceData, ErrPostEx, Indexblk::is_contig, Indexblk::is_mga, Indexblk::locusname, Indexblk::origin, Indexblk::segnum, SEV_ERROR, SEV_INFO, SEV_REJECT, SEV_WARNING, and rapidjson::source.
Referenced by GenBankAscii(), and GenBankAsciiOrig().
void CheckFeatSeqLoc(TEntryList& seq_entries)
Definition at line 2375 of file gb_ascii.cpp.
References FindFeatSeqLoc(), CBioseq_set_Base::GetAnnot(), GetParts(), CBioseq_set_Base::SetAnnot(), and CSeq_annot_Base::SetData().
Referenced by GenBankAsciiOrig(), and XMLAscii().
Definition at line 757 of file gb_ascii.cpp.
References CBioSource_Base::eGenome_plasmid, ERR_ORGANISM_NoOrganism, ErrPostStr, GetGenBankLineage(), CBioSource_Base::GetGenome(), GetGenomeInfo(), COrg_ref_Base::GetTaxname(), len, ParFlat_COL_DATA, ParFlat_ORGANISM, ParFlat_SOURCE, CBioseq_Base::SetDescr(), CBioSource_Base::SetOrg(), COrg_ref_Base::SetOrgname(), CSeqdesc_Base::SetSource(), COrg_ref_Base::SetTaxname(), SEV_WARNING, SrchNodeSubType(), StringChr, and StringEquN().
Referenced by GenBankAsciiOrig().
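static … FindFeatSeqLoc(…)
[Signature lost in extraction; the name is inferred from this page's cross-references: referenced by CheckFeatSeqLoc(), which lists FindFeatSeqLoc(), and referencing SrchFeatSeqLoc().]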
Definition at line 2321 of file gb_ascii.cpp.
References Begin(), IsSegBioseq(), and SrchFeatSeqLoc().
Referenced by CheckFeatSeqLoc().
|
static |
Definition at line 1013 of file gb_ascii.cpp.
References len, MemFree(), offset, ParFlat_COL_DATA, CRef< C, Locker >::Reset(), CUser_field_Base::SetData(), CUser_object_Base::SetData(), CUser_field_Base::SetLabel(), CUser_object_Base::SetType(), CSeqdesc_Base::SetUser(), str(), StringChr, and StringSave.
Referenced by GetGenBankDescr().
|
static |
Definition at line 914 of file gb_ascii.cpp.
References MemFree(), r(), CUser_object_Base::SetData(), CUser_field_Base::SetLabel(), CUser_field::SetString(), StringLen(), StringNew(), StringStr, and tag.
Referenced by fta_get_user_object().
|
static |
Definition at line 840 of file gb_ascii.cpp.
References CUser_field_Base::IsSetData(), MemFree(), CRef< C, Locker >::Reset(), CUser_field_Base::SetData(), CUser_object_Base::SetData(), CUser_field_Base::SetLabel(), CUser_field_Base::SetNum(), CUser_field::SetString(), StringEquN(), StringSave, StringStr, and tag.
Referenced by fta_get_user_object().
|
static |
Definition at line 962 of file gb_ascii.cpp.
References fta_get_str_user_field(), fta_get_user_field(), CSeq_entry_Base::IsSeq(), CUser_object_Base::IsSetData(), MemFree(), ParFlat_COL_DATA, ParFlat_USER, r(), CBioseq_Base::SetDescr(), CBioseq_set_Base::SetDescr(), CSeq_entry_Base::SetSeq(), CSeq_entry_Base::SetSet(), CUser_object_Base::SetType(), CSeqdesc_Base::SetUser(), StringEquN(), StringSave, StringStr, and xSrchNodeType().
Referenced by GenBankAsciiOrig().
Definition at line 97 of file gb_ascii.cpp.
References DataBlk::mOffset.
Referenced by GenBankGetDivision(), GetGBBlock(), and GetGenBankMolInfo().
Definition at line 1784 of file gb_ascii.cpp.
References Parser::accver, Indexblk::acnum, CScope::AddBioseq(), Indexblk::bases, CheckContigEverywhere(), conv, CreateEntryBioseq(), Parser::curindx, Indexblk::division, Indexblk::drop, Parser::entrylist, ERR_ENTRY_ParsingComplete, ERR_ENTRY_Skipped, ERR_FORMAT_MissingEnd, err_install(), ERR_SEQUENCE_BadData, ERR_TSA_UnexpectedPrimaryAccession, ErrPostEx, ErrPostStr, FtaDeletePrefix(), GetDNAConv(), GetProteinConv(), GetScope(), i, Parser::indx, Indexblk::is_prot, Indexblk::is_tsa, Indexblk::len, LoadEntryGenbank(), Indexblk::locusname, Indexblk::offset, ParFlat_COL_DATA, ParFlat_END, ParFlat_NCBI_GI, PREFIX_ACCESSION, PREFIX_LOCUS, Indexblk::segnum, SEV_ERROR, SEV_INFO, SEV_REJECT, SEV_WARNING, Parser::source, StringEqu(), Indexblk::tsa_allowed, xGenBankGetDivision(), xGetGenBankBlocks(), and xGetGenBankSubBlocks().
Referenced by fta_parse_buf().
Definition at line 1363 of file gb_ascii.cpp.
References Parser::accver, Indexblk::acnum, CScope::AddBioseq(), AddNIDSeqId(), AssemblyGapsToDelta(), Indexblk::bases, LocusCont::bp, BuildBioSegHeader(), EntryBlk::chain, CheckContigEverywhere(), CheckDupDates(), CheckFeatSeqLoc(), Parser::citat, Parser::cleanup, conv, Parser::convert, CreateEntryBioseq(), Parser::curindx, Parser::DDBJ, DealWithGenes(), Parser::debug, Indexblk::division, Indexblk::drop, Parser::entries, EntryCheckDivCode(), Parser::entrylist, CSeq_inst_Base::eRepr_raw, ERR_DATE_IllegalDate, ERR_DIVISION_Mismatch, ERR_ENTRY_GBBlock_not_Empty, ERR_ENTRY_LongHTGSSequence, ERR_ENTRY_LongSequence, ERR_ENTRY_Parsed, ERR_ENTRY_ParsingComplete, ERR_ENTRY_Skipped, ERR_FORMAT_MissingEnd, err_install(), ERR_QSCORE_FailedToParse, ERR_REFERENCE_No_references, ERR_SEGMENT_OnlyOneMember, ERR_SEGMENT_Rejected, ERR_SEQUENCE_BadData, ERR_TSA_UnexpectedPrimaryAccession, ErrPostEx, ErrPostStr, FakeGenBankBioSources(), Parser::ff_get_qscore, Parser::ff_get_qscore_pp, FinalCleanup(), Parser::Flybase, Parser::format, fta_EntryCheckGBBlock(), fta_find_pub_explore(), fta_get_user_object(), fta_remove_cleanup_user_object(), fta_set_molinfo_completeness(), fta_set_strandedness(), fta_sort_descr(), fta_sort_seqfeat_cit(), fta_tsa_tls_comment_dblink_check(), FtaDeletePrefix(), g_InstantiateMissingProteins(), Indexblk::gaps, GapsToDelta(), GenBankGetDivision(), CSeq_descr_Base::Get(), CBioseq_Base::GetDescr(), GetDNAConv(), GetGenBankBlock(), GetGenBankDescr(), GetGenBankInst(), GetGenBankSubBlock(), CBioseq_Base::GetInst(), CObjectManager::GetInstance(), GetProteinConv(), GetQSFromFile(), CSeq_inst_Base::GetRepr(), GetScope(), GetSeqExt(), Indexblk::htg, i, Parser::ign_bad_qs, Parser::indx, Indexblk::is_pat, Indexblk::is_prot, Indexblk::is_tls, Indexblk::is_tsa, Indexblk::is_wgs, CSeq_inst::IsNa(), CBioseq_Base::IsSetAnnot(), Indexblk::lc, Indexblk::len, Parser::limit, LoadEntry(), LoadFeat(), Indexblk::locusname, MaybeCutGbblockSource(), Parser::mode, Parser::no_date, 
no_date(), no_reference(), CRef< C, Locker >::NotEmpty(), Indexblk::offset, PackEntries(), ParFlat_COL_DATA, ParFlat_END, ParFlat_LOCUS, ParFlat_NCBI_GI, PREFIX_ACCESSION, PREFIX_LOCUS, ProcessCitations(), Indexblk::psip, Parser::qamode, QscoreToSeqAnnot(), Parser::qsfd, Indexblk::qslength, Ref(), Parser::Refseq, Parser::Relaxed, CRef< C, Locker >::Reset(), Indexblk::segnum, Indexblk::segtotal, EntryBlk::seq_entry, SeqToDelta(), CBioseq_Base::SetId(), SEV_ERROR, SEV_INFO, SEV_REJECT, SEV_WARNING, Parser::source, StringEqu(), StringEquN(), StripSerialNumbers(), Indexblk::tsa_allowed, Indexblk::vernum, Indexblk::wgs_and_gi, xFreeEntry(), and Parser::xml_comp.
Referenced by sParseFlatfile().
Definition at line 1343 of file gb_ascii.cpp.
References GBDivOffset(), and StringNCpy().
Referenced by GenBankAsciiOrig().
|
static |
Definition at line 279 of file gb_ascii.cpp.
References Indexblk::acnum, Parser::allow_uwsec, Indexblk::bases, check_cds(), check_div(), CheckDIV(), CheckHTGDivision(), Parser::curindx, Parser::DDBJ, DefVsHTGKeywords(), LocusCont::div, Indexblk::division, Parser::EMBL, Parser::entrylist, Indexblk::env_sample_qual, CBioSource_Base::eOrigin_synthetic, ERR_DIVISION_BadTPADivcode, ERR_DIVISION_BadTSADivcode, ERR_DIVISION_HTCWrongMolType, ERR_DIVISION_InvalidHTCKeyword, ERR_DIVISION_MappedtoEST, ERR_DIVISION_MissingHTCKeyword, ERR_DIVISION_MissingHTGKeywords, ERR_DIVISION_UnknownDivCode, ERR_KEYWORD_ConflictingKeywords, ERR_KEYWORD_ENV_NoMatchingQualifier, ERR_KEYWORD_ESTSubstring, ERR_KEYWORD_HTGPlusENV, ERR_KEYWORD_IllegalForCON, ERR_KEYWORD_MissingTLS, ERR_KEYWORD_MissingTPA, ERR_KEYWORD_MissingTSA, ERR_KEYWORD_NoGeneExpressionKeywords, ERR_KEYWORD_ShouldNotBeCAGE, ERR_KEYWORD_ShouldNotBeTLS, ERR_KEYWORD_ShouldNotBeTPA, ERR_KEYWORD_ShouldNotBeTSA, ERR_KEYWORD_STSSubstring, ErrPostEx, ErrPostStr, Indexblk::EST, CSubSource_Base::eSubtype_environmental_sample, COrgMod_Base::eSubtype_metagenome_source, CMolInfo_Base::eTech_est, CMolInfo_Base::eTech_fli_cdna, CMolInfo_Base::eTech_htc, CMolInfo_Base::eTech_htgs_0, CMolInfo_Base::eTech_htgs_1, CMolInfo_Base::eTech_htgs_2, CMolInfo_Base::eTech_htgs_3, CMolInfo_Base::eTech_sts, CMolInfo_Base::eTech_survey, CMolInfo_Base::eTech_unknown, Parser::format, fta_check_htg_kwds(), fta_check_mga_keywords(), fta_keywords_check(), fta_remove_env_keywords(), fta_remove_keywords(), fta_remove_mag_keywords(), fta_remove_tls_keywords(), fta_remove_tpa_keywords(), fta_remove_tsa_keywords(), fta_tls_keywords_check(), fta_tpa_keywords_check(), fta_tsa_keywords_check(), GBDivOffset(), GetBlkDataReplaceNewLine(), COrgName_Base::GetDiv(), GetExtraAccession(), COrgName_Base::GetMod(), CBioSource_Base::GetOrg(), COrg_ref_Base::GetOrgname(), CBioSource_Base::GetOrigin(), GetSequenceOfKeywords(), CBioSource_Base::GetSubtype(), CMolInfo_Base::GetTech(), Indexblk::GSS, HasHtc(), HasHtg(), 
Indexblk::HTC, Indexblk::htg, i, Indexblk::is_contig, Indexblk::is_mga, Indexblk::is_pat, Indexblk::is_tls, Indexblk::is_tpa, Indexblk::is_tsa, Indexblk::is_wgs, IsCancelled(), IsNewAccessFormat(), COrgName_Base::IsSetDiv(), COrgName_Base::IsSetMod(), CBioSource_Base::IsSetOrg(), COrg_ref_Base::IsSetOrgname(), CBioSource_Base::IsSetOrigin(), CBioSource_Base::IsSetSubtype(), CMolInfo_Base::IsSetTech(), isspace(), ncbi::grid::netcache::search::fields::key, Indexblk::keywords, Indexblk::lc, len, MemCpy(), MemFree(), mod(), Parser::mode, DataBlk::mOffset, LocusCont::molecule, CRef< C, Locker >::NotEmpty(), ParFlat_COL_DATA, ParFlat_DEFINITION, ParFlat_KEYWORDS, ParFlat_ORIGIN, ParFlat_SOURCE, Indexblk::psip, Parser::Relaxed, RemoveHtgPhase(), CRef< C, Locker >::Reset(), CMolInfo_Base::ResetTech(), CMolInfo_Base::SetTech(), SEV_ERROR, SEV_INFO, SEV_REJECT, SEV_WARNING, Parser::source, SrchTheChar(), str(), StringCpy(), StringEqu(), StringEquN(), StringSave, Indexblk::STS, Indexblk::wgssec, and xSrchNodeType().
Referenced by GetGenBankDescr().
Definition at line 1061 of file gb_ascii.cpp.
References Indexblk::acnum, Indexblk::bases, Parser::curindx, Parser::date, LocusCont::date, Parser::DDBJ, DescrRefs(), Indexblk::division, Indexblk::drop, CSeq_id_Base::e_Ddbj, CTime::eCurrent, Parser::EMBL, CRef< C, Locker >::Empty(), Parser::entrylist, ERR_DEFINITION_MissingTLS, ERR_DEFINITION_MissingTPA, ERR_DEFINITION_MissingTSA, ERR_DEFINITION_ShouldNotBeTLS, ERR_DEFINITION_ShouldNotBeTPA, ERR_DEFINITION_ShouldNotBeTSA, ERR_TPA_TpaSpansMissing, ErrPostEx, CMolInfo_Base::eTech_htgs_0, CMolInfo_Base::eTech_htgs_1, CMolInfo_Base::eTech_htgs_2, CMolInfo_Base::eTech_unknown, Indexblk::experimental, fta_add_hist(), fta_check_con_for_wgs(), fta_dblink_has_sra(), fta_fix_orgref_div(), fta_get_dblink_user_object(), fta_get_mga_user_object(), fta_get_project_user_object(), fta_parse_structured_comment(), fta_parse_tpa_tsa_block(), Parser::GenBank, CBioseq_Base::GetAnnot(), GetBlkDataReplaceNewLine(), GetDescrComment(), GetGBBlock(), GetGenBankMolInfo(), GetUpdateDate(), i, Indexblk::inferential, Indexblk::is_contig, Indexblk::is_mga, Indexblk::is_pat, Indexblk::is_tls, Indexblk::is_tpa, Indexblk::is_tsa, Indexblk::is_wgs, CBioSource_Base::IsSetOrg(), Indexblk::lc, len, MemFree(), DataBlk::mOffset, DataBlk::mpNext, DataBlk::mType, Parser::no_date, CRef< C, Locker >::NotEmpty(), offset, ParFlat_COL_DATA, ParFlat_COMMENT, ParFlat_DBLINK, ParFlat_DEFINITION, ParFlat_MGA, ParFlat_PRIMARY, ParFlat_PROJECT, ParFlat_REF_END, ParFlat_REF_NO_TARGET, CRef< C, Locker >::Reset(), CSeqdesc_Base::SetComment(), CBioseq_Base::SetDescr(), CSeqdesc_Base::SetGenbank(), CSeqdesc_Base::SetMolinfo(), CBioSource_Base::SetOrg(), CSeqdesc_Base::SetPub(), CDate_Base::SetStd(), CSeqdesc_Base::SetTitle(), CDate::SetToTime(), CSeqdesc_Base::SetUpdate_date(), CSeqdesc_Base::SetUser(), SEV_REJECT, Parser::source, Indexblk::specialist_db, str(), StringEquN(), StringEquNI(), Parser::taxserver, TrackNodeType(), Indexblk::vernum, Parser::xml_comp, and xSrchNodeType().
Referenced by GenBankAsciiOrig().
Definition at line 215 of file gb_ascii.cpp.
References CheckSTRAND(), CheckTPG(), Parser::curindx, Parser::entrylist, CSeq_inst_Base::eRepr_raw, CSeq_inst_Base::eRepr_virtual, eSeq_code_type_iupacaa, eSeq_code_type_iupacna, GetGenBankInstContig(), GetSeqData(), Indexblk::is_contig, Indexblk::is_mga, Indexblk::is_prot, Indexblk::lc, DataBlk::mOffset, DataBlk::mpData, ParFlat_ORIGIN, EntryBlk::seq_entry, CBioseq_Base::SetInst(), CSeq_inst_Base::SetRepr(), CSeq_inst_Base::SetStrand(), CSeq_inst_Base::SetTopology(), LocusCont::strand, and LocusCont::topology.
Referenced by GenBankAsciiOrig().
Definition at line 135 of file gb_ascii.cpp.
References Parser::accver, Parser::allow_crossdb_featloc, Parser::buf, CRef< C, Locker >::Empty(), CSeq_inst_Base::eRepr_delta, ERR_LOCATION_ContigHasNull, ErrPostEx, fta_create_far_fetch_policy_user_object(), fta_fix_seq_loc_id(), CBioseq_Base::GetId(), CSeq_loc_Base::GetMix(), i, CSeq_loc_Base::IsMix(), DataBlk::len, MemFree(), DataBlk::mOffset, ParFlat_COL_DATA, ParFlat_CONTIG, r(), CBioseq_Base::SetInst(), SEV_REJECT, StringNCpy(), StringNew(), TrackNodeType(), XGappedSeqLocsToDeltaSeqs(), and xgbparseint_ver().
Referenced by get_bioseq(), and GetGenBankInst().
Definition at line 254 of file gb_ascii.cpp.
References GetBlkDataReplaceNewLine(), ParFlat_COL_DATA, and str().
Referenced by FakeGenBankBioSources().
|
static |
Definition at line 710 of file gb_ascii.cpp.
References Parser::curindx, LocusCont::div, CMolInfo_Base::eBiomol_unknown, Parser::entrylist, CMolInfo_Base::eTech_est, CMolInfo_Base::eTech_htgs_1, CMolInfo_Base::eTech_other, CMolInfo_Base::eTech_sts, CMolInfo_Base::eTech_survey, CMolInfo_Base::eTech_targeted, CMolInfo_Base::eTech_tsa, CMolInfo_Base::eTech_wgs, GBDivOffset(), GetFlatBiomol(), Indexblk::is_mga, Indexblk::is_tls, Indexblk::is_tsa, Indexblk::is_wgs, Indexblk::lc, DataBlk::mOffset, LocusCont::molecule, and StringEquN().
Referenced by GetGenBankDescr().
|
static |
Definition at line 2352 of file gb_ascii.cpp.
References Begin(), and CBioseq_set_Base::eClass_parts.
Referenced by CheckFeatSeqLoc().
|
static |
Definition at line 2297 of file gb_ascii.cpp.
Referenced by FindFeatSeqLoc().
USING_SCOPE(objects)