NCBI C++ ToolKit
Macros | Functions
gb_ascii.cpp File Reference
#include <ncbi_pch.hpp>
#include "ftacpp.hpp"
#include <objects/seq/Seq_inst.hpp>
#include <objects/seqset/Seq_entry.hpp>
#include <objects/seqset/Bioseq_set.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <serial/objostr.hpp>
#include <serial/serial.hpp>
#include <objects/seq/Seq_ext.hpp>
#include <objects/seq/Delta_seq.hpp>
#include <objects/seq/Delta_ext.hpp>
#include <objects/seqfeat/Org_ref.hpp>
#include <objects/seqfeat/OrgName.hpp>
#include <objects/seq/Seq_descr.hpp>
#include <objmgr/scope.hpp>
#include <objects/general/User_object.hpp>
#include <objects/general/Object_id.hpp>
#include <objects/seqfeat/SubSource.hpp>
#include <objects/seqfeat/BioSource.hpp>
#include <objects/seqcode/Seq_code_type.hpp>
#include <objects/seq/Pubdesc.hpp>
#include <objects/seq/MolInfo.hpp>
#include "index.h"
#include "genbank.h"
#include <objtools/flatfile/flatfile_parser.hpp>
#include <objtools/flatfile/flatdefn.h>
#include "ftanet.h"
#include "ftaerr.hpp"
#include "asci_blk.h"
#include "indx_blk.h"
#include "utilref.h"
#include "utilfeat.h"
#include "loadfeat.h"
#include "gb_ascii.h"
#include "add.h"
#include "nucprot.h"
#include "fta_qscore.h"
#include "citation.h"
#include "fcleanup.h"
#include "utilfun.h"
#include "entry.h"
#include "ref.h"
#include "xgbparint.h"
#include "xutils.h"
Include dependency graph for gb_ascii.cpp:

Go to the source code of this file.

Go to the SVN repository for this file.

Macros

#define THIS_FILE   "gb_ascii.cpp"
 

Functions

 USING_SCOPE (objects)
 
static char * GBDivOffset (const DataBlk &entry, Int4 div_shift)
 
static void CheckContigEverywhere (IndexblkPtr ibp, Parser::ESource source)
 
bool GetGenBankInstContig (const DataBlk &entry, CBioseq &bsp, ParserPtr pp)
 
static bool GetGenBankInst (ParserPtr pp, const DataBlk &entry, unsigned char *dnaconv)
 
static char * GetGenBankLineage (char *start, char *end)
 
static CRef< CGB_block > GetGBBlock (ParserPtr pp, const DataBlk &entry, CMolInfo &mol_info, CBioSource *bio_src)
 
static CRef< CMolInfo > GetGenBankMolInfo (ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
 
static void FakeGenBankBioSources (const DataBlk &entry, CBioseq &bioseq)
 
static void fta_get_user_field (char *line, const Char *tag, CUser_object &user_obj)
 
static void fta_get_str_user_field (char *line, const Char *tag, CUser_object &user_obj)
 
static void fta_get_user_object (CSeq_entry &seq_entry, const DataBlk &entry)
 
static void fta_get_mga_user_object (TSeqdescList &descrs, char *offset, size_t len)
 
static void GetGenBankDescr (ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
 
static void GenBankGetDivision (char *division, Int4 div, const DataBlk &entry)
 
static void xGenBankGetDivision (char *division, Int4 div, const string &locusText)
 
bool GenBankAsciiOrig (ParserPtr pp)
 
bool GenBankAscii (ParserPtr pp)
 
static void SrchFeatSeqLoc (TSeqFeatList &feats, CSeq_annot::C_Data::TFtable &feat_table)
 
static void FindFeatSeqLoc (TEntryList &seq_entries, TSeqFeatList &feats)
 
static CBioseq_set * GetParts (TEntryList &seq_entries)
 
void CheckFeatSeqLoc (TEntryList &seq_entries)
 

Macro Definition Documentation

◆ THIS_FILE

#define THIS_FILE   "gb_ascii.cpp"

Definition at line 91 of file gb_ascii.cpp.

Function Documentation

◆ CheckContigEverywhere()

static void CheckContigEverywhere ( IndexblkPtr  ibp,
Parser::ESource  source 
)
static

◆ CheckFeatSeqLoc()

void CheckFeatSeqLoc ( TEntryList &  seq_entries)

◆ FakeGenBankBioSources()

static void FakeGenBankBioSources ( const DataBlk &  entry,
CBioseq &  bioseq 
)
static

◆ FindFeatSeqLoc()

static void FindFeatSeqLoc ( TEntryList &  seq_entries,
TSeqFeatList &  feats 
)
static

Definition at line 2349 of file gb_ascii.cpp.

References Begin(), IsSegBioseq(), and SrchFeatSeqLoc().

Referenced by CheckFeatSeqLoc().

◆ fta_get_mga_user_object()

static void fta_get_mga_user_object ( TSeqdescList &  descrs,
char *  offset,
size_t  len 
)
static

◆ fta_get_str_user_field()

static void fta_get_str_user_field ( char *  line,
const Char *  tag,
CUser_object &  user_obj 
)
static

◆ fta_get_user_field()

static void fta_get_user_field ( char *  line,
const Char *  tag,
CUser_object &  user_obj 
)
static

◆ fta_get_user_object()

static void fta_get_user_object ( CSeq_entry &  seq_entry,
const DataBlk &  entry 
)
static

◆ GBDivOffset()

static char* GBDivOffset ( const DataBlk &  entry,
Int4  div_shift 
)
static

Definition at line 97 of file gb_ascii.cpp.

References DataBlk::mOffset.

Referenced by GenBankGetDivision(), GetGBBlock(), and GetGenBankMolInfo().

◆ GenBankAscii()

bool GenBankAscii ( ParserPtr  pp)

◆ GenBankAsciiOrig()

bool GenBankAsciiOrig ( ParserPtr  pp)

Definition at line 1391 of file gb_ascii.cpp.

References Parser::accver, Indexblk::acnum, CScope::AddBioseq(), AddNIDSeqId(), AssemblyGapsToDelta(), Indexblk::bases, LocusCont::bp, BuildBioSegHeader(), EntryBlk::chain, CheckContigEverywhere(), CheckDupDates(), CheckFeatSeqLoc(), Parser::citat, Parser::cleanup, conv, Parser::convert, CreateEntryBioseq(), Parser::curindx, Parser::DDBJ, DealWithGenes(), Parser::debug, Indexblk::division, Indexblk::drop, Parser::entries, EntryCheckDivCode(), Parser::entrylist, CSeq_inst_Base::eRepr_raw, ERR_DATE_IllegalDate, ERR_DIVISION_Mismatch, ERR_ENTRY_GBBlock_not_Empty, ERR_ENTRY_LongHTGSSequence, ERR_ENTRY_LongSequence, ERR_ENTRY_Parsed, ERR_ENTRY_ParsingComplete, ERR_ENTRY_Skipped, ERR_FORMAT_MissingEnd, err_install(), ERR_QSCORE_FailedToParse, ERR_REFERENCE_No_references, ERR_SEGMENT_OnlyOneMember, ERR_SEGMENT_Rejected, ERR_SEQUENCE_BadData, ERR_TSA_UnexpectedPrimaryAccession, ErrPostEx, ErrPostStr, FakeGenBankBioSources(), Parser::ff_get_qscore, Parser::ff_get_qscore_pp, FinalCleanup(), Parser::Flybase, Parser::format, fta_EntryCheckGBBlock(), fta_find_pub_explore(), fta_get_user_object(), fta_remove_cleanup_user_object(), fta_set_molinfo_completeness(), fta_set_strandedness(), fta_sort_descr(), fta_sort_seqfeat_cit(), fta_tsa_tls_comment_dblink_check(), FtaDeletePrefix(), g_InstantiateMissingProteins(), Indexblk::gaps, GapsToDelta(), GenBankGetDivision(), CSeq_descr_Base::Get(), CBioseq_Base::GetDescr(), GetDNAConv(), GetGenBankBlock(), GetGenBankDescr(), GetGenBankInst(), GetGenBankSubBlock(), CBioseq_Base::GetInst(), CObjectManager::GetInstance(), GetProteinConv(), GetQSFromFile(), CSeq_inst_Base::GetRepr(), GetScope(), GetSeqExt(), Indexblk::htg, i, Parser::ign_bad_qs, Parser::indx, Indexblk::is_pat, Indexblk::is_prot, Indexblk::is_tls, Indexblk::is_tsa, Indexblk::is_wgs, CSeq_inst::IsNa(), CBioseq_Base::IsSetAnnot(), Indexblk::lc, Indexblk::len, Parser::limit, LoadEntry(), LoadFeat(), Indexblk::locusname, MaybeCutGbblockSource(), Parser::mode, Parser::no_date, 
no_date(), no_reference(), CRef< C, Locker >::NotEmpty(), Indexblk::offset, PackEntries(), ParFlat_COL_DATA, ParFlat_END, ParFlat_LOCUS, ParFlat_NCBI_GI, PREFIX_ACCESSION, PREFIX_LOCUS, ProcessCitations(), Indexblk::psip, Parser::qamode, QscoreToSeqAnnot(), Parser::qsfd, Indexblk::qslength, Ref(), Parser::Refseq, Parser::Relaxed, CRef< C, Locker >::Reset(), Indexblk::segnum, Indexblk::segtotal, EntryBlk::seq_entry, SeqToDelta(), CBioseq_Base::SetId(), SEV_ERROR, SEV_INFO, SEV_REJECT, SEV_WARNING, Parser::source, StringEqu(), StringEquN(), StripSerialNumbers(), Indexblk::tsa_allowed, Indexblk::vernum, Indexblk::wgs_and_gi, xFreeEntry(), and Parser::xml_comp.

Referenced by sParseFlatfile().

◆ GenBankGetDivision()

static void GenBankGetDivision ( char *  division,
Int4  div,
const DataBlk &  entry 
)
static

Definition at line 1371 of file gb_ascii.cpp.

References GBDivOffset(), and StringNCpy().

Referenced by GenBankAsciiOrig().

◆ GetGBBlock()

static CRef<CGB_block> GetGBBlock ( ParserPtr  pp,
const DataBlk &  entry,
CMolInfo &  mol_info,
CBioSource *  bio_src 
)
static

Definition at line 297 of file gb_ascii.cpp.

References Indexblk::acnum, Parser::allow_uwsec, Indexblk::bases, check_cds(), check_div(), CheckDIV(), CheckHTGDivision(), Parser::curindx, Parser::DDBJ, DefVsHTGKeywords(), LocusCont::div, Indexblk::division, Parser::EMBL, Parser::entrylist, Indexblk::env_sample_qual, CBioSource_Base::eOrigin_synthetic, ERR_DIVISION_BadTPADivcode, ERR_DIVISION_BadTSADivcode, ERR_DIVISION_HTCWrongMolType, ERR_DIVISION_InvalidHTCKeyword, ERR_DIVISION_MappedtoEST, ERR_DIVISION_MissingHTCKeyword, ERR_DIVISION_MissingHTGKeywords, ERR_DIVISION_UnknownDivCode, ERR_KEYWORD_ConflictingKeywords, ERR_KEYWORD_ENV_NoMatchingQualifier, ERR_KEYWORD_ESTSubstring, ERR_KEYWORD_HTGPlusENV, ERR_KEYWORD_IllegalForCON, ERR_KEYWORD_MissingTLS, ERR_KEYWORD_MissingTPA, ERR_KEYWORD_MissingTSA, ERR_KEYWORD_NoGeneExpressionKeywords, ERR_KEYWORD_ShouldNotBeCAGE, ERR_KEYWORD_ShouldNotBeTLS, ERR_KEYWORD_ShouldNotBeTPA, ERR_KEYWORD_ShouldNotBeTSA, ERR_KEYWORD_STSSubstring, ErrPostEx, ErrPostStr, Indexblk::EST, CSubSource_Base::eSubtype_environmental_sample, COrgMod_Base::eSubtype_metagenome_source, CMolInfo_Base::eTech_est, CMolInfo_Base::eTech_fli_cdna, CMolInfo_Base::eTech_htc, CMolInfo_Base::eTech_htgs_0, CMolInfo_Base::eTech_htgs_1, CMolInfo_Base::eTech_htgs_2, CMolInfo_Base::eTech_htgs_3, CMolInfo_Base::eTech_sts, CMolInfo_Base::eTech_survey, CMolInfo_Base::eTech_unknown, Parser::format, fta_check_htg_kwds(), fta_check_mga_keywords(), fta_keywords_check(), fta_remove_env_keywords(), fta_remove_keywords(), fta_remove_mag_keywords(), fta_remove_tls_keywords(), fta_remove_tpa_keywords(), fta_remove_tsa_keywords(), fta_tls_keywords_check(), fta_tpa_keywords_check(), fta_tsa_keywords_check(), GBDivOffset(), GetBlkDataReplaceNewLine(), COrgName_Base::GetDiv(), GetExtraAccession(), COrgName_Base::GetMod(), CBioSource_Base::GetOrg(), COrg_ref_Base::GetOrgname(), CBioSource_Base::GetOrigin(), GetSequenceOfKeywords(), CBioSource_Base::GetSubtype(), CMolInfo_Base::GetTech(), Indexblk::GSS, HasHtc(), HasHtg(), 
Indexblk::HTC, Indexblk::htg, i, Indexblk::is_contig, Indexblk::is_mga, Indexblk::is_pat, Indexblk::is_tls, Indexblk::is_tpa, Indexblk::is_tsa, Indexblk::is_wgs, IsCancelled(), IsNewAccessFormat(), COrgName_Base::IsSetDiv(), COrgName_Base::IsSetMod(), CBioSource_Base::IsSetOrg(), COrg_ref_Base::IsSetOrgname(), CBioSource_Base::IsSetOrigin(), CBioSource_Base::IsSetSubtype(), CMolInfo_Base::IsSetTech(), isspace(), ncbi::grid::netcache::search::fields::key, Indexblk::keywords, Indexblk::lc, len, MemCpy(), MemFree(), mod(), Parser::mode, DataBlk::mOffset, LocusCont::molecule, CRef< C, Locker >::NotEmpty(), ParFlat_COL_DATA, ParFlat_DEFINITION, ParFlat_KEYWORDS, ParFlat_ORIGIN, ParFlat_SOURCE, Indexblk::psip, Parser::Relaxed, RemoveHtgPhase(), CRef< C, Locker >::Reset(), CMolInfo_Base::ResetTech(), CMolInfo_Base::SetTech(), SEV_ERROR, SEV_INFO, SEV_REJECT, SEV_WARNING, Parser::source, SrchTheChar(), str(), StringCpy(), StringEqu(), StringEquN(), StringRChr(), StringSave(), StringStr, Indexblk::STS, Indexblk::wgssec, and xSrchNodeType().

Referenced by GetGenBankDescr().

◆ GetGenBankDescr()

static void GetGenBankDescr ( ParserPtr  pp,
const DataBlk &  entry,
CBioseq &  bioseq 
)
static

Definition at line 1091 of file gb_ascii.cpp.

References Indexblk::acnum, Indexblk::bases, Parser::curindx, Parser::date, LocusCont::date, Parser::DDBJ, DescrRefs(), Indexblk::division, Indexblk::drop, CSeq_id_Base::e_Ddbj, CTime::eCurrent, Parser::EMBL, CRef< C, Locker >::Empty(), Parser::entrylist, ERR_DEFINITION_MissingTLS, ERR_DEFINITION_MissingTPA, ERR_DEFINITION_MissingTSA, ERR_DEFINITION_ShouldNotBeTLS, ERR_DEFINITION_ShouldNotBeTPA, ERR_DEFINITION_ShouldNotBeTSA, ERR_TPA_TpaSpansMissing, ErrPostEx, CMolInfo_Base::eTech_htgs_0, CMolInfo_Base::eTech_htgs_1, CMolInfo_Base::eTech_htgs_2, CMolInfo_Base::eTech_unknown, Indexblk::experimental, fta_add_hist(), fta_check_con_for_wgs(), fta_dblink_has_sra(), fta_fix_orgref_div(), fta_get_dblink_user_object(), fta_get_mga_user_object(), fta_get_project_user_object(), fta_parse_structured_comment(), fta_parse_tpa_tsa_block(), fta_StringCpy(), Parser::GenBank, CBioseq_Base::GetAnnot(), GetBlkDataReplaceNewLine(), GetDescrComment(), GetGBBlock(), GetGenBankMolInfo(), GetUpdateDate(), Indexblk::inferential, Indexblk::is_contig, Indexblk::is_mga, Indexblk::is_pat, Indexblk::is_tls, Indexblk::is_tpa, Indexblk::is_tsa, Indexblk::is_wgs, CBioSource_Base::IsSetOrg(), Indexblk::lc, len, MemFree(), DataBlk::mOffset, DataBlk::mpNext, DataBlk::mType, Parser::no_date, CRef< C, Locker >::NotEmpty(), offset, ParFlat_COL_DATA, ParFlat_COMMENT, ParFlat_DBLINK, ParFlat_DEFINITION, ParFlat_MGA, ParFlat_PRIMARY, ParFlat_PROJECT, ParFlat_REF_END, ParFlat_REF_NO_TARGET, CRef< C, Locker >::Reset(), CSeqdesc_Base::SetComment(), CBioseq_Base::SetDescr(), CSeqdesc_Base::SetGenbank(), CSeqdesc_Base::SetMolinfo(), CBioSource_Base::SetOrg(), CSeqdesc_Base::SetPub(), CDate_Base::SetStd(), CSeqdesc_Base::SetTitle(), CDate::SetToTime(), CSeqdesc_Base::SetUpdate_date(), CSeqdesc_Base::SetUser(), SEV_REJECT, Parser::source, Indexblk::specialist_db, str(), StringEquN(), StringEquNI(), Parser::taxserver, TrackNodeType(), Indexblk::vernum, Parser::xml_comp, and xSrchNodeType().

Referenced by GenBankAsciiOrig().

◆ GetGenBankInst()

static bool GetGenBankInst ( ParserPtr  pp,
const DataBlk &  entry,
unsigned char *  dnaconv 
)
static

◆ GetGenBankInstContig()

bool GetGenBankInstContig ( const DataBlk &  entry,
CBioseq &  bsp,
ParserPtr  pp 
)

◆ GetGenBankLineage()

static char* GetGenBankLineage ( char *  start,
char *  end 
)
static

Definition at line 256 of file gb_ascii.cpp.

References GetBlkDataReplaceNewLine(), MemFree(), ParFlat_COL_DATA, and str().

Referenced by FakeGenBankBioSources().

◆ GetGenBankMolInfo()

static CRef<CMolInfo> GetGenBankMolInfo ( ParserPtr  pp,
const DataBlk &  entry,
const COrg_ref *  org_ref 
)
static

◆ GetParts()

static CBioseq_set* GetParts ( TEntryList &  seq_entries)
static

Definition at line 2380 of file gb_ascii.cpp.

References Begin(), and CBioseq_set_Base::eClass_parts.

Referenced by CheckFeatSeqLoc().

◆ SrchFeatSeqLoc()

static void SrchFeatSeqLoc ( TSeqFeatList &  feats,
CSeq_annot::C_Data::TFtable &  feat_table 
)
static

Definition at line 2325 of file gb_ascii.cpp.

Referenced by FindFeatSeqLoc().

◆ USING_SCOPE()

USING_SCOPE ( objects  )

◆ xGenBankGetDivision()

static void xGenBankGetDivision ( char *  division,
Int4  div,
const string &  locusText 
)
static

Definition at line 1377 of file gb_ascii.cpp.

References StringCpy().

Referenced by GenBankAscii().

Modified on Sun Feb 25 03:04:47 2024 by modify_doxy.py rev. 669887