-- Go to the SVN repository for this file
-- Go to list of all specification files

--$Id: blastdb.asn 96099 2022-02-09 14:02:27Z camacho $
-- Notes:
-- taxonomy: an integer is proposed, which would require some sort of 
-- table (or network connection) to do the conversions from integer 
-- to various names.  This could save quite a bit of space for databases 
-- that are predominantly of one organism (e.g., human in htgs).
-- I've proposed here that the table contain scientific-, common-, and 
-- blast-names at the advice of Scott Federhen.  Scott also was in 
-- favor of having the complete lineage in the file, but it seems like 
-- this would be seldom used and we could have a view with a link back 
-- to the taxonomy page for anyone needing it. Since one file would 
-- suffice for all blast databases, it seems like this should be a new file.
-- memberships: a sequence of integers is proposed.  Each bit of an integer 
-- would indicate membership in some (virtual) blast database (e.g., pdb, 
-- swissprot) or some classification (e.g., mRNA, genomic).
-- links: a sequence of integers is proposed.  Each bit of an integer would 
-- indicate a link that could be established based upon the gi of the 
-- database sequence.


EXPORTS Blast-def-line-set, Blast-def-line;
IMPORTS Seq-id, Seq-loc FROM NCBI-Seqloc;

-- All deflines for an entry.
Blast-def-line-set ::= SEQUENCE OF Blast-def-line

-- One defline (definition line) of a BLAST database entry.
Blast-def-line ::= SEQUENCE {
    title VisibleString OPTIONAL,             -- simple title
    seqid SEQUENCE OF Seq-id,                 -- Regular NCBI Seq-Id
    -- taxonomy id, should not be set if multiple taxids are available
    -- (see links below)
    taxid INTEGER OPTIONAL,
    memberships SEQUENCE OF INTEGER OPTIONAL, -- bit arrays
    -- Repurposed to store the (multiple) taxIDs associated with WP proteins.
    links SEQUENCE OF INTEGER OPTIONAL,
    -- In proteins this stores the PIG, in nucleotides this stores the "origin
    -- GIs" (if one sequence is described in the ASN.1 as an identical copy of
    -- another)
    -- NOTE(review): the declaration of this field was missing from this copy
    -- of the file; the name other-info is assumed from the upstream NCBI
    -- specification. Confirm before relying on it.
    other-info SEQUENCE OF INTEGER OPTIONAL
}

-- This defines the possible sequence filtering algorithms to be used in a
-- BLAST database. Each algorithm is a named INTEGER value.
Blast-filter-program ::= INTEGER {
    not-set             (0),
    dust                (10),
    seg                 (20),
    windowmasker        (30),
    repeat              (40),
    other               (100),
    max                 (255)
}

-- A list of sequence masks, expressed as Seq-loc objects.
Blast-mask-list ::= SEQUENCE {
    -- masks for a single sequence should be grouped in a Packed-seqint
    masks SEQUENCE OF Seq-loc,
    -- as of 01/21/2010, this field is set to false always, indicating that the
    -- entire object (set of Seq-loc) is contained in this object
    more BOOLEAN
}

-- Defines the masking information for a set of sequences
Blast-db-mask-info ::= SEQUENCE {
    algo-id      INTEGER,
    algo-program Blast-filter-program,
    algo-options VisibleString,
    -- This object was originally created to allow pagination of the sequence
    -- masks, but this feature was discontinued in 01/21/2010
    masks        Blast-mask-list
}

-- Metadata record for a BLAST database.
Blast-db-metadata ::= SEQUENCE {
    -- Default indicates the starting version
    version                         VisibleString DEFAULT "1.2",
    dbname                          VisibleString,
    dbtype                          VisibleString,
    -- Default indicates the starting version
    db-version                      INTEGER DEFAULT 5,
    description                     VisibleString OPTIONAL,
    number-of-letters               INTEGER,
    number-of-sequences             INTEGER,
    last-updated                    VisibleString,
    number-of-volumes               INTEGER,
    number-of-taxids                INTEGER OPTIONAL,
    bytes-total                     INTEGER,
    bytes-to-cache                  INTEGER,
    bytes-total-compressed          INTEGER OPTIONAL,
    files                           SEQUENCE OF VisibleString OPTIONAL,
    compressed-files                SEQUENCE OF VisibleString OPTIONAL
}