-- src/objects/variation/variation.asn
-- Go to the SVN repository for this file
-- Go to list of all specification files
--$Revision: 66445 $
--**********************************************************************
--
-- NCBI Variation container
-- by Variation Working Group, 2011
--
-- The Variation type describes a sequence change at location(s),
-- or a hierarchical combination thereof.
--
-- Related location-centric type is SeqFeatData.Variation-ref
--
--**********************************************************************
NCBI-VariationPackage DEFINITIONS ::=
BEGIN
EXPORTS Variation, VariantPlacement;
IMPORTS Int-fuzz, Dbtag, User-object, Object-id FROM NCBI-General
Population-data, Phenotype, Variation-inst, VariantProperties FROM NCBI-Variation
Seq-loc FROM NCBI-Seqloc
SubSource FROM NCBI-BioSource
Seq-literal, Bioseq FROM NCBI-Sequence
Pub-set FROM NCBI-Pub;
-- VariationException: a problem report that can be attached to a placement
-- or to a variation (see VariantPlacement.exceptions and
-- Variation.exceptions). It carries an optional enumerated code and a
-- mandatory free-text message.
VariationException ::= SEQUENCE
{
-- enumerated category of the problem; the field may be omitted
code INTEGER {
hgvs-parsing (1), --invalid hgvs expression
hgvs-exon-boundary (2), --anchor position in an intronic HGVS expression is not at an exon boundary
inconsistent-consequence (3), --consequence protein variation attached to precursor variation's consequence
--could not be derived from it.
inconsistent-asserted-allele (4), --asserted allele is inconsistent with the reference
no-mapping (5), --could not remap
partial-mapping (6), --mapped location is shorter than the query
split-mapping (7), --a source interval maps to multiple non-abutting intervals.
mismatches-in-mapping (8), --the source sequence differs from sequence at mapped loc
inconsistent-asserted-moltype (9), --asserted mol-type is inconsistent with seq-id (e.g. NM_12345.6:g.)
-- NOTE(review): codes 10 and 11 carry no description in this file;
-- confirm their intended meaning before relying on them
bioseq-state (10),
ambiguous-sequence (11),
ref-same-as-variant (12), --reference sequence at the location is same as variant sequence in the variation
seqfetch-too-long (13), --can't fetch sequence because location is longer than specified threshold
seqfetch-intronic (14), --can't fetch sequence for an intronic (anchor+offset)-based location
seqfetch-invalid (15), --can't fetch sequence because location is invalid (e.g. extends past the end)
no-mapping-from-newer-version (16), --have mapping from older version of a sequence, but not from newer
source-location-overhang (17), --The source location overhangs the alignment by at least 5kb (VAR-1307)
hgvs-exon-boundary-induced (18) --Similar to (2), except induced by 5'/3'-terminal or an exon extension (VAR-1309)
} OPTIONAL,
-- text accompanying the exception; always present
message VisibleString
}
-- VariantPlacement: one concrete location of a variant (loc) together with
-- the kind of coordinates it uses (mol) and optional refinements, labels and
-- cross-references. Only loc and mol are required; every other member is
-- OPTIONAL. This type is exported from the module.
VariantPlacement ::= SEQUENCE
{
-- actual concrete placement we are considering
loc Seq-loc,
-- coordinate type of loc, named after the HGVS prefixes noted per value
mol INTEGER {
unknown(0),
genomic(1), --"g." coordinates in HGVS
cdna(2), --"c." coordinates in HGVS
rna(3), --"n." coordinates in HGVS
protein(4), --"p." coordinates in HGVS
mitochondrion(5) --"mt." coordinates in HGVS
},
-- location flags
placement-method INTEGER {
projected(1),
asserted(2),
aligned(3)
} OPTIONAL,
-- location refinements, describing offsets into introns from product coordinates.
-- Biological semantics: start-offset/stop-offset apply to bio-start/bio-stop respectively.
-- positive = downstream; negative = upstream.
start-offset INTEGER OPTIONAL,
start-offset-fuzz Int-fuzz OPTIONAL,
stop-offset INTEGER OPTIONAL,
stop-offset-fuzz Int-fuzz OPTIONAL,
-- 0-based position of bio-start relative to containing codon
frame INTEGER OPTIONAL,
-- for situations in which a raw location isn't sufficient
seq Seq-literal OPTIONAL,
-- reference to the assembly (GenColl ID) for this location
assembly Dbtag OPTIONAL,
hgvs-name VisibleString OPTIONAL,
-- the reference location for this variant
-- NOTE(review): the line above sits directly before the free-text
-- "comment" member and looks misplaced; confirm which member it was
-- meant to describe
comment VisibleString OPTIONAL,
-- problems recorded for this particular placement
exceptions SET OF VariationException OPTIONAL,
dbxrefs SET OF Dbtag OPTIONAL, --e.g. rs#, that are placement-specific
ext SET OF User-object OPTIONAL, --for process-specific placement tags/labels
gene-location INTEGER OPTIONAL, --Same semantics as VariantProperties.gene-location, except placement-specific
-- identifier of this placement; parent-id below refers to another
-- placement by such an id
id Object-id OPTIONAL,
parent-id Object-id OPTIONAL, --id of the placement from which this one was derived
so-terms SEQUENCE OF INTEGER OPTIONAL --Sequence Ontology terms for this placement
}
-- VariationMethod: the set of method codes associated with a variation
-- (required) plus an optional reference location used for sequence-based
-- validation. Used by Variation.method.
VariationMethod ::= SEQUENCE
{
-- sequencing / acquisition method
-- NOTE(review): the value name "snp-genoytyping" (23) looks like a
-- misspelling of "snp-genotyping"; it is left unchanged here because the
-- name is part of the published specification
method SET OF INTEGER {
unknown (0),
bac-acgh (1),
computational (2),
curated (3),
digital-array (4),
expression-array (5),
fish (6),
flanking-sequence (7),
maph (8),
mcd-analysis (9),
mlpa (10),
oea-assembly (11),
oligo-acgh (12),
paired-end (13),
pcr (14),
qpcr (15),
read-depth (16),
roma (17),
rt-pcr (18),
sage (19),
sequence-alignment (20),
sequencing (21),
snp-array (22),
snp-genoytyping (23),
southern (24),
western (25),
optical-mapping (26),
other (255)
},
-- if sequence-based validation methods are used,
-- what reference sequence location validated the presence of this?
reference-location Seq-loc OPTIONAL
}
-- Variation: top-level description of a sequence change. The only required
-- member is the "data" CHOICE, which says what the change is; identifiers,
-- names, placements, supporting evidence, consequences and exceptions are
-- all OPTIONAL. The type is recursive: data.set.variations and
-- consequence.variation both contain Variation. This type is exported from
-- the module.
Variation ::= SEQUENCE
{
-- ids (i.e., SNP rsid / ssid, dbVar nsv/nssv)
-- expected values include 'dbSNP|rs12334', 'dbSNP|ss12345', 'dbVar|nsv1'
--
-- we relate three kinds of IDs here:
-- - our current object's id
-- - the id of this object's parent, if it exists
-- - the sample ID that this item originates from
id Dbtag OPTIONAL,
parent-id Dbtag OPTIONAL,
sample-id SET OF Object-id OPTIONAL,
-- additional Dbtag identifiers; not covered by the three kinds listed above
other-ids SET OF Dbtag OPTIONAL,
-- names and synonyms
-- some variants have well-known canonical names and possible accepted
-- synonyms
name VisibleString OPTIONAL,
synonyms SET OF VisibleString OPTIONAL,
-- tag for comment and descriptions
description VisibleString OPTIONAL,
-- where this beast is seen
-- note that this is a set of locations, and there are no restrictions to
-- the contents to this set.
placements SEQUENCE OF VariantPlacement OPTIONAL,
-- phenotype
phenotype SET OF Phenotype OPTIONAL,
-- sequencing / acquisition method
method VariationMethod OPTIONAL,
-- Note about SNP representation and pertinent fields: allele-frequency,
-- population, quality-codes:
-- The case of multiple alleles for a SNP would be described by
-- parent-feature of type Variation-set.diff-alleles, where the child
-- features of type Variation-inst, all at the same location, would
-- describe individual alleles.
-- population data
population-data SET OF Population-data OPTIONAL,
-- variant properties bit fields
variant-prop VariantProperties OPTIONAL,
-- publication support; same type as in seq-feat
pub Pub-set OPTIONAL,
-- References to external
-- NOTE(review): the sentence above is incomplete in the original;
-- presumably it refers to an external clinical-test record, to be confirmed
clinical-test Dbtag OPTIONAL,
-- what the variation actually is; exactly one alternative is chosen and
-- the member is required
data CHOICE {
unknown NULL,
note VisibleString, --free-form
uniparental-disomy NULL,
-- actual sequence-edit at feat.location
instance Variation-inst,
-- Set of related Variations.
-- Location of the set equals the union of member locations
set SEQUENCE {
type INTEGER {
unknown (0),
compound (1), -- complex change at the same location on the
-- same molecule
products (2), -- different products arising from the same
-- variation in a precursor, e.g. r.[13g>a,
-- 13_88del]
haplotype (3), -- changes on the same allele, e.g.
-- r.[13g>a;15u>c]
genotype (4), -- changes on different alleles in the same
-- genotype, e.g. g.[476C>T]+[476C>T]
mosaic (5), -- different genotypes in the same individual
individual (6), -- same organism; allele relationship unknown,
-- e.g. g.[476C>T(+)183G>C]
population (7), -- population
alleles (8), -- set represents a set of observed alleles
package (9), -- set represents a package of observations at
-- a given location, generally containing
-- asserted + reference
chimeric (10), -- e.g. c.[1C>T//2G>T]
other (255)
},
-- members of the set; each is itself a full Variation
variations SET OF Variation,
name VisibleString OPTIONAL
},
-- variant is a complex and undescribed change at the location
-- This type of variant is known to occur in dbVar submissions
complex NULL,
seq Bioseq -- Sequence as it exists post-alteration
},
-- zero or more consequences of this variation; each element picks one
-- alternative
consequence SET OF CHOICE {
unknown NULL,
splicing NULL, --some effect on splicing
note VisibleString, --freeform
-- Describe resulting variation in the product, e.g. missense,
-- nonsense, silent, neutral, etc in a protein, that arises from
-- THIS variation.
variation Variation,
loss-of-heterozygosity SEQUENCE {
-- In germline comparison, it will be reference genome assembly
-- (default) or reference/normal population. In somatic mutation,
-- it will be a name of the normal tissue.
reference VisibleString OPTIONAL,
-- Name of the testing subject type or the testing tissue.
test VisibleString OPTIONAL
}
} OPTIONAL,
-- Frameshift-related info. Applies only to protein-level variations.
-- see http://www.hgvs.org/mutnomen/recs-prot.html
frameshift SEQUENCE {
phase INTEGER OPTIONAL,
x-length INTEGER OPTIONAL
} OPTIONAL,
-- Additional undescribed extensions
ext SET OF User-object OPTIONAL,
somatic-origin SET OF SEQUENCE {
-- description of the somatic origin itself
source SubSource OPTIONAL,
-- condition related to this origin's type
condition SEQUENCE {
description VisibleString OPTIONAL,
-- reference to BioTerm / other descriptive database
object-id SET OF Dbtag OPTIONAL
} OPTIONAL
} OPTIONAL,
-- problems recorded for this variation as a whole (placement-specific ones
-- live in VariantPlacement.exceptions)
exceptions SET OF VariationException OPTIONAL,
-- NOTE(review): presumably Sequence Ontology terms, as for
-- VariantPlacement.so-terms; note this one is a SET OF while the
-- placement member is a SEQUENCE OF
so-terms SET OF INTEGER OPTIONAL
}
END