-- Go to the SVN repository for this file
-- Go to list of all specification files

-- $Id: pcassay2.asn 92948 2021-02-24 09:44:03Z chengt2 $
-- ===========================================================================
--                            PUBLIC DOMAIN NOTICE
--               National Center for Biotechnology Information
--  This software/database is a "United States Government Work" under the
--  terms of the United States Copyright Act.  It was written as part of
--  the author's official duties as a United States Government employee and
--  thus cannot be copyrighted.  This software/database is freely available
--  to the public for use. The National Library of Medicine and the U.S.
--  Government have not placed any restriction on its use or reproduction.
--  Although all reasonable efforts have been taken to ensure the accuracy
--  and reliability of the software and data, the NLM and the U.S.
--  Government do not and cannot warrant the performance or results that
--  may be obtained by using this software or data. The NLM and the U.S.
--  Government disclaim all warranties, express or implied, including
--  warranties of performance, merchantability or fitness for any particular
--  purpose.
--  Please cite the author in any work or product based on this material.
-- ===========================================================================
-- Authors:  NCBI Structure Group
-- File Description:
--      ASN.1 definitions for PubChem biological assay data database (PCAssay2)
-- ===========================================================================


IMPORTS BioSource                        FROM NCBI-BioSource
        Date                             FROM NCBI-General
        PC-ID, PC-Source, PC-XRefData    FROM NCBI-PCSubstance;


-- Container for multiple Assay Data Submissions
--   Each element is one PC-AssaySubmit (an assay reference or description, plus optional data and revocations)
PC-AssayContainer ::= SEQUENCE OF PC-AssaySubmit

-- Container for Data Depositions and Assay Definitions
--   Exactly one "assay" alternative identifies or describes the assay; "data" and "revoke" are both optional.
PC-AssaySubmit ::= SEQUENCE {
    assay             CHOICE {                               -- Assay Description or pre-existing Identifier
                          aid           INTEGER,             --   Assay Identifier
                          aid-source    PC-Source,           --   External Assay Identifier
                          descr         PC-AssayDescription, --   Assay Description (new or updated)
                          aidver        PC-ID                --   Assay Identifier/Version (for internal use)
                      },
    data              SEQUENCE OF PC-AssayResults  OPTIONAL, -- Assay Data Deposition (vector)
    revoke            SEQUENCE OF INTEGER          OPTIONAL  -- List of SID's whose data is to be suppressed (vector)
}

-- Container for multiple Assay Result Sets
-- PC-AssayResultsSet ::= SEQUENCE OF PC-AssayResults

-- Assay Results provided for a given Substance tested, with respect to the results types defined in the
--   referenced Assay Description
PC-AssayResults ::= SEQUENCE {
    -- Internal/External Tracking Information
    sid               INTEGER,                               -- Tested Substance ID/Version  [Either valid ID or,
                                                             --   if "sid-source" is used, this is a "0" value]
                                                             --   Note: A valid ID is greater than "0"
    sid-source        PC-Source                    OPTIONAL, -- External Identifier for this Substance
                                                             --   Note: May be used in-lieu of "sid"
                                                             --   Note: This is non-optional if "sid" is "0"
    version           INTEGER                      OPTIONAL, -- Version identifier for this AID-SID Result
                                                             --   Note: Incoming data should set this to be "0"

    -- Data Annotation/Qualifier and URL to further Depositor Information
    comment           UTF8String                   OPTIONAL, -- Annotation or qualifier for this Result

    -- Assay Result Data for this Sample
    --   Note: Users need populate only those "tid"s, for which there is data, in any order.
    outcome           INTEGER {                              -- Assay Outcome
                          inactive        (1),               --   Substance is considered Inactive
                          active          (2),               --   Substance is considered Active
                          inconclusive    (3),               --   Substance is Inconclusive
                          unspecified     (4),               --   Substance Outcome is Unspecified
                          probe           (5)                --   Substance is considered a Probe
                                                             --   NOTE(review): comment previously duplicated
                                                             --   "Unspecified"; confirm intended wording
                      }               DEFAULT unspecified,   -- "unspecified" is assumed when outcome is omitted
    rank              INTEGER                      OPTIONAL, -- Rank of Assay Outcome (for result ordering)
                                                             --   Note: Larger numbers are more active
    data              SEQUENCE OF PC-AssayData     OPTIONAL, -- Assay Data Reported for this SID (vector)
    url               UTF8String                   OPTIONAL, -- Depositor provided URL for this Result
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- Annotated Cross-Reference Information
                                                             --   Note: to be removed;
                                                             --   instead regular TIDs will be generated,
                                                             --   which are then annotated by xref type
    date              Date                         OPTIONAL  -- PubChem Release Date
}

-- Assay Readouts/Results for a Tested Substance
--   One PC-AssayData carries a single value for a single Result Field (TID).
PC-AssayData ::= SEQUENCE {
    tid               INTEGER,                            -- Assay Result Field Type ID (TID)
                                                          --   Note: Result Field ID's must be greater than "0"
    value             CHOICE {                            -- Assay Result, must be the same type as defined for TID
                          ival    INTEGER,                --   Integer-valued result
                          fval    REAL,                   --   Real-valued (floating point) result
                          bval    BOOLEAN,                --   Boolean-valued result
                          sval    UTF8String              --   String-valued result
                      }
}

-- Assay Description provided by an Organization that describes the assay/protocol performed and defines the
--   measured end-points and parameters to be stored.  An Assay Description is not a database table.  You can
--   define as many Result Definitions as needed and they need not be used by all Substances tested.
-- Assay Descriptions can be modified on both description text and Result Definitions after initial submission
-- as desired, and such updates will be tracked in PubChem

PC-AssayDescription ::= SEQUENCE {
    -- Internal/External Tracking Information
    aid               PC-ID,                                 -- Assay Description ID/Version  [Either valid ID
                                                             --   or, if "aid-source" is used, a "0" dummy value]
                                                             --   Note: Version is for internal use (only?)
                                                             --   Note: A valid ID is greater than "0"
    aid-source        PC-Source                    OPTIONAL, -- External Identifier for this Assay Description
                                                             --   Note: May be used in-lieu of "aid"
                                                             --   Note: This is non-optional if "aid" ID is "0"

    -- Assay Description Information
    name              UTF8String,                            -- Short Assay Name (for display purposes)
    description       SEQUENCE OF UTF8String       OPTIONAL, -- Description of Assay
    protocol          SEQUENCE OF UTF8String       OPTIONAL, -- Procedure used to generate results
    comment           SEQUENCE OF UTF8String       OPTIONAL, -- Comments or additional information
    xref              SEQUENCE OF PC-AnnotatedXRef OPTIONAL, -- Annotated Cross-Reference Information

    -- Allowed Assay Result Types
    results           SEQUENCE OF PC-ResultType    OPTIONAL, -- Result Definitions (vector)

    -- Additional Information
    -- pub               SEQUENCE OF Pub                 OPTIONAL, Depositor provided publications for this assay (never used)
    revision          INTEGER                         OPTIONAL, -- Revision identifier for textual description
    target            SEQUENCE OF PC-AssayTargetInfo  OPTIONAL, -- Target information
    activity-outcome-method  INTEGER {                         -- Assay Outcome Qualifier
                               other              (0),          --   All Other Type
                               screening          (1),          --   Primary Screen Assay
                               confirmatory       (2),          --   Confirmatory Assay
                               summary            (3)           --   Probe Summary Assay
    }                                                 OPTIONAL,

    dr                SEQUENCE OF PC-AssayDRAttr      OPTIONAL, -- Dose-Response Attribution
    substance-type    INTEGER {
                         small-molecule                (1),
                         nucleotide                    (2),
                         other                       (255)
                      }   OPTIONAL,                             -- to distinguish the type of substance used in the screening

    -- Grant and project category information
    grant-number      SEQUENCE OF VisibleString     OPTIONAL,   -- grant proposal number
                                                                -- required for 'MLSCN' & 'MLPCN' projects
    project-category  INTEGER {                   -- to distinguish projects funded through MLSCN, MLPCN or other
                        mlscn                  (1), -- assay depositions from MLSCN screen center
                        mlpcn                  (2), -- assay depositions from MLPCN screen center
                        mlscn-ap               (3), -- assay depositions from MLSCN assay provider
                        mlpcn-ap               (4), -- assay depositions from MLPCN assay provider
                        journal-article        (5), -- to be deprecated and replaced by option 7, 8 & 9
                        assay-vendor           (6), -- assay depositions from assay vendors
                        literature-extracted   (7), -- data from literature, extracted by curators
                        literature-author      (8), -- data from literature, submitted by author of articles
                        literature-publisher   (9), -- data from literature, submitted by journals/publishers
                        rnaigi                (10), -- RNAi screenings from RNAi Global Initiative
                        other                (255)  -- any project category not listed above
                      }                 OPTIONAL,

    assay-group       SEQUENCE OF VisibleString   OPTIONAL,     -- annotation for assay group information, e.g. this
                                                                -- assay belongs to a group of assays associated by
                                                                -- a unique name, e.g. the value assigned to 'assay-group',
                                                                -- assays belonging to this group can be
                                                                -- retrieved using this unique name
                                                                -- it is depositor's responsibility to make the name
                                                                -- distinct if necessary
                                                                -- this can be used as the mechanism to specify 'related bioassays'
                                                                -- beforehand so it is not necessary to update the descriptions of
                                                                -- related assays when a new group member is submitted to PubChem

    -- is-mlp-late-stage       BOOLEAN OPTIONAL,                   verification for 'late-stage-data' deposition (never used)
    categorized-comment SEQUENCE OF PC-CategorizedComment OPTIONAL
                                                                -- to report categorized description/comment by associating with a category title
}

-- Definition for Categorized description/comment
-- This field is added to provide flexibility for depositors to present textual description/comments in a desirable way
-- and to facilitate information validation by the depositor and data exchange with PubChem
PC-CategorizedComment ::= SEQUENCE {
    title     UTF8String,                                    -- title (category) for the description/comment
    comment   SEQUENCE OF UTF8String                         -- description/comment content (one or more strings)
}

--  Assay Dose-response attribute information used to define a set of readouts
--    as being part of a dose-response curve (for curve plotting/analysis)
PC-AssayDRAttr ::= SEQUENCE {
    id                INTEGER,                               -- Unique dose-response test set identifier
                                                             --   Note: A valid ID is greater than "0"
    descr             UTF8String               OPTIONAL,     -- Dose-Response Curve Description (used as curve title)
    dn                UTF8String               OPTIONAL,     -- Dose Axis Description (used as axis name)
    rn                UTF8String               OPTIONAL,     -- Response Axis Description (used as axis name)
    type              INTEGER {                              -- Origin of the dose-response data points
                        experimental (0),                    --   data points measured directly by experiment
                        calculated   (1)                     --   data points derived from fitted curve
                      }          OPTIONAL
}

-- Molecular target information provided by an organization describes the functionality of the target,
-- facilitates the linking between PubChem bioassays, and links the target molecule to other NCBI resources
PC-AssayTargetInfo ::= SEQUENCE {
   name                    UTF8String,                     -- Molecular name of target
   mol-id                  CHOICE {                           -- database and identifier of the target molecule
                               gene-id              INTEGER,       -- target is a NCBI Gene ID
                               protein-accession    VisibleString, -- target is a NCBI Protein Accession
                               nucleotide-accession VisibleString, -- target is a NCBI Nucleotide Accession
                               other                VisibleString, -- target is beyond supported type (format = TYPE::RESOURCE::IDENTIFIER)
                               tax-id               INTEGER        -- target is a NCBI Taxonomy ID
                           },
   organism                BioSource               OPTIONAL,  -- Target Organism
   descr                   UTF8String              OPTIONAL,  -- Target Description  (e.g., cellular functionality and location)
   comment                 SEQUENCE OF UTF8String  OPTIONAL   -- Comments or Additional Information
}

-- Annotated Cross-Reference (XRef) Information to allow the XRef to be qualified, as to its meaning or context
PC-AnnotatedXRef ::= SEQUENCE {
    xref              PC-XRefData,                    -- Cross-Reference Information
    comment           UTF8String         OPTIONAL,    -- Annotation qualifier describing Cross-Reference meaning
    type              INTEGER {                       -- Role of this Cross-Reference
                         pcit        (1),             --   primary PMID/citation directly associated with the current assay data
                         pgene       (2)              --   gene encoding the protein assay target
                      }                  OPTIONAL
}

-- Definition of Allowed Result Types for a given Assay
PC-ResultType ::= SEQUENCE {
    -- Tracking or Description Information
    tid               INTEGER,                             -- Assay Result Field Type ID (TID)
    name              UTF8String,                          -- Result Field Name (short name for display)
    description       SEQUENCE OF UTF8String  OPTIONAL,    -- Result Field Description

    -- Result Data Type and Validation Information
    type             INTEGER {                             -- Result Data Type
                         float            (1),
                         int              (2),
                         bool             (3),
                         string           (4)
                     },
    constraints      CHOICE {                              -- Allowed Values, used for validating incoming data
                         -- If type is "float"
                         fset      SEQUENCE OF REAL,       --   Allowed values must be equal to one of these
                         fmin      REAL,                   --   Allowed values (x) must be [ fmin <= x ]
                         fmax      REAL,                   --   Allowed values (x) must be [ x <= fmax ]
                         frange    PC-RealMinMax,          --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "int"
                         iset      SEQUENCE OF INTEGER,    --   Allowed values must be equal to one of these
                         imin      INTEGER,                --   Allowed values (x) must be [ imin <= x ]
                         imax      INTEGER,                --   Allowed values (x) must be [ x <= imax ]
                         irange    PC-IntegerMinMax,       --   Minimum/Maximum Range [ min <= x <= max ]

                         -- If type is "string"
                         sset   SEQUENCE OF VisibleString  --   Allowed values must be equal to one of these
                     }                           OPTIONAL,

    -- Unit information provides the units for the values reported for this TID.  For example, if the values
    --   reported for this TID are a concentration, e.g., micro-molar, setting the unit "um" allows PubChem to
    --   know that the value, e.g., "1.3", is actually "1.3 uM".  This also allows PubChem to properly report the
    --   units when displaying the reported values for this TID.  If the enumerated units provided below are
    --   insufficient, you may represent the units as a string in the optional "sunit" field (see below).
    unit             INTEGER {                             -- Units for Value
                         ppt              (1),             -- Parts per Thousand
                         ppm              (2),             -- Parts per Million
                         ppb              (3),             -- Parts per Billion
                         mm               (4),             -- milliM
                         um               (5),             -- microM
                         nm               (6),             -- nanoM
                         pm               (7),             -- picoM
                         fm               (8),             -- femtoM
                         mgml             (9),             -- milligrams per mL
                         ugml            (10),             -- micrograms per mL
                         ngml            (11),             -- nanograms per mL
                         pgml            (12),             -- picograms per mL
                         fgml            (13),             -- femtograms per mL
                         m               (14),             -- Molar
                         percent         (15),             -- Percent
                         ratio           (16),             -- Ratio
                         sec             (17),             -- Seconds
                         rsec            (18),             -- Reciprocal Seconds
                         min             (19),             -- Minutes
                         rmin            (20),             -- Reciprocal Minutes
                         day             (21),             -- Days
                         rday            (22),             -- Reciprocal Days
                         ml-min-kg       (23),             -- milliliter / minute / kilogram
                         l-kg            (24),             -- liter / kilogram
                         hr-ng-ml        (25),             -- hour * nanogram / milliliter
                         cm-sec          (26),             -- centimeter / second
                         mg-kg           (27),             -- milligram / kilogram
                         none           (254),
                         unspecified    (255)
                     }                           OPTIONAL,

    -- ATTENTION: sunit field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    sunit            VisibleString               OPTIONAL, -- Unit Type (as a String)

    -- Value Transform information qualifies the values reported for this TID.  For example, if the values
    --   reported for this TID are "-Log10 GI50", you may want to consider setting
    --   the "nlog" value below.  In doing so, PubChem would know that the value, e.g., "5.0"
    --   is actually "1.0e-5".  If the transformation applied is not listed, you may represent
    --   this transformation as a string in the "stransform" (see below) for eventual inclusion
    --   in the enumerated transform list below.
    -- ATTENTION: transform field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    transform        INTEGER {                              -- Value Type Details
                         linear           (1),              -- Linear Scale (x)
                         ln               (2),              -- Natural Log Scale (ln x)
                         log              (3),              -- Log Base 10 Scale (log10 x)
                         reciprocal       (4),              -- Reciprocal Scale (1/x)
                         negative         (5),              -- Negative Linear Scale (-x)
                         nlog             (6),              -- Negative Log Base 10 Scale (-log10 x)
                         nln              (7)               -- Negative Natural Log Scale (-ln x)
                     }                           OPTIONAL,

    -- ATTENTION: stransform field is DEPRECATED. It is no longer
    --            supported and remains for legacy data only.
    -- stransform       VisibleString               OPTIONAL,  Value Transform Type as a string (never used)

    tc               PC-ConcentrationAttr        OPTIONAL,  -- Tested concentration attribute
    ac               BOOLEAN                     OPTIONAL,  -- if true, indicates that this TID field
                                                            -- provides active concentration summary by
                                                            -- reporting the concentration which produces
                                                            -- 50% of the maximum possible biological response
                                                            -- such as IC50, EC50, AC50, GI50 etc.
                                                            -- or by reporting constant parameters such as Ki,
                                                            -- that based on which the activity outcome in this assay is called
    ac-qualifier     BOOLEAN                     OPTIONAL, -- endpoint qualifier (e.g. <, <=, =, >, >=) associated with the ac field above

    annot            INTEGER {
                           pmid                          (1), -- PubMed ID
                           mmdb                          (2), -- MMDB ID
                           url                           (3), -- indicate TID data is a url that provides supplementary information
                        -- protein-gi                    (4), GenBank General ID (GI) for a Protein
                        -- nucleotide-gi                 (5), GenBank General ID (GI) for a Nucleotide
                           taxonomy                      (6), -- Taxonomy ID for an Organism
                           mim                           (7), -- MIM, Mendelian Inheritance in Man, ID
                           gene                          (8), -- Entrez Gene ID
                           probe                         (9), -- Entrez Probe ID
                           aid                          (10), -- PubChem BioAssay ID, may be used in 'Summary' assay
                           sid                          (11), -- PubChem Substance ID, may be used in 'Summary' assay
                           cid                          (12), -- PubChem Compound ID
                        -- protein-target-gi            (13), GenBank General ID (GI) for a Protein target
                        -- biosystems-target-id         (14), NCBI BioSystems ID
                           target-name                  (15), -- target name
                           target-descr                 (16), -- brief target description
                           target-tax-id                (17), -- NCBI Taxonomy ID for target molecule
                           gene-target-id               (18), -- NCBI Gene ID for a gene target
                        -- dna-nucleotide-target-gi     (19), GenBank General ID (GI) for a DNA Nucleotide target
                        -- rna-nucleotide-target-gi     (20), GenBank General ID (GI) for a RNA Nucleotide target
                           protein-target-accession     (21), -- GenBank Accession for a Protein target
                           nucleotide-target-accession  (22), -- GenBank Accession for a DNA/RNA Nucleotide target
                           other                       (255)  -- for identifier types not currently supported
                     }   OPTIONAL                           -- treat substance associated cross-reference as regular TID
                                                            -- web servers would make a link
                                                            -- to the corresponding record in Entrez databases
                                                            -- treat substance associated target information as regular TID
                                                            -- for example, for RNAi screening data, each 'substance' may
                                                            -- correspond to a specific gene target
}

--The concentration attribute is to indicate that the readout under this test result field is biological concentration-response data, the attribute provides the value and unit of the tested concentration
PC-ConcentrationAttr ::= SEQUENCE {
    concentration    REAL,                                 -- Tested concentration value, expressed in "unit"
    unit             INTEGER {                             -- Units for Concentration
                         um               (5)              -- microM
                     },
    dr-id            INTEGER                     OPTIONAL  -- Dose-Response Attribution ID (if applicable)
}

-- Minimum and Maximum Constraints on an Integer Value (used for validating incoming data)
PC-IntegerMinMax ::= SEQUENCE {
    min               INTEGER,                             -- Minimum Value Allowed
    max               INTEGER                              -- Maximum Value Allowed
}

-- Minimum and Maximum Constraints on a Real Value (used for validating incoming data)
PC-RealMinMax ::= SEQUENCE {
    min               REAL,                                -- Minimum Value Allowed
    max               REAL                                 -- Maximum Value Allowed
}